Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Support/APInt.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements a class to represent arbitrary precision integer
10
// constant values and provide a variety of arithmetic operations on them.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/ADT/APInt.h"
15
#include "llvm/ADT/ArrayRef.h"
16
#include "llvm/ADT/FoldingSet.h"
17
#include "llvm/ADT/Hashing.h"
18
#include "llvm/ADT/Optional.h"
19
#include "llvm/ADT/SmallString.h"
20
#include "llvm/ADT/StringRef.h"
21
#include "llvm/ADT/bit.h"
22
#include "llvm/Config/llvm-config.h"
23
#include "llvm/Support/Debug.h"
24
#include "llvm/Support/ErrorHandling.h"
25
#include "llvm/Support/MathExtras.h"
26
#include "llvm/Support/raw_ostream.h"
27
#include <climits>
28
#include <cmath>
29
#include <cstdlib>
30
#include <cstring>
31
using namespace llvm;
32
33
#define DEBUG_TYPE "apint"
34
35
/// A utility function for allocating memory, checking for allocation failures,
36
/// and ensuring the contents are zeroed.
37
20.9M
inline static uint64_t* getClearedMemory(unsigned numWords) {
38
20.9M
  uint64_t *result = new uint64_t[numWords];
39
20.9M
  memset(result, 0, numWords * sizeof(uint64_t));
40
20.9M
  return result;
41
20.9M
}
42
43
/// A utility function for allocating memory and checking for allocation
44
/// failure.  The content is not zeroed.
45
162M
inline static uint64_t* getMemory(unsigned numWords) {
46
162M
  return new uint64_t[numWords];
47
162M
}
48
49
/// A utility function that converts a character to a digit.
50
6.25M
inline static unsigned getDigit(char cdigit, uint8_t radix) {
51
6.25M
  unsigned r;
52
6.25M
53
6.25M
  if (radix == 16 || 
radix == 366.23M
) {
54
13.9k
    r = cdigit - '0';
55
13.9k
    if (r <= 9)
56
12.5k
      return r;
57
1.40k
58
1.40k
    r = cdigit - 'A';
59
1.40k
    if (r <= radix - 11U)
60
569
      return r + 10;
61
837
62
837
    r = cdigit - 'a';
63
837
    if (r <= radix - 11U)
64
837
      return r + 10;
65
0
66
0
    radix = 10;
67
0
  }
68
6.25M
69
6.25M
  r = cdigit - '0';
70
6.23M
  if (r < radix)
71
6.23M
    return r;
72
0
73
0
  return -1U;
74
0
}
75
76
77
20.9M
void APInt::initSlowCase(uint64_t val, bool isSigned) {
78
20.9M
  U.pVal = getClearedMemory(getNumWords());
79
20.9M
  U.pVal[0] = val;
80
20.9M
  if (isSigned && 
int64_t(val) < 05.38M
)
81
8.76M
    
for (unsigned i = 1; 4.37M
i < getNumWords();
++i4.38M
)
82
4.38M
      U.pVal[i] = WORDTYPE_MAX;
83
20.9M
  clearUnusedBits();
84
20.9M
}
85
86
53.6M
void APInt::initSlowCase(const APInt& that) {
87
53.6M
  U.pVal = getMemory(getNumWords());
88
53.6M
  memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89
53.6M
}
90
91
178k
void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92
178k
  assert(BitWidth && "Bitwidth too small");
93
178k
  assert(bigVal.data() && "Null pointer detected!");
94
178k
  if (isSingleWord())
95
160k
    U.VAL = bigVal[0];
96
17.8k
  else {
97
17.8k
    // Get memory, cleared to 0
98
17.8k
    U.pVal = getClearedMemory(getNumWords());
99
17.8k
    // Calculate the number of words to copy
100
17.8k
    unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101
17.8k
    // Copy the words from bigVal to pVal
102
17.8k
    memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103
17.8k
  }
104
178k
  // Make sure unused high bits are cleared
105
178k
  clearUnusedBits();
106
178k
}
107
108
APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109
95.6k
  : BitWidth(numBits) {
110
95.6k
  initFromArray(bigVal);
111
95.6k
}
112
113
APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114
82.9k
  : BitWidth(numBits) {
115
82.9k
  initFromArray(makeArrayRef(bigVal, numWords));
116
82.9k
}
117
118
APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119
4.27M
  : BitWidth(numbits) {
120
4.27M
  assert(BitWidth && "Bitwidth too small");
121
4.27M
  fromString(numbits, Str, radix);
122
4.27M
}
123
124
6.24M
void APInt::reallocate(unsigned NewBitWidth) {
125
6.24M
  // If the number of words is the same we can just change the width and stop.
126
6.24M
  if (getNumWords() == getNumWords(NewBitWidth)) {
127
418k
    BitWidth = NewBitWidth;
128
418k
    return;
129
418k
  }
130
5.82M
131
5.82M
  // If we have an allocation, delete it.
132
5.82M
  if (!isSingleWord())
133
8.12k
    delete [] U.pVal;
134
5.82M
135
5.82M
  // Update BitWidth.
136
5.82M
  BitWidth = NewBitWidth;
137
5.82M
138
5.82M
  // If we are supposed to have an allocation, create it.
139
5.82M
  if (!isSingleWord())
140
5.82M
    U.pVal = getMemory(getNumWords());
141
5.82M
}
142
143
696k
void APInt::AssignSlowCase(const APInt& RHS) {
144
696k
  // Don't do anything for X = X
145
696k
  if (this == &RHS)
146
27.1k
    return;
147
669k
148
669k
  // Adjust the bit width and handle allocations as necessary.
149
669k
  reallocate(RHS.getBitWidth());
150
669k
151
669k
  // Copy the data.
152
669k
  if (isSingleWord())
153
8.12k
    U.VAL = RHS.U.VAL;
154
661k
  else
155
661k
    memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156
669k
}
157
158
/// This method 'profiles' an APInt for use with FoldingSet.
159
6.50M
void APInt::Profile(FoldingSetNodeID& ID) const {
160
6.50M
  ID.AddInteger(BitWidth);
161
6.50M
162
6.50M
  if (isSingleWord()) {
163
6.50M
    ID.AddInteger(U.VAL);
164
6.50M
    return;
165
6.50M
  }
166
6
167
6
  unsigned NumWords = getNumWords();
168
18
  for (unsigned i = 0; i < NumWords; 
++i12
)
169
12
    ID.AddInteger(U.pVal[i]);
170
6
}
171
172
/// Prefix increment operator. Increments the APInt by one.
173
174M
APInt& APInt::operator++() {
174
174M
  if (isSingleWord())
175
169M
    ++U.VAL;
176
5.12M
  else
177
5.12M
    tcIncrement(U.pVal, getNumWords());
178
174M
  return clearUnusedBits();
179
174M
}
180
181
/// Prefix decrement operator. Decrements the APInt by one.
182
793k
APInt& APInt::operator--() {
183
793k
  if (isSingleWord())
184
793k
    --U.VAL;
185
222
  else
186
222
    tcDecrement(U.pVal, getNumWords());
187
793k
  return clearUnusedBits();
188
793k
}
189
190
/// Adds the RHS APint to this APInt.
191
/// @returns this, after addition of RHS.
192
/// Addition assignment operator.
193
423M
APInt& APInt::operator+=(const APInt& RHS) {
194
423M
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195
423M
  if (isSingleWord())
196
420M
    U.VAL += RHS.U.VAL;
197
2.42M
  else
198
2.42M
    tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199
423M
  return clearUnusedBits();
200
423M
}
201
202
464M
APInt& APInt::operator+=(uint64_t RHS) {
203
464M
  if (isSingleWord())
204
454M
    U.VAL += RHS;
205
9.82M
  else
206
9.82M
    tcAddPart(U.pVal, RHS, getNumWords());
207
464M
  return clearUnusedBits();
208
464M
}
209
210
/// Subtracts the RHS APInt from this APInt
211
/// @returns this, after subtraction
212
/// Subtraction assignment operator.
213
126M
APInt& APInt::operator-=(const APInt& RHS) {
214
126M
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215
126M
  if (isSingleWord())
216
122M
    U.VAL -= RHS.U.VAL;
217
3.66M
  else
218
3.66M
    tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219
126M
  return clearUnusedBits();
220
126M
}
221
222
173M
APInt& APInt::operator-=(uint64_t RHS) {
223
173M
  if (isSingleWord())
224
168M
    U.VAL -= RHS;
225
4.54M
  else
226
4.54M
    tcSubtractPart(U.pVal, RHS, getNumWords());
227
173M
  return clearUnusedBits();
228
173M
}
229
230
169M
APInt APInt::operator*(const APInt& RHS) const {
231
169M
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232
169M
  if (isSingleWord())
233
134M
    return APInt(BitWidth, U.VAL * RHS.U.VAL);
234
34.8M
235
34.8M
  APInt Result(getMemory(getNumWords()), getBitWidth());
236
34.8M
237
34.8M
  tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238
34.8M
239
34.8M
  Result.clearUnusedBits();
240
34.8M
  return Result;
241
34.8M
}
242
243
2.83M
void APInt::AndAssignSlowCase(const APInt& RHS) {
244
2.83M
  tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245
2.83M
}
246
247
7.53M
void APInt::OrAssignSlowCase(const APInt& RHS) {
248
7.53M
  tcOr(U.pVal, RHS.U.pVal, getNumWords());
249
7.53M
}
250
251
204k
void APInt::XorAssignSlowCase(const APInt& RHS) {
252
204k
  tcXor(U.pVal, RHS.U.pVal, getNumWords());
253
204k
}
254
255
24.4M
APInt& APInt::operator*=(const APInt& RHS) {
256
24.4M
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257
24.4M
  *this = *this * RHS;
258
24.4M
  return *this;
259
24.4M
}
260
261
47.3M
APInt& APInt::operator*=(uint64_t RHS) {
262
47.3M
  if (isSingleWord()) {
263
47.2M
    U.VAL *= RHS;
264
47.2M
  } else {
265
37.5k
    unsigned NumWords = getNumWords();
266
37.5k
    tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267
37.5k
  }
268
47.3M
  return clearUnusedBits();
269
47.3M
}
270
271
47.9M
bool APInt::EqualSlowCase(const APInt& RHS) const {
272
47.9M
  return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273
47.9M
}
274
275
800M
int APInt::compare(const APInt& RHS) const {
276
800M
  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277
800M
  if (isSingleWord())
278
769M
    return U.VAL < RHS.U.VAL ? 
-1338M
:
U.VAL > RHS.U.VAL430M
;
279
31.1M
280
31.1M
  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281
31.1M
}
282
283
243M
int APInt::compareSigned(const APInt& RHS) const {
284
243M
  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285
243M
  if (isSingleWord()) {
286
230M
    int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287
230M
    int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288
230M
    return lhsSext < rhsSext ? 
-1184M
:
lhsSext > rhsSext45.8M
;
289
230M
  }
290
13.3M
291
13.3M
  bool lhsNeg = isNegative();
292
13.3M
  bool rhsNeg = RHS.isNegative();
293
13.3M
294
13.3M
  // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295
13.3M
  if (lhsNeg != rhsNeg)
296
5.84M
    return lhsNeg ? 
-11.64M
:
14.20M
;
297
7.50M
298
7.50M
  // Otherwise we can just use an unsigned comparison, because even negative
299
7.50M
  // numbers compare correctly this way if both have the same signed-ness.
300
7.50M
  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301
7.50M
}
302
303
1.84M
void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304
1.84M
  unsigned loWord = whichWord(loBit);
305
1.84M
  unsigned hiWord = whichWord(hiBit);
306
1.84M
307
1.84M
  // Create an initial mask for the low word with zeros below loBit.
308
1.84M
  uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309
1.84M
310
1.84M
  // If hiBit is not aligned, we need a high mask.
311
1.84M
  unsigned hiShiftAmt = whichBit(hiBit);
312
1.84M
  if (hiShiftAmt != 0) {
313
78.4k
    // Create a high mask with zeros above hiBit.
314
78.4k
    uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315
78.4k
    // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316
78.4k
    // set the bits in hiWord.
317
78.4k
    if (hiWord == loWord)
318
19.8k
      loMask &= hiMask;
319
58.6k
    else
320
58.6k
      U.pVal[hiWord] |= hiMask;
321
78.4k
  }
322
1.84M
  // Apply the mask to the low word.
323
1.84M
  U.pVal[loWord] |= loMask;
324
1.84M
325
1.84M
  // Fill any words between loWord and hiWord with all ones.
326
2.07M
  for (unsigned word = loWord + 1; word < hiWord; 
++word223k
)
327
223k
    U.pVal[word] = WORDTYPE_MAX;
328
1.84M
}
329
330
/// Toggle every bit to its opposite value.
331
5.66M
void APInt::flipAllBitsSlowCase() {
332
5.66M
  tcComplement(U.pVal, getNumWords());
333
5.66M
  clearUnusedBits();
334
5.66M
}
335
336
/// Toggle a given bit to its opposite value whose position is given
337
/// as "bitPosition".
338
/// Toggles a given bit to its opposite value.
339
0
void APInt::flipBit(unsigned bitPosition) {
340
0
  assert(bitPosition < BitWidth && "Out of the bit-width range!");
341
0
  if ((*this)[bitPosition]) clearBit(bitPosition);
342
0
  else setBit(bitPosition);
343
0
}
344
345
2.26M
void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346
2.26M
  unsigned subBitWidth = subBits.getBitWidth();
347
2.26M
  assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348
2.26M
         "Illegal bit insertion");
349
2.26M
350
2.26M
  // Insertion is a direct copy.
351
2.26M
  if (subBitWidth == BitWidth) {
352
6.19k
    *this = subBits;
353
6.19k
    return;
354
6.19k
  }
355
2.25M
356
2.25M
  // Single word result can be done as a direct bitmask.
357
2.25M
  if (isSingleWord()) {
358
1.35M
    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359
1.35M
    U.VAL &= ~(mask << bitPosition);
360
1.35M
    U.VAL |= (subBits.U.VAL << bitPosition);
361
1.35M
    return;
362
1.35M
  }
363
903k
364
903k
  unsigned loBit = whichBit(bitPosition);
365
903k
  unsigned loWord = whichWord(bitPosition);
366
903k
  unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367
903k
368
903k
  // Insertion within a single word can be done as a direct bitmask.
369
903k
  if (loWord == hi1Word) {
370
903k
    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371
903k
    U.pVal[loWord] &= ~(mask << loBit);
372
903k
    U.pVal[loWord] |= (subBits.U.VAL << loBit);
373
903k
    return;
374
903k
  }
375
74
376
74
  // Insert on word boundaries.
377
74
  if (loBit == 0) {
378
64
    // Direct copy whole words.
379
64
    unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380
64
    memcpy(U.pVal + loWord, subBits.getRawData(),
381
64
           numWholeSubWords * APINT_WORD_SIZE);
382
64
383
64
    // Mask+insert remaining bits.
384
64
    unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385
64
    if (remainingBits != 0) {
386
3
      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387
3
      U.pVal[hi1Word] &= ~mask;
388
3
      U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389
3
    }
390
64
    return;
391
64
  }
392
10
393
10
  // General case - set/clear individual bits in dst based on src.
394
10
  // TODO - there is scope for optimization here, but at the moment this code
395
10
  // path is barely used so prefer readability over performance.
396
554
  
for (unsigned i = 0; 10
i != subBitWidth;
++i544
) {
397
544
    if (subBits[i])
398
202
      setBit(bitPosition + i);
399
342
    else
400
342
      clearBit(bitPosition + i);
401
544
  }
402
10
}
403
404
4.18M
APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
405
4.18M
  assert(numBits > 0 && "Can't extract zero bits");
406
4.18M
  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
407
4.18M
         "Illegal bit extraction");
408
4.18M
409
4.18M
  if (isSingleWord())
410
2.98M
    return APInt(numBits, U.VAL >> bitPosition);
411
1.20M
412
1.20M
  unsigned loBit = whichBit(bitPosition);
413
1.20M
  unsigned loWord = whichWord(bitPosition);
414
1.20M
  unsigned hiWord = whichWord(bitPosition + numBits - 1);
415
1.20M
416
1.20M
  // Single word result extracting bits from a single word source.
417
1.20M
  if (loWord == hiWord)
418
1.20M
    return APInt(numBits, U.pVal[loWord] >> loBit);
419
120
420
120
  // Extracting bits that start on a source word boundary can be done
421
120
  // as a fast memory copy.
422
120
  if (loBit == 0)
423
106
    return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
424
14
425
14
  // General case - shift + copy source words directly into place.
426
14
  APInt Result(numBits, 0);
427
14
  unsigned NumSrcWords = getNumWords();
428
14
  unsigned NumDstWords = Result.getNumWords();
429
14
430
14
  uint64_t *DestPtr = Result.isSingleWord() ? 
&Result.U.VAL11
:
Result.U.pVal3
;
431
32
  for (unsigned word = 0; word < NumDstWords; 
++word18
) {
432
18
    uint64_t w0 = U.pVal[loWord + word];
433
18
    uint64_t w1 =
434
18
        (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 
00
;
435
18
    DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
436
18
  }
437
14
438
14
  return Result.clearUnusedBits();
439
14
}
440
441
80
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
442
80
  assert(!str.empty() && "Invalid string length");
443
80
  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
444
80
          radix == 36) &&
445
80
         "Radix should be 2, 8, 10, 16, or 36!");
446
80
447
80
  size_t slen = str.size();
448
80
449
80
  // Each computation below needs to know if it's negative.
450
80
  StringRef::iterator p = str.begin();
451
80
  unsigned isNegative = *p == '-';
452
80
  if (*p == '-' || 
*p == '+'45
) {
453
55
    p++;
454
55
    slen--;
455
55
    assert(slen && "String is only a sign, needs a value.");
456
55
  }
457
80
458
80
  // For radixes of power-of-two values, the bits required is accurately and
459
80
  // easily computed
460
80
  if (radix == 2)
461
15
    return slen + isNegative;
462
65
  if (radix == 8)
463
15
    return slen * 3 + isNegative;
464
50
  if (radix == 16)
465
19
    return slen * 4 + isNegative;
466
31
467
31
  // FIXME: base 36
468
31
469
31
  // This is grossly inefficient but accurate. We could probably do something
470
31
  // with a computation of roughly slen*64/20 and then adjust by the value of
471
31
  // the first few digits. But, I'm not sure how accurate that could be.
472
31
473
31
  // Compute a sufficient number of bits that is always large enough but might
474
31
  // be too large. This avoids the assertion in the constructor. This
475
31
  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476
31
  // bits in that case.
477
31
  unsigned sufficient
478
31
    = radix == 10? (slen == 1 ? 
411
:
slen * 64/1820
)
479
31
                 : 
(slen == 1 0
?
70
:
slen * 16/30
);
480
31
481
31
  // Convert to the actual binary value.
482
31
  APInt tmp(sufficient, StringRef(p, slen), radix);
483
31
484
31
  // Compute how many bits are required. If the log is infinite, assume we need
485
31
  // just bit. If the log is exact and value is negative, then the value is
486
31
  // MinSignedValue with (log + 1) bits.
487
31
  unsigned log = tmp.logBase2();
488
31
  if (log == (unsigned)-1) {
489
3
    return isNegative + 1;
490
28
  } else if (isNegative && 
tmp.isPowerOf2()19
) {
491
11
    return isNegative + log;
492
17
  } else {
493
17
    return isNegative + log + 1;
494
17
  }
495
31
}
496
497
244M
hash_code llvm::hash_value(const APInt &Arg) {
498
244M
  if (Arg.isSingleWord())
499
240M
    return hash_combine(Arg.U.VAL);
500
4.11M
501
4.11M
  return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
502
4.11M
}
503
504
184k
bool APInt::isSplat(unsigned SplatSizeInBits) const {
505
184k
  assert(getBitWidth() % SplatSizeInBits == 0 &&
506
184k
         "SplatSizeInBits must divide width!");
507
184k
  // We can check that all parts of an integer are equal by making use of a
508
184k
  // little trick: rotate and check if it's still the same value.
509
184k
  return *this == rotl(SplatSizeInBits);
510
184k
}
511
512
/// This function returns the high "numBits" bits of this APInt.
513
1.07M
APInt APInt::getHiBits(unsigned numBits) const {
514
1.07M
  return this->lshr(BitWidth - numBits);
515
1.07M
}
516
517
/// This function returns the low "numBits" bits of this APInt.
518
42.7M
APInt APInt::getLoBits(unsigned numBits) const {
519
42.7M
  APInt Result(getLowBitsSet(BitWidth, numBits));
520
42.7M
  Result &= *this;
521
42.7M
  return Result;
522
42.7M
}
523
524
/// Return a value containing V broadcasted over NewLen bits.
525
6.50k
APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
526
6.50k
  assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
527
6.50k
528
6.50k
  APInt Val = V.zextOrSelf(NewLen);
529
18.9k
  for (unsigned I = V.getBitWidth(); I < NewLen; 
I <<= 112.4k
)
530
12.4k
    Val |= Val << I;
531
6.50k
532
6.50k
  return Val;
533
6.50k
}
534
535
54.6M
unsigned APInt::countLeadingZerosSlowCase() const {
536
54.6M
  unsigned Count = 0;
537
270M
  for (int i = getNumWords()-1; i >= 0; 
--i215M
) {
538
244M
    uint64_t V = U.pVal[i];
539
244M
    if (V == 0)
540
215M
      Count += APINT_BITS_PER_WORD;
541
29.5M
    else {
542
29.5M
      Count += llvm::countLeadingZeros(V);
543
29.5M
      break;
544
29.5M
    }
545
244M
  }
546
54.6M
  // Adjust for unused bits in the most significant word (they are zero).
547
54.6M
  unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
548
54.6M
  Count -= Mod > 0 ? 
APINT_BITS_PER_WORD - Mod2.25M
:
052.4M
;
549
54.6M
  return Count;
550
54.6M
}
551
552
418k
unsigned APInt::countLeadingOnesSlowCase() const {
553
418k
  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
554
418k
  unsigned shift;
555
418k
  if (!highWordBits) {
556
417k
    highWordBits = APINT_BITS_PER_WORD;
557
417k
    shift = 0;
558
417k
  } else {
559
1.72k
    shift = APINT_BITS_PER_WORD - highWordBits;
560
1.72k
  }
561
418k
  int i = getNumWords() - 1;
562
418k
  unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
563
418k
  if (Count == highWordBits) {
564
418k
    for (i--; i >= 0; 
--i11.3k
) {
565
407k
      if (U.pVal[i] == WORDTYPE_MAX)
566
11.3k
        Count += APINT_BITS_PER_WORD;
567
395k
      else {
568
395k
        Count += llvm::countLeadingOnes(U.pVal[i]);
569
395k
        break;
570
395k
      }
571
407k
    }
572
406k
  }
573
418k
  return Count;
574
418k
}
575
576
107k
unsigned APInt::countTrailingZerosSlowCase() const {
577
107k
  unsigned Count = 0;
578
107k
  unsigned i = 0;
579
140k
  for (; i < getNumWords() && 
U.pVal[i] == 0125k
;
++i33.2k
)
580
33.2k
    Count += APINT_BITS_PER_WORD;
581
107k
  if (i < getNumWords())
582
92.5k
    Count += llvm::countTrailingZeros(U.pVal[i]);
583
107k
  return std::min(Count, BitWidth);
584
107k
}
585
586
4.10M
unsigned APInt::countTrailingOnesSlowCase() const {
587
4.10M
  unsigned Count = 0;
588
4.10M
  unsigned i = 0;
589
6.56M
  for (; i < getNumWords() && 
U.pVal[i] == WORDTYPE_MAX5.62M
;
++i2.45M
)
590
2.45M
    Count += APINT_BITS_PER_WORD;
591
4.10M
  if (i < getNumWords())
592
3.16M
    Count += llvm::countTrailingOnes(U.pVal[i]);
593
4.10M
  assert(Count <= BitWidth);
594
4.10M
  return Count;
595
4.10M
}
596
597
296k
unsigned APInt::countPopulationSlowCase() const {
598
296k
  unsigned Count = 0;
599
903k
  for (unsigned i = 0; i < getNumWords(); 
++i606k
)
600
606k
    Count += llvm::countPopulation(U.pVal[i]);
601
296k
  return Count;
602
296k
}
603
604
1.00M
bool APInt::intersectsSlowCase(const APInt &RHS) const {
605
3.03M
  for (unsigned i = 0, e = getNumWords(); i != e; 
++i2.02M
)
606
2.02M
    if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
607
1.45k
      return true;
608
1.00M
609
1.00M
  
return false1.00M
;
610
1.00M
}
611
612
300k
bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
613
389k
  for (unsigned i = 0, e = getNumWords(); i != e; 
++i89.2k
)
614
372k
    if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
615
283k
      return false;
616
300k
617
300k
  
return true16.8k
;
618
300k
}
619
620
75.9k
APInt APInt::byteSwap() const {
621
75.9k
  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
622
75.9k
  if (BitWidth == 16)
623
10.6k
    return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
624
65.3k
  if (BitWidth == 32)
625
56.5k
    return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
626
8.82k
  if (BitWidth == 48) {
627
0
    unsigned Tmp1 = unsigned(U.VAL >> 16);
628
0
    Tmp1 = ByteSwap_32(Tmp1);
629
0
    uint16_t Tmp2 = uint16_t(U.VAL);
630
0
    Tmp2 = ByteSwap_16(Tmp2);
631
0
    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
632
0
  }
633
8.82k
  if (BitWidth == 64)
634
8.82k
    return APInt(BitWidth, ByteSwap_64(U.VAL));
635
1
636
1
  APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
637
3
  for (unsigned I = 0, N = getNumWords(); I != N; 
++I2
)
638
2
    Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
639
1
  if (Result.BitWidth != BitWidth) {
640
1
    Result.lshrInPlace(Result.BitWidth - BitWidth);
641
1
    Result.BitWidth = BitWidth;
642
1
  }
643
1
  return Result;
644
1
}
645
646
11.1k
APInt APInt::reverseBits() const {
647
11.1k
  switch (BitWidth) {
648
11.1k
  case 64:
649
2.26k
    return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
650
11.1k
  case 32:
651
4.79k
    return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
652
11.1k
  case 16:
653
286
    return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
654
11.1k
  case 8:
655
320
    return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
656
11.1k
  default:
657
3.52k
    break;
658
3.52k
  }
659
3.52k
660
3.52k
  APInt Val(*this);
661
3.52k
  APInt Reversed(BitWidth, 0);
662
3.52k
  unsigned S = BitWidth;
663
3.52k
664
1.16M
  for (; Val != 0; 
Val.lshrInPlace(1)1.15M
) {
665
1.15M
    Reversed <<= 1;
666
1.15M
    Reversed |= Val[0];
667
1.15M
    --S;
668
1.15M
  }
669
3.52k
670
3.52k
  Reversed <<= S;
671
3.52k
  return Reversed;
672
3.52k
}
673
674
5.12k
APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
675
5.12k
  // Fast-path a common case.
676
5.12k
  if (A == B) 
return A1.62k
;
677
3.50k
678
3.50k
  // Corner cases: if either operand is zero, the other is the gcd.
679
3.50k
  if (!A) 
return B1.67k
;
680
1.83k
  if (!B) 
return A392
;
681
1.43k
682
1.43k
  // Count common powers of 2 and remove all other powers of 2.
683
1.43k
  unsigned Pow2;
684
1.43k
  {
685
1.43k
    unsigned Pow2_A = A.countTrailingZeros();
686
1.43k
    unsigned Pow2_B = B.countTrailingZeros();
687
1.43k
    if (Pow2_A > Pow2_B) {
688
189
      A.lshrInPlace(Pow2_A - Pow2_B);
689
189
      Pow2 = Pow2_B;
690
1.24k
    } else if (Pow2_B > Pow2_A) {
691
929
      B.lshrInPlace(Pow2_B - Pow2_A);
692
929
      Pow2 = Pow2_A;
693
929
    } else {
694
320
      Pow2 = Pow2_A;
695
320
    }
696
1.43k
  }
697
1.43k
698
1.43k
  // Both operands are odd multiples of 2^Pow_2:
699
1.43k
  //
700
1.43k
  //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
701
1.43k
  //
702
1.43k
  // This is a modified version of Stein's algorithm, taking advantage of
703
1.43k
  // efficient countTrailingZeros().
704
5.40k
  while (A != B) {
705
3.96k
    if (A.ugt(B)) {
706
583
      A -= B;
707
583
      A.lshrInPlace(A.countTrailingZeros() - Pow2);
708
3.38k
    } else {
709
3.38k
      B -= A;
710
3.38k
      B.lshrInPlace(B.countTrailingZeros() - Pow2);
711
3.38k
    }
712
3.96k
  }
713
1.43k
714
1.43k
  return A;
715
1.43k
}
716
717
40
APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
718
40
  uint64_t I = bit_cast<uint64_t>(Double);
719
40
720
40
  // Get the sign bit from the highest order bit
721
40
  bool isNeg = I >> 63;
722
40
723
40
  // Get the 11-bit exponent and adjust for the 1023 bit bias
724
40
  int64_t exp = ((I >> 52) & 0x7ff) - 1023;
725
40
726
40
  // If the exponent is negative, the value is < 0 so just return 0.
727
40
  if (exp < 0)
728
16
    return APInt(width, 0u);
729
24
730
24
  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
731
24
  uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
732
24
733
24
  // If the exponent doesn't shift all bits out of the mantissa
734
24
  if (exp < 52)
735
24
    return isNeg ? 
-APInt(width, mantissa >> (52 - exp))8
:
736
24
                    
APInt(width, mantissa >> (52 - exp))16
;
737
0
738
0
  // If the client didn't provide enough bits for us to shift the mantissa into
739
0
  // then the result is undefined, just return 0
740
0
  if (width <= exp - 52)
741
0
    return APInt(width, 0);
742
0
743
0
  // Otherwise, we have to shift the mantissa bits up to the right location
744
0
  APInt Tmp(width, mantissa);
745
0
  Tmp <<= (unsigned)exp - 52;
746
0
  return isNeg ? -Tmp : Tmp;
747
0
}
748
749
/// This function converts this APInt to a double.
750
/// The layout for double is as following (IEEE Standard 754):
751
///  --------------------------------------
752
/// |  Sign    Exponent    Fraction    Bias |
753
/// |-------------------------------------- |
754
/// |  1[63]   11[62-52]   52[51-00]   1023 |
755
///  --------------------------------------
756
50
double APInt::roundToDouble(bool isSigned) const {
757
50
758
50
  // Handle the simple case where the value is contained in one uint64_t.
759
50
  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
760
50
  if (isSingleWord() || 
getActiveBits() <= APINT_BITS_PER_WORD0
) {
761
50
    if (isSigned) {
762
25
      int64_t sext = SignExtend64(getWord(0), BitWidth);
763
25
      return double(sext);
764
25
    } else
765
25
      return double(getWord(0));
766
0
  }
767
0
768
0
  // Determine if the value is negative.
769
0
  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
770
0
771
0
  // Construct the absolute value if we're negative.
772
0
  APInt Tmp(isNeg ? -(*this) : (*this));
773
0
774
0
  // Figure out how many bits we're using.
775
0
  unsigned n = Tmp.getActiveBits();
776
0
777
0
  // The exponent (without bias normalization) is just the number of bits
778
0
  // we are using. Note that the sign bit is gone since we constructed the
779
0
  // absolute value.
780
0
  uint64_t exp = n;
781
0
782
0
  // Return infinity for exponent overflow
783
0
  if (exp > 1023) {
784
0
    if (!isSigned || !isNeg)
785
0
      return std::numeric_limits<double>::infinity();
786
0
    else
787
0
      return -std::numeric_limits<double>::infinity();
788
0
  }
789
0
  exp += 1023; // Increment for 1023 bias
790
0
791
0
  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
792
0
  // extract the high 52 bits from the correct words in pVal.
793
0
  uint64_t mantissa;
794
0
  unsigned hiWord = whichWord(n-1);
795
0
  if (hiWord == 0) {
796
0
    mantissa = Tmp.U.pVal[0];
797
0
    if (n > 52)
798
0
      mantissa >>= n - 52; // shift down, we want the top 52 bits.
799
0
  } else {
800
0
    assert(hiWord > 0 && "huh?");
801
0
    uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
802
0
    uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
803
0
    mantissa = hibits | lobits;
804
0
  }
805
0
806
0
  // The leading bit of mantissa is implicit, so get rid of it.
807
0
  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
808
0
  uint64_t I = sign | (exp << 52) | mantissa;
809
0
  return bit_cast<double>(I);
810
0
}
811
812
// Truncate to new width.
813
106M
APInt APInt::trunc(unsigned width) const {
814
106M
  assert(width < BitWidth && "Invalid APInt Truncate request");
815
106M
  assert(width && "Can't truncate to 0 bits");
816
106M
817
106M
  if (width <= APINT_BITS_PER_WORD)
818
106M
    return APInt(width, getRawData()[0]);
819
246k
820
246k
  APInt Result(getMemory(getNumWords(width)), width);
821
246k
822
246k
  // Copy full words.
823
246k
  unsigned i;
824
668k
  for (i = 0; i != width / APINT_BITS_PER_WORD; 
i++421k
)
825
421k
    Result.U.pVal[i] = U.pVal[i];
826
246k
827
246k
  // Truncate and copy any partial word.
828
246k
  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
829
246k
  if (bits != 0)
830
104k
    Result.U.pVal[i] = U.pVal[i] << bits >> bits;
831
246k
832
246k
  return Result;
833
246k
}
834
835
// Sign extend to a new width.
836
94.4M
APInt APInt::sext(unsigned Width) const {
837
94.4M
  assert(Width > BitWidth && "Invalid APInt SignExtend request");
838
94.4M
839
94.4M
  if (Width <= APINT_BITS_PER_WORD)
840
42.1M
    return APInt(Width, SignExtend64(U.VAL, BitWidth));
841
52.3M
842
52.3M
  APInt Result(getMemory(getNumWords(Width)), Width);
843
52.3M
844
52.3M
  // Copy words.
845
52.3M
  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
846
52.3M
847
52.3M
  // Sign extend the last word since there may be unused bits in the input.
848
52.3M
  Result.U.pVal[getNumWords() - 1] =
849
52.3M
      SignExtend64(Result.U.pVal[getNumWords() - 1],
850
52.3M
                   ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
851
52.3M
852
52.3M
  // Fill with sign bits.
853
52.3M
  std::memset(Result.U.pVal + getNumWords(), isNegative() ? 
-14.24M
:
048.0M
,
854
52.3M
              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
855
52.3M
  Result.clearUnusedBits();
856
52.3M
  return Result;
857
52.3M
}
858
859
//  Zero extend to a new width.
860
90.5M
APInt APInt::zext(unsigned width) const {
861
90.5M
  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
862
90.5M
863
90.5M
  if (width <= APINT_BITS_PER_WORD)
864
74.6M
    return APInt(width, U.VAL);
865
15.8M
866
15.8M
  APInt Result(getMemory(getNumWords(width)), width);
867
15.8M
868
15.8M
  // Copy words.
869
15.8M
  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
870
15.8M
871
15.8M
  // Zero remaining words.
872
15.8M
  std::memset(Result.U.pVal + getNumWords(), 0,
873
15.8M
              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
874
15.8M
875
15.8M
  return Result;
876
15.8M
}
877
878
108M
APInt APInt::zextOrTrunc(unsigned width) const {
879
108M
  if (BitWidth < width)
880
8.76M
    return zext(width);
881
100M
  if (BitWidth > width)
882
54.5M
    return trunc(width);
883
45.6M
  return *this;
884
45.6M
}
885
886
164M
APInt APInt::sextOrTrunc(unsigned width) const {
887
164M
  if (BitWidth < width)
888
24.9M
    return sext(width);
889
139M
  if (BitWidth > width)
890
755k
    return trunc(width);
891
138M
  return *this;
892
138M
}
893
894
17.4M
APInt APInt::zextOrSelf(unsigned width) const {
895
17.4M
  if (BitWidth < width)
896
11.3M
    return zext(width);
897
6.03M
  return *this;
898
6.03M
}
899
900
55.6M
APInt APInt::sextOrSelf(unsigned width) const {
901
55.6M
  if (BitWidth < width)
902
317k
    return sext(width);
903
55.2M
  return *this;
904
55.2M
}
905
906
/// Arithmetic right-shift this APInt by shiftAmt.
907
/// Arithmetic right-shift function.
908
490k
void APInt::ashrInPlace(const APInt &shiftAmt) {
909
490k
  ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
910
490k
}
911
912
/// Arithmetic right-shift this APInt by shiftAmt.
913
/// Arithmetic right-shift function.
914
242k
void APInt::ashrSlowCase(unsigned ShiftAmt) {
915
242k
  // Don't bother performing a no-op shift.
916
242k
  if (!ShiftAmt)
917
1.02k
    return;
918
241k
919
241k
  // Save the original sign bit for later.
920
241k
  bool Negative = isNegative();
921
241k
922
241k
  // WordShift is the inter-part shift; BitShift is intra-part shift.
923
241k
  unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
924
241k
  unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
925
241k
926
241k
  unsigned WordsToMove = getNumWords() - WordShift;
927
241k
  if (WordsToMove != 0) {
928
241k
    // Sign extend the last word to fill in the unused bits.
929
241k
    U.pVal[getNumWords() - 1] = SignExtend64(
930
241k
        U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
931
241k
932
241k
    // Fastpath for moving by whole words.
933
241k
    if (BitShift == 0) {
934
988
      std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
935
240k
    } else {
936
240k
      // Move the words containing significant bits.
937
420k
      for (unsigned i = 0; i != WordsToMove - 1; 
++i179k
)
938
179k
        U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
939
179k
                    (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
940
240k
941
240k
      // Handle the last word which has no high bits to copy.
942
240k
      U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
943
240k
      // Sign extend one more time.
944
240k
      U.pVal[WordsToMove - 1] =
945
240k
          SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
946
240k
    }
947
241k
  }
948
241k
949
241k
  // Fill in the remainder based on the original sign.
950
241k
  std::memset(U.pVal + WordsToMove, Negative ? 
-11.44k
:
0240k
,
951
241k
              WordShift * APINT_WORD_SIZE);
952
241k
  clearUnusedBits();
953
241k
}
954
955
/// Logical right-shift this APInt by shiftAmt.
956
/// Logical right-shift function.
957
504k
void APInt::lshrInPlace(const APInt &shiftAmt) {
958
504k
  lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
959
504k
}
960
961
/// Logical right-shift this APInt by shiftAmt.
962
/// Logical right-shift function.
963
2.83M
void APInt::lshrSlowCase(unsigned ShiftAmt) {
964
2.83M
  tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
965
2.83M
}
966
967
/// Left-shift this APInt by shiftAmt.
968
/// Left-shift function.
969
924k
APInt &APInt::operator<<=(const APInt &shiftAmt) {
970
924k
  // It's undefined behavior in C to shift by BitWidth or greater.
971
924k
  *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
972
924k
  return *this;
973
924k
}
974
975
8.90M
void APInt::shlSlowCase(unsigned ShiftAmt) {
976
8.90M
  tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
977
8.90M
  clearUnusedBits();
978
8.90M
}
979
980
// Calculate the rotate amount modulo the bit width.
981
48
static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
982
48
  unsigned rotBitWidth = rotateAmt.getBitWidth();
983
48
  APInt rot = rotateAmt;
984
48
  if (rotBitWidth < BitWidth) {
985
14
    // Extend the rotate APInt, so that the urem doesn't divide by 0.
986
14
    // e.g. APInt(1, 32) would give APInt(1, 0).
987
14
    rot = rotateAmt.zext(BitWidth);
988
14
  }
989
48
  rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
990
48
  return rot.getLimitedValue(BitWidth);
991
48
}
992
993
32
APInt APInt::rotl(const APInt &rotateAmt) const {
994
32
  return rotl(rotateModulo(BitWidth, rotateAmt));
995
32
}
996
997
185k
APInt APInt::rotl(unsigned rotateAmt) const {
998
185k
  rotateAmt %= BitWidth;
999
185k
  if (rotateAmt == 0)
1000
347
    return *this;
1001
184k
  return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1002
184k
}
1003
1004
16
APInt APInt::rotr(const APInt &rotateAmt) const {
1005
16
  return rotr(rotateModulo(BitWidth, rotateAmt));
1006
16
}
1007
1008
487
APInt APInt::rotr(unsigned rotateAmt) const {
1009
487
  rotateAmt %= BitWidth;
1010
487
  if (rotateAmt == 0)
1011
97
    return *this;
1012
390
  return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1013
390
}
1014
1015
// Square Root - this method computes and returns the square root of "this".
1016
// Three mechanisms are used for computation. For small values (<= 5 bits),
1017
// a table lookup is done. This gets some performance for common cases. For
1018
// values using less than 52 bits, the value is converted to double and then
1019
// the libc sqrt function is called. The result is rounded and then converted
1020
// back to a uint64_t which is then used to construct the result. Finally,
1021
// the Babylonian method for computing square roots is used.
1022
288k
APInt APInt::sqrt() const {
1023
288k
1024
288k
  // Determine the magnitude of the value.
1025
288k
  unsigned magnitude = getActiveBits();
1026
288k
1027
288k
  // Use a fast table for some small values. This also gets rid of some
1028
288k
  // rounding errors in libc sqrt for small values.
1029
288k
  if (magnitude <= 5) {
1030
3.06k
    static const uint8_t results[32] = {
1031
3.06k
      /*     0 */ 0,
1032
3.06k
      /*  1- 2 */ 1, 1,
1033
3.06k
      /*  3- 6 */ 2, 2, 2, 2,
1034
3.06k
      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1035
3.06k
      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1036
3.06k
      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1037
3.06k
      /*    31 */ 6
1038
3.06k
    };
1039
3.06k
    return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : 
U.pVal[0]0
) ]);
1040
3.06k
  }
1041
285k
1042
285k
  // If the magnitude of the value fits in less than 52 bits (the precision of
1043
285k
  // an IEEE double precision floating point value), then we can use the
1044
285k
  // libc sqrt function which will probably use a hardware sqrt computation.
1045
285k
  // This should be faster than the algorithm below.
1046
285k
  if (magnitude < 52) {
1047
285k
    return APInt(BitWidth,
1048
285k
                 uint64_t(::round(::sqrt(double(isSingleWord() ? 
U.VAL285k
1049
285k
                                                               : 
U.pVal[0]61
)))));
1050
285k
  }
1051
3
1052
3
  // Okay, all the short cuts are exhausted. We must compute it. The following
1053
3
  // is a classical Babylonian method for computing the square root. This code
1054
3
  // was adapted to APInt from a wikipedia article on such computations.
1055
3
  // See http://www.wikipedia.org/ and go to the page named
1056
3
  // Calculate_an_integer_square_root.
1057
3
  unsigned nbits = BitWidth, i = 4;
1058
3
  APInt testy(BitWidth, 16);
1059
3
  APInt x_old(BitWidth, 1);
1060
3
  APInt x_new(BitWidth, 0);
1061
3
  APInt two(BitWidth, 2);
1062
3
1063
3
  // Select a good starting value using binary logarithms.
1064
96
  for (;; i += 2, testy = testy.shl(2))
1065
99
    if (i >= nbits || this->ule(testy)) {
1066
3
      x_old = x_old.shl(i / 2);
1067
3
      break;
1068
3
    }
1069
3
1070
3
  // Use the Babylonian method to arrive at the integer square root:
1071
6
  for (;;) {
1072
6
    x_new = (this->udiv(x_old) + x_old).udiv(two);
1073
6
    if (x_old.ule(x_new))
1074
3
      break;
1075
3
    x_old = x_new;
1076
3
  }
1077
3
1078
3
  // Make sure we return the closest approximation
1079
3
  // NOTE: The rounding calculation below is correct. It will produce an
1080
3
  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1081
3
  // determined to be a rounding issue with pari/gp as it begins to use a
1082
3
  // floating point representation after 192 bits. There are no discrepancies
1083
3
  // between this algorithm and pari/gp for bit widths < 192 bits.
1084
3
  APInt square(x_old * x_old);
1085
3
  APInt nextSquare((x_old + 1) * (x_old +1));
1086
3
  if (this->ult(square))
1087
0
    return x_old;
1088
3
  assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1089
3
  APInt midpoint((nextSquare - square).udiv(two));
1090
3
  APInt offset(*this - square);
1091
3
  if (offset.ult(midpoint))
1092
0
    return x_old;
1093
3
  return x_old + 1;
1094
3
}
1095
1096
/// Computes the multiplicative inverse of this APInt for a given modulo. The
1097
/// iterative extended Euclidean algorithm is used to solve for this value,
1098
/// however we simplify it to speed up calculating only the inverse, and take
1099
/// advantage of div+rem calculations. We also use some tricks to avoid copying
1100
/// (potentially large) APInts around.
1101
/// WARNING: a value of '0' may be returned,
1102
///          signifying that no multiplicative inverse exists!
1103
161k
APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1104
161k
  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1105
161k
1106
161k
  // Using the properties listed at the following web page (accessed 06/21/08):
1107
161k
  //   http://www.numbertheory.org/php/euclid.html
1108
161k
  // (especially the properties numbered 3, 4 and 9) it can be proved that
1109
161k
  // BitWidth bits suffice for all the computations in the algorithm implemented
1110
161k
  // below. More precisely, this number of bits suffice if the multiplicative
1111
161k
  // inverse exists, but may not suffice for the general extended Euclidean
1112
161k
  // algorithm.
1113
161k
1114
161k
  APInt r[2] = { modulo, *this };
1115
161k
  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1116
161k
  APInt q(BitWidth, 0);
1117
161k
1118
161k
  unsigned i;
1119
1.33M
  for (i = 0; r[i^1] != 0; 
i ^= 11.16M
) {
1120
1.16M
    // An overview of the math without the confusing bit-flipping:
1121
1.16M
    // q = r[i-2] / r[i-1]
1122
1.16M
    // r[i] = r[i-2] % r[i-1]
1123
1.16M
    // t[i] = t[i-2] - t[i-1] * q
1124
1.16M
    udivrem(r[i], r[i^1], q, r[i]);
1125
1.16M
    t[i] -= t[i^1] * q;
1126
1.16M
  }
1127
161k
1128
161k
  // If this APInt and the modulo are not coprime, there is no multiplicative
1129
161k
  // inverse, so return 0. We check this by looking at the next-to-last
1130
161k
  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1131
161k
  // algorithm.
1132
161k
  if (r[i] != 1)
1133
65.5k
    return APInt(BitWidth, 0);
1134
95.6k
1135
95.6k
  // The next-to-last t is the multiplicative inverse.  However, we are
1136
95.6k
  // interested in a positive inverse. Calculate a positive one from a negative
1137
95.6k
  // one if necessary. A simple addition of the modulo suffices because
1138
95.6k
  // abs(t[i]) is known to be less than *this/2 (see the link above).
1139
95.6k
  if (t[i].isNegative())
1140
35.1k
    t[i] += modulo;
1141
95.6k
1142
95.6k
  return std::move(t[i]);
1143
95.6k
}
1144
1145
/// Calculate the magic numbers required to implement a signed integer division
1146
/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1147
/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1148
/// Warren, Jr., chapter 10.
1149
3.80k
APInt::ms APInt::magic() const {
1150
3.80k
  const APInt& d = *this;
1151
3.80k
  unsigned p;
1152
3.80k
  APInt ad, anc, delta, q1, r1, q2, r2, t;
1153
3.80k
  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1154
3.80k
  struct ms mag;
1155
3.80k
1156
3.80k
  ad = d.abs();
1157
3.80k
  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1158
3.80k
  anc = t - 1 - t.urem(ad);   // absolute value of nc
1159
3.80k
  p = d.getBitWidth() - 1;    // initialize p
1160
3.80k
  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1161
3.80k
  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1162
3.80k
  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1163
3.80k
  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1164
14.1k
  do {
1165
14.1k
    p = p + 1;
1166
14.1k
    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1167
14.1k
    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1168
14.1k
    if (r1.uge(anc)) {  // must be unsigned comparison
1169
759
      q1 = q1 + 1;
1170
759
      r1 = r1 - anc;
1171
759
    }
1172
14.1k
    q2 = q2<<1;          // update q2 = 2p/abs(d)
1173
14.1k
    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1174
14.1k
    if (r2.uge(ad)) {   // must be unsigned comparison
1175
5.02k
      q2 = q2 + 1;
1176
5.02k
      r2 = r2 - ad;
1177
5.02k
    }
1178
14.1k
    delta = ad - r2;
1179
14.1k
  } while (q1.ult(delta) || 
(3.80k
q1 == delta3.80k
&&
r1 == 0364
));
1180
3.80k
1181
3.80k
  mag.m = q2 + 1;
1182
3.80k
  if (d.isNegative()) 
mag.m = -mag.m206
; // resulting magic number
1183
3.80k
  mag.s = p - d.getBitWidth();          // resulting shift
1184
3.80k
  return mag;
1185
3.80k
}
1186
1187
/// Calculate the magic numbers required to implement an unsigned integer
1188
/// division by a constant as a sequence of multiplies, adds and shifts.
1189
/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1190
/// S. Warren, Jr., chapter 10.
1191
/// LeadingZeros can be used to simplify the calculation if the upper bits
1192
/// of the divided value are known zero.
1193
4.50k
APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1194
4.50k
  const APInt& d = *this;
1195
4.50k
  unsigned p;
1196
4.50k
  APInt nc, delta, q1, r1, q2, r2;
1197
4.50k
  struct mu magu;
1198
4.50k
  magu.a = 0;               // initialize "add" indicator
1199
4.50k
  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1200
4.50k
  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1201
4.50k
  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1202
4.50k
1203
4.50k
  nc = allOnes - (allOnes - d).urem(d);
1204
4.50k
  p = d.getBitWidth() - 1;  // initialize p
1205
4.50k
  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1206
4.50k
  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1207
4.50k
  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1208
4.50k
  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1209
28.3k
  do {
1210
28.3k
    p = p + 1;
1211
28.3k
    if (r1.uge(nc - r1)) {
1212
4.80k
      q1 = q1 + q1 + 1;  // update q1
1213
4.80k
      r1 = r1 + r1 - nc; // update r1
1214
4.80k
    }
1215
23.5k
    else {
1216
23.5k
      q1 = q1+q1; // update q1
1217
23.5k
      r1 = r1+r1; // update r1
1218
23.5k
    }
1219
28.3k
    if ((r2 + 1).uge(d - r2)) {
1220
8.82k
      if (q2.uge(signedMax)) 
magu.a = 1125
;
1221
8.82k
      q2 = q2+q2 + 1;     // update q2
1222
8.82k
      r2 = r2+r2 + 1 - d; // update r2
1223
8.82k
    }
1224
19.4k
    else {
1225
19.4k
      if (q2.uge(signedMin)) 
magu.a = 11.95k
;
1226
19.4k
      q2 = q2+q2;     // update q2
1227
19.4k
      r2 = r2+r2 + 1; // update r2
1228
19.4k
    }
1229
28.3k
    delta = d - 1 - r2;
1230
28.3k
  } while (p < d.getBitWidth()*2 &&
1231
28.3k
           
(28.3k
q1.ult(delta)28.3k
||
(4.50k
q1 == delta4.50k
&&
r1 == 08
)));
1232
4.50k
  magu.m = q2 + 1; // resulting magic number
1233
4.50k
  magu.s = p - d.getBitWidth();  // resulting shift
1234
4.50k
  return magu;
1235
4.50k
}
1236
1237
/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1238
/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1239
/// variables here have the same names as in the algorithm. Comments explain
1240
/// the algorithm and any deviation from it.
1241
static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1242
1.70M
                     unsigned m, unsigned n) {
1243
1.70M
  assert(u && "Must provide dividend");
1244
1.70M
  assert(v && "Must provide divisor");
1245
1.70M
  assert(q && "Must provide quotient");
1246
1.70M
  assert(u != v && u != q && v != q && "Must use different memory");
1247
1.70M
  assert(n>1 && "n must be > 1");
1248
1.70M
1249
1.70M
  // b denotes the base of the number system. In our case b is 2^32.
1250
1.70M
  const uint64_t b = uint64_t(1) << 32;
1251
1.70M
1252
1.70M
// The DEBUG macros here tend to be spam in the debug output if you're not
1253
1.70M
// debugging this code. Disable them unless KNUTH_DEBUG is defined.
1254
#ifdef KNUTH_DEBUG
1255
#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1256
#else
1257
86.0M
#define DEBUG_KNUTH(X) do {} while(false)
1258
1.70M
#endif
1259
1.70M
1260
1.70M
  DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1261
1.70M
  DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1262
1.70M
  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1263
1.70M
  DEBUG_KNUTH(dbgs() << " by");
1264
1.70M
  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1265
1.70M
  DEBUG_KNUTH(dbgs() << '\n');
1266
1.70M
  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1267
1.70M
  // u and v by d. Note that we have taken Knuth's advice here to use a power
1268
1.70M
  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1269
1.70M
  // 2 allows us to shift instead of multiply and it is easy to determine the
1270
1.70M
  // shift amount from the leading zeros.  We are basically normalizing the u
1271
1.70M
  // and v so that its high bits are shifted to the top of v's range without
1272
1.70M
  // overflow. Note that this can require an extra word in u so that u must
1273
1.70M
  // be of length m+n+1.
1274
1.70M
  unsigned shift = countLeadingZeros(v[n-1]);
1275
1.70M
  uint32_t v_carry = 0;
1276
1.70M
  uint32_t u_carry = 0;
1277
1.70M
  if (shift) {
1278
2.35M
    for (unsigned i = 0; i < m+n; 
++i1.88M
) {
1279
1.88M
      uint32_t u_tmp = u[i] >> (32 - shift);
1280
1.88M
      u[i] = (u[i] << shift) | u_carry;
1281
1.88M
      u_carry = u_tmp;
1282
1.88M
    }
1283
1.42M
    for (unsigned i = 0; i < n; 
++i963k
) {
1284
963k
      uint32_t v_tmp = v[i] >> (32 - shift);
1285
963k
      v[i] = (v[i] << shift) | v_carry;
1286
963k
      v_carry = v_tmp;
1287
963k
    }
1288
463k
  }
1289
1.70M
  u[m+n] = u_carry;
1290
1.70M
1291
1.70M
  DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1292
1.70M
  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1293
1.70M
  DEBUG_KNUTH(dbgs() << " by");
1294
1.70M
  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1295
1.70M
  DEBUG_KNUTH(dbgs() << '\n');
1296
1.70M
1297
1.70M
  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1298
1.70M
  int j = m;
1299
5.07M
  do {
1300
5.07M
    DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1301
5.07M
    // D3. [Calculate q'.].
1302
5.07M
    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1303
5.07M
    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1304
5.07M
    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1305
5.07M
    // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1306
5.07M
    // on v[n-2] determines at high speed most of the cases in which the trial
1307
5.07M
    // value qp is one too large, and it eliminates all cases where qp is two
1308
5.07M
    // too large.
1309
5.07M
    uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1310
5.07M
    DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1311
5.07M
    uint64_t qp = dividend / v[n-1];
1312
5.07M
    uint64_t rp = dividend % v[n-1];
1313
5.07M
    if (qp == b || 
qp*v[n-2] > b*rp + u[j+n-2]5.07M
) {
1314
2.17k
      qp--;
1315
2.17k
      rp += v[n-1];
1316
2.17k
      if (rp < b && 
(1.38k
qp == b1.38k
||
qp*v[n-2] > b*rp + u[j+n-2]1.22k
))
1317
161
        qp--;
1318
2.17k
    }
1319
5.07M
    DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1320
5.07M
1321
5.07M
    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1322
5.07M
    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1323
5.07M
    // consists of a simple multiplication by a one-place number, combined with
1324
5.07M
    // a subtraction.
1325
5.07M
    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1326
5.07M
    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1327
5.07M
    // true value plus b**(n+1), namely as the b's complement of
1328
5.07M
    // the true value, and a "borrow" to the left should be remembered.
1329
5.07M
    int64_t borrow = 0;
1330
15.4M
    for (unsigned i = 0; i < n; 
++i10.3M
) {
1331
10.3M
      uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1332
10.3M
      int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1333
10.3M
      u[j+i] = Lo_32(subres);
1334
10.3M
      borrow = Hi_32(p) - Hi_32(subres);
1335
10.3M
      DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1336
10.3M
                        << ", borrow = " << borrow << '\n');
1337
10.3M
    }
1338
5.07M
    bool isNeg = u[j+n] < borrow;
1339
5.07M
    u[j+n] -= Lo_32(borrow);
1340
5.07M
1341
5.07M
    DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1342
5.07M
    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1343
5.07M
    DEBUG_KNUTH(dbgs() << '\n');
1344
5.07M
1345
5.07M
    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1346
5.07M
    // negative, go to step D6; otherwise go on to step D7.
1347
5.07M
    q[j] = Lo_32(qp);
1348
5.07M
    if (isNeg) {
1349
48
      // D6. [Add back]. The probability that this step is necessary is very
1350
48
      // small, on the order of only 2/b. Make sure that test data accounts for
1351
48
      // this possibility. Decrease q[j] by 1
1352
48
      q[j]--;
1353
48
      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1354
48
      // A carry will occur to the left of u[j+n], and it should be ignored
1355
48
      // since it cancels with the borrow that occurred in D4.
1356
48
      bool carry = false;
1357
456
      for (unsigned i = 0; i < n; 
i++408
) {
1358
408
        uint32_t limit = std::min(u[j+i],v[i]);
1359
408
        u[j+i] += v[i] + carry;
1360
408
        carry = u[j+i] < limit || 
(348
carry348
&&
u[j+i] == limit252
);
1361
408
      }
1362
48
      u[j+n] += carry;
1363
48
    }
1364
5.07M
    DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1365
5.07M
    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1366
5.07M
    DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1367
5.07M
1368
5.07M
    // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1369
5.07M
  } while (--j >= 0);
1370
1.70M
1371
1.70M
  DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1372
1.70M
  DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1373
1.70M
  DEBUG_KNUTH(dbgs() << '\n');
1374
1.70M
1375
1.70M
  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1376
1.70M
  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1377
1.70M
  // compute the remainder (urem uses this).
1378
1.70M
  if (r) {
1379
1.34M
    // The value d is expressed by the "shift" value above since we avoided
1380
1.34M
    // multiplication by d by using a shift left. So, all we have to do is
1381
1.34M
    // shift right here.
1382
1.34M
    if (shift) {
1383
368k
      uint32_t carry = 0;
1384
368k
      DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1385
1.10M
      for (int i = n-1; i >= 0; 
i--737k
) {
1386
737k
        r[i] = (u[i] >> shift) | carry;
1387
737k
        carry = u[i] << (32 - shift);
1388
737k
        DEBUG_KNUTH(dbgs() << " " << r[i]);
1389
737k
      }
1390
980k
    } else {
1391
2.94M
      for (int i = n-1; i >= 0; 
i--1.96M
) {
1392
1.96M
        r[i] = u[i];
1393
1.96M
        DEBUG_KNUTH(dbgs() << " " << r[i]);
1394
1.96M
      }
1395
980k
    }
1396
1.34M
    DEBUG_KNUTH(dbgs() << '\n');
1397
1.34M
  }
1398
1.70M
  DEBUG_KNUTH(dbgs() << '\n');
1399
1.70M
}
1400
1401
void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1402
3.42M
                   unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1403
3.42M
  assert(lhsWords >= rhsWords && "Fractional result");
1404
3.42M
1405
3.42M
  // First, compose the values into an array of 32-bit words instead of
1406
3.42M
  // 64-bit words. This is a necessity of both the "short division" algorithm
1407
3.42M
  // and the Knuth "classical algorithm" which requires there to be native
1408
3.42M
  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1409
3.42M
  // can't use 64-bit operands here because we don't have native results of
1410
3.42M
  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1411
3.42M
  // work on large-endian machines.
1412
3.42M
  unsigned n = rhsWords * 2;
1413
3.42M
  unsigned m = (lhsWords * 2) - n;
1414
3.42M
1415
3.42M
  // Allocate space for the temporary values we need either on the stack, if
1416
3.42M
  // it will fit, or on the heap if it won't.
1417
3.42M
  uint32_t SPACE[128];
1418
3.42M
  uint32_t *U = nullptr;
1419
3.42M
  uint32_t *V = nullptr;
1420
3.42M
  uint32_t *Q = nullptr;
1421
3.42M
  uint32_t *R = nullptr;
1422
3.42M
  if ((Remainder?
42.77M
:
3652k
)*n+2*m+1 <= 128) {
1423
3.42M
    U = &SPACE[0];
1424
3.42M
    V = &SPACE[m+n+1];
1425
3.42M
    Q = &SPACE[(m+n+1) + n];
1426
3.42M
    if (Remainder)
1427
2.77M
      R = &SPACE[(m+n+1) + n + (m+n)];
1428
3.42M
  } else {
1429
358
    U = new uint32_t[m + n + 1];
1430
358
    V = new uint32_t[n];
1431
358
    Q = new uint32_t[m+n];
1432
358
    if (Remainder)
1433
8
      R = new uint32_t[n];
1434
358
  }
1435
3.42M
1436
3.42M
  // Initialize the dividend
1437
3.42M
  memset(U, 0, (m+n+1)*sizeof(uint32_t));
1438
10.2M
  for (unsigned i = 0; i < lhsWords; 
++i6.86M
) {
1439
6.86M
    uint64_t tmp = LHS[i];
1440
6.86M
    U[i * 2] = Lo_32(tmp);
1441
6.86M
    U[i * 2 + 1] = Hi_32(tmp);
1442
6.86M
  }
1443
3.42M
  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1444
3.42M
1445
3.42M
  // Initialize the divisor
1446
3.42M
  memset(V, 0, (n)*sizeof(uint32_t));
1447
6.88M
  for (unsigned i = 0; i < rhsWords; 
++i3.45M
) {
1448
3.45M
    uint64_t tmp = RHS[i];
1449
3.45M
    V[i * 2] = Lo_32(tmp);
1450
3.45M
    V[i * 2 + 1] = Hi_32(tmp);
1451
3.45M
  }
1452
3.42M
1453
3.42M
  // initialize the quotient and remainder
1454
3.42M
  memset(Q, 0, (m+n) * sizeof(uint32_t));
1455
3.42M
  if (Remainder)
1456
2.77M
    memset(R, 0, n * sizeof(uint32_t));
1457
3.42M
1458
3.42M
  // Now, adjust m and n for the Knuth division. n is the number of words in
1459
3.42M
  // the divisor. m is the number of words by which the dividend exceeds the
1460
3.42M
  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1461
3.42M
  // contain any zero words or the Knuth algorithm fails.
1462
5.14M
  for (unsigned i = n; i > 0 && V[i-1] == 0; 
i--1.72M
) {
1463
1.72M
    n--;
1464
1.72M
    m++;
1465
1.72M
  }
1466
3.57M
  for (unsigned i = m+n; i > 0 && U[i-1] == 0; 
i--152k
)
1467
152k
    m--;
1468
3.42M
1469
3.42M
  // If we're left with only a single word for the divisor, Knuth doesn't work
1470
3.42M
  // so we implement the short division algorithm here. This is much simpler
1471
3.42M
  // and faster because we are certain that we can divide a 64-bit quantity
1472
3.42M
  // by a 32-bit quantity at hardware speed and short division is simply a
1473
3.42M
  // series of such operations. This is just like doing short division but we
1474
3.42M
  // are using base 2^32 instead of base 10.
1475
3.42M
  assert(n != 0 && "Divide by zero?");
1476
3.42M
  if (n == 1) {
1477
1.72M
    uint32_t divisor = V[0];
1478
1.72M
    uint32_t remainder = 0;
1479
8.45M
    for (int i = m; i >= 0; 
i--6.73M
) {
1480
6.73M
      uint64_t partial_dividend = Make_64(remainder, U[i]);
1481
6.73M
      if (partial_dividend == 0) {
1482
1.31M
        Q[i] = 0;
1483
1.31M
        remainder = 0;
1484
5.42M
      } else if (partial_dividend < divisor) {
1485
148k
        Q[i] = 0;
1486
148k
        remainder = Lo_32(partial_dividend);
1487
5.27M
      } else if (partial_dividend == divisor) {
1488
15.0k
        Q[i] = 1;
1489
15.0k
        remainder = 0;
1490
5.25M
      } else {
1491
5.25M
        Q[i] = Lo_32(partial_dividend / divisor);
1492
5.25M
        remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1493
5.25M
      }
1494
6.73M
    }
1495
1.72M
    if (R)
1496
1.42M
      R[0] = remainder;
1497
1.72M
  } else {
1498
1.70M
    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1499
1.70M
    // case n > 1.
1500
1.70M
    KnuthDiv(U, V, Q, R, m, n);
1501
1.70M
  }
1502
3.42M
1503
3.42M
  // If the caller wants the quotient
1504
3.42M
  if (Quotient) {
1505
10.2M
    for (unsigned i = 0; i < lhsWords; 
++i6.86M
)
1506
6.86M
      Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1507
3.42M
  }
1508
3.42M
1509
3.42M
  // If the caller wants the remainder
1510
3.42M
  if (Remainder) {
1511
5.54M
    for (unsigned i = 0; i < rhsWords; 
++i2.77M
)
1512
2.77M
      Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1513
2.77M
  }
1514
3.42M
1515
3.42M
  // Clean up the memory we allocated.
1516
3.42M
  if (U != &SPACE[0]) {
1517
358
    delete [] U;
1518
358
    delete [] V;
1519
358
    delete [] Q;
1520
358
    delete [] R;
1521
358
  }
1522
3.42M
}
1523
1524
61.2M
APInt APInt::udiv(const APInt &RHS) const {
1525
61.2M
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1526
61.2M
1527
61.2M
  // First, deal with the easy case
1528
61.2M
  if (isSingleWord()) {
1529
60.1M
    assert(RHS.U.VAL != 0 && "Divide by zero?");
1530
60.1M
    return APInt(BitWidth, U.VAL / RHS.U.VAL);
1531
60.1M
  }
1532
1.06M
1533
1.06M
  // Get some facts about the LHS and RHS number of bits and words
1534
1.06M
  unsigned lhsWords = getNumWords(getActiveBits());
1535
1.06M
  unsigned rhsBits  = RHS.getActiveBits();
1536
1.06M
  unsigned rhsWords = getNumWords(rhsBits);
1537
1.06M
  assert(rhsWords && "Divided by zero???");
1538
1.06M
1539
1.06M
  // Deal with some degenerate cases
1540
1.06M
  if (!lhsWords)
1541
35.1k
    // 0 / X ===> 0
1542
35.1k
    return APInt(BitWidth, 0);
1543
1.03M
  if (rhsBits == 1)
1544
258k
    // X / 1 ===> X
1545
258k
    return *this;
1546
771k
  
if (771k
lhsWords < rhsWords771k
|| this->ult(RHS))
1547
33.2k
    // X / Y ===> 0, iff X < Y
1548
33.2k
    return APInt(BitWidth, 0);
1549
738k
  if (*this == RHS)
1550
22.1k
    // X / X ===> 1
1551
22.1k
    return APInt(BitWidth, 1);
1552
716k
  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1553
63.6k
    // All high words are zero, just use native divide
1554
63.6k
    return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1555
652k
1556
652k
  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1557
652k
  APInt Quotient(BitWidth, 0); // to hold result.
1558
652k
  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1559
652k
  return Quotient;
1560
652k
}
1561
1562
1.31k
APInt APInt::udiv(uint64_t RHS) const {
1563
1.31k
  assert(RHS != 0 && "Divide by zero?");
1564
1.31k
1565
1.31k
  // First, deal with the easy case
1566
1.31k
  if (isSingleWord())
1567
757
    return APInt(BitWidth, U.VAL / RHS);
1568
561
1569
561
  // Get some facts about the LHS words.
1570
561
  unsigned lhsWords = getNumWords(getActiveBits());
1571
561
1572
561
  // Deal with some degenerate cases
1573
561
  if (!lhsWords)
1574
0
    // 0 / X ===> 0
1575
0
    return APInt(BitWidth, 0);
1576
561
  if (RHS == 1)
1577
0
    // X / 1 ===> X
1578
0
    return *this;
1579
561
  if (this->ult(RHS))
1580
2
    // X / Y ===> 0, iff X < Y
1581
2
    return APInt(BitWidth, 0);
1582
559
  if (*this == RHS)
1583
7
    // X / X ===> 1
1584
7
    return APInt(BitWidth, 1);
1585
552
  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1586
427
    // All high words are zero, just use native divide
1587
427
    return APInt(BitWidth, this->U.pVal[0] / RHS);
1588
125
1589
125
  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1590
125
  APInt Quotient(BitWidth, 0); // to hold result.
1591
125
  divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1592
125
  return Quotient;
1593
125
}
1594
1595
26.0M
APInt APInt::sdiv(const APInt &RHS) const {
1596
26.0M
  if (isNegative()) {
1597
2.26M
    if (RHS.isNegative())
1598
1.08M
      return (-(*this)).udiv(-RHS);
1599
1.17M
    return -((-(*this)).udiv(RHS));
1600
1.17M
  }
1601
23.7M
  if (RHS.isNegative())
1602
1.12M
    return -(this->udiv(-RHS));
1603
22.6M
  return this->udiv(RHS);
1604
22.6M
}
1605
1606
5
APInt APInt::sdiv(int64_t RHS) const {
1607
5
  if (isNegative()) {
1608
2
    if (RHS < 0)
1609
0
      return (-(*this)).udiv(-RHS);
1610
2
    return -((-(*this)).udiv(RHS));
1611
2
  }
1612
3
  if (RHS < 0)
1613
0
    return -(this->udiv(-RHS));
1614
3
  return this->udiv(RHS);
1615
3
}
1616
1617
10.8M
APInt APInt::urem(const APInt &RHS) const {
1618
10.8M
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1619
10.8M
  if (isSingleWord()) {
1620
10.8M
    assert(RHS.U.VAL != 0 && "Remainder by zero?");
1621
10.8M
    return APInt(BitWidth, U.VAL % RHS.U.VAL);
1622
10.8M
  }
1623
1.08k
1624
1.08k
  // Get some facts about the LHS
1625
1.08k
  unsigned lhsWords = getNumWords(getActiveBits());
1626
1.08k
1627
1.08k
  // Get some facts about the RHS
1628
1.08k
  unsigned rhsBits = RHS.getActiveBits();
1629
1.08k
  unsigned rhsWords = getNumWords(rhsBits);
1630
1.08k
  assert(rhsWords && "Performing remainder operation by zero ???");
1631
1.08k
1632
1.08k
  // Check the degenerate cases
1633
1.08k
  if (lhsWords == 0)
1634
132
    // 0 % Y ===> 0
1635
132
    return APInt(BitWidth, 0);
1636
955
  if (rhsBits == 1)
1637
343
    // X % 1 ===> 0
1638
343
    return APInt(BitWidth, 0);
1639
612
  if (lhsWords < rhsWords || 
this->ult(RHS)491
)
1640
442
    // X % Y ===> X, iff X < Y
1641
442
    return *this;
1642
170
  if (*this == RHS)
1643
10
    // X % X == 0;
1644
10
    return APInt(BitWidth, 0);
1645
160
  if (lhsWords == 1)
1646
19
    // All high words are zero, just use native remainder
1647
19
    return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1648
141
1649
141
  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1650
141
  APInt Remainder(BitWidth, 0);
1651
141
  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1652
141
  return Remainder;
1653
141
}
1654
1655
5.30k
uint64_t APInt::urem(uint64_t RHS) const {
1656
5.30k
  assert(RHS != 0 && "Remainder by zero?");
1657
5.30k
1658
5.30k
  if (isSingleWord())
1659
5.30k
    return U.VAL % RHS;
1660
5
1661
5
  // Get some facts about the LHS
1662
5
  unsigned lhsWords = getNumWords(getActiveBits());
1663
5
1664
5
  // Check the degenerate cases
1665
5
  if (lhsWords == 0)
1666
0
    // 0 % Y ===> 0
1667
0
    return 0;
1668
5
  if (RHS == 1)
1669
0
    // X % 1 ===> 0
1670
0
    return 0;
1671
5
  if (this->ult(RHS))
1672
0
    // X % Y ===> X, iff X < Y
1673
0
    return getZExtValue();
1674
5
  if (*this == RHS)
1675
0
    // X % X == 0;
1676
0
    return 0;
1677
5
  if (lhsWords == 1)
1678
3
    // All high words are zero, just use native remainder
1679
3
    return U.pVal[0] % RHS;
1680
2
1681
2
  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1682
2
  uint64_t Remainder;
1683
2
  divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1684
2
  return Remainder;
1685
2
}
1686
1687
4.62M
APInt APInt::srem(const APInt &RHS) const {
1688
4.62M
  if (isNegative()) {
1689
2.03M
    if (RHS.isNegative())
1690
962k
      return -((-(*this)).urem(-RHS));
1691
1.07M
    return -((-(*this)).urem(RHS));
1692
1.07M
  }
1693
2.59M
  if (RHS.isNegative())
1694
1.00M
    return this->urem(-RHS);
1695
1.58M
  return this->urem(RHS);
1696
1.58M
}
1697
1698
48
int64_t APInt::srem(int64_t RHS) const {
1699
48
  if (isNegative()) {
1700
2
    if (RHS < 0)
1701
0
      return -((-(*this)).urem(-RHS));
1702
2
    return -((-(*this)).urem(RHS));
1703
2
  }
1704
46
  if (RHS < 0)
1705
0
    return this->urem(-RHS);
1706
46
  return this->urem(RHS);
1707
46
}
1708
1709
void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1710
42.8M
                    APInt &Quotient, APInt &Remainder) {
1711
42.8M
  assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1712
42.8M
  unsigned BitWidth = LHS.BitWidth;
1713
42.8M
1714
42.8M
  // First, deal with the easy case
1715
42.8M
  if (LHS.isSingleWord()) {
1716
39.4M
    assert(RHS.U.VAL != 0 && "Divide by zero?");
1717
39.4M
    uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1718
39.4M
    uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1719
39.4M
    Quotient = APInt(BitWidth, QuotVal);
1720
39.4M
    Remainder = APInt(BitWidth, RemVal);
1721
39.4M
    return;
1722
39.4M
  }
1723
3.35M
1724
3.35M
  // Get some size facts about the dividend and divisor
1725
3.35M
  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1726
3.35M
  unsigned rhsBits  = RHS.getActiveBits();
1727
3.35M
  unsigned rhsWords = getNumWords(rhsBits);
1728
3.35M
  assert(rhsWords && "Performing divrem operation by zero ???");
1729
3.35M
1730
3.35M
  // Check the degenerate cases
1731
3.35M
  if (lhsWords == 0) {
1732
610k
    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1733
610k
    Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1734
610k
    return;
1735
610k
  }
1736
2.74M
1737
2.74M
  if (rhsBits == 1) {
1738
27.1k
    Quotient = LHS;                   // X / 1 ===> X
1739
27.1k
    Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1740
27.1k
  }
1741
2.74M
1742
2.74M
  if (lhsWords < rhsWords || LHS.ult(RHS)) {
1743
27.7k
    Remainder = LHS;                  // X % Y ===> X, iff X < Y
1744
27.7k
    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1745
27.7k
    return;
1746
27.7k
  }
1747
2.71M
1748
2.71M
  if (LHS == RHS) {
1749
19
    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1750
19
    Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1751
19
    return;
1752
19
  }
1753
2.71M
1754
2.71M
  // Make sure there is enough space to hold the results.
1755
2.71M
  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1756
2.71M
  // change the size. This is necessary if Quotient or Remainder is aliased
1757
2.71M
  // with LHS or RHS.
1758
2.71M
  Quotient.reallocate(BitWidth);
1759
2.71M
  Remainder.reallocate(BitWidth);
1760
2.71M
1761
2.71M
  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1762
18.7k
    // There is only one word to consider so use the native versions.
1763
18.7k
    uint64_t lhsValue = LHS.U.pVal[0];
1764
18.7k
    uint64_t rhsValue = RHS.U.pVal[0];
1765
18.7k
    Quotient = lhsValue / rhsValue;
1766
18.7k
    Remainder = lhsValue % rhsValue;
1767
18.7k
    return;
1768
18.7k
  }
1769
2.69M
1770
2.69M
  // Okay, lets do it the long way
1771
2.69M
  divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1772
2.69M
         Remainder.U.pVal);
1773
2.69M
  // Clear the rest of the Quotient and Remainder.
1774
2.69M
  std::memset(Quotient.U.pVal + lhsWords, 0,
1775
2.69M
              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1776
2.69M
  std::memset(Remainder.U.pVal + rhsWords, 0,
1777
2.69M
              (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1778
2.69M
}
1779
1780
void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1781
150k
                    uint64_t &Remainder) {
1782
150k
  assert(RHS != 0 && "Divide by zero?");
1783
150k
  unsigned BitWidth = LHS.BitWidth;
1784
150k
1785
150k
  // First, deal with the easy case
1786
150k
  if (LHS.isSingleWord()) {
1787
381
    uint64_t QuotVal = LHS.U.VAL / RHS;
1788
381
    Remainder = LHS.U.VAL % RHS;
1789
381
    Quotient = APInt(BitWidth, QuotVal);
1790
381
    return;
1791
381
  }
1792
150k
1793
150k
  // Get some size facts about the dividend and divisor
1794
150k
  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1795
150k
1796
150k
  // Check the degenerate cases
1797
150k
  if (lhsWords == 0) {
1798
0
    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1799
0
    Remainder = 0;                    // 0 % Y ===> 0
1800
0
    return;
1801
0
  }
1802
150k
1803
150k
  if (RHS == 1) {
1804
0
    Quotient = LHS;                   // X / 1 ===> X
1805
0
    Remainder = 0;                    // X % 1 ===> 0
1806
0
    return;
1807
0
  }
1808
150k
1809
150k
  if (LHS.ult(RHS)) {
1810
4.85k
    Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1811
4.85k
    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1812
4.85k
    return;
1813
4.85k
  }
1814
145k
1815
145k
  if (LHS == RHS) {
1816
32
    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1817
32
    Remainder = 0;                    // X % X ===> 0;
1818
32
    return;
1819
32
  }
1820
145k
1821
145k
  // Make sure there is enough space to hold the results.
1822
145k
  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1823
145k
  // change the size. This is necessary if Quotient is aliased with LHS.
1824
145k
  Quotient.reallocate(BitWidth);
1825
145k
1826
145k
  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1827
72.0k
    // There is only one word to consider so use the native versions.
1828
72.0k
    uint64_t lhsValue = LHS.U.pVal[0];
1829
72.0k
    Quotient = lhsValue / RHS;
1830
72.0k
    Remainder = lhsValue % RHS;
1831
72.0k
    return;
1832
72.0k
  }
1833
73.3k
1834
73.3k
  // Okay, lets do it the long way
1835
73.3k
  divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1836
73.3k
  // Clear the rest of the Quotient.
1837
73.3k
  std::memset(Quotient.U.pVal + lhsWords, 0,
1838
73.3k
              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1839
73.3k
}
1840
1841
void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1842
21.1M
                    APInt &Quotient, APInt &Remainder) {
1843
21.1M
  if (LHS.isNegative()) {
1844
10.4M
    if (RHS.isNegative())
1845
2.31M
      APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1846
8.10M
    else {
1847
8.10M
      APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1848
8.10M
      Quotient.negate();
1849
8.10M
    }
1850
10.4M
    Remainder.negate();
1851
10.7M
  } else if (RHS.isNegative()) {
1852
2.31M
    APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1853
2.31M
    Quotient.negate();
1854
8.44M
  } else {
1855
8.44M
    APInt::udivrem(LHS, RHS, Quotient, Remainder);
1856
8.44M
  }
1857
21.1M
}
1858
1859
void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1860
5
                    APInt &Quotient, int64_t &Remainder) {
1861
5
  uint64_t R = Remainder;
1862
5
  if (LHS.isNegative()) {
1863
2
    if (RHS < 0)
1864
0
      APInt::udivrem(-LHS, -RHS, Quotient, R);
1865
2
    else {
1866
2
      APInt::udivrem(-LHS, RHS, Quotient, R);
1867
2
      Quotient.negate();
1868
2
    }
1869
2
    R = -R;
1870
3
  } else if (RHS < 0) {
1871
0
    APInt::udivrem(LHS, -RHS, Quotient, R);
1872
0
    Quotient.negate();
1873
3
  } else {
1874
3
    APInt::udivrem(LHS, RHS, Quotient, R);
1875
3
  }
1876
5
  Remainder = R;
1877
5
}
1878
1879
7.76M
APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1880
7.76M
  APInt Res = *this+RHS;
1881
7.76M
  Overflow = isNonNegative() == RHS.isNonNegative() &&
1882
7.76M
             
Res.isNonNegative() != isNonNegative()3.92M
;
1883
7.76M
  return Res;
1884
7.76M
}
1885
1886
35.8M
APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1887
35.8M
  APInt Res = *this+RHS;
1888
35.8M
  Overflow = Res.ult(RHS);
1889
35.8M
  return Res;
1890
35.8M
}
1891
1892
7.74M
APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1893
7.74M
  APInt Res = *this - RHS;
1894
7.74M
  Overflow = isNonNegative() != RHS.isNonNegative() &&
1895
7.74M
             
Res.isNonNegative() != isNonNegative()3.89M
;
1896
7.74M
  return Res;
1897
7.74M
}
1898
1899
7.66M
APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1900
7.66M
  APInt Res = *this-RHS;
1901
7.66M
  Overflow = Res.ugt(*this);
1902
7.66M
  return Res;
1903
7.66M
}
1904
1905
19.7M
APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1906
19.7M
  // MININT/-1  -->  overflow.
1907
19.7M
  Overflow = isMinSignedValue() && 
RHS.isAllOnesValue()0
;
1908
19.7M
  return sdiv(RHS);
1909
19.7M
}
1910
1911
139k
APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1912
139k
  APInt Res = *this * RHS;
1913
139k
1914
139k
  if (*this != 0 && 
RHS != 0131k
)
1915
125k
    Overflow = Res.sdiv(RHS) != *this || 
Res.sdiv(*this) != RHS37.4k
;
1916
13.4k
  else
1917
13.4k
    Overflow = false;
1918
139k
  return Res;
1919
139k
}
1920
1921
5.80M
APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1922
5.80M
  if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1923
3.61M
    Overflow = true;
1924
3.61M
    return *this * RHS;
1925
3.61M
  }
1926
2.19M
1927
2.19M
  APInt Res = lshr(1) * RHS;
1928
2.19M
  Overflow = Res.isNegative();
1929
2.19M
  Res <<= 1;
1930
2.19M
  if ((*this)[0]) {
1931
628k
    Res += RHS;
1932
628k
    if (Res.ult(RHS))
1933
30.5k
      Overflow = true;
1934
628k
  }
1935
2.19M
  return Res;
1936
2.19M
}
1937
1938
3.78k
APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1939
3.78k
  Overflow = ShAmt.uge(getBitWidth());
1940
3.78k
  if (Overflow)
1941
0
    return APInt(BitWidth, 0);
1942
3.78k
1943
3.78k
  if (isNonNegative()) // Don't allow sign change.
1944
3.78k
    Overflow = ShAmt.uge(countLeadingZeros());
1945
0
  else
1946
0
    Overflow = ShAmt.uge(countLeadingOnes());
1947
3.78k
1948
3.78k
  return *this << ShAmt;
1949
3.78k
}
1950
1951
41
APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1952
41
  Overflow = ShAmt.uge(getBitWidth());
1953
41
  if (Overflow)
1954
0
    return APInt(BitWidth, 0);
1955
41
1956
41
  Overflow = ShAmt.ugt(countLeadingZeros());
1957
41
1958
41
  return *this << ShAmt;
1959
41
}
1960
1961
3.86M
APInt APInt::sadd_sat(const APInt &RHS) const {
1962
3.86M
  bool Overflow;
1963
3.86M
  APInt Res = sadd_ov(RHS, Overflow);
1964
3.86M
  if (!Overflow)
1965
2.85M
    return Res;
1966
1.01M
1967
1.01M
  return isNegative() ? 
APInt::getSignedMinValue(BitWidth)566k
1968
1.01M
                      : 
APInt::getSignedMaxValue(BitWidth)445k
;
1969
1.01M
}
1970
1971
32.0M
APInt APInt::uadd_sat(const APInt &RHS) const {
1972
32.0M
  bool Overflow;
1973
32.0M
  APInt Res = uadd_ov(RHS, Overflow);
1974
32.0M
  if (!Overflow)
1975
30.2M
    return Res;
1976
1.81M
1977
1.81M
  return APInt::getMaxValue(BitWidth);
1978
1.81M
}
1979
1980
3.86M
APInt APInt::ssub_sat(const APInt &RHS) const {
1981
3.86M
  bool Overflow;
1982
3.86M
  APInt Res = ssub_ov(RHS, Overflow);
1983
3.86M
  if (!Overflow)
1984
2.85M
    return Res;
1985
1.01M
1986
1.01M
  return isNegative() ? 
APInt::getSignedMinValue(BitWidth)445k
1987
1.01M
                      : 
APInt::getSignedMaxValue(BitWidth)566k
;
1988
1.01M
}
1989
1990
3.86M
APInt APInt::usub_sat(const APInt &RHS) const {
1991
3.86M
  bool Overflow;
1992
3.86M
  APInt Res = usub_ov(RHS, Overflow);
1993
3.86M
  if (!Overflow)
1994
2.05M
    return Res;
1995
1.81M
1996
1.81M
  return APInt(BitWidth, 0);
1997
1.81M
}
1998
1999
2000
4.27M
void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2001
4.27M
  // Check our assumptions here
2002
4.27M
  assert(!str.empty() && "Invalid string length");
2003
4.27M
  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2004
4.27M
          radix == 36) &&
2005
4.27M
         "Radix should be 2, 8, 10, 16, or 36!");
2006
4.27M
2007
4.27M
  StringRef::iterator p = str.begin();
2008
4.27M
  size_t slen = str.size();
2009
4.27M
  bool isNeg = *p == '-';
2010
4.27M
  if (*p == '-' || 
*p == '+'4.18M
) {
2011
92.7k
    p++;
2012
92.7k
    slen--;
2013
92.7k
    assert(slen && "String is only a sign, needs a value.");
2014
92.7k
  }
2015
4.27M
  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2016
4.27M
  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2017
4.27M
  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2018
4.27M
  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2019
4.27M
         "Insufficient bit width");
2020
4.27M
2021
4.27M
  // Allocate memory if needed
2022
4.27M
  if (isSingleWord())
2023
4.27M
    U.VAL = 0;
2024
1.80k
  else
2025
1.80k
    U.pVal = getClearedMemory(getNumWords());
2026
4.27M
2027
4.27M
  // Figure out if we can shift instead of multiply
2028
4.27M
  unsigned shift = (radix == 16 ? 
41.84k
:
radix == 8 4.27M
?
318
:
radix == 2 4.27M
?
115
:
04.27M
);
2029
4.27M
2030
4.27M
  // Enter digit traversal loop
2031
10.5M
  for (StringRef::iterator e = str.end(); p != e; 
++p6.25M
) {
2032
6.25M
    unsigned digit = getDigit(*p, radix);
2033
6.25M
    assert(digit < radix && "Invalid character in digit string");
2034
6.25M
2035
6.25M
    // Shift or multiply the value by the radix
2036
6.25M
    if (slen > 1) {
2037
3.32M
      if (shift)
2038
13.9k
        *this <<= shift;
2039
3.30M
      else
2040
3.30M
        *this *= radix;
2041
3.32M
    }
2042
6.25M
2043
6.25M
    // Add in the digit we just interpreted
2044
6.25M
    *this += digit;
2045
6.25M
  }
2046
4.27M
  // If its negative, put it in two's complement form
2047
4.27M
  if (isNeg)
2048
92.7k
    this->negate();
2049
4.27M
}
2050
2051
void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2052
3.12M
                     bool Signed, bool formatAsCLiteral) const {
2053
3.12M
  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2054
3.12M
          Radix == 36) &&
2055
3.12M
         "Radix should be 2, 8, 10, 16, or 36!");
2056
3.12M
2057
3.12M
  const char *Prefix = "";
2058
3.12M
  if (formatAsCLiteral) {
2059
5.66k
    switch (Radix) {
2060
5.66k
      case 2:
2061
3
        // Binary literals are a non-standard extension added in gcc 4.3:
2062
3
        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2063
3
        Prefix = "0b";
2064
3
        break;
2065
5.66k
      case 8:
2066
3
        Prefix = "0";
2067
3
        break;
2068
5.66k
      case 10:
2069
29
        break; // No prefix
2070
5.66k
      case 16:
2071
5.62k
        Prefix = "0x";
2072
5.62k
        break;
2073
5.66k
      default:
2074
0
        llvm_unreachable("Invalid radix!");
2075
3.12M
    }
2076
3.12M
  }
2077
3.12M
2078
3.12M
  // First, check for a zero value and just short circuit the logic below.
2079
3.12M
  if (*this == 0) {
2080
512k
    while (*Prefix) {
2081
5
      Str.push_back(*Prefix);
2082
5
      ++Prefix;
2083
5
    };
2084
512k
    Str.push_back('0');
2085
512k
    return;
2086
512k
  }
2087
2.61M
2088
2.61M
  static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2089
2.61M
2090
2.61M
  if (isSingleWord()) {
2091
2.60M
    char Buffer[65];
2092
2.60M
    char *BufPtr = std::end(Buffer);
2093
2.60M
2094
2.60M
    uint64_t N;
2095
2.60M
    if (!Signed) {
2096
827k
      N = getZExtValue();
2097
1.77M
    } else {
2098
1.77M
      int64_t I = getSExtValue();
2099
1.77M
      if (I >= 0) {
2100
1.72M
        N = I;
2101
1.72M
      } else {
2102
56.5k
        Str.push_back('-');
2103
56.5k
        N = -(uint64_t)I;
2104
56.5k
      }
2105
1.77M
    }
2106
2.60M
2107
2.60M
    while (*Prefix) {
2108
334
      Str.push_back(*Prefix);
2109
334
      ++Prefix;
2110
334
    };
2111
2.60M
2112
22.6M
    while (N) {
2113
20.0M
      *--BufPtr = Digits[N % Radix];
2114
20.0M
      N /= Radix;
2115
20.0M
    }
2116
2.60M
    Str.append(BufPtr, std::end(Buffer));
2117
2.60M
    return;
2118
2.60M
  }
2119
10.3k
2120
10.3k
  APInt Tmp(*this);
2121
10.3k
2122
10.3k
  if (Signed && 
isNegative()2.80k
) {
2123
126
    // They want to print the signed version and it is a negative value
2124
126
    // Flip the bits and add one to turn it into the equivalent positive
2125
126
    // value and put a '-' in the result.
2126
126
    Tmp.negate();
2127
126
    Str.push_back('-');
2128
126
  }
2129
10.3k
2130
21.2k
  while (*Prefix) {
2131
10.9k
    Str.push_back(*Prefix);
2132
10.9k
    ++Prefix;
2133
10.9k
  };
2134
10.3k
2135
10.3k
  // We insert the digits backward, then reverse them to get the right order.
2136
10.3k
  unsigned StartDig = Str.size();
2137
10.3k
2138
10.3k
  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2139
10.3k
  // because the number of bits per digit (1, 3 and 4 respectively) divides
2140
10.3k
  // equally.  We just shift until the value is zero.
2141
10.3k
  if (
Radix == 210.3k
|| Radix == 8 || Radix == 16) {
2142
5.46k
    // Just shift tmp right for each digit width until it becomes zero
2143
5.46k
    unsigned ShiftAmt = (Radix == 16 ? 4 : 
(Radix == 8 0
?
30
:
10
));
2144
5.46k
    unsigned MaskAmt = Radix - 1;
2145
5.46k
2146
94.3k
    while (Tmp.getBoolValue()) {
2147
88.8k
      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2148
88.8k
      Str.push_back(Digits[Digit]);
2149
88.8k
      Tmp.lshrInPlace(ShiftAmt);
2150
88.8k
    }
2151
5.46k
  } else {
2152
155k
    while (Tmp.getBoolValue()) {
2153
150k
      uint64_t Digit;
2154
150k
      udivrem(Tmp, Radix, Tmp, Digit);
2155
150k
      assert(Digit < Radix && "divide failed");
2156
150k
      Str.push_back(Digits[Digit]);
2157
150k
    }
2158
4.85k
  }
2159
10.3k
2160
10.3k
  // Reverse the digits before returning.
2161
10.3k
  std::reverse(Str.begin()+StartDig, Str.end());
2162
10.3k
}
2163
2164
/// Returns the APInt as a std::string. Note that this is an inefficient method.
2165
/// It is better to pass in a SmallVector/SmallString to the methods above.
2166
1.72M
std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2167
1.72M
  SmallString<40> S;
2168
1.72M
  toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2169
1.72M
  return S.str();
2170
1.72M
}
2171
2172
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2173
LLVM_DUMP_METHOD void APInt::dump() const {
2174
  SmallString<40> S, U;
2175
  this->toStringUnsigned(U);
2176
  this->toStringSigned(S);
2177
  dbgs() << "APInt(" << BitWidth << "b, "
2178
         << U << "u " << S << "s)\n";
2179
}
2180
#endif
2181
2182
1.39M
void APInt::print(raw_ostream &OS, bool isSigned) const {
2183
1.39M
  SmallString<40> S;
2184
1.39M
  this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2185
1.39M
  OS << S;
2186
1.39M
}
2187
2188
// This implements a variety of operations on a representation of
2189
// arbitrary precision, two's-complement, bignum integer values.
2190
2191
// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2192
// and unrestricting assumption.
2193
static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2194
              "Part width must be divisible by 2!");
2195
2196
/* Some handy functions local to this file.  */
2197
2198
/* Returns the integer part with the least significant BITS set.
2199
   BITS cannot be zero.  */
2200
104M
static inline APInt::WordType lowBitMask(unsigned bits) {
2201
104M
  assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2202
104M
2203
104M
  return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2204
104M
}
2205
2206
/* Returns the value of the lower half of PART.  */
2207
103M
static inline APInt::WordType lowHalf(APInt::WordType part) {
2208
103M
  return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2209
103M
}
2210
2211
/* Returns the value of the upper half of PART.  */
2212
154M
static inline APInt::WordType highHalf(APInt::WordType part) {
2213
154M
  return part >> (APInt::APINT_BITS_PER_WORD / 2);
2214
154M
}
2215
2216
/* Returns the bit number of the most significant set bit of a part.
2217
   If the input number has no bits set -1U is returned.  */
2218
5.64M
static unsigned partMSB(APInt::WordType value) {
2219
5.64M
  return findLastSet(value, ZB_Max);
2220
5.64M
}
2221
2222
/* Returns the bit number of the least significant set bit of a
2223
   part.  If the input number has no bits set -1U is returned.  */
2224
2.24M
static unsigned partLSB(APInt::WordType value) {
2225
2.24M
  return findFirstSet(value, ZB_Max);
2226
2.24M
}
2227
2228
/* Sets the least significant part of a bignum to the input value, and
2229
   zeroes out higher parts.  */
2230
37.9M
void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2231
37.9M
  assert(parts > 0);
2232
37.9M
2233
37.9M
  dst[0] = part;
2234
73.9M
  for (unsigned i = 1; i < parts; 
i++35.9M
)
2235
35.9M
    dst[i] = 0;
2236
37.9M
}
2237
2238
/* Assign one bignum to another.  */
2239
3.10M
void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2240
6.29M
  for (unsigned i = 0; i < parts; 
i++3.18M
)
2241
3.18M
    dst[i] = src[i];
2242
3.10M
}
2243
2244
/* Returns true if a bignum is zero, false otherwise.  */
2245
153k
bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2246
232k
  for (unsigned i = 0; i < parts; 
i++78.9k
)
2247
153k
    if (src[i])
2248
74.8k
      return false;
2249
153k
2250
153k
  
return true78.5k
;
2251
153k
}
2252
2253
/* Extract the given bit of a bignum; returns 0 or 1.  */
2254
1.60M
int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2255
1.60M
  return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2256
1.60M
}
2257
2258
/* Set the given bit of a bignum. */
2259
5.02M
void APInt::tcSetBit(WordType *parts, unsigned bit) {
2260
5.02M
  parts[whichWord(bit)] |= maskBit(bit);
2261
5.02M
}
2262
2263
/* Clears the given bit of a bignum. */
2264
2.06k
void APInt::tcClearBit(WordType *parts, unsigned bit) {
2265
2.06k
  parts[whichWord(bit)] &= ~maskBit(bit);
2266
2.06k
}
2267
2268
/* Returns the bit number of the least significant set bit of a
2269
   number.  If the input number has no bits set -1U is returned.  */
2270
2.24M
unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2271
2.32M
  for (unsigned i = 0; i < n; 
i++77.6k
) {
2272
2.32M
    if (parts[i] != 0) {
2273
2.24M
      unsigned lsb = partLSB(parts[i]);
2274
2.24M
2275
2.24M
      return lsb + i * APINT_BITS_PER_WORD;
2276
2.24M
    }
2277
2.32M
  }
2278
2.24M
2279
2.24M
  
return -1U0
;
2280
2.24M
}
2281
2282
/* Returns the bit number of the most significant set bit of a number.
2283
   If the input number has no bits set -1U is returned.  */
2284
5.66M
unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2285
5.74M
  do {
2286
5.74M
    --n;
2287
5.74M
2288
5.74M
    if (parts[n] != 0) {
2289
5.64M
      unsigned msb = partMSB(parts[n]);
2290
5.64M
2291
5.64M
      return msb + n * APINT_BITS_PER_WORD;
2292
5.64M
    }
2293
100k
  } while (n);
2294
5.66M
2295
5.66M
  
return -1U26.2k
;
2296
5.66M
}
2297
2298
/* Copy the bit vector of width srcBITS from SRC, starting at bit
2299
   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2300
   the least significant bit of DST.  All high bits above srcBITS in
2301
   DST are zero-filled.  */
2302
void
2303
APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2304
1.01M
                 unsigned srcBits, unsigned srcLSB) {
2305
1.01M
  unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2306
1.01M
  assert(dstParts <= dstCount);
2307
1.01M
2308
1.01M
  unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2309
1.01M
  tcAssign (dst, src + firstSrcPart, dstParts);
2310
1.01M
2311
1.01M
  unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2312
1.01M
  tcShiftRight (dst, dstParts, shift);
2313
1.01M
2314
1.01M
  /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2315
1.01M
     in DST.  If this is less that srcBits, append the rest, else
2316
1.01M
     clear the high bits.  */
2317
1.01M
  unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2318
1.01M
  if (n < srcBits) {
2319
14.9k
    WordType mask = lowBitMask (srcBits - n);
2320
14.9k
    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2321
14.9k
                          << n % APINT_BITS_PER_WORD);
2322
1.00M
  } else if (n > srcBits) {
2323
993k
    if (srcBits % APINT_BITS_PER_WORD)
2324
991k
      dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2325
993k
  }
2326
1.01M
2327
1.01M
  /* Clear high parts.  */
2328
1.07M
  while (dstParts < dstCount)
2329
63.5k
    dst[dstParts++] = 0;
2330
1.01M
}
2331
2332
/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2333
APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2334
2.44M
                             WordType c, unsigned parts) {
2335
2.44M
  assert(c <= 1);
2336
2.44M
2337
7.32M
  for (unsigned i = 0; i < parts; 
i++4.87M
) {
2338
4.87M
    WordType l = dst[i];
2339
4.87M
    if (c) {
2340
229k
      dst[i] += rhs[i] + 1;
2341
229k
      c = (dst[i] <= l);
2342
4.64M
    } else {
2343
4.64M
      dst[i] += rhs[i];
2344
4.64M
      c = (dst[i] < l);
2345
4.64M
    }
2346
4.87M
  }
2347
2.44M
2348
2.44M
  return c;
2349
2.44M
}
2350
2351
/// This function adds a single "word" integer, src, to the multiple
2352
/// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2353
/// 1 is returned if there is a carry out, otherwise 0 is returned.
2354
/// @returns the carry of the addition.
2355
APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2356
15.0M
                                 unsigned parts) {
2357
19.3M
  for (unsigned i = 0; i < parts; 
++i4.31M
) {
2358
18.6M
    dst[i] += src;
2359
18.6M
    if (dst[i] >= src)
2360
14.3M
      return 0; // No need to carry so exit early.
2361
4.31M
    src = 1; // Carry one to next digit.
2362
4.31M
  }
2363
15.0M
2364
15.0M
  
return 1716k
;
2365
15.0M
}
2366
2367
/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2368
APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2369
8.70M
                                  WordType c, unsigned parts) {
2370
8.70M
  assert(c <= 1);
2371
8.70M
2372
21.1M
  for (unsigned i = 0; i < parts; 
i++12.4M
) {
2373
12.4M
    WordType l = dst[i];
2374
12.4M
    if (c) {
2375
978k
      dst[i] -= rhs[i] + 1;
2376
978k
      c = (dst[i] >= l);
2377
11.4M
    } else {
2378
11.4M
      dst[i] -= rhs[i];
2379
11.4M
      c = (dst[i] > l);
2380
11.4M
    }
2381
12.4M
  }
2382
8.70M
2383
8.70M
  return c;
2384
8.70M
}
2385
2386
/// This function subtracts a single "word" (64-bit word), src, from
2387
/// the multi-word integer array, dst[], propagating the borrowed 1 value until
2388
/// no further borrowing is needed or it runs out of "words" in dst.  The result
2389
/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2390
/// exhausted. In other words, if src > dst then this function returns 1,
2391
/// otherwise 0.
2392
/// @returns the borrow out of the subtraction
2393
APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2394
4.54M
                                      unsigned parts) {
2395
5.09M
  for (unsigned i = 0; i < parts; 
++i552k
) {
2396
5.04M
    WordType Dst = dst[i];
2397
5.04M
    dst[i] -= src;
2398
5.04M
    if (src <= Dst)
2399
4.48M
      return 0; // No need to borrow so exit early.
2400
552k
    src = 1; // We have to "borrow 1" from next "word"
2401
552k
  }
2402
4.54M
2403
4.54M
  
return 155.7k
;
2404
4.54M
}
2405
2406
/* Negate a bignum in-place.  */
2407
1.16k
void APInt::tcNegate(WordType *dst, unsigned parts) {
2408
1.16k
  tcComplement(dst, parts);
2409
1.16k
  tcIncrement(dst, parts);
2410
1.16k
}
2411
2412
/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2413
    DST  = SRC * MULTIPLIER + CARRY   if add is false
2414
2415
    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2416
    they must start at the same point, i.e. DST == SRC.
2417
2418
    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2419
    returned.  Otherwise DST is filled with the least significant
2420
    DSTPARTS parts of the result, and if all of the omitted higher
2421
    parts were zero return zero, otherwise overflow occurred and
2422
    return one.  */
2423
int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2424
                          WordType multiplier, WordType carry,
2425
                          unsigned srcParts, unsigned dstParts,
2426
71.1M
                          bool add) {
2427
71.1M
  /* Otherwise our writes of DST kill our later reads of SRC.  */
2428
71.1M
  assert(dst <= src || dst >= src + srcParts);
2429
71.1M
  assert(dstParts <= srcParts + 1);
2430
71.1M
2431
71.1M
  /* N loops; minimum of dstParts and srcParts.  */
2432
71.1M
  unsigned n = std::min(dstParts, srcParts);
2433
71.1M
2434
194M
  for (unsigned i = 0; i < n; 
i++123M
) {
2435
123M
    WordType low, mid, high, srcPart;
2436
123M
2437
123M
      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2438
123M
2439
123M
         This cannot overflow, because
2440
123M
2441
123M
         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2442
123M
2443
123M
         which is less than n^2.  */
2444
123M
2445
123M
    srcPart = src[i];
2446
123M
2447
123M
    if (multiplier == 0 || 
srcPart == 077.1M
) {
2448
97.9M
      low = carry;
2449
97.9M
      high = 0;
2450
97.9M
    } else {
2451
25.7M
      low = lowHalf(srcPart) * lowHalf(multiplier);
2452
25.7M
      high = highHalf(srcPart) * highHalf(multiplier);
2453
25.7M
2454
25.7M
      mid = lowHalf(srcPart) * highHalf(multiplier);
2455
25.7M
      high += highHalf(mid);
2456
25.7M
      mid <<= APINT_BITS_PER_WORD / 2;
2457
25.7M
      if (low + mid < low)
2458
4.35M
        high++;
2459
25.7M
      low += mid;
2460
25.7M
2461
25.7M
      mid = highHalf(srcPart) * lowHalf(multiplier);
2462
25.7M
      high += highHalf(mid);
2463
25.7M
      mid <<= APINT_BITS_PER_WORD / 2;
2464
25.7M
      if (low + mid < low)
2465
8.28M
        high++;
2466
25.7M
      low += mid;
2467
25.7M
2468
25.7M
      /* Now add carry.  */
2469
25.7M
      if (low + carry < low)
2470
2.02M
        high++;
2471
25.7M
      low += carry;
2472
25.7M
    }
2473
123M
2474
123M
    if (add) {
2475
123M
      /* And now DST[i], and store the new low part there.  */
2476
123M
      if (low + dst[i] < low)
2477
5.40M
        high++;
2478
123M
      dst[i] += low;
2479
123M
    } else
2480
88.9k
      dst[i] = low;
2481
123M
2482
123M
    carry = high;
2483
123M
  }
2484
71.1M
2485
71.1M
  if (srcParts < dstParts) {
2486
665k
    /* Full multiplication, there is no overflow.  */
2487
665k
    assert(srcParts + 1 == dstParts);
2488
665k
    dst[srcParts] = carry;
2489
665k
    return 0;
2490
665k
  }
2491
70.4M
2492
70.4M
  /* We overflowed if there is carry.  */
2493
70.4M
  if (carry)
2494
6.35M
    return 1;
2495
64.1M
2496
64.1M
  /* We would overflow if any significant unwritten parts would be
2497
64.1M
     non-zero.  This is true if any remaining src parts are non-zero
2498
64.1M
     and the multiplier is non-zero.  */
2499
64.1M
  if (multiplier)
2500
29.0M
    
for (unsigned i = dstParts; 28.4M
i < srcParts;
i++623k
)
2501
626k
      if (src[i])
2502
3.41k
        return 1;
2503
64.1M
2504
64.1M
  /* We fitted in the narrow destination.  */
2505
64.1M
  
return 064.1M
;
2506
64.1M
}
2507
2508
/* DST = LHS * RHS, where DST has the same width as the operands and
2509
   is filled with the least significant parts of the result.  Returns
2510
   one if overflow occurred, otherwise zero.  DST must be disjoint
2511
   from both operands.  */
2512
int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2513
34.8M
                      const WordType *rhs, unsigned parts) {
2514
34.8M
  assert(dst != lhs && dst != rhs);
2515
34.8M
2516
34.8M
  int overflow = 0;
2517
34.8M
  tcSet(dst, 0, parts);
2518
34.8M
2519
105M
  for (unsigned i = 0; i < parts; 
i++70.4M
)
2520
70.4M
    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2521
70.4M
                               parts - i, true);
2522
34.8M
2523
34.8M
  return overflow;
2524
34.8M
}
2525
2526
/// DST = LHS * RHS, where DST has width the sum of the widths of the
2527
/// operands. No overflow occurs. DST must be disjoint from both operands.
2528
void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2529
                           const WordType *rhs, unsigned lhsParts,
2530
191k
                           unsigned rhsParts) {
2531
191k
  /* Put the narrower number on the LHS for less loops below.  */
2532
191k
  if (lhsParts > rhsParts)
2533
0
    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2534
191k
2535
191k
  assert(dst != lhs && dst != rhs);
2536
191k
2537
191k
  tcSet(dst, 0, rhsParts);
2538
191k
2539
560k
  for (unsigned i = 0; i < lhsParts; 
i++368k
)
2540
368k
    tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2541
191k
}
2542
2543
/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2544
   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2545
   set REMAINDER to the remainder, return zero.  i.e.
2546
2547
   OLD_LHS = RHS * LHS + REMAINDER
2548
2549
   SCRATCH is a bignum of the same size as the operands and result for
2550
   use by the routine; its contents need not be initialized and are
2551
   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2552
*/
2553
int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2554
                    WordType *remainder, WordType *srhs,
2555
0
                    unsigned parts) {
2556
0
  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2557
0
2558
0
  unsigned shiftCount = tcMSB(rhs, parts) + 1;
2559
0
  if (shiftCount == 0)
2560
0
    return true;
2561
0
2562
0
  shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2563
0
  unsigned n = shiftCount / APINT_BITS_PER_WORD;
2564
0
  WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2565
0
2566
0
  tcAssign(srhs, rhs, parts);
2567
0
  tcShiftLeft(srhs, parts, shiftCount);
2568
0
  tcAssign(remainder, lhs, parts);
2569
0
  tcSet(lhs, 0, parts);
2570
0
2571
0
  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2572
0
     the total.  */
2573
0
  for (;;) {
2574
0
    int compare = tcCompare(remainder, srhs, parts);
2575
0
    if (compare >= 0) {
2576
0
      tcSubtract(remainder, srhs, 0, parts);
2577
0
      lhs[n] |= mask;
2578
0
    }
2579
0
2580
0
    if (shiftCount == 0)
2581
0
      break;
2582
0
    shiftCount--;
2583
0
    tcShiftRight(srhs, parts, 1);
2584
0
    if ((mask >>= 1) == 0) {
2585
0
      mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2586
0
      n--;
2587
0
    }
2588
0
  }
2589
0
2590
0
  return false;
2591
0
}
2592
2593
/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2594
/// no restrictions on Count.
2595
24.1M
void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2596
24.1M
  // Don't bother performing a no-op shift.
2597
24.1M
  if (!Count)
2598
3.68k
    return;
2599
24.1M
2600
24.1M
  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2601
24.1M
  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2602
24.1M
  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2603
24.1M
2604
24.1M
  // Fastpath for moving by whole words.
2605
24.1M
  if (BitShift == 0) {
2606
588k
    std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2607
23.5M
  } else {
2608
70.5M
    while (Words-- > WordShift) {
2609
47.0M
      Dst[Words] = Dst[Words - WordShift] << BitShift;
2610
47.0M
      if (Words > WordShift)
2611
23.4M
        Dst[Words] |=
2612
23.4M
          Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2613
47.0M
    }
2614
23.5M
  }
2615
24.1M
2616
24.1M
  // Fill in the remainder with 0s.
2617
24.1M
  std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2618
24.1M
}
2619
2620
/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2621
/// are no restrictions on Count.
2622
5.63M
void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2623
5.63M
  // Don't bother performing a no-op shift.
2624
5.63M
  if (!Count)
2625
654k
    return;
2626
4.98M
2627
4.98M
  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2628
4.98M
  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2629
4.98M
  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2630
4.98M
2631
4.98M
  unsigned WordsToMove = Words - WordShift;
2632
4.98M
  // Fastpath for moving by whole words.
2633
4.98M
  if (BitShift == 0) {
2634
1.25M
    std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2635
3.72M
  } else {
2636
23.9M
    for (unsigned i = 0; i != WordsToMove; 
++i20.2M
) {
2637
20.2M
      Dst[i] = Dst[i + WordShift] >> BitShift;
2638
20.2M
      if (i + 1 != WordsToMove)
2639
16.5M
        Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2640
20.2M
    }
2641
3.72M
  }
2642
4.98M
2643
4.98M
  // Fill in the remainder with 0s.
2644
4.98M
  std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2645
4.98M
}
2646
2647
/* Bitwise and of two bignums.  */
2648
2.83M
void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2649
8.55M
  for (unsigned i = 0; i < parts; 
i++5.71M
)
2650
5.71M
    dst[i] &= rhs[i];
2651
2.83M
}
2652
2653
/* Bitwise inclusive or of two bignums.  */
2654
7.53M
void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2655
22.7M
  for (unsigned i = 0; i < parts; 
i++15.2M
)
2656
15.2M
    dst[i] |= rhs[i];
2657
7.53M
}
2658
2659
/* Bitwise exclusive or of two bignums.  */
2660
204k
void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2661
617k
  for (unsigned i = 0; i < parts; 
i++412k
)
2662
412k
    dst[i] ^= rhs[i];
2663
204k
}
2664
2665
/* Complement a bignum in-place.  */
2666
5.66M
void APInt::tcComplement(WordType *dst, unsigned parts) {
2667
17.0M
  for (unsigned i = 0; i < parts; 
i++11.3M
)
2668
11.3M
    dst[i] = ~dst[i];
2669
5.66M
}
2670
2671
/* Comparison (unsigned) of two bignums.  */
2672
int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2673
53.5M
                     unsigned parts) {
2674
77.3M
  while (parts) {
2675
71.7M
    parts--;
2676
71.7M
    if (lhs[parts] != rhs[parts])
2677
47.9M
      return (lhs[parts] > rhs[parts]) ? 
118.9M
:
-128.9M
;
2678
71.7M
  }
2679
53.5M
2680
53.5M
  
return 05.60M
;
2681
53.5M
}
2682
2683
/* Set the least significant BITS bits of a bignum, clear the
2684
   rest.  */
2685
void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2686
255
                                      unsigned bits) {
2687
255
  unsigned i = 0;
2688
256
  while (bits > APINT_BITS_PER_WORD) {
2689
1
    dst[i++] = ~(WordType) 0;
2690
1
    bits -= APINT_BITS_PER_WORD;
2691
1
  }
2692
255
2693
255
  if (bits)
2694
172
    dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2695
255
2696
382
  while (i < parts)
2697
127
    dst[i++] = 0;
2698
255
}
2699
2700
APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2701
40.7M
                                   APInt::Rounding RM) {
2702
40.7M
  // Currently udivrem always rounds down.
2703
40.7M
  switch (RM) {
2704
40.7M
  case APInt::Rounding::DOWN:
2705
20.4M
  case APInt::Rounding::TOWARD_ZERO:
2706
20.4M
    return A.udiv(B);
2707
20.4M
  case APInt::Rounding::UP: {
2708
20.3M
    APInt Quo, Rem;
2709
20.3M
    APInt::udivrem(A, B, Quo, Rem);
2710
20.3M
    if (Rem == 0)
2711
20.2M
      return Quo;
2712
63.5k
    return Quo + 1;
2713
63.5k
  }
2714
0
  }
2715
0
  llvm_unreachable("Unknown APInt::Rounding enum");
2716
0
}
2717
2718
APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2719
20.8M
                                   APInt::Rounding RM) {
2720
20.8M
  switch (RM) {
2721
20.8M
  case APInt::Rounding::DOWN:
2722
20.8M
  case APInt::Rounding::UP: {
2723
20.8M
    APInt Quo, Rem;
2724
20.8M
    APInt::sdivrem(A, B, Quo, Rem);
2725
20.8M
    if (Rem == 0)
2726
6.99M
      return Quo;
2727
13.8M
    // This algorithm deals with arbitrary rounding mode used by sdivrem.
2728
13.8M
    // We want to check whether the non-integer part of the mathematical value
2729
13.8M
    // is negative or not. If the non-integer part is negative, we need to round
2730
13.8M
    // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2731
13.8M
    // already rounded down.
2732
13.8M
    if (RM == APInt::Rounding::DOWN) {
2733
8.29M
      if (Rem.isNegative() != B.isNegative())
2734
0
        return Quo - 1;
2735
8.29M
      return Quo;
2736
8.29M
    }
2737
5.52M
    if (Rem.isNegative() != B.isNegative())
2738
5.52M
      return Quo;
2739
0
    return Quo + 1;
2740
0
  }
2741
0
  // Currently sdiv rounds twards zero.
2742
255
  case APInt::Rounding::TOWARD_ZERO:
2743
255
    return A.sdiv(B);
2744
0
  }
2745
0
  llvm_unreachable("Unknown APInt::Rounding enum");
2746
0
}
2747
2748
Optional<APInt>
2749
llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2750
294k
                                           unsigned RangeWidth) {
2751
294k
  unsigned CoeffWidth = A.getBitWidth();
2752
294k
  assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2753
294k
  assert(RangeWidth <= CoeffWidth &&
2754
294k
         "Value range width should be less than coefficient width");
2755
294k
  assert(RangeWidth > 1 && "Value range bit width should be > 1");
2756
294k
2757
294k
  LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2758
294k
                    << "x + " << C << ", rw:" << RangeWidth << '\n');
2759
294k
2760
294k
  // Identify 0 as a (non)solution immediately.
2761
294k
  if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2762
5.33k
    LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2763
5.33k
    return APInt(CoeffWidth, 0);
2764
5.33k
  }
2765
288k
2766
288k
  // The result of APInt arithmetic has the same bit width as the operands,
2767
288k
  // so it can actually lose high bits. A product of two n-bit integers needs
2768
288k
  // 2n-1 bits to represent the full value.
2769
288k
  // The operation done below (on quadratic coefficients) that can produce
2770
288k
  // the largest value is the evaluation of the equation during bisection,
2771
288k
  // which needs 3 times the bitwidth of the coefficient, so the total number
2772
288k
  // of required bits is 3n.
2773
288k
  //
2774
288k
  // The purpose of this extension is to simulate the set Z of all integers,
2775
288k
  // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2776
288k
  // and negative numbers (not so much in a modulo arithmetic). The method
2777
288k
  // used to solve the equation is based on the standard formula for real
2778
288k
  // numbers, and uses the concepts of "positive" and "negative" with their
2779
288k
  // usual meanings.
2780
288k
  CoeffWidth *= 3;
2781
288k
  A = A.sext(CoeffWidth);
2782
288k
  B = B.sext(CoeffWidth);
2783
288k
  C = C.sext(CoeffWidth);
2784
288k
2785
288k
  // Make A > 0 for simplicity. Negate cannot overflow at this point because
2786
288k
  // the bit width has increased.
2787
288k
  if (A.isNegative()) {
2788
147k
    A.negate();
2789
147k
    B.negate();
2790
147k
    C.negate();
2791
147k
  }
2792
288k
2793
288k
  // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2794
288k
  // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2795
288k
  // and R = 2^BitWidth.
2796
288k
  // Since we're trying not only to find exact solutions, but also values
2797
288k
  // that "wrap around", such a set will always have a solution, i.e. an x
2798
288k
  // that satisfies at least one of the equations, or such that |q(x)|
2799
288k
  // exceeds kR, while |q(x-1)| for the same k does not.
2800
288k
  //
2801
288k
  // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2802
288k
  // positive solution n (in the above sense), and also such that the n
2803
288k
  // will be the least among all solutions corresponding to k = 0, 1, ...
2804
288k
  // (more precisely, the least element in the set
2805
288k
  //   { n(k) | k is such that a solution n(k) exists }).
2806
288k
  //
2807
288k
  // Consider the parabola (over real numbers) that corresponds to the
2808
288k
  // quadratic equation. Since A > 0, the arms of the parabola will point
2809
288k
  // up. Picking different values of k will shift it up and down by R.
2810
288k
  //
2811
288k
  // We want to shift the parabola in such a way as to reduce the problem
2812
288k
  // of solving q(x) = kR to solving shifted_q(x) = 0.
2813
288k
  // (The interesting solutions are the ceilings of the real number
2814
288k
  // solutions.)
2815
288k
  APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2816
288k
  APInt TwoA = 2 * A;
2817
288k
  APInt SqrB = B * B;
2818
288k
  bool PickLow;
2819
288k
2820
288k
  auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2821
161k
    assert(A.isStrictlyPositive());
2822
161k
    APInt T = V.abs().urem(A);
2823
161k
    if (T.isNullValue())
2824
1.93k
      return V;
2825
159k
    return V.isNegative() ? 
V+T102k
:
V+(A-T)57.2k
;
2826
159k
  };
2827
288k
2828
288k
  // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2829
288k
  // iff B is positive.
2830
288k
  if (B.isNonNegative()) {
2831
147k
    // If B >= 0, the vertex it at a negative location (or at 0), so in
2832
147k
    // order to have a non-negative solution we need to pick k that makes
2833
147k
    // C-kR negative. To satisfy all the requirements for the solution
2834
147k
    // that we are looking for, it needs to be closest to 0 of all k.
2835
147k
    C = C.srem(R);
2836
147k
    if (C.isStrictlyPositive())
2837
73.6k
      C -= R;
2838
147k
    // Pick the greater solution.
2839
147k
    PickLow = false;
2840
147k
  } else {
2841
141k
    // If B < 0, the vertex is at a positive location. For any solution
2842
141k
    // to exist, the discriminant must be non-negative. This means that
2843
141k
    // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2844
141k
    // lower bound on values of k: kR >= C - B^2/4A.
2845
141k
    APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2846
141k
    // Round LowkR up (towards +inf) to the nearest kR.
2847
141k
    LowkR = RoundUp(LowkR, R);
2848
141k
2849
141k
    // If there exists k meeting the condition above, and such that
2850
141k
    // C-kR > 0, there will be two positive real number solutions of
2851
141k
    // q(x) = kR. Out of all such values of k, pick the one that makes
2852
141k
    // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2853
141k
    // In other words, find maximum k such that LowkR <= kR < C.
2854
141k
    if (C.sgt(LowkR)) {
2855
20.1k
      // If LowkR < C, then such a k is guaranteed to exist because
2856
20.1k
      // LowkR itself is a multiple of R.
2857
20.1k
      C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2858
20.1k
      // Pick the smaller solution.
2859
20.1k
      PickLow = true;
2860
121k
    } else {
2861
121k
      // If C-kR < 0 for all potential k's, it means that one solution
2862
121k
      // will be negative, while the other will be positive. The positive
2863
121k
      // solution will shift towards 0 if the parabola is moved up.
2864
121k
      // Pick the kR closest to the lower bound (i.e. make C-kR closest
2865
121k
      // to 0, or in other words, out of all parabolas that have solutions,
2866
121k
      // pick the one that is the farthest "up").
2867
121k
      // Since LowkR is itself a multiple of R, simply take C-LowkR.
2868
121k
      C -= LowkR;
2869
121k
      // Pick the greater solution.
2870
121k
      PickLow = false;
2871
121k
    }
2872
141k
  }
2873
288k
2874
288k
  LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2875
288k
                    << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2876
288k
2877
288k
  APInt D = SqrB - 4*A*C;
2878
288k
  assert(D.isNonNegative() && "Negative discriminant");
2879
288k
  APInt SQ = D.sqrt();
2880
288k
2881
288k
  APInt Q = SQ * SQ;
2882
288k
  bool InexactSQ = Q != D;
2883
288k
  // The calculated SQ may actually be greater than the exact (non-integer)
2884
288k
  // value. If that's the case, decremement SQ to get a value that is lower.
2885
288k
  if (Q.sgt(D))
2886
133k
    SQ -= 1;
2887
288k
2888
288k
  APInt X;
2889
288k
  APInt Rem;
2890
288k
2891
288k
  // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2892
288k
  // When using the quadratic formula directly, the calculated low root
2893
288k
  // may be greater than the exact one, since we would be subtracting SQ.
2894
288k
  // To make sure that the calculated root is not greater than the exact
2895
288k
  // one, subtract SQ+1 when calculating the low root (for inexact value
2896
288k
  // of SQ).
2897
288k
  if (PickLow)
2898
20.1k
    APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2899
268k
  else
2900
268k
    APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2901
288k
2902
288k
  // The updated coefficients should be such that the (exact) solution is
2903
288k
  // positive. Since APInt division rounds towards 0, the calculated one
2904
288k
  // can be 0, but cannot be negative.
2905
288k
  assert(X.isNonNegative() && "Solution should be non-negative");
2906
288k
2907
288k
  if (!InexactSQ && 
Rem.isNullValue()21.2k
) {
2908
8.46k
    LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2909
8.46k
    return X;
2910
8.46k
  }
2911
280k
2912
280k
  assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2913
280k
  // The exact value of the square root of D should be between SQ and SQ+1.
2914
280k
  // This implies that the solution should be between that corresponding to
2915
280k
  // SQ (i.e. X) and that corresponding to SQ+1.
2916
280k
  //
2917
280k
  // The calculated X cannot be greater than the exact (real) solution.
2918
280k
  // Actually it must be strictly less than the exact solution, while
2919
280k
  // X+1 will be greater than or equal to it.
2920
280k
2921
280k
  APInt VX = (A*X + B)*X + C;
2922
280k
  APInt VY = VX + TwoA*X + A + B;
2923
280k
  bool SignChange = VX.isNegative() != VY.isNegative() ||
2924
280k
                    
VX.isNullValue() != VY.isNullValue()3.34k
;
2925
280k
  // If the sign did not change between X and X+1, X is not a valid solution.
2926
280k
  // This could happen when the actual (exact) roots don't have an integer
2927
280k
  // between them, so they would both be contained between X and X+1.
2928
280k
  if (!SignChange) {
2929
2.62k
    LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2930
2.62k
    return None;
2931
2.62k
  }
2932
277k
2933
277k
  X += 1;
2934
277k
  LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2935
277k
  return X;
2936
277k
}
2937
2938
/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2939
/// with the integer held in IntVal.
2940
void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2941
486
                            unsigned StoreBytes) {
2942
486
  assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2943
486
  const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2944
486
2945
486
  if (sys::IsLittleEndianHost) {
2946
486
    // Little-endian host - the source is ordered from LSB to MSB.  Order the
2947
486
    // destination from LSB to MSB: Do a straight copy.
2948
486
    memcpy(Dst, Src, StoreBytes);
2949
486
  } else {
2950
0
    // Big-endian host - the source is an array of 64 bit words ordered from
2951
0
    // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
2952
0
    // from MSB to LSB: Reverse the word order, but not the bytes in a word.
2953
0
    while (StoreBytes > sizeof(uint64_t)) {
2954
0
      StoreBytes -= sizeof(uint64_t);
2955
0
      // May not be aligned so use memcpy.
2956
0
      memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
2957
0
      Src += sizeof(uint64_t);
2958
0
    }
2959
0
2960
0
    memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
2961
0
  }
2962
486
}
2963
2964
/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2965
/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2966
482
void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
2967
482
  assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
2968
482
  uint8_t *Dst = reinterpret_cast<uint8_t *>(
2969
482
                   const_cast<uint64_t *>(IntVal.getRawData()));
2970
482
2971
482
  if (sys::IsLittleEndianHost)
2972
482
    // Little-endian host - the destination must be ordered from LSB to MSB.
2973
482
    // The source is ordered from LSB to MSB: Do a straight copy.
2974
482
    memcpy(Dst, Src, LoadBytes);
2975
0
  else {
2976
0
    // Big-endian - the destination is an array of 64 bit words ordered from
2977
0
    // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
2978
0
    // ordered from MSB to LSB: Reverse the word order, but not the bytes in
2979
0
    // a word.
2980
0
    while (LoadBytes > sizeof(uint64_t)) {
2981
0
      LoadBytes -= sizeof(uint64_t);
2982
0
      // May not be aligned so use memcpy.
2983
0
      memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
2984
0
      Dst += sizeof(uint64_t);
2985
0
    }
2986
0
2987
0
    memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
2988
0
  }
2989
482
}