/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Support/APInt.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- APInt.cpp - Implement APInt class ---------------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This file implements a class to represent arbitrary precision integer |
11 | | // constant values and provide a variety of arithmetic operations on them. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "llvm/ADT/APInt.h" |
16 | | #include "llvm/ADT/ArrayRef.h" |
17 | | #include "llvm/ADT/FoldingSet.h" |
18 | | #include "llvm/ADT/Hashing.h" |
19 | | #include "llvm/ADT/SmallString.h" |
20 | | #include "llvm/ADT/StringRef.h" |
21 | | #include "llvm/Support/Debug.h" |
22 | | #include "llvm/Support/ErrorHandling.h" |
23 | | #include "llvm/Support/MathExtras.h" |
24 | | #include "llvm/Support/raw_ostream.h" |
25 | | #include <climits> |
26 | | #include <cmath> |
27 | | #include <cstdlib> |
28 | | #include <cstring> |
29 | | using namespace llvm; |
30 | | |
31 | | #define DEBUG_TYPE "apint" |
32 | | |
33 | | /// A utility function for allocating memory, checking for allocation failures, |
34 | | /// and ensuring the contents are zeroed. |
35 | 19.4M | inline static uint64_t* getClearedMemory(unsigned numWords) { |
36 | 19.4M | uint64_t * result = new uint64_t[numWords]; |
37 | 19.4M | assert(result && "APInt memory allocation fails!"); |
38 | 19.4M | memset(result, 0, numWords * sizeof(uint64_t)); |
39 | 19.4M | return result; |
40 | 19.4M | } |
41 | | |
42 | | /// A utility function for allocating memory and checking for allocation |
43 | | /// failure. The content is not zeroed. |
44 | 106M | inline static uint64_t* getMemory(unsigned numWords) { |
45 | 106M | uint64_t * result = new uint64_t[numWords]; |
46 | 106M | assert(result && "APInt memory allocation fails!"); |
47 | 106M | return result; |
48 | 106M | } |
49 | | |
50 | | /// A utility function that converts a character to a digit. |
51 | 3.21M | inline static unsigned getDigit(char cdigit, uint8_t radix) { |
52 | 3.21M | unsigned r; |
53 | 3.21M | |
54 | 3.21M | if (radix == 16 || 3.21M radix == 363.20M ) { |
55 | 5.33k | r = cdigit - '0'; |
56 | 5.33k | if (r <= 9) |
57 | 4.16k | return r; |
58 | 1.16k | |
59 | 1.16k | r = cdigit - 'A'; |
60 | 1.16k | if (r <= radix - 11U) |
61 | 474 | return r + 10; |
62 | 688 | |
63 | 688 | r = cdigit - 'a'; |
64 | 688 | if (r <= radix - 11U) |
65 | 688 | return r + 10; |
66 | 0 |
|
67 | 0 | radix = 10; |
68 | 0 | } |
69 | 3.21M | |
70 | 3.20M | r = cdigit - '0'; |
71 | 3.20M | if (r < radix) |
72 | 3.20M | return r; |
73 | 0 |
|
74 | 0 | return -1U; |
75 | 0 | } |
76 | | |
77 | | |
78 | 19.4M | void APInt::initSlowCase(uint64_t val, bool isSigned) { |
79 | 19.4M | U.pVal = getClearedMemory(getNumWords()); |
80 | 19.4M | U.pVal[0] = val; |
81 | 19.4M | if (isSigned && 19.4M int64_t(val) < 05.42M ) |
82 | 10.1M | for (unsigned i = 1; 5.05M i < getNumWords()10.1M ; ++i5.06M ) |
83 | 5.06M | U.pVal[i] = WORD_MAX; |
84 | 19.4M | clearUnusedBits(); |
85 | 19.4M | } |
86 | | |
87 | 62.7M | void APInt::initSlowCase(const APInt& that) { |
88 | 62.7M | U.pVal = getMemory(getNumWords()); |
89 | 62.7M | memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); |
90 | 62.7M | } |
91 | | |
92 | 184k | void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { |
93 | 184k | assert(BitWidth && "Bitwidth too small"); |
94 | 184k | assert(bigVal.data() && "Null pointer detected!"); |
95 | 184k | if (isSingleWord()) |
96 | 157k | U.VAL = bigVal[0]; |
97 | 27.3k | else { |
98 | 27.3k | // Get memory, cleared to 0 |
99 | 27.3k | U.pVal = getClearedMemory(getNumWords()); |
100 | 27.3k | // Calculate the number of words to copy |
101 | 27.3k | unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); |
102 | 27.3k | // Copy the words from bigVal to pVal |
103 | 27.3k | memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); |
104 | 27.3k | } |
105 | 184k | // Make sure unused high bits are cleared |
106 | 184k | clearUnusedBits(); |
107 | 184k | } |
108 | | |
109 | | APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) |
110 | 156k | : BitWidth(numBits) { |
111 | 156k | initFromArray(bigVal); |
112 | 156k | } |
113 | | |
114 | | APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) |
115 | 27.8k | : BitWidth(numBits) { |
116 | 27.8k | initFromArray(makeArrayRef(bigVal, numWords)); |
117 | 27.8k | } |
118 | | |
119 | | APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) |
120 | 2.38M | : BitWidth(numbits) { |
121 | 2.38M | assert(BitWidth && "Bitwidth too small"); |
122 | 2.38M | fromString(numbits, Str, radix); |
123 | 2.38M | } |
124 | | |
125 | 592k | void APInt::reallocate(unsigned NewBitWidth) { |
126 | 592k | // If the number of words is the same we can just change the width and stop. |
127 | 592k | if (getNumWords() == getNumWords(NewBitWidth)592k ) { |
128 | 242k | BitWidth = NewBitWidth; |
129 | 242k | return; |
130 | 242k | } |
131 | 349k | |
132 | 349k | // If we have an allocation, delete it. |
133 | 349k | if (349k !isSingleWord()349k ) |
134 | 5.62k | delete [] U.pVal; |
135 | 349k | |
136 | 349k | // Update BitWidth. |
137 | 349k | BitWidth = NewBitWidth; |
138 | 349k | |
139 | 349k | // If we are supposed to have an allocation, create it. |
140 | 349k | if (!isSingleWord()) |
141 | 343k | U.pVal = getMemory(getNumWords()); |
142 | 592k | } |
143 | | |
144 | 536k | void APInt::AssignSlowCase(const APInt& RHS) { |
145 | 536k | // Don't do anything for X = X |
146 | 536k | if (this == &RHS) |
147 | 35.4k | return; |
148 | 501k | |
149 | 501k | // Adjust the bit width and handle allocations as necessary. |
150 | 501k | reallocate(RHS.getBitWidth()); |
151 | 501k | |
152 | 501k | // Copy the data. |
153 | 501k | if (isSingleWord()) |
154 | 5.62k | U.VAL = RHS.U.VAL; |
155 | 501k | else |
156 | 495k | memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); |
157 | 536k | } |
158 | | |
159 | | /// This method 'profiles' an APInt for use with FoldingSet. |
160 | 2.23M | void APInt::Profile(FoldingSetNodeID& ID) const { |
161 | 2.23M | ID.AddInteger(BitWidth); |
162 | 2.23M | |
163 | 2.23M | if (isSingleWord()2.23M ) { |
164 | 2.23M | ID.AddInteger(U.VAL); |
165 | 2.23M | return; |
166 | 2.23M | } |
167 | 4 | |
168 | 4 | unsigned NumWords = getNumWords(); |
169 | 12 | for (unsigned i = 0; i < NumWords12 ; ++i8 ) |
170 | 8 | ID.AddInteger(U.pVal[i]); |
171 | 2.23M | } |
172 | | |
173 | | /// @brief Prefix increment operator. Increments the APInt by one. |
174 | 39.0M | APInt& APInt::operator++() { |
175 | 39.0M | if (isSingleWord()) |
176 | 37.6M | ++U.VAL; |
177 | 39.0M | else |
178 | 1.40M | tcIncrement(U.pVal, getNumWords()); |
179 | 39.0M | return clearUnusedBits(); |
180 | 39.0M | } |
181 | | |
182 | | /// @brief Prefix decrement operator. Decrements the APInt by one. |
183 | 878k | APInt& APInt::operator--() { |
184 | 878k | if (isSingleWord()) |
185 | 878k | --U.VAL; |
186 | 878k | else |
187 | 129 | tcDecrement(U.pVal, getNumWords()); |
188 | 878k | return clearUnusedBits(); |
189 | 878k | } |
190 | | |
191 | | /// Adds the RHS APint to this APInt. |
192 | | /// @returns this, after addition of RHS. |
193 | | /// @brief Addition assignment operator. |
194 | 281M | APInt& APInt::operator+=(const APInt& RHS) { |
195 | 281M | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); |
196 | 281M | if (isSingleWord()) |
197 | 279M | U.VAL += RHS.U.VAL; |
198 | 281M | else |
199 | 1.69M | tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); |
200 | 281M | return clearUnusedBits(); |
201 | 281M | } |
202 | | |
203 | 500M | APInt& APInt::operator+=(uint64_t RHS) { |
204 | 500M | if (isSingleWord()) |
205 | 490M | U.VAL += RHS; |
206 | 500M | else |
207 | 10.0M | tcAddPart(U.pVal, RHS, getNumWords()); |
208 | 500M | return clearUnusedBits(); |
209 | 500M | } |
210 | | |
211 | | /// Subtracts the RHS APInt from this APInt |
212 | | /// @returns this, after subtraction |
213 | | /// @brief Subtraction assignment operator. |
214 | 141M | APInt& APInt::operator-=(const APInt& RHS) { |
215 | 141M | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); |
216 | 141M | if (isSingleWord()) |
217 | 136M | U.VAL -= RHS.U.VAL; |
218 | 141M | else |
219 | 4.62M | tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); |
220 | 141M | return clearUnusedBits(); |
221 | 141M | } |
222 | | |
223 | 131M | APInt& APInt::operator-=(uint64_t RHS) { |
224 | 131M | if (isSingleWord()) |
225 | 127M | U.VAL -= RHS; |
226 | 131M | else |
227 | 4.20M | tcSubtractPart(U.pVal, RHS, getNumWords()); |
228 | 131M | return clearUnusedBits(); |
229 | 131M | } |
230 | | |
231 | 109M | APInt APInt::operator*(const APInt& RHS) const { |
232 | 109M | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); |
233 | 109M | if (isSingleWord()) |
234 | 96.7M | return APInt(BitWidth, U.VAL * RHS.U.VAL); |
235 | 12.7M | |
236 | 12.7M | APInt Result(getMemory(getNumWords()), getBitWidth()); |
237 | 12.7M | |
238 | 12.7M | tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); |
239 | 12.7M | |
240 | 12.7M | Result.clearUnusedBits(); |
241 | 12.7M | return Result; |
242 | 12.7M | } |
243 | | |
244 | 2.24M | void APInt::AndAssignSlowCase(const APInt& RHS) { |
245 | 2.24M | tcAnd(U.pVal, RHS.U.pVal, getNumWords()); |
246 | 2.24M | } |
247 | | |
248 | 13.4M | void APInt::OrAssignSlowCase(const APInt& RHS) { |
249 | 13.4M | tcOr(U.pVal, RHS.U.pVal, getNumWords()); |
250 | 13.4M | } |
251 | | |
252 | 198k | void APInt::XorAssignSlowCase(const APInt& RHS) { |
253 | 198k | tcXor(U.pVal, RHS.U.pVal, getNumWords()); |
254 | 198k | } |
255 | | |
256 | 1.11M | APInt& APInt::operator*=(const APInt& RHS) { |
257 | 1.11M | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); |
258 | 1.11M | *this = *this * RHS; |
259 | 1.11M | return *this; |
260 | 1.11M | } |
261 | | |
262 | 5.70M | APInt& APInt::operator*=(uint64_t RHS) { |
263 | 5.70M | if (isSingleWord()5.70M ) { |
264 | 5.68M | U.VAL *= RHS; |
265 | 5.70M | } else { |
266 | 21.9k | unsigned NumWords = getNumWords(); |
267 | 21.9k | tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); |
268 | 21.9k | } |
269 | 5.70M | return clearUnusedBits(); |
270 | 5.70M | } |
271 | | |
272 | 63.2M | bool APInt::EqualSlowCase(const APInt& RHS) const { |
273 | 63.2M | return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); |
274 | 63.2M | } |
275 | | |
276 | 570M | int APInt::compare(const APInt& RHS) const { |
277 | 570M | assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); |
278 | 570M | if (isSingleWord()) |
279 | 547M | return U.VAL < RHS.U.VAL ? 547M -1257M : U.VAL > RHS.U.VAL290M ; |
280 | 22.8M | |
281 | 22.8M | return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); |
282 | 22.8M | } |
283 | | |
284 | 173M | int APInt::compareSigned(const APInt& RHS) const { |
285 | 173M | assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); |
286 | 173M | if (isSingleWord()173M ) { |
287 | 158M | int64_t lhsSext = SignExtend64(U.VAL, BitWidth); |
288 | 158M | int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); |
289 | 158M | return lhsSext < rhsSext ? -1133M : lhsSext > rhsSext24.9M ; |
290 | 158M | } |
291 | 14.4M | |
292 | 14.4M | bool lhsNeg = isNegative(); |
293 | 14.4M | bool rhsNeg = RHS.isNegative(); |
294 | 14.4M | |
295 | 14.4M | // If the sign bits don't match, then (LHS < RHS) if LHS is negative |
296 | 14.4M | if (lhsNeg != rhsNeg) |
297 | 6.72M | return lhsNeg ? 6.72M -11.96M : 14.75M ; |
298 | 7.67M | |
299 | 7.67M | // Otherwise we can just use an unsigned comparison, because even negative |
300 | 7.67M | // numbers compare correctly this way if both have the same signed-ness. |
301 | 7.67M | return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); |
302 | 7.67M | } |
303 | | |
304 | 2.01M | void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { |
305 | 2.01M | unsigned loWord = whichWord(loBit); |
306 | 2.01M | unsigned hiWord = whichWord(hiBit); |
307 | 2.01M | |
308 | 2.01M | // Create an initial mask for the low word with zeros below loBit. |
309 | 2.01M | uint64_t loMask = WORD_MAX << whichBit(loBit); |
310 | 2.01M | |
311 | 2.01M | // If hiBit is not aligned, we need a high mask. |
312 | 2.01M | unsigned hiShiftAmt = whichBit(hiBit); |
313 | 2.01M | if (hiShiftAmt != 02.01M ) { |
314 | 10.0k | // Create a high mask with zeros above hiBit. |
315 | 10.0k | uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); |
316 | 10.0k | // If loWord and hiWord are equal, then we combine the masks. Otherwise, |
317 | 10.0k | // set the bits in hiWord. |
318 | 10.0k | if (hiWord == loWord) |
319 | 6.75k | loMask &= hiMask; |
320 | 10.0k | else |
321 | 3.31k | U.pVal[hiWord] |= hiMask; |
322 | 10.0k | } |
323 | 2.01M | // Apply the mask to the low word. |
324 | 2.01M | U.pVal[loWord] |= loMask; |
325 | 2.01M | |
326 | 2.01M | // Fill any words between loWord and hiWord with all ones. |
327 | 2.26M | for (unsigned word = loWord + 1; word < hiWord2.26M ; ++word256k ) |
328 | 256k | U.pVal[word] = WORD_MAX; |
329 | 2.01M | } |
330 | | |
331 | | /// @brief Toggle every bit to its opposite value. |
332 | 1.73M | void APInt::flipAllBitsSlowCase() { |
333 | 1.73M | tcComplement(U.pVal, getNumWords()); |
334 | 1.73M | clearUnusedBits(); |
335 | 1.73M | } |
336 | | |
337 | | /// Toggle a given bit to its opposite value whose position is given |
338 | | /// as "bitPosition". |
339 | | /// @brief Toggles a given bit to its opposite value. |
340 | 0 | void APInt::flipBit(unsigned bitPosition) { |
341 | 0 | assert(bitPosition < BitWidth && "Out of the bit-width range!"); |
342 | 0 | if ((*this)[bitPosition]0 ) clearBit(bitPosition)0 ; |
343 | 0 | else setBit(bitPosition); |
344 | 0 | } |
345 | | |
346 | 695k | void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { |
347 | 695k | unsigned subBitWidth = subBits.getBitWidth(); |
348 | 695k | assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && |
349 | 695k | "Illegal bit insertion"); |
350 | 695k | |
351 | 695k | // Insertion is a direct copy. |
352 | 695k | if (subBitWidth == BitWidth695k ) { |
353 | 697 | *this = subBits; |
354 | 697 | return; |
355 | 697 | } |
356 | 694k | |
357 | 694k | // Single word result can be done as a direct bitmask. |
358 | 694k | if (694k isSingleWord()694k ) { |
359 | 253k | uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); |
360 | 253k | U.VAL &= ~(mask << bitPosition); |
361 | 253k | U.VAL |= (subBits.U.VAL << bitPosition); |
362 | 253k | return; |
363 | 253k | } |
364 | 441k | |
365 | 441k | unsigned loBit = whichBit(bitPosition); |
366 | 441k | unsigned loWord = whichWord(bitPosition); |
367 | 441k | unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); |
368 | 441k | |
369 | 441k | // Insertion within a single word can be done as a direct bitmask. |
370 | 441k | if (loWord == hi1Word441k ) { |
371 | 441k | uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); |
372 | 441k | U.pVal[loWord] &= ~(mask << loBit); |
373 | 441k | U.pVal[loWord] |= (subBits.U.VAL << loBit); |
374 | 441k | return; |
375 | 441k | } |
376 | 6 | |
377 | 6 | // Insert on word boundaries. |
378 | 6 | if (6 loBit == 06 ) { |
379 | 4 | // Direct copy whole words. |
380 | 4 | unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; |
381 | 4 | memcpy(U.pVal + loWord, subBits.getRawData(), |
382 | 4 | numWholeSubWords * APINT_WORD_SIZE); |
383 | 4 | |
384 | 4 | // Mask+insert remaining bits. |
385 | 4 | unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; |
386 | 4 | if (remainingBits != 04 ) { |
387 | 3 | uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits); |
388 | 3 | U.pVal[hi1Word] &= ~mask; |
389 | 3 | U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); |
390 | 3 | } |
391 | 4 | return; |
392 | 4 | } |
393 | 2 | |
394 | 2 | // General case - set/clear individual bits in dst based on src. |
395 | 2 | // TODO - there is scope for optimization here, but at the moment this code |
396 | 2 | // path is barely used so prefer readability over performance. |
397 | 162 | for (unsigned i = 0; 2 i != subBitWidth162 ; ++i160 ) { |
398 | 160 | if (subBits[i]) |
399 | 10 | setBit(bitPosition + i); |
400 | 160 | else |
401 | 150 | clearBit(bitPosition + i); |
402 | 160 | } |
403 | 695k | } |
404 | | |
405 | 345k | APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { |
406 | 345k | assert(numBits > 0 && "Can't extract zero bits"); |
407 | 345k | assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && |
408 | 345k | "Illegal bit extraction"); |
409 | 345k | |
410 | 345k | if (isSingleWord()) |
411 | 1 | return APInt(numBits, U.VAL >> bitPosition); |
412 | 345k | |
413 | 345k | unsigned loBit = whichBit(bitPosition); |
414 | 345k | unsigned loWord = whichWord(bitPosition); |
415 | 345k | unsigned hiWord = whichWord(bitPosition + numBits - 1); |
416 | 345k | |
417 | 345k | // Single word result extracting bits from a single word source. |
418 | 345k | if (loWord == hiWord) |
419 | 345k | return APInt(numBits, U.pVal[loWord] >> loBit); |
420 | 5 | |
421 | 5 | // Extracting bits that start on a source word boundary can be done |
422 | 5 | // as a fast memory copy. |
423 | 5 | if (5 loBit == 05 ) |
424 | 2 | return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); |
425 | 3 | |
426 | 3 | // General case - shift + copy source words directly into place. |
427 | 3 | APInt Result(numBits, 0); |
428 | 3 | unsigned NumSrcWords = getNumWords(); |
429 | 3 | unsigned NumDstWords = Result.getNumWords(); |
430 | 3 | |
431 | 10 | for (unsigned word = 0; word < NumDstWords10 ; ++word7 ) { |
432 | 7 | uint64_t w0 = U.pVal[loWord + word]; |
433 | 7 | uint64_t w1 = |
434 | 7 | (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1]7 : 00 ; |
435 | 7 | Result.U.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); |
436 | 7 | } |
437 | 345k | |
438 | 345k | return Result.clearUnusedBits(); |
439 | 345k | } |
440 | | |
441 | 61 | unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { |
442 | 61 | assert(!str.empty() && "Invalid string length"); |
443 | 61 | assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || |
444 | 61 | radix == 36) && |
445 | 61 | "Radix should be 2, 8, 10, 16, or 36!"); |
446 | 61 | |
447 | 61 | size_t slen = str.size(); |
448 | 61 | |
449 | 61 | // Each computation below needs to know if it's negative. |
450 | 61 | StringRef::iterator p = str.begin(); |
451 | 61 | unsigned isNegative = *p == '-'; |
452 | 61 | if (*p == '-' || 61 *p == '+'41 ) { |
453 | 40 | p++; |
454 | 40 | slen--; |
455 | 40 | assert(slen && "String is only a sign, needs a value."); |
456 | 40 | } |
457 | 61 | |
458 | 61 | // For radixes of power-of-two values, the bits required is accurately and |
459 | 61 | // easily computed |
460 | 61 | if (radix == 2) |
461 | 15 | return slen + isNegative; |
462 | 46 | if (46 radix == 846 ) |
463 | 15 | return slen * 3 + isNegative; |
464 | 31 | if (31 radix == 1631 ) |
465 | 15 | return slen * 4 + isNegative; |
466 | 16 | |
467 | 16 | // FIXME: base 36 |
468 | 16 | |
469 | 16 | // This is grossly inefficient but accurate. We could probably do something |
470 | 16 | // with a computation of roughly slen*64/20 and then adjust by the value of |
471 | 16 | // the first few digits. But, I'm not sure how accurate that could be. |
472 | 16 | |
473 | 16 | // Compute a sufficient number of bits that is always large enough but might |
474 | 16 | // be too large. This avoids the assertion in the constructor. This |
475 | 16 | // calculation doesn't work appropriately for the numbers 0-9, so just use 4 |
476 | 16 | // bits in that case. |
477 | 16 | unsigned sufficient |
478 | 16 | = radix == 10? (slen == 1 ? 16 47 : slen * 64/189 ) |
479 | 0 | : (slen == 1 ? 0 70 : slen * 16/30 ); |
480 | 16 | |
481 | 16 | // Convert to the actual binary value. |
482 | 16 | APInt tmp(sufficient, StringRef(p, slen), radix); |
483 | 16 | |
484 | 16 | // Compute how many bits are required. If the log is infinite, assume we need |
485 | 16 | // just bit. |
486 | 16 | unsigned log = tmp.logBase2(); |
487 | 16 | if (log == (unsigned)-116 ) { |
488 | 3 | return isNegative + 1; |
489 | 0 | } else { |
490 | 13 | return isNegative + log + 1; |
491 | 13 | } |
492 | 0 | } |
493 | | |
494 | 332M | hash_code llvm::hash_value(const APInt &Arg) { |
495 | 332M | if (Arg.isSingleWord()) |
496 | 326M | return hash_combine(Arg.U.VAL); |
497 | 5.78M | |
498 | 5.78M | return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); |
499 | 5.78M | } |
500 | | |
501 | 324k | bool APInt::isSplat(unsigned SplatSizeInBits) const { |
502 | 324k | assert(getBitWidth() % SplatSizeInBits == 0 && |
503 | 324k | "SplatSizeInBits must divide width!"); |
504 | 324k | // We can check that all parts of an integer are equal by making use of a |
505 | 324k | // little trick: rotate and check if it's still the same value. |
506 | 324k | return *this == rotl(SplatSizeInBits); |
507 | 324k | } |
508 | | |
509 | | /// This function returns the high "numBits" bits of this APInt. |
510 | 220k | APInt APInt::getHiBits(unsigned numBits) const { |
511 | 220k | return this->lshr(BitWidth - numBits); |
512 | 220k | } |
513 | | |
514 | | /// This function returns the low "numBits" bits of this APInt. |
515 | 234k | APInt APInt::getLoBits(unsigned numBits) const { |
516 | 234k | APInt Result(getLowBitsSet(BitWidth, numBits)); |
517 | 234k | Result &= *this; |
518 | 234k | return Result; |
519 | 234k | } |
520 | | |
521 | | /// Return a value containing V broadcasted over NewLen bits. |
522 | 17.3k | APInt APInt::getSplat(unsigned NewLen, const APInt &V) { |
523 | 17.3k | assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); |
524 | 17.3k | |
525 | 17.3k | APInt Val = V.zextOrSelf(NewLen); |
526 | 67.9k | for (unsigned I = V.getBitWidth(); I < NewLen67.9k ; I <<= 150.6k ) |
527 | 50.6k | Val |= Val << I; |
528 | 17.3k | |
529 | 17.3k | return Val; |
530 | 17.3k | } |
531 | | |
532 | 22.8M | unsigned APInt::countLeadingZerosSlowCase() const { |
533 | 22.8M | unsigned Count = 0; |
534 | 188M | for (int i = getNumWords()-1; i >= 0188M ; --i165M ) { |
535 | 183M | uint64_t V = U.pVal[i]; |
536 | 183M | if (V == 0) |
537 | 165M | Count += APINT_BITS_PER_WORD; |
538 | 17.7M | else { |
539 | 17.7M | Count += llvm::countLeadingZeros(V); |
540 | 17.7M | break; |
541 | 17.7M | } |
542 | 183M | } |
543 | 22.8M | // Adjust for unused bits in the most significant word (they are zero). |
544 | 22.8M | unsigned Mod = BitWidth % APINT_BITS_PER_WORD; |
545 | 22.8M | Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod912k : 021.9M ; |
546 | 22.8M | return Count; |
547 | 22.8M | } |
548 | | |
549 | 26.1k | unsigned APInt::countLeadingOnesSlowCase() const { |
550 | 26.1k | unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; |
551 | 26.1k | unsigned shift; |
552 | 26.1k | if (!highWordBits26.1k ) { |
553 | 25.5k | highWordBits = APINT_BITS_PER_WORD; |
554 | 25.5k | shift = 0; |
555 | 26.1k | } else { |
556 | 597 | shift = APINT_BITS_PER_WORD - highWordBits; |
557 | 597 | } |
558 | 26.1k | int i = getNumWords() - 1; |
559 | 26.1k | unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); |
560 | 26.1k | if (Count == highWordBits26.1k ) { |
561 | 12.1k | for (i--; i >= 012.1k ; --i521 ) { |
562 | 11.9k | if (U.pVal[i] == WORD_MAX) |
563 | 521 | Count += APINT_BITS_PER_WORD; |
564 | 11.4k | else { |
565 | 11.4k | Count += llvm::countLeadingOnes(U.pVal[i]); |
566 | 11.4k | break; |
567 | 11.4k | } |
568 | 11.9k | } |
569 | 11.6k | } |
570 | 26.1k | return Count; |
571 | 26.1k | } |
572 | | |
573 | 113k | unsigned APInt::countTrailingZerosSlowCase() const { |
574 | 113k | unsigned Count = 0; |
575 | 113k | unsigned i = 0; |
576 | 159k | for (; i < getNumWords() && 159k U.pVal[i] == 0137k ; ++i46.4k ) |
577 | 46.4k | Count += APINT_BITS_PER_WORD; |
578 | 113k | if (i < getNumWords()) |
579 | 91.5k | Count += llvm::countTrailingZeros(U.pVal[i]); |
580 | 113k | return std::min(Count, BitWidth); |
581 | 113k | } |
582 | | |
583 | 7.51M | unsigned APInt::countTrailingOnesSlowCase() const { |
584 | 7.51M | unsigned Count = 0; |
585 | 7.51M | unsigned i = 0; |
586 | 14.3M | for (; i < getNumWords() && 14.3M U.pVal[i] == WORD_MAX11.0M ; ++i6.79M ) |
587 | 6.79M | Count += APINT_BITS_PER_WORD; |
588 | 7.51M | if (i < getNumWords()) |
589 | 4.26M | Count += llvm::countTrailingOnes(U.pVal[i]); |
590 | 7.51M | assert(Count <= BitWidth); |
591 | 7.51M | return Count; |
592 | 7.51M | } |
593 | | |
594 | 109k | unsigned APInt::countPopulationSlowCase() const { |
595 | 109k | unsigned Count = 0; |
596 | 335k | for (unsigned i = 0; i < getNumWords()335k ; ++i225k ) |
597 | 225k | Count += llvm::countPopulation(U.pVal[i]); |
598 | 109k | return Count; |
599 | 109k | } |
600 | | |
601 | 568k | bool APInt::intersectsSlowCase(const APInt &RHS) const { |
602 | 1.71M | for (unsigned i = 0, e = getNumWords(); i != e1.71M ; ++i1.14M ) |
603 | 1.14M | if (1.14M (U.pVal[i] & RHS.U.pVal[i]) != 01.14M ) |
604 | 2.57k | return true; |
605 | 568k | |
606 | 565k | return false; |
607 | 568k | } |
608 | | |
609 | 144k | bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { |
610 | 190k | for (unsigned i = 0, e = getNumWords(); i != e190k ; ++i46.0k ) |
611 | 184k | if (184k (U.pVal[i] & ~RHS.U.pVal[i]) != 0184k ) |
612 | 138k | return false; |
613 | 144k | |
614 | 6.08k | return true; |
615 | 144k | } |
616 | | |
617 | 69.6k | APInt APInt::byteSwap() const { |
618 | 69.6k | assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); |
619 | 69.6k | if (BitWidth == 16) |
620 | 10.3k | return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); |
621 | 59.2k | if (59.2k BitWidth == 3259.2k ) |
622 | 56.1k | return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); |
623 | 3.14k | if (3.14k BitWidth == 483.14k ) { |
624 | 0 | unsigned Tmp1 = unsigned(U.VAL >> 16); |
625 | 0 | Tmp1 = ByteSwap_32(Tmp1); |
626 | 0 | uint16_t Tmp2 = uint16_t(U.VAL); |
627 | 0 | Tmp2 = ByteSwap_16(Tmp2); |
628 | 0 | return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); |
629 | 0 | } |
630 | 3.14k | if (3.14k BitWidth == 643.14k ) |
631 | 3.13k | return APInt(BitWidth, ByteSwap_64(U.VAL)); |
632 | 1 | |
633 | 1 | APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); |
634 | 3 | for (unsigned I = 0, N = getNumWords(); I != N3 ; ++I2 ) |
635 | 2 | Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); |
636 | 1 | if (Result.BitWidth != BitWidth1 ) { |
637 | 1 | Result.lshrInPlace(Result.BitWidth - BitWidth); |
638 | 1 | Result.BitWidth = BitWidth; |
639 | 1 | } |
640 | 69.6k | return Result; |
641 | 69.6k | } |
642 | | |
643 | 6.18k | APInt APInt::reverseBits() const { |
644 | 6.18k | switch (BitWidth) { |
645 | 390 | case 64: |
646 | 390 | return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); |
647 | 1.88k | case 32: |
648 | 1.88k | return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); |
649 | 190 | case 16: |
650 | 190 | return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); |
651 | 188 | case 8: |
652 | 188 | return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); |
653 | 3.52k | default: |
654 | 3.52k | break; |
655 | 3.52k | } |
656 | 3.52k | |
657 | 3.52k | APInt Val(*this); |
658 | 3.52k | APInt Reversed(BitWidth, 0); |
659 | 3.52k | unsigned S = BitWidth; |
660 | 3.52k | |
661 | 1.16M | for (; Val != 01.16M ; Val.lshrInPlace(1)1.15M ) { |
662 | 1.15M | Reversed <<= 1; |
663 | 1.15M | Reversed |= Val[0]; |
664 | 1.15M | --S; |
665 | 1.15M | } |
666 | 6.18k | |
667 | 6.18k | Reversed <<= S; |
668 | 6.18k | return Reversed; |
669 | 6.18k | } |
670 | | |
671 | 2.60k | APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { |
672 | 2.60k | // Fast-path a common case. |
673 | 2.60k | if (A == B2.60k ) return A874 ; |
674 | 1.72k | |
675 | 1.72k | // Corner cases: if either operand is zero, the other is the gcd. |
676 | 1.72k | if (1.72k !A1.72k ) return B1.00k ; |
677 | 726 | if (726 !B726 ) return A194 ; |
678 | 532 | |
679 | 532 | // Count common powers of 2 and remove all other powers of 2. |
680 | 532 | unsigned Pow2; |
681 | 532 | { |
682 | 532 | unsigned Pow2_A = A.countTrailingZeros(); |
683 | 532 | unsigned Pow2_B = B.countTrailingZeros(); |
684 | 532 | if (Pow2_A > Pow2_B532 ) { |
685 | 86 | A.lshrInPlace(Pow2_A - Pow2_B); |
686 | 86 | Pow2 = Pow2_B; |
687 | 532 | } else if (446 Pow2_B > Pow2_A446 ) { |
688 | 364 | B.lshrInPlace(Pow2_B - Pow2_A); |
689 | 364 | Pow2 = Pow2_A; |
690 | 446 | } else { |
691 | 82 | Pow2 = Pow2_A; |
692 | 82 | } |
693 | 532 | } |
694 | 532 | |
695 | 532 | // Both operands are odd multiples of 2^Pow_2: |
696 | 532 | // |
697 | 532 | // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) |
698 | 532 | // |
699 | 532 | // This is a modified version of Stein's algorithm, taking advantage of |
700 | 532 | // efficient countTrailingZeros(). |
701 | 1.92k | while (A != B1.92k ) { |
702 | 1.39k | if (A.ugt(B)1.39k ) { |
703 | 336 | A -= B; |
704 | 336 | A.lshrInPlace(A.countTrailingZeros() - Pow2); |
705 | 1.39k | } else { |
706 | 1.05k | B -= A; |
707 | 1.05k | B.lshrInPlace(B.countTrailingZeros() - Pow2); |
708 | 1.05k | } |
709 | 1.39k | } |
710 | 2.60k | |
711 | 2.60k | return A; |
712 | 2.60k | } |
713 | | |
714 | 40 | APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { |
715 | 40 | union { |
716 | 40 | double D; |
717 | 40 | uint64_t I; |
718 | 40 | } T; |
719 | 40 | T.D = Double; |
720 | 40 | |
721 | 40 | // Get the sign bit from the highest order bit |
722 | 40 | bool isNeg = T.I >> 63; |
723 | 40 | |
724 | 40 | // Get the 11-bit exponent and adjust for the 1023 bit bias |
725 | 40 | int64_t exp = ((T.I >> 52) & 0x7ff) - 1023; |
726 | 40 | |
727 | 40 | // If the exponent is negative, the value is < 0 so just return 0. |
728 | 40 | if (exp < 0) |
729 | 16 | return APInt(width, 0u); |
730 | 24 | |
731 | 24 | // Extract the mantissa by clearing the top 12 bits (sign + exponent). |
732 | 24 | uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52; |
733 | 24 | |
734 | 24 | // If the exponent doesn't shift all bits out of the mantissa |
735 | 24 | if (exp < 52) |
736 | 24 | return isNeg ? 24 -APInt(width, mantissa >> (52 - exp))8 : |
737 | 16 | APInt(width, mantissa >> (52 - exp)); |
738 | 0 |
|
739 | 0 | // If the client didn't provide enough bits for us to shift the mantissa into |
740 | 0 | // then the result is undefined, just return 0 |
741 | 0 | if (0 width <= exp - 520 ) |
742 | 0 | return APInt(width, 0); |
743 | 0 |
|
744 | 0 | // Otherwise, we have to shift the mantissa bits up to the right location |
745 | 0 | APInt Tmp(width, mantissa); |
746 | 0 | Tmp <<= (unsigned)exp - 52; |
747 | 0 | return isNeg ? -Tmp0 : Tmp0 ; |
748 | 40 | } |
749 | | |
750 | | /// This function converts this APInt to a double. |
751 | | /// The layout for double is as following (IEEE Standard 754): |
752 | | /// -------------------------------------- |
753 | | /// | Sign Exponent Fraction Bias | |
754 | | /// |-------------------------------------- | |
755 | | /// | 1[63] 11[62-52] 52[51-00] 1023 | |
756 | | /// -------------------------------------- |
757 | 50 | double APInt::roundToDouble(bool isSigned) const { |
758 | 50 | |
759 | 50 | // Handle the simple case where the value is contained in one uint64_t. |
760 | 50 | // It is wrong to optimize getWord(0) to VAL; there might be more than one word. |
761 | 50 | if (isSingleWord() || 50 getActiveBits() <= APINT_BITS_PER_WORD0 ) { |
762 | 50 | if (isSigned50 ) { |
763 | 25 | int64_t sext = SignExtend64(getWord(0), BitWidth); |
764 | 25 | return double(sext); |
765 | 25 | } else |
766 | 25 | return double(getWord(0)); |
767 | 0 | } |
768 | 0 |
|
769 | 0 | // Determine if the value is negative. |
770 | 0 | bool isNeg = isSigned ? 0 (*this)[BitWidth-1]0 : false0 ; |
771 | 0 |
|
772 | 0 | // Construct the absolute value if we're negative. |
773 | 0 | APInt Tmp(isNeg ? -(*this)0 : (*this)0 ); |
774 | 0 |
|
775 | 0 | // Figure out how many bits we're using. |
776 | 0 | unsigned n = Tmp.getActiveBits(); |
777 | 0 |
|
778 | 0 | // The exponent (without bias normalization) is just the number of bits |
779 | 0 | // we are using. Note that the sign bit is gone since we constructed the |
780 | 0 | // absolute value. |
781 | 0 | uint64_t exp = n; |
782 | 0 |
|
783 | 0 | // Return infinity for exponent overflow |
784 | 0 | if (exp > 10230 ) { |
785 | 0 | if (!isSigned || 0 !isNeg0 ) |
786 | 0 | return std::numeric_limits<double>::infinity(); |
787 | 0 | else |
788 | 0 | return -std::numeric_limits<double>::infinity(); |
789 | 0 | } |
790 | 0 | exp += 1023; // Increment for 1023 bias |
791 | 0 |
|
792 | 0 | // Number of bits in mantissa is 52. To obtain the mantissa value, we must |
793 | 0 | // extract the high 52 bits from the correct words in pVal. |
794 | 0 | uint64_t mantissa; |
795 | 0 | unsigned hiWord = whichWord(n-1); |
796 | 0 | if (hiWord == 00 ) { |
797 | 0 | mantissa = Tmp.U.pVal[0]; |
798 | 0 | if (n > 52) |
799 | 0 | mantissa >>= n - 52; // shift down, we want the top 52 bits. |
800 | 0 | } else { |
801 | 0 | assert(hiWord > 0 && "huh?"); |
802 | 0 | uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); |
803 | 0 | uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); |
804 | 0 | mantissa = hibits | lobits; |
805 | 0 | } |
806 | 0 |
|
807 | 0 | // The leading bit of mantissa is implicit, so get rid of it. |
808 | 0 | uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1))0 : 00 ; |
809 | 50 | union { |
810 | 50 | double D; |
811 | 50 | uint64_t I; |
812 | 50 | } T; |
813 | 50 | T.I = sign | (exp << 52) | mantissa; |
814 | 50 | return T.D; |
815 | 50 | } |
816 | | |
817 | | // Truncate to new width. |
818 | 150M | APInt APInt::trunc(unsigned width) const { |
819 | 150M | assert(width < BitWidth && "Invalid APInt Truncate request"); |
820 | 150M | assert(width && "Can't truncate to 0 bits"); |
821 | 150M | |
822 | 150M | if (width <= APINT_BITS_PER_WORD) |
823 | 150M | return APInt(width, getRawData()[0]); |
824 | 50.7k | |
825 | 50.7k | APInt Result(getMemory(getNumWords(width)), width); |
826 | 50.7k | |
827 | 50.7k | // Copy full words. |
828 | 50.7k | unsigned i; |
829 | 173k | for (i = 0; i != width / APINT_BITS_PER_WORD173k ; i++122k ) |
830 | 122k | Result.U.pVal[i] = U.pVal[i]; |
831 | 50.7k | |
832 | 50.7k | // Truncate and copy any partial word. |
833 | 50.7k | unsigned bits = (0 - width) % APINT_BITS_PER_WORD; |
834 | 50.7k | if (bits != 0) |
835 | 2.17k | Result.U.pVal[i] = U.pVal[i] << bits >> bits; |
836 | 150M | |
837 | 150M | return Result; |
838 | 150M | } |
839 | | |
840 | | // Sign extend to a new width. |
841 | 77.6M | APInt APInt::sext(unsigned Width) const { |
842 | 77.6M | assert(Width > BitWidth && "Invalid APInt SignExtend request"); |
843 | 77.6M | |
844 | 77.6M | if (Width <= APINT_BITS_PER_WORD) |
845 | 69.6M | return APInt(Width, SignExtend64(U.VAL, BitWidth)); |
846 | 7.99M | |
847 | 7.99M | APInt Result(getMemory(getNumWords(Width)), Width); |
848 | 7.99M | |
849 | 7.99M | // Copy words. |
850 | 7.99M | std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); |
851 | 7.99M | |
852 | 7.99M | // Sign extend the last word since there may be unused bits in the input. |
853 | 7.99M | Result.U.pVal[getNumWords() - 1] = |
854 | 7.99M | SignExtend64(Result.U.pVal[getNumWords() - 1], |
855 | 7.99M | ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); |
856 | 7.99M | |
857 | 7.99M | // Fill with sign bits. |
858 | 7.99M | std::memset(Result.U.pVal + getNumWords(), isNegative() ? -14.52M : 03.47M , |
859 | 77.6M | (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); |
860 | 77.6M | Result.clearUnusedBits(); |
861 | 77.6M | return Result; |
862 | 77.6M | } |
863 | | |
864 | | // Zero extend to a new width. |
865 | 121M | APInt APInt::zext(unsigned width) const { |
866 | 121M | assert(width > BitWidth && "Invalid APInt ZeroExtend request"); |
867 | 121M | |
868 | 121M | if (width <= APINT_BITS_PER_WORD) |
869 | 98.9M | return APInt(width, U.VAL); |
870 | 22.7M | |
871 | 22.7M | APInt Result(getMemory(getNumWords(width)), width); |
872 | 22.7M | |
873 | 22.7M | // Copy words. |
874 | 22.7M | std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); |
875 | 22.7M | |
876 | 22.7M | // Zero remaining words. |
877 | 22.7M | std::memset(Result.U.pVal + getNumWords(), 0, |
878 | 22.7M | (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); |
879 | 22.7M | |
880 | 22.7M | return Result; |
881 | 22.7M | } |
882 | | |
883 | 192M | APInt APInt::zextOrTrunc(unsigned width) const { |
884 | 192M | if (BitWidth < width) |
885 | 75.5M | return zext(width); |
886 | 116M | if (116M BitWidth > width116M ) |
887 | 69.8M | return trunc(width); |
888 | 46.5M | return *this; |
889 | 46.5M | } |
890 | | |
891 | 25.4M | APInt APInt::sextOrTrunc(unsigned width) const { |
892 | 25.4M | if (BitWidth < width) |
893 | 1.85M | return sext(width); |
894 | 23.5M | if (23.5M BitWidth > width23.5M ) |
895 | 757k | return trunc(width); |
896 | 22.7M | return *this; |
897 | 22.7M | } |
898 | | |
899 | 30.3M | APInt APInt::zextOrSelf(unsigned width) const { |
900 | 30.3M | if (BitWidth < width) |
901 | 20.1M | return zext(width); |
902 | 10.2M | return *this; |
903 | 10.2M | } |
904 | | |
905 | 6.49k | APInt APInt::sextOrSelf(unsigned width) const { |
906 | 6.49k | if (BitWidth < width) |
907 | 6.48k | return sext(width); |
908 | 11 | return *this; |
909 | 11 | } |
910 | | |
911 | | /// Arithmetic right-shift this APInt by shiftAmt. |
912 | | /// @brief Arithmetic right-shift function. |
913 | 197k | void APInt::ashrInPlace(const APInt &shiftAmt) { |
914 | 197k | ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); |
915 | 197k | } |
916 | | |
917 | | /// Arithmetic right-shift this APInt by shiftAmt. |
918 | | /// @brief Arithmetic right-shift function. |
919 | 251k | void APInt::ashrSlowCase(unsigned ShiftAmt) { |
920 | 251k | // Don't bother performing a no-op shift. |
921 | 251k | if (!ShiftAmt) |
922 | 536 | return; |
923 | 250k | |
924 | 250k | // Save the original sign bit for later. |
925 | 250k | bool Negative = isNegative(); |
926 | 250k | |
927 | 250k | // WordShift is the inter-part shift; BitShift is is intra-part shift. |
928 | 250k | unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; |
929 | 250k | unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; |
930 | 250k | |
931 | 250k | unsigned WordsToMove = getNumWords() - WordShift; |
932 | 250k | if (WordsToMove != 0250k ) { |
933 | 250k | // Sign extend the last word to fill in the unused bits. |
934 | 250k | U.pVal[getNumWords() - 1] = SignExtend64( |
935 | 250k | U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); |
936 | 250k | |
937 | 250k | // Fastpath for moving by whole words. |
938 | 250k | if (BitShift == 0250k ) { |
939 | 588 | std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); |
940 | 250k | } else { |
941 | 250k | // Move the words containing significant bits. |
942 | 439k | for (unsigned i = 0; i != WordsToMove - 1439k ; ++i189k ) |
943 | 189k | U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | |
944 | 189k | (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); |
945 | 250k | |
946 | 250k | // Handle the last word which has no high bits to copy. |
947 | 250k | U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; |
948 | 250k | // Sign extend one more time. |
949 | 250k | U.pVal[WordsToMove - 1] = |
950 | 250k | SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); |
951 | 250k | } |
952 | 250k | } |
953 | 250k | |
954 | 250k | // Fill in the remainder based on the original sign. |
955 | 250k | std::memset(U.pVal + WordsToMove, Negative ? -1169 : 0250k , |
956 | 251k | WordShift * APINT_WORD_SIZE); |
957 | 251k | clearUnusedBits(); |
958 | 251k | } |
959 | | |
960 | | /// Logical right-shift this APInt by shiftAmt. |
961 | | /// @brief Logical right-shift function. |
962 | 4.10M | void APInt::lshrInPlace(const APInt &shiftAmt) { |
963 | 4.10M | lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); |
964 | 4.10M | } |
965 | | |
966 | | /// Logical right-shift this APInt by shiftAmt. |
967 | | /// @brief Logical right-shift function. |
968 | 1.65M | void APInt::lshrSlowCase(unsigned ShiftAmt) { |
969 | 1.65M | tcShiftRight(U.pVal, getNumWords(), ShiftAmt); |
970 | 1.65M | } |
971 | | |
972 | | /// Left-shift this APInt by shiftAmt. |
973 | | /// @brief Left-shift function. |
974 | 7.49M | APInt &APInt::operator<<=(const APInt &shiftAmt) { |
975 | 7.49M | // It's undefined behavior in C to shift by BitWidth or greater. |
976 | 7.49M | *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); |
977 | 7.49M | return *this; |
978 | 7.49M | } |
979 | | |
980 | 14.7M | void APInt::shlSlowCase(unsigned ShiftAmt) { |
981 | 14.7M | tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); |
982 | 14.7M | clearUnusedBits(); |
983 | 14.7M | } |
984 | | |
985 | | // Calculate the rotate amount modulo the bit width. |
986 | 48 | static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { |
987 | 48 | unsigned rotBitWidth = rotateAmt.getBitWidth(); |
988 | 48 | APInt rot = rotateAmt; |
989 | 48 | if (rotBitWidth < BitWidth48 ) { |
990 | 14 | // Extend the rotate APInt, so that the urem doesn't divide by 0. |
991 | 14 | // e.g. APInt(1, 32) would give APInt(1, 0). |
992 | 14 | rot = rotateAmt.zext(BitWidth); |
993 | 14 | } |
994 | 48 | rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); |
995 | 48 | return rot.getLimitedValue(BitWidth); |
996 | 48 | } |
997 | | |
998 | 32 | APInt APInt::rotl(const APInt &rotateAmt) const { |
999 | 32 | return rotl(rotateModulo(BitWidth, rotateAmt)); |
1000 | 32 | } |
1001 | | |
1002 | 324k | APInt APInt::rotl(unsigned rotateAmt) const { |
1003 | 324k | rotateAmt %= BitWidth; |
1004 | 324k | if (rotateAmt == 0) |
1005 | 192 | return *this; |
1006 | 324k | return shl(rotateAmt) | lshr(BitWidth - rotateAmt); |
1007 | 324k | } |
1008 | | |
1009 | 16 | APInt APInt::rotr(const APInt &rotateAmt) const { |
1010 | 16 | return rotr(rotateModulo(BitWidth, rotateAmt)); |
1011 | 16 | } |
1012 | | |
1013 | 321 | APInt APInt::rotr(unsigned rotateAmt) const { |
1014 | 321 | rotateAmt %= BitWidth; |
1015 | 321 | if (rotateAmt == 0) |
1016 | 52 | return *this; |
1017 | 269 | return lshr(rotateAmt) | shl(BitWidth - rotateAmt); |
1018 | 269 | } |
1019 | | |
1020 | | // Square Root - this method computes and returns the square root of "this". |
1021 | | // Three mechanisms are used for computation. For small values (<= 5 bits), |
1022 | | // a table lookup is done. This gets some performance for common cases. For |
1023 | | // values using less than 52 bits, the value is converted to double and then |
1024 | | // the libc sqrt function is called. The result is rounded and then converted |
1025 | | // back to a uint64_t which is then used to construct the result. Finally, |
1026 | | // the Babylonian method for computing square roots is used. |
1027 | 19 | APInt APInt::sqrt() const { |
1028 | 19 | |
1029 | 19 | // Determine the magnitude of the value. |
1030 | 19 | unsigned magnitude = getActiveBits(); |
1031 | 19 | |
1032 | 19 | // Use a fast table for some small values. This also gets rid of some |
1033 | 19 | // rounding errors in libc sqrt for small values. |
1034 | 19 | if (magnitude <= 519 ) { |
1035 | 11 | static const uint8_t results[32] = { |
1036 | 11 | /* 0 */ 0, |
1037 | 11 | /* 1- 2 */ 1, 1, |
1038 | 11 | /* 3- 6 */ 2, 2, 2, 2, |
1039 | 11 | /* 7-12 */ 3, 3, 3, 3, 3, 3, |
1040 | 11 | /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, |
1041 | 11 | /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
1042 | 11 | /* 31 */ 6 |
1043 | 11 | }; |
1044 | 11 | return APInt(BitWidth, results[ (isSingleWord() ? U.VAL11 : U.pVal[0]0 ) ]); |
1045 | 11 | } |
1046 | 8 | |
1047 | 8 | // If the magnitude of the value fits in less than 52 bits (the precision of |
1048 | 8 | // an IEEE double precision floating point value), then we can use the |
1049 | 8 | // libc sqrt function which will probably use a hardware sqrt computation. |
1050 | 8 | // This should be faster than the algorithm below. |
1051 | 8 | if (8 magnitude < 528 ) { |
1052 | 8 | return APInt(BitWidth, |
1053 | 8 | uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL |
1054 | 0 | : U.pVal[0]))))); |
1055 | 8 | } |
1056 | 0 |
|
1057 | 0 | // Okay, all the short cuts are exhausted. We must compute it. The following |
1058 | 0 | // is a classical Babylonian method for computing the square root. This code |
1059 | 0 | // was adapted to APInt from a wikipedia article on such computations. |
1060 | 0 | // See http://www.wikipedia.org/ and go to the page named |
1061 | 0 | // Calculate_an_integer_square_root. |
1062 | 0 | unsigned nbits = BitWidth, i = 4; |
1063 | 0 | APInt testy(BitWidth, 16); |
1064 | 0 | APInt x_old(BitWidth, 1); |
1065 | 0 | APInt x_new(BitWidth, 0); |
1066 | 0 | APInt two(BitWidth, 2); |
1067 | 0 |
|
1068 | 0 | // Select a good starting value using binary logarithms. |
1069 | 0 | for (;; i += 2, testy = testy.shl(2)) |
1070 | 0 | if (0 i >= nbits || 0 this->ule(testy)0 ) { |
1071 | 0 | x_old = x_old.shl(i / 2); |
1072 | 0 | break; |
1073 | 0 | } |
1074 | 0 |
|
1075 | 0 | // Use the Babylonian method to arrive at the integer square root: |
1076 | 0 | for (;;) { |
1077 | 0 | x_new = (this->udiv(x_old) + x_old).udiv(two); |
1078 | 0 | if (x_old.ule(x_new)) |
1079 | 0 | break; |
1080 | 0 | x_old = x_new; |
1081 | 0 | } |
1082 | 0 |
|
1083 | 0 | // Make sure we return the closest approximation |
1084 | 0 | // NOTE: The rounding calculation below is correct. It will produce an |
1085 | 0 | // off-by-one discrepancy with results from pari/gp. That discrepancy has been |
1086 | 0 | // determined to be a rounding issue with pari/gp as it begins to use a |
1087 | 0 | // floating point representation after 192 bits. There are no discrepancies |
1088 | 0 | // between this algorithm and pari/gp for bit widths < 192 bits. |
1089 | 0 | APInt square(x_old * x_old); |
1090 | 0 | APInt nextSquare((x_old + 1) * (x_old +1)); |
1091 | 0 | if (this->ult(square)) |
1092 | 0 | return x_old; |
1093 | 0 | assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); |
1094 | 0 | APInt midpoint((nextSquare - square).udiv(two)); |
1095 | 0 | APInt offset(*this - square); |
1096 | 0 | if (offset.ult(midpoint)) |
1097 | 0 | return x_old; |
1098 | 0 | return x_old + 1; |
1099 | 0 | } |
1100 | | |
1101 | | /// Computes the multiplicative inverse of this APInt for a given modulo. The |
1102 | | /// iterative extended Euclidean algorithm is used to solve for this value, |
1103 | | /// however we simplify it to speed up calculating only the inverse, and take |
1104 | | /// advantage of div+rem calculations. We also use some tricks to avoid copying |
1105 | | /// (potentially large) APInts around. |
1106 | 38.8k | APInt APInt::multiplicativeInverse(const APInt& modulo) const { |
1107 | 38.8k | assert(ult(modulo) && "This APInt must be smaller than the modulo"); |
1108 | 38.8k | |
1109 | 38.8k | // Using the properties listed at the following web page (accessed 06/21/08): |
1110 | 38.8k | // http://www.numbertheory.org/php/euclid.html |
1111 | 38.8k | // (especially the properties numbered 3, 4 and 9) it can be proved that |
1112 | 38.8k | // BitWidth bits suffice for all the computations in the algorithm implemented |
1113 | 38.8k | // below. More precisely, this number of bits suffice if the multiplicative |
1114 | 38.8k | // inverse exists, but may not suffice for the general extended Euclidean |
1115 | 38.8k | // algorithm. |
1116 | 38.8k | |
1117 | 38.8k | APInt r[2] = { modulo, *this }; |
1118 | 38.8k | APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; |
1119 | 38.8k | APInt q(BitWidth, 0); |
1120 | 38.8k | |
1121 | 38.8k | unsigned i; |
1122 | 80.1k | for (i = 0; r[i^1] != 080.1k ; i ^= 141.3k ) { |
1123 | 41.3k | // An overview of the math without the confusing bit-flipping: |
1124 | 41.3k | // q = r[i-2] / r[i-1] |
1125 | 41.3k | // r[i] = r[i-2] % r[i-1] |
1126 | 41.3k | // t[i] = t[i-2] - t[i-1] * q |
1127 | 41.3k | udivrem(r[i], r[i^1], q, r[i]); |
1128 | 41.3k | t[i] -= t[i^1] * q; |
1129 | 41.3k | } |
1130 | 38.8k | |
1131 | 38.8k | // If this APInt and the modulo are not coprime, there is no multiplicative |
1132 | 38.8k | // inverse, so return 0. We check this by looking at the next-to-last |
1133 | 38.8k | // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean |
1134 | 38.8k | // algorithm. |
1135 | 38.8k | if (r[i] != 1) |
1136 | 0 | return APInt(BitWidth, 0); |
1137 | 38.8k | |
1138 | 38.8k | // The next-to-last t is the multiplicative inverse. However, we are |
1139 | 38.8k | // interested in a positive inverse. Calculate a positive one from a negative |
1140 | 38.8k | // one if necessary. A simple addition of the modulo suffices because |
1141 | 38.8k | // abs(t[i]) is known to be less than *this/2 (see the link above). |
1142 | 38.8k | if (38.8k t[i].isNegative()38.8k ) |
1143 | 953 | t[i] += modulo; |
1144 | 38.8k | |
1145 | 38.8k | return std::move(t[i]); |
1146 | 38.8k | } |
1147 | | |
1148 | | /// Calculate the magic numbers required to implement a signed integer division |
1149 | | /// by a constant as a sequence of multiplies, adds and shifts. Requires that |
1150 | | /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. |
1151 | | /// Warren, Jr., chapter 10. |
1152 | 1.35k | APInt::ms APInt::magic() const { |
1153 | 1.35k | const APInt& d = *this; |
1154 | 1.35k | unsigned p; |
1155 | 1.35k | APInt ad, anc, delta, q1, r1, q2, r2, t; |
1156 | 1.35k | APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); |
1157 | 1.35k | struct ms mag; |
1158 | 1.35k | |
1159 | 1.35k | ad = d.abs(); |
1160 | 1.35k | t = signedMin + (d.lshr(d.getBitWidth() - 1)); |
1161 | 1.35k | anc = t - 1 - t.urem(ad); // absolute value of nc |
1162 | 1.35k | p = d.getBitWidth() - 1; // initialize p |
1163 | 1.35k | q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) |
1164 | 1.35k | r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) |
1165 | 1.35k | q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) |
1166 | 1.35k | r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) |
1167 | 6.55k | do { |
1168 | 6.55k | p = p + 1; |
1169 | 6.55k | q1 = q1<<1; // update q1 = 2p/abs(nc) |
1170 | 6.55k | r1 = r1<<1; // update r1 = rem(2p/abs(nc)) |
1171 | 6.55k | if (r1.uge(anc)6.55k ) { // must be unsigned comparison |
1172 | 56 | q1 = q1 + 1; |
1173 | 56 | r1 = r1 - anc; |
1174 | 56 | } |
1175 | 6.55k | q2 = q2<<1; // update q2 = 2p/abs(d) |
1176 | 6.55k | r2 = r2<<1; // update r2 = rem(2p/abs(d)) |
1177 | 6.55k | if (r2.uge(ad)6.55k ) { // must be unsigned comparison |
1178 | 2.50k | q2 = q2 + 1; |
1179 | 2.50k | r2 = r2 - ad; |
1180 | 2.50k | } |
1181 | 6.55k | delta = ad - r2; |
1182 | 6.55k | } while (q1.ult(delta) || 6.55k (q1 == delta && 1.35k r1 == 0199 )); |
1183 | 1.35k | |
1184 | 1.35k | mag.m = q2 + 1; |
1185 | 1.35k | if (d.isNegative()1.35k ) mag.m = -mag.m19 ; // resulting magic number |
1186 | 1.35k | mag.s = p - d.getBitWidth(); // resulting shift |
1187 | 1.35k | return mag; |
1188 | 1.35k | } |
1189 | | |
1190 | | /// Calculate the magic numbers required to implement an unsigned integer |
1191 | | /// division by a constant as a sequence of multiplies, adds and shifts. |
1192 | | /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry |
1193 | | /// S. Warren, Jr., chapter 10. |
1194 | | /// LeadingZeros can be used to simplify the calculation if the upper bits |
1195 | | /// of the divided value are known zero. |
1196 | 7.01k | APInt::mu APInt::magicu(unsigned LeadingZeros) const { |
1197 | 7.01k | const APInt& d = *this; |
1198 | 7.01k | unsigned p; |
1199 | 7.01k | APInt nc, delta, q1, r1, q2, r2; |
1200 | 7.01k | struct mu magu; |
1201 | 7.01k | magu.a = 0; // initialize "add" indicator |
1202 | 7.01k | APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); |
1203 | 7.01k | APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); |
1204 | 7.01k | APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); |
1205 | 7.01k | |
1206 | 7.01k | nc = allOnes - (allOnes - d).urem(d); |
1207 | 7.01k | p = d.getBitWidth() - 1; // initialize p |
1208 | 7.01k | q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc |
1209 | 7.01k | r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) |
1210 | 7.01k | q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d |
1211 | 7.01k | r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) |
1212 | 78.2k | do { |
1213 | 78.2k | p = p + 1; |
1214 | 78.2k | if (r1.uge(nc - r1)78.2k ) { |
1215 | 7.10k | q1 = q1 + q1 + 1; // update q1 |
1216 | 7.10k | r1 = r1 + r1 - nc; // update r1 |
1217 | 7.10k | } |
1218 | 71.1k | else { |
1219 | 71.1k | q1 = q1+q1; // update q1 |
1220 | 71.1k | r1 = r1+r1; // update r1 |
1221 | 71.1k | } |
1222 | 78.2k | if ((r2 + 1).uge(d - r2)78.2k ) { |
1223 | 36.9k | if (q2.uge(signedMax)36.9k ) magu.a = 10 ; |
1224 | 36.9k | q2 = q2+q2 + 1; // update q2 |
1225 | 36.9k | r2 = r2+r2 + 1 - d; // update r2 |
1226 | 36.9k | } |
1227 | 41.3k | else { |
1228 | 41.3k | if (q2.uge(signedMin)41.3k ) magu.a = 1656 ; |
1229 | 41.3k | q2 = q2+q2; // update q2 |
1230 | 41.3k | r2 = r2+r2 + 1; // update r2 |
1231 | 41.3k | } |
1232 | 78.2k | delta = d - 1 - r2; |
1233 | 7.01k | } while (p < d.getBitWidth()*2 && |
1234 | 78.2k | (q1.ult(delta) || 78.2k (q1 == delta && 7.01k r1 == 00 ))); |
1235 | 7.01k | magu.m = q2 + 1; // resulting magic number |
1236 | 7.01k | magu.s = p - d.getBitWidth(); // resulting shift |
1237 | 7.01k | return magu; |
1238 | 7.01k | } |
1239 | | |
1240 | | /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) |
1241 | | /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The |
1242 | | /// variables here have the same names as in the algorithm. Comments explain |
1243 | | /// the algorithm and any deviation from it. |
1244 | | static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, |
1245 | 44.0k | unsigned m, unsigned n) { |
1246 | 44.0k | assert(u && "Must provide dividend"); |
1247 | 44.0k | assert(v && "Must provide divisor"); |
1248 | 44.0k | assert(q && "Must provide quotient"); |
1249 | 44.0k | assert(u != v && u != q && v != q && "Must use different memory"); |
1250 | 44.0k | assert(n>1 && "n must be > 1"); |
1251 | 44.0k | |
1252 | 44.0k | // b denotes the base of the number system. In our case b is 2^32. |
1253 | 44.0k | const uint64_t b = uint64_t(1) << 32; |
1254 | 44.0k | |
1255 | 44.0k | DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); |
1256 | 44.0k | DEBUG(dbgs() << "KnuthDiv: original:"); |
1257 | 44.0k | DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); |
1258 | 44.0k | DEBUG(dbgs() << " by"); |
1259 | 44.0k | DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); |
1260 | 44.0k | DEBUG(dbgs() << '\n'); |
1261 | 44.0k | // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of |
1262 | 44.0k | // u and v by d. Note that we have taken Knuth's advice here to use a power |
1263 | 44.0k | // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of |
1264 | 44.0k | // 2 allows us to shift instead of multiply and it is easy to determine the |
1265 | 44.0k | // shift amount from the leading zeros. We are basically normalizing the u |
1266 | 44.0k | // and v so that its high bits are shifted to the top of v's range without |
1267 | 44.0k | // overflow. Note that this can require an extra word in u so that u must |
1268 | 44.0k | // be of length m+n+1. |
1269 | 44.0k | unsigned shift = countLeadingZeros(v[n-1]); |
1270 | 44.0k | uint32_t v_carry = 0; |
1271 | 44.0k | uint32_t u_carry = 0; |
1272 | 44.0k | if (shift44.0k ) { |
1273 | 145k | for (unsigned i = 0; i < m+n145k ; ++i126k ) { |
1274 | 126k | uint32_t u_tmp = u[i] >> (32 - shift); |
1275 | 126k | u[i] = (u[i] << shift) | u_carry; |
1276 | 126k | u_carry = u_tmp; |
1277 | 126k | } |
1278 | 119k | for (unsigned i = 0; i < n119k ; ++i99.5k ) { |
1279 | 99.5k | uint32_t v_tmp = v[i] >> (32 - shift); |
1280 | 99.5k | v[i] = (v[i] << shift) | v_carry; |
1281 | 99.5k | v_carry = v_tmp; |
1282 | 99.5k | } |
1283 | 19.6k | } |
1284 | 44.0k | u[m+n] = u_carry; |
1285 | 44.0k | |
1286 | 44.0k | DEBUG(dbgs() << "KnuthDiv: normal:"); |
1287 | 44.0k | DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); |
1288 | 44.0k | DEBUG(dbgs() << " by"); |
1289 | 44.0k | DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); |
1290 | 44.0k | DEBUG(dbgs() << '\n'); |
1291 | 44.0k | |
1292 | 44.0k | // D2. [Initialize j.] Set j to m. This is the loop counter over the places. |
1293 | 44.0k | int j = m; |
1294 | 71.1k | do { |
1295 | 71.1k | DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); |
1296 | 71.1k | // D3. [Calculate q'.]. |
1297 | 71.1k | // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') |
1298 | 71.1k | // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') |
1299 | 71.1k | // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease |
1300 | 71.1k | // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test |
1301 | 71.1k | // on v[n-2] determines at high speed most of the cases in which the trial |
1302 | 71.1k | // value qp is one too large, and it eliminates all cases where qp is two |
1303 | 71.1k | // too large. |
1304 | 71.1k | uint64_t dividend = Make_64(u[j+n], u[j+n-1]); |
1305 | 71.1k | DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); |
1306 | 71.1k | uint64_t qp = dividend / v[n-1]; |
1307 | 71.1k | uint64_t rp = dividend % v[n-1]; |
1308 | 71.1k | if (qp == b || 71.1k qp*v[n-2] > b*rp + u[j+n-2]71.1k ) { |
1309 | 4.40k | qp--; |
1310 | 4.40k | rp += v[n-1]; |
1311 | 4.40k | if (rp < b && 4.40k (qp == b || 3.14k qp*v[n-2] > b*rp + u[j+n-2]2.98k )) |
1312 | 168 | qp--; |
1313 | 4.40k | } |
1314 | 71.1k | DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); |
1315 | 71.1k | |
1316 | 71.1k | // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with |
1317 | 71.1k | // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation |
1318 | 71.1k | // consists of a simple multiplication by a one-place number, combined with |
1319 | 71.1k | // a subtraction. |
1320 | 71.1k | // The digits (u[j+n]...u[j]) should be kept positive; if the result of |
1321 | 71.1k | // this step is actually negative, (u[j+n]...u[j]) should be left as the |
1322 | 71.1k | // true value plus b**(n+1), namely as the b's complement of |
1323 | 71.1k | // the true value, and a "borrow" to the left should be remembered. |
1324 | 71.1k | int64_t borrow = 0; |
1325 | 461k | for (unsigned i = 0; i < n461k ; ++i390k ) { |
1326 | 390k | uint64_t p = uint64_t(qp) * uint64_t(v[i]); |
1327 | 390k | int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); |
1328 | 390k | u[j+i] = Lo_32(subres); |
1329 | 390k | borrow = Hi_32(p) - Hi_32(subres); |
1330 | 390k | DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i] |
1331 | 390k | << ", borrow = " << borrow << '\n'); |
1332 | 390k | } |
1333 | 71.1k | bool isNeg = u[j+n] < borrow; |
1334 | 71.1k | u[j+n] -= Lo_32(borrow); |
1335 | 71.1k | |
1336 | 71.1k | DEBUG(dbgs() << "KnuthDiv: after subtraction:"); |
1337 | 71.1k | DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); |
1338 | 71.1k | DEBUG(dbgs() << '\n'); |
1339 | 71.1k | |
1340 | 71.1k | // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was |
1341 | 71.1k | // negative, go to step D6; otherwise go on to step D7. |
1342 | 71.1k | q[j] = Lo_32(qp); |
1343 | 71.1k | if (isNeg71.1k ) { |
1344 | 48 | // D6. [Add back]. The probability that this step is necessary is very |
1345 | 48 | // small, on the order of only 2/b. Make sure that test data accounts for |
1346 | 48 | // this possibility. Decrease q[j] by 1 |
1347 | 48 | q[j]--; |
1348 | 48 | // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). |
1349 | 48 | // A carry will occur to the left of u[j+n], and it should be ignored |
1350 | 48 | // since it cancels with the borrow that occurred in D4. |
1351 | 48 | bool carry = false; |
1352 | 456 | for (unsigned i = 0; i < n456 ; i++408 ) { |
1353 | 408 | uint32_t limit = std::min(u[j+i],v[i]); |
1354 | 408 | u[j+i] += v[i] + carry; |
1355 | 348 | carry = u[j+i] < limit || (carry && 348 u[j+i] == limit252 ); |
1356 | 408 | } |
1357 | 48 | u[j+n] += carry; |
1358 | 48 | } |
1359 | 71.1k | DEBUG(dbgs() << "KnuthDiv: after correction:"); |
1360 | 71.1k | DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); |
1361 | 71.1k | DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); |
1362 | 71.1k | |
1363 | 71.1k | // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. |
1364 | 71.1k | } while (--j >= 0); |
1365 | 44.0k | |
1366 | 44.0k | DEBUG(dbgs() << "KnuthDiv: quotient:"); |
1367 | 44.0k | DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]); |
1368 | 44.0k | DEBUG(dbgs() << '\n'); |
1369 | 44.0k | |
1370 | 44.0k | // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired |
1371 | 44.0k | // remainder may be obtained by dividing u[...] by d. If r is non-null we |
1372 | 44.0k | // compute the remainder (urem uses this). |
1373 | 44.0k | if (r44.0k ) { |
1374 | 50 | // The value d is expressed by the "shift" value above since we avoided |
1375 | 50 | // multiplication by d by using a shift left. So, all we have to do is |
1376 | 50 | // shift right here. |
1377 | 50 | if (shift50 ) { |
1378 | 40 | uint32_t carry = 0; |
1379 | 40 | DEBUG(dbgs() << "KnuthDiv: remainder:"); |
1380 | 856 | for (int i = n-1; i >= 0856 ; i--816 ) { |
1381 | 816 | r[i] = (u[i] >> shift) | carry; |
1382 | 816 | carry = u[i] << (32 - shift); |
1383 | 816 | DEBUG(dbgs() << " " << r[i]); |
1384 | 816 | } |
1385 | 50 | } else { |
1386 | 38 | for (int i = n-1; i >= 038 ; i--28 ) { |
1387 | 28 | r[i] = u[i]; |
1388 | 28 | DEBUG(dbgs() << " " << r[i]); |
1389 | 28 | } |
1390 | 10 | } |
1391 | 50 | DEBUG(dbgs() << '\n'); |
1392 | 50 | } |
1393 | 44.0k | DEBUG(dbgs() << '\n'); |
1394 | 44.0k | } |
1395 | | |
1396 | | void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, |
1397 | 253k | unsigned rhsWords, WordType *Quotient, WordType *Remainder) { |
1398 | 253k | assert(lhsWords >= rhsWords && "Fractional result"); |
1399 | 253k | |
1400 | 253k | // First, compose the values into an array of 32-bit words instead of |
1401 | 253k | // 64-bit words. This is a necessity of both the "short division" algorithm |
1402 | 253k | // and the Knuth "classical algorithm" which requires there to be native |
1403 | 253k | // operations for +, -, and * on an m bit value with an m*2 bit result. We |
1404 | 253k | // can't use 64-bit operands here because we don't have native results of |
1405 | 253k | // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't |
1406 | 253k | // work on large-endian machines. |
1407 | 253k | unsigned n = rhsWords * 2; |
1408 | 253k | unsigned m = (lhsWords * 2) - n; |
1409 | 253k | |
1410 | 253k | // Allocate space for the temporary values we need either on the stack, if |
1411 | 253k | // it will fit, or on the heap if it won't. |
1412 | 253k | uint32_t SPACE[128]; |
1413 | 253k | uint32_t *U = nullptr; |
1414 | 253k | uint32_t *V = nullptr; |
1415 | 253k | uint32_t *Q = nullptr; |
1416 | 253k | uint32_t *R = nullptr; |
1417 | 253k | if ((Remainder?253k 439.5k :3214k )*n+2*m+1 <= 128) { |
1418 | 253k | U = &SPACE[0]; |
1419 | 253k | V = &SPACE[m+n+1]; |
1420 | 253k | Q = &SPACE[(m+n+1) + n]; |
1421 | 253k | if (Remainder) |
1422 | 39.5k | R = &SPACE[(m+n+1) + n + (m+n)]; |
1423 | 253k | } else { |
1424 | 198 | U = new uint32_t[m + n + 1]; |
1425 | 198 | V = new uint32_t[n]; |
1426 | 198 | Q = new uint32_t[m+n]; |
1427 | 198 | if (Remainder) |
1428 | 8 | R = new uint32_t[n]; |
1429 | 198 | } |
1430 | 253k | |
1431 | 253k | // Initialize the dividend |
1432 | 253k | memset(U, 0, (m+n+1)*sizeof(uint32_t)); |
1433 | 790k | for (unsigned i = 0; i < lhsWords790k ; ++i536k ) { |
1434 | 536k | uint64_t tmp = LHS[i]; |
1435 | 536k | U[i * 2] = Lo_32(tmp); |
1436 | 536k | U[i * 2 + 1] = Hi_32(tmp); |
1437 | 536k | } |
1438 | 253k | U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. |
1439 | 253k | |
1440 | 253k | // Initialize the divisor |
1441 | 253k | memset(V, 0, (n)*sizeof(uint32_t)); |
1442 | 568k | for (unsigned i = 0; i < rhsWords568k ; ++i314k ) { |
1443 | 314k | uint64_t tmp = RHS[i]; |
1444 | 314k | V[i * 2] = Lo_32(tmp); |
1445 | 314k | V[i * 2 + 1] = Hi_32(tmp); |
1446 | 314k | } |
1447 | 253k | |
1448 | 253k | // initialize the quotient and remainder |
1449 | 253k | memset(Q, 0, (m+n) * sizeof(uint32_t)); |
1450 | 253k | if (Remainder) |
1451 | 39.5k | memset(R, 0, n * sizeof(uint32_t)); |
1452 | 253k | |
1453 | 253k | // Now, adjust m and n for the Knuth division. n is the number of words in |
1454 | 253k | // the divisor. m is the number of words by which the dividend exceeds the |
1455 | 253k | // divisor (i.e. m+n is the length of the dividend). These sizes must not |
1456 | 253k | // contain any zero words or the Knuth algorithm fails. |
1457 | 472k | for (unsigned i = n; i > 0 && 472k V[i-1] == 0472k ; i--219k ) { |
1458 | 219k | n--; |
1459 | 219k | m++; |
1460 | 219k | } |
1461 | 281k | for (unsigned i = m+n; i > 0 && 281k U[i-1] == 0281k ; i--27.9k ) |
1462 | 27.9k | m--; |
1463 | 253k | |
1464 | 253k | // If we're left with only a single word for the divisor, Knuth doesn't work |
1465 | 253k | // so we implement the short division algorithm here. This is much simpler |
1466 | 253k | // and faster because we are certain that we can divide a 64-bit quantity |
1467 | 253k | // by a 32-bit quantity at hardware speed and short division is simply a |
1468 | 253k | // series of such operations. This is just like doing short division but we |
1469 | 253k | // are using base 2^32 instead of base 10. |
1470 | 253k | assert(n != 0 && "Divide by zero?"); |
1471 | 253k | if (n == 1253k ) { |
1472 | 209k | uint32_t divisor = V[0]; |
1473 | 209k | uint32_t remainder = 0; |
1474 | 1.02M | for (int i = m; i >= 01.02M ; i--817k ) { |
1475 | 817k | uint64_t partial_dividend = Make_64(remainder, U[i]); |
1476 | 817k | if (partial_dividend == 0817k ) { |
1477 | 300 | Q[i] = 0; |
1478 | 300 | remainder = 0; |
1479 | 817k | } else if (817k partial_dividend < divisor817k ) { |
1480 | 6.19k | Q[i] = 0; |
1481 | 6.19k | remainder = Lo_32(partial_dividend); |
1482 | 817k | } else if (811k partial_dividend == divisor811k ) { |
1483 | 137 | Q[i] = 1; |
1484 | 137 | remainder = 0; |
1485 | 811k | } else { |
1486 | 811k | Q[i] = Lo_32(partial_dividend / divisor); |
1487 | 811k | remainder = Lo_32(partial_dividend - (Q[i] * divisor)); |
1488 | 811k | } |
1489 | 817k | } |
1490 | 209k | if (R) |
1491 | 39.4k | R[0] = remainder; |
1492 | 253k | } else { |
1493 | 44.0k | // Now we're ready to invoke the Knuth classical divide algorithm. In this |
1494 | 44.0k | // case n > 1. |
1495 | 44.0k | KnuthDiv(U, V, Q, R, m, n); |
1496 | 44.0k | } |
1497 | 253k | |
1498 | 253k | // If the caller wants the quotient |
1499 | 253k | if (Quotient253k ) { |
1500 | 789k | for (unsigned i = 0; i < lhsWords789k ; ++i535k ) |
1501 | 535k | Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); |
1502 | 253k | } |
1503 | 253k | |
1504 | 253k | // If the caller wants the remainder |
1505 | 253k | if (Remainder253k ) { |
1506 | 79.4k | for (unsigned i = 0; i < rhsWords79.4k ; ++i39.9k ) |
1507 | 39.9k | Remainder[i] = Make_64(R[i*2+1], R[i*2]); |
1508 | 39.5k | } |
1509 | 253k | |
1510 | 253k | // Clean up the memory we allocated. |
1511 | 253k | if (U != &SPACE[0]253k ) { |
1512 | 198 | delete [] U; |
1513 | 198 | delete [] V; |
1514 | 198 | delete [] Q; |
1515 | 198 | delete [] R; |
1516 | 198 | } |
1517 | 253k | } |
1518 | | |
1519 | 55.7M | APInt APInt::udiv(const APInt &RHS) const { |
1520 | 55.7M | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); |
1521 | 55.7M | |
1522 | 55.7M | // First, deal with the easy case |
1523 | 55.7M | if (isSingleWord()55.7M ) { |
1524 | 55.1M | assert(RHS.U.VAL != 0 && "Divide by zero?"); |
1525 | 55.1M | return APInt(BitWidth, U.VAL / RHS.U.VAL); |
1526 | 55.1M | } |
1527 | 628k | |
1528 | 628k | // Get some facts about the LHS and RHS number of bits and words |
1529 | 628k | unsigned lhsWords = getNumWords(getActiveBits()); |
1530 | 628k | unsigned rhsBits = RHS.getActiveBits(); |
1531 | 628k | unsigned rhsWords = getNumWords(rhsBits); |
1532 | 628k | assert(rhsWords && "Divided by zero???"); |
1533 | 628k | |
1534 | 628k | // Deal with some degenerate cases |
1535 | 628k | if (!lhsWords) |
1536 | 628k | // 0 / X ===> 0 |
1537 | 2.20k | return APInt(BitWidth, 0); |
1538 | 625k | if (625k rhsBits == 1625k ) |
1539 | 625k | // X / 1 ===> X |
1540 | 381k | return *this; |
1541 | 243k | if (243k lhsWords < rhsWords || 243k this->ult(RHS)243k ) |
1542 | 243k | // X / Y ===> 0, iff X < Y |
1543 | 1.17k | return APInt(BitWidth, 0); |
1544 | 242k | if (242k *this == RHS242k ) |
1545 | 242k | // X / X ===> 1 |
1546 | 23.3k | return APInt(BitWidth, 1); |
1547 | 219k | if (219k lhsWords == 1219k ) // rhsWords is 1 if lhsWords is 1. |
1548 | 219k | // All high words are zero, just use native divide |
1549 | 5.30k | return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); |
1550 | 214k | |
1551 | 214k | // We have to compute it the hard way. Invoke the Knuth divide algorithm. |
1552 | 214k | APInt Quotient(BitWidth, 0); // to hold result. |
1553 | 214k | divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); |
1554 | 214k | return Quotient; |
1555 | 214k | } |
1556 | | |
1557 | 8 | APInt APInt::udiv(uint64_t RHS) const { |
1558 | 8 | assert(RHS != 0 && "Divide by zero?"); |
1559 | 8 | |
1560 | 8 | // First, deal with the easy case |
1561 | 8 | if (isSingleWord()) |
1562 | 3 | return APInt(BitWidth, U.VAL / RHS); |
1563 | 5 | |
1564 | 5 | // Get some facts about the LHS words. |
1565 | 5 | unsigned lhsWords = getNumWords(getActiveBits()); |
1566 | 5 | |
1567 | 5 | // Deal with some degenerate cases |
1568 | 5 | if (!lhsWords) |
1569 | 5 | // 0 / X ===> 0 |
1570 | 0 | return APInt(BitWidth, 0); |
1571 | 5 | if (5 RHS == 15 ) |
1572 | 5 | // X / 1 ===> X |
1573 | 0 | return *this; |
1574 | 5 | if (5 this->ult(RHS)5 ) |
1575 | 5 | // X / Y ===> 0, iff X < Y |
1576 | 0 | return APInt(BitWidth, 0); |
1577 | 5 | if (5 *this == RHS5 ) |
1578 | 5 | // X / X ===> 1 |
1579 | 0 | return APInt(BitWidth, 1); |
1580 | 5 | if (5 lhsWords == 15 ) // rhsWords is 1 if lhsWords is 1. |
1581 | 5 | // All high words are zero, just use native divide |
1582 | 3 | return APInt(BitWidth, this->U.pVal[0] / RHS); |
1583 | 2 | |
1584 | 2 | // We have to compute it the hard way. Invoke the Knuth divide algorithm. |
1585 | 2 | APInt Quotient(BitWidth, 0); // to hold result. |
1586 | 2 | divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); |
1587 | 2 | return Quotient; |
1588 | 2 | } |
1589 | | |
1590 | 35.1M | APInt APInt::sdiv(const APInt &RHS) const { |
1591 | 35.1M | if (isNegative()35.1M ) { |
1592 | 302k | if (RHS.isNegative()) |
1593 | 45.5k | return (-(*this)).udiv(-RHS); |
1594 | 256k | return -((-(*this)).udiv(RHS)); |
1595 | 256k | } |
1596 | 34.8M | if (34.8M RHS.isNegative()34.8M ) |
1597 | 97.6k | return -(this->udiv(-RHS)); |
1598 | 34.7M | return this->udiv(RHS); |
1599 | 34.7M | } |
1600 | | |
1601 | 5 | APInt APInt::sdiv(int64_t RHS) const { |
1602 | 5 | if (isNegative()5 ) { |
1603 | 2 | if (RHS < 0) |
1604 | 0 | return (-(*this)).udiv(-RHS); |
1605 | 2 | return -((-(*this)).udiv(RHS)); |
1606 | 2 | } |
1607 | 3 | if (3 RHS < 03 ) |
1608 | 0 | return -(this->udiv(-RHS)); |
1609 | 3 | return this->udiv(RHS); |
1610 | 3 | } |
1611 | | |
1612 | 5.67M | APInt APInt::urem(const APInt &RHS) const { |
1613 | 5.67M | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); |
1614 | 5.67M | if (isSingleWord()5.67M ) { |
1615 | 5.67M | assert(RHS.U.VAL != 0 && "Remainder by zero?"); |
1616 | 5.67M | return APInt(BitWidth, U.VAL % RHS.U.VAL); |
1617 | 5.67M | } |
1618 | 33 | |
1619 | 33 | // Get some facts about the LHS |
1620 | 33 | unsigned lhsWords = getNumWords(getActiveBits()); |
1621 | 33 | |
1622 | 33 | // Get some facts about the RHS |
1623 | 33 | unsigned rhsBits = RHS.getActiveBits(); |
1624 | 33 | unsigned rhsWords = getNumWords(rhsBits); |
1625 | 33 | assert(rhsWords && "Performing remainder operation by zero ???"); |
1626 | 33 | |
1627 | 33 | // Check the degenerate cases |
1628 | 33 | if (lhsWords == 0) |
1629 | 33 | // 0 % Y ===> 0 |
1630 | 4 | return APInt(BitWidth, 0); |
1631 | 29 | if (29 rhsBits == 129 ) |
1632 | 29 | // X % 1 ===> 0 |
1633 | 0 | return APInt(BitWidth, 0); |
1634 | 29 | if (29 lhsWords < rhsWords || 29 this->ult(RHS)29 ) |
1635 | 29 | // X % Y ===> X, iff X < Y |
1636 | 0 | return *this; |
1637 | 29 | if (29 *this == RHS29 ) |
1638 | 29 | // X % X == 0; |
1639 | 0 | return APInt(BitWidth, 0); |
1640 | 29 | if (29 lhsWords == 129 ) |
1641 | 29 | // All high words are zero, just use native remainder |
1642 | 1 | return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); |
1643 | 28 | |
1644 | 28 | // We have to compute it the hard way. Invoke the Knuth divide algorithm. |
1645 | 28 | APInt Remainder(BitWidth, 0); |
1646 | 28 | divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); |
1647 | 28 | return Remainder; |
1648 | 28 | } |
1649 | | |
1650 | 323 | uint64_t APInt::urem(uint64_t RHS) const { |
1651 | 323 | assert(RHS != 0 && "Remainder by zero?"); |
1652 | 323 | |
1653 | 323 | if (isSingleWord()) |
1654 | 318 | return U.VAL % RHS; |
1655 | 5 | |
1656 | 5 | // Get some facts about the LHS |
1657 | 5 | unsigned lhsWords = getNumWords(getActiveBits()); |
1658 | 5 | |
1659 | 5 | // Check the degenerate cases |
1660 | 5 | if (lhsWords == 0) |
1661 | 5 | // 0 % Y ===> 0 |
1662 | 0 | return 0; |
1663 | 5 | if (5 RHS == 15 ) |
1664 | 5 | // X % 1 ===> 0 |
1665 | 0 | return 0; |
1666 | 5 | if (5 this->ult(RHS)5 ) |
1667 | 5 | // X % Y ===> X, iff X < Y |
1668 | 0 | return getZExtValue(); |
1669 | 5 | if (5 *this == RHS5 ) |
1670 | 5 | // X % X == 0; |
1671 | 0 | return 0; |
1672 | 5 | if (5 lhsWords == 15 ) |
1673 | 5 | // All high words are zero, just use native remainder |
1674 | 3 | return U.pVal[0] % RHS; |
1675 | 2 | |
1676 | 2 | // We have to compute it the hard way. Invoke the Knuth divide algorithm. |
1677 | 2 | uint64_t Remainder; |
1678 | 2 | divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); |
1679 | 2 | return Remainder; |
1680 | 2 | } |
1681 | | |
1682 | 1.59M | APInt APInt::srem(const APInt &RHS) const { |
1683 | 1.59M | if (isNegative()1.59M ) { |
1684 | 287k | if (RHS.isNegative()) |
1685 | 45.9k | return -((-(*this)).urem(-RHS)); |
1686 | 241k | return -((-(*this)).urem(RHS)); |
1687 | 241k | } |
1688 | 1.31M | if (1.31M RHS.isNegative()1.31M ) |
1689 | 98.4k | return this->urem(-RHS); |
1690 | 1.21M | return this->urem(RHS); |
1691 | 1.21M | } |
1692 | | |
1693 | 5 | int64_t APInt::srem(int64_t RHS) const { |
1694 | 5 | if (isNegative()5 ) { |
1695 | 2 | if (RHS < 0) |
1696 | 0 | return -((-(*this)).urem(-RHS)); |
1697 | 2 | return -((-(*this)).urem(RHS)); |
1698 | 2 | } |
1699 | 3 | if (3 RHS < 03 ) |
1700 | 0 | return this->urem(-RHS); |
1701 | 3 | return this->urem(RHS); |
1702 | 3 | } |
1703 | | |
1704 | | void APInt::udivrem(const APInt &LHS, const APInt &RHS, |
1705 | 598k | APInt &Quotient, APInt &Remainder) { |
1706 | 598k | assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); |
1707 | 598k | unsigned BitWidth = LHS.BitWidth; |
1708 | 598k | |
1709 | 598k | // First, deal with the easy case |
1710 | 598k | if (LHS.isSingleWord()598k ) { |
1711 | 556k | assert(RHS.U.VAL != 0 && "Divide by zero?"); |
1712 | 556k | uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; |
1713 | 556k | uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; |
1714 | 556k | Quotient = APInt(BitWidth, QuotVal); |
1715 | 556k | Remainder = APInt(BitWidth, RemVal); |
1716 | 556k | return; |
1717 | 556k | } |
1718 | 42.4k | |
1719 | 42.4k | // Get some size facts about the dividend and divisor |
1720 | 42.4k | unsigned lhsWords = getNumWords(LHS.getActiveBits()); |
1721 | 42.4k | unsigned rhsBits = RHS.getActiveBits(); |
1722 | 42.4k | unsigned rhsWords = getNumWords(rhsBits); |
1723 | 42.4k | assert(rhsWords && "Performing divrem operation by zero ???"); |
1724 | 42.4k | |
1725 | 42.4k | // Check the degenerate cases |
1726 | 42.4k | if (lhsWords == 042.4k ) { |
1727 | 0 | Quotient = 0; // 0 / Y ===> 0 |
1728 | 0 | Remainder = 0; // 0 % Y ===> 0 |
1729 | 0 | return; |
1730 | 0 | } |
1731 | 42.4k | |
1732 | 42.4k | if (42.4k rhsBits == 142.4k ) { |
1733 | 35.4k | Quotient = LHS; // X / 1 ===> X |
1734 | 35.4k | Remainder = 0; // X % 1 ===> 0 |
1735 | 35.4k | } |
1736 | 42.4k | |
1737 | 42.4k | if (lhsWords < rhsWords || 42.4k LHS.ult(RHS)42.4k ) { |
1738 | 36.1k | Remainder = LHS; // X % Y ===> X, iff X < Y |
1739 | 36.1k | Quotient = 0; // X / Y ===> 0, iff X < Y |
1740 | 36.1k | return; |
1741 | 36.1k | } |
1742 | 6.25k | |
1743 | 6.25k | if (6.25k LHS == RHS6.25k ) { |
1744 | 49 | Quotient = 1; // X / X ===> 1 |
1745 | 49 | Remainder = 0; // X % X ===> 0; |
1746 | 49 | return; |
1747 | 49 | } |
1748 | 6.20k | |
1749 | 6.20k | // Make sure there is enough space to hold the results. |
1750 | 6.20k | // NOTE: This assumes that reallocate won't affect any bits if it doesn't |
1751 | 6.20k | // change the size. This is necessary if Quotient or Remainder is aliased |
1752 | 6.20k | // with LHS or RHS. |
1753 | 6.20k | Quotient.reallocate(BitWidth); |
1754 | 6.20k | Remainder.reallocate(BitWidth); |
1755 | 6.20k | |
1756 | 6.20k | if (lhsWords == 16.20k ) { // rhsWords is 1 if lhsWords is 1. |
1757 | 5.97k | // There is only one word to consider so use the native versions. |
1758 | 5.97k | uint64_t lhsValue = LHS.U.pVal[0]; |
1759 | 5.97k | uint64_t rhsValue = RHS.U.pVal[0]; |
1760 | 5.97k | Quotient = lhsValue / rhsValue; |
1761 | 5.97k | Remainder = lhsValue % rhsValue; |
1762 | 5.97k | return; |
1763 | 5.97k | } |
1764 | 235 | |
1765 | 235 | // Okay, lets do it the long way |
1766 | 235 | divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, |
1767 | 235 | Remainder.U.pVal); |
1768 | 235 | // Clear the rest of the Quotient and Remainder. |
1769 | 235 | std::memset(Quotient.U.pVal + lhsWords, 0, |
1770 | 235 | (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); |
1771 | 235 | std::memset(Remainder.U.pVal + rhsWords, 0, |
1772 | 235 | (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); |
1773 | 235 | } |
1774 | | |
1775 | | void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, |
1776 | 80.8k | uint64_t &Remainder) { |
1777 | 80.8k | assert(RHS != 0 && "Divide by zero?"); |
1778 | 80.8k | unsigned BitWidth = LHS.BitWidth; |
1779 | 80.8k | |
1780 | 80.8k | // First, deal with the easy case |
1781 | 80.8k | if (LHS.isSingleWord()80.8k ) { |
1782 | 3 | uint64_t QuotVal = LHS.U.VAL / RHS; |
1783 | 3 | Remainder = LHS.U.VAL % RHS; |
1784 | 3 | Quotient = APInt(BitWidth, QuotVal); |
1785 | 3 | return; |
1786 | 3 | } |
1787 | 80.8k | |
1788 | 80.8k | // Get some size facts about the dividend and divisor |
1789 | 80.8k | unsigned lhsWords = getNumWords(LHS.getActiveBits()); |
1790 | 80.8k | |
1791 | 80.8k | // Check the degenerate cases |
1792 | 80.8k | if (lhsWords == 080.8k ) { |
1793 | 0 | Quotient = 0; // 0 / Y ===> 0 |
1794 | 0 | Remainder = 0; // 0 % Y ===> 0 |
1795 | 0 | return; |
1796 | 0 | } |
1797 | 80.8k | |
1798 | 80.8k | if (80.8k RHS == 180.8k ) { |
1799 | 0 | Quotient = LHS; // X / 1 ===> X |
1800 | 0 | Remainder = 0; // X % 1 ===> 0 |
1801 | 0 | } |
1802 | 80.8k | |
1803 | 80.8k | if (LHS.ult(RHS)80.8k ) { |
1804 | 2.44k | Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y |
1805 | 2.44k | Quotient = 0; // X / Y ===> 0, iff X < Y |
1806 | 2.44k | return; |
1807 | 2.44k | } |
1808 | 78.4k | |
1809 | 78.4k | if (78.4k LHS == RHS78.4k ) { |
1810 | 26 | Quotient = 1; // X / X ===> 1 |
1811 | 26 | Remainder = 0; // X % X ===> 0; |
1812 | 26 | return; |
1813 | 26 | } |
1814 | 78.3k | |
1815 | 78.3k | // Make sure there is enough space to hold the results. |
1816 | 78.3k | // NOTE: This assumes that reallocate won't affect any bits if it doesn't |
1817 | 78.3k | // change the size. This is necessary if Quotient is aliased with LHS. |
1818 | 78.3k | Quotient.reallocate(BitWidth); |
1819 | 78.3k | |
1820 | 78.3k | if (lhsWords == 178.3k ) { // rhsWords is 1 if lhsWords is 1. |
1821 | 39.1k | // There is only one word to consider so use the native versions. |
1822 | 39.1k | uint64_t lhsValue = LHS.U.pVal[0]; |
1823 | 39.1k | Quotient = lhsValue / RHS; |
1824 | 39.1k | Remainder = lhsValue % RHS; |
1825 | 39.1k | return; |
1826 | 39.1k | } |
1827 | 39.2k | |
1828 | 39.2k | // Okay, lets do it the long way |
1829 | 39.2k | divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); |
1830 | 39.2k | // Clear the rest of the Quotient. |
1831 | 39.2k | std::memset(Quotient.U.pVal + lhsWords, 0, |
1832 | 39.2k | (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); |
1833 | 39.2k | } |
1834 | | |
1835 | | void APInt::sdivrem(const APInt &LHS, const APInt &RHS, |
1836 | 66.6k | APInt &Quotient, APInt &Remainder) { |
1837 | 66.6k | if (LHS.isNegative()66.6k ) { |
1838 | 2.17k | if (RHS.isNegative()) |
1839 | 73 | APInt::udivrem(-LHS, -RHS, Quotient, Remainder); |
1840 | 2.10k | else { |
1841 | 2.10k | APInt::udivrem(-LHS, RHS, Quotient, Remainder); |
1842 | 2.10k | Quotient.negate(); |
1843 | 2.10k | } |
1844 | 2.17k | Remainder.negate(); |
1845 | 66.6k | } else if (64.4k RHS.isNegative()64.4k ) { |
1846 | 114 | APInt::udivrem(LHS, -RHS, Quotient, Remainder); |
1847 | 114 | Quotient.negate(); |
1848 | 64.4k | } else { |
1849 | 64.3k | APInt::udivrem(LHS, RHS, Quotient, Remainder); |
1850 | 64.3k | } |
1851 | 66.6k | } |
1852 | | |
1853 | | void APInt::sdivrem(const APInt &LHS, int64_t RHS, |
1854 | 5 | APInt &Quotient, int64_t &Remainder) { |
1855 | 5 | uint64_t R = Remainder; |
1856 | 5 | if (LHS.isNegative()5 ) { |
1857 | 2 | if (RHS < 0) |
1858 | 0 | APInt::udivrem(-LHS, -RHS, Quotient, R); |
1859 | 2 | else { |
1860 | 2 | APInt::udivrem(-LHS, RHS, Quotient, R); |
1861 | 2 | Quotient.negate(); |
1862 | 2 | } |
1863 | 2 | R = -R; |
1864 | 5 | } else if (3 RHS < 03 ) { |
1865 | 0 | APInt::udivrem(LHS, -RHS, Quotient, R); |
1866 | 0 | Quotient.negate(); |
1867 | 3 | } else { |
1868 | 3 | APInt::udivrem(LHS, RHS, Quotient, R); |
1869 | 3 | } |
1870 | 5 | Remainder = R; |
1871 | 5 | } |
1872 | | |
1873 | 13.7k | APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { |
1874 | 13.7k | APInt Res = *this+RHS; |
1875 | 13.7k | Overflow = isNonNegative() == RHS.isNonNegative() && |
1876 | 12.7k | Res.isNonNegative() != isNonNegative(); |
1877 | 13.7k | return Res; |
1878 | 13.7k | } |
1879 | | |
1880 | 1.68k | APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { |
1881 | 1.68k | APInt Res = *this+RHS; |
1882 | 1.68k | Overflow = Res.ult(RHS); |
1883 | 1.68k | return Res; |
1884 | 1.68k | } |
1885 | | |
1886 | 72.0k | APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { |
1887 | 72.0k | APInt Res = *this - RHS; |
1888 | 72.0k | Overflow = isNonNegative() != RHS.isNonNegative() && |
1889 | 3.26k | Res.isNonNegative() != isNonNegative(); |
1890 | 72.0k | return Res; |
1891 | 72.0k | } |
1892 | | |
1893 | 4 | APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { |
1894 | 4 | APInt Res = *this-RHS; |
1895 | 4 | Overflow = Res.ugt(*this); |
1896 | 4 | return Res; |
1897 | 4 | } |
1898 | | |
1899 | 32.8M | APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { |
1900 | 32.8M | // MININT/-1 --> overflow. |
1901 | 0 | Overflow = isMinSignedValue() && RHS.isAllOnesValue(); |
1902 | 32.8M | return sdiv(RHS); |
1903 | 32.8M | } |
1904 | | |
1905 | 997 | APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { |
1906 | 997 | APInt Res = *this * RHS; |
1907 | 997 | |
1908 | 997 | if (*this != 0 && 997 RHS != 0997 ) |
1909 | 990 | Overflow = Res.sdiv(RHS) != *this || 990 Res.sdiv(*this) != RHS988 ; |
1910 | 997 | else |
1911 | 7 | Overflow = false; |
1912 | 997 | return Res; |
1913 | 997 | } |
1914 | | |
1915 | 3.20M | APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { |
1916 | 3.20M | APInt Res = *this * RHS; |
1917 | 3.20M | |
1918 | 3.20M | if (*this != 0 && 3.20M RHS != 01.64M ) |
1919 | 1.64M | Overflow = Res.udiv(RHS) != *this || 1.64M Res.udiv(*this) != RHS84.2k ; |
1920 | 3.20M | else |
1921 | 1.55M | Overflow = false; |
1922 | 3.20M | return Res; |
1923 | 3.20M | } |
1924 | | |
1925 | 7.56k | APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { |
1926 | 7.56k | Overflow = ShAmt.uge(getBitWidth()); |
1927 | 7.56k | if (Overflow) |
1928 | 0 | return APInt(BitWidth, 0); |
1929 | 7.56k | |
1930 | 7.56k | if (7.56k isNonNegative()7.56k ) // Don't allow sign change. |
1931 | 7.56k | Overflow = ShAmt.uge(countLeadingZeros()); |
1932 | 7.56k | else |
1933 | 0 | Overflow = ShAmt.uge(countLeadingOnes()); |
1934 | 7.56k | |
1935 | 7.56k | return *this << ShAmt; |
1936 | 7.56k | } |
1937 | | |
1938 | 21 | APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { |
1939 | 21 | Overflow = ShAmt.uge(getBitWidth()); |
1940 | 21 | if (Overflow) |
1941 | 0 | return APInt(BitWidth, 0); |
1942 | 21 | |
1943 | 21 | Overflow = ShAmt.ugt(countLeadingZeros()); |
1944 | 21 | |
1945 | 21 | return *this << ShAmt; |
1946 | 21 | } |
1947 | | |
1948 | | |
1949 | | |
1950 | | |
1951 | 2.38M | void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { |
1952 | 2.38M | // Check our assumptions here |
1953 | 2.38M | assert(!str.empty() && "Invalid string length"); |
1954 | 2.38M | assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || |
1955 | 2.38M | radix == 36) && |
1956 | 2.38M | "Radix should be 2, 8, 10, 16, or 36!"); |
1957 | 2.38M | |
1958 | 2.38M | StringRef::iterator p = str.begin(); |
1959 | 2.38M | size_t slen = str.size(); |
1960 | 2.38M | bool isNeg = *p == '-'; |
1961 | 2.38M | if (*p == '-' || 2.38M *p == '+'2.34M ) { |
1962 | 38.0k | p++; |
1963 | 38.0k | slen--; |
1964 | 38.0k | assert(slen && "String is only a sign, needs a value."); |
1965 | 38.0k | } |
1966 | 2.38M | assert((slen <= numbits || radix != 2) && "Insufficient bit width"); |
1967 | 2.38M | assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); |
1968 | 2.38M | assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); |
1969 | 2.38M | assert((((slen-1)*64)/22 <= numbits || radix != 10) && |
1970 | 2.38M | "Insufficient bit width"); |
1971 | 2.38M | |
1972 | 2.38M | // Allocate memory if needed |
1973 | 2.38M | if (isSingleWord()) |
1974 | 2.38M | U.VAL = 0; |
1975 | 2.38M | else |
1976 | 1.06k | U.pVal = getClearedMemory(getNumWords()); |
1977 | 2.38M | |
1978 | 2.38M | // Figure out if we can shift instead of multiply |
1979 | 2.38M | unsigned shift = (radix == 16 ? 4805 : radix == 8 ? 2.38M 318 : radix == 2 ? 2.38M 115 : 02.38M ); |
1980 | 2.38M | |
1981 | 2.38M | // Enter digit traversal loop |
1982 | 5.59M | for (StringRef::iterator e = str.end(); p != e5.59M ; ++p3.21M ) { |
1983 | 3.21M | unsigned digit = getDigit(*p, radix); |
1984 | 3.21M | assert(digit < radix && "Invalid character in digit string"); |
1985 | 3.21M | |
1986 | 3.21M | // Shift or multiply the value by the radix |
1987 | 3.21M | if (slen > 13.21M ) { |
1988 | 1.45M | if (shift) |
1989 | 5.30k | *this <<= shift; |
1990 | 1.45M | else |
1991 | 1.45M | *this *= radix; |
1992 | 1.45M | } |
1993 | 3.21M | |
1994 | 3.21M | // Add in the digit we just interpreted |
1995 | 3.21M | *this += digit; |
1996 | 3.21M | } |
1997 | 2.38M | // If its negative, put it in two's complement form |
1998 | 2.38M | if (isNeg) |
1999 | 38.0k | this->negate(); |
2000 | 2.38M | } |
2001 | | |
2002 | | void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, |
2003 | 1.82M | bool Signed, bool formatAsCLiteral) const { |
2004 | 1.82M | assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || |
2005 | 1.82M | Radix == 36) && |
2006 | 1.82M | "Radix should be 2, 8, 10, 16, or 36!"); |
2007 | 1.82M | |
2008 | 1.82M | const char *Prefix = ""; |
2009 | 1.82M | if (formatAsCLiteral1.82M ) { |
2010 | 326 | switch (Radix) { |
2011 | 3 | case 2: |
2012 | 3 | // Binary literals are a non-standard extension added in gcc 4.3: |
2013 | 3 | // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html |
2014 | 3 | Prefix = "0b"; |
2015 | 3 | break; |
2016 | 3 | case 8: |
2017 | 3 | Prefix = "0"; |
2018 | 3 | break; |
2019 | 29 | case 10: |
2020 | 29 | break; // No prefix |
2021 | 291 | case 16: |
2022 | 291 | Prefix = "0x"; |
2023 | 291 | break; |
2024 | 0 | default: |
2025 | 0 | llvm_unreachable("Invalid radix!"); |
2026 | 1.82M | } |
2027 | 1.82M | } |
2028 | 1.82M | |
2029 | 1.82M | // First, check for a zero value and just short circuit the logic below. |
2030 | 1.82M | if (1.82M *this == 01.82M ) { |
2031 | 235k | while (*Prefix235k ) { |
2032 | 5 | Str.push_back(*Prefix); |
2033 | 5 | ++Prefix; |
2034 | 5 | }; |
2035 | 235k | Str.push_back('0'); |
2036 | 235k | return; |
2037 | 235k | } |
2038 | 1.58M | |
2039 | 1.58M | static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; |
2040 | 1.58M | |
2041 | 1.58M | if (isSingleWord()1.58M ) { |
2042 | 1.58M | char Buffer[65]; |
2043 | 1.58M | char *BufPtr = std::end(Buffer); |
2044 | 1.58M | |
2045 | 1.58M | uint64_t N; |
2046 | 1.58M | if (!Signed1.58M ) { |
2047 | 493k | N = getZExtValue(); |
2048 | 1.58M | } else { |
2049 | 1.08M | int64_t I = getSExtValue(); |
2050 | 1.08M | if (I >= 01.08M ) { |
2051 | 1.05M | N = I; |
2052 | 1.08M | } else { |
2053 | 33.4k | Str.push_back('-'); |
2054 | 33.4k | N = -(uint64_t)I; |
2055 | 33.4k | } |
2056 | 1.08M | } |
2057 | 1.58M | |
2058 | 1.58M | while (*Prefix1.58M ) { |
2059 | 310 | Str.push_back(*Prefix); |
2060 | 310 | ++Prefix; |
2061 | 310 | }; |
2062 | 1.58M | |
2063 | 15.1M | while (N15.1M ) { |
2064 | 13.5M | *--BufPtr = Digits[N % Radix]; |
2065 | 13.5M | N /= Radix; |
2066 | 13.5M | } |
2067 | 1.58M | Str.append(BufPtr, std::end(Buffer)); |
2068 | 1.58M | return; |
2069 | 1.58M | } |
2070 | 2.58k | |
2071 | 2.58k | APInt Tmp(*this); |
2072 | 2.58k | |
2073 | 2.58k | if (Signed && 2.58k isNegative()1.64k ) { |
2074 | 122 | // They want to print the signed version and it is a negative value |
2075 | 122 | // Flip the bits and add one to turn it into the equivalent positive |
2076 | 122 | // value and put a '-' in the result. |
2077 | 122 | Tmp.negate(); |
2078 | 122 | Str.push_back('-'); |
2079 | 122 | } |
2080 | 2.58k | |
2081 | 2.86k | while (*Prefix2.86k ) { |
2082 | 276 | Str.push_back(*Prefix); |
2083 | 276 | ++Prefix; |
2084 | 276 | }; |
2085 | 2.58k | |
2086 | 2.58k | // We insert the digits backward, then reverse them to get the right order. |
2087 | 2.58k | unsigned StartDig = Str.size(); |
2088 | 2.58k | |
2089 | 2.58k | // For the 2, 8 and 16 bit cases, we can just shift instead of divide |
2090 | 2.58k | // because the number of bits per digit (1, 3 and 4 respectively) divides |
2091 | 2.58k | // equally. We just shift until the value is zero. |
2092 | 2.58k | if (Radix == 2 || 2.58k Radix == 82.58k || Radix == 162.58k ) { |
2093 | 138 | // Just shift tmp right for each digit width until it becomes zero |
2094 | 138 | unsigned ShiftAmt = (Radix == 16 ? 4138 : (Radix == 8 ? 0 30 : 10 )); |
2095 | 138 | unsigned MaskAmt = Radix - 1; |
2096 | 138 | |
2097 | 2.79k | while (Tmp.getBoolValue()2.79k ) { |
2098 | 2.65k | unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; |
2099 | 2.65k | Str.push_back(Digits[Digit]); |
2100 | 2.65k | Tmp.lshrInPlace(ShiftAmt); |
2101 | 2.65k | } |
2102 | 2.58k | } else { |
2103 | 83.2k | while (Tmp.getBoolValue()83.2k ) { |
2104 | 80.8k | uint64_t Digit; |
2105 | 80.8k | udivrem(Tmp, Radix, Tmp, Digit); |
2106 | 80.8k | assert(Digit < Radix && "divide failed"); |
2107 | 80.8k | Str.push_back(Digits[Digit]); |
2108 | 80.8k | } |
2109 | 2.44k | } |
2110 | 1.82M | |
2111 | 1.82M | // Reverse the digits before returning. |
2112 | 1.82M | std::reverse(Str.begin()+StartDig, Str.end()); |
2113 | 1.82M | } |
2114 | | |
2115 | | /// Returns the APInt as a std::string. Note that this is an inefficient method. |
2116 | | /// It is better to pass in a SmallVector/SmallString to the methods above. |
2117 | 1.22M | std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { |
2118 | 1.22M | SmallString<40> S; |
2119 | 1.22M | toString(S, Radix, Signed, /* formatAsCLiteral = */false); |
2120 | 1.22M | return S.str(); |
2121 | 1.22M | } |
2122 | | |
2123 | | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2124 | | LLVM_DUMP_METHOD void APInt::dump() const { |
2125 | | SmallString<40> S, U; |
2126 | | this->toStringUnsigned(U); |
2127 | | this->toStringSigned(S); |
2128 | | dbgs() << "APInt(" << BitWidth << "b, " |
2129 | | << U << "u " << S << "s)\n"; |
2130 | | } |
2131 | | #endif |
2132 | | |
2133 | 597k | void APInt::print(raw_ostream &OS, bool isSigned) const { |
2134 | 597k | SmallString<40> S; |
2135 | 597k | this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); |
2136 | 597k | OS << S; |
2137 | 597k | } |
2138 | | |
2139 | | // This implements a variety of operations on a representation of |
2140 | | // arbitrary precision, two's-complement, bignum integer values. |
2141 | | |
2142 | | // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe |
2143 | | // and unrestricting assumption. |
2144 | | static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, |
2145 | | "Part width must be divisible by 2!"); |
2146 | | |
2147 | | /* Some handy functions local to this file. */ |
2148 | | |
2149 | | /* Returns the integer part with the least significant BITS set. |
2150 | | BITS cannot be zero. */ |
2151 | 73.6M | static inline APInt::WordType lowBitMask(unsigned bits) { |
2152 | 73.6M | assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); |
2153 | 73.6M | |
2154 | 73.6M | return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); |
2155 | 73.6M | } |
2156 | | |
2157 | | /* Returns the value of the lower half of PART. */ |
2158 | 72.2M | static inline APInt::WordType lowHalf(APInt::WordType part) { |
2159 | 72.2M | return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); |
2160 | 72.2M | } |
2161 | | |
2162 | | /* Returns the value of the upper half of PART. */ |
2163 | 108M | static inline APInt::WordType highHalf(APInt::WordType part) { |
2164 | 108M | return part >> (APInt::APINT_BITS_PER_WORD / 2); |
2165 | 108M | } |
2166 | | |
2167 | | /* Returns the bit number of the most significant set bit of a part. |
2168 | | If the input number has no bits set -1U is returned. */ |
2169 | 6.37M | static unsigned partMSB(APInt::WordType value) { |
2170 | 6.37M | return findLastSet(value, ZB_Max); |
2171 | 6.37M | } |
2172 | | |
2173 | | /* Returns the bit number of the least significant set bit of a |
2174 | | part. If the input number has no bits set -1U is returned. */ |
2175 | 2.16M | static unsigned partLSB(APInt::WordType value) { |
2176 | 2.16M | return findFirstSet(value, ZB_Max); |
2177 | 2.16M | } |
2178 | | |
2179 | | /* Sets the least significant part of a bignum to the input value, and |
2180 | | zeroes out higher parts. */ |
2181 | 15.3M | void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { |
2182 | 15.3M | assert(parts > 0); |
2183 | 15.3M | |
2184 | 15.3M | dst[0] = part; |
2185 | 28.9M | for (unsigned i = 1; i < parts28.9M ; i++13.5M ) |
2186 | 13.5M | dst[i] = 0; |
2187 | 15.3M | } |
2188 | | |
2189 | | /* Assign one bignum to another. */ |
2190 | 3.21M | void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { |
2191 | 6.44M | for (unsigned i = 0; i < parts6.44M ; i++3.23M ) |
2192 | 3.23M | dst[i] = src[i]; |
2193 | 3.21M | } |
2194 | | |
2195 | | /* Returns true if a bignum is zero, false otherwise. */ |
2196 | 247k | bool APInt::tcIsZero(const WordType *src, unsigned parts) { |
2197 | 356k | for (unsigned i = 0; i < parts356k ; i++108k ) |
2198 | 247k | if (247k src[i]247k ) |
2199 | 139k | return false; |
2200 | 247k | |
2201 | 108k | return true; |
2202 | 247k | } |
2203 | | |
2204 | | /* Extract the given bit of a bignum; returns 0 or 1. */ |
2205 | 1.42M | int APInt::tcExtractBit(const WordType *parts, unsigned bit) { |
2206 | 1.42M | return (parts[whichWord(bit)] & maskBit(bit)) != 0; |
2207 | 1.42M | } |
2208 | | |
2209 | | /* Set the given bit of a bignum. */ |
2210 | 9.16M | void APInt::tcSetBit(WordType *parts, unsigned bit) { |
2211 | 9.16M | parts[whichWord(bit)] |= maskBit(bit); |
2212 | 9.16M | } |
2213 | | |
2214 | | /* Clears the given bit of a bignum. */ |
2215 | 88 | void APInt::tcClearBit(WordType *parts, unsigned bit) { |
2216 | 88 | parts[whichWord(bit)] &= ~maskBit(bit); |
2217 | 88 | } |
2218 | | |
2219 | | /* Returns the bit number of the least significant set bit of a |
2220 | | number. If the input number has no bits set -1U is returned. */ |
2221 | 2.16M | unsigned APInt::tcLSB(const WordType *parts, unsigned n) { |
2222 | 2.22M | for (unsigned i = 0; i < n2.22M ; i++62.3k ) { |
2223 | 2.22M | if (parts[i] != 02.22M ) { |
2224 | 2.16M | unsigned lsb = partLSB(parts[i]); |
2225 | 2.16M | |
2226 | 2.16M | return lsb + i * APINT_BITS_PER_WORD; |
2227 | 2.16M | } |
2228 | 2.22M | } |
2229 | 2.16M | |
2230 | 0 | return -1U; |
2231 | 2.16M | } |
2232 | | |
2233 | | /* Returns the bit number of the most significant set bit of a number. |
2234 | | If the input number has no bits set -1U is returned. */ |
2235 | 6.40M | unsigned APInt::tcMSB(const WordType *parts, unsigned n) { |
2236 | 6.48M | do { |
2237 | 6.48M | --n; |
2238 | 6.48M | |
2239 | 6.48M | if (parts[n] != 06.48M ) { |
2240 | 6.37M | unsigned msb = partMSB(parts[n]); |
2241 | 6.37M | |
2242 | 6.37M | return msb + n * APINT_BITS_PER_WORD; |
2243 | 6.37M | } |
2244 | 113k | } while (n); |
2245 | 6.40M | |
2246 | 34.1k | return -1U; |
2247 | 6.40M | } |
2248 | | |
2249 | | /* Copy the bit vector of width srcBITS from SRC, starting at bit |
2250 | | srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes |
2251 | | the least significant bit of DST. All high bits above srcBITS in |
2252 | | DST are zero-filled. */ |
2253 | | void |
2254 | | APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, |
2255 | 1.44M | unsigned srcBits, unsigned srcLSB) { |
2256 | 1.44M | unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; |
2257 | 1.44M | assert(dstParts <= dstCount); |
2258 | 1.44M | |
2259 | 1.44M | unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; |
2260 | 1.44M | tcAssign (dst, src + firstSrcPart, dstParts); |
2261 | 1.44M | |
2262 | 1.44M | unsigned shift = srcLSB % APINT_BITS_PER_WORD; |
2263 | 1.44M | tcShiftRight (dst, dstParts, shift); |
2264 | 1.44M | |
2265 | 1.44M | /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC |
2266 | 1.44M | in DST. If this is less that srcBits, append the rest, else |
2267 | 1.44M | clear the high bits. */ |
2268 | 1.44M | unsigned n = dstParts * APINT_BITS_PER_WORD - shift; |
2269 | 1.44M | if (n < srcBits1.44M ) { |
2270 | 8.20k | WordType mask = lowBitMask (srcBits - n); |
2271 | 8.20k | dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) |
2272 | 8.20k | << n % APINT_BITS_PER_WORD); |
2273 | 1.44M | } else if (1.43M n > srcBits1.43M ) { |
2274 | 1.42M | if (srcBits % APINT_BITS_PER_WORD) |
2275 | 1.42M | dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); |
2276 | 1.43M | } |
2277 | 1.44M | |
2278 | 1.44M | /* Clear high parts. */ |
2279 | 1.51M | while (dstParts < dstCount) |
2280 | 74.6k | dst[dstParts++] = 0; |
2281 | 1.44M | } |
2282 | | |
2283 | | /* DST += RHS + C where C is zero or one. Returns the carry flag. */ |
2284 | | APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, |
2285 | 1.73M | WordType c, unsigned parts) { |
2286 | 1.73M | assert(c <= 1); |
2287 | 1.73M | |
2288 | 5.17M | for (unsigned i = 0; i < parts5.17M ; i++3.43M ) { |
2289 | 3.43M | WordType l = dst[i]; |
2290 | 3.43M | if (c3.43M ) { |
2291 | 386k | dst[i] += rhs[i] + 1; |
2292 | 386k | c = (dst[i] <= l); |
2293 | 3.43M | } else { |
2294 | 3.04M | dst[i] += rhs[i]; |
2295 | 3.04M | c = (dst[i] < l); |
2296 | 3.04M | } |
2297 | 3.43M | } |
2298 | 1.73M | |
2299 | 1.73M | return c; |
2300 | 1.73M | } |
2301 | | |
2302 | | /// This function adds a single "word" integer, src, to the multiple |
2303 | | /// "word" integer array, dst[]. dst[] is modified to reflect the addition and |
2304 | | /// 1 is returned if there is a carry out, otherwise 0 is returned. |
2305 | | /// @returns the carry of the addition. |
2306 | | APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, |
2307 | 11.6M | unsigned parts) { |
2308 | 12.4M | for (unsigned i = 0; i < parts12.4M ; ++i801k ) { |
2309 | 12.2M | dst[i] += src; |
2310 | 12.2M | if (dst[i] >= src) |
2311 | 11.4M | return 0; // No need to carry so exit early. |
2312 | 801k | src = 1; // Carry one to next digit. |
2313 | 801k | } |
2314 | 11.6M | |
2315 | 268k | return 1; |
2316 | 11.6M | } |
2317 | | |
2318 | | /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ |
2319 | | APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, |
2320 | 13.8M | WordType c, unsigned parts) { |
2321 | 13.8M | assert(c <= 1); |
2322 | 13.8M | |
2323 | 32.2M | for (unsigned i = 0; i < parts32.2M ; i++18.4M ) { |
2324 | 18.4M | WordType l = dst[i]; |
2325 | 18.4M | if (c18.4M ) { |
2326 | 1.28M | dst[i] -= rhs[i] + 1; |
2327 | 1.28M | c = (dst[i] >= l); |
2328 | 18.4M | } else { |
2329 | 17.1M | dst[i] -= rhs[i]; |
2330 | 17.1M | c = (dst[i] > l); |
2331 | 17.1M | } |
2332 | 18.4M | } |
2333 | 13.8M | |
2334 | 13.8M | return c; |
2335 | 13.8M | } |
2336 | | |
2337 | | /// This function subtracts a single "word" (64-bit word), src, from |
2338 | | /// the multi-word integer array, dst[], propagating the borrowed 1 value until |
2339 | | /// no further borrowing is needed or it runs out of "words" in dst. The result |
2340 | | /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not |
2341 | | /// exhausted. In other words, if src > dst then this function returns 1, |
2342 | | /// otherwise 0. |
2343 | | /// @returns the borrow out of the subtraction |
2344 | | APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, |
2345 | 4.20M | unsigned parts) { |
2346 | 4.57M | for (unsigned i = 0; i < parts4.57M ; ++i377k ) { |
2347 | 4.46M | WordType Dst = dst[i]; |
2348 | 4.46M | dst[i] -= src; |
2349 | 4.46M | if (src <= Dst) |
2350 | 4.09M | return 0; // No need to borrow so exit early. |
2351 | 377k | src = 1; // We have to "borrow 1" from next "word" |
2352 | 377k | } |
2353 | 4.20M | |
2354 | 109k | return 1; |
2355 | 4.20M | } |
2356 | | |
2357 | | /* Negate a bignum in-place. */ |
2358 | 1.71k | void APInt::tcNegate(WordType *dst, unsigned parts) { |
2359 | 1.71k | tcComplement(dst, parts); |
2360 | 1.71k | tcIncrement(dst, parts); |
2361 | 1.71k | } |
2362 | | |
2363 | | /* DST += SRC * MULTIPLIER + CARRY if add is true |
2364 | | DST = SRC * MULTIPLIER + CARRY if add is false |
2365 | | |
2366 | | Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC |
2367 | | they must start at the same point, i.e. DST == SRC. |
2368 | | |
2369 | | If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is |
2370 | | returned. Otherwise DST is filled with the least significant |
2371 | | DSTPARTS parts of the result, and if all of the omitted higher |
2372 | | parts were zero return zero, otherwise overflow occurred and |
2373 | | return one. */ |
2374 | | int APInt::tcMultiplyPart(WordType *dst, const WordType *src, |
2375 | | WordType multiplier, WordType carry, |
2376 | | unsigned srcParts, unsigned dstParts, |
2377 | 26.7M | bool add) { |
2378 | 26.7M | /* Otherwise our writes of DST kill our later reads of SRC. */ |
2379 | 26.7M | assert(dst <= src || dst >= src + srcParts); |
2380 | 26.7M | assert(dstParts <= srcParts + 1); |
2381 | 26.7M | |
2382 | 26.7M | /* N loops; minimum of dstParts and srcParts. */ |
2383 | 26.7M | unsigned n = std::min(dstParts, srcParts); |
2384 | 26.7M | |
2385 | 73.1M | for (unsigned i = 0; i < n73.1M ; i++46.3M ) { |
2386 | 46.3M | WordType low, mid, high, srcPart; |
2387 | 46.3M | |
2388 | 46.3M | /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. |
2389 | 46.3M | |
2390 | 46.3M | This cannot overflow, because |
2391 | 46.3M | |
2392 | 46.3M | (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) |
2393 | 46.3M | |
2394 | 46.3M | which is less than n^2. */ |
2395 | 46.3M | |
2396 | 46.3M | srcPart = src[i]; |
2397 | 46.3M | |
2398 | 46.3M | if (multiplier == 0 || 46.3M srcPart == 025.1M ) { |
2399 | 28.2M | low = carry; |
2400 | 28.2M | high = 0; |
2401 | 46.3M | } else { |
2402 | 18.0M | low = lowHalf(srcPart) * lowHalf(multiplier); |
2403 | 18.0M | high = highHalf(srcPart) * highHalf(multiplier); |
2404 | 18.0M | |
2405 | 18.0M | mid = lowHalf(srcPart) * highHalf(multiplier); |
2406 | 18.0M | high += highHalf(mid); |
2407 | 18.0M | mid <<= APINT_BITS_PER_WORD / 2; |
2408 | 18.0M | if (low + mid < low) |
2409 | 2.32M | high++; |
2410 | 18.0M | low += mid; |
2411 | 18.0M | |
2412 | 18.0M | mid = highHalf(srcPart) * lowHalf(multiplier); |
2413 | 18.0M | high += highHalf(mid); |
2414 | 18.0M | mid <<= APINT_BITS_PER_WORD / 2; |
2415 | 18.0M | if (low + mid < low) |
2416 | 5.92M | high++; |
2417 | 18.0M | low += mid; |
2418 | 18.0M | |
2419 | 18.0M | /* Now add carry. */ |
2420 | 18.0M | if (low + carry < low) |
2421 | 172k | high++; |
2422 | 18.0M | low += carry; |
2423 | 18.0M | } |
2424 | 46.3M | |
2425 | 46.3M | if (add46.3M ) { |
2426 | 46.2M | /* And now DST[i], and store the new low part there. */ |
2427 | 46.2M | if (low + dst[i] < low) |
2428 | 2.22M | high++; |
2429 | 46.2M | dst[i] += low; |
2430 | 46.2M | } else |
2431 | 55.3k | dst[i] = low; |
2432 | 46.3M | |
2433 | 46.3M | carry = high; |
2434 | 46.3M | } |
2435 | 26.7M | |
2436 | 26.7M | if (srcParts < dstParts26.7M ) { |
2437 | 626k | /* Full multiplication, there is no overflow. */ |
2438 | 626k | assert(srcParts + 1 == dstParts); |
2439 | 626k | dst[srcParts] = carry; |
2440 | 626k | return 0; |
2441 | 626k | } |
2442 | 26.1M | |
2443 | 26.1M | /* We overflowed if there is carry. */ |
2444 | 26.1M | if (26.1M carry26.1M ) |
2445 | 7.18M | return 1; |
2446 | 18.9M | |
2447 | 18.9M | /* We would overflow if any significant unwritten parts would be |
2448 | 18.9M | non-zero. This is true if any remaining src parts are non-zero |
2449 | 18.9M | and the multiplier is non-zero. */ |
2450 | 18.9M | if (18.9M multiplier18.9M ) |
2451 | 6.03M | for (unsigned i = dstParts; 5.67M i < srcParts6.03M ; i++360k ) |
2452 | 361k | if (361k src[i]361k ) |
2453 | 1.76k | return 1; |
2454 | 18.9M | |
2455 | 18.9M | /* We fitted in the narrow destination. */ |
2456 | 18.9M | return 0; |
2457 | 26.7M | } |
2458 | | |
2459 | | /* DST = LHS * RHS, where DST has the same width as the operands and |
2460 | | is filled with the least significant parts of the result. Returns |
2461 | | one if overflow occurred, otherwise zero. DST must be disjoint |
2462 | | from both operands. */ |
2463 | | int APInt::tcMultiply(WordType *dst, const WordType *lhs, |
2464 | 12.7M | const WordType *rhs, unsigned parts) { |
2465 | 12.7M | assert(dst != lhs && dst != rhs); |
2466 | 12.7M | |
2467 | 12.7M | int overflow = 0; |
2468 | 12.7M | tcSet(dst, 0, parts); |
2469 | 12.7M | |
2470 | 38.8M | for (unsigned i = 0; i < parts38.8M ; i++26.1M ) |
2471 | 26.1M | overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, |
2472 | 26.1M | parts - i, true); |
2473 | 12.7M | |
2474 | 12.7M | return overflow; |
2475 | 12.7M | } |
2476 | | |
2477 | | /// DST = LHS * RHS, where DST has width the sum of the widths of the |
2478 | | /// operands. No overflow occurs. DST must be disjoint from both operands. |
2479 | | void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, |
2480 | | const WordType *rhs, unsigned lhsParts, |
2481 | 153k | unsigned rhsParts) { |
2482 | 153k | /* Put the narrower number on the LHS for less loops below. */ |
2483 | 153k | if (lhsParts > rhsParts) |
2484 | 0 | return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); |
2485 | 153k | |
2486 | 153k | assert(dst != lhs && dst != rhs); |
2487 | 153k | |
2488 | 153k | tcSet(dst, 0, rhsParts); |
2489 | 153k | |
2490 | 346k | for (unsigned i = 0; i < lhsParts346k ; i++192k ) |
2491 | 192k | tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); |
2492 | 153k | } |
2493 | | |
2494 | | /* If RHS is zero LHS and REMAINDER are left unchanged, return one. |
2495 | | Otherwise set LHS to LHS / RHS with the fractional part discarded, |
2496 | | set REMAINDER to the remainder, return zero. i.e. |
2497 | | |
2498 | | OLD_LHS = RHS * LHS + REMAINDER |
2499 | | |
2500 | | SCRATCH is a bignum of the same size as the operands and result for |
2501 | | use by the routine; its contents need not be initialized and are |
2502 | | destroyed. LHS, REMAINDER and SCRATCH must be distinct. |
2503 | | */ |
2504 | | int APInt::tcDivide(WordType *lhs, const WordType *rhs, |
2505 | | WordType *remainder, WordType *srhs, |
2506 | 0 | unsigned parts) { |
2507 | 0 | assert(lhs != remainder && lhs != srhs && remainder != srhs); |
2508 | 0 |
|
2509 | 0 | unsigned shiftCount = tcMSB(rhs, parts) + 1; |
2510 | 0 | if (shiftCount == 0) |
2511 | 0 | return true; |
2512 | 0 |
|
2513 | 0 | shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; |
2514 | 0 | unsigned n = shiftCount / APINT_BITS_PER_WORD; |
2515 | 0 | WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); |
2516 | 0 |
|
2517 | 0 | tcAssign(srhs, rhs, parts); |
2518 | 0 | tcShiftLeft(srhs, parts, shiftCount); |
2519 | 0 | tcAssign(remainder, lhs, parts); |
2520 | 0 | tcSet(lhs, 0, parts); |
2521 | 0 |
|
2522 | 0 | /* Loop, subtracting SRHS if REMAINDER is greater and adding that to |
2523 | 0 | the total. */ |
2524 | 0 | for (;;) { |
2525 | 0 | int compare = tcCompare(remainder, srhs, parts); |
2526 | 0 | if (compare >= 00 ) { |
2527 | 0 | tcSubtract(remainder, srhs, 0, parts); |
2528 | 0 | lhs[n] |= mask; |
2529 | 0 | } |
2530 | 0 |
|
2531 | 0 | if (shiftCount == 0) |
2532 | 0 | break; |
2533 | 0 | shiftCount--; |
2534 | 0 | tcShiftRight(srhs, parts, 1); |
2535 | 0 | if ((mask >>= 1) == 00 ) { |
2536 | 0 | mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); |
2537 | 0 | n--; |
2538 | 0 | } |
2539 | 0 | } |
2540 | 0 |
|
2541 | 0 | return false; |
2542 | 0 | } |
2543 | | |
2544 | | /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are |
2545 | | /// no restrictions on Count. |
2546 | 40.3M | void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { |
2547 | 40.3M | // Don't bother performing a no-op shift. |
2548 | 40.3M | if (!Count) |
2549 | 1.99k | return; |
2550 | 40.3M | |
2551 | 40.3M | // WordShift is the inter-part shift; BitShift is the intra-part shift. |
2552 | 40.3M | unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); |
2553 | 40.3M | unsigned BitShift = Count % APINT_BITS_PER_WORD; |
2554 | 40.3M | |
2555 | 40.3M | // Fastpath for moving by whole words. |
2556 | 40.3M | if (BitShift == 040.3M ) { |
2557 | 615k | std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); |
2558 | 40.3M | } else { |
2559 | 108M | while (Words-- > WordShift108M ) { |
2560 | 68.9M | Dst[Words] = Dst[Words - WordShift] << BitShift; |
2561 | 68.9M | if (Words > WordShift) |
2562 | 29.1M | Dst[Words] |= |
2563 | 29.1M | Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); |
2564 | 68.9M | } |
2565 | 39.7M | } |
2566 | 40.3M | |
2567 | 40.3M | // Fill in the remainder with 0s. |
2568 | 40.3M | std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); |
2569 | 40.3M | } |
2570 | | |
2571 | | /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There |
2572 | | /// are no restrictions on Count. |
2573 | 4.64M | void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { |
2574 | 4.64M | // Don't bother performing a no-op shift. |
2575 | 4.64M | if (!Count) |
2576 | 975k | return; |
2577 | 3.66M | |
2578 | 3.66M | // WordShift is the inter-part shift; BitShift is the intra-part shift. |
2579 | 3.66M | unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); |
2580 | 3.66M | unsigned BitShift = Count % APINT_BITS_PER_WORD; |
2581 | 3.66M | |
2582 | 3.66M | unsigned WordsToMove = Words - WordShift; |
2583 | 3.66M | // Fastpath for moving by whole words. |
2584 | 3.66M | if (BitShift == 03.66M ) { |
2585 | 253k | std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); |
2586 | 3.66M | } else { |
2587 | 23.1M | for (unsigned i = 0; i != WordsToMove23.1M ; ++i19.7M ) { |
2588 | 19.7M | Dst[i] = Dst[i + WordShift] >> BitShift; |
2589 | 19.7M | if (i + 1 != WordsToMove) |
2590 | 16.3M | Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); |
2591 | 19.7M | } |
2592 | 3.41M | } |
2593 | 4.64M | |
2594 | 4.64M | // Fill in the remainder with 0s. |
2595 | 4.64M | std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); |
2596 | 4.64M | } |
2597 | | |
2598 | | /* Bitwise and of two bignums. */ |
2599 | 2.24M | void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { |
2600 | 6.75M | for (unsigned i = 0; i < parts6.75M ; i++4.51M ) |
2601 | 4.51M | dst[i] &= rhs[i]; |
2602 | 2.24M | } |
2603 | | |
2604 | | /* Bitwise inclusive or of two bignums. */ |
2605 | 13.4M | void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { |
2606 | 40.3M | for (unsigned i = 0; i < parts40.3M ; i++26.8M ) |
2607 | 26.8M | dst[i] |= rhs[i]; |
2608 | 13.4M | } |
2609 | | |
2610 | | /* Bitwise exclusive or of two bignums. */ |
2611 | 198k | void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { |
2612 | 597k | for (unsigned i = 0; i < parts597k ; i++398k ) |
2613 | 398k | dst[i] ^= rhs[i]; |
2614 | 198k | } |
2615 | | |
2616 | | /* Complement a bignum in-place. */ |
2617 | 1.73M | void APInt::tcComplement(WordType *dst, unsigned parts) { |
2618 | 5.23M | for (unsigned i = 0; i < parts5.23M ; i++3.49M ) |
2619 | 3.49M | dst[i] = ~dst[i]; |
2620 | 1.73M | } |
2621 | | |
2622 | | /* Comparison (unsigned) of two bignums. */ |
2623 | | int APInt::tcCompare(const WordType *lhs, const WordType *rhs, |
2624 | 55.7M | unsigned parts) { |
2625 | 78.4M | while (parts78.4M ) { |
2626 | 73.3M | parts--; |
2627 | 73.3M | if (lhs[parts] != rhs[parts]) |
2628 | 50.5M | return (lhs[parts] > rhs[parts]) ? 50.5M 117.5M : -133.0M ; |
2629 | 73.3M | } |
2630 | 55.7M | |
2631 | 5.12M | return 0; |
2632 | 55.7M | } |
2633 | | |
2634 | | /* Set the least significant BITS bits of a bignum, clear the |
2635 | | rest. */ |
2636 | | void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, |
2637 | 151 | unsigned bits) { |
2638 | 151 | unsigned i = 0; |
2639 | 154 | while (bits > APINT_BITS_PER_WORD154 ) { |
2640 | 3 | dst[i++] = ~(WordType) 0; |
2641 | 3 | bits -= APINT_BITS_PER_WORD; |
2642 | 3 | } |
2643 | 151 | |
2644 | 151 | if (bits) |
2645 | 117 | dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); |
2646 | 151 | |
2647 | 214 | while (i < parts) |
2648 | 63 | dst[i++] = 0; |
2649 | 151 | } |