1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32
33 #define DEBUG_TYPE "apint"
34
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
getClearedMemory(unsigned numWords)37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38 uint64_t *result = new uint64_t[numWords];
39 memset(result, 0, numWords * sizeof(uint64_t));
40 return result;
41 }
42
43 /// A utility function for allocating memory and checking for allocation
44 /// failure. The content is not zeroed.
getMemory(unsigned numWords)45 inline static uint64_t* getMemory(unsigned numWords) {
46 return new uint64_t[numWords];
47 }
48
49 /// A utility function that converts a character to a digit.
getDigit(char cdigit,uint8_t radix)50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51 unsigned r;
52
53 if (radix == 16 || radix == 36) {
54 r = cdigit - '0';
55 if (r <= 9)
56 return r;
57
58 r = cdigit - 'A';
59 if (r <= radix - 11U)
60 return r + 10;
61
62 r = cdigit - 'a';
63 if (r <= radix - 11U)
64 return r + 10;
65
66 radix = 10;
67 }
68
69 r = cdigit - '0';
70 if (r < radix)
71 return r;
72
73 return -1U;
74 }
75
76
initSlowCase(uint64_t val,bool isSigned)77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78 U.pVal = getClearedMemory(getNumWords());
79 U.pVal[0] = val;
80 if (isSigned && int64_t(val) < 0)
81 for (unsigned i = 1; i < getNumWords(); ++i)
82 U.pVal[i] = WORDTYPE_MAX;
83 clearUnusedBits();
84 }
85
initSlowCase(const APInt & that)86 void APInt::initSlowCase(const APInt& that) {
87 U.pVal = getMemory(getNumWords());
88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90
initFromArray(ArrayRef<uint64_t> bigVal)91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92 assert(BitWidth && "Bitwidth too small");
93 assert(bigVal.data() && "Null pointer detected!");
94 if (isSingleWord())
95 U.VAL = bigVal[0];
96 else {
97 // Get memory, cleared to 0
98 U.pVal = getClearedMemory(getNumWords());
99 // Calculate the number of words to copy
100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101 // Copy the words from bigVal to pVal
102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103 }
104 // Make sure unused high bits are cleared
105 clearUnusedBits();
106 }
107
APInt(unsigned numBits,ArrayRef<uint64_t> bigVal)108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109 : BitWidth(numBits) {
110 initFromArray(bigVal);
111 }
112
APInt(unsigned numBits,unsigned numWords,const uint64_t bigVal[])113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114 : BitWidth(numBits) {
115 initFromArray(makeArrayRef(bigVal, numWords));
116 }
117
APInt(unsigned numbits,StringRef Str,uint8_t radix)118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119 : BitWidth(numbits) {
120 assert(BitWidth && "Bitwidth too small");
121 fromString(numbits, Str, radix);
122 }
123
reallocate(unsigned NewBitWidth)124 void APInt::reallocate(unsigned NewBitWidth) {
125 // If the number of words is the same we can just change the width and stop.
126 if (getNumWords() == getNumWords(NewBitWidth)) {
127 BitWidth = NewBitWidth;
128 return;
129 }
130
131 // If we have an allocation, delete it.
132 if (!isSingleWord())
133 delete [] U.pVal;
134
135 // Update BitWidth.
136 BitWidth = NewBitWidth;
137
138 // If we are supposed to have an allocation, create it.
139 if (!isSingleWord())
140 U.pVal = getMemory(getNumWords());
141 }
142
AssignSlowCase(const APInt & RHS)143 void APInt::AssignSlowCase(const APInt& RHS) {
144 // Don't do anything for X = X
145 if (this == &RHS)
146 return;
147
148 // Adjust the bit width and handle allocations as necessary.
149 reallocate(RHS.getBitWidth());
150
151 // Copy the data.
152 if (isSingleWord())
153 U.VAL = RHS.U.VAL;
154 else
155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157
158 /// This method 'profiles' an APInt for use with FoldingSet.
Profile(FoldingSetNodeID & ID) const159 void APInt::Profile(FoldingSetNodeID& ID) const {
160 ID.AddInteger(BitWidth);
161
162 if (isSingleWord()) {
163 ID.AddInteger(U.VAL);
164 return;
165 }
166
167 unsigned NumWords = getNumWords();
168 for (unsigned i = 0; i < NumWords; ++i)
169 ID.AddInteger(U.pVal[i]);
170 }
171
172 /// Prefix increment operator. Increments the APInt by one.
operator ++()173 APInt& APInt::operator++() {
174 if (isSingleWord())
175 ++U.VAL;
176 else
177 tcIncrement(U.pVal, getNumWords());
178 return clearUnusedBits();
179 }
180
181 /// Prefix decrement operator. Decrements the APInt by one.
operator --()182 APInt& APInt::operator--() {
183 if (isSingleWord())
184 --U.VAL;
185 else
186 tcDecrement(U.pVal, getNumWords());
187 return clearUnusedBits();
188 }
189
190 /// Adds the RHS APInt to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
operator +=(const APInt & RHS)193 APInt& APInt::operator+=(const APInt& RHS) {
194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195 if (isSingleWord())
196 U.VAL += RHS.U.VAL;
197 else
198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199 return clearUnusedBits();
200 }
201
operator +=(uint64_t RHS)202 APInt& APInt::operator+=(uint64_t RHS) {
203 if (isSingleWord())
204 U.VAL += RHS;
205 else
206 tcAddPart(U.pVal, RHS, getNumWords());
207 return clearUnusedBits();
208 }
209
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
operator -=(const APInt & RHS)213 APInt& APInt::operator-=(const APInt& RHS) {
214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215 if (isSingleWord())
216 U.VAL -= RHS.U.VAL;
217 else
218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219 return clearUnusedBits();
220 }
221
operator -=(uint64_t RHS)222 APInt& APInt::operator-=(uint64_t RHS) {
223 if (isSingleWord())
224 U.VAL -= RHS;
225 else
226 tcSubtractPart(U.pVal, RHS, getNumWords());
227 return clearUnusedBits();
228 }
229
operator *(const APInt & RHS) const230 APInt APInt::operator*(const APInt& RHS) const {
231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232 if (isSingleWord())
233 return APInt(BitWidth, U.VAL * RHS.U.VAL);
234
235 APInt Result(getMemory(getNumWords()), getBitWidth());
236
237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238
239 Result.clearUnusedBits();
240 return Result;
241 }
242
AndAssignSlowCase(const APInt & RHS)243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244 tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246
OrAssignSlowCase(const APInt & RHS)247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248 tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250
XorAssignSlowCase(const APInt & RHS)251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252 tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254
operator *=(const APInt & RHS)255 APInt& APInt::operator*=(const APInt& RHS) {
256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257 *this = *this * RHS;
258 return *this;
259 }
260
operator *=(uint64_t RHS)261 APInt& APInt::operator*=(uint64_t RHS) {
262 if (isSingleWord()) {
263 U.VAL *= RHS;
264 } else {
265 unsigned NumWords = getNumWords();
266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267 }
268 return clearUnusedBits();
269 }
270
EqualSlowCase(const APInt & RHS) const271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274
compare(const APInt & RHS) const275 int APInt::compare(const APInt& RHS) const {
276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277 if (isSingleWord())
278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279
280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282
compareSigned(const APInt & RHS) const283 int APInt::compareSigned(const APInt& RHS) const {
284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285 if (isSingleWord()) {
286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289 }
290
291 bool lhsNeg = isNegative();
292 bool rhsNeg = RHS.isNegative();
293
294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295 if (lhsNeg != rhsNeg)
296 return lhsNeg ? -1 : 1;
297
298 // Otherwise we can just use an unsigned comparison, because even negative
299 // numbers compare correctly this way if both have the same signed-ness.
300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302
setBitsSlowCase(unsigned loBit,unsigned hiBit)303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304 unsigned loWord = whichWord(loBit);
305 unsigned hiWord = whichWord(hiBit);
306
307 // Create an initial mask for the low word with zeros below loBit.
308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309
310 // If hiBit is not aligned, we need a high mask.
311 unsigned hiShiftAmt = whichBit(hiBit);
312 if (hiShiftAmt != 0) {
313 // Create a high mask with zeros above hiBit.
314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316 // set the bits in hiWord.
317 if (hiWord == loWord)
318 loMask &= hiMask;
319 else
320 U.pVal[hiWord] |= hiMask;
321 }
322 // Apply the mask to the low word.
323 U.pVal[loWord] |= loMask;
324
325 // Fill any words between loWord and hiWord with all ones.
326 for (unsigned word = loWord + 1; word < hiWord; ++word)
327 U.pVal[word] = WORDTYPE_MAX;
328 }
329
330 /// Toggle every bit to its opposite value.
flipAllBitsSlowCase()331 void APInt::flipAllBitsSlowCase() {
332 tcComplement(U.pVal, getNumWords());
333 clearUnusedBits();
334 }
335
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
flipBit(unsigned bitPosition)339 void APInt::flipBit(unsigned bitPosition) {
340 assert(bitPosition < BitWidth && "Out of the bit-width range!");
341 setBitVal(bitPosition, !(*this)[bitPosition]);
342 }
343
insertBits(const APInt & subBits,unsigned bitPosition)344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
345 unsigned subBitWidth = subBits.getBitWidth();
346 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
347 "Illegal bit insertion");
348
349 // Insertion is a direct copy.
350 if (subBitWidth == BitWidth) {
351 *this = subBits;
352 return;
353 }
354
355 // Single word result can be done as a direct bitmask.
356 if (isSingleWord()) {
357 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
358 U.VAL &= ~(mask << bitPosition);
359 U.VAL |= (subBits.U.VAL << bitPosition);
360 return;
361 }
362
363 unsigned loBit = whichBit(bitPosition);
364 unsigned loWord = whichWord(bitPosition);
365 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
366
367 // Insertion within a single word can be done as a direct bitmask.
368 if (loWord == hi1Word) {
369 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
370 U.pVal[loWord] &= ~(mask << loBit);
371 U.pVal[loWord] |= (subBits.U.VAL << loBit);
372 return;
373 }
374
375 // Insert on word boundaries.
376 if (loBit == 0) {
377 // Direct copy whole words.
378 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
379 memcpy(U.pVal + loWord, subBits.getRawData(),
380 numWholeSubWords * APINT_WORD_SIZE);
381
382 // Mask+insert remaining bits.
383 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
384 if (remainingBits != 0) {
385 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
386 U.pVal[hi1Word] &= ~mask;
387 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
388 }
389 return;
390 }
391
392 // General case - set/clear individual bits in dst based on src.
393 // TODO - there is scope for optimization here, but at the moment this code
394 // path is barely used so prefer readability over performance.
395 for (unsigned i = 0; i != subBitWidth; ++i)
396 setBitVal(bitPosition + i, subBits[i]);
397 }
398
insertBits(uint64_t subBits,unsigned bitPosition,unsigned numBits)399 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
400 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
401 subBits &= maskBits;
402 if (isSingleWord()) {
403 U.VAL &= ~(maskBits << bitPosition);
404 U.VAL |= subBits << bitPosition;
405 return;
406 }
407
408 unsigned loBit = whichBit(bitPosition);
409 unsigned loWord = whichWord(bitPosition);
410 unsigned hiWord = whichWord(bitPosition + numBits - 1);
411 if (loWord == hiWord) {
412 U.pVal[loWord] &= ~(maskBits << loBit);
413 U.pVal[loWord] |= subBits << loBit;
414 return;
415 }
416
417 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
418 unsigned wordBits = 8 * sizeof(WordType);
419 U.pVal[loWord] &= ~(maskBits << loBit);
420 U.pVal[loWord] |= subBits << loBit;
421
422 U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
423 U.pVal[hiWord] |= subBits >> (wordBits - loBit);
424 }
425
extractBits(unsigned numBits,unsigned bitPosition) const426 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
427 assert(numBits > 0 && "Can't extract zero bits");
428 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
429 "Illegal bit extraction");
430
431 if (isSingleWord())
432 return APInt(numBits, U.VAL >> bitPosition);
433
434 unsigned loBit = whichBit(bitPosition);
435 unsigned loWord = whichWord(bitPosition);
436 unsigned hiWord = whichWord(bitPosition + numBits - 1);
437
438 // Single word result extracting bits from a single word source.
439 if (loWord == hiWord)
440 return APInt(numBits, U.pVal[loWord] >> loBit);
441
442 // Extracting bits that start on a source word boundary can be done
443 // as a fast memory copy.
444 if (loBit == 0)
445 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
446
447 // General case - shift + copy source words directly into place.
448 APInt Result(numBits, 0);
449 unsigned NumSrcWords = getNumWords();
450 unsigned NumDstWords = Result.getNumWords();
451
452 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
453 for (unsigned word = 0; word < NumDstWords; ++word) {
454 uint64_t w0 = U.pVal[loWord + word];
455 uint64_t w1 =
456 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
457 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
458 }
459
460 return Result.clearUnusedBits();
461 }
462
extractBitsAsZExtValue(unsigned numBits,unsigned bitPosition) const463 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
464 unsigned bitPosition) const {
465 assert(numBits > 0 && "Can't extract zero bits");
466 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
467 "Illegal bit extraction");
468 assert(numBits <= 64 && "Illegal bit extraction");
469
470 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
471 if (isSingleWord())
472 return (U.VAL >> bitPosition) & maskBits;
473
474 unsigned loBit = whichBit(bitPosition);
475 unsigned loWord = whichWord(bitPosition);
476 unsigned hiWord = whichWord(bitPosition + numBits - 1);
477 if (loWord == hiWord)
478 return (U.pVal[loWord] >> loBit) & maskBits;
479
480 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
481 unsigned wordBits = 8 * sizeof(WordType);
482 uint64_t retBits = U.pVal[loWord] >> loBit;
483 retBits |= U.pVal[hiWord] << (wordBits - loBit);
484 retBits &= maskBits;
485 return retBits;
486 }
487
getBitsNeeded(StringRef str,uint8_t radix)488 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
489 assert(!str.empty() && "Invalid string length");
490 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
491 radix == 36) &&
492 "Radix should be 2, 8, 10, 16, or 36!");
493
494 size_t slen = str.size();
495
496 // Each computation below needs to know if it's negative.
497 StringRef::iterator p = str.begin();
498 unsigned isNegative = *p == '-';
499 if (*p == '-' || *p == '+') {
500 p++;
501 slen--;
502 assert(slen && "String is only a sign, needs a value.");
503 }
504
505 // For radixes of power-of-two values, the bits required is accurately and
506 // easily computed
507 if (radix == 2)
508 return slen + isNegative;
509 if (radix == 8)
510 return slen * 3 + isNegative;
511 if (radix == 16)
512 return slen * 4 + isNegative;
513
514 // FIXME: base 36
515
516 // This is grossly inefficient but accurate. We could probably do something
517 // with a computation of roughly slen*64/20 and then adjust by the value of
518 // the first few digits. But, I'm not sure how accurate that could be.
519
520 // Compute a sufficient number of bits that is always large enough but might
521 // be too large. This avoids the assertion in the constructor. This
522 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
523 // bits in that case.
524 unsigned sufficient
525 = radix == 10? (slen == 1 ? 4 : slen * 64/18)
526 : (slen == 1 ? 7 : slen * 16/3);
527
528 // Convert to the actual binary value.
529 APInt tmp(sufficient, StringRef(p, slen), radix);
530
531 // Compute how many bits are required. If the log is infinite, assume we need
532 // just bit. If the log is exact and value is negative, then the value is
533 // MinSignedValue with (log + 1) bits.
534 unsigned log = tmp.logBase2();
535 if (log == (unsigned)-1) {
536 return isNegative + 1;
537 } else if (isNegative && tmp.isPowerOf2()) {
538 return isNegative + log;
539 } else {
540 return isNegative + log + 1;
541 }
542 }
543
hash_value(const APInt & Arg)544 hash_code llvm::hash_value(const APInt &Arg) {
545 if (Arg.isSingleWord())
546 return hash_combine(Arg.BitWidth, Arg.U.VAL);
547
548 return hash_combine(
549 Arg.BitWidth,
550 hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
551 }
552
isSplat(unsigned SplatSizeInBits) const553 bool APInt::isSplat(unsigned SplatSizeInBits) const {
554 assert(getBitWidth() % SplatSizeInBits == 0 &&
555 "SplatSizeInBits must divide width!");
556 // We can check that all parts of an integer are equal by making use of a
557 // little trick: rotate and check if it's still the same value.
558 return *this == rotl(SplatSizeInBits);
559 }
560
561 /// This function returns the high "numBits" bits of this APInt.
getHiBits(unsigned numBits) const562 APInt APInt::getHiBits(unsigned numBits) const {
563 return this->lshr(BitWidth - numBits);
564 }
565
566 /// This function returns the low "numBits" bits of this APInt.
getLoBits(unsigned numBits) const567 APInt APInt::getLoBits(unsigned numBits) const {
568 APInt Result(getLowBitsSet(BitWidth, numBits));
569 Result &= *this;
570 return Result;
571 }
572
573 /// Return a value containing V broadcasted over NewLen bits.
getSplat(unsigned NewLen,const APInt & V)574 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
575 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
576
577 APInt Val = V.zextOrSelf(NewLen);
578 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
579 Val |= Val << I;
580
581 return Val;
582 }
583
countLeadingZerosSlowCase() const584 unsigned APInt::countLeadingZerosSlowCase() const {
585 unsigned Count = 0;
586 for (int i = getNumWords()-1; i >= 0; --i) {
587 uint64_t V = U.pVal[i];
588 if (V == 0)
589 Count += APINT_BITS_PER_WORD;
590 else {
591 Count += llvm::countLeadingZeros(V);
592 break;
593 }
594 }
595 // Adjust for unused bits in the most significant word (they are zero).
596 unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
597 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
598 return Count;
599 }
600
countLeadingOnesSlowCase() const601 unsigned APInt::countLeadingOnesSlowCase() const {
602 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
603 unsigned shift;
604 if (!highWordBits) {
605 highWordBits = APINT_BITS_PER_WORD;
606 shift = 0;
607 } else {
608 shift = APINT_BITS_PER_WORD - highWordBits;
609 }
610 int i = getNumWords() - 1;
611 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
612 if (Count == highWordBits) {
613 for (i--; i >= 0; --i) {
614 if (U.pVal[i] == WORDTYPE_MAX)
615 Count += APINT_BITS_PER_WORD;
616 else {
617 Count += llvm::countLeadingOnes(U.pVal[i]);
618 break;
619 }
620 }
621 }
622 return Count;
623 }
624
countTrailingZerosSlowCase() const625 unsigned APInt::countTrailingZerosSlowCase() const {
626 unsigned Count = 0;
627 unsigned i = 0;
628 for (; i < getNumWords() && U.pVal[i] == 0; ++i)
629 Count += APINT_BITS_PER_WORD;
630 if (i < getNumWords())
631 Count += llvm::countTrailingZeros(U.pVal[i]);
632 return std::min(Count, BitWidth);
633 }
634
countTrailingOnesSlowCase() const635 unsigned APInt::countTrailingOnesSlowCase() const {
636 unsigned Count = 0;
637 unsigned i = 0;
638 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
639 Count += APINT_BITS_PER_WORD;
640 if (i < getNumWords())
641 Count += llvm::countTrailingOnes(U.pVal[i]);
642 assert(Count <= BitWidth);
643 return Count;
644 }
645
countPopulationSlowCase() const646 unsigned APInt::countPopulationSlowCase() const {
647 unsigned Count = 0;
648 for (unsigned i = 0; i < getNumWords(); ++i)
649 Count += llvm::countPopulation(U.pVal[i]);
650 return Count;
651 }
652
intersectsSlowCase(const APInt & RHS) const653 bool APInt::intersectsSlowCase(const APInt &RHS) const {
654 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
655 if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
656 return true;
657
658 return false;
659 }
660
isSubsetOfSlowCase(const APInt & RHS) const661 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
662 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
663 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
664 return false;
665
666 return true;
667 }
668
byteSwap() const669 APInt APInt::byteSwap() const {
670 assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
671 if (BitWidth == 16)
672 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
673 if (BitWidth == 32)
674 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
675 if (BitWidth <= 64) {
676 uint64_t Tmp1 = ByteSwap_64(U.VAL);
677 Tmp1 >>= (64 - BitWidth);
678 return APInt(BitWidth, Tmp1);
679 }
680
681 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
682 for (unsigned I = 0, N = getNumWords(); I != N; ++I)
683 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
684 if (Result.BitWidth != BitWidth) {
685 Result.lshrInPlace(Result.BitWidth - BitWidth);
686 Result.BitWidth = BitWidth;
687 }
688 return Result;
689 }
690
reverseBits() const691 APInt APInt::reverseBits() const {
692 switch (BitWidth) {
693 case 64:
694 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
695 case 32:
696 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
697 case 16:
698 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
699 case 8:
700 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
701 default:
702 break;
703 }
704
705 APInt Val(*this);
706 APInt Reversed(BitWidth, 0);
707 unsigned S = BitWidth;
708
709 for (; Val != 0; Val.lshrInPlace(1)) {
710 Reversed <<= 1;
711 Reversed |= Val[0];
712 --S;
713 }
714
715 Reversed <<= S;
716 return Reversed;
717 }
718
GreatestCommonDivisor(APInt A,APInt B)719 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
720 // Fast-path a common case.
721 if (A == B) return A;
722
723 // Corner cases: if either operand is zero, the other is the gcd.
724 if (!A) return B;
725 if (!B) return A;
726
727 // Count common powers of 2 and remove all other powers of 2.
728 unsigned Pow2;
729 {
730 unsigned Pow2_A = A.countTrailingZeros();
731 unsigned Pow2_B = B.countTrailingZeros();
732 if (Pow2_A > Pow2_B) {
733 A.lshrInPlace(Pow2_A - Pow2_B);
734 Pow2 = Pow2_B;
735 } else if (Pow2_B > Pow2_A) {
736 B.lshrInPlace(Pow2_B - Pow2_A);
737 Pow2 = Pow2_A;
738 } else {
739 Pow2 = Pow2_A;
740 }
741 }
742
743 // Both operands are odd multiples of 2^Pow_2:
744 //
745 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
746 //
747 // This is a modified version of Stein's algorithm, taking advantage of
748 // efficient countTrailingZeros().
749 while (A != B) {
750 if (A.ugt(B)) {
751 A -= B;
752 A.lshrInPlace(A.countTrailingZeros() - Pow2);
753 } else {
754 B -= A;
755 B.lshrInPlace(B.countTrailingZeros() - Pow2);
756 }
757 }
758
759 return A;
760 }
761
RoundDoubleToAPInt(double Double,unsigned width)762 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
763 uint64_t I = bit_cast<uint64_t>(Double);
764
765 // Get the sign bit from the highest order bit
766 bool isNeg = I >> 63;
767
768 // Get the 11-bit exponent and adjust for the 1023 bit bias
769 int64_t exp = ((I >> 52) & 0x7ff) - 1023;
770
771 // If the exponent is negative, the value is < 0 so just return 0.
772 if (exp < 0)
773 return APInt(width, 0u);
774
775 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
776 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
777
778 // If the exponent doesn't shift all bits out of the mantissa
779 if (exp < 52)
780 return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
781 APInt(width, mantissa >> (52 - exp));
782
783 // If the client didn't provide enough bits for us to shift the mantissa into
784 // then the result is undefined, just return 0
785 if (width <= exp - 52)
786 return APInt(width, 0);
787
788 // Otherwise, we have to shift the mantissa bits up to the right location
789 APInt Tmp(width, mantissa);
790 Tmp <<= (unsigned)exp - 52;
791 return isNeg ? -Tmp : Tmp;
792 }
793
794 /// This function converts this APInt to a double.
795 /// The layout for double is as following (IEEE Standard 754):
796 /// --------------------------------------
797 /// | Sign Exponent Fraction Bias |
798 /// |-------------------------------------- |
799 /// | 1[63] 11[62-52] 52[51-00] 1023 |
800 /// --------------------------------------
roundToDouble(bool isSigned) const801 double APInt::roundToDouble(bool isSigned) const {
802
803 // Handle the simple case where the value is contained in one uint64_t.
804 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
805 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
806 if (isSigned) {
807 int64_t sext = SignExtend64(getWord(0), BitWidth);
808 return double(sext);
809 } else
810 return double(getWord(0));
811 }
812
813 // Determine if the value is negative.
814 bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
815
816 // Construct the absolute value if we're negative.
817 APInt Tmp(isNeg ? -(*this) : (*this));
818
819 // Figure out how many bits we're using.
820 unsigned n = Tmp.getActiveBits();
821
822 // The exponent (without bias normalization) is just the number of bits
823 // we are using. Note that the sign bit is gone since we constructed the
824 // absolute value.
825 uint64_t exp = n;
826
827 // Return infinity for exponent overflow
828 if (exp > 1023) {
829 if (!isSigned || !isNeg)
830 return std::numeric_limits<double>::infinity();
831 else
832 return -std::numeric_limits<double>::infinity();
833 }
834 exp += 1023; // Increment for 1023 bias
835
836 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
837 // extract the high 52 bits from the correct words in pVal.
838 uint64_t mantissa;
839 unsigned hiWord = whichWord(n-1);
840 if (hiWord == 0) {
841 mantissa = Tmp.U.pVal[0];
842 if (n > 52)
843 mantissa >>= n - 52; // shift down, we want the top 52 bits.
844 } else {
845 assert(hiWord > 0 && "huh?");
846 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
847 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
848 mantissa = hibits | lobits;
849 }
850
851 // The leading bit of mantissa is implicit, so get rid of it.
852 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
853 uint64_t I = sign | (exp << 52) | mantissa;
854 return bit_cast<double>(I);
855 }
856
857 // Truncate to new width.
trunc(unsigned width) const858 APInt APInt::trunc(unsigned width) const {
859 assert(width < BitWidth && "Invalid APInt Truncate request");
860 assert(width && "Can't truncate to 0 bits");
861
862 if (width <= APINT_BITS_PER_WORD)
863 return APInt(width, getRawData()[0]);
864
865 APInt Result(getMemory(getNumWords(width)), width);
866
867 // Copy full words.
868 unsigned i;
869 for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
870 Result.U.pVal[i] = U.pVal[i];
871
872 // Truncate and copy any partial word.
873 unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
874 if (bits != 0)
875 Result.U.pVal[i] = U.pVal[i] << bits >> bits;
876
877 return Result;
878 }
879
880 // Truncate to new width with unsigned saturation.
truncUSat(unsigned width) const881 APInt APInt::truncUSat(unsigned width) const {
882 assert(width < BitWidth && "Invalid APInt Truncate request");
883 assert(width && "Can't truncate to 0 bits");
884
885 // Can we just losslessly truncate it?
886 if (isIntN(width))
887 return trunc(width);
888 // If not, then just return the new limit.
889 return APInt::getMaxValue(width);
890 }
891
892 // Truncate to new width with signed saturation.
truncSSat(unsigned width) const893 APInt APInt::truncSSat(unsigned width) const {
894 assert(width < BitWidth && "Invalid APInt Truncate request");
895 assert(width && "Can't truncate to 0 bits");
896
897 // Can we just losslessly truncate it?
898 if (isSignedIntN(width))
899 return trunc(width);
900 // If not, then just return the new limits.
901 return isNegative() ? APInt::getSignedMinValue(width)
902 : APInt::getSignedMaxValue(width);
903 }
904
905 // Sign extend to a new width.
sext(unsigned Width) const906 APInt APInt::sext(unsigned Width) const {
907 assert(Width > BitWidth && "Invalid APInt SignExtend request");
908
909 if (Width <= APINT_BITS_PER_WORD)
910 return APInt(Width, SignExtend64(U.VAL, BitWidth));
911
912 APInt Result(getMemory(getNumWords(Width)), Width);
913
914 // Copy words.
915 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
916
917 // Sign extend the last word since there may be unused bits in the input.
918 Result.U.pVal[getNumWords() - 1] =
919 SignExtend64(Result.U.pVal[getNumWords() - 1],
920 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
921
922 // Fill with sign bits.
923 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
924 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
925 Result.clearUnusedBits();
926 return Result;
927 }
928
929 // Zero extend to a new width.
zext(unsigned width) const930 APInt APInt::zext(unsigned width) const {
931 assert(width > BitWidth && "Invalid APInt ZeroExtend request");
932
933 if (width <= APINT_BITS_PER_WORD)
934 return APInt(width, U.VAL);
935
936 APInt Result(getMemory(getNumWords(width)), width);
937
938 // Copy words.
939 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
940
941 // Zero remaining words.
942 std::memset(Result.U.pVal + getNumWords(), 0,
943 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
944
945 return Result;
946 }
947
zextOrTrunc(unsigned width) const948 APInt APInt::zextOrTrunc(unsigned width) const {
949 if (BitWidth < width)
950 return zext(width);
951 if (BitWidth > width)
952 return trunc(width);
953 return *this;
954 }
955
sextOrTrunc(unsigned width) const956 APInt APInt::sextOrTrunc(unsigned width) const {
957 if (BitWidth < width)
958 return sext(width);
959 if (BitWidth > width)
960 return trunc(width);
961 return *this;
962 }
963
truncOrSelf(unsigned width) const964 APInt APInt::truncOrSelf(unsigned width) const {
965 if (BitWidth > width)
966 return trunc(width);
967 return *this;
968 }
969
zextOrSelf(unsigned width) const970 APInt APInt::zextOrSelf(unsigned width) const {
971 if (BitWidth < width)
972 return zext(width);
973 return *this;
974 }
975
sextOrSelf(unsigned width) const976 APInt APInt::sextOrSelf(unsigned width) const {
977 if (BitWidth < width)
978 return sext(width);
979 return *this;
980 }
981
982 /// Arithmetic right-shift this APInt by shiftAmt.
983 /// Arithmetic right-shift function.
ashrInPlace(const APInt & shiftAmt)984 void APInt::ashrInPlace(const APInt &shiftAmt) {
985 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
986 }
987
988 /// Arithmetic right-shift this APInt by shiftAmt.
989 /// Arithmetic right-shift function.
ashrSlowCase(unsigned ShiftAmt)990 void APInt::ashrSlowCase(unsigned ShiftAmt) {
991 // Don't bother performing a no-op shift.
992 if (!ShiftAmt)
993 return;
994
995 // Save the original sign bit for later.
996 bool Negative = isNegative();
997
998 // WordShift is the inter-part shift; BitShift is intra-part shift.
999 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1000 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1001
1002 unsigned WordsToMove = getNumWords() - WordShift;
1003 if (WordsToMove != 0) {
1004 // Sign extend the last word to fill in the unused bits.
1005 U.pVal[getNumWords() - 1] = SignExtend64(
1006 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1007
1008 // Fastpath for moving by whole words.
1009 if (BitShift == 0) {
1010 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1011 } else {
1012 // Move the words containing significant bits.
1013 for (unsigned i = 0; i != WordsToMove - 1; ++i)
1014 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1015 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1016
1017 // Handle the last word which has no high bits to copy.
1018 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1019 // Sign extend one more time.
1020 U.pVal[WordsToMove - 1] =
1021 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1022 }
1023 }
1024
1025 // Fill in the remainder based on the original sign.
1026 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1027 WordShift * APINT_WORD_SIZE);
1028 clearUnusedBits();
1029 }
1030
1031 /// Logical right-shift this APInt by shiftAmt.
1032 /// Logical right-shift function.
lshrInPlace(const APInt & shiftAmt)1033 void APInt::lshrInPlace(const APInt &shiftAmt) {
1034 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1035 }
1036
1037 /// Logical right-shift this APInt by shiftAmt.
1038 /// Logical right-shift function.
lshrSlowCase(unsigned ShiftAmt)1039 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1040 tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1041 }
1042
1043 /// Left-shift this APInt by shiftAmt.
1044 /// Left-shift function.
operator <<=(const APInt & shiftAmt)1045 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1046 // It's undefined behavior in C to shift by BitWidth or greater.
1047 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1048 return *this;
1049 }
1050
shlSlowCase(unsigned ShiftAmt)1051 void APInt::shlSlowCase(unsigned ShiftAmt) {
1052 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1053 clearUnusedBits();
1054 }
1055
1056 // Calculate the rotate amount modulo the bit width.
rotateModulo(unsigned BitWidth,const APInt & rotateAmt)1057 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1058 unsigned rotBitWidth = rotateAmt.getBitWidth();
1059 APInt rot = rotateAmt;
1060 if (rotBitWidth < BitWidth) {
1061 // Extend the rotate APInt, so that the urem doesn't divide by 0.
1062 // e.g. APInt(1, 32) would give APInt(1, 0).
1063 rot = rotateAmt.zext(BitWidth);
1064 }
1065 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1066 return rot.getLimitedValue(BitWidth);
1067 }
1068
rotl(const APInt & rotateAmt) const1069 APInt APInt::rotl(const APInt &rotateAmt) const {
1070 return rotl(rotateModulo(BitWidth, rotateAmt));
1071 }
1072
rotl(unsigned rotateAmt) const1073 APInt APInt::rotl(unsigned rotateAmt) const {
1074 rotateAmt %= BitWidth;
1075 if (rotateAmt == 0)
1076 return *this;
1077 return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1078 }
1079
rotr(const APInt & rotateAmt) const1080 APInt APInt::rotr(const APInt &rotateAmt) const {
1081 return rotr(rotateModulo(BitWidth, rotateAmt));
1082 }
1083
rotr(unsigned rotateAmt) const1084 APInt APInt::rotr(unsigned rotateAmt) const {
1085 rotateAmt %= BitWidth;
1086 if (rotateAmt == 0)
1087 return *this;
1088 return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1089 }
1090
1091 // Square Root - this method computes and returns the square root of "this".
1092 // Three mechanisms are used for computation. For small values (<= 5 bits),
1093 // a table lookup is done. This gets some performance for common cases. For
1094 // values using less than 52 bits, the value is converted to double and then
1095 // the libc sqrt function is called. The result is rounded and then converted
1096 // back to a uint64_t which is then used to construct the result. Finally,
1097 // the Babylonian method for computing square roots is used.
sqrt() const1098 APInt APInt::sqrt() const {
1099
1100 // Determine the magnitude of the value.
1101 unsigned magnitude = getActiveBits();
1102
1103 // Use a fast table for some small values. This also gets rid of some
1104 // rounding errors in libc sqrt for small values.
1105 if (magnitude <= 5) {
1106 static const uint8_t results[32] = {
1107 /* 0 */ 0,
1108 /* 1- 2 */ 1, 1,
1109 /* 3- 6 */ 2, 2, 2, 2,
1110 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1111 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1112 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1113 /* 31 */ 6
1114 };
1115 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1116 }
1117
1118 // If the magnitude of the value fits in less than 52 bits (the precision of
1119 // an IEEE double precision floating point value), then we can use the
1120 // libc sqrt function which will probably use a hardware sqrt computation.
1121 // This should be faster than the algorithm below.
1122 if (magnitude < 52) {
1123 return APInt(BitWidth,
1124 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1125 : U.pVal[0])))));
1126 }
1127
1128 // Okay, all the short cuts are exhausted. We must compute it. The following
1129 // is a classical Babylonian method for computing the square root. This code
1130 // was adapted to APInt from a wikipedia article on such computations.
1131 // See http://www.wikipedia.org/ and go to the page named
1132 // Calculate_an_integer_square_root.
1133 unsigned nbits = BitWidth, i = 4;
1134 APInt testy(BitWidth, 16);
1135 APInt x_old(BitWidth, 1);
1136 APInt x_new(BitWidth, 0);
1137 APInt two(BitWidth, 2);
1138
1139 // Select a good starting value using binary logarithms.
1140 for (;; i += 2, testy = testy.shl(2))
1141 if (i >= nbits || this->ule(testy)) {
1142 x_old = x_old.shl(i / 2);
1143 break;
1144 }
1145
1146 // Use the Babylonian method to arrive at the integer square root:
1147 for (;;) {
1148 x_new = (this->udiv(x_old) + x_old).udiv(two);
1149 if (x_old.ule(x_new))
1150 break;
1151 x_old = x_new;
1152 }
1153
1154 // Make sure we return the closest approximation
1155 // NOTE: The rounding calculation below is correct. It will produce an
1156 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1157 // determined to be a rounding issue with pari/gp as it begins to use a
1158 // floating point representation after 192 bits. There are no discrepancies
1159 // between this algorithm and pari/gp for bit widths < 192 bits.
1160 APInt square(x_old * x_old);
1161 APInt nextSquare((x_old + 1) * (x_old +1));
1162 if (this->ult(square))
1163 return x_old;
1164 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1165 APInt midpoint((nextSquare - square).udiv(two));
1166 APInt offset(*this - square);
1167 if (offset.ult(midpoint))
1168 return x_old;
1169 return x_old + 1;
1170 }
1171
1172 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1173 /// iterative extended Euclidean algorithm is used to solve for this value,
1174 /// however we simplify it to speed up calculating only the inverse, and take
1175 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1176 /// (potentially large) APInts around.
1177 /// WARNING: a value of '0' may be returned,
1178 /// signifying that no multiplicative inverse exists!
multiplicativeInverse(const APInt & modulo) const1179 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1180 assert(ult(modulo) && "This APInt must be smaller than the modulo");
1181
1182 // Using the properties listed at the following web page (accessed 06/21/08):
1183 // http://www.numbertheory.org/php/euclid.html
1184 // (especially the properties numbered 3, 4 and 9) it can be proved that
1185 // BitWidth bits suffice for all the computations in the algorithm implemented
1186 // below. More precisely, this number of bits suffice if the multiplicative
1187 // inverse exists, but may not suffice for the general extended Euclidean
1188 // algorithm.
1189
1190 APInt r[2] = { modulo, *this };
1191 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1192 APInt q(BitWidth, 0);
1193
1194 unsigned i;
1195 for (i = 0; r[i^1] != 0; i ^= 1) {
1196 // An overview of the math without the confusing bit-flipping:
1197 // q = r[i-2] / r[i-1]
1198 // r[i] = r[i-2] % r[i-1]
1199 // t[i] = t[i-2] - t[i-1] * q
1200 udivrem(r[i], r[i^1], q, r[i]);
1201 t[i] -= t[i^1] * q;
1202 }
1203
1204 // If this APInt and the modulo are not coprime, there is no multiplicative
1205 // inverse, so return 0. We check this by looking at the next-to-last
1206 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1207 // algorithm.
1208 if (r[i] != 1)
1209 return APInt(BitWidth, 0);
1210
1211 // The next-to-last t is the multiplicative inverse. However, we are
1212 // interested in a positive inverse. Calculate a positive one from a negative
1213 // one if necessary. A simple addition of the modulo suffices because
1214 // abs(t[i]) is known to be less than *this/2 (see the link above).
1215 if (t[i].isNegative())
1216 t[i] += modulo;
1217
1218 return std::move(t[i]);
1219 }
1220
1221 /// Calculate the magic numbers required to implement a signed integer division
1222 /// by a constant as a sequence of multiplies, adds and shifts. Requires that
1223 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
1224 /// Warren, Jr., chapter 10.
magic() const1225 APInt::ms APInt::magic() const {
1226 const APInt& d = *this;
1227 unsigned p;
1228 APInt ad, anc, delta, q1, r1, q2, r2, t;
1229 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1230 struct ms mag;
1231
1232 ad = d.abs();
1233 t = signedMin + (d.lshr(d.getBitWidth() - 1));
1234 anc = t - 1 - t.urem(ad); // absolute value of nc
1235 p = d.getBitWidth() - 1; // initialize p
1236 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
1237 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
1238 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
1239 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
1240 do {
1241 p = p + 1;
1242 q1 = q1<<1; // update q1 = 2p/abs(nc)
1243 r1 = r1<<1; // update r1 = rem(2p/abs(nc))
1244 if (r1.uge(anc)) { // must be unsigned comparison
1245 q1 = q1 + 1;
1246 r1 = r1 - anc;
1247 }
1248 q2 = q2<<1; // update q2 = 2p/abs(d)
1249 r2 = r2<<1; // update r2 = rem(2p/abs(d))
1250 if (r2.uge(ad)) { // must be unsigned comparison
1251 q2 = q2 + 1;
1252 r2 = r2 - ad;
1253 }
1254 delta = ad - r2;
1255 } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1256
1257 mag.m = q2 + 1;
1258 if (d.isNegative()) mag.m = -mag.m; // resulting magic number
1259 mag.s = p - d.getBitWidth(); // resulting shift
1260 return mag;
1261 }
1262
1263 /// Calculate the magic numbers required to implement an unsigned integer
1264 /// division by a constant as a sequence of multiplies, adds and shifts.
1265 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
1266 /// S. Warren, Jr., chapter 10.
1267 /// LeadingZeros can be used to simplify the calculation if the upper bits
1268 /// of the divided value are known zero.
magicu(unsigned LeadingZeros) const1269 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1270 const APInt& d = *this;
1271 unsigned p;
1272 APInt nc, delta, q1, r1, q2, r2;
1273 struct mu magu;
1274 magu.a = 0; // initialize "add" indicator
1275 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1276 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1277 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1278
1279 nc = allOnes - (allOnes - d).urem(d);
1280 p = d.getBitWidth() - 1; // initialize p
1281 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
1282 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
1283 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
1284 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
1285 do {
1286 p = p + 1;
1287 if (r1.uge(nc - r1)) {
1288 q1 = q1 + q1 + 1; // update q1
1289 r1 = r1 + r1 - nc; // update r1
1290 }
1291 else {
1292 q1 = q1+q1; // update q1
1293 r1 = r1+r1; // update r1
1294 }
1295 if ((r2 + 1).uge(d - r2)) {
1296 if (q2.uge(signedMax)) magu.a = 1;
1297 q2 = q2+q2 + 1; // update q2
1298 r2 = r2+r2 + 1 - d; // update r2
1299 }
1300 else {
1301 if (q2.uge(signedMin)) magu.a = 1;
1302 q2 = q2+q2; // update q2
1303 r2 = r2+r2 + 1; // update r2
1304 }
1305 delta = d - 1 - r2;
1306 } while (p < d.getBitWidth()*2 &&
1307 (q1.ult(delta) || (q1 == delta && r1 == 0)));
1308 magu.m = q2 + 1; // resulting magic number
1309 magu.s = p - d.getBitWidth(); // resulting shift
1310 return magu;
1311 }
1312
1313 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1314 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1315 /// variables here have the same names as in the algorithm. Comments explain
1316 /// the algorithm and any deviation from it.
KnuthDiv(uint32_t * u,uint32_t * v,uint32_t * q,uint32_t * r,unsigned m,unsigned n)1317 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1318 unsigned m, unsigned n) {
1319 assert(u && "Must provide dividend");
1320 assert(v && "Must provide divisor");
1321 assert(q && "Must provide quotient");
1322 assert(u != v && u != q && v != q && "Must use different memory");
1323 assert(n>1 && "n must be > 1");
1324
1325 // b denotes the base of the number system. In our case b is 2^32.
1326 const uint64_t b = uint64_t(1) << 32;
1327
1328 // The DEBUG macros here tend to be spam in the debug output if you're not
1329 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1330 #ifdef KNUTH_DEBUG
1331 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1332 #else
1333 #define DEBUG_KNUTH(X) do {} while(false)
1334 #endif
1335
1336 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1337 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1338 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1339 DEBUG_KNUTH(dbgs() << " by");
1340 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1341 DEBUG_KNUTH(dbgs() << '\n');
1342 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1343 // u and v by d. Note that we have taken Knuth's advice here to use a power
1344 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1345 // 2 allows us to shift instead of multiply and it is easy to determine the
1346 // shift amount from the leading zeros. We are basically normalizing the u
1347 // and v so that its high bits are shifted to the top of v's range without
1348 // overflow. Note that this can require an extra word in u so that u must
1349 // be of length m+n+1.
1350 unsigned shift = countLeadingZeros(v[n-1]);
1351 uint32_t v_carry = 0;
1352 uint32_t u_carry = 0;
1353 if (shift) {
1354 for (unsigned i = 0; i < m+n; ++i) {
1355 uint32_t u_tmp = u[i] >> (32 - shift);
1356 u[i] = (u[i] << shift) | u_carry;
1357 u_carry = u_tmp;
1358 }
1359 for (unsigned i = 0; i < n; ++i) {
1360 uint32_t v_tmp = v[i] >> (32 - shift);
1361 v[i] = (v[i] << shift) | v_carry;
1362 v_carry = v_tmp;
1363 }
1364 }
1365 u[m+n] = u_carry;
1366
1367 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1368 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1369 DEBUG_KNUTH(dbgs() << " by");
1370 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1371 DEBUG_KNUTH(dbgs() << '\n');
1372
1373 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1374 int j = m;
1375 do {
1376 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1377 // D3. [Calculate q'.].
1378 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1379 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1380 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1381 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1382 // on v[n-2] determines at high speed most of the cases in which the trial
1383 // value qp is one too large, and it eliminates all cases where qp is two
1384 // too large.
1385 uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1386 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1387 uint64_t qp = dividend / v[n-1];
1388 uint64_t rp = dividend % v[n-1];
1389 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1390 qp--;
1391 rp += v[n-1];
1392 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1393 qp--;
1394 }
1395 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1396
1397 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1398 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1399 // consists of a simple multiplication by a one-place number, combined with
1400 // a subtraction.
1401 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1402 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1403 // true value plus b**(n+1), namely as the b's complement of
1404 // the true value, and a "borrow" to the left should be remembered.
1405 int64_t borrow = 0;
1406 for (unsigned i = 0; i < n; ++i) {
1407 uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1408 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1409 u[j+i] = Lo_32(subres);
1410 borrow = Hi_32(p) - Hi_32(subres);
1411 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1412 << ", borrow = " << borrow << '\n');
1413 }
1414 bool isNeg = u[j+n] < borrow;
1415 u[j+n] -= Lo_32(borrow);
1416
1417 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1418 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1419 DEBUG_KNUTH(dbgs() << '\n');
1420
1421 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1422 // negative, go to step D6; otherwise go on to step D7.
1423 q[j] = Lo_32(qp);
1424 if (isNeg) {
1425 // D6. [Add back]. The probability that this step is necessary is very
1426 // small, on the order of only 2/b. Make sure that test data accounts for
1427 // this possibility. Decrease q[j] by 1
1428 q[j]--;
1429 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1430 // A carry will occur to the left of u[j+n], and it should be ignored
1431 // since it cancels with the borrow that occurred in D4.
1432 bool carry = false;
1433 for (unsigned i = 0; i < n; i++) {
1434 uint32_t limit = std::min(u[j+i],v[i]);
1435 u[j+i] += v[i] + carry;
1436 carry = u[j+i] < limit || (carry && u[j+i] == limit);
1437 }
1438 u[j+n] += carry;
1439 }
1440 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1441 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1442 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1443
1444 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1445 } while (--j >= 0);
1446
1447 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1448 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1449 DEBUG_KNUTH(dbgs() << '\n');
1450
1451 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1452 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1453 // compute the remainder (urem uses this).
1454 if (r) {
1455 // The value d is expressed by the "shift" value above since we avoided
1456 // multiplication by d by using a shift left. So, all we have to do is
1457 // shift right here.
1458 if (shift) {
1459 uint32_t carry = 0;
1460 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1461 for (int i = n-1; i >= 0; i--) {
1462 r[i] = (u[i] >> shift) | carry;
1463 carry = u[i] << (32 - shift);
1464 DEBUG_KNUTH(dbgs() << " " << r[i]);
1465 }
1466 } else {
1467 for (int i = n-1; i >= 0; i--) {
1468 r[i] = u[i];
1469 DEBUG_KNUTH(dbgs() << " " << r[i]);
1470 }
1471 }
1472 DEBUG_KNUTH(dbgs() << '\n');
1473 }
1474 DEBUG_KNUTH(dbgs() << '\n');
1475 }
1476
divide(const WordType * LHS,unsigned lhsWords,const WordType * RHS,unsigned rhsWords,WordType * Quotient,WordType * Remainder)1477 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1478 unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1479 assert(lhsWords >= rhsWords && "Fractional result");
1480
1481 // First, compose the values into an array of 32-bit words instead of
1482 // 64-bit words. This is a necessity of both the "short division" algorithm
1483 // and the Knuth "classical algorithm" which requires there to be native
1484 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1485 // can't use 64-bit operands here because we don't have native results of
1486 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1487 // work on large-endian machines.
1488 unsigned n = rhsWords * 2;
1489 unsigned m = (lhsWords * 2) - n;
1490
1491 // Allocate space for the temporary values we need either on the stack, if
1492 // it will fit, or on the heap if it won't.
1493 uint32_t SPACE[128];
1494 uint32_t *U = nullptr;
1495 uint32_t *V = nullptr;
1496 uint32_t *Q = nullptr;
1497 uint32_t *R = nullptr;
1498 if ((Remainder?4:3)*n+2*m+1 <= 128) {
1499 U = &SPACE[0];
1500 V = &SPACE[m+n+1];
1501 Q = &SPACE[(m+n+1) + n];
1502 if (Remainder)
1503 R = &SPACE[(m+n+1) + n + (m+n)];
1504 } else {
1505 U = new uint32_t[m + n + 1];
1506 V = new uint32_t[n];
1507 Q = new uint32_t[m+n];
1508 if (Remainder)
1509 R = new uint32_t[n];
1510 }
1511
1512 // Initialize the dividend
1513 memset(U, 0, (m+n+1)*sizeof(uint32_t));
1514 for (unsigned i = 0; i < lhsWords; ++i) {
1515 uint64_t tmp = LHS[i];
1516 U[i * 2] = Lo_32(tmp);
1517 U[i * 2 + 1] = Hi_32(tmp);
1518 }
1519 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1520
1521 // Initialize the divisor
1522 memset(V, 0, (n)*sizeof(uint32_t));
1523 for (unsigned i = 0; i < rhsWords; ++i) {
1524 uint64_t tmp = RHS[i];
1525 V[i * 2] = Lo_32(tmp);
1526 V[i * 2 + 1] = Hi_32(tmp);
1527 }
1528
1529 // initialize the quotient and remainder
1530 memset(Q, 0, (m+n) * sizeof(uint32_t));
1531 if (Remainder)
1532 memset(R, 0, n * sizeof(uint32_t));
1533
1534 // Now, adjust m and n for the Knuth division. n is the number of words in
1535 // the divisor. m is the number of words by which the dividend exceeds the
1536 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1537 // contain any zero words or the Knuth algorithm fails.
1538 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1539 n--;
1540 m++;
1541 }
1542 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1543 m--;
1544
1545 // If we're left with only a single word for the divisor, Knuth doesn't work
1546 // so we implement the short division algorithm here. This is much simpler
1547 // and faster because we are certain that we can divide a 64-bit quantity
1548 // by a 32-bit quantity at hardware speed and short division is simply a
1549 // series of such operations. This is just like doing short division but we
1550 // are using base 2^32 instead of base 10.
1551 assert(n != 0 && "Divide by zero?");
1552 if (n == 1) {
1553 uint32_t divisor = V[0];
1554 uint32_t remainder = 0;
1555 for (int i = m; i >= 0; i--) {
1556 uint64_t partial_dividend = Make_64(remainder, U[i]);
1557 if (partial_dividend == 0) {
1558 Q[i] = 0;
1559 remainder = 0;
1560 } else if (partial_dividend < divisor) {
1561 Q[i] = 0;
1562 remainder = Lo_32(partial_dividend);
1563 } else if (partial_dividend == divisor) {
1564 Q[i] = 1;
1565 remainder = 0;
1566 } else {
1567 Q[i] = Lo_32(partial_dividend / divisor);
1568 remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1569 }
1570 }
1571 if (R)
1572 R[0] = remainder;
1573 } else {
1574 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1575 // case n > 1.
1576 KnuthDiv(U, V, Q, R, m, n);
1577 }
1578
1579 // If the caller wants the quotient
1580 if (Quotient) {
1581 for (unsigned i = 0; i < lhsWords; ++i)
1582 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1583 }
1584
1585 // If the caller wants the remainder
1586 if (Remainder) {
1587 for (unsigned i = 0; i < rhsWords; ++i)
1588 Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1589 }
1590
1591 // Clean up the memory we allocated.
1592 if (U != &SPACE[0]) {
1593 delete [] U;
1594 delete [] V;
1595 delete [] Q;
1596 delete [] R;
1597 }
1598 }
1599
udiv(const APInt & RHS) const1600 APInt APInt::udiv(const APInt &RHS) const {
1601 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1602
1603 // First, deal with the easy case
1604 if (isSingleWord()) {
1605 assert(RHS.U.VAL != 0 && "Divide by zero?");
1606 return APInt(BitWidth, U.VAL / RHS.U.VAL);
1607 }
1608
1609 // Get some facts about the LHS and RHS number of bits and words
1610 unsigned lhsWords = getNumWords(getActiveBits());
1611 unsigned rhsBits = RHS.getActiveBits();
1612 unsigned rhsWords = getNumWords(rhsBits);
1613 assert(rhsWords && "Divided by zero???");
1614
1615 // Deal with some degenerate cases
1616 if (!lhsWords)
1617 // 0 / X ===> 0
1618 return APInt(BitWidth, 0);
1619 if (rhsBits == 1)
1620 // X / 1 ===> X
1621 return *this;
1622 if (lhsWords < rhsWords || this->ult(RHS))
1623 // X / Y ===> 0, iff X < Y
1624 return APInt(BitWidth, 0);
1625 if (*this == RHS)
1626 // X / X ===> 1
1627 return APInt(BitWidth, 1);
1628 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1629 // All high words are zero, just use native divide
1630 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1631
1632 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1633 APInt Quotient(BitWidth, 0); // to hold result.
1634 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1635 return Quotient;
1636 }
1637
udiv(uint64_t RHS) const1638 APInt APInt::udiv(uint64_t RHS) const {
1639 assert(RHS != 0 && "Divide by zero?");
1640
1641 // First, deal with the easy case
1642 if (isSingleWord())
1643 return APInt(BitWidth, U.VAL / RHS);
1644
1645 // Get some facts about the LHS words.
1646 unsigned lhsWords = getNumWords(getActiveBits());
1647
1648 // Deal with some degenerate cases
1649 if (!lhsWords)
1650 // 0 / X ===> 0
1651 return APInt(BitWidth, 0);
1652 if (RHS == 1)
1653 // X / 1 ===> X
1654 return *this;
1655 if (this->ult(RHS))
1656 // X / Y ===> 0, iff X < Y
1657 return APInt(BitWidth, 0);
1658 if (*this == RHS)
1659 // X / X ===> 1
1660 return APInt(BitWidth, 1);
1661 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1662 // All high words are zero, just use native divide
1663 return APInt(BitWidth, this->U.pVal[0] / RHS);
1664
1665 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1666 APInt Quotient(BitWidth, 0); // to hold result.
1667 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1668 return Quotient;
1669 }
1670
sdiv(const APInt & RHS) const1671 APInt APInt::sdiv(const APInt &RHS) const {
1672 if (isNegative()) {
1673 if (RHS.isNegative())
1674 return (-(*this)).udiv(-RHS);
1675 return -((-(*this)).udiv(RHS));
1676 }
1677 if (RHS.isNegative())
1678 return -(this->udiv(-RHS));
1679 return this->udiv(RHS);
1680 }
1681
sdiv(int64_t RHS) const1682 APInt APInt::sdiv(int64_t RHS) const {
1683 if (isNegative()) {
1684 if (RHS < 0)
1685 return (-(*this)).udiv(-RHS);
1686 return -((-(*this)).udiv(RHS));
1687 }
1688 if (RHS < 0)
1689 return -(this->udiv(-RHS));
1690 return this->udiv(RHS);
1691 }
1692
urem(const APInt & RHS) const1693 APInt APInt::urem(const APInt &RHS) const {
1694 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1695 if (isSingleWord()) {
1696 assert(RHS.U.VAL != 0 && "Remainder by zero?");
1697 return APInt(BitWidth, U.VAL % RHS.U.VAL);
1698 }
1699
1700 // Get some facts about the LHS
1701 unsigned lhsWords = getNumWords(getActiveBits());
1702
1703 // Get some facts about the RHS
1704 unsigned rhsBits = RHS.getActiveBits();
1705 unsigned rhsWords = getNumWords(rhsBits);
1706 assert(rhsWords && "Performing remainder operation by zero ???");
1707
1708 // Check the degenerate cases
1709 if (lhsWords == 0)
1710 // 0 % Y ===> 0
1711 return APInt(BitWidth, 0);
1712 if (rhsBits == 1)
1713 // X % 1 ===> 0
1714 return APInt(BitWidth, 0);
1715 if (lhsWords < rhsWords || this->ult(RHS))
1716 // X % Y ===> X, iff X < Y
1717 return *this;
1718 if (*this == RHS)
1719 // X % X == 0;
1720 return APInt(BitWidth, 0);
1721 if (lhsWords == 1)
1722 // All high words are zero, just use native remainder
1723 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1724
1725 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1726 APInt Remainder(BitWidth, 0);
1727 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1728 return Remainder;
1729 }
1730
urem(uint64_t RHS) const1731 uint64_t APInt::urem(uint64_t RHS) const {
1732 assert(RHS != 0 && "Remainder by zero?");
1733
1734 if (isSingleWord())
1735 return U.VAL % RHS;
1736
1737 // Get some facts about the LHS
1738 unsigned lhsWords = getNumWords(getActiveBits());
1739
1740 // Check the degenerate cases
1741 if (lhsWords == 0)
1742 // 0 % Y ===> 0
1743 return 0;
1744 if (RHS == 1)
1745 // X % 1 ===> 0
1746 return 0;
1747 if (this->ult(RHS))
1748 // X % Y ===> X, iff X < Y
1749 return getZExtValue();
1750 if (*this == RHS)
1751 // X % X == 0;
1752 return 0;
1753 if (lhsWords == 1)
1754 // All high words are zero, just use native remainder
1755 return U.pVal[0] % RHS;
1756
1757 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1758 uint64_t Remainder;
1759 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1760 return Remainder;
1761 }
1762
srem(const APInt & RHS) const1763 APInt APInt::srem(const APInt &RHS) const {
1764 if (isNegative()) {
1765 if (RHS.isNegative())
1766 return -((-(*this)).urem(-RHS));
1767 return -((-(*this)).urem(RHS));
1768 }
1769 if (RHS.isNegative())
1770 return this->urem(-RHS);
1771 return this->urem(RHS);
1772 }
1773
srem(int64_t RHS) const1774 int64_t APInt::srem(int64_t RHS) const {
1775 if (isNegative()) {
1776 if (RHS < 0)
1777 return -((-(*this)).urem(-RHS));
1778 return -((-(*this)).urem(RHS));
1779 }
1780 if (RHS < 0)
1781 return this->urem(-RHS);
1782 return this->urem(RHS);
1783 }
1784
udivrem(const APInt & LHS,const APInt & RHS,APInt & Quotient,APInt & Remainder)1785 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1786 APInt &Quotient, APInt &Remainder) {
1787 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1788 unsigned BitWidth = LHS.BitWidth;
1789
1790 // First, deal with the easy case
1791 if (LHS.isSingleWord()) {
1792 assert(RHS.U.VAL != 0 && "Divide by zero?");
1793 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1794 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1795 Quotient = APInt(BitWidth, QuotVal);
1796 Remainder = APInt(BitWidth, RemVal);
1797 return;
1798 }
1799
1800 // Get some size facts about the dividend and divisor
1801 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1802 unsigned rhsBits = RHS.getActiveBits();
1803 unsigned rhsWords = getNumWords(rhsBits);
1804 assert(rhsWords && "Performing divrem operation by zero ???");
1805
1806 // Check the degenerate cases
1807 if (lhsWords == 0) {
1808 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1809 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0
1810 return;
1811 }
1812
1813 if (rhsBits == 1) {
1814 Quotient = LHS; // X / 1 ===> X
1815 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0
1816 }
1817
1818 if (lhsWords < rhsWords || LHS.ult(RHS)) {
1819 Remainder = LHS; // X % Y ===> X, iff X < Y
1820 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1821 return;
1822 }
1823
1824 if (LHS == RHS) {
1825 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1826 Remainder = APInt(BitWidth, 0); // X % X ===> 0;
1827 return;
1828 }
1829
1830 // Make sure there is enough space to hold the results.
1831 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1832 // change the size. This is necessary if Quotient or Remainder is aliased
1833 // with LHS or RHS.
1834 Quotient.reallocate(BitWidth);
1835 Remainder.reallocate(BitWidth);
1836
1837 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1838 // There is only one word to consider so use the native versions.
1839 uint64_t lhsValue = LHS.U.pVal[0];
1840 uint64_t rhsValue = RHS.U.pVal[0];
1841 Quotient = lhsValue / rhsValue;
1842 Remainder = lhsValue % rhsValue;
1843 return;
1844 }
1845
1846 // Okay, lets do it the long way
1847 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1848 Remainder.U.pVal);
1849 // Clear the rest of the Quotient and Remainder.
1850 std::memset(Quotient.U.pVal + lhsWords, 0,
1851 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1852 std::memset(Remainder.U.pVal + rhsWords, 0,
1853 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1854 }
1855
udivrem(const APInt & LHS,uint64_t RHS,APInt & Quotient,uint64_t & Remainder)1856 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1857 uint64_t &Remainder) {
1858 assert(RHS != 0 && "Divide by zero?");
1859 unsigned BitWidth = LHS.BitWidth;
1860
1861 // First, deal with the easy case
1862 if (LHS.isSingleWord()) {
1863 uint64_t QuotVal = LHS.U.VAL / RHS;
1864 Remainder = LHS.U.VAL % RHS;
1865 Quotient = APInt(BitWidth, QuotVal);
1866 return;
1867 }
1868
1869 // Get some size facts about the dividend and divisor
1870 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1871
1872 // Check the degenerate cases
1873 if (lhsWords == 0) {
1874 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1875 Remainder = 0; // 0 % Y ===> 0
1876 return;
1877 }
1878
1879 if (RHS == 1) {
1880 Quotient = LHS; // X / 1 ===> X
1881 Remainder = 0; // X % 1 ===> 0
1882 return;
1883 }
1884
1885 if (LHS.ult(RHS)) {
1886 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
1887 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1888 return;
1889 }
1890
1891 if (LHS == RHS) {
1892 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1893 Remainder = 0; // X % X ===> 0;
1894 return;
1895 }
1896
1897 // Make sure there is enough space to hold the results.
1898 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1899 // change the size. This is necessary if Quotient is aliased with LHS.
1900 Quotient.reallocate(BitWidth);
1901
1902 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1903 // There is only one word to consider so use the native versions.
1904 uint64_t lhsValue = LHS.U.pVal[0];
1905 Quotient = lhsValue / RHS;
1906 Remainder = lhsValue % RHS;
1907 return;
1908 }
1909
1910 // Okay, lets do it the long way
1911 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1912 // Clear the rest of the Quotient.
1913 std::memset(Quotient.U.pVal + lhsWords, 0,
1914 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1915 }
1916
sdivrem(const APInt & LHS,const APInt & RHS,APInt & Quotient,APInt & Remainder)1917 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1918 APInt &Quotient, APInt &Remainder) {
1919 if (LHS.isNegative()) {
1920 if (RHS.isNegative())
1921 APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1922 else {
1923 APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1924 Quotient.negate();
1925 }
1926 Remainder.negate();
1927 } else if (RHS.isNegative()) {
1928 APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1929 Quotient.negate();
1930 } else {
1931 APInt::udivrem(LHS, RHS, Quotient, Remainder);
1932 }
1933 }
1934
sdivrem(const APInt & LHS,int64_t RHS,APInt & Quotient,int64_t & Remainder)1935 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1936 APInt &Quotient, int64_t &Remainder) {
1937 uint64_t R = Remainder;
1938 if (LHS.isNegative()) {
1939 if (RHS < 0)
1940 APInt::udivrem(-LHS, -RHS, Quotient, R);
1941 else {
1942 APInt::udivrem(-LHS, RHS, Quotient, R);
1943 Quotient.negate();
1944 }
1945 R = -R;
1946 } else if (RHS < 0) {
1947 APInt::udivrem(LHS, -RHS, Quotient, R);
1948 Quotient.negate();
1949 } else {
1950 APInt::udivrem(LHS, RHS, Quotient, R);
1951 }
1952 Remainder = R;
1953 }
1954
sadd_ov(const APInt & RHS,bool & Overflow) const1955 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1956 APInt Res = *this+RHS;
1957 Overflow = isNonNegative() == RHS.isNonNegative() &&
1958 Res.isNonNegative() != isNonNegative();
1959 return Res;
1960 }
1961
uadd_ov(const APInt & RHS,bool & Overflow) const1962 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1963 APInt Res = *this+RHS;
1964 Overflow = Res.ult(RHS);
1965 return Res;
1966 }
1967
ssub_ov(const APInt & RHS,bool & Overflow) const1968 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1969 APInt Res = *this - RHS;
1970 Overflow = isNonNegative() != RHS.isNonNegative() &&
1971 Res.isNonNegative() != isNonNegative();
1972 return Res;
1973 }
1974
usub_ov(const APInt & RHS,bool & Overflow) const1975 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1976 APInt Res = *this-RHS;
1977 Overflow = Res.ugt(*this);
1978 return Res;
1979 }
1980
sdiv_ov(const APInt & RHS,bool & Overflow) const1981 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1982 // MININT/-1 --> overflow.
1983 Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1984 return sdiv(RHS);
1985 }
1986
smul_ov(const APInt & RHS,bool & Overflow) const1987 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1988 APInt Res = *this * RHS;
1989
1990 if (*this != 0 && RHS != 0)
1991 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1992 else
1993 Overflow = false;
1994 return Res;
1995 }
1996
umul_ov(const APInt & RHS,bool & Overflow) const1997 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1998 if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1999 Overflow = true;
2000 return *this * RHS;
2001 }
2002
2003 APInt Res = lshr(1) * RHS;
2004 Overflow = Res.isNegative();
2005 Res <<= 1;
2006 if ((*this)[0]) {
2007 Res += RHS;
2008 if (Res.ult(RHS))
2009 Overflow = true;
2010 }
2011 return Res;
2012 }
2013
sshl_ov(const APInt & ShAmt,bool & Overflow) const2014 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2015 Overflow = ShAmt.uge(getBitWidth());
2016 if (Overflow)
2017 return APInt(BitWidth, 0);
2018
2019 if (isNonNegative()) // Don't allow sign change.
2020 Overflow = ShAmt.uge(countLeadingZeros());
2021 else
2022 Overflow = ShAmt.uge(countLeadingOnes());
2023
2024 return *this << ShAmt;
2025 }
2026
ushl_ov(const APInt & ShAmt,bool & Overflow) const2027 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2028 Overflow = ShAmt.uge(getBitWidth());
2029 if (Overflow)
2030 return APInt(BitWidth, 0);
2031
2032 Overflow = ShAmt.ugt(countLeadingZeros());
2033
2034 return *this << ShAmt;
2035 }
2036
sadd_sat(const APInt & RHS) const2037 APInt APInt::sadd_sat(const APInt &RHS) const {
2038 bool Overflow;
2039 APInt Res = sadd_ov(RHS, Overflow);
2040 if (!Overflow)
2041 return Res;
2042
2043 return isNegative() ? APInt::getSignedMinValue(BitWidth)
2044 : APInt::getSignedMaxValue(BitWidth);
2045 }
2046
uadd_sat(const APInt & RHS) const2047 APInt APInt::uadd_sat(const APInt &RHS) const {
2048 bool Overflow;
2049 APInt Res = uadd_ov(RHS, Overflow);
2050 if (!Overflow)
2051 return Res;
2052
2053 return APInt::getMaxValue(BitWidth);
2054 }
2055
ssub_sat(const APInt & RHS) const2056 APInt APInt::ssub_sat(const APInt &RHS) const {
2057 bool Overflow;
2058 APInt Res = ssub_ov(RHS, Overflow);
2059 if (!Overflow)
2060 return Res;
2061
2062 return isNegative() ? APInt::getSignedMinValue(BitWidth)
2063 : APInt::getSignedMaxValue(BitWidth);
2064 }
2065
usub_sat(const APInt & RHS) const2066 APInt APInt::usub_sat(const APInt &RHS) const {
2067 bool Overflow;
2068 APInt Res = usub_ov(RHS, Overflow);
2069 if (!Overflow)
2070 return Res;
2071
2072 return APInt(BitWidth, 0);
2073 }
2074
smul_sat(const APInt & RHS) const2075 APInt APInt::smul_sat(const APInt &RHS) const {
2076 bool Overflow;
2077 APInt Res = smul_ov(RHS, Overflow);
2078 if (!Overflow)
2079 return Res;
2080
2081 // The result is negative if one and only one of inputs is negative.
2082 bool ResIsNegative = isNegative() ^ RHS.isNegative();
2083
2084 return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2085 : APInt::getSignedMaxValue(BitWidth);
2086 }
2087
umul_sat(const APInt & RHS) const2088 APInt APInt::umul_sat(const APInt &RHS) const {
2089 bool Overflow;
2090 APInt Res = umul_ov(RHS, Overflow);
2091 if (!Overflow)
2092 return Res;
2093
2094 return APInt::getMaxValue(BitWidth);
2095 }
2096
sshl_sat(const APInt & RHS) const2097 APInt APInt::sshl_sat(const APInt &RHS) const {
2098 bool Overflow;
2099 APInt Res = sshl_ov(RHS, Overflow);
2100 if (!Overflow)
2101 return Res;
2102
2103 return isNegative() ? APInt::getSignedMinValue(BitWidth)
2104 : APInt::getSignedMaxValue(BitWidth);
2105 }
2106
ushl_sat(const APInt & RHS) const2107 APInt APInt::ushl_sat(const APInt &RHS) const {
2108 bool Overflow;
2109 APInt Res = ushl_ov(RHS, Overflow);
2110 if (!Overflow)
2111 return Res;
2112
2113 return APInt::getMaxValue(BitWidth);
2114 }
2115
fromString(unsigned numbits,StringRef str,uint8_t radix)2116 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2117 // Check our assumptions here
2118 assert(!str.empty() && "Invalid string length");
2119 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2120 radix == 36) &&
2121 "Radix should be 2, 8, 10, 16, or 36!");
2122
2123 StringRef::iterator p = str.begin();
2124 size_t slen = str.size();
2125 bool isNeg = *p == '-';
2126 if (*p == '-' || *p == '+') {
2127 p++;
2128 slen--;
2129 assert(slen && "String is only a sign, needs a value.");
2130 }
2131 assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2132 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2133 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2134 assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2135 "Insufficient bit width");
2136
2137 // Allocate memory if needed
2138 if (isSingleWord())
2139 U.VAL = 0;
2140 else
2141 U.pVal = getClearedMemory(getNumWords());
2142
2143 // Figure out if we can shift instead of multiply
2144 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2145
2146 // Enter digit traversal loop
2147 for (StringRef::iterator e = str.end(); p != e; ++p) {
2148 unsigned digit = getDigit(*p, radix);
2149 assert(digit < radix && "Invalid character in digit string");
2150
2151 // Shift or multiply the value by the radix
2152 if (slen > 1) {
2153 if (shift)
2154 *this <<= shift;
2155 else
2156 *this *= radix;
2157 }
2158
2159 // Add in the digit we just interpreted
2160 *this += digit;
2161 }
2162 // If its negative, put it in two's complement form
2163 if (isNeg)
2164 this->negate();
2165 }
2166
toString(SmallVectorImpl<char> & Str,unsigned Radix,bool Signed,bool formatAsCLiteral) const2167 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2168 bool Signed, bool formatAsCLiteral) const {
2169 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2170 Radix == 36) &&
2171 "Radix should be 2, 8, 10, 16, or 36!");
2172
2173 const char *Prefix = "";
2174 if (formatAsCLiteral) {
2175 switch (Radix) {
2176 case 2:
2177 // Binary literals are a non-standard extension added in gcc 4.3:
2178 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2179 Prefix = "0b";
2180 break;
2181 case 8:
2182 Prefix = "0";
2183 break;
2184 case 10:
2185 break; // No prefix
2186 case 16:
2187 Prefix = "0x";
2188 break;
2189 default:
2190 llvm_unreachable("Invalid radix!");
2191 }
2192 }
2193
2194 // First, check for a zero value and just short circuit the logic below.
2195 if (*this == 0) {
2196 while (*Prefix) {
2197 Str.push_back(*Prefix);
2198 ++Prefix;
2199 };
2200 Str.push_back('0');
2201 return;
2202 }
2203
2204 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2205
2206 if (isSingleWord()) {
2207 char Buffer[65];
2208 char *BufPtr = std::end(Buffer);
2209
2210 uint64_t N;
2211 if (!Signed) {
2212 N = getZExtValue();
2213 } else {
2214 int64_t I = getSExtValue();
2215 if (I >= 0) {
2216 N = I;
2217 } else {
2218 Str.push_back('-');
2219 N = -(uint64_t)I;
2220 }
2221 }
2222
2223 while (*Prefix) {
2224 Str.push_back(*Prefix);
2225 ++Prefix;
2226 };
2227
2228 while (N) {
2229 *--BufPtr = Digits[N % Radix];
2230 N /= Radix;
2231 }
2232 Str.append(BufPtr, std::end(Buffer));
2233 return;
2234 }
2235
2236 APInt Tmp(*this);
2237
2238 if (Signed && isNegative()) {
2239 // They want to print the signed version and it is a negative value
2240 // Flip the bits and add one to turn it into the equivalent positive
2241 // value and put a '-' in the result.
2242 Tmp.negate();
2243 Str.push_back('-');
2244 }
2245
2246 while (*Prefix) {
2247 Str.push_back(*Prefix);
2248 ++Prefix;
2249 };
2250
2251 // We insert the digits backward, then reverse them to get the right order.
2252 unsigned StartDig = Str.size();
2253
2254 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2255 // because the number of bits per digit (1, 3 and 4 respectively) divides
2256 // equally. We just shift until the value is zero.
2257 if (Radix == 2 || Radix == 8 || Radix == 16) {
2258 // Just shift tmp right for each digit width until it becomes zero
2259 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2260 unsigned MaskAmt = Radix - 1;
2261
2262 while (Tmp.getBoolValue()) {
2263 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2264 Str.push_back(Digits[Digit]);
2265 Tmp.lshrInPlace(ShiftAmt);
2266 }
2267 } else {
2268 while (Tmp.getBoolValue()) {
2269 uint64_t Digit;
2270 udivrem(Tmp, Radix, Tmp, Digit);
2271 assert(Digit < Radix && "divide failed");
2272 Str.push_back(Digits[Digit]);
2273 }
2274 }
2275
2276 // Reverse the digits before returning.
2277 std::reverse(Str.begin()+StartDig, Str.end());
2278 }
2279
2280 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2281 /// It is better to pass in a SmallVector/SmallString to the methods above.
toString(unsigned Radix=10,bool Signed=true) const2282 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2283 SmallString<40> S;
2284 toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2285 return std::string(S.str());
2286 }
2287
2288 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump() const2289 LLVM_DUMP_METHOD void APInt::dump() const {
2290 SmallString<40> S, U;
2291 this->toStringUnsigned(U);
2292 this->toStringSigned(S);
2293 dbgs() << "APInt(" << BitWidth << "b, "
2294 << U << "u " << S << "s)\n";
2295 }
2296 #endif
2297
print(raw_ostream & OS,bool isSigned) const2298 void APInt::print(raw_ostream &OS, bool isSigned) const {
2299 SmallString<40> S;
2300 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2301 OS << S;
2302 }
2303
2304 // This implements a variety of operations on a representation of
2305 // arbitrary precision, two's-complement, bignum integer values.
2306
2307 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2308 // and unrestricting assumption.
2309 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2310 "Part width must be divisible by 2!");
2311
2312 /* Some handy functions local to this file. */
2313
2314 /* Returns the integer part with the least significant BITS set.
2315 BITS cannot be zero. */
lowBitMask(unsigned bits)2316 static inline APInt::WordType lowBitMask(unsigned bits) {
2317 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2318
2319 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2320 }
2321
2322 /* Returns the value of the lower half of PART. */
lowHalf(APInt::WordType part)2323 static inline APInt::WordType lowHalf(APInt::WordType part) {
2324 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2325 }
2326
2327 /* Returns the value of the upper half of PART. */
highHalf(APInt::WordType part)2328 static inline APInt::WordType highHalf(APInt::WordType part) {
2329 return part >> (APInt::APINT_BITS_PER_WORD / 2);
2330 }
2331
2332 /* Returns the bit number of the most significant set bit of a part.
2333 If the input number has no bits set -1U is returned. */
partMSB(APInt::WordType value)2334 static unsigned partMSB(APInt::WordType value) {
2335 return findLastSet(value, ZB_Max);
2336 }
2337
2338 /* Returns the bit number of the least significant set bit of a
2339 part. If the input number has no bits set -1U is returned. */
partLSB(APInt::WordType value)2340 static unsigned partLSB(APInt::WordType value) {
2341 return findFirstSet(value, ZB_Max);
2342 }
2343
2344 /* Sets the least significant part of a bignum to the input value, and
2345 zeroes out higher parts. */
tcSet(WordType * dst,WordType part,unsigned parts)2346 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2347 assert(parts > 0);
2348
2349 dst[0] = part;
2350 for (unsigned i = 1; i < parts; i++)
2351 dst[i] = 0;
2352 }
2353
2354 /* Assign one bignum to another. */
tcAssign(WordType * dst,const WordType * src,unsigned parts)2355 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2356 for (unsigned i = 0; i < parts; i++)
2357 dst[i] = src[i];
2358 }
2359
2360 /* Returns true if a bignum is zero, false otherwise. */
tcIsZero(const WordType * src,unsigned parts)2361 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2362 for (unsigned i = 0; i < parts; i++)
2363 if (src[i])
2364 return false;
2365
2366 return true;
2367 }
2368
2369 /* Extract the given bit of a bignum; returns 0 or 1. */
tcExtractBit(const WordType * parts,unsigned bit)2370 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2371 return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2372 }
2373
2374 /* Set the given bit of a bignum. */
tcSetBit(WordType * parts,unsigned bit)2375 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2376 parts[whichWord(bit)] |= maskBit(bit);
2377 }
2378
2379 /* Clears the given bit of a bignum. */
tcClearBit(WordType * parts,unsigned bit)2380 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2381 parts[whichWord(bit)] &= ~maskBit(bit);
2382 }
2383
2384 /* Returns the bit number of the least significant set bit of a
2385 number. If the input number has no bits set -1U is returned. */
tcLSB(const WordType * parts,unsigned n)2386 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2387 for (unsigned i = 0; i < n; i++) {
2388 if (parts[i] != 0) {
2389 unsigned lsb = partLSB(parts[i]);
2390
2391 return lsb + i * APINT_BITS_PER_WORD;
2392 }
2393 }
2394
2395 return -1U;
2396 }
2397
2398 /* Returns the bit number of the most significant set bit of a number.
2399 If the input number has no bits set -1U is returned. */
tcMSB(const WordType * parts,unsigned n)2400 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2401 do {
2402 --n;
2403
2404 if (parts[n] != 0) {
2405 unsigned msb = partMSB(parts[n]);
2406
2407 return msb + n * APINT_BITS_PER_WORD;
2408 }
2409 } while (n);
2410
2411 return -1U;
2412 }
2413
2414 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2415 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2416 the least significant bit of DST. All high bits above srcBITS in
2417 DST are zero-filled. */
2418 void
tcExtract(WordType * dst,unsigned dstCount,const WordType * src,unsigned srcBits,unsigned srcLSB)2419 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2420 unsigned srcBits, unsigned srcLSB) {
2421 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2422 assert(dstParts <= dstCount);
2423
2424 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2425 tcAssign (dst, src + firstSrcPart, dstParts);
2426
2427 unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2428 tcShiftRight (dst, dstParts, shift);
2429
2430 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2431 in DST. If this is less that srcBits, append the rest, else
2432 clear the high bits. */
2433 unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2434 if (n < srcBits) {
2435 WordType mask = lowBitMask (srcBits - n);
2436 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2437 << n % APINT_BITS_PER_WORD);
2438 } else if (n > srcBits) {
2439 if (srcBits % APINT_BITS_PER_WORD)
2440 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2441 }
2442
2443 /* Clear high parts. */
2444 while (dstParts < dstCount)
2445 dst[dstParts++] = 0;
2446 }
2447
2448 /* DST += RHS + C where C is zero or one. Returns the carry flag. */
tcAdd(WordType * dst,const WordType * rhs,WordType c,unsigned parts)2449 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2450 WordType c, unsigned parts) {
2451 assert(c <= 1);
2452
2453 for (unsigned i = 0; i < parts; i++) {
2454 WordType l = dst[i];
2455 if (c) {
2456 dst[i] += rhs[i] + 1;
2457 c = (dst[i] <= l);
2458 } else {
2459 dst[i] += rhs[i];
2460 c = (dst[i] < l);
2461 }
2462 }
2463
2464 return c;
2465 }
2466
2467 /// This function adds a single "word" integer, src, to the multiple
2468 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2469 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2470 /// @returns the carry of the addition.
tcAddPart(WordType * dst,WordType src,unsigned parts)2471 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2472 unsigned parts) {
2473 for (unsigned i = 0; i < parts; ++i) {
2474 dst[i] += src;
2475 if (dst[i] >= src)
2476 return 0; // No need to carry so exit early.
2477 src = 1; // Carry one to next digit.
2478 }
2479
2480 return 1;
2481 }
2482
2483 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */
tcSubtract(WordType * dst,const WordType * rhs,WordType c,unsigned parts)2484 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2485 WordType c, unsigned parts) {
2486 assert(c <= 1);
2487
2488 for (unsigned i = 0; i < parts; i++) {
2489 WordType l = dst[i];
2490 if (c) {
2491 dst[i] -= rhs[i] + 1;
2492 c = (dst[i] >= l);
2493 } else {
2494 dst[i] -= rhs[i];
2495 c = (dst[i] > l);
2496 }
2497 }
2498
2499 return c;
2500 }
2501
2502 /// This function subtracts a single "word" (64-bit word), src, from
2503 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2504 /// no further borrowing is needed or it runs out of "words" in dst. The result
2505 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2506 /// exhausted. In other words, if src > dst then this function returns 1,
2507 /// otherwise 0.
2508 /// @returns the borrow out of the subtraction
tcSubtractPart(WordType * dst,WordType src,unsigned parts)2509 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2510 unsigned parts) {
2511 for (unsigned i = 0; i < parts; ++i) {
2512 WordType Dst = dst[i];
2513 dst[i] -= src;
2514 if (src <= Dst)
2515 return 0; // No need to borrow so exit early.
2516 src = 1; // We have to "borrow 1" from next "word"
2517 }
2518
2519 return 1;
2520 }
2521
2522 /* Negate a bignum in-place. */
tcNegate(WordType * dst,unsigned parts)2523 void APInt::tcNegate(WordType *dst, unsigned parts) {
2524 tcComplement(dst, parts);
2525 tcIncrement(dst, parts);
2526 }
2527
2528 /* DST += SRC * MULTIPLIER + CARRY if add is true
2529 DST = SRC * MULTIPLIER + CARRY if add is false
2530
2531 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2532 they must start at the same point, i.e. DST == SRC.
2533
2534 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2535 returned. Otherwise DST is filled with the least significant
2536 DSTPARTS parts of the result, and if all of the omitted higher
2537 parts were zero return zero, otherwise overflow occurred and
2538 return one. */
tcMultiplyPart(WordType * dst,const WordType * src,WordType multiplier,WordType carry,unsigned srcParts,unsigned dstParts,bool add)2539 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2540 WordType multiplier, WordType carry,
2541 unsigned srcParts, unsigned dstParts,
2542 bool add) {
2543 /* Otherwise our writes of DST kill our later reads of SRC. */
2544 assert(dst <= src || dst >= src + srcParts);
2545 assert(dstParts <= srcParts + 1);
2546
2547 /* N loops; minimum of dstParts and srcParts. */
2548 unsigned n = std::min(dstParts, srcParts);
2549
2550 for (unsigned i = 0; i < n; i++) {
2551 WordType low, mid, high, srcPart;
2552
2553 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2554
2555 This cannot overflow, because
2556
2557 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2558
2559 which is less than n^2. */
2560
2561 srcPart = src[i];
2562
2563 if (multiplier == 0 || srcPart == 0) {
2564 low = carry;
2565 high = 0;
2566 } else {
2567 low = lowHalf(srcPart) * lowHalf(multiplier);
2568 high = highHalf(srcPart) * highHalf(multiplier);
2569
2570 mid = lowHalf(srcPart) * highHalf(multiplier);
2571 high += highHalf(mid);
2572 mid <<= APINT_BITS_PER_WORD / 2;
2573 if (low + mid < low)
2574 high++;
2575 low += mid;
2576
2577 mid = highHalf(srcPart) * lowHalf(multiplier);
2578 high += highHalf(mid);
2579 mid <<= APINT_BITS_PER_WORD / 2;
2580 if (low + mid < low)
2581 high++;
2582 low += mid;
2583
2584 /* Now add carry. */
2585 if (low + carry < low)
2586 high++;
2587 low += carry;
2588 }
2589
2590 if (add) {
2591 /* And now DST[i], and store the new low part there. */
2592 if (low + dst[i] < low)
2593 high++;
2594 dst[i] += low;
2595 } else
2596 dst[i] = low;
2597
2598 carry = high;
2599 }
2600
2601 if (srcParts < dstParts) {
2602 /* Full multiplication, there is no overflow. */
2603 assert(srcParts + 1 == dstParts);
2604 dst[srcParts] = carry;
2605 return 0;
2606 }
2607
2608 /* We overflowed if there is carry. */
2609 if (carry)
2610 return 1;
2611
2612 /* We would overflow if any significant unwritten parts would be
2613 non-zero. This is true if any remaining src parts are non-zero
2614 and the multiplier is non-zero. */
2615 if (multiplier)
2616 for (unsigned i = dstParts; i < srcParts; i++)
2617 if (src[i])
2618 return 1;
2619
2620 /* We fitted in the narrow destination. */
2621 return 0;
2622 }
2623
2624 /* DST = LHS * RHS, where DST has the same width as the operands and
2625 is filled with the least significant parts of the result. Returns
2626 one if overflow occurred, otherwise zero. DST must be disjoint
2627 from both operands. */
tcMultiply(WordType * dst,const WordType * lhs,const WordType * rhs,unsigned parts)2628 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2629 const WordType *rhs, unsigned parts) {
2630 assert(dst != lhs && dst != rhs);
2631
2632 int overflow = 0;
2633 tcSet(dst, 0, parts);
2634
2635 for (unsigned i = 0; i < parts; i++)
2636 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2637 parts - i, true);
2638
2639 return overflow;
2640 }
2641
2642 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2643 /// operands. No overflow occurs. DST must be disjoint from both operands.
tcFullMultiply(WordType * dst,const WordType * lhs,const WordType * rhs,unsigned lhsParts,unsigned rhsParts)2644 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2645 const WordType *rhs, unsigned lhsParts,
2646 unsigned rhsParts) {
2647 /* Put the narrower number on the LHS for less loops below. */
2648 if (lhsParts > rhsParts)
2649 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2650
2651 assert(dst != lhs && dst != rhs);
2652
2653 tcSet(dst, 0, rhsParts);
2654
2655 for (unsigned i = 0; i < lhsParts; i++)
2656 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2657 }
2658
2659 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2660 Otherwise set LHS to LHS / RHS with the fractional part discarded,
2661 set REMAINDER to the remainder, return zero. i.e.
2662
2663 OLD_LHS = RHS * LHS + REMAINDER
2664
2665 SCRATCH is a bignum of the same size as the operands and result for
2666 use by the routine; its contents need not be initialized and are
2667 destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2668 */
tcDivide(WordType * lhs,const WordType * rhs,WordType * remainder,WordType * srhs,unsigned parts)2669 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2670 WordType *remainder, WordType *srhs,
2671 unsigned parts) {
2672 assert(lhs != remainder && lhs != srhs && remainder != srhs);
2673
2674 unsigned shiftCount = tcMSB(rhs, parts) + 1;
2675 if (shiftCount == 0)
2676 return true;
2677
2678 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2679 unsigned n = shiftCount / APINT_BITS_PER_WORD;
2680 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2681
2682 tcAssign(srhs, rhs, parts);
2683 tcShiftLeft(srhs, parts, shiftCount);
2684 tcAssign(remainder, lhs, parts);
2685 tcSet(lhs, 0, parts);
2686
2687 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2688 the total. */
2689 for (;;) {
2690 int compare = tcCompare(remainder, srhs, parts);
2691 if (compare >= 0) {
2692 tcSubtract(remainder, srhs, 0, parts);
2693 lhs[n] |= mask;
2694 }
2695
2696 if (shiftCount == 0)
2697 break;
2698 shiftCount--;
2699 tcShiftRight(srhs, parts, 1);
2700 if ((mask >>= 1) == 0) {
2701 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2702 n--;
2703 }
2704 }
2705
2706 return false;
2707 }
2708
2709 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2710 /// no restrictions on Count.
tcShiftLeft(WordType * Dst,unsigned Words,unsigned Count)2711 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2712 // Don't bother performing a no-op shift.
2713 if (!Count)
2714 return;
2715
2716 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2717 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2718 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2719
2720 // Fastpath for moving by whole words.
2721 if (BitShift == 0) {
2722 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2723 } else {
2724 while (Words-- > WordShift) {
2725 Dst[Words] = Dst[Words - WordShift] << BitShift;
2726 if (Words > WordShift)
2727 Dst[Words] |=
2728 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2729 }
2730 }
2731
2732 // Fill in the remainder with 0s.
2733 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2734 }
2735
2736 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2737 /// are no restrictions on Count.
tcShiftRight(WordType * Dst,unsigned Words,unsigned Count)2738 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2739 // Don't bother performing a no-op shift.
2740 if (!Count)
2741 return;
2742
2743 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2744 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2745 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2746
2747 unsigned WordsToMove = Words - WordShift;
2748 // Fastpath for moving by whole words.
2749 if (BitShift == 0) {
2750 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2751 } else {
2752 for (unsigned i = 0; i != WordsToMove; ++i) {
2753 Dst[i] = Dst[i + WordShift] >> BitShift;
2754 if (i + 1 != WordsToMove)
2755 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2756 }
2757 }
2758
2759 // Fill in the remainder with 0s.
2760 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2761 }
2762
2763 /* Bitwise and of two bignums. */
tcAnd(WordType * dst,const WordType * rhs,unsigned parts)2764 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2765 for (unsigned i = 0; i < parts; i++)
2766 dst[i] &= rhs[i];
2767 }
2768
2769 /* Bitwise inclusive or of two bignums. */
tcOr(WordType * dst,const WordType * rhs,unsigned parts)2770 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2771 for (unsigned i = 0; i < parts; i++)
2772 dst[i] |= rhs[i];
2773 }
2774
2775 /* Bitwise exclusive or of two bignums. */
tcXor(WordType * dst,const WordType * rhs,unsigned parts)2776 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2777 for (unsigned i = 0; i < parts; i++)
2778 dst[i] ^= rhs[i];
2779 }
2780
2781 /* Complement a bignum in-place. */
tcComplement(WordType * dst,unsigned parts)2782 void APInt::tcComplement(WordType *dst, unsigned parts) {
2783 for (unsigned i = 0; i < parts; i++)
2784 dst[i] = ~dst[i];
2785 }
2786
2787 /* Comparison (unsigned) of two bignums. */
tcCompare(const WordType * lhs,const WordType * rhs,unsigned parts)2788 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2789 unsigned parts) {
2790 while (parts) {
2791 parts--;
2792 if (lhs[parts] != rhs[parts])
2793 return (lhs[parts] > rhs[parts]) ? 1 : -1;
2794 }
2795
2796 return 0;
2797 }
2798
2799 /* Set the least significant BITS bits of a bignum, clear the
2800 rest. */
tcSetLeastSignificantBits(WordType * dst,unsigned parts,unsigned bits)2801 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2802 unsigned bits) {
2803 unsigned i = 0;
2804 while (bits > APINT_BITS_PER_WORD) {
2805 dst[i++] = ~(WordType) 0;
2806 bits -= APINT_BITS_PER_WORD;
2807 }
2808
2809 if (bits)
2810 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2811
2812 while (i < parts)
2813 dst[i++] = 0;
2814 }
2815
RoundingUDiv(const APInt & A,const APInt & B,APInt::Rounding RM)2816 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2817 APInt::Rounding RM) {
2818 // Currently udivrem always rounds down.
2819 switch (RM) {
2820 case APInt::Rounding::DOWN:
2821 case APInt::Rounding::TOWARD_ZERO:
2822 return A.udiv(B);
2823 case APInt::Rounding::UP: {
2824 APInt Quo, Rem;
2825 APInt::udivrem(A, B, Quo, Rem);
2826 if (Rem == 0)
2827 return Quo;
2828 return Quo + 1;
2829 }
2830 }
2831 llvm_unreachable("Unknown APInt::Rounding enum");
2832 }
2833
RoundingSDiv(const APInt & A,const APInt & B,APInt::Rounding RM)2834 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2835 APInt::Rounding RM) {
2836 switch (RM) {
2837 case APInt::Rounding::DOWN:
2838 case APInt::Rounding::UP: {
2839 APInt Quo, Rem;
2840 APInt::sdivrem(A, B, Quo, Rem);
2841 if (Rem == 0)
2842 return Quo;
2843 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2844 // We want to check whether the non-integer part of the mathematical value
2845 // is negative or not. If the non-integer part is negative, we need to round
2846 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2847 // already rounded down.
2848 if (RM == APInt::Rounding::DOWN) {
2849 if (Rem.isNegative() != B.isNegative())
2850 return Quo - 1;
2851 return Quo;
2852 }
2853 if (Rem.isNegative() != B.isNegative())
2854 return Quo;
2855 return Quo + 1;
2856 }
2857 // Currently sdiv rounds towards zero.
2858 case APInt::Rounding::TOWARD_ZERO:
2859 return A.sdiv(B);
2860 }
2861 llvm_unreachable("Unknown APInt::Rounding enum");
2862 }
2863
2864 Optional<APInt>
SolveQuadraticEquationWrap(APInt A,APInt B,APInt C,unsigned RangeWidth)2865 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2866 unsigned RangeWidth) {
2867 unsigned CoeffWidth = A.getBitWidth();
2868 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2869 assert(RangeWidth <= CoeffWidth &&
2870 "Value range width should be less than coefficient width");
2871 assert(RangeWidth > 1 && "Value range bit width should be > 1");
2872
2873 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2874 << "x + " << C << ", rw:" << RangeWidth << '\n');
2875
2876 // Identify 0 as a (non)solution immediately.
2877 if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2878 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2879 return APInt(CoeffWidth, 0);
2880 }
2881
2882 // The result of APInt arithmetic has the same bit width as the operands,
2883 // so it can actually lose high bits. A product of two n-bit integers needs
2884 // 2n-1 bits to represent the full value.
2885 // The operation done below (on quadratic coefficients) that can produce
2886 // the largest value is the evaluation of the equation during bisection,
2887 // which needs 3 times the bitwidth of the coefficient, so the total number
2888 // of required bits is 3n.
2889 //
2890 // The purpose of this extension is to simulate the set Z of all integers,
2891 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2892 // and negative numbers (not so much in a modulo arithmetic). The method
2893 // used to solve the equation is based on the standard formula for real
2894 // numbers, and uses the concepts of "positive" and "negative" with their
2895 // usual meanings.
2896 CoeffWidth *= 3;
2897 A = A.sext(CoeffWidth);
2898 B = B.sext(CoeffWidth);
2899 C = C.sext(CoeffWidth);
2900
2901 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2902 // the bit width has increased.
2903 if (A.isNegative()) {
2904 A.negate();
2905 B.negate();
2906 C.negate();
2907 }
2908
2909 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2910 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2911 // and R = 2^BitWidth.
2912 // Since we're trying not only to find exact solutions, but also values
2913 // that "wrap around", such a set will always have a solution, i.e. an x
2914 // that satisfies at least one of the equations, or such that |q(x)|
2915 // exceeds kR, while |q(x-1)| for the same k does not.
2916 //
2917 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2918 // positive solution n (in the above sense), and also such that the n
2919 // will be the least among all solutions corresponding to k = 0, 1, ...
2920 // (more precisely, the least element in the set
2921 // { n(k) | k is such that a solution n(k) exists }).
2922 //
2923 // Consider the parabola (over real numbers) that corresponds to the
2924 // quadratic equation. Since A > 0, the arms of the parabola will point
2925 // up. Picking different values of k will shift it up and down by R.
2926 //
2927 // We want to shift the parabola in such a way as to reduce the problem
2928 // of solving q(x) = kR to solving shifted_q(x) = 0.
2929 // (The interesting solutions are the ceilings of the real number
2930 // solutions.)
2931 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2932 APInt TwoA = 2 * A;
2933 APInt SqrB = B * B;
2934 bool PickLow;
2935
2936 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2937 assert(A.isStrictlyPositive());
2938 APInt T = V.abs().urem(A);
2939 if (T.isNullValue())
2940 return V;
2941 return V.isNegative() ? V+T : V+(A-T);
2942 };
2943
2944 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2945 // iff B is positive.
2946 if (B.isNonNegative()) {
2947 // If B >= 0, the vertex it at a negative location (or at 0), so in
2948 // order to have a non-negative solution we need to pick k that makes
2949 // C-kR negative. To satisfy all the requirements for the solution
2950 // that we are looking for, it needs to be closest to 0 of all k.
2951 C = C.srem(R);
2952 if (C.isStrictlyPositive())
2953 C -= R;
2954 // Pick the greater solution.
2955 PickLow = false;
2956 } else {
2957 // If B < 0, the vertex is at a positive location. For any solution
2958 // to exist, the discriminant must be non-negative. This means that
2959 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2960 // lower bound on values of k: kR >= C - B^2/4A.
2961 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2962 // Round LowkR up (towards +inf) to the nearest kR.
2963 LowkR = RoundUp(LowkR, R);
2964
2965 // If there exists k meeting the condition above, and such that
2966 // C-kR > 0, there will be two positive real number solutions of
2967 // q(x) = kR. Out of all such values of k, pick the one that makes
2968 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2969 // In other words, find maximum k such that LowkR <= kR < C.
2970 if (C.sgt(LowkR)) {
2971 // If LowkR < C, then such a k is guaranteed to exist because
2972 // LowkR itself is a multiple of R.
2973 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
2974 // Pick the smaller solution.
2975 PickLow = true;
2976 } else {
2977 // If C-kR < 0 for all potential k's, it means that one solution
2978 // will be negative, while the other will be positive. The positive
2979 // solution will shift towards 0 if the parabola is moved up.
2980 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2981 // to 0, or in other words, out of all parabolas that have solutions,
2982 // pick the one that is the farthest "up").
2983 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2984 C -= LowkR;
2985 // Pick the greater solution.
2986 PickLow = false;
2987 }
2988 }
2989
2990 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2991 << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2992
2993 APInt D = SqrB - 4*A*C;
2994 assert(D.isNonNegative() && "Negative discriminant");
2995 APInt SQ = D.sqrt();
2996
2997 APInt Q = SQ * SQ;
2998 bool InexactSQ = Q != D;
2999 // The calculated SQ may actually be greater than the exact (non-integer)
3000 // value. If that's the case, decrement SQ to get a value that is lower.
3001 if (Q.sgt(D))
3002 SQ -= 1;
3003
3004 APInt X;
3005 APInt Rem;
3006
3007 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3008 // When using the quadratic formula directly, the calculated low root
3009 // may be greater than the exact one, since we would be subtracting SQ.
3010 // To make sure that the calculated root is not greater than the exact
3011 // one, subtract SQ+1 when calculating the low root (for inexact value
3012 // of SQ).
3013 if (PickLow)
3014 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3015 else
3016 APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3017
3018 // The updated coefficients should be such that the (exact) solution is
3019 // positive. Since APInt division rounds towards 0, the calculated one
3020 // can be 0, but cannot be negative.
3021 assert(X.isNonNegative() && "Solution should be non-negative");
3022
3023 if (!InexactSQ && Rem.isNullValue()) {
3024 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3025 return X;
3026 }
3027
3028 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3029 // The exact value of the square root of D should be between SQ and SQ+1.
3030 // This implies that the solution should be between that corresponding to
3031 // SQ (i.e. X) and that corresponding to SQ+1.
3032 //
3033 // The calculated X cannot be greater than the exact (real) solution.
3034 // Actually it must be strictly less than the exact solution, while
3035 // X+1 will be greater than or equal to it.
3036
3037 APInt VX = (A*X + B)*X + C;
3038 APInt VY = VX + TwoA*X + A + B;
3039 bool SignChange = VX.isNegative() != VY.isNegative() ||
3040 VX.isNullValue() != VY.isNullValue();
3041 // If the sign did not change between X and X+1, X is not a valid solution.
3042 // This could happen when the actual (exact) roots don't have an integer
3043 // between them, so they would both be contained between X and X+1.
3044 if (!SignChange) {
3045 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3046 return None;
3047 }
3048
3049 X += 1;
3050 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3051 return X;
3052 }
3053
3054 Optional<unsigned>
GetMostSignificantDifferentBit(const APInt & A,const APInt & B)3055 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3056 assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3057 if (A == B)
3058 return llvm::None;
3059 return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3060 }
3061
3062 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3063 /// with the integer held in IntVal.
StoreIntToMemory(const APInt & IntVal,uint8_t * Dst,unsigned StoreBytes)3064 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3065 unsigned StoreBytes) {
3066 assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3067 const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3068
3069 if (sys::IsLittleEndianHost) {
3070 // Little-endian host - the source is ordered from LSB to MSB. Order the
3071 // destination from LSB to MSB: Do a straight copy.
3072 memcpy(Dst, Src, StoreBytes);
3073 } else {
3074 // Big-endian host - the source is an array of 64 bit words ordered from
3075 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
3076 // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3077 while (StoreBytes > sizeof(uint64_t)) {
3078 StoreBytes -= sizeof(uint64_t);
3079 // May not be aligned so use memcpy.
3080 memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3081 Src += sizeof(uint64_t);
3082 }
3083
3084 memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3085 }
3086 }
3087
3088 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3089 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
LoadIntFromMemory(APInt & IntVal,const uint8_t * Src,unsigned LoadBytes)3090 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
3091 unsigned LoadBytes) {
3092 assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3093 uint8_t *Dst = reinterpret_cast<uint8_t *>(
3094 const_cast<uint64_t *>(IntVal.getRawData()));
3095
3096 if (sys::IsLittleEndianHost)
3097 // Little-endian host - the destination must be ordered from LSB to MSB.
3098 // The source is ordered from LSB to MSB: Do a straight copy.
3099 memcpy(Dst, Src, LoadBytes);
3100 else {
3101 // Big-endian - the destination is an array of 64 bit words ordered from
3102 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
3103 // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3104 // a word.
3105 while (LoadBytes > sizeof(uint64_t)) {
3106 LoadBytes -= sizeof(uint64_t);
3107 // May not be aligned so use memcpy.
3108 memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3109 Dst += sizeof(uint64_t);
3110 }
3111
3112 memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3113 }
3114 }
3115