1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   if ((*this)[bitPosition]) clearBit(bitPosition);
342   else setBit(bitPosition);
343 }
344 
345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346   unsigned subBitWidth = subBits.getBitWidth();
347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348          "Illegal bit insertion");
349 
350   // Insertion is a direct copy.
351   if (subBitWidth == BitWidth) {
352     *this = subBits;
353     return;
354   }
355 
356   // Single word result can be done as a direct bitmask.
357   if (isSingleWord()) {
358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359     U.VAL &= ~(mask << bitPosition);
360     U.VAL |= (subBits.U.VAL << bitPosition);
361     return;
362   }
363 
364   unsigned loBit = whichBit(bitPosition);
365   unsigned loWord = whichWord(bitPosition);
366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367 
368   // Insertion within a single word can be done as a direct bitmask.
369   if (loWord == hi1Word) {
370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371     U.pVal[loWord] &= ~(mask << loBit);
372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
373     return;
374   }
375 
376   // Insert on word boundaries.
377   if (loBit == 0) {
378     // Direct copy whole words.
379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380     memcpy(U.pVal + loWord, subBits.getRawData(),
381            numWholeSubWords * APINT_WORD_SIZE);
382 
383     // Mask+insert remaining bits.
384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385     if (remainingBits != 0) {
386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387       U.pVal[hi1Word] &= ~mask;
388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389     }
390     return;
391   }
392 
393   // General case - set/clear individual bits in dst based on src.
394   // TODO - there is scope for optimization here, but at the moment this code
395   // path is barely used so prefer readability over performance.
396   for (unsigned i = 0; i != subBitWidth; ++i) {
397     if (subBits[i])
398       setBit(bitPosition + i);
399     else
400       clearBit(bitPosition + i);
401   }
402 }
403 
404 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
405   assert(numBits > 0 && "Can't extract zero bits");
406   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
407          "Illegal bit extraction");
408 
409   if (isSingleWord())
410     return APInt(numBits, U.VAL >> bitPosition);
411 
412   unsigned loBit = whichBit(bitPosition);
413   unsigned loWord = whichWord(bitPosition);
414   unsigned hiWord = whichWord(bitPosition + numBits - 1);
415 
416   // Single word result extracting bits from a single word source.
417   if (loWord == hiWord)
418     return APInt(numBits, U.pVal[loWord] >> loBit);
419 
420   // Extracting bits that start on a source word boundary can be done
421   // as a fast memory copy.
422   if (loBit == 0)
423     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
424 
425   // General case - shift + copy source words directly into place.
426   APInt Result(numBits, 0);
427   unsigned NumSrcWords = getNumWords();
428   unsigned NumDstWords = Result.getNumWords();
429 
430   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
431   for (unsigned word = 0; word < NumDstWords; ++word) {
432     uint64_t w0 = U.pVal[loWord + word];
433     uint64_t w1 =
434         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
435     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
436   }
437 
438   return Result.clearUnusedBits();
439 }
440 
441 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
442   assert(!str.empty() && "Invalid string length");
443   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
444           radix == 36) &&
445          "Radix should be 2, 8, 10, 16, or 36!");
446 
447   size_t slen = str.size();
448 
449   // Each computation below needs to know if it's negative.
450   StringRef::iterator p = str.begin();
451   unsigned isNegative = *p == '-';
452   if (*p == '-' || *p == '+') {
453     p++;
454     slen--;
455     assert(slen && "String is only a sign, needs a value.");
456   }
457 
458   // For radixes of power-of-two values, the bits required is accurately and
459   // easily computed
460   if (radix == 2)
461     return slen + isNegative;
462   if (radix == 8)
463     return slen * 3 + isNegative;
464   if (radix == 16)
465     return slen * 4 + isNegative;
466 
467   // FIXME: base 36
468 
469   // This is grossly inefficient but accurate. We could probably do something
470   // with a computation of roughly slen*64/20 and then adjust by the value of
471   // the first few digits. But, I'm not sure how accurate that could be.
472 
473   // Compute a sufficient number of bits that is always large enough but might
474   // be too large. This avoids the assertion in the constructor. This
475   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476   // bits in that case.
477   unsigned sufficient
478     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
479                  : (slen == 1 ? 7 : slen * 16/3);
480 
481   // Convert to the actual binary value.
482   APInt tmp(sufficient, StringRef(p, slen), radix);
483 
484   // Compute how many bits are required. If the log is infinite, assume we need
485   // just bit. If the log is exact and value is negative, then the value is
486   // MinSignedValue with (log + 1) bits.
487   unsigned log = tmp.logBase2();
488   if (log == (unsigned)-1) {
489     return isNegative + 1;
490   } else if (isNegative && tmp.isPowerOf2()) {
491     return isNegative + log;
492   } else {
493     return isNegative + log + 1;
494   }
495 }
496 
497 hash_code llvm::hash_value(const APInt &Arg) {
498   if (Arg.isSingleWord())
499     return hash_combine(Arg.U.VAL);
500 
501   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
502 }
503 
504 bool APInt::isSplat(unsigned SplatSizeInBits) const {
505   assert(getBitWidth() % SplatSizeInBits == 0 &&
506          "SplatSizeInBits must divide width!");
507   // We can check that all parts of an integer are equal by making use of a
508   // little trick: rotate and check if it's still the same value.
509   return *this == rotl(SplatSizeInBits);
510 }
511 
512 /// This function returns the high "numBits" bits of this APInt.
513 APInt APInt::getHiBits(unsigned numBits) const {
514   return this->lshr(BitWidth - numBits);
515 }
516 
517 /// This function returns the low "numBits" bits of this APInt.
518 APInt APInt::getLoBits(unsigned numBits) const {
519   APInt Result(getLowBitsSet(BitWidth, numBits));
520   Result &= *this;
521   return Result;
522 }
523 
524 /// Return a value containing V broadcasted over NewLen bits.
525 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
526   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
527 
528   APInt Val = V.zextOrSelf(NewLen);
529   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
530     Val |= Val << I;
531 
532   return Val;
533 }
534 
535 unsigned APInt::countLeadingZerosSlowCase() const {
536   unsigned Count = 0;
537   for (int i = getNumWords()-1; i >= 0; --i) {
538     uint64_t V = U.pVal[i];
539     if (V == 0)
540       Count += APINT_BITS_PER_WORD;
541     else {
542       Count += llvm::countLeadingZeros(V);
543       break;
544     }
545   }
546   // Adjust for unused bits in the most significant word (they are zero).
547   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
548   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
549   return Count;
550 }
551 
552 unsigned APInt::countLeadingOnesSlowCase() const {
553   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
554   unsigned shift;
555   if (!highWordBits) {
556     highWordBits = APINT_BITS_PER_WORD;
557     shift = 0;
558   } else {
559     shift = APINT_BITS_PER_WORD - highWordBits;
560   }
561   int i = getNumWords() - 1;
562   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
563   if (Count == highWordBits) {
564     for (i--; i >= 0; --i) {
565       if (U.pVal[i] == WORDTYPE_MAX)
566         Count += APINT_BITS_PER_WORD;
567       else {
568         Count += llvm::countLeadingOnes(U.pVal[i]);
569         break;
570       }
571     }
572   }
573   return Count;
574 }
575 
576 unsigned APInt::countTrailingZerosSlowCase() const {
577   unsigned Count = 0;
578   unsigned i = 0;
579   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
580     Count += APINT_BITS_PER_WORD;
581   if (i < getNumWords())
582     Count += llvm::countTrailingZeros(U.pVal[i]);
583   return std::min(Count, BitWidth);
584 }
585 
586 unsigned APInt::countTrailingOnesSlowCase() const {
587   unsigned Count = 0;
588   unsigned i = 0;
589   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
590     Count += APINT_BITS_PER_WORD;
591   if (i < getNumWords())
592     Count += llvm::countTrailingOnes(U.pVal[i]);
593   assert(Count <= BitWidth);
594   return Count;
595 }
596 
597 unsigned APInt::countPopulationSlowCase() const {
598   unsigned Count = 0;
599   for (unsigned i = 0; i < getNumWords(); ++i)
600     Count += llvm::countPopulation(U.pVal[i]);
601   return Count;
602 }
603 
604 bool APInt::intersectsSlowCase(const APInt &RHS) const {
605   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
606     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
607       return true;
608 
609   return false;
610 }
611 
612 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
613   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
614     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
615       return false;
616 
617   return true;
618 }
619 
620 APInt APInt::byteSwap() const {
621   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
622   if (BitWidth == 16)
623     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
624   if (BitWidth == 32)
625     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
626   if (BitWidth == 48) {
627     unsigned Tmp1 = unsigned(U.VAL >> 16);
628     Tmp1 = ByteSwap_32(Tmp1);
629     uint16_t Tmp2 = uint16_t(U.VAL);
630     Tmp2 = ByteSwap_16(Tmp2);
631     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
632   }
633   if (BitWidth == 64)
634     return APInt(BitWidth, ByteSwap_64(U.VAL));
635 
636   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
637   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
638     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
639   if (Result.BitWidth != BitWidth) {
640     Result.lshrInPlace(Result.BitWidth - BitWidth);
641     Result.BitWidth = BitWidth;
642   }
643   return Result;
644 }
645 
646 APInt APInt::reverseBits() const {
647   switch (BitWidth) {
648   case 64:
649     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
650   case 32:
651     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
652   case 16:
653     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
654   case 8:
655     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
656   default:
657     break;
658   }
659 
660   APInt Val(*this);
661   APInt Reversed(BitWidth, 0);
662   unsigned S = BitWidth;
663 
664   for (; Val != 0; Val.lshrInPlace(1)) {
665     Reversed <<= 1;
666     Reversed |= Val[0];
667     --S;
668   }
669 
670   Reversed <<= S;
671   return Reversed;
672 }
673 
674 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
675   // Fast-path a common case.
676   if (A == B) return A;
677 
678   // Corner cases: if either operand is zero, the other is the gcd.
679   if (!A) return B;
680   if (!B) return A;
681 
682   // Count common powers of 2 and remove all other powers of 2.
683   unsigned Pow2;
684   {
685     unsigned Pow2_A = A.countTrailingZeros();
686     unsigned Pow2_B = B.countTrailingZeros();
687     if (Pow2_A > Pow2_B) {
688       A.lshrInPlace(Pow2_A - Pow2_B);
689       Pow2 = Pow2_B;
690     } else if (Pow2_B > Pow2_A) {
691       B.lshrInPlace(Pow2_B - Pow2_A);
692       Pow2 = Pow2_A;
693     } else {
694       Pow2 = Pow2_A;
695     }
696   }
697 
698   // Both operands are odd multiples of 2^Pow_2:
699   //
700   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
701   //
702   // This is a modified version of Stein's algorithm, taking advantage of
703   // efficient countTrailingZeros().
704   while (A != B) {
705     if (A.ugt(B)) {
706       A -= B;
707       A.lshrInPlace(A.countTrailingZeros() - Pow2);
708     } else {
709       B -= A;
710       B.lshrInPlace(B.countTrailingZeros() - Pow2);
711     }
712   }
713 
714   return A;
715 }
716 
717 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
718   uint64_t I = bit_cast<uint64_t>(Double);
719 
720   // Get the sign bit from the highest order bit
721   bool isNeg = I >> 63;
722 
723   // Get the 11-bit exponent and adjust for the 1023 bit bias
724   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
725 
726   // If the exponent is negative, the value is < 0 so just return 0.
727   if (exp < 0)
728     return APInt(width, 0u);
729 
730   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
731   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
732 
733   // If the exponent doesn't shift all bits out of the mantissa
734   if (exp < 52)
735     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
736                     APInt(width, mantissa >> (52 - exp));
737 
738   // If the client didn't provide enough bits for us to shift the mantissa into
739   // then the result is undefined, just return 0
740   if (width <= exp - 52)
741     return APInt(width, 0);
742 
743   // Otherwise, we have to shift the mantissa bits up to the right location
744   APInt Tmp(width, mantissa);
745   Tmp <<= (unsigned)exp - 52;
746   return isNeg ? -Tmp : Tmp;
747 }
748 
749 /// This function converts this APInt to a double.
750 /// The layout for double is as following (IEEE Standard 754):
751 ///  --------------------------------------
752 /// |  Sign    Exponent    Fraction    Bias |
753 /// |-------------------------------------- |
754 /// |  1[63]   11[62-52]   52[51-00]   1023 |
755 ///  --------------------------------------
756 double APInt::roundToDouble(bool isSigned) const {
757 
758   // Handle the simple case where the value is contained in one uint64_t.
759   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
760   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
761     if (isSigned) {
762       int64_t sext = SignExtend64(getWord(0), BitWidth);
763       return double(sext);
764     } else
765       return double(getWord(0));
766   }
767 
768   // Determine if the value is negative.
769   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
770 
771   // Construct the absolute value if we're negative.
772   APInt Tmp(isNeg ? -(*this) : (*this));
773 
774   // Figure out how many bits we're using.
775   unsigned n = Tmp.getActiveBits();
776 
777   // The exponent (without bias normalization) is just the number of bits
778   // we are using. Note that the sign bit is gone since we constructed the
779   // absolute value.
780   uint64_t exp = n;
781 
782   // Return infinity for exponent overflow
783   if (exp > 1023) {
784     if (!isSigned || !isNeg)
785       return std::numeric_limits<double>::infinity();
786     else
787       return -std::numeric_limits<double>::infinity();
788   }
789   exp += 1023; // Increment for 1023 bias
790 
791   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
792   // extract the high 52 bits from the correct words in pVal.
793   uint64_t mantissa;
794   unsigned hiWord = whichWord(n-1);
795   if (hiWord == 0) {
796     mantissa = Tmp.U.pVal[0];
797     if (n > 52)
798       mantissa >>= n - 52; // shift down, we want the top 52 bits.
799   } else {
800     assert(hiWord > 0 && "huh?");
801     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
802     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
803     mantissa = hibits | lobits;
804   }
805 
806   // The leading bit of mantissa is implicit, so get rid of it.
807   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
808   uint64_t I = sign | (exp << 52) | mantissa;
809   return bit_cast<double>(I);
810 }
811 
812 // Truncate to new width.
813 APInt APInt::trunc(unsigned width) const {
814   assert(width < BitWidth && "Invalid APInt Truncate request");
815   assert(width && "Can't truncate to 0 bits");
816 
817   if (width <= APINT_BITS_PER_WORD)
818     return APInt(width, getRawData()[0]);
819 
820   APInt Result(getMemory(getNumWords(width)), width);
821 
822   // Copy full words.
823   unsigned i;
824   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
825     Result.U.pVal[i] = U.pVal[i];
826 
827   // Truncate and copy any partial word.
828   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
829   if (bits != 0)
830     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
831 
832   return Result;
833 }
834 
835 // Sign extend to a new width.
836 APInt APInt::sext(unsigned Width) const {
837   assert(Width > BitWidth && "Invalid APInt SignExtend request");
838 
839   if (Width <= APINT_BITS_PER_WORD)
840     return APInt(Width, SignExtend64(U.VAL, BitWidth));
841 
842   APInt Result(getMemory(getNumWords(Width)), Width);
843 
844   // Copy words.
845   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
846 
847   // Sign extend the last word since there may be unused bits in the input.
848   Result.U.pVal[getNumWords() - 1] =
849       SignExtend64(Result.U.pVal[getNumWords() - 1],
850                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
851 
852   // Fill with sign bits.
853   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
854               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
855   Result.clearUnusedBits();
856   return Result;
857 }
858 
859 //  Zero extend to a new width.
860 APInt APInt::zext(unsigned width) const {
861   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
862 
863   if (width <= APINT_BITS_PER_WORD)
864     return APInt(width, U.VAL);
865 
866   APInt Result(getMemory(getNumWords(width)), width);
867 
868   // Copy words.
869   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
870 
871   // Zero remaining words.
872   std::memset(Result.U.pVal + getNumWords(), 0,
873               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
874 
875   return Result;
876 }
877 
878 APInt APInt::zextOrTrunc(unsigned width) const {
879   if (BitWidth < width)
880     return zext(width);
881   if (BitWidth > width)
882     return trunc(width);
883   return *this;
884 }
885 
886 APInt APInt::sextOrTrunc(unsigned width) const {
887   if (BitWidth < width)
888     return sext(width);
889   if (BitWidth > width)
890     return trunc(width);
891   return *this;
892 }
893 
894 APInt APInt::zextOrSelf(unsigned width) const {
895   if (BitWidth < width)
896     return zext(width);
897   return *this;
898 }
899 
900 APInt APInt::sextOrSelf(unsigned width) const {
901   if (BitWidth < width)
902     return sext(width);
903   return *this;
904 }
905 
906 /// Arithmetic right-shift this APInt by shiftAmt.
907 /// Arithmetic right-shift function.
908 void APInt::ashrInPlace(const APInt &shiftAmt) {
909   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
910 }
911 
912 /// Arithmetic right-shift this APInt by shiftAmt.
913 /// Arithmetic right-shift function.
914 void APInt::ashrSlowCase(unsigned ShiftAmt) {
915   // Don't bother performing a no-op shift.
916   if (!ShiftAmt)
917     return;
918 
919   // Save the original sign bit for later.
920   bool Negative = isNegative();
921 
922   // WordShift is the inter-part shift; BitShift is intra-part shift.
923   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
924   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
925 
926   unsigned WordsToMove = getNumWords() - WordShift;
927   if (WordsToMove != 0) {
928     // Sign extend the last word to fill in the unused bits.
929     U.pVal[getNumWords() - 1] = SignExtend64(
930         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
931 
932     // Fastpath for moving by whole words.
933     if (BitShift == 0) {
934       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
935     } else {
936       // Move the words containing significant bits.
937       for (unsigned i = 0; i != WordsToMove - 1; ++i)
938         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
939                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
940 
941       // Handle the last word which has no high bits to copy.
942       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
943       // Sign extend one more time.
944       U.pVal[WordsToMove - 1] =
945           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
946     }
947   }
948 
949   // Fill in the remainder based on the original sign.
950   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
951               WordShift * APINT_WORD_SIZE);
952   clearUnusedBits();
953 }
954 
955 /// Logical right-shift this APInt by shiftAmt.
956 /// Logical right-shift function.
957 void APInt::lshrInPlace(const APInt &shiftAmt) {
958   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
959 }
960 
961 /// Logical right-shift this APInt by shiftAmt.
962 /// Logical right-shift function.
963 void APInt::lshrSlowCase(unsigned ShiftAmt) {
964   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
965 }
966 
967 /// Left-shift this APInt by shiftAmt.
968 /// Left-shift function.
969 APInt &APInt::operator<<=(const APInt &shiftAmt) {
970   // It's undefined behavior in C to shift by BitWidth or greater.
971   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
972   return *this;
973 }
974 
975 void APInt::shlSlowCase(unsigned ShiftAmt) {
976   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
977   clearUnusedBits();
978 }
979 
980 // Calculate the rotate amount modulo the bit width.
981 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
982   unsigned rotBitWidth = rotateAmt.getBitWidth();
983   APInt rot = rotateAmt;
984   if (rotBitWidth < BitWidth) {
985     // Extend the rotate APInt, so that the urem doesn't divide by 0.
986     // e.g. APInt(1, 32) would give APInt(1, 0).
987     rot = rotateAmt.zext(BitWidth);
988   }
989   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
990   return rot.getLimitedValue(BitWidth);
991 }
992 
993 APInt APInt::rotl(const APInt &rotateAmt) const {
994   return rotl(rotateModulo(BitWidth, rotateAmt));
995 }
996 
997 APInt APInt::rotl(unsigned rotateAmt) const {
998   rotateAmt %= BitWidth;
999   if (rotateAmt == 0)
1000     return *this;
1001   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1002 }
1003 
1004 APInt APInt::rotr(const APInt &rotateAmt) const {
1005   return rotr(rotateModulo(BitWidth, rotateAmt));
1006 }
1007 
1008 APInt APInt::rotr(unsigned rotateAmt) const {
1009   rotateAmt %= BitWidth;
1010   if (rotateAmt == 0)
1011     return *this;
1012   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1013 }
1014 
1015 // Square Root - this method computes and returns the square root of "this".
1016 // Three mechanisms are used for computation. For small values (<= 5 bits),
1017 // a table lookup is done. This gets some performance for common cases. For
1018 // values using less than 52 bits, the value is converted to double and then
1019 // the libc sqrt function is called. The result is rounded and then converted
1020 // back to a uint64_t which is then used to construct the result. Finally,
1021 // the Babylonian method for computing square roots is used.
1022 APInt APInt::sqrt() const {
1023 
1024   // Determine the magnitude of the value.
1025   unsigned magnitude = getActiveBits();
1026 
1027   // Use a fast table for some small values. This also gets rid of some
1028   // rounding errors in libc sqrt for small values.
1029   if (magnitude <= 5) {
1030     static const uint8_t results[32] = {
1031       /*     0 */ 0,
1032       /*  1- 2 */ 1, 1,
1033       /*  3- 6 */ 2, 2, 2, 2,
1034       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1035       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1036       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1037       /*    31 */ 6
1038     };
1039     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1040   }
1041 
1042   // If the magnitude of the value fits in less than 52 bits (the precision of
1043   // an IEEE double precision floating point value), then we can use the
1044   // libc sqrt function which will probably use a hardware sqrt computation.
1045   // This should be faster than the algorithm below.
1046   if (magnitude < 52) {
1047     return APInt(BitWidth,
1048                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1049                                                                : U.pVal[0])))));
1050   }
1051 
1052   // Okay, all the short cuts are exhausted. We must compute it. The following
1053   // is a classical Babylonian method for computing the square root. This code
1054   // was adapted to APInt from a wikipedia article on such computations.
1055   // See http://www.wikipedia.org/ and go to the page named
1056   // Calculate_an_integer_square_root.
1057   unsigned nbits = BitWidth, i = 4;
1058   APInt testy(BitWidth, 16);
1059   APInt x_old(BitWidth, 1);
1060   APInt x_new(BitWidth, 0);
1061   APInt two(BitWidth, 2);
1062 
1063   // Select a good starting value using binary logarithms.
1064   for (;; i += 2, testy = testy.shl(2))
1065     if (i >= nbits || this->ule(testy)) {
1066       x_old = x_old.shl(i / 2);
1067       break;
1068     }
1069 
1070   // Use the Babylonian method to arrive at the integer square root:
1071   for (;;) {
1072     x_new = (this->udiv(x_old) + x_old).udiv(two);
1073     if (x_old.ule(x_new))
1074       break;
1075     x_old = x_new;
1076   }
1077 
1078   // Make sure we return the closest approximation
1079   // NOTE: The rounding calculation below is correct. It will produce an
1080   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1081   // determined to be a rounding issue with pari/gp as it begins to use a
1082   // floating point representation after 192 bits. There are no discrepancies
1083   // between this algorithm and pari/gp for bit widths < 192 bits.
1084   APInt square(x_old * x_old);
1085   APInt nextSquare((x_old + 1) * (x_old +1));
1086   if (this->ult(square))
1087     return x_old;
1088   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1089   APInt midpoint((nextSquare - square).udiv(two));
1090   APInt offset(*this - square);
1091   if (offset.ult(midpoint))
1092     return x_old;
1093   return x_old + 1;
1094 }
1095 
1096 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1097 /// iterative extended Euclidean algorithm is used to solve for this value,
1098 /// however we simplify it to speed up calculating only the inverse, and take
1099 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1100 /// (potentially large) APInts around.
1101 /// WARNING: a value of '0' may be returned,
1102 ///          signifying that no multiplicative inverse exists!
1103 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1104   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1105 
1106   // Using the properties listed at the following web page (accessed 06/21/08):
1107   //   http://www.numbertheory.org/php/euclid.html
1108   // (especially the properties numbered 3, 4 and 9) it can be proved that
1109   // BitWidth bits suffice for all the computations in the algorithm implemented
1110   // below. More precisely, this number of bits suffice if the multiplicative
1111   // inverse exists, but may not suffice for the general extended Euclidean
1112   // algorithm.
1113 
1114   APInt r[2] = { modulo, *this };
1115   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1116   APInt q(BitWidth, 0);
1117 
1118   unsigned i;
1119   for (i = 0; r[i^1] != 0; i ^= 1) {
1120     // An overview of the math without the confusing bit-flipping:
1121     // q = r[i-2] / r[i-1]
1122     // r[i] = r[i-2] % r[i-1]
1123     // t[i] = t[i-2] - t[i-1] * q
1124     udivrem(r[i], r[i^1], q, r[i]);
1125     t[i] -= t[i^1] * q;
1126   }
1127 
1128   // If this APInt and the modulo are not coprime, there is no multiplicative
1129   // inverse, so return 0. We check this by looking at the next-to-last
1130   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1131   // algorithm.
1132   if (r[i] != 1)
1133     return APInt(BitWidth, 0);
1134 
1135   // The next-to-last t is the multiplicative inverse.  However, we are
1136   // interested in a positive inverse. Calculate a positive one from a negative
1137   // one if necessary. A simple addition of the modulo suffices because
1138   // abs(t[i]) is known to be less than *this/2 (see the link above).
1139   if (t[i].isNegative())
1140     t[i] += modulo;
1141 
1142   return std::move(t[i]);
1143 }
1144 
1145 /// Calculate the magic numbers required to implement a signed integer division
1146 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1147 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1148 /// Warren, Jr., chapter 10.
1149 APInt::ms APInt::magic() const {
1150   const APInt& d = *this;
1151   unsigned p;
1152   APInt ad, anc, delta, q1, r1, q2, r2, t;
1153   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1154   struct ms mag;
1155 
1156   ad = d.abs();
1157   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1158   anc = t - 1 - t.urem(ad);   // absolute value of nc
1159   p = d.getBitWidth() - 1;    // initialize p
1160   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1161   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1162   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1163   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1164   do {
1165     p = p + 1;
1166     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1167     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1168     if (r1.uge(anc)) {  // must be unsigned comparison
1169       q1 = q1 + 1;
1170       r1 = r1 - anc;
1171     }
1172     q2 = q2<<1;          // update q2 = 2p/abs(d)
1173     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1174     if (r2.uge(ad)) {   // must be unsigned comparison
1175       q2 = q2 + 1;
1176       r2 = r2 - ad;
1177     }
1178     delta = ad - r2;
1179   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1180 
1181   mag.m = q2 + 1;
1182   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1183   mag.s = p - d.getBitWidth();          // resulting shift
1184   return mag;
1185 }
1186 
1187 /// Calculate the magic numbers required to implement an unsigned integer
1188 /// division by a constant as a sequence of multiplies, adds and shifts.
1189 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1190 /// S. Warren, Jr., chapter 10.
1191 /// LeadingZeros can be used to simplify the calculation if the upper bits
1192 /// of the divided value are known zero.
1193 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1194   const APInt& d = *this;
1195   unsigned p;
1196   APInt nc, delta, q1, r1, q2, r2;
1197   struct mu magu;
1198   magu.a = 0;               // initialize "add" indicator
1199   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1200   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1201   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1202 
1203   nc = allOnes - (allOnes - d).urem(d);
1204   p = d.getBitWidth() - 1;  // initialize p
1205   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1206   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1207   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1208   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1209   do {
1210     p = p + 1;
1211     if (r1.uge(nc - r1)) {
1212       q1 = q1 + q1 + 1;  // update q1
1213       r1 = r1 + r1 - nc; // update r1
1214     }
1215     else {
1216       q1 = q1+q1; // update q1
1217       r1 = r1+r1; // update r1
1218     }
1219     if ((r2 + 1).uge(d - r2)) {
1220       if (q2.uge(signedMax)) magu.a = 1;
1221       q2 = q2+q2 + 1;     // update q2
1222       r2 = r2+r2 + 1 - d; // update r2
1223     }
1224     else {
1225       if (q2.uge(signedMin)) magu.a = 1;
1226       q2 = q2+q2;     // update q2
1227       r2 = r2+r2 + 1; // update r2
1228     }
1229     delta = d - 1 - r2;
1230   } while (p < d.getBitWidth()*2 &&
1231            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1232   magu.m = q2 + 1; // resulting magic number
1233   magu.s = p - d.getBitWidth();  // resulting shift
1234   return magu;
1235 }
1236 
1237 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1238 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1239 /// variables here have the same names as in the algorithm. Comments explain
1240 /// the algorithm and any deviation from it.
1241 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1242                      unsigned m, unsigned n) {
1243   assert(u && "Must provide dividend");
1244   assert(v && "Must provide divisor");
1245   assert(q && "Must provide quotient");
1246   assert(u != v && u != q && v != q && "Must use different memory");
1247   assert(n>1 && "n must be > 1");
1248 
1249   // b denotes the base of the number system. In our case b is 2^32.
1250   const uint64_t b = uint64_t(1) << 32;
1251 
1252 // The DEBUG macros here tend to be spam in the debug output if you're not
1253 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1254 #ifdef KNUTH_DEBUG
1255 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1256 #else
1257 #define DEBUG_KNUTH(X) do {} while(false)
1258 #endif
1259 
1260   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1261   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1262   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1263   DEBUG_KNUTH(dbgs() << " by");
1264   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1265   DEBUG_KNUTH(dbgs() << '\n');
1266   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1267   // u and v by d. Note that we have taken Knuth's advice here to use a power
1268   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1269   // 2 allows us to shift instead of multiply and it is easy to determine the
1270   // shift amount from the leading zeros.  We are basically normalizing the u
1271   // and v so that its high bits are shifted to the top of v's range without
1272   // overflow. Note that this can require an extra word in u so that u must
1273   // be of length m+n+1.
1274   unsigned shift = countLeadingZeros(v[n-1]);
1275   uint32_t v_carry = 0;
1276   uint32_t u_carry = 0;
1277   if (shift) {
1278     for (unsigned i = 0; i < m+n; ++i) {
1279       uint32_t u_tmp = u[i] >> (32 - shift);
1280       u[i] = (u[i] << shift) | u_carry;
1281       u_carry = u_tmp;
1282     }
1283     for (unsigned i = 0; i < n; ++i) {
1284       uint32_t v_tmp = v[i] >> (32 - shift);
1285       v[i] = (v[i] << shift) | v_carry;
1286       v_carry = v_tmp;
1287     }
1288   }
1289   u[m+n] = u_carry;
1290 
1291   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1292   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1293   DEBUG_KNUTH(dbgs() << " by");
1294   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1295   DEBUG_KNUTH(dbgs() << '\n');
1296 
1297   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1298   int j = m;
1299   do {
1300     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1301     // D3. [Calculate q'.].
1302     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1303     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1304     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1305     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1306     // on v[n-2] determines at high speed most of the cases in which the trial
1307     // value qp is one too large, and it eliminates all cases where qp is two
1308     // too large.
1309     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1310     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1311     uint64_t qp = dividend / v[n-1];
1312     uint64_t rp = dividend % v[n-1];
1313     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1314       qp--;
1315       rp += v[n-1];
1316       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1317         qp--;
1318     }
1319     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1320 
1321     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1322     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1323     // consists of a simple multiplication by a one-place number, combined with
1324     // a subtraction.
1325     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1326     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1327     // true value plus b**(n+1), namely as the b's complement of
1328     // the true value, and a "borrow" to the left should be remembered.
1329     int64_t borrow = 0;
1330     for (unsigned i = 0; i < n; ++i) {
1331       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1332       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1333       u[j+i] = Lo_32(subres);
1334       borrow = Hi_32(p) - Hi_32(subres);
1335       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1336                         << ", borrow = " << borrow << '\n');
1337     }
1338     bool isNeg = u[j+n] < borrow;
1339     u[j+n] -= Lo_32(borrow);
1340 
1341     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1342     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1343     DEBUG_KNUTH(dbgs() << '\n');
1344 
1345     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1346     // negative, go to step D6; otherwise go on to step D7.
1347     q[j] = Lo_32(qp);
1348     if (isNeg) {
1349       // D6. [Add back]. The probability that this step is necessary is very
1350       // small, on the order of only 2/b. Make sure that test data accounts for
1351       // this possibility. Decrease q[j] by 1
1352       q[j]--;
1353       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1354       // A carry will occur to the left of u[j+n], and it should be ignored
1355       // since it cancels with the borrow that occurred in D4.
1356       bool carry = false;
1357       for (unsigned i = 0; i < n; i++) {
1358         uint32_t limit = std::min(u[j+i],v[i]);
1359         u[j+i] += v[i] + carry;
1360         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1361       }
1362       u[j+n] += carry;
1363     }
1364     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1365     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1366     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1367 
1368     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1369   } while (--j >= 0);
1370 
1371   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1372   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1373   DEBUG_KNUTH(dbgs() << '\n');
1374 
1375   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1376   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1377   // compute the remainder (urem uses this).
1378   if (r) {
1379     // The value d is expressed by the "shift" value above since we avoided
1380     // multiplication by d by using a shift left. So, all we have to do is
1381     // shift right here.
1382     if (shift) {
1383       uint32_t carry = 0;
1384       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1385       for (int i = n-1; i >= 0; i--) {
1386         r[i] = (u[i] >> shift) | carry;
1387         carry = u[i] << (32 - shift);
1388         DEBUG_KNUTH(dbgs() << " " << r[i]);
1389       }
1390     } else {
1391       for (int i = n-1; i >= 0; i--) {
1392         r[i] = u[i];
1393         DEBUG_KNUTH(dbgs() << " " << r[i]);
1394       }
1395     }
1396     DEBUG_KNUTH(dbgs() << '\n');
1397   }
1398   DEBUG_KNUTH(dbgs() << '\n');
1399 }
1400 
1401 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1402                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1403   assert(lhsWords >= rhsWords && "Fractional result");
1404 
1405   // First, compose the values into an array of 32-bit words instead of
1406   // 64-bit words. This is a necessity of both the "short division" algorithm
1407   // and the Knuth "classical algorithm" which requires there to be native
1408   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1409   // can't use 64-bit operands here because we don't have native results of
1410   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1411   // work on large-endian machines.
1412   unsigned n = rhsWords * 2;
1413   unsigned m = (lhsWords * 2) - n;
1414 
1415   // Allocate space for the temporary values we need either on the stack, if
1416   // it will fit, or on the heap if it won't.
1417   uint32_t SPACE[128];
1418   uint32_t *U = nullptr;
1419   uint32_t *V = nullptr;
1420   uint32_t *Q = nullptr;
1421   uint32_t *R = nullptr;
1422   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1423     U = &SPACE[0];
1424     V = &SPACE[m+n+1];
1425     Q = &SPACE[(m+n+1) + n];
1426     if (Remainder)
1427       R = &SPACE[(m+n+1) + n + (m+n)];
1428   } else {
1429     U = new uint32_t[m + n + 1];
1430     V = new uint32_t[n];
1431     Q = new uint32_t[m+n];
1432     if (Remainder)
1433       R = new uint32_t[n];
1434   }
1435 
1436   // Initialize the dividend
1437   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1438   for (unsigned i = 0; i < lhsWords; ++i) {
1439     uint64_t tmp = LHS[i];
1440     U[i * 2] = Lo_32(tmp);
1441     U[i * 2 + 1] = Hi_32(tmp);
1442   }
1443   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1444 
1445   // Initialize the divisor
1446   memset(V, 0, (n)*sizeof(uint32_t));
1447   for (unsigned i = 0; i < rhsWords; ++i) {
1448     uint64_t tmp = RHS[i];
1449     V[i * 2] = Lo_32(tmp);
1450     V[i * 2 + 1] = Hi_32(tmp);
1451   }
1452 
1453   // initialize the quotient and remainder
1454   memset(Q, 0, (m+n) * sizeof(uint32_t));
1455   if (Remainder)
1456     memset(R, 0, n * sizeof(uint32_t));
1457 
1458   // Now, adjust m and n for the Knuth division. n is the number of words in
1459   // the divisor. m is the number of words by which the dividend exceeds the
1460   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1461   // contain any zero words or the Knuth algorithm fails.
1462   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1463     n--;
1464     m++;
1465   }
1466   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1467     m--;
1468 
1469   // If we're left with only a single word for the divisor, Knuth doesn't work
1470   // so we implement the short division algorithm here. This is much simpler
1471   // and faster because we are certain that we can divide a 64-bit quantity
1472   // by a 32-bit quantity at hardware speed and short division is simply a
1473   // series of such operations. This is just like doing short division but we
1474   // are using base 2^32 instead of base 10.
1475   assert(n != 0 && "Divide by zero?");
1476   if (n == 1) {
1477     uint32_t divisor = V[0];
1478     uint32_t remainder = 0;
1479     for (int i = m; i >= 0; i--) {
1480       uint64_t partial_dividend = Make_64(remainder, U[i]);
1481       if (partial_dividend == 0) {
1482         Q[i] = 0;
1483         remainder = 0;
1484       } else if (partial_dividend < divisor) {
1485         Q[i] = 0;
1486         remainder = Lo_32(partial_dividend);
1487       } else if (partial_dividend == divisor) {
1488         Q[i] = 1;
1489         remainder = 0;
1490       } else {
1491         Q[i] = Lo_32(partial_dividend / divisor);
1492         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1493       }
1494     }
1495     if (R)
1496       R[0] = remainder;
1497   } else {
1498     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1499     // case n > 1.
1500     KnuthDiv(U, V, Q, R, m, n);
1501   }
1502 
1503   // If the caller wants the quotient
1504   if (Quotient) {
1505     for (unsigned i = 0; i < lhsWords; ++i)
1506       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1507   }
1508 
1509   // If the caller wants the remainder
1510   if (Remainder) {
1511     for (unsigned i = 0; i < rhsWords; ++i)
1512       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1513   }
1514 
1515   // Clean up the memory we allocated.
1516   if (U != &SPACE[0]) {
1517     delete [] U;
1518     delete [] V;
1519     delete [] Q;
1520     delete [] R;
1521   }
1522 }
1523 
1524 APInt APInt::udiv(const APInt &RHS) const {
1525   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1526 
1527   // First, deal with the easy case
1528   if (isSingleWord()) {
1529     assert(RHS.U.VAL != 0 && "Divide by zero?");
1530     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1531   }
1532 
1533   // Get some facts about the LHS and RHS number of bits and words
1534   unsigned lhsWords = getNumWords(getActiveBits());
1535   unsigned rhsBits  = RHS.getActiveBits();
1536   unsigned rhsWords = getNumWords(rhsBits);
1537   assert(rhsWords && "Divided by zero???");
1538 
1539   // Deal with some degenerate cases
1540   if (!lhsWords)
1541     // 0 / X ===> 0
1542     return APInt(BitWidth, 0);
1543   if (rhsBits == 1)
1544     // X / 1 ===> X
1545     return *this;
1546   if (lhsWords < rhsWords || this->ult(RHS))
1547     // X / Y ===> 0, iff X < Y
1548     return APInt(BitWidth, 0);
1549   if (*this == RHS)
1550     // X / X ===> 1
1551     return APInt(BitWidth, 1);
1552   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1553     // All high words are zero, just use native divide
1554     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1555 
1556   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1557   APInt Quotient(BitWidth, 0); // to hold result.
1558   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1559   return Quotient;
1560 }
1561 
1562 APInt APInt::udiv(uint64_t RHS) const {
1563   assert(RHS != 0 && "Divide by zero?");
1564 
1565   // First, deal with the easy case
1566   if (isSingleWord())
1567     return APInt(BitWidth, U.VAL / RHS);
1568 
1569   // Get some facts about the LHS words.
1570   unsigned lhsWords = getNumWords(getActiveBits());
1571 
1572   // Deal with some degenerate cases
1573   if (!lhsWords)
1574     // 0 / X ===> 0
1575     return APInt(BitWidth, 0);
1576   if (RHS == 1)
1577     // X / 1 ===> X
1578     return *this;
1579   if (this->ult(RHS))
1580     // X / Y ===> 0, iff X < Y
1581     return APInt(BitWidth, 0);
1582   if (*this == RHS)
1583     // X / X ===> 1
1584     return APInt(BitWidth, 1);
1585   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1586     // All high words are zero, just use native divide
1587     return APInt(BitWidth, this->U.pVal[0] / RHS);
1588 
1589   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1590   APInt Quotient(BitWidth, 0); // to hold result.
1591   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1592   return Quotient;
1593 }
1594 
1595 APInt APInt::sdiv(const APInt &RHS) const {
1596   if (isNegative()) {
1597     if (RHS.isNegative())
1598       return (-(*this)).udiv(-RHS);
1599     return -((-(*this)).udiv(RHS));
1600   }
1601   if (RHS.isNegative())
1602     return -(this->udiv(-RHS));
1603   return this->udiv(RHS);
1604 }
1605 
1606 APInt APInt::sdiv(int64_t RHS) const {
1607   if (isNegative()) {
1608     if (RHS < 0)
1609       return (-(*this)).udiv(-RHS);
1610     return -((-(*this)).udiv(RHS));
1611   }
1612   if (RHS < 0)
1613     return -(this->udiv(-RHS));
1614   return this->udiv(RHS);
1615 }
1616 
1617 APInt APInt::urem(const APInt &RHS) const {
1618   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1619   if (isSingleWord()) {
1620     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1621     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1622   }
1623 
1624   // Get some facts about the LHS
1625   unsigned lhsWords = getNumWords(getActiveBits());
1626 
1627   // Get some facts about the RHS
1628   unsigned rhsBits = RHS.getActiveBits();
1629   unsigned rhsWords = getNumWords(rhsBits);
1630   assert(rhsWords && "Performing remainder operation by zero ???");
1631 
1632   // Check the degenerate cases
1633   if (lhsWords == 0)
1634     // 0 % Y ===> 0
1635     return APInt(BitWidth, 0);
1636   if (rhsBits == 1)
1637     // X % 1 ===> 0
1638     return APInt(BitWidth, 0);
1639   if (lhsWords < rhsWords || this->ult(RHS))
1640     // X % Y ===> X, iff X < Y
1641     return *this;
1642   if (*this == RHS)
1643     // X % X == 0;
1644     return APInt(BitWidth, 0);
1645   if (lhsWords == 1)
1646     // All high words are zero, just use native remainder
1647     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1648 
1649   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1650   APInt Remainder(BitWidth, 0);
1651   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1652   return Remainder;
1653 }
1654 
1655 uint64_t APInt::urem(uint64_t RHS) const {
1656   assert(RHS != 0 && "Remainder by zero?");
1657 
1658   if (isSingleWord())
1659     return U.VAL % RHS;
1660 
1661   // Get some facts about the LHS
1662   unsigned lhsWords = getNumWords(getActiveBits());
1663 
1664   // Check the degenerate cases
1665   if (lhsWords == 0)
1666     // 0 % Y ===> 0
1667     return 0;
1668   if (RHS == 1)
1669     // X % 1 ===> 0
1670     return 0;
1671   if (this->ult(RHS))
1672     // X % Y ===> X, iff X < Y
1673     return getZExtValue();
1674   if (*this == RHS)
1675     // X % X == 0;
1676     return 0;
1677   if (lhsWords == 1)
1678     // All high words are zero, just use native remainder
1679     return U.pVal[0] % RHS;
1680 
1681   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1682   uint64_t Remainder;
1683   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1684   return Remainder;
1685 }
1686 
1687 APInt APInt::srem(const APInt &RHS) const {
1688   if (isNegative()) {
1689     if (RHS.isNegative())
1690       return -((-(*this)).urem(-RHS));
1691     return -((-(*this)).urem(RHS));
1692   }
1693   if (RHS.isNegative())
1694     return this->urem(-RHS);
1695   return this->urem(RHS);
1696 }
1697 
1698 int64_t APInt::srem(int64_t RHS) const {
1699   if (isNegative()) {
1700     if (RHS < 0)
1701       return -((-(*this)).urem(-RHS));
1702     return -((-(*this)).urem(RHS));
1703   }
1704   if (RHS < 0)
1705     return this->urem(-RHS);
1706   return this->urem(RHS);
1707 }
1708 
1709 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1710                     APInt &Quotient, APInt &Remainder) {
1711   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1712   unsigned BitWidth = LHS.BitWidth;
1713 
1714   // First, deal with the easy case
1715   if (LHS.isSingleWord()) {
1716     assert(RHS.U.VAL != 0 && "Divide by zero?");
1717     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1718     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1719     Quotient = APInt(BitWidth, QuotVal);
1720     Remainder = APInt(BitWidth, RemVal);
1721     return;
1722   }
1723 
1724   // Get some size facts about the dividend and divisor
1725   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1726   unsigned rhsBits  = RHS.getActiveBits();
1727   unsigned rhsWords = getNumWords(rhsBits);
1728   assert(rhsWords && "Performing divrem operation by zero ???");
1729 
1730   // Check the degenerate cases
1731   if (lhsWords == 0) {
1732     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1733     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1734     return;
1735   }
1736 
1737   if (rhsBits == 1) {
1738     Quotient = LHS;                   // X / 1 ===> X
1739     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1740   }
1741 
1742   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1743     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1744     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1745     return;
1746   }
1747 
1748   if (LHS == RHS) {
1749     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1750     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1751     return;
1752   }
1753 
1754   // Make sure there is enough space to hold the results.
1755   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1756   // change the size. This is necessary if Quotient or Remainder is aliased
1757   // with LHS or RHS.
1758   Quotient.reallocate(BitWidth);
1759   Remainder.reallocate(BitWidth);
1760 
1761   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1762     // There is only one word to consider so use the native versions.
1763     uint64_t lhsValue = LHS.U.pVal[0];
1764     uint64_t rhsValue = RHS.U.pVal[0];
1765     Quotient = lhsValue / rhsValue;
1766     Remainder = lhsValue % rhsValue;
1767     return;
1768   }
1769 
1770   // Okay, lets do it the long way
1771   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1772          Remainder.U.pVal);
1773   // Clear the rest of the Quotient and Remainder.
1774   std::memset(Quotient.U.pVal + lhsWords, 0,
1775               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1776   std::memset(Remainder.U.pVal + rhsWords, 0,
1777               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1778 }
1779 
1780 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1781                     uint64_t &Remainder) {
1782   assert(RHS != 0 && "Divide by zero?");
1783   unsigned BitWidth = LHS.BitWidth;
1784 
1785   // First, deal with the easy case
1786   if (LHS.isSingleWord()) {
1787     uint64_t QuotVal = LHS.U.VAL / RHS;
1788     Remainder = LHS.U.VAL % RHS;
1789     Quotient = APInt(BitWidth, QuotVal);
1790     return;
1791   }
1792 
1793   // Get some size facts about the dividend and divisor
1794   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1795 
1796   // Check the degenerate cases
1797   if (lhsWords == 0) {
1798     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1799     Remainder = 0;                    // 0 % Y ===> 0
1800     return;
1801   }
1802 
1803   if (RHS == 1) {
1804     Quotient = LHS;                   // X / 1 ===> X
1805     Remainder = 0;                    // X % 1 ===> 0
1806     return;
1807   }
1808 
1809   if (LHS.ult(RHS)) {
1810     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1811     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1812     return;
1813   }
1814 
1815   if (LHS == RHS) {
1816     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1817     Remainder = 0;                    // X % X ===> 0;
1818     return;
1819   }
1820 
1821   // Make sure there is enough space to hold the results.
1822   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1823   // change the size. This is necessary if Quotient is aliased with LHS.
1824   Quotient.reallocate(BitWidth);
1825 
1826   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1827     // There is only one word to consider so use the native versions.
1828     uint64_t lhsValue = LHS.U.pVal[0];
1829     Quotient = lhsValue / RHS;
1830     Remainder = lhsValue % RHS;
1831     return;
1832   }
1833 
1834   // Okay, lets do it the long way
1835   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1836   // Clear the rest of the Quotient.
1837   std::memset(Quotient.U.pVal + lhsWords, 0,
1838               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1839 }
1840 
1841 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1842                     APInt &Quotient, APInt &Remainder) {
1843   if (LHS.isNegative()) {
1844     if (RHS.isNegative())
1845       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1846     else {
1847       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1848       Quotient.negate();
1849     }
1850     Remainder.negate();
1851   } else if (RHS.isNegative()) {
1852     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1853     Quotient.negate();
1854   } else {
1855     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1856   }
1857 }
1858 
1859 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1860                     APInt &Quotient, int64_t &Remainder) {
1861   uint64_t R = Remainder;
1862   if (LHS.isNegative()) {
1863     if (RHS < 0)
1864       APInt::udivrem(-LHS, -RHS, Quotient, R);
1865     else {
1866       APInt::udivrem(-LHS, RHS, Quotient, R);
1867       Quotient.negate();
1868     }
1869     R = -R;
1870   } else if (RHS < 0) {
1871     APInt::udivrem(LHS, -RHS, Quotient, R);
1872     Quotient.negate();
1873   } else {
1874     APInt::udivrem(LHS, RHS, Quotient, R);
1875   }
1876   Remainder = R;
1877 }
1878 
1879 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1880   APInt Res = *this+RHS;
1881   Overflow = isNonNegative() == RHS.isNonNegative() &&
1882              Res.isNonNegative() != isNonNegative();
1883   return Res;
1884 }
1885 
1886 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1887   APInt Res = *this+RHS;
1888   Overflow = Res.ult(RHS);
1889   return Res;
1890 }
1891 
1892 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1893   APInt Res = *this - RHS;
1894   Overflow = isNonNegative() != RHS.isNonNegative() &&
1895              Res.isNonNegative() != isNonNegative();
1896   return Res;
1897 }
1898 
1899 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1900   APInt Res = *this-RHS;
1901   Overflow = Res.ugt(*this);
1902   return Res;
1903 }
1904 
1905 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1906   // MININT/-1  -->  overflow.
1907   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1908   return sdiv(RHS);
1909 }
1910 
1911 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1912   APInt Res = *this * RHS;
1913 
1914   if (*this != 0 && RHS != 0)
1915     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1916   else
1917     Overflow = false;
1918   return Res;
1919 }
1920 
1921 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1922   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1923     Overflow = true;
1924     return *this * RHS;
1925   }
1926 
1927   APInt Res = lshr(1) * RHS;
1928   Overflow = Res.isNegative();
1929   Res <<= 1;
1930   if ((*this)[0]) {
1931     Res += RHS;
1932     if (Res.ult(RHS))
1933       Overflow = true;
1934   }
1935   return Res;
1936 }
1937 
1938 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1939   Overflow = ShAmt.uge(getBitWidth());
1940   if (Overflow)
1941     return APInt(BitWidth, 0);
1942 
1943   if (isNonNegative()) // Don't allow sign change.
1944     Overflow = ShAmt.uge(countLeadingZeros());
1945   else
1946     Overflow = ShAmt.uge(countLeadingOnes());
1947 
1948   return *this << ShAmt;
1949 }
1950 
1951 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1952   Overflow = ShAmt.uge(getBitWidth());
1953   if (Overflow)
1954     return APInt(BitWidth, 0);
1955 
1956   Overflow = ShAmt.ugt(countLeadingZeros());
1957 
1958   return *this << ShAmt;
1959 }
1960 
1961 APInt APInt::sadd_sat(const APInt &RHS) const {
1962   bool Overflow;
1963   APInt Res = sadd_ov(RHS, Overflow);
1964   if (!Overflow)
1965     return Res;
1966 
1967   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1968                       : APInt::getSignedMaxValue(BitWidth);
1969 }
1970 
1971 APInt APInt::uadd_sat(const APInt &RHS) const {
1972   bool Overflow;
1973   APInt Res = uadd_ov(RHS, Overflow);
1974   if (!Overflow)
1975     return Res;
1976 
1977   return APInt::getMaxValue(BitWidth);
1978 }
1979 
1980 APInt APInt::ssub_sat(const APInt &RHS) const {
1981   bool Overflow;
1982   APInt Res = ssub_ov(RHS, Overflow);
1983   if (!Overflow)
1984     return Res;
1985 
1986   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1987                       : APInt::getSignedMaxValue(BitWidth);
1988 }
1989 
1990 APInt APInt::usub_sat(const APInt &RHS) const {
1991   bool Overflow;
1992   APInt Res = usub_ov(RHS, Overflow);
1993   if (!Overflow)
1994     return Res;
1995 
1996   return APInt(BitWidth, 0);
1997 }
1998 
1999 
2000 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2001   // Check our assumptions here
2002   assert(!str.empty() && "Invalid string length");
2003   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2004           radix == 36) &&
2005          "Radix should be 2, 8, 10, 16, or 36!");
2006 
2007   StringRef::iterator p = str.begin();
2008   size_t slen = str.size();
2009   bool isNeg = *p == '-';
2010   if (*p == '-' || *p == '+') {
2011     p++;
2012     slen--;
2013     assert(slen && "String is only a sign, needs a value.");
2014   }
2015   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2016   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2017   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2018   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2019          "Insufficient bit width");
2020 
2021   // Allocate memory if needed
2022   if (isSingleWord())
2023     U.VAL = 0;
2024   else
2025     U.pVal = getClearedMemory(getNumWords());
2026 
2027   // Figure out if we can shift instead of multiply
2028   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2029 
2030   // Enter digit traversal loop
2031   for (StringRef::iterator e = str.end(); p != e; ++p) {
2032     unsigned digit = getDigit(*p, radix);
2033     assert(digit < radix && "Invalid character in digit string");
2034 
2035     // Shift or multiply the value by the radix
2036     if (slen > 1) {
2037       if (shift)
2038         *this <<= shift;
2039       else
2040         *this *= radix;
2041     }
2042 
2043     // Add in the digit we just interpreted
2044     *this += digit;
2045   }
2046   // If its negative, put it in two's complement form
2047   if (isNeg)
2048     this->negate();
2049 }
2050 
2051 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2052                      bool Signed, bool formatAsCLiteral) const {
2053   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2054           Radix == 36) &&
2055          "Radix should be 2, 8, 10, 16, or 36!");
2056 
2057   const char *Prefix = "";
2058   if (formatAsCLiteral) {
2059     switch (Radix) {
2060       case 2:
2061         // Binary literals are a non-standard extension added in gcc 4.3:
2062         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2063         Prefix = "0b";
2064         break;
2065       case 8:
2066         Prefix = "0";
2067         break;
2068       case 10:
2069         break; // No prefix
2070       case 16:
2071         Prefix = "0x";
2072         break;
2073       default:
2074         llvm_unreachable("Invalid radix!");
2075     }
2076   }
2077 
2078   // First, check for a zero value and just short circuit the logic below.
2079   if (*this == 0) {
2080     while (*Prefix) {
2081       Str.push_back(*Prefix);
2082       ++Prefix;
2083     };
2084     Str.push_back('0');
2085     return;
2086   }
2087 
2088   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2089 
2090   if (isSingleWord()) {
2091     char Buffer[65];
2092     char *BufPtr = std::end(Buffer);
2093 
2094     uint64_t N;
2095     if (!Signed) {
2096       N = getZExtValue();
2097     } else {
2098       int64_t I = getSExtValue();
2099       if (I >= 0) {
2100         N = I;
2101       } else {
2102         Str.push_back('-');
2103         N = -(uint64_t)I;
2104       }
2105     }
2106 
2107     while (*Prefix) {
2108       Str.push_back(*Prefix);
2109       ++Prefix;
2110     };
2111 
2112     while (N) {
2113       *--BufPtr = Digits[N % Radix];
2114       N /= Radix;
2115     }
2116     Str.append(BufPtr, std::end(Buffer));
2117     return;
2118   }
2119 
2120   APInt Tmp(*this);
2121 
2122   if (Signed && isNegative()) {
2123     // They want to print the signed version and it is a negative value
2124     // Flip the bits and add one to turn it into the equivalent positive
2125     // value and put a '-' in the result.
2126     Tmp.negate();
2127     Str.push_back('-');
2128   }
2129 
2130   while (*Prefix) {
2131     Str.push_back(*Prefix);
2132     ++Prefix;
2133   };
2134 
2135   // We insert the digits backward, then reverse them to get the right order.
2136   unsigned StartDig = Str.size();
2137 
2138   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2139   // because the number of bits per digit (1, 3 and 4 respectively) divides
2140   // equally.  We just shift until the value is zero.
2141   if (Radix == 2 || Radix == 8 || Radix == 16) {
2142     // Just shift tmp right for each digit width until it becomes zero
2143     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2144     unsigned MaskAmt = Radix - 1;
2145 
2146     while (Tmp.getBoolValue()) {
2147       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2148       Str.push_back(Digits[Digit]);
2149       Tmp.lshrInPlace(ShiftAmt);
2150     }
2151   } else {
2152     while (Tmp.getBoolValue()) {
2153       uint64_t Digit;
2154       udivrem(Tmp, Radix, Tmp, Digit);
2155       assert(Digit < Radix && "divide failed");
2156       Str.push_back(Digits[Digit]);
2157     }
2158   }
2159 
2160   // Reverse the digits before returning.
2161   std::reverse(Str.begin()+StartDig, Str.end());
2162 }
2163 
2164 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2165 /// It is better to pass in a SmallVector/SmallString to the methods above.
2166 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2167   SmallString<40> S;
2168   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2169   return S.str();
2170 }
2171 
2172 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2173 LLVM_DUMP_METHOD void APInt::dump() const {
2174   SmallString<40> S, U;
2175   this->toStringUnsigned(U);
2176   this->toStringSigned(S);
2177   dbgs() << "APInt(" << BitWidth << "b, "
2178          << U << "u " << S << "s)\n";
2179 }
2180 #endif
2181 
2182 void APInt::print(raw_ostream &OS, bool isSigned) const {
2183   SmallString<40> S;
2184   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2185   OS << S;
2186 }
2187 
2188 // This implements a variety of operations on a representation of
2189 // arbitrary precision, two's-complement, bignum integer values.
2190 
2191 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2192 // and unrestricting assumption.
2193 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2194               "Part width must be divisible by 2!");
2195 
2196 /* Some handy functions local to this file.  */
2197 
2198 /* Returns the integer part with the least significant BITS set.
2199    BITS cannot be zero.  */
2200 static inline APInt::WordType lowBitMask(unsigned bits) {
2201   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2202 
2203   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2204 }
2205 
2206 /* Returns the value of the lower half of PART.  */
2207 static inline APInt::WordType lowHalf(APInt::WordType part) {
2208   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2209 }
2210 
2211 /* Returns the value of the upper half of PART.  */
2212 static inline APInt::WordType highHalf(APInt::WordType part) {
2213   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2214 }
2215 
2216 /* Returns the bit number of the most significant set bit of a part.
2217    If the input number has no bits set -1U is returned.  */
2218 static unsigned partMSB(APInt::WordType value) {
2219   return findLastSet(value, ZB_Max);
2220 }
2221 
2222 /* Returns the bit number of the least significant set bit of a
2223    part.  If the input number has no bits set -1U is returned.  */
2224 static unsigned partLSB(APInt::WordType value) {
2225   return findFirstSet(value, ZB_Max);
2226 }
2227 
2228 /* Sets the least significant part of a bignum to the input value, and
2229    zeroes out higher parts.  */
2230 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2231   assert(parts > 0);
2232 
2233   dst[0] = part;
2234   for (unsigned i = 1; i < parts; i++)
2235     dst[i] = 0;
2236 }
2237 
2238 /* Assign one bignum to another.  */
2239 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2240   for (unsigned i = 0; i < parts; i++)
2241     dst[i] = src[i];
2242 }
2243 
2244 /* Returns true if a bignum is zero, false otherwise.  */
2245 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2246   for (unsigned i = 0; i < parts; i++)
2247     if (src[i])
2248       return false;
2249 
2250   return true;
2251 }
2252 
2253 /* Extract the given bit of a bignum; returns 0 or 1.  */
2254 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2255   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2256 }
2257 
2258 /* Set the given bit of a bignum. */
2259 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2260   parts[whichWord(bit)] |= maskBit(bit);
2261 }
2262 
2263 /* Clears the given bit of a bignum. */
2264 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2265   parts[whichWord(bit)] &= ~maskBit(bit);
2266 }
2267 
2268 /* Returns the bit number of the least significant set bit of a
2269    number.  If the input number has no bits set -1U is returned.  */
2270 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2271   for (unsigned i = 0; i < n; i++) {
2272     if (parts[i] != 0) {
2273       unsigned lsb = partLSB(parts[i]);
2274 
2275       return lsb + i * APINT_BITS_PER_WORD;
2276     }
2277   }
2278 
2279   return -1U;
2280 }
2281 
2282 /* Returns the bit number of the most significant set bit of a number.
2283    If the input number has no bits set -1U is returned.  */
2284 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2285   do {
2286     --n;
2287 
2288     if (parts[n] != 0) {
2289       unsigned msb = partMSB(parts[n]);
2290 
2291       return msb + n * APINT_BITS_PER_WORD;
2292     }
2293   } while (n);
2294 
2295   return -1U;
2296 }
2297 
2298 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2299    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2300    the least significant bit of DST.  All high bits above srcBITS in
2301    DST are zero-filled.  */
2302 void
2303 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2304                  unsigned srcBits, unsigned srcLSB) {
2305   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2306   assert(dstParts <= dstCount);
2307 
2308   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2309   tcAssign (dst, src + firstSrcPart, dstParts);
2310 
2311   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2312   tcShiftRight (dst, dstParts, shift);
2313 
2314   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2315      in DST.  If this is less that srcBits, append the rest, else
2316      clear the high bits.  */
2317   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2318   if (n < srcBits) {
2319     WordType mask = lowBitMask (srcBits - n);
2320     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2321                           << n % APINT_BITS_PER_WORD);
2322   } else if (n > srcBits) {
2323     if (srcBits % APINT_BITS_PER_WORD)
2324       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2325   }
2326 
2327   /* Clear high parts.  */
2328   while (dstParts < dstCount)
2329     dst[dstParts++] = 0;
2330 }
2331 
2332 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2333 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2334                              WordType c, unsigned parts) {
2335   assert(c <= 1);
2336 
2337   for (unsigned i = 0; i < parts; i++) {
2338     WordType l = dst[i];
2339     if (c) {
2340       dst[i] += rhs[i] + 1;
2341       c = (dst[i] <= l);
2342     } else {
2343       dst[i] += rhs[i];
2344       c = (dst[i] < l);
2345     }
2346   }
2347 
2348   return c;
2349 }
2350 
2351 /// This function adds a single "word" integer, src, to the multiple
2352 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2353 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2354 /// @returns the carry of the addition.
2355 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2356                                  unsigned parts) {
2357   for (unsigned i = 0; i < parts; ++i) {
2358     dst[i] += src;
2359     if (dst[i] >= src)
2360       return 0; // No need to carry so exit early.
2361     src = 1; // Carry one to next digit.
2362   }
2363 
2364   return 1;
2365 }
2366 
2367 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2368 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2369                                   WordType c, unsigned parts) {
2370   assert(c <= 1);
2371 
2372   for (unsigned i = 0; i < parts; i++) {
2373     WordType l = dst[i];
2374     if (c) {
2375       dst[i] -= rhs[i] + 1;
2376       c = (dst[i] >= l);
2377     } else {
2378       dst[i] -= rhs[i];
2379       c = (dst[i] > l);
2380     }
2381   }
2382 
2383   return c;
2384 }
2385 
2386 /// This function subtracts a single "word" (64-bit word), src, from
2387 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2388 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2389 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2390 /// exhausted. In other words, if src > dst then this function returns 1,
2391 /// otherwise 0.
2392 /// @returns the borrow out of the subtraction
2393 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2394                                       unsigned parts) {
2395   for (unsigned i = 0; i < parts; ++i) {
2396     WordType Dst = dst[i];
2397     dst[i] -= src;
2398     if (src <= Dst)
2399       return 0; // No need to borrow so exit early.
2400     src = 1; // We have to "borrow 1" from next "word"
2401   }
2402 
2403   return 1;
2404 }
2405 
2406 /* Negate a bignum in-place.  */
2407 void APInt::tcNegate(WordType *dst, unsigned parts) {
2408   tcComplement(dst, parts);
2409   tcIncrement(dst, parts);
2410 }
2411 
2412 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2413     DST  = SRC * MULTIPLIER + CARRY   if add is false
2414 
2415     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2416     they must start at the same point, i.e. DST == SRC.
2417 
2418     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2419     returned.  Otherwise DST is filled with the least significant
2420     DSTPARTS parts of the result, and if all of the omitted higher
2421     parts were zero return zero, otherwise overflow occurred and
2422     return one.  */
2423 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2424                           WordType multiplier, WordType carry,
2425                           unsigned srcParts, unsigned dstParts,
2426                           bool add) {
2427   /* Otherwise our writes of DST kill our later reads of SRC.  */
2428   assert(dst <= src || dst >= src + srcParts);
2429   assert(dstParts <= srcParts + 1);
2430 
2431   /* N loops; minimum of dstParts and srcParts.  */
2432   unsigned n = std::min(dstParts, srcParts);
2433 
2434   for (unsigned i = 0; i < n; i++) {
2435     WordType low, mid, high, srcPart;
2436 
2437       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2438 
2439          This cannot overflow, because
2440 
2441          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2442 
2443          which is less than n^2.  */
2444 
2445     srcPart = src[i];
2446 
2447     if (multiplier == 0 || srcPart == 0) {
2448       low = carry;
2449       high = 0;
2450     } else {
2451       low = lowHalf(srcPart) * lowHalf(multiplier);
2452       high = highHalf(srcPart) * highHalf(multiplier);
2453 
2454       mid = lowHalf(srcPart) * highHalf(multiplier);
2455       high += highHalf(mid);
2456       mid <<= APINT_BITS_PER_WORD / 2;
2457       if (low + mid < low)
2458         high++;
2459       low += mid;
2460 
2461       mid = highHalf(srcPart) * lowHalf(multiplier);
2462       high += highHalf(mid);
2463       mid <<= APINT_BITS_PER_WORD / 2;
2464       if (low + mid < low)
2465         high++;
2466       low += mid;
2467 
2468       /* Now add carry.  */
2469       if (low + carry < low)
2470         high++;
2471       low += carry;
2472     }
2473 
2474     if (add) {
2475       /* And now DST[i], and store the new low part there.  */
2476       if (low + dst[i] < low)
2477         high++;
2478       dst[i] += low;
2479     } else
2480       dst[i] = low;
2481 
2482     carry = high;
2483   }
2484 
2485   if (srcParts < dstParts) {
2486     /* Full multiplication, there is no overflow.  */
2487     assert(srcParts + 1 == dstParts);
2488     dst[srcParts] = carry;
2489     return 0;
2490   }
2491 
2492   /* We overflowed if there is carry.  */
2493   if (carry)
2494     return 1;
2495 
2496   /* We would overflow if any significant unwritten parts would be
2497      non-zero.  This is true if any remaining src parts are non-zero
2498      and the multiplier is non-zero.  */
2499   if (multiplier)
2500     for (unsigned i = dstParts; i < srcParts; i++)
2501       if (src[i])
2502         return 1;
2503 
2504   /* We fitted in the narrow destination.  */
2505   return 0;
2506 }
2507 
2508 /* DST = LHS * RHS, where DST has the same width as the operands and
2509    is filled with the least significant parts of the result.  Returns
2510    one if overflow occurred, otherwise zero.  DST must be disjoint
2511    from both operands.  */
2512 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2513                       const WordType *rhs, unsigned parts) {
2514   assert(dst != lhs && dst != rhs);
2515 
2516   int overflow = 0;
2517   tcSet(dst, 0, parts);
2518 
2519   for (unsigned i = 0; i < parts; i++)
2520     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2521                                parts - i, true);
2522 
2523   return overflow;
2524 }
2525 
2526 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2527 /// operands. No overflow occurs. DST must be disjoint from both operands.
2528 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2529                            const WordType *rhs, unsigned lhsParts,
2530                            unsigned rhsParts) {
2531   /* Put the narrower number on the LHS for less loops below.  */
2532   if (lhsParts > rhsParts)
2533     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2534 
2535   assert(dst != lhs && dst != rhs);
2536 
2537   tcSet(dst, 0, rhsParts);
2538 
2539   for (unsigned i = 0; i < lhsParts; i++)
2540     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2541 }
2542 
2543 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2544    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2545    set REMAINDER to the remainder, return zero.  i.e.
2546 
2547    OLD_LHS = RHS * LHS + REMAINDER
2548 
2549    SCRATCH is a bignum of the same size as the operands and result for
2550    use by the routine; its contents need not be initialized and are
2551    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2552 */
2553 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2554                     WordType *remainder, WordType *srhs,
2555                     unsigned parts) {
2556   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2557 
2558   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2559   if (shiftCount == 0)
2560     return true;
2561 
2562   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2563   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2564   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2565 
2566   tcAssign(srhs, rhs, parts);
2567   tcShiftLeft(srhs, parts, shiftCount);
2568   tcAssign(remainder, lhs, parts);
2569   tcSet(lhs, 0, parts);
2570 
2571   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2572      the total.  */
2573   for (;;) {
2574     int compare = tcCompare(remainder, srhs, parts);
2575     if (compare >= 0) {
2576       tcSubtract(remainder, srhs, 0, parts);
2577       lhs[n] |= mask;
2578     }
2579 
2580     if (shiftCount == 0)
2581       break;
2582     shiftCount--;
2583     tcShiftRight(srhs, parts, 1);
2584     if ((mask >>= 1) == 0) {
2585       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2586       n--;
2587     }
2588   }
2589 
2590   return false;
2591 }
2592 
2593 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2594 /// no restrictions on Count.
2595 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2596   // Don't bother performing a no-op shift.
2597   if (!Count)
2598     return;
2599 
2600   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2601   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2602   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2603 
2604   // Fastpath for moving by whole words.
2605   if (BitShift == 0) {
2606     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2607   } else {
2608     while (Words-- > WordShift) {
2609       Dst[Words] = Dst[Words - WordShift] << BitShift;
2610       if (Words > WordShift)
2611         Dst[Words] |=
2612           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2613     }
2614   }
2615 
2616   // Fill in the remainder with 0s.
2617   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2618 }
2619 
2620 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2621 /// are no restrictions on Count.
2622 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2623   // Don't bother performing a no-op shift.
2624   if (!Count)
2625     return;
2626 
2627   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2628   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2629   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2630 
2631   unsigned WordsToMove = Words - WordShift;
2632   // Fastpath for moving by whole words.
2633   if (BitShift == 0) {
2634     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2635   } else {
2636     for (unsigned i = 0; i != WordsToMove; ++i) {
2637       Dst[i] = Dst[i + WordShift] >> BitShift;
2638       if (i + 1 != WordsToMove)
2639         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2640     }
2641   }
2642 
2643   // Fill in the remainder with 0s.
2644   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2645 }
2646 
2647 /* Bitwise and of two bignums.  */
2648 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2649   for (unsigned i = 0; i < parts; i++)
2650     dst[i] &= rhs[i];
2651 }
2652 
2653 /* Bitwise inclusive or of two bignums.  */
2654 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2655   for (unsigned i = 0; i < parts; i++)
2656     dst[i] |= rhs[i];
2657 }
2658 
2659 /* Bitwise exclusive or of two bignums.  */
2660 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2661   for (unsigned i = 0; i < parts; i++)
2662     dst[i] ^= rhs[i];
2663 }
2664 
2665 /* Complement a bignum in-place.  */
2666 void APInt::tcComplement(WordType *dst, unsigned parts) {
2667   for (unsigned i = 0; i < parts; i++)
2668     dst[i] = ~dst[i];
2669 }
2670 
2671 /* Comparison (unsigned) of two bignums.  */
2672 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2673                      unsigned parts) {
2674   while (parts) {
2675     parts--;
2676     if (lhs[parts] != rhs[parts])
2677       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2678   }
2679 
2680   return 0;
2681 }
2682 
2683 /* Set the least significant BITS bits of a bignum, clear the
2684    rest.  */
2685 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2686                                       unsigned bits) {
2687   unsigned i = 0;
2688   while (bits > APINT_BITS_PER_WORD) {
2689     dst[i++] = ~(WordType) 0;
2690     bits -= APINT_BITS_PER_WORD;
2691   }
2692 
2693   if (bits)
2694     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2695 
2696   while (i < parts)
2697     dst[i++] = 0;
2698 }
2699 
2700 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2701                                    APInt::Rounding RM) {
2702   // Currently udivrem always rounds down.
2703   switch (RM) {
2704   case APInt::Rounding::DOWN:
2705   case APInt::Rounding::TOWARD_ZERO:
2706     return A.udiv(B);
2707   case APInt::Rounding::UP: {
2708     APInt Quo, Rem;
2709     APInt::udivrem(A, B, Quo, Rem);
2710     if (Rem == 0)
2711       return Quo;
2712     return Quo + 1;
2713   }
2714   }
2715   llvm_unreachable("Unknown APInt::Rounding enum");
2716 }
2717 
2718 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2719                                    APInt::Rounding RM) {
2720   switch (RM) {
2721   case APInt::Rounding::DOWN:
2722   case APInt::Rounding::UP: {
2723     APInt Quo, Rem;
2724     APInt::sdivrem(A, B, Quo, Rem);
2725     if (Rem == 0)
2726       return Quo;
2727     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2728     // We want to check whether the non-integer part of the mathematical value
2729     // is negative or not. If the non-integer part is negative, we need to round
2730     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2731     // already rounded down.
2732     if (RM == APInt::Rounding::DOWN) {
2733       if (Rem.isNegative() != B.isNegative())
2734         return Quo - 1;
2735       return Quo;
2736     }
2737     if (Rem.isNegative() != B.isNegative())
2738       return Quo;
2739     return Quo + 1;
2740   }
2741   // Currently sdiv rounds twards zero.
2742   case APInt::Rounding::TOWARD_ZERO:
2743     return A.sdiv(B);
2744   }
2745   llvm_unreachable("Unknown APInt::Rounding enum");
2746 }
2747 
2748 Optional<APInt>
2749 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2750                                            unsigned RangeWidth) {
2751   unsigned CoeffWidth = A.getBitWidth();
2752   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2753   assert(RangeWidth <= CoeffWidth &&
2754          "Value range width should be less than coefficient width");
2755   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2756 
2757   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2758                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2759 
2760   // Identify 0 as a (non)solution immediately.
2761   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2762     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2763     return APInt(CoeffWidth, 0);
2764   }
2765 
2766   // The result of APInt arithmetic has the same bit width as the operands,
2767   // so it can actually lose high bits. A product of two n-bit integers needs
2768   // 2n-1 bits to represent the full value.
2769   // The operation done below (on quadratic coefficients) that can produce
2770   // the largest value is the evaluation of the equation during bisection,
2771   // which needs 3 times the bitwidth of the coefficient, so the total number
2772   // of required bits is 3n.
2773   //
2774   // The purpose of this extension is to simulate the set Z of all integers,
2775   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2776   // and negative numbers (not so much in a modulo arithmetic). The method
2777   // used to solve the equation is based on the standard formula for real
2778   // numbers, and uses the concepts of "positive" and "negative" with their
2779   // usual meanings.
2780   CoeffWidth *= 3;
2781   A = A.sext(CoeffWidth);
2782   B = B.sext(CoeffWidth);
2783   C = C.sext(CoeffWidth);
2784 
2785   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2786   // the bit width has increased.
2787   if (A.isNegative()) {
2788     A.negate();
2789     B.negate();
2790     C.negate();
2791   }
2792 
2793   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2794   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2795   // and R = 2^BitWidth.
2796   // Since we're trying not only to find exact solutions, but also values
2797   // that "wrap around", such a set will always have a solution, i.e. an x
2798   // that satisfies at least one of the equations, or such that |q(x)|
2799   // exceeds kR, while |q(x-1)| for the same k does not.
2800   //
2801   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2802   // positive solution n (in the above sense), and also such that the n
2803   // will be the least among all solutions corresponding to k = 0, 1, ...
2804   // (more precisely, the least element in the set
2805   //   { n(k) | k is such that a solution n(k) exists }).
2806   //
2807   // Consider the parabola (over real numbers) that corresponds to the
2808   // quadratic equation. Since A > 0, the arms of the parabola will point
2809   // up. Picking different values of k will shift it up and down by R.
2810   //
2811   // We want to shift the parabola in such a way as to reduce the problem
2812   // of solving q(x) = kR to solving shifted_q(x) = 0.
2813   // (The interesting solutions are the ceilings of the real number
2814   // solutions.)
2815   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2816   APInt TwoA = 2 * A;
2817   APInt SqrB = B * B;
2818   bool PickLow;
2819 
2820   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2821     assert(A.isStrictlyPositive());
2822     APInt T = V.abs().urem(A);
2823     if (T.isNullValue())
2824       return V;
2825     return V.isNegative() ? V+T : V+(A-T);
2826   };
2827 
2828   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2829   // iff B is positive.
2830   if (B.isNonNegative()) {
2831     // If B >= 0, the vertex it at a negative location (or at 0), so in
2832     // order to have a non-negative solution we need to pick k that makes
2833     // C-kR negative. To satisfy all the requirements for the solution
2834     // that we are looking for, it needs to be closest to 0 of all k.
2835     C = C.srem(R);
2836     if (C.isStrictlyPositive())
2837       C -= R;
2838     // Pick the greater solution.
2839     PickLow = false;
2840   } else {
2841     // If B < 0, the vertex is at a positive location. For any solution
2842     // to exist, the discriminant must be non-negative. This means that
2843     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2844     // lower bound on values of k: kR >= C - B^2/4A.
2845     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2846     // Round LowkR up (towards +inf) to the nearest kR.
2847     LowkR = RoundUp(LowkR, R);
2848 
2849     // If there exists k meeting the condition above, and such that
2850     // C-kR > 0, there will be two positive real number solutions of
2851     // q(x) = kR. Out of all such values of k, pick the one that makes
2852     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2853     // In other words, find maximum k such that LowkR <= kR < C.
2854     if (C.sgt(LowkR)) {
2855       // If LowkR < C, then such a k is guaranteed to exist because
2856       // LowkR itself is a multiple of R.
2857       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2858       // Pick the smaller solution.
2859       PickLow = true;
2860     } else {
2861       // If C-kR < 0 for all potential k's, it means that one solution
2862       // will be negative, while the other will be positive. The positive
2863       // solution will shift towards 0 if the parabola is moved up.
2864       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2865       // to 0, or in other words, out of all parabolas that have solutions,
2866       // pick the one that is the farthest "up").
2867       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2868       C -= LowkR;
2869       // Pick the greater solution.
2870       PickLow = false;
2871     }
2872   }
2873 
2874   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2875                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2876 
2877   APInt D = SqrB - 4*A*C;
2878   assert(D.isNonNegative() && "Negative discriminant");
2879   APInt SQ = D.sqrt();
2880 
2881   APInt Q = SQ * SQ;
2882   bool InexactSQ = Q != D;
2883   // The calculated SQ may actually be greater than the exact (non-integer)
2884   // value. If that's the case, decremement SQ to get a value that is lower.
2885   if (Q.sgt(D))
2886     SQ -= 1;
2887 
2888   APInt X;
2889   APInt Rem;
2890 
2891   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2892   // When using the quadratic formula directly, the calculated low root
2893   // may be greater than the exact one, since we would be subtracting SQ.
2894   // To make sure that the calculated root is not greater than the exact
2895   // one, subtract SQ+1 when calculating the low root (for inexact value
2896   // of SQ).
2897   if (PickLow)
2898     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2899   else
2900     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2901 
2902   // The updated coefficients should be such that the (exact) solution is
2903   // positive. Since APInt division rounds towards 0, the calculated one
2904   // can be 0, but cannot be negative.
2905   assert(X.isNonNegative() && "Solution should be non-negative");
2906 
2907   if (!InexactSQ && Rem.isNullValue()) {
2908     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2909     return X;
2910   }
2911 
2912   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2913   // The exact value of the square root of D should be between SQ and SQ+1.
2914   // This implies that the solution should be between that corresponding to
2915   // SQ (i.e. X) and that corresponding to SQ+1.
2916   //
2917   // The calculated X cannot be greater than the exact (real) solution.
2918   // Actually it must be strictly less than the exact solution, while
2919   // X+1 will be greater than or equal to it.
2920 
2921   APInt VX = (A*X + B)*X + C;
2922   APInt VY = VX + TwoA*X + A + B;
2923   bool SignChange = VX.isNegative() != VY.isNegative() ||
2924                     VX.isNullValue() != VY.isNullValue();
2925   // If the sign did not change between X and X+1, X is not a valid solution.
2926   // This could happen when the actual (exact) roots don't have an integer
2927   // between them, so they would both be contained between X and X+1.
2928   if (!SignChange) {
2929     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2930     return None;
2931   }
2932 
2933   X += 1;
2934   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2935   return X;
2936 }
2937 
2938 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2939 /// with the integer held in IntVal.
2940 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2941                             unsigned StoreBytes) {
2942   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2943   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2944 
2945   if (sys::IsLittleEndianHost) {
2946     // Little-endian host - the source is ordered from LSB to MSB.  Order the
2947     // destination from LSB to MSB: Do a straight copy.
2948     memcpy(Dst, Src, StoreBytes);
2949   } else {
2950     // Big-endian host - the source is an array of 64 bit words ordered from
2951     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
2952     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
2953     while (StoreBytes > sizeof(uint64_t)) {
2954       StoreBytes -= sizeof(uint64_t);
2955       // May not be aligned so use memcpy.
2956       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
2957       Src += sizeof(uint64_t);
2958     }
2959 
2960     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
2961   }
2962 }
2963 
2964 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2965 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2966 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
2967   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
2968   uint8_t *Dst = reinterpret_cast<uint8_t *>(
2969                    const_cast<uint64_t *>(IntVal.getRawData()));
2970 
2971   if (sys::IsLittleEndianHost)
2972     // Little-endian host - the destination must be ordered from LSB to MSB.
2973     // The source is ordered from LSB to MSB: Do a straight copy.
2974     memcpy(Dst, Src, LoadBytes);
2975   else {
2976     // Big-endian - the destination is an array of 64 bit words ordered from
2977     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
2978     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
2979     // a word.
2980     while (LoadBytes > sizeof(uint64_t)) {
2981       LoadBytes -= sizeof(uint64_t);
2982       // May not be aligned so use memcpy.
2983       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
2984       Dst += sizeof(uint64_t);
2985     }
2986 
2987     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
2988   }
2989 }
2990