1 /** Fundamental operations for arbitrary-precision arithmetic
2  *
3  * These functions are for internal use only.
4  */
5 /*          Copyright Don Clugston 2008 - 2010.
6  * Distributed under the Boost Software License, Version 1.0.
7  *    (See accompanying file LICENSE_1_0.txt or copy at
8  *          http://www.boost.org/LICENSE_1_0.txt)
9  */
10 /* References:
11    "Modern Computer Arithmetic" (MCA) is the primary reference for all
12     algorithms used in this library.
13   - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic",
14     Version 0.5.9, (Oct 2010).
15   - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
16     Max-Planck Institute fuer Informatik, (Oct 1998).
17   - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.",
18     INRIA 4664, (Dec 2002).
19   - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?",
20     http://bodrato.it/papers (2006).
21   - A. Fog, "Optimizing subroutines in assembly language",
22     www.agner.org/optimize (2008).
23   - A. Fog, "The microarchitecture of Intel and AMD CPU's",
24     www.agner.org/optimize (2008).
25   - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs
26     and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008).
27 
28 Idioms:
29   Many functions in this module use
30   'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)'
31   in order to disable implicit conversion.
32 
33 */
34 module std.internal.math.biguintcore;
35 
version(D_InlineAsm_X86)36 version (D_InlineAsm_X86)
37 {
38     import std.internal.math.biguintx86;
39 }
40 else
41 {
42     import std.internal.math.biguintnoasm;
43 }
44 
45 alias multibyteAdd = multibyteAddSub!('+');
46 alias multibyteSub = multibyteAddSub!('-');
47 
48 
49 import core.cpuid;
50 public import std.ascii : LetterCase;
51 import std.range.primitives;
52 import std.traits;
53 
this()54 shared static this()
55 {
56     CACHELIMIT = core.cpuid.datacache[0].size*1024/2;
57 }
58 
59 private:
60 // Limits for when to switch between algorithms.
61 immutable size_t CACHELIMIT;   // Half the size of the data cache.
62 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division
63 
64 
65 // These constants are used by shift operations
66 static if (BigDigit.sizeof == int.sizeof)
67 {
68     enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 };
69     alias BIGHALFDIGIT = ushort;
70 }
71 else static if (BigDigit.sizeof == long.sizeof)
72 {
73     alias BIGHALFDIGIT = uint;
74     enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 };
75 }
76 else static assert(0, "Unsupported BigDigit size");
77 
78 import std.exception : assumeUnique;
79 import std.traits : isIntegral;
80 enum BigDigitBits = BigDigit.sizeof*8;
81 template maxBigDigits(T)
82 if (isIntegral!T)
83 {
84     enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof;
85 }
86 
87 static immutable BigDigit[] ZERO = [0];
88 static immutable BigDigit[] ONE = [1];
89 static immutable BigDigit[] TWO = [2];
90 static immutable BigDigit[] TEN = [10];
91 
92 
93 public:
94 
95 /// BigUint performs memory management and wraps the low-level calls.
96 struct BigUint
97 {
98 private:
invariantBigUint99     pure invariant()
100     {
101         assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ));
102     }
103 
104     immutable(BigDigit) [] data = ZERO;
105 
thisBigUint106     this(immutable(BigDigit) [] x) pure nothrow @nogc @safe
107     {
108        data = x;
109     }
110   package(std)  // used from: std.bigint
111     this(T)(T x) pure nothrow @safe if (isIntegral!T)
112     {
113         opAssign(x);
114     }
115 
functionBigUint116     enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc {
117         return assumeUnique(input);
118     };
119 public:
120     // Length in uints
uintLengthBigUint121     @property size_t uintLength() pure nothrow const @safe @nogc
122     {
123         static if (BigDigit.sizeof == uint.sizeof)
124         {
125             return data.length;
126         }
127         else static if (BigDigit.sizeof == ulong.sizeof)
128         {
129             return data.length * 2 -
130             ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0);
131         }
132     }
ulongLengthBigUint133     @property size_t ulongLength() pure nothrow const @safe @nogc
134     {
135         static if (BigDigit.sizeof == uint.sizeof)
136         {
137             return (data.length + 1) >> 1;
138         }
139         else static if (BigDigit.sizeof == ulong.sizeof)
140         {
141             return data.length;
142         }
143     }
144 
145     // The value at (cast(ulong[]) data)[n]
peekUlongBigUint146     ulong peekUlong(int n) pure nothrow const @safe @nogc
147     {
148         static if (BigDigit.sizeof == int.sizeof)
149         {
150             if (data.length == n*2 + 1) return data[n*2];
151             return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 );
152         }
153         else static if (BigDigit.sizeof == long.sizeof)
154         {
155             return data[n];
156         }
157     }
peekUintBigUint158     uint peekUint(int n) pure nothrow const @safe @nogc
159     {
160         static if (BigDigit.sizeof == int.sizeof)
161         {
162             return data[n];
163         }
164         else
165         {
166             immutable x = data[n >> 1];
167             return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x;
168         }
169     }
170 public:
171     ///
172     void opAssign(Tulong)(Tulong u) pure nothrow @safe if (is (Tulong == ulong))
173     {
174         if (u == 0) data = ZERO;
175         else if (u == 1) data = ONE;
176         else if (u == 2) data = TWO;
177         else if (u == 10) data = TEN;
178         else
179         {
180             static if (BigDigit.sizeof == int.sizeof)
181             {
182                 uint ulo = cast(uint)(u & 0xFFFF_FFFF);
183                 uint uhi = cast(uint)(u >> 32);
184                 if (uhi == 0)
185                 {
186                     data = [ulo];
187                 }
188                 else
189                 {
190                     data = [ulo, uhi];
191                 }
192             }
193             else static if (BigDigit.sizeof == long.sizeof)
194             {
195                 data = [u];
196             }
197         }
198     }
199     void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe
200     {
201         this.data = y.data;
202     }
203 
204     ///
205     int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe
206     {
207         if (data.length != y.data.length)
208             return (data.length > y.data.length) ?  1 : -1;
209         size_t k = highestDifferentDigit(data, y.data);
210         if (data[k] == y.data[k])
211             return 0;
212         return data[k] > y.data[k] ? 1 : -1;
213     }
214 
215     ///
216     int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe if (is (Tulong == ulong))
217     {
218         if (data.length > maxBigDigits!Tulong)
219             return 1;
220 
221         foreach_reverse (i; 0 .. maxBigDigits!Tulong)
222         {
223             BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits));
224             if (tmp == 0)
225                 if (data.length >= i+1)
226                 {
227                     // Since ZERO is [0], so we cannot simply return 1 here, as
228                     // data[i] would be 0 for i == 0 in that case.
229                     return (data[i] > 0) ? 1 : 0;
230                 }
231                 else
232                     continue;
233             else
234                 if (i+1 > data.length)
235                     return -1;
236                 else if (tmp != data[i])
237                     return data[i] > tmp ? 1 : -1;
238         }
239         return 0;
240     }
241 
242     bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe
243     {
244            return y.data[] == data[];
245     }
246 
247     bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe
248     {
249         if (data.length > 2)
250             return false;
251         uint ylo = cast(uint)(y & 0xFFFF_FFFF);
252         uint yhi = cast(uint)(y >> 32);
253         if (data.length == 2 && data[1]!=yhi)
254             return false;
255         if (data.length == 1 && yhi != 0)
256             return false;
257         return (data[0] == ylo);
258     }
259 
isZeroBigUint260     bool isZero() pure const nothrow @safe @nogc
261     {
262         return data.length == 1 && data[0] == 0;
263     }
264 
numBytesBigUint265     size_t numBytes() pure nothrow const @safe @nogc
266     {
267         return data.length * BigDigit.sizeof;
268     }
269 
270     // the extra bytes are added to the start of the string
toDecimalStringBigUint271     char [] toDecimalString(int frontExtraBytes) const pure nothrow
272     {
273         immutable predictlength = 20+20*(data.length/2); // just over 19
274         char [] buff = new char[frontExtraBytes + predictlength];
275         ptrdiff_t sofar = biguintToDecimal(buff, data.dup);
276         return buff[sofar-frontExtraBytes..$];
277     }
278 
279     /** Convert to a hex string, printing a minimum number of digits 'minPadding',
280      *  allocating an additional 'frontExtraBytes' at the start of the string.
281      *  Padding is done with padChar, which may be '0' or ' '.
282      *  'separator' is a digit separation character. If non-zero, it is inserted
283      *  between every 8 digits.
284      *  Separator characters do not contribute to the minPadding.
285      */
286     char [] toHexString(int frontExtraBytes, char separator = 0,
287             int minPadding=0, char padChar = '0',
288             LetterCase letterCase = LetterCase.upper) const pure nothrow @safe
289     {
290         // Calculate number of extra padding bytes
291         size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof)
292             ? minPadding - data.length * 2 * BigDigit.sizeof : 0;
293 
294         // Length not including separator bytes
295         size_t lenBytes = data.length * 2 * BigDigit.sizeof;
296 
297         // Calculate number of separator bytes
298         size_t mainSeparatorBytes = separator ? (lenBytes  / 8) - 1 : 0;
299         immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0;
300 
301         char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes];
302         biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase);
303         if (extraPad > 0)
304         {
305             if (separator)
306             {
307                 size_t start = frontExtraBytes; // first index to pad
308                 if (extraPad &7)
309                 {
310                     // Do 1 to 7 extra zeros.
311                     buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar;
312                     buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator);
313                     start += (extraPad & 7) + 1;
314                 }
315                 for (int i=0; i< (extraPad >> 3); ++i)
316                 {
317                     buff[start .. start + 8] = padChar;
318                     buff[start + 8] = (padChar == ' ' ? ' ' : separator);
319                     start += 9;
320                 }
321             }
322             else
323             {
324                 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar;
325             }
326         }
327         int z = frontExtraBytes;
328         if (lenBytes > minPadding)
329         {
330             // Strip leading zeros.
331             ptrdiff_t maxStrip = lenBytes - minPadding;
332             while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0)
333             {
334                 ++z;
335                 --maxStrip;
336             }
337         }
338         if (padChar!='0')
339         {
340             // Convert leading zeros into padChars.
341             for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k)
342             {
343                 if (buff[k]=='0') buff[k]=padChar;
344             }
345         }
346         return buff[z-frontExtraBytes..$];
347     }
348 
349     /**
350      * Convert to an octal string.
351      */
toOctalStringBigUint352     char[] toOctalString() const
353     {
354         auto predictLength = 1 + data.length*BigDigitBits / 3;
355         char[] buff = new char[predictLength];
356         size_t firstNonZero = biguintToOctal(buff, data);
357         return buff[firstNonZero .. $];
358     }
359 
360     // return false if invalid character found
361     bool fromHexString(Range)(Range s) if (
362         isBidirectionalRange!Range && isSomeChar!(ElementType!Range))
363     {
364         import std.range : walkLength;
365 
366         //Strip leading zeros
367         while (!s.empty && s.front == '0')
368             s.popFront;
369 
370         if (s.empty)
371         {
372             data = ZERO;
373             return true;
374         }
375 
376         immutable len = (s.save.walkLength + 15) / 4;
377         auto tmp = new BigDigit[len + 1];
378         uint part, sofar, partcount;
379 
foreach_reverseBigUint380         foreach_reverse (character; s)
381         {
382             if (character == '_')
383                 continue;
384 
385             uint x;
386             if (character >= '0' && character <= '9')
387             {
388                 x = character - '0';
389             }
390             else if (character >= 'A' && character <= 'F')
391             {
392                 x = character - 'A' + 10;
393             }
394             else if (character >= 'a' && character <= 'f')
395             {
396                 x = character - 'a' + 10;
397             }
398             else
399             {
400                 return false;
401             }
402 
403             part >>= 4;
404             part |= (x << (32 - 4));
405             ++partcount;
406 
407             if (partcount == 8)
408             {
409                 tmp[sofar] = part;
410                 ++sofar;
411                 partcount = 0;
412                 part = 0;
413             }
414         }
415         if (part)
416         {
417             for ( ; partcount != 8; ++partcount) part >>= 4;
418             tmp[sofar] = part;
419             ++sofar;
420         }
421         if (sofar == 0)
422             data = ZERO;
423         else
424             data = trustedAssumeUnique(tmp[0 .. sofar]);
425 
426         return true;
427     }
428 
429     // return true if OK; false if erroneous characters found
430     bool fromDecimalString(Range)(Range s) if (
431         isForwardRange!Range && isSomeChar!(ElementType!Range))
432     {
433         import std.range : walkLength;
434 
435         while (!s.empty && s.front == '0')
436         {
437             s.popFront;
438         }
439 
440         if (s.empty)
441         {
442             data = ZERO;
443             return true;
444         }
445 
446         auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19;
447         auto tmp = new BigDigit[predict_length];
448 
449         tmp.length = biguintFromDecimal(tmp, s);
450 
451         data = trustedAssumeUnique(tmp);
452         return true;
453     }
454 
455     ////////////////////////
456     //
457     // All of these member functions create a new BigUint.
458 
459     // return x >> y
460     BigUint opShr(Tulong)(Tulong y) pure nothrow const if (is (Tulong == ulong))
461     {
462         assert(y>0);
463         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
464         if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO);
465         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
466         if (bits == 0)
467         {
468             return BigUint(data[words..$]);
469         }
470         else
471         {
472             uint [] result = new BigDigit[data.length - words];
473             multibyteShr(result, data[words..$], bits);
474 
475             if (result.length > 1 && result[$-1] == 0)
476                 return BigUint(trustedAssumeUnique(result[0 .. $-1]));
477             else
478                 return BigUint(trustedAssumeUnique(result));
479         }
480     }
481 
482     // return x << y
483     BigUint opShl(Tulong)(Tulong y) pure nothrow const if (is (Tulong == ulong))
484     {
485         assert(y>0);
486         if (isZero()) return this;
487         uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
488         assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max));
489         uint words = cast(uint)(y >> LG2BIGDIGITBITS);
490         BigDigit [] result = new BigDigit[data.length + words+1];
491         result[0 .. words] = 0;
492         if (bits == 0)
493         {
494             result[words .. words+data.length] = data[];
495             return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
496         }
497         else
498         {
499             immutable c = multibyteShl(result[words .. words+data.length], data, bits);
500             if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
501             result[$-1] = c;
502             return BigUint(trustedAssumeUnique(result));
503         }
504     }
505 
506     // If wantSub is false, return x + y, leaving sign unchanged
507     // If wantSub is true, return abs(x - y), negating sign if x < y
508     static BigUint addOrSubInt(Tulong)(const BigUint x, Tulong y,
509             bool wantSub, ref bool sign) pure nothrow if (is(Tulong == ulong))
510     {
511         BigUint r;
512         if (wantSub)
513         {   // perform a subtraction
514             if (x.data.length > 2)
515             {
516                 r.data = subInt(x.data, y);
517             }
518             else
519             {   // could change sign!
520                 ulong xx = x.data[0];
521                 if (x.data.length > 1)
522                     xx += (cast(ulong) x.data[1]) << 32;
523                 ulong d;
524                 if (xx <= y)
525                 {
526                     d = y - xx;
527                     sign = !sign;
528                 }
529                 else
530                 {
531                     d = xx - y;
532                 }
533                 if (d == 0)
534                 {
535                     r = 0UL;
536                     sign = false;
537                     return r;
538                 }
539                 if (d > uint.max)
540                 {
541                     r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)];
542                 }
543                 else
544                 {
545                     r.data = [cast(uint)(d & 0xFFFF_FFFF)];
546                 }
547             }
548         }
549         else
550         {
551             r.data = addInt(x.data, y);
552         }
553         return r;
554     }
555 
556     // If wantSub is false, return x + y, leaving sign unchanged.
557     // If wantSub is true, return abs(x - y), negating sign if x<y
addOrSubBigUint558     static BigUint addOrSub(BigUint x, BigUint y, bool wantSub, bool *sign)
559         pure nothrow
560     {
561         BigUint r;
562         if (wantSub)
563         {   // perform a subtraction
564             bool negative;
565             r.data = sub(x.data, y.data, &negative);
566             *sign ^= negative;
567             if (r.isZero())
568             {
569                 *sign = false;
570             }
571         }
572         else
573         {
574             r.data = add(x.data, y.data);
575         }
576         return r;
577     }
578 
579 
580     //  return x*y.
581     //  y must not be zero.
582     static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow
583     {
584         if (y == 0 || x == 0) return BigUint(ZERO);
585         uint hi = cast(uint)(y >>> 32);
586         uint lo = cast(uint)(y & 0xFFFF_FFFF);
587         uint [] result = new BigDigit[x.data.length+1+(hi != 0)];
588         result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0);
589         if (hi != 0)
590         {
591             result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1],
592                 x.data, hi, 0);
593         }
594         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
595     }
596 
597     /*  return x * y.
598      */
mulBigUint599     static BigUint mul(BigUint x, BigUint y) pure nothrow
600     {
601         if (y == 0 || x == 0)
602             return BigUint(ZERO);
603         auto len = x.data.length + y.data.length;
604         BigDigit [] result = new BigDigit[len];
605         if (y.data.length > x.data.length)
606         {
607             mulInternal(result, y.data, x.data);
608         }
609         else
610         {
611             if (x.data[]==y.data[]) squareInternal(result, x.data);
612             else mulInternal(result, x.data, y.data);
613         }
614         // the highest element could be zero,
615         // in which case we need to reduce the length
616         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
617     }
618 
619     // return x / y
620     static BigUint divInt(T)(BigUint x, T y_) pure nothrow
621     if ( is(Unqual!T == uint) )
622     {
623         uint y = y_;
624         if (y == 1)
625             return x;
626         uint [] result = new BigDigit[x.data.length];
627         if ((y&(-y))==y)
628         {
629             assert(y != 0, "BigUint division by zero");
630             // perfect power of 2
631             uint b = 0;
632             for (;y != 1; y>>=1)
633             {
634                 ++b;
635             }
636             multibyteShr(result, x.data, b);
637         }
638         else
639         {
640             result[] = x.data[];
641             cast(void) multibyteDivAssign(result, y, 0);
642         }
643         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
644     }
645 
646     static BigUint divInt(T)(BigUint x, T y) pure nothrow
647     if ( is(Unqual!T == ulong) )
648     {
649         if (y <= uint.max)
650             return divInt!uint(x, cast(uint) y);
651         if (x.data.length < 2)
652             return BigUint(ZERO);
653         uint hi = cast(uint)(y >>> 32);
654         uint lo = cast(uint)(y & 0xFFFF_FFFF);
655         immutable uint[2] z = [lo, hi];
656         BigDigit[] result = new BigDigit[x.data.length - z.length + 1];
657         divModInternal(result, null, x.data, z[]);
658         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
659     }
660 
661     // return x % y
662     static uint modInt(T)(BigUint x, T y_) pure if ( is(Unqual!T == uint) )
663     {
664         import core.memory : GC;
665         uint y = y_;
666         assert(y != 0);
667         if ((y&(-y)) == y)
668         {   // perfect power of 2
669             return x.data[0] & (y-1);
670         }
671         else
672         {
673             // horribly inefficient - malloc, copy, & store are unnecessary.
674             uint [] wasteful = new BigDigit[x.data.length];
675             wasteful[] = x.data[];
676             immutable rem = multibyteDivAssign(wasteful, y, 0);
677             () @trusted { GC.free(wasteful.ptr); } ();
678             return rem;
679         }
680     }
681 
682     // return x / y
divBigUint683     static BigUint div(BigUint x, BigUint y) pure nothrow
684     {
685         if (y.data.length > x.data.length)
686             return BigUint(ZERO);
687         if (y.data.length == 1)
688             return divInt(x, y.data[0]);
689         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
690            divModInternal(result, null, x.data, y.data);
691         return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
692     }
693 
694     // return x % y
modBigUint695     static BigUint mod(BigUint x, BigUint y) pure nothrow
696     {
697         if (y.data.length > x.data.length) return x;
698         if (y.data.length == 1)
699         {
700             return BigUint([modInt(x, y.data[0])]);
701         }
702         BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
703         BigDigit [] rem = new BigDigit[y.data.length];
704         divModInternal(result, rem, x.data, y.data);
705         return BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
706     }
707 
708     // return x op y
709     static BigUint bitwiseOp(string op)(BigUint x, BigUint y, bool xSign, bool ySign, ref bool resultSign)
710     pure nothrow @safe if (op == "|" || op == "^" || op == "&")
711     {
712         auto d1 = includeSign(x.data, y.uintLength, xSign);
713         auto d2 = includeSign(y.data, x.uintLength, ySign);
714 
715         foreach (i; 0 .. d1.length)
716         {
717             mixin("d1[i] " ~ op ~ "= d2[i];");
718         }
719 
720         mixin("resultSign = xSign " ~ op ~ " ySign;");
721 
722         if (resultSign)
723         {
724             twosComplement(d1, d1);
725         }
726 
727         return BigUint(removeLeadingZeros(trustedAssumeUnique(d1)));
728     }
729 
730     /**
731      * Return a BigUint which is x raised to the power of y.
732      * Method: Powers of 2 are removed from x, then left-to-right binary
733      * exponentiation is used.
734      * Memory allocation is minimized: at most one temporary BigUint is used.
735      */
powBigUint736     static BigUint pow(BigUint x, ulong y) pure nothrow
737     {
738         // Deal with the degenerate cases first.
739         if (y == 0) return BigUint(ONE);
740         if (y == 1) return x;
741         if (x == 0 || x == 1) return x;
742 
743         BigUint result;
744 
745         // Simplify, step 1: Remove all powers of 2.
746         uint firstnonzero = firstNonZeroDigit(x.data);
747         // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits))
748         // where BigDigitBits = BigDigit.sizeof * 8
749 
750         // See if x[firstnonzero..$] can now fit into a single digit.
751         bool singledigit = ((x.data.length - firstnonzero) == 1);
752         // If true, then x0 is that digit
753         // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits))
754         BigDigit x0 = x.data[firstnonzero];
755         assert(x0 != 0);
756         // Length of the non-zero portion
757         size_t nonzerolength = x.data.length - firstnonzero;
758         ulong y0;
759         uint evenbits = 0; // number of even bits in the bottom of x
760         while (!(x0 & 1))
761         {
762             x0 >>= 1;
763             ++evenbits;
764         }
765 
766         if (x.data.length- firstnonzero == 2)
767         {
768             // Check for a single digit straddling a digit boundary
769             const BigDigit x1 = x.data[firstnonzero+1];
770             if ((x1 >> evenbits) == 0)
771             {
772                 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits));
773                 singledigit = true;
774             }
775         }
776         // Now if (singledigit), x^^y  = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
777 
778         uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end
779 
780         // Simplify, step 2: For singledigits, see if we can trivially reduce y
781 
782         BigDigit finalMultiplier = 1UL;
783 
784         if (singledigit)
785         {
786             // x fits into a single digit. Raise it to the highest power we can
787             // that still fits into a single digit, then reduce the exponent accordingly.
788             // We're quite likely to have a residual multiply at the end.
789             // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100.
790             // and 5^^13 still fits into a uint.
791             evenshiftbits  = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK);
792             if (x0 == 1)
793             {   // Perfect power of 2
794                 result = 1UL;
795                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
796             }
797             immutable p = highestPowerBelowUintMax(x0);
798             if (y <= p)
799             {   // Just do it with pow
800                 result = cast(ulong) intpow(x0, y);
801                 if (evenbits + firstnonzero == 0)
802                     return result;
803                 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
804             }
805             y0 = y / p;
806             finalMultiplier = intpow(x0, y - y0*p);
807             x0 = intpow(x0, p);
808             // Result is x0
809             nonzerolength = 1;
810         }
811         // Now if (singledigit), x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
812 
813         // Perform a crude check for overflow and allocate result buffer.
814         // The length required is y * lg2(x) bits.
815         // which will always fit into y*x.length digits. But this is
816         // a gross overestimate if x is small (length 1 or 2) and the highest
817         // digit is nearly empty.
818         // A better estimate is:
819         //   y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits,
820         //  and the first term is always between
821         //  y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and
822         //  y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS
823         // For single digit payloads, we already have
824         //   x^^y  = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
825         // and x0 is almost a full digit, so it's a tight estimate.
826         // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y
827         // Note that the divisions must be rounded up.
828 
829         // Estimated length in BigDigits
830         immutable estimatelength = singledigit
831             ? 1 + y0 + ((evenbits*y  + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y
832             :  x.data.length * y;
833         // Imprecise check for overflow. Makes the extreme cases easier to debug
834         // (less extreme overflow will result in an out of memory error).
835         if (estimatelength > uint.max/(4*BigDigit.sizeof))
836             assert(0, "Overflow in BigInt.pow");
837 
838         // The result buffer includes space for all the trailing zeros
839         BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength];
840 
841         // Do all the powers of 2!
842         size_t result_start = cast(size_t)( firstnonzero * y
843             + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0));
844 
845         resultBuffer[0 .. result_start] = 0;
846         BigDigit [] t1 = resultBuffer[result_start..$];
847         BigDigit [] r1;
848 
849         if (singledigit)
850         {
851             r1 = t1[0 .. 1];
852             r1[0] = x0;
853             y = y0;
854         }
855         else
856         {
857             // It's not worth right shifting by evenbits unless we also shrink the length after each
858             // multiply or squaring operation. That might still be worthwhile for large y.
859             r1 = t1[0 .. x.data.length - firstnonzero];
860             r1[0..$] = x.data[firstnonzero..$];
861         }
862 
863         if (y>1)
864         {   // Set r1 = r1 ^^ y.
865             // The secondary buffer only needs space for the multiplication results
866             BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start];
867             BigDigit [] r2;
868 
869             int shifts = 63; // num bits in a long
870             while (!(y & 0x8000_0000_0000_0000L))
871             {
872                 y <<= 1;
873                 --shifts;
874             }
875             y <<=1;
876 
877             while (y != 0)
878             {
879                 // For each bit of y: Set r1 =  r1 * r1
880                 // If the bit is 1, set r1 = r1 * x
881                 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5.
882                 // Optimization opportunity: if more than 2 bits in y are set,
883                 // it's usually possible to reduce the number of multiplies
884                 // by caching odd powers of x. eg for y = 54,
885                 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2
886                 r2 = t2[0 .. r1.length*2];
887                 squareInternal(r2, r1);
888                 if (y & 0x8000_0000_0000_0000L)
889                 {
890                     r1 = t1[0 .. r2.length + nonzerolength];
891                     if (singledigit)
892                     {
893                         r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0);
894                     }
895                     else
896                     {
897                         mulInternal(r1, r2, x.data[firstnonzero..$]);
898                     }
899                 }
900                 else
901                 {
902                     r1 = t1[0 .. r2.length];
903                     r1[] = r2[];
904                 }
905                 y <<=1;
906                 shifts--;
907             }
908             while (shifts>0)
909             {
910                 r2 = t2[0 .. r1.length * 2];
911                 squareInternal(r2, r1);
912                 r1 = t1[0 .. r2.length];
913                 r1[] = r2[];
914                 --shifts;
915             }
916         }
917 
918         if (finalMultiplier != 1)
919         {
920             const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0);
921             if (carry)
922             {
923                 r1 = t1[0 .. r1.length + 1];
924                 r1[$-1] = carry;
925             }
926         }
927         if (evenshiftbits)
928         {
929             const BigDigit carry = multibyteShl(r1, r1, evenshiftbits);
930             if (carry != 0)
931             {
932                 r1 = t1[0 .. r1.length + 1];
933                 r1[$ - 1] = carry;
934             }
935         }
936         while (r1[$ - 1]==0)
937         {
938             r1=r1[0 .. $ - 1];
939         }
940         return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length]));
941     }
942 
943     // Implement toHash so that BigUint works properly as an AA key.
toHashBigUint944     size_t toHash() const @trusted nothrow
945     {
946         return typeid(data).getHash(&data);
947     }
948 
949 } // end BigUint
950 
951 @safe pure nothrow unittest
952 {
953     // ulong comparison test
954     BigUint a = [1];
955     assert(a == 1);
956     assert(a < 0x8000_0000_0000_0000UL); // bug 9548
957 
958     // bug 12234
959     BigUint z = [0];
960     assert(z == 0UL);
961     assert(!(z > 0UL));
962     assert(!(z < 0UL));
963 }
964 
965 // Remove leading zeros from x, to restore the BigUint invariant
inout(BigDigit)966 inout(BigDigit) [] removeLeadingZeros(inout(BigDigit) [] x) pure nothrow @safe
967 {
968     size_t k = x.length;
969     while (k>1 && x[k - 1]==0) --k;
970     return x[0 .. k];
971 }
972 
973 pure @system unittest
974 {
975    BigUint r = BigUint([5]);
976    BigUint t = BigUint([7]);
977    BigUint s = BigUint.mod(r, t);
978    assert(s == 5);
979 }
980 
981 
982 @safe pure unittest
983 {
984     BigUint r;
985     r = 5UL;
986     assert(r.peekUlong(0) == 5UL);
987     assert(r.peekUint(0) == 5U);
988     r = 0x1234_5678_9ABC_DEF0UL;
989     assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL);
990     assert(r.peekUint(0) == 0x9ABC_DEF0U);
991 }
992 
993 
994 // Pow tests
995 pure @system unittest
996 {
997     BigUint r, s;
998     r.fromHexString("80000000_00000001");
999     s = BigUint.pow(r, 5);
1000     r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000"
1001       ~ "_00000002_80000000_00000001");
1002     assert(s == r);
1003     s = 10UL;
1004     s = BigUint.pow(s, 39);
1005     r.fromDecimalString("1000000000000000000000000000000000000000");
1006     assert(s == r);
1007     r.fromHexString("1_E1178E81_00000000");
1008     s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds
1009 
1010     r.fromDecimalString("000_000_00");
1011     assert(r == 0);
1012     r.fromDecimalString("0007");
1013     assert(r == 7);
1014     r.fromDecimalString("0");
1015     assert(r == 0);
1016 }
1017 
1018 // Radix conversion tests
1019 @safe pure unittest
1020 {
1021     BigUint r;
1022     r.fromHexString("1_E1178E81_00000000");
1023     assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000");
1024     assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000");
1025     assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000");
1026     assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000");
1027     assert(r.toHexString(0, '_', 16+8+8) ==   "00000000_00000001_E1178E81_00000000");
1028     assert(r.toHexString(0, '_', 16+8+8+1) ==      "0_00000000_00000001_E1178E81_00000000");
1029     assert(r.toHexString(0, '_', 16+8+8+1, ' ') == "                  1_E1178E81_00000000");
1030     assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000");
1031     r = 0UL;
1032     assert(r.toHexString(0, '_', 0) == "0");
1033     assert(r.toHexString(0, '_', 7) == "0000000");
1034     assert(r.toHexString(0, '_', 7, ' ') == "      0");
1035     assert(r.toHexString(0, '#', 9) == "0#00000000");
1036     assert(r.toHexString(0, 0, 9) == "000000000");
1037 }
1038 
1039 //
1040 @safe pure unittest
1041 {
1042     BigUint r;
1043     r.fromHexString("1_E1178E81_00000000");
1044     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000");
1045     assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000");
1046     assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000");
1047     assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000");
1048     assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) ==   "00000000_00000001_e1178e81_00000000");
1049     assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000");
1050     assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == "                  1_e1178e81_00000000");
1051     assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000");
1052     r = 0UL;
1053     assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0");
1054     assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000");
1055     assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == "      0");
1056     assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000");
1057     assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000");
1058     assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000");
1059 }
1060 
1061 
1062 private:
twosComplement(const (BigDigit)[]x,BigDigit[]result)1063 void twosComplement(const(BigDigit) [] x, BigDigit[] result)
1064 pure nothrow @safe
1065 {
1066     foreach (i; 0 .. x.length)
1067     {
1068         result[i] = ~x[i];
1069     }
1070     result[x.length..$] = BigDigit.max;
1071 
1072     foreach (i; 0 .. result.length)
1073     {
1074         if (result[i] == BigDigit.max)
1075         {
1076             result[i] = 0;
1077         }
1078         else
1079         {
1080             result[i] += 1;
1081             break;
1082         }
1083     }
1084 }
1085 
1086 // Encode BigInt as BigDigit array (sign and 2's complement)
includeSign(const (BigDigit)[]x,size_t minSize,bool sign)1087 BigDigit[] includeSign(const(BigDigit) [] x, size_t minSize, bool sign)
1088 pure nothrow @safe
1089 {
1090     size_t length = (x.length > minSize) ? x.length : minSize;
1091     BigDigit [] result = new BigDigit[length];
1092     if (sign)
1093     {
1094         twosComplement(x, result);
1095     }
1096     else
1097     {
1098         result[0 .. x.length] = x;
1099     }
1100     return result;
1101 }
1102 
1103 // works for any type
intpow(T)1104 T intpow(T)(T x, ulong n) pure nothrow @safe
1105 {
1106     T p;
1107 
1108     switch (n)
1109     {
1110     case 0:
1111         p = 1;
1112         break;
1113 
1114     case 1:
1115         p = x;
1116         break;
1117 
1118     case 2:
1119         p = x * x;
1120         break;
1121 
1122     default:
1123         p = 1;
1124         while (1)
1125         {
1126             if (n & 1)
1127                 p *= x;
1128             n >>= 1;
1129             if (!n)
1130                 break;
1131             x *= x;
1132         }
1133         break;
1134     }
1135     return p;
1136 }
1137 
1138 
1139 //  returns the maximum power of x that will fit in a uint.
highestPowerBelowUintMax(uint x)1140 int highestPowerBelowUintMax(uint x) pure nothrow @safe
1141 {
1142      assert(x>1);
1143      static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9,
1144                                           8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7];
1145      if (x<24) return maxpwr[x-2];
1146      if (x<41) return 6;
1147      if (x<85) return 5;
1148      if (x<256) return 4;
1149      if (x<1626) return 3;
1150      if (x<65_536) return 2;
1151      return 1;
1152 }
1153 
1154 //  returns the maximum power of x that will fit in a ulong.
highestPowerBelowUlongMax(uint x)1155 int highestPowerBelowUlongMax(uint x) pure nothrow @safe
1156 {
1157      assert(x>1);
1158      static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18,
1159                                          17, 17, 16, 16, 15, 15, 15, 15, 14, 14,
1160                                          14, 14, 13, 13, 13, 13, 13, 13, 13, 12,
1161                                          12, 12, 12, 12, 12, 12, 12, 12, 12];
1162      if (x<41) return maxpwr[x-2];
1163      if (x<57) return 11;
1164      if (x<85) return 10;
1165      if (x<139) return 9;
1166      if (x<256) return 8;
1167      if (x<566) return 7;
1168      if (x<1626) return 6;
1169      if (x<7132) return 5;
1170      if (x<65_536) return 4;
1171      if (x<2_642_246) return 3;
1172      return 2;
1173 }
1174 
version(unittest)1175 version (unittest)
1176 {
1177 
1178 int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe
1179 {
1180      int pwr = 1;
1181      for (ulong q = x;x*q < cast(ulong) uint.max; )
1182      {
1183          q*=x; ++pwr;
1184      }
1185      return pwr;
1186 }
1187 
1188 @safe pure unittest
1189 {
1190     assert(highestPowerBelowUintMax(10)==9);
1191     for (int k=82; k<88; ++k)
1192     {
1193         assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k));
1194     }
1195 }
1196 
1197 }
1198 
1199 
1200 /*  General unsigned subtraction routine for bigints.
1201  *  Sets result = x - y. If the result is negative, negative will be true.
1202  */
sub(const BigDigit[]x,const BigDigit[]y,bool * negative)1203 BigDigit [] sub(const BigDigit [] x, const BigDigit [] y, bool *negative)
1204 pure nothrow
1205 {
1206     if (x.length == y.length)
1207     {
1208         // There's a possibility of cancellation, if x and y are almost equal.
1209         ptrdiff_t last = highestDifferentDigit(x, y);
1210         BigDigit [] result = new BigDigit[last+1];
1211         if (x[last] < y[last])
1212         {   // we know result is negative
1213             multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0);
1214             *negative = true;
1215         }
1216         else
1217         {   // positive or zero result
1218             multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0);
1219             *negative = false;
1220         }
1221         while (result.length > 1 && result[$-1] == 0)
1222         {
1223             result = result[0..$-1];
1224         }
1225 //        if (result.length >1 && result[$-1]==0) return result[0..$-1];
1226         return result;
1227     }
1228     // Lengths are different
1229     const(BigDigit) [] large, small;
1230     if (x.length < y.length)
1231     {
1232         *negative = true;
1233         large = y; small = x;
1234     }
1235     else
1236     {
1237         *negative = false;
1238         large = x; small = y;
1239     }
1240     // result.length will be equal to larger length, or could decrease by 1.
1241 
1242 
1243     BigDigit [] result = new BigDigit[large.length];
1244     BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0);
1245     result[small.length..$] = large[small.length..$];
1246     if (carry)
1247     {
1248         multibyteIncrementAssign!('-')(result[small.length..$], carry);
1249     }
1250     while (result.length > 1 && result[$-1] == 0)
1251     {
1252         result = result[0..$-1];
1253     }
1254     return result;
1255 }
1256 
1257 
1258 // return a + b
add(const BigDigit[]a,const BigDigit[]b)1259 BigDigit [] add(const BigDigit [] a, const BigDigit [] b) pure nothrow
1260 {
1261     const(BigDigit) [] x, y;
1262     if (a.length < b.length)
1263     {
1264         x = b; y = a;
1265     }
1266     else
1267     {
1268         x = a; y = b;
1269     }
1270     // now we know x.length > y.length
1271     // create result. add 1 in case it overflows
1272     BigDigit [] result = new BigDigit[x.length + 1];
1273 
1274     BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0);
1275     if (x.length != y.length)
1276     {
1277         result[y.length..$-1]= x[y.length..$];
1278         carry  = multibyteIncrementAssign!('+')(result[y.length..$-1], carry);
1279     }
1280     if (carry)
1281     {
1282         result[$-1] = carry;
1283         return result;
1284     }
1285     else
1286         return result[0..$-1];
1287 }
1288 
1289 /**  return x + y
1290  */
addInt(const BigDigit[]x,ulong y)1291 BigDigit [] addInt(const BigDigit[] x, ulong y) pure nothrow
1292 {
1293     uint hi = cast(uint)(y >>> 32);
1294     uint lo = cast(uint)(y& 0xFFFF_FFFF);
1295     auto len = x.length;
1296     if (x.length < 2 && hi != 0) ++len;
1297     BigDigit [] result = new BigDigit[len+1];
1298     result[0 .. x.length] = x[];
1299     if (x.length < 2 && hi != 0)
1300     {
1301         result[1]=hi;
1302         hi=0;
1303     }
1304     uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo);
1305     if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi);
1306     if (carry)
1307     {
1308         result[$-1] = carry;
1309         return result;
1310     }
1311     else
1312         return result[0..$-1];
1313 }
1314 
1315 /** Return x - y.
1316  *  x must be greater than y.
1317  */
subInt(const BigDigit[]x,ulong y)1318 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow
1319 {
1320     uint hi = cast(uint)(y >>> 32);
1321     uint lo = cast(uint)(y & 0xFFFF_FFFF);
1322     BigDigit [] result = new BigDigit[x.length];
1323     result[] = x[];
1324     multibyteIncrementAssign!('-')(result[], lo);
1325     if (hi)
1326         multibyteIncrementAssign!('-')(result[1..$], hi);
1327     if (result[$-1] == 0)
1328         return result[0..$-1];
1329     else
1330         return result;
1331 }
1332 
1333 /**  General unsigned multiply routine for bigints.
1334  *  Sets result = x * y.
1335  *
1336  *  The length of y must not be larger than the length of x.
1337  *  Different algorithms are used, depending on the lengths of x and y.
1338  *  TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the
1339  *  unbalanced case. (But I doubt it would be faster in practice).
1340  *
1341  */
mulInternal(BigDigit[]result,const (BigDigit)[]x,const (BigDigit)[]y)1342 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1343     pure nothrow
1344 {
1345     import core.memory : GC;
1346     assert( result.length == x.length + y.length );
1347     assert( y.length > 0 );
1348     assert( x.length >= y.length);
1349     if (y.length <= KARATSUBALIMIT)
1350     {
1351         // Small multiplier, we'll just use the asm classic multiply.
1352         if (y.length == 1)
1353         {   // Trivial case, no cache effects to worry about
1354             result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0);
1355             return;
1356         }
1357 
1358         if (x.length + y.length < CACHELIMIT)
1359             return mulSimple(result, x, y);
1360 
1361         // If x is so big that it won't fit into the cache, we divide it into chunks
1362         // Every chunk must be greater than y.length.
1363         // We make the first chunk shorter, if necessary, to ensure this.
1364 
1365         auto chunksize = CACHELIMIT / y.length;
1366         immutable residual  =  x.length % chunksize;
1367         if (residual < y.length)
1368         {
1369             chunksize -= y.length;
1370         }
1371 
1372         // Use schoolbook multiply.
1373         mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y);
1374         auto done = chunksize;
1375 
1376         while (done < x.length)
1377         {
1378             // result[done .. done+ylength] already has a value.
1379             chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) :  x.length - done;
1380             BigDigit [KARATSUBALIMIT] partial;
1381             partial[0 .. y.length] = result[done .. done+y.length];
1382             mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y);
1383             addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]);
1384             done += chunksize;
1385         }
1386         return;
1387     }
1388 
1389     immutable half = (x.length >> 1) + (x.length & 1);
1390     if (2*y.length*y.length <= x.length*x.length)
1391     {
1392         // UNBALANCED MULTIPLY
1393         // Use school multiply to cut into quasi-squares of Karatsuba-size
1394         // or larger. The ratio of the two sides of the 'square' must be
1395         // between 1.414:1 and 1:1. Use Karatsuba on each chunk.
1396         //
1397         // For maximum performance, we want the ratio to be as close to
1398         // 1:1 as possible. To achieve this, we can either pad x or y.
1399         // The best choice depends on the modulus x%y.
1400         auto numchunks = x.length / y.length;
1401         auto chunksize = y.length;
1402         auto extra =  x.length % y.length;
1403         auto maxchunk = chunksize + extra;
1404         bool paddingY; // true = we're padding Y, false = we're padding X.
1405         if (extra * extra * 2 < y.length*y.length)
1406         {
1407             // The leftover bit is small enough that it should be incorporated
1408             // in the existing chunks.
1409             // Make all the chunks a tiny bit bigger
1410             // (We're padding y with zeros)
1411             chunksize += extra / numchunks;
1412             extra = x.length - chunksize*numchunks;
1413             // there will probably be a few left over.
1414             // Every chunk will either have size chunksize, or chunksize+1.
1415             maxchunk = chunksize + 1;
1416             paddingY = true;
1417             assert(chunksize + extra + chunksize *(numchunks-1) == x.length );
1418         }
1419         else
1420         {
1421             // the extra bit is large enough that it's worth making a new chunk.
1422             // (This means we're padding x with zeros, when doing the first one).
1423             maxchunk = chunksize;
1424             ++numchunks;
1425             paddingY = false;
1426             assert(extra + chunksize *(numchunks-1) == x.length );
1427         }
1428         // We make the buffer a bit bigger so we have space for the partial sums.
1429         BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1430         BigDigit [] partial = scratchbuff[$ - y.length .. $];
1431         size_t done; // how much of X have we done so far?
1432         if (paddingY)
1433         {
1434             // If the first chunk is bigger, do it first. We're padding y.
1435             mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1436                 x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff);
1437             done = chunksize + (extra > 0 ? 1 : 0);
1438             if (extra) --extra;
1439         }
1440         else
1441         {   // We're padding X. Begin with the extra bit.
1442             mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1443             done = extra;
1444             extra = 0;
1445         }
1446         immutable basechunksize = chunksize;
1447         while (done < x.length)
1448         {
1449             chunksize = basechunksize + (extra > 0 ? 1 : 0);
1450             if (extra) --extra;
1451             partial[] = result[done .. done+y.length];
1452             mulKaratsuba(result[done .. done + y.length + chunksize],
1453                        x[done .. done+chunksize], y, scratchbuff);
1454             addAssignSimple(result[done .. done + y.length + chunksize], partial);
1455             done += chunksize;
1456         }
1457         () @trusted { GC.free(scratchbuff.ptr); } ();
1458     }
1459     else
1460     {
1461         // Balanced. Use Karatsuba directly.
1462         BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1463         mulKaratsuba(result, x, y, scratchbuff);
1464         () @trusted { GC.free(scratchbuff.ptr); } ();
1465     }
1466 }
1467 
1468 /**  General unsigned squaring routine for BigInts.
1469  *   Sets result = x*x.
1470  *   NOTE: If the highest half-digit of x is zero, the highest digit of result will
1471  *   also be zero.
1472  */
squareInternal(BigDigit[]result,const BigDigit[]x)1473 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow
1474 {
1475   import core.memory : GC;
1476   // Squaring is potentially half a multiply, plus add the squares of
1477   // the diagonal elements.
1478   assert(result.length == 2*x.length);
1479   if (x.length <= KARATSUBASQUARELIMIT)
1480   {
1481       if (x.length == 1)
1482       {
1483          result[1] = multibyteMul(result[0 .. 1], x, x[0], 0);
1484          return;
1485       }
1486       return squareSimple(result, x);
1487   }
1488   // The nice thing about squaring is that it always stays balanced
1489   BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1490   squareKaratsuba(result, x, scratchbuff);
1491   () @trusted { GC.free(scratchbuff.ptr); } ();
1492 }
1493 
1494 
1495 import core.bitop : bsr;
1496 
1497 /// if remainder is null, only calculate quotient.
divModInternal(BigDigit[]quotient,BigDigit[]remainder,const BigDigit[]u,const BigDigit[]v)1498 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u,
1499         const BigDigit [] v) pure nothrow
1500 {
1501     import core.memory : GC;
1502     assert(quotient.length == u.length - v.length + 1);
1503     assert(remainder == null || remainder.length == v.length);
1504     assert(v.length > 1);
1505     assert(u.length >= v.length);
1506 
1507     // Normalize by shifting v left just enough so that
1508     // its high-order bit is on, and shift u left the
1509     // same amount. The highest bit of u will never be set.
1510 
1511     BigDigit [] vn = new BigDigit[v.length];
1512     BigDigit [] un = new BigDigit[u.length + 1];
1513     // How much to left shift v, so that its MSB is set.
1514     uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]);
1515     if (s != 0)
1516     {
1517         multibyteShl(vn, v, s);
1518         un[$-1] = multibyteShl(un[0..$-1], u, s);
1519     }
1520     else
1521     {
1522         vn[] = v[];
1523         un[0..$-1] = u[];
1524         un[$-1] = 0;
1525     }
1526     if (quotient.length<FASTDIVLIMIT)
1527     {
1528         schoolbookDivMod(quotient, un, vn);
1529     }
1530     else
1531     {
1532         blockDivMod(quotient, un, vn);
1533     }
1534 
1535     // Unnormalize remainder, if required.
1536     if (remainder != null)
1537     {
1538         if (s == 0) remainder[] = un[0 .. vn.length];
1539         else multibyteShr(remainder, un[0 .. vn.length+1], s);
1540     }
1541     () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } ();
1542 }
1543 
1544 pure @system unittest
1545 {
1546     immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000];
1547     immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000];
1548     uint [] q = new uint[u.length - v.length + 1];
1549     uint [] r = new uint[2];
1550     divModInternal(q, r, u, v);
1551     assert(q[]==[0xFFFF_FFFFu, 0]);
1552     assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]);
1553     u = [0, 0xFFFF_FFFE, 0x8000_0001];
1554     v = [0xFFFF_FFFF, 0x8000_0000];
1555     divModInternal(q, r, u, v);
1556 }
1557 
1558 
1559 private:
1560 // Converts a big uint to a hexadecimal string.
1561 //
1562 // Optionally, a separator character (eg, an underscore) may be added between
1563 // every 8 digits.
1564 // buff.length must be data.length*8 if separator is zero,
1565 // or data.length*9 if separator is non-zero. It will be completely filled.
1566 char [] biguintToHex(char [] buff, const BigDigit [] data, char separator=0,
1567         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
1568 {
1569     int x=0;
1570     for (ptrdiff_t i=data.length - 1; i >= 0; --i)
1571     {
1572         toHexZeroPadded(buff[x .. x+8], data[i], letterCase);
1573         x+=8;
1574         if (separator)
1575         {
1576             if (i>0) buff[x] = separator;
1577             ++x;
1578         }
1579     }
1580     return buff;
1581 }
1582 
1583 /**
1584  * Convert a big uint into an octal string.
1585  *
1586  * Params:
1587  *  buff = The destination buffer for the octal string. Must be large enough to
1588  *      store the result, including leading zeroes, which is
1589  *      ceil(data.length * BigDigitBits / 3) characters.
1590  *      The buffer is filled from back to front, starting from `buff[$-1]`.
1591  *  data = The biguint to be converted.
1592  *
1593  * Returns: The index of the leading non-zero digit in `buff`. Will be
1594  * `buff.length - 1` if the entire big uint is zero.
1595  */
biguintToOctal(char[]buff,const (BigDigit)[]data)1596 size_t biguintToOctal(char[] buff, const(BigDigit)[] data)
1597     pure nothrow @safe @nogc
1598 {
1599     ubyte carry = 0;
1600     int shift = 0;
1601     size_t penPos = buff.length - 1;
1602     size_t lastNonZero = buff.length - 1;
1603 
1604     pragma(inline) void output(uint digit) @nogc nothrow
1605     {
1606         if (digit != 0)
1607             lastNonZero = penPos;
1608         buff[penPos--] = cast(char)('0' + digit);
1609     }
1610 
1611     foreach (bigdigit; data)
1612     {
1613         if (shift < 0)
1614         {
1615             // Some bits were carried over from previous word.
1616             assert(shift > -3);
1617             output(((bigdigit << -shift) | carry) & 0b111);
1618             shift += 3;
1619             assert(shift > 0);
1620         }
1621 
1622         while (shift <= BigDigitBits - 3)
1623         {
1624             output((bigdigit >>> shift) & 0b111);
1625             shift += 3;
1626         }
1627 
1628         if (shift < BigDigitBits)
1629         {
1630             // Some bits need to be carried forward.
1631             carry = (bigdigit >>> shift) & 0b11;
1632         }
1633         shift -= BigDigitBits;
1634         assert(shift >= -2 && shift <= 0);
1635     }
1636 
1637     if (shift < 0)
1638     {
1639         // Last word had bits that haven't been output yet.
1640         assert(shift > -3);
1641         output(carry);
1642     }
1643 
1644     return lastNonZero;
1645 }
1646 
1647 /** Convert a big uint into a decimal string.
1648  *
1649  * Params:
1650  *  data    The biguint to be converted. Will be destroyed.
1651  *  buff    The destination buffer for the decimal string. Must be
1652  *          large enough to store the result, including leading zeros.
1653  *          Will be filled backwards, starting from buff[$-1].
1654  *
1655  * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length.
1656  * Returns:
1657  *    the lowest index of buff which was used.
1658  */
biguintToDecimal(char[]buff,BigDigit[]data)1659 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow
1660 {
1661     ptrdiff_t sofar = buff.length;
1662     // Might be better to divide by (10^38/2^32) since that gives 38 digits for
1663     // the price of 3 divisions and a shr; this version only gives 27 digits
1664     // for 3 divisions.
1665     while (data.length>1)
1666     {
1667         uint rem = multibyteDivAssign(data, 10_0000_0000, 0);
1668         itoaZeroPadded(buff[sofar-9 .. sofar], rem);
1669         sofar -= 9;
1670         if (data[$-1] == 0 && data.length > 1)
1671         {
1672             data.length = data.length - 1;
1673         }
1674     }
1675     itoaZeroPadded(buff[sofar-10 .. sofar], data[0]);
1676     sofar -= 10;
1677     // and strip off the leading zeros
1678     while (sofar != buff.length-1 && buff[sofar] == '0')
1679         sofar++;
1680     return sofar;
1681 }
1682 
1683 /** Convert a decimal string into a big uint.
1684  *
1685  * Params:
1686  *  data    The biguint to be receive the result. Must be large enough to
1687  *          store the result.
1688  *  s       The decimal string. May contain _ or 0 .. 9
1689  *
1690  * The required length for the destination buffer is slightly less than
1691  *  1 + s.length/log2(10) = 1 + s.length/3.3219.
1692  *
1693  * Returns:
1694  *    the highest index of data which was used.
1695  */
1696 int biguintFromDecimal(Range)(BigDigit[] data, Range s)
1697 if (
1698     isInputRange!Range &&
1699     isSomeChar!(ElementType!Range) &&
1700     !isInfinite!Range)
1701 in
1702 {
1703     static if (hasLength!Range)
1704         assert((data.length >= 2) || (data.length == 1 && s.length == 1));
1705 }
1706 body
1707 {
1708     import std.conv : ConvException;
1709 
1710     // Convert to base 1e19 = 10_000_000_000_000_000_000.
1711     // (this is the largest power of 10 that will fit into a long).
1712     // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219.
1713     // 485 bits will only just fit into 146 decimal digits.
1714     // As we convert the string, we record the number of digits we've seen in base 19:
1715     // hi is the number of digits/19, lo is the extra digits (0 to 18).
1716     // TODO: This is inefficient for very large strings (it is O(n^^2)).
1717     // We should take advantage of fast multiplication once the numbers exceed
1718     // Karatsuba size.
1719     uint lo = 0; // number of powers of digits, 0 .. 18
1720     uint x = 0;
1721     ulong y = 0;
1722     uint hi = 0; // number of base 1e19 digits
1723     data[0] = 0; // initially number is 0.
1724     if (data.length > 1)
1725         data[1] = 0;
1726 
foreach(character;s)1727     foreach (character; s)
1728     {
1729         if (character == '_')
1730             continue;
1731 
1732         if (character < '0' || character > '9')
1733             throw new ConvException("invalid digit");
1734         x *= 10;
1735         x += character - '0';
1736         ++lo;
1737         if (lo == 9)
1738         {
1739             y = x;
1740             x = 0;
1741         }
1742         if (lo == 18)
1743         {
1744             y *= 10_0000_0000;
1745             y += x;
1746             x = 0;
1747         }
1748         if (lo == 19)
1749         {
1750             y *= 10;
1751             y += x;
1752             x = 0;
1753             // Multiply existing number by 10^19, then add y1.
1754             if (hi>0)
1755             {
1756                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A
1757                 ++hi;
1758                 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000
1759                 ++hi;
1760             }
1761             else
1762                 hi = 2;
1763             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
1764             c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
1765             if (c != 0)
1766             {
1767                 data[hi]=c;
1768                 ++hi;
1769             }
1770             y = 0;
1771             lo = 0;
1772         }
1773     }
1774     // Now set y = all remaining digits.
1775     if (lo >= 18)
1776     {
1777     }
1778     else if (lo >= 9)
1779     {
1780         for (int k=9; k<lo; ++k) y*=10;
1781         y+=x;
1782     }
1783     else
1784     {
1785         for (int k=0; k<lo; ++k) y*=10;
1786         y+=x;
1787     }
1788     if (lo != 0)
1789     {
1790         if (hi == 0)
1791         {
1792             data[0] = cast(uint) y;
1793             if (data.length == 1)
1794             {
1795                 hi = 1;
1796             }
1797             else
1798             {
1799                 data[1] = cast(uint)(y >>> 32);
1800                 hi=2;
1801             }
1802         }
1803         else
1804         {
1805             while (lo>0)
1806             {
1807                 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0);
1808                 if (c != 0)
1809                 {
1810                     data[hi]=c;
1811                     ++hi;
1812                 }
1813                 --lo;
1814             }
1815             uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
1816             if (y > 0xFFFF_FFFFL)
1817             {
1818                 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
1819             }
1820             if (c != 0)
1821             {
1822                 data[hi]=c;
1823                 ++hi;
1824             }
1825         }
1826     }
1827     while (hi>1 && data[hi-1]==0)
1828         --hi;
1829     return hi;
1830 }
1831 
1832 
1833 private:
1834 // ------------------------
1835 // These in-place functions are only for internal use; they are incompatible
1836 // with COW.
1837 
1838 // Classic 'schoolbook' multiplication.
mulSimple(BigDigit[]result,const (BigDigit)[]left,const (BigDigit)[]right)1839 void mulSimple(BigDigit[] result, const(BigDigit) [] left,
1840         const(BigDigit)[] right) pure nothrow
1841 in
1842 {
1843     assert(result.length == left.length + right.length);
1844     assert(right.length>1);
1845 }
1846 body
1847 {
1848     result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0);
1849     multibyteMultiplyAccumulate(result[1..$], left, right[1..$]);
1850 }
1851 
1852 // Classic 'schoolbook' squaring
squareSimple(BigDigit[]result,const (BigDigit)[]x)1853 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow
1854 in
1855 {
1856     assert(result.length == 2*x.length);
1857     assert(x.length>1);
1858 }
1859 body
1860 {
1861     multibyteSquare(result, x);
1862 }
1863 
1864 
1865 // add two uints of possibly different lengths. Result must be as long
1866 // as the larger length.
1867 // Returns carry (0 or 1).
addSimple(BigDigit[]result,const BigDigit[]left,const BigDigit[]right)1868 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right)
1869 pure nothrow
1870 in
1871 {
1872     assert(result.length == left.length);
1873     assert(left.length >= right.length);
1874     assert(right.length>0);
1875 }
1876 body
1877 {
1878     uint carry = multibyteAdd(result[0 .. right.length],
1879             left[0 .. right.length], right, 0);
1880     if (right.length < left.length)
1881     {
1882         result[right.length .. left.length] = left[right.length .. $];
1883         carry = multibyteIncrementAssign!('+')(result[right.length..$], carry);
1884     }
1885     return carry;
1886 }
1887 
1888 //  result = left - right
1889 // returns carry (0 or 1)
subSimple(BigDigit[]result,const (BigDigit)[]left,const (BigDigit)[]right)1890 BigDigit subSimple(BigDigit [] result,const(BigDigit) [] left,
1891         const(BigDigit) [] right) pure nothrow
1892 in
1893 {
1894     assert(result.length == left.length);
1895     assert(left.length >= right.length);
1896     assert(right.length>0);
1897 }
1898 body
1899 {
1900     BigDigit carry = multibyteSub(result[0 .. right.length],
1901             left[0 .. right.length], right, 0);
1902     if (right.length < left.length)
1903     {
1904         result[right.length .. left.length] = left[right.length .. $];
1905         carry = multibyteIncrementAssign!('-')(result[right.length..$], carry);
1906     } //else if (result.length == left.length+1) { result[$-1] = carry; carry=0; }
1907     return carry;
1908 }
1909 
1910 
1911 /* result = result - right
1912  * Returns carry = 1 if result was less than right.
1913 */
subAssignSimple(BigDigit[]result,const (BigDigit)[]right)1914 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right)
1915 pure nothrow
1916 {
1917     assert(result.length >= right.length);
1918     uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0);
1919     if (c && result.length > right.length)
1920         c = multibyteIncrementAssign!('-')(result[right.length .. $], c);
1921     return c;
1922 }
1923 
1924 /* result = result + right
1925 */
addAssignSimple(BigDigit[]result,const (BigDigit)[]right)1926 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right)
1927 pure nothrow
1928 {
1929     assert(result.length >= right.length);
1930     uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0);
1931     if (c && result.length > right.length)
1932        c = multibyteIncrementAssign!('+')(result[right.length .. $], c);
1933     return c;
1934 }
1935 
1936 /* performs result += wantSub? - right : right;
1937 */
addOrSubAssignSimple(BigDigit[]result,const (BigDigit)[]right,bool wantSub)1938 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right,
1939         bool wantSub) pure nothrow
1940 {
1941     if (wantSub)
1942         return subAssignSimple(result, right);
1943     else
1944         return addAssignSimple(result, right);
1945 }
1946 
1947 
1948 // return true if x<y, considering leading zeros
less(const (BigDigit)[]x,const (BigDigit)[]y)1949 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow
1950 {
1951     assert(x.length >= y.length);
1952     auto k = x.length-1;
1953     while (x[k]==0 && k >= y.length)
1954         --k;
1955     if (k >= y.length)
1956         return false;
1957     while (k>0 && x[k]==y[k])
1958         --k;
1959     return x[k] < y[k];
1960 }
1961 
1962 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y.
inplaceSub(BigDigit[]result,const (BigDigit)[]x,const (BigDigit)[]y)1963 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1964     pure nothrow
1965 {
1966     assert(result.length == (x.length >= y.length) ? x.length : y.length);
1967 
1968     size_t minlen;
1969     bool negative;
1970     if (x.length >= y.length)
1971     {
1972         minlen = y.length;
1973         negative = less(x, y);
1974     }
1975     else
1976     {
1977        minlen = x.length;
1978        negative = !less(y, x);
1979     }
1980     const (BigDigit)[] large, small;
1981     if (negative)
1982     {
1983         large = y; small = x;
1984     }
1985     else
1986     {
1987         large = x; small = y;
1988     }
1989 
1990     BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0);
1991     if (x.length != y.length)
1992     {
1993         result[minlen .. large.length]= large[minlen..$];
1994         result[large.length..$] = 0;
1995         if (carry)
1996             multibyteIncrementAssign!('-')(result[minlen..$], carry);
1997     }
1998     return negative;
1999 }
2000 
2001 /* Determine how much space is required for the temporaries
2002  * when performing a Karatsuba multiplication.
2003  */
karatsubaRequiredBuffSize(size_t xlen)2004 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe
2005 {
2006     return xlen <= KARATSUBALIMIT ? 0 : 2*xlen; // - KARATSUBALIMIT+2;
2007 }
2008 
2009 /* Sets result = x*y, using Karatsuba multiplication.
2010 * x must be longer or equal to y.
2011 * Valid only for balanced multiplies, where x is not shorter than y.
2012 * It is superior to schoolbook multiplication if and only if
2013 *    sqrt(2)*y.length > x.length > y.length.
2014 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2)
2015 * The maximum allowable length of x and y is uint.max; but better algorithms
2016 * should be used far before that length is reached.
2017 * Params:
2018 * scratchbuff      An array long enough to store all the temporaries. Will be destroyed.
2019 */
mulKaratsuba(BigDigit[]result,const (BigDigit)[]x,const (BigDigit)[]y,BigDigit[]scratchbuff)2020 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x,
2021         const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow
2022 {
2023     assert(x.length >= y.length);
2024           assert(result.length < uint.max, "Operands too large");
2025     assert(result.length == x.length + y.length);
2026     if (x.length <= KARATSUBALIMIT)
2027     {
2028         return mulSimple(result, x, y);
2029     }
2030     // Must be almost square (otherwise, a schoolbook iteration is better)
2031     assert(2L * y.length * y.length > (x.length-1) * (x.length-1),
2032         "Bigint Internal Error: Asymmetric Karatsuba");
2033 
2034     // The subtractive version of Karatsuba multiply uses the following result:
2035     // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid)
2036     // where mid = (x0-x1)*(y0-y1)
2037     // requiring 3 multiplies of length N, instead of 4.
2038     // The advantage of the subtractive over the additive version is that
2039     // the mid multiply cannot exceed length N. But there are subtleties:
2040     // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we
2041     // retain all of the leading zeros in the subtractions
2042 
2043     // half length, round up.
2044     auto half = (x.length >> 1) + (x.length & 1);
2045 
2046     const(BigDigit) [] x0 = x[0 .. half];
2047     const(BigDigit) [] x1 = x[half .. $];
2048     const(BigDigit) [] y0 = y[0 .. half];
2049     const(BigDigit) [] y1 = y[half .. $];
2050     BigDigit [] mid = scratchbuff[0 .. half*2];
2051     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2052     BigDigit [] resultLow = result[0 .. 2*half];
2053     BigDigit [] resultHigh = result[2*half .. $];
2054      // initially use result to store temporaries
2055     BigDigit [] xdiff= result[0 .. half];
2056     BigDigit [] ydiff = result[half .. half*2];
2057 
2058     // First, we calculate mid, and sign of mid
2059     immutable bool midNegative = inplaceSub(xdiff, x0, x1)
2060                       ^ inplaceSub(ydiff, y0, y1);
2061     mulKaratsuba(mid, xdiff, ydiff, newscratchbuff);
2062 
2063     // Low half of result gets x0 * y0. High half gets x1 * y1
2064 
2065     mulKaratsuba(resultLow, x0, y0, newscratchbuff);
2066 
2067     if (2L * y1.length * y1.length < x1.length * x1.length)
2068     {
2069         // an asymmetric situation has been created.
2070         // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1.
2071         // Applying one schoolbook multiply gives us two pieces each 1.2:1
2072         if (y1.length <= KARATSUBALIMIT)
2073             mulSimple(resultHigh, x1, y1);
2074         else
2075         {
2076             // divide x1 in two, then use schoolbook multiply on the two pieces.
2077             auto quarter = (x1.length >> 1) + (x1.length & 1);
2078             immutable ysmaller = (quarter >= y1.length);
2079             mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1,
2080                 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff);
2081             // Save the part which will be overwritten.
2082             immutable ysmaller2 = ((x1.length - quarter) >= y1.length);
2083             newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length];
2084             mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1,
2085                 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]);
2086 
2087             resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]);
2088         }
2089     }
2090     else
2091         mulKaratsuba(resultHigh, x1, y1, newscratchbuff);
2092 
2093     /* We now have result = x0y0 + (N*N)*x1y1
2094        Before adding or subtracting mid, we must calculate
2095        result += N * (x0y0 + x1y1)
2096        We can do this with three half-length additions. With a = x0y0, b = x1y1:
2097                       aHI aLO
2098         +       aHI   aLO
2099         +       bHI   bLO
2100         +  bHI  bLO
2101         =  R3   R2    R1   R0
2102         R1 = aHI + bLO + aLO
2103         R2 = aHI + bLO + aHI + carry_from_R1
2104         R3 = bHi + carry_from_R2
2105 
2106      It might actually be quicker to do it in two full-length additions:
2107      newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]);
2108      addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]);
2109    */
2110     BigDigit[] R1 = result[half .. half*2];
2111     BigDigit[] R2 = result[half*2 .. half*3];
2112     BigDigit[] R3 = result[half*3..$];
2113     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2114     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2115     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2116     if (c1+c2)
2117         multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2118     if (c1+c3)
2119         multibyteIncrementAssign!('+')(R3, c1+c3);
2120 
2121     // And finally we subtract mid
2122     addOrSubAssignSimple(result[half..$], mid, !midNegative);
2123 }
2124 
squareKaratsuba(BigDigit[]result,const BigDigit[]x,BigDigit[]scratchbuff)2125 void squareKaratsuba(BigDigit [] result, const BigDigit [] x,
2126         BigDigit [] scratchbuff) pure nothrow
2127 {
2128     // See mulKaratsuba for implementation comments.
2129     // Squaring is simpler, since it never gets asymmetric.
2130     assert(result.length < uint.max, "Operands too large");
2131     assert(result.length == 2*x.length);
2132     if (x.length <= KARATSUBASQUARELIMIT)
2133     {
2134         return squareSimple(result, x);
2135     }
2136     // half length, round up.
2137     auto half = (x.length >> 1) + (x.length & 1);
2138 
2139     const(BigDigit)[] x0 = x[0 .. half];
2140     const(BigDigit)[] x1 = x[half .. $];
2141     BigDigit [] mid = scratchbuff[0 .. half*2];
2142     BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2143      // initially use result to store temporaries
2144     BigDigit [] xdiff= result[0 .. half];
2145     const BigDigit [] ydiff = result[half .. half*2];
2146 
2147     // First, we calculate mid. We don't need its sign
2148     inplaceSub(xdiff, x0, x1);
2149     squareKaratsuba(mid, xdiff, newscratchbuff);
2150 
2151     // Set result = x0x0 + (N*N)*x1x1
2152     squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff);
2153     squareKaratsuba(result[2*half .. $], x1, newscratchbuff);
2154 
2155     /* result += N * (x0x0 + x1x1)
2156        Do this with three half-length additions. With a = x0x0, b = x1x1:
2157         R1 = aHI + bLO + aLO
2158         R2 = aHI + bLO + aHI + carry_from_R1
2159         R3 = bHi + carry_from_R2
2160     */
2161     BigDigit[] R1 = result[half .. half*2];
2162     BigDigit[] R2 = result[half*2 .. half*3];
2163     BigDigit[] R3 = result[half*3..$];
2164     BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2165     BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2166     BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2167     if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2168     if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3);
2169 
2170     // And finally we subtract mid, which is always positive
2171     subAssignSimple(result[half..$], mid);
2172 }
2173 
2174 /* Knuth's Algorithm D, as presented in
2175  * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002).
2176  * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18.
2177  * Given u and v, calculates  quotient  = u / v, u = u % v.
2178  * v must be normalized (ie, the MSB of v must be 1).
2179  * The most significant words of quotient and u may be zero.
2180  * u[0 .. v.length] holds the remainder.
2181  */
schoolbookDivMod(BigDigit[]quotient,BigDigit[]u,in BigDigit[]v)2182 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2183     pure nothrow
2184 {
2185     assert(quotient.length == u.length - v.length);
2186     assert(v.length > 1);
2187     assert(u.length >= v.length);
2188     assert((v[$-1]&0x8000_0000)!=0);
2189     assert(u[$-1] < v[$-1]);
2190     // BUG: This code only works if BigDigit is uint.
2191     uint vhi = v[$-1];
2192     uint vlo = v[$-2];
2193 
2194     for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--)
2195     {
2196         // Compute estimate of quotient[j],
2197         // qhat = (three most significant words of u)/(two most sig words of v).
2198         uint qhat;
2199         if (u[j + v.length] == vhi)
2200         {
2201             // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001)
2202             qhat = uint.max;
2203         }
2204         else
2205         {
2206             uint ulo = u[j + v.length - 2];
2207             version (D_InlineAsm_X86)
2208             {
2209                 // Note: On DMD, this is only ~10% faster than the non-asm code.
2210                 uint *p = &u[j + v.length - 1];
2211                 asm pure nothrow
2212                 {
2213                     mov EAX, p;
2214                     mov EDX, [EAX+4];
2215                     mov EAX, [EAX];
2216                     div dword ptr [vhi];
2217                     mov qhat, EAX;
2218                     mov ECX, EDX;
2219 div3by2correction:
2220                     mul dword ptr [vlo]; // EDX:EAX = qhat * vlo
2221                     sub EAX, ulo;
2222                     sbb EDX, ECX;
2223                     jbe div3by2done;
2224                     mov EAX, qhat;
2225                     dec EAX;
2226                     mov qhat, EAX;
2227                     add ECX, dword ptr [vhi];
2228                     jnc div3by2correction;
2229 div3by2done:    ;
2230                 }
2231             }
2232             else
2233             { // version (InlineAsm)
2234                 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1];
2235                 immutable bigqhat = uu / vhi;
2236                 ulong rhat =  uu - bigqhat * vhi;
2237                 qhat = cast(uint) bigqhat;
2238 again:
2239                 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo))
2240                 {
2241                     --qhat;
2242                     rhat += vhi;
2243                     if (!(rhat & 0xFFFF_FFFF_0000_0000L))
2244                         goto again;
2245                 }
2246             } // version (InlineAsm)
2247         }
2248         // Multiply and subtract.
2249         uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0);
2250 
2251         if (u[j+v.length] < carry)
2252         {
2253             // If we subtracted too much, add back
2254             --qhat;
2255             carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0);
2256         }
2257         quotient[j] = qhat;
2258         u[j + v.length] = u[j + v.length] - carry;
2259     }
2260 }
2261 
2262 private:
2263 
2264 // TODO: Replace with a library call
itoaZeroPadded(char[]output,uint value)2265 void itoaZeroPadded(char[] output, uint value)
2266     pure nothrow @safe @nogc
2267 {
2268     for (auto i = output.length; i--;)
2269     {
2270         if (value < 10)
2271         {
2272             output[i] = cast(char)(value + '0');
2273             value = 0;
2274         }
2275         else
2276         {
2277             output[i] = cast(char)(value % 10 + '0');
2278             value /= 10;
2279         }
2280     }
2281 }
2282 
2283 void toHexZeroPadded(char[] output, uint value,
2284         LetterCase letterCase = LetterCase.upper) pure nothrow @safe
2285 {
2286     ptrdiff_t x = output.length - 1;
2287     static immutable string upperHexDigits = "0123456789ABCDEF";
2288     static immutable string lowerHexDigits = "0123456789abcdef";
2289     for ( ; x >= 0; --x)
2290     {
2291         if (letterCase == LetterCase.upper)
2292         {
2293             output[x] = upperHexDigits[value & 0xF];
2294         }
2295         else
2296         {
2297             output[x] = lowerHexDigits[value & 0xF];
2298         }
2299         value >>= 4;
2300     }
2301 }
2302 
2303 private:
2304 
2305 // Returns the highest value of i for which left[i]!=right[i],
2306 // or 0 if left[] == right[]
highestDifferentDigit(const BigDigit[]left,const BigDigit[]right)2307 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right)
2308 pure nothrow @nogc @safe
2309 {
2310     assert(left.length == right.length);
2311     for (ptrdiff_t i = left.length - 1; i>0; --i)
2312     {
2313         if (left[i] != right[i])
2314             return i;
2315     }
2316     return 0;
2317 }
2318 
2319 // Returns the lowest value of i for which x[i]!=0.
firstNonZeroDigit(const BigDigit[]x)2320 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe
2321 {
2322     int k = 0;
2323     while (x[k]==0)
2324     {
2325         ++k;
2326         assert(k<x.length);
2327     }
2328     return k;
2329 }
2330 
2331 /*
2332     Calculate quotient and remainder of u / v using fast recursive division.
2333     v must be normalised, and must be at least half as long as u.
2334     Given u and v, v normalised, calculates  quotient  = u/v, u = u%v.
2335     scratch is temporary storage space, length must be >= quotient + 1.
2336 
2337 Returns:
2338     u[0 .. v.length] is the remainder. u[v.length..$] is corrupted.
2339 
2340     Implements algorithm 1.8 from MCA.
2341     This algorithm has an annoying special case. After the first recursion, the
2342     highest bit of the quotient may be set. This means that in the second
2343     recursive call, the 'in' contract would be violated. (This happens only
2344     when the top quarter of u is equal to the top half of v. A base 10
2345     equivalent example of this situation is 5517/56; the first step gives
2346     55/5 = 11). To maintain the in contract, we pad a zero to the top of both
2347     u and the quotient. 'mayOverflow' indicates that that the special case
2348     has occurred.
2349     (In MCA, a different strategy is used: the in contract is weakened, and
2350     schoolbookDivMod is more general: it allows the high bit of u to be set).
2351     See also:
2352     - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
2353       Max-Planck Institute fuer Informatik, (Oct 1998).
2354 */
2355 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v,
2356                      BigDigit[] scratch, bool mayOverflow = false)
2357                      pure nothrow
2358 in
2359 {
2360     // v must be normalized
2361     assert(v.length > 1);
2362     assert((v[$ - 1] & 0x8000_0000) != 0);
2363     assert(!(u[$ - 1] & 0x8000_0000));
2364     assert(quotient.length == u.length - v.length);
2365     if (mayOverflow)
2366     {
2367         assert(u[$-1] == 0);
2368         assert(u[$-2] & 0x8000_0000);
2369     }
2370 
2371     // Must be symmetric. Use block schoolbook division if not.
2372     assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length);
2373     assert((mayOverflow ? u.length-1 : u.length) >= v.length);
2374     assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1));
2375 }
2376 body
2377 {
2378     if (quotient.length < FASTDIVLIMIT)
2379     {
2380         return schoolbookDivMod(quotient, u, v);
2381     }
2382 
2383     // Split quotient into two halves, but keep padding in the top half
2384     auto k = (mayOverflow ?  quotient.length - 1 : quotient.length) >> 1;
2385 
2386     // RECURSION 1: Calculate the high half of the quotient
2387 
2388     // Note that if u and quotient were padded, they remain padded during
2389     // this call, so in contract is satisfied.
2390     recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $],
2391         scratch, mayOverflow);
2392 
2393     // quotient[k..$] is our guess at the high quotient.
2394     // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the
2395     // first remainder. u[0 .. 2*k] is the low part.
2396 
2397     // Calculate the full first remainder to be
2398     //    remainder - highQuotient * lowDivisor
2399     // reducing highQuotient until the remainder is positive.
2400     // The low part of the remainder, u[0 .. k], cannot be altered by this.
2401 
2402     adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k,
2403             scratch[0 .. quotient.length], mayOverflow);
2404 
2405     // RECURSION 2: Calculate the low half of the quotient
2406     // The full first remainder is now in u[0 .. k + v.length].
2407 
2408     if (u[k + v.length - 1] & 0x8000_0000)
2409     {
2410         // Special case. The high quotient is 0x1_00...000 or 0x1_00...001.
2411         // This means we need an extra quotient word for the next recursion.
2412         // We need to restore the invariant for the recursive calls.
2413         // We do this by padding both u and quotient. Extending u is trivial,
2414         // because the higher words will not be used again. But for the
2415         // quotient, we're clobbering the low word of the high quotient,
2416         // so we need save it, and add it back in after the recursive call.
2417 
2418         auto clobberedQuotient = quotient[k];
2419         u[k+v.length] = 0;
2420 
2421         recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1],
2422             v[k .. $], scratch, true);
2423         adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k,
2424             scratch[0 .. 2 * k+1], true);
2425 
2426         // Now add the quotient word that got clobbered earlier.
2427         multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient);
2428     }
2429     else
2430     {
2431         // The special case has NOT happened.
2432         recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $],
2433             scratch, false);
2434 
2435         // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length]
2436 
2437         adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k,
2438             scratch[0 .. 2 * k]);
2439     }
2440 }
2441 
2442 // rem -= quot * v[0 .. k].
2443 // If would make rem negative, decrease quot until rem is >= 0.
2444 // Needs (quot.length * k) scratch space to store the result of the multiply.
2445 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v,
2446         ptrdiff_t k,
2447         BigDigit[] scratch, bool mayOverflow = false) pure nothrow
2448 {
2449     assert(rem.length == v.length);
2450     mulInternal(scratch, quot, v[0 .. k]);
2451     uint carry = 0;
2452     if (mayOverflow)
2453         carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]);
2454     else
2455         carry = subAssignSimple(rem, scratch);
2456     while (carry)
2457     {
2458         multibyteIncrementAssign!('-')(quot, 1); // quot--
2459         carry -= multibyteAdd(rem, rem, v, 0);
2460     }
2461 }
2462 
2463 // Cope with unbalanced division by performing block schoolbook division.
blockDivMod(BigDigit[]quotient,BigDigit[]u,in BigDigit[]v)2464 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2465 pure nothrow
2466 {
2467     import core.memory : GC;
2468     assert(quotient.length == u.length - v.length);
2469     assert(v.length > 1);
2470     assert(u.length >= v.length);
2471     assert((v[$-1] & 0x8000_0000)!=0);
2472     assert((u[$-1] & 0x8000_0000)==0);
2473     BigDigit [] scratch = new BigDigit[v.length + 1];
2474 
2475     // Perform block schoolbook division, with 'v.length' blocks.
2476     auto m = u.length - v.length;
2477     while (m > v.length)
2478     {
2479         immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0;
2480         BigDigit saveq;
2481         if (mayOverflow)
2482         {
2483             u[m + v.length] = 0;
2484             saveq = quotient[m];
2485         }
2486         recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)],
2487             u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow);
2488         if (mayOverflow)
2489         {
2490             assert(quotient[m] == 0);
2491             quotient[m] = saveq;
2492         }
2493         m -= v.length;
2494     }
2495     recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch);
2496     () @trusted { GC.free(scratch.ptr); } ();
2497 }
2498 
2499 @system unittest
2500 {
2501     import core.stdc.stdio;
2502 
printBiguint(const uint[]data)2503     void printBiguint(const uint [] data)
2504     {
2505         char [] buff = biguintToHex(new char[data.length*9], data, '_');
2506         printf("%.*s\n", buff.length, buff.ptr);
2507     }
2508 
printDecimalBigUint(BigUint data)2509     void printDecimalBigUint(BigUint data)
2510     {
2511         auto str = data.toDecimalString(0);
2512         printf("%.*s\n", str.length, str.ptr);
2513     }
2514 
2515     uint [] a, b;
2516     a = new uint[43];
2517     b = new uint[179];
2518     for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i;
2519     for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546;
2520 
2521     a[$-1] |= 0x8000_0000;
2522     uint [] r = new uint[a.length];
2523     uint [] q = new uint[b.length-a.length+1];
2524 
2525     divModInternal(q, r, b, a);
2526     q = q[0..$-1];
2527     uint [] r1 = r.dup;
2528     uint [] q1 = q.dup;
2529     blockDivMod(q, b, a);
2530     r = b[0 .. a.length];
2531     assert(r[] == r1[]);
2532     assert(q[] == q1[]);
2533 }
2534 
2535 // biguintToOctal
2536 @safe unittest
2537 {
2538     enum bufSize = 5 * BigDigitBits / 3 + 1;
2539     auto buf = new char[bufSize];
2540     size_t i;
2541     BigDigit[] data = [ 342391 ];
2542 
2543     // Basic functionality with single word
2544     i = biguintToOctal(buf, data);
2545     assert(i == bufSize - 7 && buf[i .. $] == "1234567");
2546 
2547     // Test carrying bits between words
2548     data = [ 0x77053977, 0x39770539, 0x00000005 ];
2549     i = biguintToOctal(buf, data);
2550     assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567");
2551 
2552     // Test carried bits in the last word
2553     data = [ 0x80000000 ];
2554     i = biguintToOctal(buf, data);
2555     assert(buf[i .. $] == "20000000000");
2556 
2557     // Test boundary between 3rd and 4th word where the number of bits is
2558     // divisible by 3 and no bits should be carried.
2559     //
2560     // The 0xC0000000's are for "poisoning" the carry to be non-zero when the
2561     // rollover happens, so that if any bugs happen in wrongly adding the carry
2562     // to the next word, non-zero bits will show up in the output.
2563     data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ];
2564     i = biguintToOctal(buf, data);
2565     assert(buf[i .. $] == "2060000000001400000000030000000000");
2566 
2567     // Boundary case: 0
2568     data = [ 0 ];
2569     i = biguintToOctal(buf, data);
2570     assert(buf[i .. $] == "0");
2571 }
2572