1 /** Fundamental operations for arbitrary-precision arithmetic
2 *
3 * These functions are for internal use only.
4 */
5 /* Copyright Don Clugston 2008 - 2010.
6 * Distributed under the Boost Software License, Version 1.0.
7 * (See accompanying file LICENSE_1_0.txt or copy at
8 * http://www.boost.org/LICENSE_1_0.txt)
9 */
10 /* References:
11 "Modern Computer Arithmetic" (MCA) is the primary reference for all
12 algorithms used in this library.
13 - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic",
14 Version 0.5.9, (Oct 2010).
15 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
16 Max-Planck Institute fuer Informatik, (Oct 1998).
17 - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.",
18 INRIA 4664, (Dec 2002).
19 - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?",
20 http://bodrato.it/papers (2006).
21 - A. Fog, "Optimizing subroutines in assembly language",
22 www.agner.org/optimize (2008).
23 - A. Fog, "The microarchitecture of Intel and AMD CPU's",
24 www.agner.org/optimize (2008).
25 - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs
26 and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008).
27
28 Idioms:
29 Many functions in this module use
30 'func(Tulong)(Tulong x) if (is(Tulong == ulong))' rather than 'func(ulong x)'
31 in order to disable implicit conversion.
32
33 */
34 module std.internal.math.biguintcore;
35
version(D_InlineAsm_X86)36 version (D_InlineAsm_X86)
37 {
38 import std.internal.math.biguintx86;
39 }
40 else
41 {
42 import std.internal.math.biguintnoasm;
43 }
44
45 alias multibyteAdd = multibyteAddSub!('+');
46 alias multibyteSub = multibyteAddSub!('-');
47
48
49 import core.cpuid;
50 public import std.ascii : LetterCase;
51 import std.range.primitives;
52 import std.traits;
53
this()54 shared static this()
55 {
56 CACHELIMIT = core.cpuid.datacache[0].size*1024/2;
57 }
58
59 private:
60 // Limits for when to switch between algorithms.
61 immutable size_t CACHELIMIT; // Half the size of the data cache.
62 enum size_t FASTDIVLIMIT = 100; // crossover to recursive division
63
64
65 // These constants are used by shift operations
66 static if (BigDigit.sizeof == int.sizeof)
67 {
68 enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 };
69 alias BIGHALFDIGIT = ushort;
70 }
71 else static if (BigDigit.sizeof == long.sizeof)
72 {
73 alias BIGHALFDIGIT = uint;
74 enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 };
75 }
76 else static assert(0, "Unsupported BigDigit size");
77
78 import std.exception : assumeUnique;
79 import std.traits : isIntegral;
80 enum BigDigitBits = BigDigit.sizeof*8;
81 template maxBigDigits(T)
82 if (isIntegral!T)
83 {
84 enum maxBigDigits = (T.sizeof+BigDigit.sizeof-1)/BigDigit.sizeof;
85 }
86
87 static immutable BigDigit[] ZERO = [0];
88 static immutable BigDigit[] ONE = [1];
89 static immutable BigDigit[] TWO = [2];
90 static immutable BigDigit[] TEN = [10];
91
92
93 public:
94
95 /// BigUint performs memory management and wraps the low-level calls.
96 struct BigUint
97 {
98 private:
invariantBigUint99 pure invariant()
100 {
101 assert( data.length >= 1 && (data.length == 1 || data[$-1] != 0 ));
102 }
103
104 immutable(BigDigit) [] data = ZERO;
105
thisBigUint106 this(immutable(BigDigit) [] x) pure nothrow @nogc @safe
107 {
108 data = x;
109 }
110 package(std) // used from: std.bigint
111 this(T)(T x) pure nothrow @safe if (isIntegral!T)
112 {
113 opAssign(x);
114 }
115
functionBigUint116 enum trustedAssumeUnique = function(BigDigit[] input) pure @trusted @nogc {
117 return assumeUnique(input);
118 };
119 public:
120 // Length in uints
uintLengthBigUint121 @property size_t uintLength() pure nothrow const @safe @nogc
122 {
123 static if (BigDigit.sizeof == uint.sizeof)
124 {
125 return data.length;
126 }
127 else static if (BigDigit.sizeof == ulong.sizeof)
128 {
129 return data.length * 2 -
130 ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0);
131 }
132 }
ulongLengthBigUint133 @property size_t ulongLength() pure nothrow const @safe @nogc
134 {
135 static if (BigDigit.sizeof == uint.sizeof)
136 {
137 return (data.length + 1) >> 1;
138 }
139 else static if (BigDigit.sizeof == ulong.sizeof)
140 {
141 return data.length;
142 }
143 }
144
145 // The value at (cast(ulong[]) data)[n]
peekUlongBigUint146 ulong peekUlong(int n) pure nothrow const @safe @nogc
147 {
148 static if (BigDigit.sizeof == int.sizeof)
149 {
150 if (data.length == n*2 + 1) return data[n*2];
151 return data[n*2] + ((cast(ulong) data[n*2 + 1]) << 32 );
152 }
153 else static if (BigDigit.sizeof == long.sizeof)
154 {
155 return data[n];
156 }
157 }
peekUintBigUint158 uint peekUint(int n) pure nothrow const @safe @nogc
159 {
160 static if (BigDigit.sizeof == int.sizeof)
161 {
162 return data[n];
163 }
164 else
165 {
166 immutable x = data[n >> 1];
167 return (n & 1) ? cast(uint)(x >> 32) : cast(uint) x;
168 }
169 }
170 public:
171 ///
172 void opAssign(Tulong)(Tulong u) pure nothrow @safe if (is (Tulong == ulong))
173 {
174 if (u == 0) data = ZERO;
175 else if (u == 1) data = ONE;
176 else if (u == 2) data = TWO;
177 else if (u == 10) data = TEN;
178 else
179 {
180 static if (BigDigit.sizeof == int.sizeof)
181 {
182 uint ulo = cast(uint)(u & 0xFFFF_FFFF);
183 uint uhi = cast(uint)(u >> 32);
184 if (uhi == 0)
185 {
186 data = [ulo];
187 }
188 else
189 {
190 data = [ulo, uhi];
191 }
192 }
193 else static if (BigDigit.sizeof == long.sizeof)
194 {
195 data = [u];
196 }
197 }
198 }
199 void opAssign(Tdummy = void)(BigUint y) pure nothrow @nogc @safe
200 {
201 this.data = y.data;
202 }
203
204 ///
205 int opCmp(Tdummy = void)(const BigUint y) pure nothrow @nogc const @safe
206 {
207 if (data.length != y.data.length)
208 return (data.length > y.data.length) ? 1 : -1;
209 size_t k = highestDifferentDigit(data, y.data);
210 if (data[k] == y.data[k])
211 return 0;
212 return data[k] > y.data[k] ? 1 : -1;
213 }
214
215 ///
216 int opCmp(Tulong)(Tulong y) pure nothrow @nogc const @safe if (is (Tulong == ulong))
217 {
218 if (data.length > maxBigDigits!Tulong)
219 return 1;
220
221 foreach_reverse (i; 0 .. maxBigDigits!Tulong)
222 {
223 BigDigit tmp = cast(BigDigit)(y>>(i*BigDigitBits));
224 if (tmp == 0)
225 if (data.length >= i+1)
226 {
227 // Since ZERO is [0], so we cannot simply return 1 here, as
228 // data[i] would be 0 for i == 0 in that case.
229 return (data[i] > 0) ? 1 : 0;
230 }
231 else
232 continue;
233 else
234 if (i+1 > data.length)
235 return -1;
236 else if (tmp != data[i])
237 return data[i] > tmp ? 1 : -1;
238 }
239 return 0;
240 }
241
242 bool opEquals(Tdummy = void)(ref const BigUint y) pure nothrow @nogc const @safe
243 {
244 return y.data[] == data[];
245 }
246
247 bool opEquals(Tdummy = void)(ulong y) pure nothrow @nogc const @safe
248 {
249 if (data.length > 2)
250 return false;
251 uint ylo = cast(uint)(y & 0xFFFF_FFFF);
252 uint yhi = cast(uint)(y >> 32);
253 if (data.length == 2 && data[1]!=yhi)
254 return false;
255 if (data.length == 1 && yhi != 0)
256 return false;
257 return (data[0] == ylo);
258 }
259
isZeroBigUint260 bool isZero() pure const nothrow @safe @nogc
261 {
262 return data.length == 1 && data[0] == 0;
263 }
264
numBytesBigUint265 size_t numBytes() pure nothrow const @safe @nogc
266 {
267 return data.length * BigDigit.sizeof;
268 }
269
270 // the extra bytes are added to the start of the string
toDecimalStringBigUint271 char [] toDecimalString(int frontExtraBytes) const pure nothrow
272 {
273 immutable predictlength = 20+20*(data.length/2); // just over 19
274 char [] buff = new char[frontExtraBytes + predictlength];
275 ptrdiff_t sofar = biguintToDecimal(buff, data.dup);
276 return buff[sofar-frontExtraBytes..$];
277 }
278
279 /** Convert to a hex string, printing a minimum number of digits 'minPadding',
280 * allocating an additional 'frontExtraBytes' at the start of the string.
281 * Padding is done with padChar, which may be '0' or ' '.
282 * 'separator' is a digit separation character. If non-zero, it is inserted
283 * between every 8 digits.
284 * Separator characters do not contribute to the minPadding.
285 */
286 char [] toHexString(int frontExtraBytes, char separator = 0,
287 int minPadding=0, char padChar = '0',
288 LetterCase letterCase = LetterCase.upper) const pure nothrow @safe
289 {
290 // Calculate number of extra padding bytes
291 size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof)
292 ? minPadding - data.length * 2 * BigDigit.sizeof : 0;
293
294 // Length not including separator bytes
295 size_t lenBytes = data.length * 2 * BigDigit.sizeof;
296
297 // Calculate number of separator bytes
298 size_t mainSeparatorBytes = separator ? (lenBytes / 8) - 1 : 0;
299 immutable totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0;
300
301 char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes];
302 biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator, letterCase);
303 if (extraPad > 0)
304 {
305 if (separator)
306 {
307 size_t start = frontExtraBytes; // first index to pad
308 if (extraPad &7)
309 {
310 // Do 1 to 7 extra zeros.
311 buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar;
312 buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator);
313 start += (extraPad & 7) + 1;
314 }
315 for (int i=0; i< (extraPad >> 3); ++i)
316 {
317 buff[start .. start + 8] = padChar;
318 buff[start + 8] = (padChar == ' ' ? ' ' : separator);
319 start += 9;
320 }
321 }
322 else
323 {
324 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar;
325 }
326 }
327 int z = frontExtraBytes;
328 if (lenBytes > minPadding)
329 {
330 // Strip leading zeros.
331 ptrdiff_t maxStrip = lenBytes - minPadding;
332 while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0)
333 {
334 ++z;
335 --maxStrip;
336 }
337 }
338 if (padChar!='0')
339 {
340 // Convert leading zeros into padChars.
341 for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k)
342 {
343 if (buff[k]=='0') buff[k]=padChar;
344 }
345 }
346 return buff[z-frontExtraBytes..$];
347 }
348
349 /**
350 * Convert to an octal string.
351 */
toOctalStringBigUint352 char[] toOctalString() const
353 {
354 auto predictLength = 1 + data.length*BigDigitBits / 3;
355 char[] buff = new char[predictLength];
356 size_t firstNonZero = biguintToOctal(buff, data);
357 return buff[firstNonZero .. $];
358 }
359
360 // return false if invalid character found
361 bool fromHexString(Range)(Range s) if (
362 isBidirectionalRange!Range && isSomeChar!(ElementType!Range))
363 {
364 import std.range : walkLength;
365
366 //Strip leading zeros
367 while (!s.empty && s.front == '0')
368 s.popFront;
369
370 if (s.empty)
371 {
372 data = ZERO;
373 return true;
374 }
375
376 immutable len = (s.save.walkLength + 15) / 4;
377 auto tmp = new BigDigit[len + 1];
378 uint part, sofar, partcount;
379
foreach_reverseBigUint380 foreach_reverse (character; s)
381 {
382 if (character == '_')
383 continue;
384
385 uint x;
386 if (character >= '0' && character <= '9')
387 {
388 x = character - '0';
389 }
390 else if (character >= 'A' && character <= 'F')
391 {
392 x = character - 'A' + 10;
393 }
394 else if (character >= 'a' && character <= 'f')
395 {
396 x = character - 'a' + 10;
397 }
398 else
399 {
400 return false;
401 }
402
403 part >>= 4;
404 part |= (x << (32 - 4));
405 ++partcount;
406
407 if (partcount == 8)
408 {
409 tmp[sofar] = part;
410 ++sofar;
411 partcount = 0;
412 part = 0;
413 }
414 }
415 if (part)
416 {
417 for ( ; partcount != 8; ++partcount) part >>= 4;
418 tmp[sofar] = part;
419 ++sofar;
420 }
421 if (sofar == 0)
422 data = ZERO;
423 else
424 data = trustedAssumeUnique(tmp[0 .. sofar]);
425
426 return true;
427 }
428
429 // return true if OK; false if erroneous characters found
430 bool fromDecimalString(Range)(Range s) if (
431 isForwardRange!Range && isSomeChar!(ElementType!Range))
432 {
433 import std.range : walkLength;
434
435 while (!s.empty && s.front == '0')
436 {
437 s.popFront;
438 }
439
440 if (s.empty)
441 {
442 data = ZERO;
443 return true;
444 }
445
446 auto predict_length = (18 * 2 + 2 * s.save.walkLength) / 19;
447 auto tmp = new BigDigit[predict_length];
448
449 tmp.length = biguintFromDecimal(tmp, s);
450
451 data = trustedAssumeUnique(tmp);
452 return true;
453 }
454
455 ////////////////////////
456 //
457 // All of these member functions create a new BigUint.
458
459 // return x >> y
460 BigUint opShr(Tulong)(Tulong y) pure nothrow const if (is (Tulong == ulong))
461 {
462 assert(y>0);
463 uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
464 if ((y >> LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO);
465 uint words = cast(uint)(y >> LG2BIGDIGITBITS);
466 if (bits == 0)
467 {
468 return BigUint(data[words..$]);
469 }
470 else
471 {
472 uint [] result = new BigDigit[data.length - words];
473 multibyteShr(result, data[words..$], bits);
474
475 if (result.length > 1 && result[$-1] == 0)
476 return BigUint(trustedAssumeUnique(result[0 .. $-1]));
477 else
478 return BigUint(trustedAssumeUnique(result));
479 }
480 }
481
482 // return x << y
483 BigUint opShl(Tulong)(Tulong y) pure nothrow const if (is (Tulong == ulong))
484 {
485 assert(y>0);
486 if (isZero()) return this;
487 uint bits = cast(uint) y & BIGDIGITSHIFTMASK;
488 assert((y >> LG2BIGDIGITBITS) < cast(ulong)(uint.max));
489 uint words = cast(uint)(y >> LG2BIGDIGITBITS);
490 BigDigit [] result = new BigDigit[data.length + words+1];
491 result[0 .. words] = 0;
492 if (bits == 0)
493 {
494 result[words .. words+data.length] = data[];
495 return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
496 }
497 else
498 {
499 immutable c = multibyteShl(result[words .. words+data.length], data, bits);
500 if (c == 0) return BigUint(trustedAssumeUnique(result[0 .. words+data.length]));
501 result[$-1] = c;
502 return BigUint(trustedAssumeUnique(result));
503 }
504 }
505
506 // If wantSub is false, return x + y, leaving sign unchanged
507 // If wantSub is true, return abs(x - y), negating sign if x < y
508 static BigUint addOrSubInt(Tulong)(const BigUint x, Tulong y,
509 bool wantSub, ref bool sign) pure nothrow if (is(Tulong == ulong))
510 {
511 BigUint r;
512 if (wantSub)
513 { // perform a subtraction
514 if (x.data.length > 2)
515 {
516 r.data = subInt(x.data, y);
517 }
518 else
519 { // could change sign!
520 ulong xx = x.data[0];
521 if (x.data.length > 1)
522 xx += (cast(ulong) x.data[1]) << 32;
523 ulong d;
524 if (xx <= y)
525 {
526 d = y - xx;
527 sign = !sign;
528 }
529 else
530 {
531 d = xx - y;
532 }
533 if (d == 0)
534 {
535 r = 0UL;
536 sign = false;
537 return r;
538 }
539 if (d > uint.max)
540 {
541 r.data = [cast(uint)(d & 0xFFFF_FFFF), cast(uint)(d >> 32)];
542 }
543 else
544 {
545 r.data = [cast(uint)(d & 0xFFFF_FFFF)];
546 }
547 }
548 }
549 else
550 {
551 r.data = addInt(x.data, y);
552 }
553 return r;
554 }
555
556 // If wantSub is false, return x + y, leaving sign unchanged.
557 // If wantSub is true, return abs(x - y), negating sign if x<y
addOrSubBigUint558 static BigUint addOrSub(BigUint x, BigUint y, bool wantSub, bool *sign)
559 pure nothrow
560 {
561 BigUint r;
562 if (wantSub)
563 { // perform a subtraction
564 bool negative;
565 r.data = sub(x.data, y.data, &negative);
566 *sign ^= negative;
567 if (r.isZero())
568 {
569 *sign = false;
570 }
571 }
572 else
573 {
574 r.data = add(x.data, y.data);
575 }
576 return r;
577 }
578
579
580 // return x*y.
581 // y must not be zero.
582 static BigUint mulInt(T = ulong)(BigUint x, T y) pure nothrow
583 {
584 if (y == 0 || x == 0) return BigUint(ZERO);
585 uint hi = cast(uint)(y >>> 32);
586 uint lo = cast(uint)(y & 0xFFFF_FFFF);
587 uint [] result = new BigDigit[x.data.length+1+(hi != 0)];
588 result[x.data.length] = multibyteMul(result[0 .. x.data.length], x.data, lo, 0);
589 if (hi != 0)
590 {
591 result[x.data.length+1] = multibyteMulAdd!('+')(result[1 .. x.data.length+1],
592 x.data, hi, 0);
593 }
594 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
595 }
596
597 /* return x * y.
598 */
mulBigUint599 static BigUint mul(BigUint x, BigUint y) pure nothrow
600 {
601 if (y == 0 || x == 0)
602 return BigUint(ZERO);
603 auto len = x.data.length + y.data.length;
604 BigDigit [] result = new BigDigit[len];
605 if (y.data.length > x.data.length)
606 {
607 mulInternal(result, y.data, x.data);
608 }
609 else
610 {
611 if (x.data[]==y.data[]) squareInternal(result, x.data);
612 else mulInternal(result, x.data, y.data);
613 }
614 // the highest element could be zero,
615 // in which case we need to reduce the length
616 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
617 }
618
619 // return x / y
620 static BigUint divInt(T)(BigUint x, T y_) pure nothrow
621 if ( is(Unqual!T == uint) )
622 {
623 uint y = y_;
624 if (y == 1)
625 return x;
626 uint [] result = new BigDigit[x.data.length];
627 if ((y&(-y))==y)
628 {
629 assert(y != 0, "BigUint division by zero");
630 // perfect power of 2
631 uint b = 0;
632 for (;y != 1; y>>=1)
633 {
634 ++b;
635 }
636 multibyteShr(result, x.data, b);
637 }
638 else
639 {
640 result[] = x.data[];
641 cast(void) multibyteDivAssign(result, y, 0);
642 }
643 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
644 }
645
646 static BigUint divInt(T)(BigUint x, T y) pure nothrow
647 if ( is(Unqual!T == ulong) )
648 {
649 if (y <= uint.max)
650 return divInt!uint(x, cast(uint) y);
651 if (x.data.length < 2)
652 return BigUint(ZERO);
653 uint hi = cast(uint)(y >>> 32);
654 uint lo = cast(uint)(y & 0xFFFF_FFFF);
655 immutable uint[2] z = [lo, hi];
656 BigDigit[] result = new BigDigit[x.data.length - z.length + 1];
657 divModInternal(result, null, x.data, z[]);
658 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
659 }
660
661 // return x % y
662 static uint modInt(T)(BigUint x, T y_) pure if ( is(Unqual!T == uint) )
663 {
664 import core.memory : GC;
665 uint y = y_;
666 assert(y != 0);
667 if ((y&(-y)) == y)
668 { // perfect power of 2
669 return x.data[0] & (y-1);
670 }
671 else
672 {
673 // horribly inefficient - malloc, copy, & store are unnecessary.
674 uint [] wasteful = new BigDigit[x.data.length];
675 wasteful[] = x.data[];
676 immutable rem = multibyteDivAssign(wasteful, y, 0);
677 () @trusted { GC.free(wasteful.ptr); } ();
678 return rem;
679 }
680 }
681
682 // return x / y
divBigUint683 static BigUint div(BigUint x, BigUint y) pure nothrow
684 {
685 if (y.data.length > x.data.length)
686 return BigUint(ZERO);
687 if (y.data.length == 1)
688 return divInt(x, y.data[0]);
689 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
690 divModInternal(result, null, x.data, y.data);
691 return BigUint(removeLeadingZeros(trustedAssumeUnique(result)));
692 }
693
694 // return x % y
modBigUint695 static BigUint mod(BigUint x, BigUint y) pure nothrow
696 {
697 if (y.data.length > x.data.length) return x;
698 if (y.data.length == 1)
699 {
700 return BigUint([modInt(x, y.data[0])]);
701 }
702 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1];
703 BigDigit [] rem = new BigDigit[y.data.length];
704 divModInternal(result, rem, x.data, y.data);
705 return BigUint(removeLeadingZeros(trustedAssumeUnique(rem)));
706 }
707
708 // return x op y
709 static BigUint bitwiseOp(string op)(BigUint x, BigUint y, bool xSign, bool ySign, ref bool resultSign)
710 pure nothrow @safe if (op == "|" || op == "^" || op == "&")
711 {
712 auto d1 = includeSign(x.data, y.uintLength, xSign);
713 auto d2 = includeSign(y.data, x.uintLength, ySign);
714
715 foreach (i; 0 .. d1.length)
716 {
717 mixin("d1[i] " ~ op ~ "= d2[i];");
718 }
719
720 mixin("resultSign = xSign " ~ op ~ " ySign;");
721
722 if (resultSign)
723 {
724 twosComplement(d1, d1);
725 }
726
727 return BigUint(removeLeadingZeros(trustedAssumeUnique(d1)));
728 }
729
730 /**
731 * Return a BigUint which is x raised to the power of y.
732 * Method: Powers of 2 are removed from x, then left-to-right binary
733 * exponentiation is used.
734 * Memory allocation is minimized: at most one temporary BigUint is used.
735 */
powBigUint736 static BigUint pow(BigUint x, ulong y) pure nothrow
737 {
738 // Deal with the degenerate cases first.
739 if (y == 0) return BigUint(ONE);
740 if (y == 1) return x;
741 if (x == 0 || x == 1) return x;
742
743 BigUint result;
744
745 // Simplify, step 1: Remove all powers of 2.
746 uint firstnonzero = firstNonZeroDigit(x.data);
747 // Now we know x = x[firstnonzero..$] * (2^^(firstnonzero*BigDigitBits))
748 // where BigDigitBits = BigDigit.sizeof * 8
749
750 // See if x[firstnonzero..$] can now fit into a single digit.
751 bool singledigit = ((x.data.length - firstnonzero) == 1);
752 // If true, then x0 is that digit
753 // and the result will be (x0 ^^ y) * (2^^(firstnonzero*y*BigDigitBits))
754 BigDigit x0 = x.data[firstnonzero];
755 assert(x0 != 0);
756 // Length of the non-zero portion
757 size_t nonzerolength = x.data.length - firstnonzero;
758 ulong y0;
759 uint evenbits = 0; // number of even bits in the bottom of x
760 while (!(x0 & 1))
761 {
762 x0 >>= 1;
763 ++evenbits;
764 }
765
766 if (x.data.length- firstnonzero == 2)
767 {
768 // Check for a single digit straddling a digit boundary
769 const BigDigit x1 = x.data[firstnonzero+1];
770 if ((x1 >> evenbits) == 0)
771 {
772 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits));
773 singledigit = true;
774 }
775 }
776 // Now if (singledigit), x^^y = (x0 ^^ y) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
777
778 uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end
779
780 // Simplify, step 2: For singledigits, see if we can trivially reduce y
781
782 BigDigit finalMultiplier = 1UL;
783
784 if (singledigit)
785 {
786 // x fits into a single digit. Raise it to the highest power we can
787 // that still fits into a single digit, then reduce the exponent accordingly.
788 // We're quite likely to have a residual multiply at the end.
789 // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100.
790 // and 5^^13 still fits into a uint.
791 evenshiftbits = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK);
792 if (x0 == 1)
793 { // Perfect power of 2
794 result = 1UL;
795 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
796 }
797 immutable p = highestPowerBelowUintMax(x0);
798 if (y <= p)
799 { // Just do it with pow
800 result = cast(ulong) intpow(x0, y);
801 if (evenbits + firstnonzero == 0)
802 return result;
803 return result << (evenbits + firstnonzero * 8 * BigDigit.sizeof) * y;
804 }
805 y0 = y / p;
806 finalMultiplier = intpow(x0, y - y0*p);
807 x0 = intpow(x0, p);
808 // Result is x0
809 nonzerolength = 1;
810 }
811 // Now if (singledigit), x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
812
813 // Perform a crude check for overflow and allocate result buffer.
814 // The length required is y * lg2(x) bits.
815 // which will always fit into y*x.length digits. But this is
816 // a gross overestimate if x is small (length 1 or 2) and the highest
817 // digit is nearly empty.
818 // A better estimate is:
819 // y * lg2(x[$-1]/BigDigit.max) + y * (x.length - 1) digits,
820 // and the first term is always between
821 // y * (bsr(x.data[$-1]) + 1) / BIGDIGITBITS and
822 // y * (bsr(x.data[$-1]) + 2) / BIGDIGITBITS
823 // For single digit payloads, we already have
824 // x^^y = finalMultiplier * (x0 ^^ y0) * 2^^(evenbits * y) * 2^^(firstnonzero*y*BigDigitBits))
825 // and x0 is almost a full digit, so it's a tight estimate.
826 // Number of digits is therefore 1 + x0.length*y0 + (evenbits*y)/BIGDIGIT + firstnonzero*y
827 // Note that the divisions must be rounded up.
828
829 // Estimated length in BigDigits
830 immutable estimatelength = singledigit
831 ? 1 + y0 + ((evenbits*y + BigDigit.sizeof * 8 - 1) / (BigDigit.sizeof *8)) + firstnonzero*y
832 : x.data.length * y;
833 // Imprecise check for overflow. Makes the extreme cases easier to debug
834 // (less extreme overflow will result in an out of memory error).
835 if (estimatelength > uint.max/(4*BigDigit.sizeof))
836 assert(0, "Overflow in BigInt.pow");
837
838 // The result buffer includes space for all the trailing zeros
839 BigDigit [] resultBuffer = new BigDigit[cast(size_t) estimatelength];
840
841 // Do all the powers of 2!
842 size_t result_start = cast(size_t)( firstnonzero * y
843 + (singledigit ? ((evenbits * y) >> LG2BIGDIGITBITS) : 0));
844
845 resultBuffer[0 .. result_start] = 0;
846 BigDigit [] t1 = resultBuffer[result_start..$];
847 BigDigit [] r1;
848
849 if (singledigit)
850 {
851 r1 = t1[0 .. 1];
852 r1[0] = x0;
853 y = y0;
854 }
855 else
856 {
857 // It's not worth right shifting by evenbits unless we also shrink the length after each
858 // multiply or squaring operation. That might still be worthwhile for large y.
859 r1 = t1[0 .. x.data.length - firstnonzero];
860 r1[0..$] = x.data[firstnonzero..$];
861 }
862
863 if (y>1)
864 { // Set r1 = r1 ^^ y.
865 // The secondary buffer only needs space for the multiplication results
866 BigDigit [] t2 = new BigDigit[resultBuffer.length - result_start];
867 BigDigit [] r2;
868
869 int shifts = 63; // num bits in a long
870 while (!(y & 0x8000_0000_0000_0000L))
871 {
872 y <<= 1;
873 --shifts;
874 }
875 y <<=1;
876
877 while (y != 0)
878 {
879 // For each bit of y: Set r1 = r1 * r1
880 // If the bit is 1, set r1 = r1 * x
881 // Eg, if y is 0b101, result = ((x^^2)^^2)*x == x^^5.
882 // Optimization opportunity: if more than 2 bits in y are set,
883 // it's usually possible to reduce the number of multiplies
884 // by caching odd powers of x. eg for y = 54,
885 // (0b110110), set u = x^^3, and result is ((u^^8)*u)^^2
886 r2 = t2[0 .. r1.length*2];
887 squareInternal(r2, r1);
888 if (y & 0x8000_0000_0000_0000L)
889 {
890 r1 = t1[0 .. r2.length + nonzerolength];
891 if (singledigit)
892 {
893 r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0);
894 }
895 else
896 {
897 mulInternal(r1, r2, x.data[firstnonzero..$]);
898 }
899 }
900 else
901 {
902 r1 = t1[0 .. r2.length];
903 r1[] = r2[];
904 }
905 y <<=1;
906 shifts--;
907 }
908 while (shifts>0)
909 {
910 r2 = t2[0 .. r1.length * 2];
911 squareInternal(r2, r1);
912 r1 = t1[0 .. r2.length];
913 r1[] = r2[];
914 --shifts;
915 }
916 }
917
918 if (finalMultiplier != 1)
919 {
920 const BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0);
921 if (carry)
922 {
923 r1 = t1[0 .. r1.length + 1];
924 r1[$-1] = carry;
925 }
926 }
927 if (evenshiftbits)
928 {
929 const BigDigit carry = multibyteShl(r1, r1, evenshiftbits);
930 if (carry != 0)
931 {
932 r1 = t1[0 .. r1.length + 1];
933 r1[$ - 1] = carry;
934 }
935 }
936 while (r1[$ - 1]==0)
937 {
938 r1=r1[0 .. $ - 1];
939 }
940 return BigUint(trustedAssumeUnique(resultBuffer[0 .. result_start + r1.length]));
941 }
942
943 // Implement toHash so that BigUint works properly as an AA key.
toHashBigUint944 size_t toHash() const @trusted nothrow
945 {
946 return typeid(data).getHash(&data);
947 }
948
949 } // end BigUint
950
951 @safe pure nothrow unittest
952 {
953 // ulong comparison test
954 BigUint a = [1];
955 assert(a == 1);
956 assert(a < 0x8000_0000_0000_0000UL); // bug 9548
957
958 // bug 12234
959 BigUint z = [0];
960 assert(z == 0UL);
961 assert(!(z > 0UL));
962 assert(!(z < 0UL));
963 }
964
965 // Remove leading zeros from x, to restore the BigUint invariant
inout(BigDigit)966 inout(BigDigit) [] removeLeadingZeros(inout(BigDigit) [] x) pure nothrow @safe
967 {
968 size_t k = x.length;
969 while (k>1 && x[k - 1]==0) --k;
970 return x[0 .. k];
971 }
972
973 pure @system unittest
974 {
975 BigUint r = BigUint([5]);
976 BigUint t = BigUint([7]);
977 BigUint s = BigUint.mod(r, t);
978 assert(s == 5);
979 }
980
981
982 @safe pure unittest
983 {
984 BigUint r;
985 r = 5UL;
986 assert(r.peekUlong(0) == 5UL);
987 assert(r.peekUint(0) == 5U);
988 r = 0x1234_5678_9ABC_DEF0UL;
989 assert(r.peekUlong(0) == 0x1234_5678_9ABC_DEF0UL);
990 assert(r.peekUint(0) == 0x9ABC_DEF0U);
991 }
992
993
994 // Pow tests
995 pure @system unittest
996 {
997 BigUint r, s;
998 r.fromHexString("80000000_00000001");
999 s = BigUint.pow(r, 5);
1000 r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000"
1001 ~ "_00000002_80000000_00000001");
1002 assert(s == r);
1003 s = 10UL;
1004 s = BigUint.pow(s, 39);
1005 r.fromDecimalString("1000000000000000000000000000000000000000");
1006 assert(s == r);
1007 r.fromHexString("1_E1178E81_00000000");
1008 s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds
1009
1010 r.fromDecimalString("000_000_00");
1011 assert(r == 0);
1012 r.fromDecimalString("0007");
1013 assert(r == 7);
1014 r.fromDecimalString("0");
1015 assert(r == 0);
1016 }
1017
1018 // Radix conversion tests
1019 @safe pure unittest
1020 {
1021 BigUint r;
1022 r.fromHexString("1_E1178E81_00000000");
1023 assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000");
1024 assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000");
1025 assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000");
1026 assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000");
1027 assert(r.toHexString(0, '_', 16+8+8) == "00000000_00000001_E1178E81_00000000");
1028 assert(r.toHexString(0, '_', 16+8+8+1) == "0_00000000_00000001_E1178E81_00000000");
1029 assert(r.toHexString(0, '_', 16+8+8+1, ' ') == " 1_E1178E81_00000000");
1030 assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000");
1031 r = 0UL;
1032 assert(r.toHexString(0, '_', 0) == "0");
1033 assert(r.toHexString(0, '_', 7) == "0000000");
1034 assert(r.toHexString(0, '_', 7, ' ') == " 0");
1035 assert(r.toHexString(0, '#', 9) == "0#00000000");
1036 assert(r.toHexString(0, 0, 9) == "000000000");
1037 }
1038
1039 //
1040 @safe pure unittest
1041 {
1042 BigUint r;
1043 r.fromHexString("1_E1178E81_00000000");
1044 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "1_e1178e81_00000000");
1045 assert(r.toHexString(0, '_', 20, '0', LetterCase.lower) == "0001_e1178e81_00000000");
1046 assert(r.toHexString(0, '_', 16+8, '0', LetterCase.lower) == "00000001_e1178e81_00000000");
1047 assert(r.toHexString(0, '_', 16+9, '0', LetterCase.lower) == "0_00000001_e1178e81_00000000");
1048 assert(r.toHexString(0, '_', 16+8+8, '0', LetterCase.lower) == "00000000_00000001_e1178e81_00000000");
1049 assert(r.toHexString(0, '_', 16+8+8+1, '0', LetterCase.lower) == "0_00000000_00000001_e1178e81_00000000");
1050 assert(r.toHexString(0, '_', 16+8+8+1, ' ', LetterCase.lower) == " 1_e1178e81_00000000");
1051 assert(r.toHexString(0, 0, 16+8+8+1, '0', LetterCase.lower) == "00000000000000001e1178e8100000000");
1052 r = 0UL;
1053 assert(r.toHexString(0, '_', 0, '0', LetterCase.lower) == "0");
1054 assert(r.toHexString(0, '_', 7, '0', LetterCase.lower) == "0000000");
1055 assert(r.toHexString(0, '_', 7, ' ', LetterCase.lower) == " 0");
1056 assert(r.toHexString(0, '#', 9, '0', LetterCase.lower) == "0#00000000");
1057 assert(r.toHexString(0, 'Z', 9, '0', LetterCase.lower) == "0Z00000000");
1058 assert(r.toHexString(0, 0, 9, '0', LetterCase.lower) == "000000000");
1059 }
1060
1061
1062 private:
twosComplement(const (BigDigit)[]x,BigDigit[]result)1063 void twosComplement(const(BigDigit) [] x, BigDigit[] result)
1064 pure nothrow @safe
1065 {
1066 foreach (i; 0 .. x.length)
1067 {
1068 result[i] = ~x[i];
1069 }
1070 result[x.length..$] = BigDigit.max;
1071
1072 foreach (i; 0 .. result.length)
1073 {
1074 if (result[i] == BigDigit.max)
1075 {
1076 result[i] = 0;
1077 }
1078 else
1079 {
1080 result[i] += 1;
1081 break;
1082 }
1083 }
1084 }
1085
1086 // Encode BigInt as BigDigit array (sign and 2's complement)
includeSign(const (BigDigit)[]x,size_t minSize,bool sign)1087 BigDigit[] includeSign(const(BigDigit) [] x, size_t minSize, bool sign)
1088 pure nothrow @safe
1089 {
1090 size_t length = (x.length > minSize) ? x.length : minSize;
1091 BigDigit [] result = new BigDigit[length];
1092 if (sign)
1093 {
1094 twosComplement(x, result);
1095 }
1096 else
1097 {
1098 result[0 .. x.length] = x;
1099 }
1100 return result;
1101 }
1102
1103 // works for any type
intpow(T)1104 T intpow(T)(T x, ulong n) pure nothrow @safe
1105 {
1106 T p;
1107
1108 switch (n)
1109 {
1110 case 0:
1111 p = 1;
1112 break;
1113
1114 case 1:
1115 p = x;
1116 break;
1117
1118 case 2:
1119 p = x * x;
1120 break;
1121
1122 default:
1123 p = 1;
1124 while (1)
1125 {
1126 if (n & 1)
1127 p *= x;
1128 n >>= 1;
1129 if (!n)
1130 break;
1131 x *= x;
1132 }
1133 break;
1134 }
1135 return p;
1136 }
1137
1138
1139 // returns the maximum power of x that will fit in a uint.
highestPowerBelowUintMax(uint x)1140 int highestPowerBelowUintMax(uint x) pure nothrow @safe
1141 {
1142 assert(x>1);
1143 static immutable ubyte [22] maxpwr = [ 31, 20, 15, 13, 12, 11, 10, 10, 9, 9,
1144 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7];
1145 if (x<24) return maxpwr[x-2];
1146 if (x<41) return 6;
1147 if (x<85) return 5;
1148 if (x<256) return 4;
1149 if (x<1626) return 3;
1150 if (x<65_536) return 2;
1151 return 1;
1152 }
1153
1154 // returns the maximum power of x that will fit in a ulong.
highestPowerBelowUlongMax(uint x)1155 int highestPowerBelowUlongMax(uint x) pure nothrow @safe
1156 {
1157 assert(x>1);
1158 static immutable ubyte [39] maxpwr = [ 63, 40, 31, 27, 24, 22, 21, 20, 19, 18,
1159 17, 17, 16, 16, 15, 15, 15, 15, 14, 14,
1160 14, 14, 13, 13, 13, 13, 13, 13, 13, 12,
1161 12, 12, 12, 12, 12, 12, 12, 12, 12];
1162 if (x<41) return maxpwr[x-2];
1163 if (x<57) return 11;
1164 if (x<85) return 10;
1165 if (x<139) return 9;
1166 if (x<256) return 8;
1167 if (x<566) return 7;
1168 if (x<1626) return 6;
1169 if (x<7132) return 5;
1170 if (x<65_536) return 4;
1171 if (x<2_642_246) return 3;
1172 return 2;
1173 }
1174
version(unittest)1175 version (unittest)
1176 {
1177
1178 int slowHighestPowerBelowUintMax(uint x) pure nothrow @safe
1179 {
1180 int pwr = 1;
1181 for (ulong q = x;x*q < cast(ulong) uint.max; )
1182 {
1183 q*=x; ++pwr;
1184 }
1185 return pwr;
1186 }
1187
1188 @safe pure unittest
1189 {
1190 assert(highestPowerBelowUintMax(10)==9);
1191 for (int k=82; k<88; ++k)
1192 {
1193 assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k));
1194 }
1195 }
1196
1197 }
1198
1199
1200 /* General unsigned subtraction routine for bigints.
1201 * Sets result = x - y. If the result is negative, negative will be true.
1202 */
sub(const BigDigit[]x,const BigDigit[]y,bool * negative)1203 BigDigit [] sub(const BigDigit [] x, const BigDigit [] y, bool *negative)
1204 pure nothrow
1205 {
1206 if (x.length == y.length)
1207 {
1208 // There's a possibility of cancellation, if x and y are almost equal.
1209 ptrdiff_t last = highestDifferentDigit(x, y);
1210 BigDigit [] result = new BigDigit[last+1];
1211 if (x[last] < y[last])
1212 { // we know result is negative
1213 multibyteSub(result[0 .. last+1], y[0 .. last+1], x[0 .. last+1], 0);
1214 *negative = true;
1215 }
1216 else
1217 { // positive or zero result
1218 multibyteSub(result[0 .. last+1], x[0 .. last+1], y[0 .. last+1], 0);
1219 *negative = false;
1220 }
1221 while (result.length > 1 && result[$-1] == 0)
1222 {
1223 result = result[0..$-1];
1224 }
1225 // if (result.length >1 && result[$-1]==0) return result[0..$-1];
1226 return result;
1227 }
1228 // Lengths are different
1229 const(BigDigit) [] large, small;
1230 if (x.length < y.length)
1231 {
1232 *negative = true;
1233 large = y; small = x;
1234 }
1235 else
1236 {
1237 *negative = false;
1238 large = x; small = y;
1239 }
1240 // result.length will be equal to larger length, or could decrease by 1.
1241
1242
1243 BigDigit [] result = new BigDigit[large.length];
1244 BigDigit carry = multibyteSub(result[0 .. small.length], large[0 .. small.length], small, 0);
1245 result[small.length..$] = large[small.length..$];
1246 if (carry)
1247 {
1248 multibyteIncrementAssign!('-')(result[small.length..$], carry);
1249 }
1250 while (result.length > 1 && result[$-1] == 0)
1251 {
1252 result = result[0..$-1];
1253 }
1254 return result;
1255 }
1256
1257
1258 // return a + b
add(const BigDigit[]a,const BigDigit[]b)1259 BigDigit [] add(const BigDigit [] a, const BigDigit [] b) pure nothrow
1260 {
1261 const(BigDigit) [] x, y;
1262 if (a.length < b.length)
1263 {
1264 x = b; y = a;
1265 }
1266 else
1267 {
1268 x = a; y = b;
1269 }
1270 // now we know x.length > y.length
1271 // create result. add 1 in case it overflows
1272 BigDigit [] result = new BigDigit[x.length + 1];
1273
1274 BigDigit carry = multibyteAdd(result[0 .. y.length], x[0 .. y.length], y, 0);
1275 if (x.length != y.length)
1276 {
1277 result[y.length..$-1]= x[y.length..$];
1278 carry = multibyteIncrementAssign!('+')(result[y.length..$-1], carry);
1279 }
1280 if (carry)
1281 {
1282 result[$-1] = carry;
1283 return result;
1284 }
1285 else
1286 return result[0..$-1];
1287 }
1288
1289 /** return x + y
1290 */
addInt(const BigDigit[]x,ulong y)1291 BigDigit [] addInt(const BigDigit[] x, ulong y) pure nothrow
1292 {
1293 uint hi = cast(uint)(y >>> 32);
1294 uint lo = cast(uint)(y& 0xFFFF_FFFF);
1295 auto len = x.length;
1296 if (x.length < 2 && hi != 0) ++len;
1297 BigDigit [] result = new BigDigit[len+1];
1298 result[0 .. x.length] = x[];
1299 if (x.length < 2 && hi != 0)
1300 {
1301 result[1]=hi;
1302 hi=0;
1303 }
1304 uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo);
1305 if (hi != 0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi);
1306 if (carry)
1307 {
1308 result[$-1] = carry;
1309 return result;
1310 }
1311 else
1312 return result[0..$-1];
1313 }
1314
1315 /** Return x - y.
1316 * x must be greater than y.
1317 */
subInt(const BigDigit[]x,ulong y)1318 BigDigit [] subInt(const BigDigit[] x, ulong y) pure nothrow
1319 {
1320 uint hi = cast(uint)(y >>> 32);
1321 uint lo = cast(uint)(y & 0xFFFF_FFFF);
1322 BigDigit [] result = new BigDigit[x.length];
1323 result[] = x[];
1324 multibyteIncrementAssign!('-')(result[], lo);
1325 if (hi)
1326 multibyteIncrementAssign!('-')(result[1..$], hi);
1327 if (result[$-1] == 0)
1328 return result[0..$-1];
1329 else
1330 return result;
1331 }
1332
1333 /** General unsigned multiply routine for bigints.
1334 * Sets result = x * y.
1335 *
1336 * The length of y must not be larger than the length of x.
1337 * Different algorithms are used, depending on the lengths of x and y.
1338 * TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the
1339 * unbalanced case. (But I doubt it would be faster in practice).
1340 *
1341 */
mulInternal(BigDigit[]result,const (BigDigit)[]x,const (BigDigit)[]y)1342 void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1343 pure nothrow
1344 {
1345 import core.memory : GC;
1346 assert( result.length == x.length + y.length );
1347 assert( y.length > 0 );
1348 assert( x.length >= y.length);
1349 if (y.length <= KARATSUBALIMIT)
1350 {
1351 // Small multiplier, we'll just use the asm classic multiply.
1352 if (y.length == 1)
1353 { // Trivial case, no cache effects to worry about
1354 result[x.length] = multibyteMul(result[0 .. x.length], x, y[0], 0);
1355 return;
1356 }
1357
1358 if (x.length + y.length < CACHELIMIT)
1359 return mulSimple(result, x, y);
1360
1361 // If x is so big that it won't fit into the cache, we divide it into chunks
1362 // Every chunk must be greater than y.length.
1363 // We make the first chunk shorter, if necessary, to ensure this.
1364
1365 auto chunksize = CACHELIMIT / y.length;
1366 immutable residual = x.length % chunksize;
1367 if (residual < y.length)
1368 {
1369 chunksize -= y.length;
1370 }
1371
1372 // Use schoolbook multiply.
1373 mulSimple(result[0 .. chunksize + y.length], x[0 .. chunksize], y);
1374 auto done = chunksize;
1375
1376 while (done < x.length)
1377 {
1378 // result[done .. done+ylength] already has a value.
1379 chunksize = (done + (CACHELIMIT / y.length) < x.length) ? (CACHELIMIT / y.length) : x.length - done;
1380 BigDigit [KARATSUBALIMIT] partial;
1381 partial[0 .. y.length] = result[done .. done+y.length];
1382 mulSimple(result[done .. done+chunksize+y.length], x[done .. done+chunksize], y);
1383 addAssignSimple(result[done .. done+chunksize + y.length], partial[0 .. y.length]);
1384 done += chunksize;
1385 }
1386 return;
1387 }
1388
1389 immutable half = (x.length >> 1) + (x.length & 1);
1390 if (2*y.length*y.length <= x.length*x.length)
1391 {
1392 // UNBALANCED MULTIPLY
1393 // Use school multiply to cut into quasi-squares of Karatsuba-size
1394 // or larger. The ratio of the two sides of the 'square' must be
1395 // between 1.414:1 and 1:1. Use Karatsuba on each chunk.
1396 //
1397 // For maximum performance, we want the ratio to be as close to
1398 // 1:1 as possible. To achieve this, we can either pad x or y.
1399 // The best choice depends on the modulus x%y.
1400 auto numchunks = x.length / y.length;
1401 auto chunksize = y.length;
1402 auto extra = x.length % y.length;
1403 auto maxchunk = chunksize + extra;
1404 bool paddingY; // true = we're padding Y, false = we're padding X.
1405 if (extra * extra * 2 < y.length*y.length)
1406 {
1407 // The leftover bit is small enough that it should be incorporated
1408 // in the existing chunks.
1409 // Make all the chunks a tiny bit bigger
1410 // (We're padding y with zeros)
1411 chunksize += extra / numchunks;
1412 extra = x.length - chunksize*numchunks;
1413 // there will probably be a few left over.
1414 // Every chunk will either have size chunksize, or chunksize+1.
1415 maxchunk = chunksize + 1;
1416 paddingY = true;
1417 assert(chunksize + extra + chunksize *(numchunks-1) == x.length );
1418 }
1419 else
1420 {
1421 // the extra bit is large enough that it's worth making a new chunk.
1422 // (This means we're padding x with zeros, when doing the first one).
1423 maxchunk = chunksize;
1424 ++numchunks;
1425 paddingY = false;
1426 assert(extra + chunksize *(numchunks-1) == x.length );
1427 }
1428 // We make the buffer a bit bigger so we have space for the partial sums.
1429 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1430 BigDigit [] partial = scratchbuff[$ - y.length .. $];
1431 size_t done; // how much of X have we done so far?
1432 if (paddingY)
1433 {
1434 // If the first chunk is bigger, do it first. We're padding y.
1435 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1436 x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff);
1437 done = chunksize + (extra > 0 ? 1 : 0);
1438 if (extra) --extra;
1439 }
1440 else
1441 { // We're padding X. Begin with the extra bit.
1442 mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1443 done = extra;
1444 extra = 0;
1445 }
1446 immutable basechunksize = chunksize;
1447 while (done < x.length)
1448 {
1449 chunksize = basechunksize + (extra > 0 ? 1 : 0);
1450 if (extra) --extra;
1451 partial[] = result[done .. done+y.length];
1452 mulKaratsuba(result[done .. done + y.length + chunksize],
1453 x[done .. done+chunksize], y, scratchbuff);
1454 addAssignSimple(result[done .. done + y.length + chunksize], partial);
1455 done += chunksize;
1456 }
1457 () @trusted { GC.free(scratchbuff.ptr); } ();
1458 }
1459 else
1460 {
1461 // Balanced. Use Karatsuba directly.
1462 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1463 mulKaratsuba(result, x, y, scratchbuff);
1464 () @trusted { GC.free(scratchbuff.ptr); } ();
1465 }
1466 }
1467
1468 /** General unsigned squaring routine for BigInts.
1469 * Sets result = x*x.
1470 * NOTE: If the highest half-digit of x is zero, the highest digit of result will
1471 * also be zero.
1472 */
squareInternal(BigDigit[]result,const BigDigit[]x)1473 void squareInternal(BigDigit[] result, const BigDigit[] x) pure nothrow
1474 {
1475 import core.memory : GC;
1476 // Squaring is potentially half a multiply, plus add the squares of
1477 // the diagonal elements.
1478 assert(result.length == 2*x.length);
1479 if (x.length <= KARATSUBASQUARELIMIT)
1480 {
1481 if (x.length == 1)
1482 {
1483 result[1] = multibyteMul(result[0 .. 1], x, x[0], 0);
1484 return;
1485 }
1486 return squareSimple(result, x);
1487 }
1488 // The nice thing about squaring is that it always stays balanced
1489 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)];
1490 squareKaratsuba(result, x, scratchbuff);
1491 () @trusted { GC.free(scratchbuff.ptr); } ();
1492 }
1493
1494
1495 import core.bitop : bsr;
1496
1497 /// if remainder is null, only calculate quotient.
divModInternal(BigDigit[]quotient,BigDigit[]remainder,const BigDigit[]u,const BigDigit[]v)1498 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, const BigDigit [] u,
1499 const BigDigit [] v) pure nothrow
1500 {
1501 import core.memory : GC;
1502 assert(quotient.length == u.length - v.length + 1);
1503 assert(remainder == null || remainder.length == v.length);
1504 assert(v.length > 1);
1505 assert(u.length >= v.length);
1506
1507 // Normalize by shifting v left just enough so that
1508 // its high-order bit is on, and shift u left the
1509 // same amount. The highest bit of u will never be set.
1510
1511 BigDigit [] vn = new BigDigit[v.length];
1512 BigDigit [] un = new BigDigit[u.length + 1];
1513 // How much to left shift v, so that its MSB is set.
1514 uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]);
1515 if (s != 0)
1516 {
1517 multibyteShl(vn, v, s);
1518 un[$-1] = multibyteShl(un[0..$-1], u, s);
1519 }
1520 else
1521 {
1522 vn[] = v[];
1523 un[0..$-1] = u[];
1524 un[$-1] = 0;
1525 }
1526 if (quotient.length<FASTDIVLIMIT)
1527 {
1528 schoolbookDivMod(quotient, un, vn);
1529 }
1530 else
1531 {
1532 blockDivMod(quotient, un, vn);
1533 }
1534
1535 // Unnormalize remainder, if required.
1536 if (remainder != null)
1537 {
1538 if (s == 0) remainder[] = un[0 .. vn.length];
1539 else multibyteShr(remainder, un[0 .. vn.length+1], s);
1540 }
1541 () @trusted { GC.free(un.ptr); GC.free(vn.ptr); } ();
1542 }
1543
1544 pure @system unittest
1545 {
1546 immutable(uint) [] u = [0, 0xFFFF_FFFE, 0x8000_0000];
1547 immutable(uint) [] v = [0xFFFF_FFFF, 0x8000_0000];
1548 uint [] q = new uint[u.length - v.length + 1];
1549 uint [] r = new uint[2];
1550 divModInternal(q, r, u, v);
1551 assert(q[]==[0xFFFF_FFFFu, 0]);
1552 assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]);
1553 u = [0, 0xFFFF_FFFE, 0x8000_0001];
1554 v = [0xFFFF_FFFF, 0x8000_0000];
1555 divModInternal(q, r, u, v);
1556 }
1557
1558
1559 private:
1560 // Converts a big uint to a hexadecimal string.
1561 //
1562 // Optionally, a separator character (eg, an underscore) may be added between
1563 // every 8 digits.
1564 // buff.length must be data.length*8 if separator is zero,
1565 // or data.length*9 if separator is non-zero. It will be completely filled.
1566 char [] biguintToHex(char [] buff, const BigDigit [] data, char separator=0,
1567 LetterCase letterCase = LetterCase.upper) pure nothrow @safe
1568 {
1569 int x=0;
1570 for (ptrdiff_t i=data.length - 1; i >= 0; --i)
1571 {
1572 toHexZeroPadded(buff[x .. x+8], data[i], letterCase);
1573 x+=8;
1574 if (separator)
1575 {
1576 if (i>0) buff[x] = separator;
1577 ++x;
1578 }
1579 }
1580 return buff;
1581 }
1582
1583 /**
1584 * Convert a big uint into an octal string.
1585 *
1586 * Params:
1587 * buff = The destination buffer for the octal string. Must be large enough to
1588 * store the result, including leading zeroes, which is
1589 * ceil(data.length * BigDigitBits / 3) characters.
1590 * The buffer is filled from back to front, starting from `buff[$-1]`.
1591 * data = The biguint to be converted.
1592 *
1593 * Returns: The index of the leading non-zero digit in `buff`. Will be
1594 * `buff.length - 1` if the entire big uint is zero.
1595 */
biguintToOctal(char[]buff,const (BigDigit)[]data)1596 size_t biguintToOctal(char[] buff, const(BigDigit)[] data)
1597 pure nothrow @safe @nogc
1598 {
1599 ubyte carry = 0;
1600 int shift = 0;
1601 size_t penPos = buff.length - 1;
1602 size_t lastNonZero = buff.length - 1;
1603
1604 pragma(inline) void output(uint digit) @nogc nothrow
1605 {
1606 if (digit != 0)
1607 lastNonZero = penPos;
1608 buff[penPos--] = cast(char)('0' + digit);
1609 }
1610
1611 foreach (bigdigit; data)
1612 {
1613 if (shift < 0)
1614 {
1615 // Some bits were carried over from previous word.
1616 assert(shift > -3);
1617 output(((bigdigit << -shift) | carry) & 0b111);
1618 shift += 3;
1619 assert(shift > 0);
1620 }
1621
1622 while (shift <= BigDigitBits - 3)
1623 {
1624 output((bigdigit >>> shift) & 0b111);
1625 shift += 3;
1626 }
1627
1628 if (shift < BigDigitBits)
1629 {
1630 // Some bits need to be carried forward.
1631 carry = (bigdigit >>> shift) & 0b11;
1632 }
1633 shift -= BigDigitBits;
1634 assert(shift >= -2 && shift <= 0);
1635 }
1636
1637 if (shift < 0)
1638 {
1639 // Last word had bits that haven't been output yet.
1640 assert(shift > -3);
1641 output(carry);
1642 }
1643
1644 return lastNonZero;
1645 }
1646
1647 /** Convert a big uint into a decimal string.
1648 *
1649 * Params:
1650 * data The biguint to be converted. Will be destroyed.
1651 * buff The destination buffer for the decimal string. Must be
1652 * large enough to store the result, including leading zeros.
1653 * Will be filled backwards, starting from buff[$-1].
1654 *
1655 * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length.
1656 * Returns:
1657 * the lowest index of buff which was used.
1658 */
biguintToDecimal(char[]buff,BigDigit[]data)1659 size_t biguintToDecimal(char [] buff, BigDigit [] data) pure nothrow
1660 {
1661 ptrdiff_t sofar = buff.length;
1662 // Might be better to divide by (10^38/2^32) since that gives 38 digits for
1663 // the price of 3 divisions and a shr; this version only gives 27 digits
1664 // for 3 divisions.
1665 while (data.length>1)
1666 {
1667 uint rem = multibyteDivAssign(data, 10_0000_0000, 0);
1668 itoaZeroPadded(buff[sofar-9 .. sofar], rem);
1669 sofar -= 9;
1670 if (data[$-1] == 0 && data.length > 1)
1671 {
1672 data.length = data.length - 1;
1673 }
1674 }
1675 itoaZeroPadded(buff[sofar-10 .. sofar], data[0]);
1676 sofar -= 10;
1677 // and strip off the leading zeros
1678 while (sofar != buff.length-1 && buff[sofar] == '0')
1679 sofar++;
1680 return sofar;
1681 }
1682
1683 /** Convert a decimal string into a big uint.
1684 *
1685 * Params:
1686 * data The biguint to be receive the result. Must be large enough to
1687 * store the result.
1688 * s The decimal string. May contain _ or 0 .. 9
1689 *
1690 * The required length for the destination buffer is slightly less than
1691 * 1 + s.length/log2(10) = 1 + s.length/3.3219.
1692 *
1693 * Returns:
1694 * the highest index of data which was used.
1695 */
1696 int biguintFromDecimal(Range)(BigDigit[] data, Range s)
1697 if (
1698 isInputRange!Range &&
1699 isSomeChar!(ElementType!Range) &&
1700 !isInfinite!Range)
1701 in
1702 {
1703 static if (hasLength!Range)
1704 assert((data.length >= 2) || (data.length == 1 && s.length == 1));
1705 }
1706 body
1707 {
1708 import std.conv : ConvException;
1709
1710 // Convert to base 1e19 = 10_000_000_000_000_000_000.
1711 // (this is the largest power of 10 that will fit into a long).
1712 // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219.
1713 // 485 bits will only just fit into 146 decimal digits.
1714 // As we convert the string, we record the number of digits we've seen in base 19:
1715 // hi is the number of digits/19, lo is the extra digits (0 to 18).
1716 // TODO: This is inefficient for very large strings (it is O(n^^2)).
1717 // We should take advantage of fast multiplication once the numbers exceed
1718 // Karatsuba size.
1719 uint lo = 0; // number of powers of digits, 0 .. 18
1720 uint x = 0;
1721 ulong y = 0;
1722 uint hi = 0; // number of base 1e19 digits
1723 data[0] = 0; // initially number is 0.
1724 if (data.length > 1)
1725 data[1] = 0;
1726
foreach(character;s)1727 foreach (character; s)
1728 {
1729 if (character == '_')
1730 continue;
1731
1732 if (character < '0' || character > '9')
1733 throw new ConvException("invalid digit");
1734 x *= 10;
1735 x += character - '0';
1736 ++lo;
1737 if (lo == 9)
1738 {
1739 y = x;
1740 x = 0;
1741 }
1742 if (lo == 18)
1743 {
1744 y *= 10_0000_0000;
1745 y += x;
1746 x = 0;
1747 }
1748 if (lo == 19)
1749 {
1750 y *= 10;
1751 y += x;
1752 x = 0;
1753 // Multiply existing number by 10^19, then add y1.
1754 if (hi>0)
1755 {
1756 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 1_220_703_125*2u, 0); // 5^13*2 = 0x9184_E72A
1757 ++hi;
1758 data[hi] = multibyteMul(data[0 .. hi], data[0 .. hi], 15_625*262_144u, 0); // 5^6*2^18 = 0xF424_0000
1759 ++hi;
1760 }
1761 else
1762 hi = 2;
1763 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
1764 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
1765 if (c != 0)
1766 {
1767 data[hi]=c;
1768 ++hi;
1769 }
1770 y = 0;
1771 lo = 0;
1772 }
1773 }
1774 // Now set y = all remaining digits.
1775 if (lo >= 18)
1776 {
1777 }
1778 else if (lo >= 9)
1779 {
1780 for (int k=9; k<lo; ++k) y*=10;
1781 y+=x;
1782 }
1783 else
1784 {
1785 for (int k=0; k<lo; ++k) y*=10;
1786 y+=x;
1787 }
1788 if (lo != 0)
1789 {
1790 if (hi == 0)
1791 {
1792 data[0] = cast(uint) y;
1793 if (data.length == 1)
1794 {
1795 hi = 1;
1796 }
1797 else
1798 {
1799 data[1] = cast(uint)(y >>> 32);
1800 hi=2;
1801 }
1802 }
1803 else
1804 {
1805 while (lo>0)
1806 {
1807 immutable c = multibyteMul(data[0 .. hi], data[0 .. hi], 10, 0);
1808 if (c != 0)
1809 {
1810 data[hi]=c;
1811 ++hi;
1812 }
1813 --lo;
1814 }
1815 uint c = multibyteIncrementAssign!('+')(data[0 .. hi], cast(uint)(y&0xFFFF_FFFF));
1816 if (y > 0xFFFF_FFFFL)
1817 {
1818 c += multibyteIncrementAssign!('+')(data[1 .. hi], cast(uint)(y >> 32));
1819 }
1820 if (c != 0)
1821 {
1822 data[hi]=c;
1823 ++hi;
1824 }
1825 }
1826 }
1827 while (hi>1 && data[hi-1]==0)
1828 --hi;
1829 return hi;
1830 }
1831
1832
1833 private:
1834 // ------------------------
1835 // These in-place functions are only for internal use; they are incompatible
1836 // with COW.
1837
1838 // Classic 'schoolbook' multiplication.
mulSimple(BigDigit[]result,const (BigDigit)[]left,const (BigDigit)[]right)1839 void mulSimple(BigDigit[] result, const(BigDigit) [] left,
1840 const(BigDigit)[] right) pure nothrow
1841 in
1842 {
1843 assert(result.length == left.length + right.length);
1844 assert(right.length>1);
1845 }
1846 body
1847 {
1848 result[left.length] = multibyteMul(result[0 .. left.length], left, right[0], 0);
1849 multibyteMultiplyAccumulate(result[1..$], left, right[1..$]);
1850 }
1851
1852 // Classic 'schoolbook' squaring
squareSimple(BigDigit[]result,const (BigDigit)[]x)1853 void squareSimple(BigDigit[] result, const(BigDigit) [] x) pure nothrow
1854 in
1855 {
1856 assert(result.length == 2*x.length);
1857 assert(x.length>1);
1858 }
1859 body
1860 {
1861 multibyteSquare(result, x);
1862 }
1863
1864
1865 // add two uints of possibly different lengths. Result must be as long
1866 // as the larger length.
1867 // Returns carry (0 or 1).
addSimple(BigDigit[]result,const BigDigit[]left,const BigDigit[]right)1868 uint addSimple(BigDigit[] result, const BigDigit [] left, const BigDigit [] right)
1869 pure nothrow
1870 in
1871 {
1872 assert(result.length == left.length);
1873 assert(left.length >= right.length);
1874 assert(right.length>0);
1875 }
1876 body
1877 {
1878 uint carry = multibyteAdd(result[0 .. right.length],
1879 left[0 .. right.length], right, 0);
1880 if (right.length < left.length)
1881 {
1882 result[right.length .. left.length] = left[right.length .. $];
1883 carry = multibyteIncrementAssign!('+')(result[right.length..$], carry);
1884 }
1885 return carry;
1886 }
1887
1888 // result = left - right
1889 // returns carry (0 or 1)
subSimple(BigDigit[]result,const (BigDigit)[]left,const (BigDigit)[]right)1890 BigDigit subSimple(BigDigit [] result,const(BigDigit) [] left,
1891 const(BigDigit) [] right) pure nothrow
1892 in
1893 {
1894 assert(result.length == left.length);
1895 assert(left.length >= right.length);
1896 assert(right.length>0);
1897 }
1898 body
1899 {
1900 BigDigit carry = multibyteSub(result[0 .. right.length],
1901 left[0 .. right.length], right, 0);
1902 if (right.length < left.length)
1903 {
1904 result[right.length .. left.length] = left[right.length .. $];
1905 carry = multibyteIncrementAssign!('-')(result[right.length..$], carry);
1906 } //else if (result.length == left.length+1) { result[$-1] = carry; carry=0; }
1907 return carry;
1908 }
1909
1910
1911 /* result = result - right
1912 * Returns carry = 1 if result was less than right.
1913 */
subAssignSimple(BigDigit[]result,const (BigDigit)[]right)1914 BigDigit subAssignSimple(BigDigit [] result, const(BigDigit) [] right)
1915 pure nothrow
1916 {
1917 assert(result.length >= right.length);
1918 uint c = multibyteSub(result[0 .. right.length], result[0 .. right.length], right, 0);
1919 if (c && result.length > right.length)
1920 c = multibyteIncrementAssign!('-')(result[right.length .. $], c);
1921 return c;
1922 }
1923
1924 /* result = result + right
1925 */
addAssignSimple(BigDigit[]result,const (BigDigit)[]right)1926 BigDigit addAssignSimple(BigDigit [] result, const(BigDigit) [] right)
1927 pure nothrow
1928 {
1929 assert(result.length >= right.length);
1930 uint c = multibyteAdd(result[0 .. right.length], result[0 .. right.length], right, 0);
1931 if (c && result.length > right.length)
1932 c = multibyteIncrementAssign!('+')(result[right.length .. $], c);
1933 return c;
1934 }
1935
1936 /* performs result += wantSub? - right : right;
1937 */
addOrSubAssignSimple(BigDigit[]result,const (BigDigit)[]right,bool wantSub)1938 BigDigit addOrSubAssignSimple(BigDigit [] result, const(BigDigit) [] right,
1939 bool wantSub) pure nothrow
1940 {
1941 if (wantSub)
1942 return subAssignSimple(result, right);
1943 else
1944 return addAssignSimple(result, right);
1945 }
1946
1947
1948 // return true if x<y, considering leading zeros
less(const (BigDigit)[]x,const (BigDigit)[]y)1949 bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow
1950 {
1951 assert(x.length >= y.length);
1952 auto k = x.length-1;
1953 while (x[k]==0 && k >= y.length)
1954 --k;
1955 if (k >= y.length)
1956 return false;
1957 while (k>0 && x[k]==y[k])
1958 --k;
1959 return x[k] < y[k];
1960 }
1961
1962 // Set result = abs(x-y), return true if result is negative(x<y), false if x <= y.
inplaceSub(BigDigit[]result,const (BigDigit)[]x,const (BigDigit)[]y)1963 bool inplaceSub(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1964 pure nothrow
1965 {
1966 assert(result.length == (x.length >= y.length) ? x.length : y.length);
1967
1968 size_t minlen;
1969 bool negative;
1970 if (x.length >= y.length)
1971 {
1972 minlen = y.length;
1973 negative = less(x, y);
1974 }
1975 else
1976 {
1977 minlen = x.length;
1978 negative = !less(y, x);
1979 }
1980 const (BigDigit)[] large, small;
1981 if (negative)
1982 {
1983 large = y; small = x;
1984 }
1985 else
1986 {
1987 large = x; small = y;
1988 }
1989
1990 BigDigit carry = multibyteSub(result[0 .. minlen], large[0 .. minlen], small[0 .. minlen], 0);
1991 if (x.length != y.length)
1992 {
1993 result[minlen .. large.length]= large[minlen..$];
1994 result[large.length..$] = 0;
1995 if (carry)
1996 multibyteIncrementAssign!('-')(result[minlen..$], carry);
1997 }
1998 return negative;
1999 }
2000
2001 /* Determine how much space is required for the temporaries
2002 * when performing a Karatsuba multiplication.
2003 */
karatsubaRequiredBuffSize(size_t xlen)2004 size_t karatsubaRequiredBuffSize(size_t xlen) pure nothrow @safe
2005 {
2006 return xlen <= KARATSUBALIMIT ? 0 : 2*xlen; // - KARATSUBALIMIT+2;
2007 }
2008
2009 /* Sets result = x*y, using Karatsuba multiplication.
2010 * x must be longer or equal to y.
2011 * Valid only for balanced multiplies, where x is not shorter than y.
2012 * It is superior to schoolbook multiplication if and only if
2013 * sqrt(2)*y.length > x.length > y.length.
2014 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2)
2015 * The maximum allowable length of x and y is uint.max; but better algorithms
2016 * should be used far before that length is reached.
2017 * Params:
2018 * scratchbuff An array long enough to store all the temporaries. Will be destroyed.
2019 */
mulKaratsuba(BigDigit[]result,const (BigDigit)[]x,const (BigDigit)[]y,BigDigit[]scratchbuff)2020 void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x,
2021 const(BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow
2022 {
2023 assert(x.length >= y.length);
2024 assert(result.length < uint.max, "Operands too large");
2025 assert(result.length == x.length + y.length);
2026 if (x.length <= KARATSUBALIMIT)
2027 {
2028 return mulSimple(result, x, y);
2029 }
2030 // Must be almost square (otherwise, a schoolbook iteration is better)
2031 assert(2L * y.length * y.length > (x.length-1) * (x.length-1),
2032 "Bigint Internal Error: Asymmetric Karatsuba");
2033
2034 // The subtractive version of Karatsuba multiply uses the following result:
2035 // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid)
2036 // where mid = (x0-x1)*(y0-y1)
2037 // requiring 3 multiplies of length N, instead of 4.
2038 // The advantage of the subtractive over the additive version is that
2039 // the mid multiply cannot exceed length N. But there are subtleties:
2040 // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we
2041 // retain all of the leading zeros in the subtractions
2042
2043 // half length, round up.
2044 auto half = (x.length >> 1) + (x.length & 1);
2045
2046 const(BigDigit) [] x0 = x[0 .. half];
2047 const(BigDigit) [] x1 = x[half .. $];
2048 const(BigDigit) [] y0 = y[0 .. half];
2049 const(BigDigit) [] y1 = y[half .. $];
2050 BigDigit [] mid = scratchbuff[0 .. half*2];
2051 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2052 BigDigit [] resultLow = result[0 .. 2*half];
2053 BigDigit [] resultHigh = result[2*half .. $];
2054 // initially use result to store temporaries
2055 BigDigit [] xdiff= result[0 .. half];
2056 BigDigit [] ydiff = result[half .. half*2];
2057
2058 // First, we calculate mid, and sign of mid
2059 immutable bool midNegative = inplaceSub(xdiff, x0, x1)
2060 ^ inplaceSub(ydiff, y0, y1);
2061 mulKaratsuba(mid, xdiff, ydiff, newscratchbuff);
2062
2063 // Low half of result gets x0 * y0. High half gets x1 * y1
2064
2065 mulKaratsuba(resultLow, x0, y0, newscratchbuff);
2066
2067 if (2L * y1.length * y1.length < x1.length * x1.length)
2068 {
2069 // an asymmetric situation has been created.
2070 // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1.
2071 // Applying one schoolbook multiply gives us two pieces each 1.2:1
2072 if (y1.length <= KARATSUBALIMIT)
2073 mulSimple(resultHigh, x1, y1);
2074 else
2075 {
2076 // divide x1 in two, then use schoolbook multiply on the two pieces.
2077 auto quarter = (x1.length >> 1) + (x1.length & 1);
2078 immutable ysmaller = (quarter >= y1.length);
2079 mulKaratsuba(resultHigh[0 .. quarter+y1.length], ysmaller ? x1[0 .. quarter] : y1,
2080 ysmaller ? y1 : x1[0 .. quarter], newscratchbuff);
2081 // Save the part which will be overwritten.
2082 immutable ysmaller2 = ((x1.length - quarter) >= y1.length);
2083 newscratchbuff[0 .. y1.length] = resultHigh[quarter .. quarter + y1.length];
2084 mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1,
2085 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]);
2086
2087 resultHigh[quarter..$].addAssignSimple(newscratchbuff[0 .. y1.length]);
2088 }
2089 }
2090 else
2091 mulKaratsuba(resultHigh, x1, y1, newscratchbuff);
2092
2093 /* We now have result = x0y0 + (N*N)*x1y1
2094 Before adding or subtracting mid, we must calculate
2095 result += N * (x0y0 + x1y1)
2096 We can do this with three half-length additions. With a = x0y0, b = x1y1:
2097 aHI aLO
2098 + aHI aLO
2099 + bHI bLO
2100 + bHI bLO
2101 = R3 R2 R1 R0
2102 R1 = aHI + bLO + aLO
2103 R2 = aHI + bLO + aHI + carry_from_R1
2104 R3 = bHi + carry_from_R2
2105
2106 It might actually be quicker to do it in two full-length additions:
2107 newscratchbuff[2*half] = addSimple(newscratchbuff[0 .. 2*half], result[0 .. 2*half], result[2*half..$]);
2108 addAssignSimple(result[half..$], newscratchbuff[0 .. 2*half+1]);
2109 */
2110 BigDigit[] R1 = result[half .. half*2];
2111 BigDigit[] R2 = result[half*2 .. half*3];
2112 BigDigit[] R3 = result[half*3..$];
2113 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2114 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2115 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2116 if (c1+c2)
2117 multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2118 if (c1+c3)
2119 multibyteIncrementAssign!('+')(R3, c1+c3);
2120
2121 // And finally we subtract mid
2122 addOrSubAssignSimple(result[half..$], mid, !midNegative);
2123 }
2124
squareKaratsuba(BigDigit[]result,const BigDigit[]x,BigDigit[]scratchbuff)2125 void squareKaratsuba(BigDigit [] result, const BigDigit [] x,
2126 BigDigit [] scratchbuff) pure nothrow
2127 {
2128 // See mulKaratsuba for implementation comments.
2129 // Squaring is simpler, since it never gets asymmetric.
2130 assert(result.length < uint.max, "Operands too large");
2131 assert(result.length == 2*x.length);
2132 if (x.length <= KARATSUBASQUARELIMIT)
2133 {
2134 return squareSimple(result, x);
2135 }
2136 // half length, round up.
2137 auto half = (x.length >> 1) + (x.length & 1);
2138
2139 const(BigDigit)[] x0 = x[0 .. half];
2140 const(BigDigit)[] x1 = x[half .. $];
2141 BigDigit [] mid = scratchbuff[0 .. half*2];
2142 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $];
2143 // initially use result to store temporaries
2144 BigDigit [] xdiff= result[0 .. half];
2145 const BigDigit [] ydiff = result[half .. half*2];
2146
2147 // First, we calculate mid. We don't need its sign
2148 inplaceSub(xdiff, x0, x1);
2149 squareKaratsuba(mid, xdiff, newscratchbuff);
2150
2151 // Set result = x0x0 + (N*N)*x1x1
2152 squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff);
2153 squareKaratsuba(result[2*half .. $], x1, newscratchbuff);
2154
2155 /* result += N * (x0x0 + x1x1)
2156 Do this with three half-length additions. With a = x0x0, b = x1x1:
2157 R1 = aHI + bLO + aLO
2158 R2 = aHI + bLO + aHI + carry_from_R1
2159 R3 = bHi + carry_from_R2
2160 */
2161 BigDigit[] R1 = result[half .. half*2];
2162 BigDigit[] R2 = result[half*2 .. half*3];
2163 BigDigit[] R3 = result[half*3..$];
2164 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1
2165 BigDigit c2 = multibyteAdd(R1, R2, result[0 .. half], 0); // c2:R1 = R2 + R1 + R0
2166 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3
2167 if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2);
2168 if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3);
2169
2170 // And finally we subtract mid, which is always positive
2171 subAssignSimple(result[half..$], mid);
2172 }
2173
2174 /* Knuth's Algorithm D, as presented in
2175 * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002).
2176 * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18.
2177 * Given u and v, calculates quotient = u / v, u = u % v.
2178 * v must be normalized (ie, the MSB of v must be 1).
2179 * The most significant words of quotient and u may be zero.
2180 * u[0 .. v.length] holds the remainder.
2181 */
schoolbookDivMod(BigDigit[]quotient,BigDigit[]u,in BigDigit[]v)2182 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2183 pure nothrow
2184 {
2185 assert(quotient.length == u.length - v.length);
2186 assert(v.length > 1);
2187 assert(u.length >= v.length);
2188 assert((v[$-1]&0x8000_0000)!=0);
2189 assert(u[$-1] < v[$-1]);
2190 // BUG: This code only works if BigDigit is uint.
2191 uint vhi = v[$-1];
2192 uint vlo = v[$-2];
2193
2194 for (ptrdiff_t j = u.length - v.length - 1; j >= 0; j--)
2195 {
2196 // Compute estimate of quotient[j],
2197 // qhat = (three most significant words of u)/(two most sig words of v).
2198 uint qhat;
2199 if (u[j + v.length] == vhi)
2200 {
2201 // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001)
2202 qhat = uint.max;
2203 }
2204 else
2205 {
2206 uint ulo = u[j + v.length - 2];
2207 version (D_InlineAsm_X86)
2208 {
2209 // Note: On DMD, this is only ~10% faster than the non-asm code.
2210 uint *p = &u[j + v.length - 1];
2211 asm pure nothrow
2212 {
2213 mov EAX, p;
2214 mov EDX, [EAX+4];
2215 mov EAX, [EAX];
2216 div dword ptr [vhi];
2217 mov qhat, EAX;
2218 mov ECX, EDX;
2219 div3by2correction:
2220 mul dword ptr [vlo]; // EDX:EAX = qhat * vlo
2221 sub EAX, ulo;
2222 sbb EDX, ECX;
2223 jbe div3by2done;
2224 mov EAX, qhat;
2225 dec EAX;
2226 mov qhat, EAX;
2227 add ECX, dword ptr [vhi];
2228 jnc div3by2correction;
2229 div3by2done: ;
2230 }
2231 }
2232 else
2233 { // version (InlineAsm)
2234 ulong uu = (cast(ulong)(u[j + v.length]) << 32) | u[j + v.length - 1];
2235 immutable bigqhat = uu / vhi;
2236 ulong rhat = uu - bigqhat * vhi;
2237 qhat = cast(uint) bigqhat;
2238 again:
2239 if (cast(ulong) qhat * vlo > ((rhat << 32) + ulo))
2240 {
2241 --qhat;
2242 rhat += vhi;
2243 if (!(rhat & 0xFFFF_FFFF_0000_0000L))
2244 goto again;
2245 }
2246 } // version (InlineAsm)
2247 }
2248 // Multiply and subtract.
2249 uint carry = multibyteMulAdd!('-')(u[j .. j + v.length], v, qhat, 0);
2250
2251 if (u[j+v.length] < carry)
2252 {
2253 // If we subtracted too much, add back
2254 --qhat;
2255 carry -= multibyteAdd(u[j .. j + v.length],u[j .. j + v.length], v, 0);
2256 }
2257 quotient[j] = qhat;
2258 u[j + v.length] = u[j + v.length] - carry;
2259 }
2260 }
2261
2262 private:
2263
2264 // TODO: Replace with a library call
itoaZeroPadded(char[]output,uint value)2265 void itoaZeroPadded(char[] output, uint value)
2266 pure nothrow @safe @nogc
2267 {
2268 for (auto i = output.length; i--;)
2269 {
2270 if (value < 10)
2271 {
2272 output[i] = cast(char)(value + '0');
2273 value = 0;
2274 }
2275 else
2276 {
2277 output[i] = cast(char)(value % 10 + '0');
2278 value /= 10;
2279 }
2280 }
2281 }
2282
2283 void toHexZeroPadded(char[] output, uint value,
2284 LetterCase letterCase = LetterCase.upper) pure nothrow @safe
2285 {
2286 ptrdiff_t x = output.length - 1;
2287 static immutable string upperHexDigits = "0123456789ABCDEF";
2288 static immutable string lowerHexDigits = "0123456789abcdef";
2289 for ( ; x >= 0; --x)
2290 {
2291 if (letterCase == LetterCase.upper)
2292 {
2293 output[x] = upperHexDigits[value & 0xF];
2294 }
2295 else
2296 {
2297 output[x] = lowerHexDigits[value & 0xF];
2298 }
2299 value >>= 4;
2300 }
2301 }
2302
2303 private:
2304
2305 // Returns the highest value of i for which left[i]!=right[i],
2306 // or 0 if left[] == right[]
highestDifferentDigit(const BigDigit[]left,const BigDigit[]right)2307 size_t highestDifferentDigit(const BigDigit [] left, const BigDigit [] right)
2308 pure nothrow @nogc @safe
2309 {
2310 assert(left.length == right.length);
2311 for (ptrdiff_t i = left.length - 1; i>0; --i)
2312 {
2313 if (left[i] != right[i])
2314 return i;
2315 }
2316 return 0;
2317 }
2318
2319 // Returns the lowest value of i for which x[i]!=0.
firstNonZeroDigit(const BigDigit[]x)2320 int firstNonZeroDigit(const BigDigit [] x) pure nothrow @nogc @safe
2321 {
2322 int k = 0;
2323 while (x[k]==0)
2324 {
2325 ++k;
2326 assert(k<x.length);
2327 }
2328 return k;
2329 }
2330
2331 /*
2332 Calculate quotient and remainder of u / v using fast recursive division.
2333 v must be normalised, and must be at least half as long as u.
2334 Given u and v, v normalised, calculates quotient = u/v, u = u%v.
2335 scratch is temporary storage space, length must be >= quotient + 1.
2336
2337 Returns:
2338 u[0 .. v.length] is the remainder. u[v.length..$] is corrupted.
2339
2340 Implements algorithm 1.8 from MCA.
2341 This algorithm has an annoying special case. After the first recursion, the
2342 highest bit of the quotient may be set. This means that in the second
2343 recursive call, the 'in' contract would be violated. (This happens only
2344 when the top quarter of u is equal to the top half of v. A base 10
2345 equivalent example of this situation is 5517/56; the first step gives
2346 55/5 = 11). To maintain the in contract, we pad a zero to the top of both
2347 u and the quotient. 'mayOverflow' indicates that that the special case
2348 has occurred.
2349 (In MCA, a different strategy is used: the in contract is weakened, and
2350 schoolbookDivMod is more general: it allows the high bit of u to be set).
2351 See also:
2352 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022,
2353 Max-Planck Institute fuer Informatik, (Oct 1998).
2354 */
2355 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, const(BigDigit)[] v,
2356 BigDigit[] scratch, bool mayOverflow = false)
2357 pure nothrow
2358 in
2359 {
2360 // v must be normalized
2361 assert(v.length > 1);
2362 assert((v[$ - 1] & 0x8000_0000) != 0);
2363 assert(!(u[$ - 1] & 0x8000_0000));
2364 assert(quotient.length == u.length - v.length);
2365 if (mayOverflow)
2366 {
2367 assert(u[$-1] == 0);
2368 assert(u[$-2] & 0x8000_0000);
2369 }
2370
2371 // Must be symmetric. Use block schoolbook division if not.
2372 assert((mayOverflow ? u.length-1 : u.length) <= 2 * v.length);
2373 assert((mayOverflow ? u.length-1 : u.length) >= v.length);
2374 assert(scratch.length >= quotient.length + (mayOverflow ? 0 : 1));
2375 }
2376 body
2377 {
2378 if (quotient.length < FASTDIVLIMIT)
2379 {
2380 return schoolbookDivMod(quotient, u, v);
2381 }
2382
2383 // Split quotient into two halves, but keep padding in the top half
2384 auto k = (mayOverflow ? quotient.length - 1 : quotient.length) >> 1;
2385
2386 // RECURSION 1: Calculate the high half of the quotient
2387
2388 // Note that if u and quotient were padded, they remain padded during
2389 // this call, so in contract is satisfied.
2390 recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $],
2391 scratch, mayOverflow);
2392
2393 // quotient[k..$] is our guess at the high quotient.
2394 // u[2*k .. 2.*k + v.length - k = k + v.length] is the high part of the
2395 // first remainder. u[0 .. 2*k] is the low part.
2396
2397 // Calculate the full first remainder to be
2398 // remainder - highQuotient * lowDivisor
2399 // reducing highQuotient until the remainder is positive.
2400 // The low part of the remainder, u[0 .. k], cannot be altered by this.
2401
2402 adjustRemainder(quotient[k .. $], u[k .. k + v.length], v, k,
2403 scratch[0 .. quotient.length], mayOverflow);
2404
2405 // RECURSION 2: Calculate the low half of the quotient
2406 // The full first remainder is now in u[0 .. k + v.length].
2407
2408 if (u[k + v.length - 1] & 0x8000_0000)
2409 {
2410 // Special case. The high quotient is 0x1_00...000 or 0x1_00...001.
2411 // This means we need an extra quotient word for the next recursion.
2412 // We need to restore the invariant for the recursive calls.
2413 // We do this by padding both u and quotient. Extending u is trivial,
2414 // because the higher words will not be used again. But for the
2415 // quotient, we're clobbering the low word of the high quotient,
2416 // so we need save it, and add it back in after the recursive call.
2417
2418 auto clobberedQuotient = quotient[k];
2419 u[k+v.length] = 0;
2420
2421 recursiveDivMod(quotient[0 .. k+1], u[k .. k + v.length+1],
2422 v[k .. $], scratch, true);
2423 adjustRemainder(quotient[0 .. k+1], u[0 .. v.length], v, k,
2424 scratch[0 .. 2 * k+1], true);
2425
2426 // Now add the quotient word that got clobbered earlier.
2427 multibyteIncrementAssign!('+')(quotient[k..$], clobberedQuotient);
2428 }
2429 else
2430 {
2431 // The special case has NOT happened.
2432 recursiveDivMod(quotient[0 .. k], u[k .. k + v.length], v[k .. $],
2433 scratch, false);
2434
2435 // high remainder is in u[k .. k+(v.length-k)] == u[k .. v.length]
2436
2437 adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k,
2438 scratch[0 .. 2 * k]);
2439 }
2440 }
2441
2442 // rem -= quot * v[0 .. k].
2443 // If would make rem negative, decrease quot until rem is >= 0.
2444 // Needs (quot.length * k) scratch space to store the result of the multiply.
2445 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, const(BigDigit)[] v,
2446 ptrdiff_t k,
2447 BigDigit[] scratch, bool mayOverflow = false) pure nothrow
2448 {
2449 assert(rem.length == v.length);
2450 mulInternal(scratch, quot, v[0 .. k]);
2451 uint carry = 0;
2452 if (mayOverflow)
2453 carry = scratch[$-1] + subAssignSimple(rem, scratch[0..$-1]);
2454 else
2455 carry = subAssignSimple(rem, scratch);
2456 while (carry)
2457 {
2458 multibyteIncrementAssign!('-')(quot, 1); // quot--
2459 carry -= multibyteAdd(rem, rem, v, 0);
2460 }
2461 }
2462
2463 // Cope with unbalanced division by performing block schoolbook division.
blockDivMod(BigDigit[]quotient,BigDigit[]u,in BigDigit[]v)2464 void blockDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v)
2465 pure nothrow
2466 {
2467 import core.memory : GC;
2468 assert(quotient.length == u.length - v.length);
2469 assert(v.length > 1);
2470 assert(u.length >= v.length);
2471 assert((v[$-1] & 0x8000_0000)!=0);
2472 assert((u[$-1] & 0x8000_0000)==0);
2473 BigDigit [] scratch = new BigDigit[v.length + 1];
2474
2475 // Perform block schoolbook division, with 'v.length' blocks.
2476 auto m = u.length - v.length;
2477 while (m > v.length)
2478 {
2479 immutable mayOverflow = (u[m + v.length -1 ] & 0x8000_0000)!=0;
2480 BigDigit saveq;
2481 if (mayOverflow)
2482 {
2483 u[m + v.length] = 0;
2484 saveq = quotient[m];
2485 }
2486 recursiveDivMod(quotient[m-v.length .. m + (mayOverflow? 1: 0)],
2487 u[m - v.length .. m + v.length + (mayOverflow? 1: 0)], v, scratch, mayOverflow);
2488 if (mayOverflow)
2489 {
2490 assert(quotient[m] == 0);
2491 quotient[m] = saveq;
2492 }
2493 m -= v.length;
2494 }
2495 recursiveDivMod(quotient[0 .. m], u[0 .. m + v.length], v, scratch);
2496 () @trusted { GC.free(scratch.ptr); } ();
2497 }
2498
2499 @system unittest
2500 {
2501 import core.stdc.stdio;
2502
printBiguint(const uint[]data)2503 void printBiguint(const uint [] data)
2504 {
2505 char [] buff = biguintToHex(new char[data.length*9], data, '_');
2506 printf("%.*s\n", buff.length, buff.ptr);
2507 }
2508
printDecimalBigUint(BigUint data)2509 void printDecimalBigUint(BigUint data)
2510 {
2511 auto str = data.toDecimalString(0);
2512 printf("%.*s\n", str.length, str.ptr);
2513 }
2514
2515 uint [] a, b;
2516 a = new uint[43];
2517 b = new uint[179];
2518 for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i;
2519 for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546;
2520
2521 a[$-1] |= 0x8000_0000;
2522 uint [] r = new uint[a.length];
2523 uint [] q = new uint[b.length-a.length+1];
2524
2525 divModInternal(q, r, b, a);
2526 q = q[0..$-1];
2527 uint [] r1 = r.dup;
2528 uint [] q1 = q.dup;
2529 blockDivMod(q, b, a);
2530 r = b[0 .. a.length];
2531 assert(r[] == r1[]);
2532 assert(q[] == q1[]);
2533 }
2534
2535 // biguintToOctal
2536 @safe unittest
2537 {
2538 enum bufSize = 5 * BigDigitBits / 3 + 1;
2539 auto buf = new char[bufSize];
2540 size_t i;
2541 BigDigit[] data = [ 342391 ];
2542
2543 // Basic functionality with single word
2544 i = biguintToOctal(buf, data);
2545 assert(i == bufSize - 7 && buf[i .. $] == "1234567");
2546
2547 // Test carrying bits between words
2548 data = [ 0x77053977, 0x39770539, 0x00000005 ];
2549 i = biguintToOctal(buf, data);
2550 assert(i == bufSize - 23 && buf[i .. $] == "12345670123456701234567");
2551
2552 // Test carried bits in the last word
2553 data = [ 0x80000000 ];
2554 i = biguintToOctal(buf, data);
2555 assert(buf[i .. $] == "20000000000");
2556
2557 // Test boundary between 3rd and 4th word where the number of bits is
2558 // divisible by 3 and no bits should be carried.
2559 //
2560 // The 0xC0000000's are for "poisoning" the carry to be non-zero when the
2561 // rollover happens, so that if any bugs happen in wrongly adding the carry
2562 // to the next word, non-zero bits will show up in the output.
2563 data = [ 0xC0000000, 0xC0000000, 0xC0000000, 0x00000010 ];
2564 i = biguintToOctal(buf, data);
2565 assert(buf[i .. $] == "2060000000001400000000030000000000");
2566
2567 // Boundary case: 0
2568 data = [ 0 ];
2569 i = biguintToOctal(buf, data);
2570 assert(buf[i .. $] == "0");
2571 }
2572