/*	$NetBSD: bn_mp_karatsuba_mul.c,v 1.1.1.1 2011/04/13 18:14:54 elric Exp $	*/

#include <tommath.h>
#ifdef BN_MP_KARATSUBA_MUL_C
/* LibTomMath, multiple-precision integer library -- Tom St Denis
 *
 * LibTomMath is a library that provides multiple-precision
 * integer arithmetic as well as number theoretic functionality.
 *
 * The library was designed directly after the MPI library by
 * Michael Fromberger but has been written from scratch with
 * additional optimizations in place.
 *
 * The library is free for all purposes without any express
 * guarantee it works.
 *
 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
 */

/* c = |a| * |b| using Karatsuba Multiplication using
 * three half size multiplications
 *
 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
 * let n represent half of the number of digits in
 * the shorter of a and b [i.e. min(a->used, b->used) / 2]
 *
 * a = a1 * B**n + a0
 * b = b1 * B**n + b0
 *
 * Then, a * b =>
 * a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B**n + a0b0
 *
 * Note that a1b1 and a0b0 are used twice and only need to be
 * computed once.  So in total three half size (half # of
 * digit) multiplications are performed, a0b0, a1b1 and
 * (a1+a0)(b1+b0)
 *
 * Note that a multiplication of half the digits requires
 * 1/4th the number of single precision multiplications, so the
 * three half size multiplications together cost 3/4 of a full
 * size one; in total after one call 25% of the single precision
 * multiplications are saved.  Note also that the call to mp_mul
 * can end up back in this function if the a0, a1, b0, or b1 are
 * above the threshold.  This is known as divide-and-conquer and
 * leads to the famous O(N**lg(3)) or O(N**1.584) work which is
 * asymptotically lower than the standard O(N**2) that the
 * baseline/comba methods use.  Generally though the overhead of
 * this method doesn't pay off until a certain size (N ~ 80) is
 * reached.
48 */ 49 int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) 50 { 51 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 52 int B, err; 53 54 /* default the return code to an error */ 55 err = MP_MEM; 56 57 /* min # of digits */ 58 B = MIN (a->used, b->used); 59 60 /* now divide in two */ 61 B = B >> 1; 62 63 /* init copy all the temps */ 64 if (mp_init_size (&x0, B) != MP_OKAY) 65 goto ERR; 66 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 67 goto X0; 68 if (mp_init_size (&y0, B) != MP_OKAY) 69 goto X1; 70 if (mp_init_size (&y1, b->used - B) != MP_OKAY) 71 goto Y0; 72 73 /* init temps */ 74 if (mp_init_size (&t1, B * 2) != MP_OKAY) 75 goto Y1; 76 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) 77 goto T1; 78 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) 79 goto X0Y0; 80 81 /* now shift the digits */ 82 x0.used = y0.used = B; 83 x1.used = a->used - B; 84 y1.used = b->used - B; 85 86 { 87 register int x; 88 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 89 90 /* we copy the digits directly instead of using higher level functions 91 * since we also need to shift the digits 92 */ 93 tmpa = a->dp; 94 tmpb = b->dp; 95 96 tmpx = x0.dp; 97 tmpy = y0.dp; 98 for (x = 0; x < B; x++) { 99 *tmpx++ = *tmpa++; 100 *tmpy++ = *tmpb++; 101 } 102 103 tmpx = x1.dp; 104 for (x = B; x < a->used; x++) { 105 *tmpx++ = *tmpa++; 106 } 107 108 tmpy = y1.dp; 109 for (x = B; x < b->used; x++) { 110 *tmpy++ = *tmpb++; 111 } 112 } 113 114 /* only need to clamp the lower words since by definition the 115 * upper words x1/y1 must have a known number of digits 116 */ 117 mp_clamp (&x0); 118 mp_clamp (&y0); 119 120 /* now calc the products x0y0 and x1y1 */ 121 /* after this x0 is no longer required, free temp [x0==t2]! 
*/ 122 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) 123 goto X1Y1; /* x0y0 = x0*y0 */ 124 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) 125 goto X1Y1; /* x1y1 = x1*y1 */ 126 127 /* now calc x1+x0 and y1+y0 */ 128 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 129 goto X1Y1; /* t1 = x1 - x0 */ 130 if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) 131 goto X1Y1; /* t2 = y1 - y0 */ 132 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) 133 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 134 135 /* add x0y0 */ 136 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) 137 goto X1Y1; /* t2 = x0y0 + x1y1 */ 138 if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) 139 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 140 141 /* shift by B */ 142 if (mp_lshd (&t1, B) != MP_OKAY) 143 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 144 if (mp_lshd (&x1y1, B * 2) != MP_OKAY) 145 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 146 147 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) 148 goto X1Y1; /* t1 = x0y0 + t1 */ 149 if (mp_add (&t1, &x1y1, c) != MP_OKAY) 150 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 151 152 /* Algorithm succeeded set the return code to MP_OKAY */ 153 err = MP_OKAY; 154 155 X1Y1:mp_clear (&x1y1); 156 X0Y0:mp_clear (&x0y0); 157 T1:mp_clear (&t1); 158 Y1:mp_clear (&y1); 159 Y0:mp_clear (&y0); 160 X1:mp_clear (&x1); 161 X0:mp_clear (&x0); 162 ERR: 163 return err; 164 } 165 #endif 166 167 /* Source: /cvs/libtom/libtommath/bn_mp_karatsuba_mul.c,v */ 168 /* Revision: 1.6 */ 169 /* Date: 2006/12/28 01:25:13 */ 170