1 /* $NetBSD: bn_fast_s_mp_sqr.c,v 1.1.1.1 2011/04/13 18:14:54 elric Exp $ */ 2 3 #include <tommath.h> 4 #ifdef BN_FAST_S_MP_SQR_C 5 /* LibTomMath, multiple-precision integer library -- Tom St Denis 6 * 7 * LibTomMath is a library that provides multiple-precision 8 * integer arithmetic as well as number theoretic functionality. 9 * 10 * The library was designed directly after the MPI library by 11 * Michael Fromberger but has been written from scratch with 12 * additional optimizations in place. 13 * 14 * The library is free for all purposes without any express 15 * guarantee it works. 16 * 17 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 18 */ 19 20 /* the jist of squaring... 21 * you do like mult except the offset of the tmpx [one that 22 * starts closer to zero] can't equal the offset of tmpy. 23 * So basically you set up iy like before then you min it with 24 * (ty-tx) so that it never happens. You double all those 25 * you add in the inner loop 26 27 After that loop you do the squares and add them in. 28 */ 29 30 int fast_s_mp_sqr (mp_int * a, mp_int * b) 31 { 32 int olduse, res, pa, ix, iz; 33 mp_digit W[MP_WARRAY], *tmpx; 34 mp_word W1; 35 36 /* grow the destination as required */ 37 pa = a->used + a->used; 38 if (b->alloc < pa) { 39 if ((res = mp_grow (b, pa)) != MP_OKAY) { 40 return res; 41 } 42 } 43 44 /* number of output digits to produce */ 45 W1 = 0; 46 for (ix = 0; ix < pa; ix++) { 47 int tx, ty, iy; 48 mp_word _W; 49 mp_digit *tmpy; 50 51 /* clear counter */ 52 _W = 0; 53 54 /* get offsets into the two bignums */ 55 ty = MIN(a->used-1, ix); 56 tx = ix - ty; 57 58 /* setup temp aliases */ 59 tmpx = a->dp + tx; 60 tmpy = a->dp + ty; 61 62 /* this is the number of times the loop will iterrate, essentially 63 while (tx++ < a->used && ty-- >= 0) { ... } 64 */ 65 iy = MIN(a->used-tx, ty+1); 66 67 /* now for squaring tx can never equal ty 68 * we halve the distance since they approach at a rate of 2x 69 * and we have to round because odd cases need to be executed 70 */ 71 iy = MIN(iy, (ty-tx+1)>>1); 72 73 /* execute loop */ 74 for (iz = 0; iz < iy; iz++) { 75 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 76 } 77 78 /* double the inner product and add carry */ 79 _W = _W + _W + W1; 80 81 /* even columns have the square term in them */ 82 if ((ix&1) == 0) { 83 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); 84 } 85 86 /* store it */ 87 W[ix] = (mp_digit)(_W & MP_MASK); 88 89 /* make next carry */ 90 W1 = _W >> ((mp_word)DIGIT_BIT); 91 } 92 93 /* setup dest */ 94 olduse = b->used; 95 b->used = a->used+a->used; 96 97 { 98 mp_digit *tmpb; 99 tmpb = b->dp; 100 for (ix = 0; ix < pa; ix++) { 101 *tmpb++ = W[ix] & MP_MASK; 102 } 103 104 /* clear unused digits [that existed in the old copy of c] */ 105 for (; ix < olduse; ix++) { 106 *tmpb++ = 0; 107 } 108 } 109 mp_clamp (b); 110 return MP_OKAY; 111 } 112 #endif 113 114 /* Source: /cvs/libtom/libtommath/bn_fast_s_mp_sqr.c,v */ 115 /* Revision: 1.4 */ 116 /* Date: 2006/12/28 01:25:13 */ 117