1 /*	$NetBSD: bn_fast_s_mp_sqr.c,v 1.1.1.1 2011/04/13 18:14:54 elric Exp $	*/
2 
3 #include <tommath.h>
4 #ifdef BN_FAST_S_MP_SQR_C
5 /* LibTomMath, multiple-precision integer library -- Tom St Denis
6  *
7  * LibTomMath is a library that provides multiple-precision
8  * integer arithmetic as well as number theoretic functionality.
9  *
10  * The library was designed directly after the MPI library by
11  * Michael Fromberger but has been written from scratch with
12  * additional optimizations in place.
13  *
14  * The library is free for all purposes without any express
15  * guarantee it works.
16  *
17  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
18  */
19 
20 /* the jist of squaring...
21  * you do like mult except the offset of the tmpx [one that
22  * starts closer to zero] can't equal the offset of tmpy.
23  * So basically you set up iy like before then you min it with
24  * (ty-tx) so that it never happens.  You double all those
25  * you add in the inner loop
26 
27 After that loop you do the squares and add them in.
28 */
29 
30 int fast_s_mp_sqr (mp_int * a, mp_int * b)
31 {
32   int       olduse, res, pa, ix, iz;
33   mp_digit   W[MP_WARRAY], *tmpx;
34   mp_word   W1;
35 
36   /* grow the destination as required */
37   pa = a->used + a->used;
38   if (b->alloc < pa) {
39     if ((res = mp_grow (b, pa)) != MP_OKAY) {
40       return res;
41     }
42   }
43 
44   /* number of output digits to produce */
45   W1 = 0;
46   for (ix = 0; ix < pa; ix++) {
47       int      tx, ty, iy;
48       mp_word  _W;
49       mp_digit *tmpy;
50 
51       /* clear counter */
52       _W = 0;
53 
54       /* get offsets into the two bignums */
55       ty = MIN(a->used-1, ix);
56       tx = ix - ty;
57 
58       /* setup temp aliases */
59       tmpx = a->dp + tx;
60       tmpy = a->dp + ty;
61 
62       /* this is the number of times the loop will iterrate, essentially
63          while (tx++ < a->used && ty-- >= 0) { ... }
64        */
65       iy = MIN(a->used-tx, ty+1);
66 
67       /* now for squaring tx can never equal ty
68        * we halve the distance since they approach at a rate of 2x
69        * and we have to round because odd cases need to be executed
70        */
71       iy = MIN(iy, (ty-tx+1)>>1);
72 
73       /* execute loop */
74       for (iz = 0; iz < iy; iz++) {
75          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
76       }
77 
78       /* double the inner product and add carry */
79       _W = _W + _W + W1;
80 
81       /* even columns have the square term in them */
82       if ((ix&1) == 0) {
83          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
84       }
85 
86       /* store it */
87       W[ix] = (mp_digit)(_W & MP_MASK);
88 
89       /* make next carry */
90       W1 = _W >> ((mp_word)DIGIT_BIT);
91   }
92 
93   /* setup dest */
94   olduse  = b->used;
95   b->used = a->used+a->used;
96 
97   {
98     mp_digit *tmpb;
99     tmpb = b->dp;
100     for (ix = 0; ix < pa; ix++) {
101       *tmpb++ = W[ix] & MP_MASK;
102     }
103 
104     /* clear unused digits [that existed in the old copy of c] */
105     for (; ix < olduse; ix++) {
106       *tmpb++ = 0;
107     }
108   }
109   mp_clamp (b);
110   return MP_OKAY;
111 }
112 #endif
113 
114 /* Source: /cvs/libtom/libtommath/bn_fast_s_mp_sqr.c,v */
115 /* Revision: 1.4 */
116 /* Date: 2006/12/28 01:25:13 */
117