1 /*
2  * Test driver for low-level bignum library (16-bit version).
3  * This access the low-level library directly.  It is NOT an example of
4  * how to program with the library normally!  By accessing the library
5  * at a low level, it is possible to exercise the smallest components
6  * and thus localize bugs more accurately.  This is especially useful
7  * when writing assembly-language primitives.
8  *
9  * This also does timing tests on modular exponentiation.  Modular
10  * exponentiation is so computationally expensive that the fact that this
11  * code omits one level of interface glue has no perceptible effect on
12  * the results.
13  */
14 #ifndef HAVE_CONFIG_H
15 #define HAVE_CONFIG_H 0
16 #endif
17 #if HAVE_CONFIG_H
18 #include "bnconfig.h"
19 #endif
20 
21 /*
22  * Some compilers complain about #if FOO if FOO isn't defined,
23  * so do the ANSI-mandated thing explicitly...
24  */
25 #ifndef NO_STDLIB_H
26 #define NO_STDLIB_H 0
27 #endif
28 #ifndef NO_STRING_H
29 #define NO_STRING_H 0
30 #endif
31 #ifndef HAVE_STRINGS_H
32 #define HAVE_STRINGS_H 0
33 #endif
34 
35 #include <stdio.h>
36 
37 #if !NO_STDLIB_H
38 #include <stdlib.h>	/* For strtol */
39 #else
40 long strtol(const char *, char **, int);
41 #endif
42 
43 #if !NO_STRING_H
44 #include <string.h>	/* For memcpy */
45 #elif HAVE_STRINGS_H
46 #include <strings.h>
47 #endif
48 
49 #include "cputime.h"
50 #include "lbn16.h"
51 
52 #include "kludge.h"
53 
54 #if BNYIELD
55 int (*bnYield)(void) = 0;
56 #endif
57 
58 /* Work with up to 2048-bit numbers */
59 #define MAXBITS 3072
60 #define SIZE (MAXBITS/16 + 1)
61 
62 /* Additive congruential random number generator, x[i] = x[i-24] + x[i-55] */
63 static BNWORD16 randp[55];
64 static BNWORD16 *randp1 = randp, *randp2 = randp+24;
65 
66 static BNWORD16
rand16(void)67 rand16(void)
68 {
69     if (++randp2 == randp+55) {
70 	randp2 = randp;
71 	randp1++;
72     } else if (++randp1 == randp+55) {
73 	randp1 = randp;
74     }
75 
76     return  *randp1 += *randp2;
77 }
78 
79 /*
80  * CRC-3_2: x^3_2+x^26+x^23+x^22+x^1_6+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1
81  *
82  * The additive congruential RNG is seeded with a single integer,
83  * which is shuffled with a CRC polynomial to generate the initial
84  * table values.  The Polynomial is the same size as the words being
85  * used.
86  *
87  * Thus, in the various versions of this library, we actually use this
88  * polynomial as-is, this polynomial mod x^17, and this polynomial with
89  * the leading coefficient deleted and replaced with x^6_4.  As-is,
90  * it's irreducible, so it has a long period.  Modulo x^17, it factors as
91  * (x^4+x^3+x^2+x+1) * (x^12+x^11+x^8+x^7+x^6+x^5+x^4+x^3+1),
92  * which still has a large enough period (4095) for the use it's put to.
93  * With the leading coefficient moved up, it factors as
94  * (x^50+x^49+x^48+x^47+x^46+x^43+x^41+x^40+x^38+x^37+x^36+x^35+x^34+x^33+
95  *  x^31+x^30+x^29+x^28+x^27+x^25+x^23+x^18+x^1_6+x^15+x^14+x^13+x^11+x^9+
96  *  x^8+x^7+x^6+x^5+x^3+x^2+1)*(x^11+x^10+x^9+x^5+x^4+x^3+1)*(x^3+x+1),
97  * which definitely has a long enough period to serve for initialization.
98  *
99  * The effort put into this PRNG is kind of unwarranted given the trivial
100  * use it's being put to, but oh, well.  It does have the nice advantage
101  * of producing numbers that are portable between platforms, so if there's
102  * a problem with one platform, you can compare all the intermediate
103  * results with another platform.
104  */
105 #define POLY (BNWORD16)0x04c11db7
106 
107 static void
srand16(BNWORD16 seed)108 srand16(BNWORD16 seed)
109 {
110     int i, j;
111 
112     for (i = 0; i < 55; i++) {
113 	for (j = 0; j < 16; j++)
114 	    if (seed >> (16-1))
115 		seed = (seed << 1) ^ POLY;
116 	    else
117 		seed <<= 1;
118 	randp[i] = seed;
119     }
120     for (i = 0; i < 3*55; i ++)
121 	rand16();
122 }
123 
124 static void
randnum(BNWORD16 * num,unsigned len)125 randnum(BNWORD16 *num, unsigned len)
126 {
127     while (len--)
128 	BIGLITTLE(*--num,*num++) = rand16();
129 }
130 
131 static void
bnprint16(BNWORD16 const * num,unsigned len)132 bnprint16(BNWORD16 const *num, unsigned len)
133 {
134     BIGLITTLE(num -= len, num += len);
135 
136     while (len--)
137 	printf("%0*lX", 16/4, (unsigned long)BIGLITTLE(*num++,*--num));
138 }
139 
140 static void
bnput16(char const * prompt,BNWORD16 const * num,unsigned len)141 bnput16(char const *prompt, BNWORD16 const *num, unsigned len)
142 {
143     fputs(prompt, stdout);
144     bnprint16(num, len);
145     putchar('\n');
146 }
147 
148 /*
149  * One of our tests uses a known prime.  The following selections were
150  * taken from the tables at the end of Hans Reisel's "Prime Numbers and
151  * Computer Methods for Factorization", second edition - an excellent book.
152  * (ISBN 0-8176-3743-5 ISBN 3-7323-3743-5)
153  */
154 #if 0
155 /* P31=1839605 17620282 38179967 87333633 from the factors of 3^256+2^256 */
156 static unsigned char const prime[] = {
157 	0x17,0x38,0x15,0xBC,0x8B,0xBB,0xE9,0xEF,0x01,0xA9,0xFD,0x3A,0x01
158 };
159 #elif 0
160 /* P48=40554942 04557502 46193993 36199835 4279613_2 73199617 from the same */
161 static unsigned char const prime[] = {
162 	0x47,0x09,0x77,0x07,0xCF,0xFD,0xE1,0x54,0x3E,0x24,
163 	0xF7,0xF1,0x7A,0x3E,0x91,0x51,0xCC,0xC7,0xD4,0x01
164 };
165 #elif 0
166 /*
167  * P75 = 450 55287320 97906895 47687014 5808213_2
168  *  05219565 99525911 39967932 66003_258 91979521
169  * from the factors of 4^128+3+128
170  * (The "026" and "062" are to prevent a Bad String from appearing here.)
171  */
172 static unsigned char const prime[] = {
173 	0xFF,0x00,0xFF,0x00,0xFF,0x01,0x06,0x4F,0xF8,0xED,
174 	0xA3,0x37,0x23,0x2A,0x04,0xEA,0xF9,0x5F,0x30,0x4C,
175 	0xAE,0xCD, 026,0x4E, 062,0x10,0x04,0x7D,0x0D,0x79,
176 	0x01
177 };
178 #else
179 /*
180  * P75 = 632 85659796 45277755 9123_2190 67300940
181  *  51844953 78793489 59444670 35675855 57440257
182  * from the factors of 5^128+4^128
183  * (The "026" is to prevent a Bad String from appearing here.)
184  */
185 static unsigned char const prime[] = {
186 	0x01,0x78,0x4B,0xA5,0xD3,0x30,0x03,0xEB,0x73,0xE6,
187 	0x0F,0x4E,0x31,0x7D,0xBC,0xE2,0xA0,0xD4, 026,0x3F,
188 	0x3C,0xEA,0x1B,0x44,0xAD,0x39,0xE7,0xE5,0xAD,0x19,
189 	0x67,0x01
190 };
191 #endif
192 
193 static int
usage(char const * name)194 usage(char const *name)
195 {
196 	fprintf(stderr, "Usage: %s [modbits [expbits [expbits2]]\n"
197 "With no arguments, just runs test suite.  If modbits is given, runs\n"
198 "quick validation test, then runs timing tests of modular exponentiation.\n"
199 "If expbits is given, it is used as an exponent size, otherwise it defaults\n"
200 "to the same as modbits.  If expbits2 is given it is used as the second\n"
201 "exponent size in the double-exponentiation tests, otherwise it defaults\n"
202 "to the same as expbits.  All are limited to %u bits.\n",
203 		name, (unsigned)MAXBITS);
204 	return 1;
205 }
206 
207 /* for libzrtp support */
208 int
bntest_main(int argc,char ** argv)209 bntest_main(int argc, char **argv)
210 {
211     unsigned i, j, k, l, m;
212     int z;
213     BNWORD16 t, carry, borrow;
214     BNWORD16 a[SIZE], b[SIZE], c[SIZE], d[SIZE];
215     BNWORD16 e[SIZE], f[SIZE];
216     static BNWORD16 entries[sizeof(prime)*2][(sizeof(prime)-1)/(16/8)+1];
217     BNWORD16 *array[sizeof(prime)*2];
218     unsigned long modbits = 0, expbits = 0, expbits2 = 0;
219     char *p;
220 #define A BIGLITTLE((a+SIZE),a)
221 #define B BIGLITTLE((b+SIZE),b)
222 #define C BIGLITTLE((c+SIZE),c)
223 #define D BIGLITTLE((d+SIZE),d)
224 #define E BIGLITTLE((e+SIZE),e)
225 #define F BIGLITTLE((f+SIZE),f)
226     static unsigned const smallprimes[] = {
227 	2, 3, 5, 7, 11, 13, 17, 19, 23, 27, 29, 31, 37, 41, 43
228     };
229 
230     /* Set up array for precomputed modexp */
231     for (i = 0; i < sizeof(array)/sizeof(*array); i++)
232 	array[i] = entries[i] BIG(+ SIZE);
233 
234     srand16(1);
235 
236     puts(BIGLITTLE("Big-endian machine","Little-endian machine"));
237 
238     if (argc >= 2) {
239 	    modbits = strtoul(argv[1], &p, 0);
240 	    if (!modbits || *p) {
241 		    fprintf(stderr, "Invalid modbits: %s\n", argv[1]);
242 		    return usage(argv[0]);
243 	    }
244     }
245     if (argc >= 3) {
246 	    expbits = strtoul(argv[2], &p, 0);
247 	    if (!expbits || *p) {
248 		    fprintf(stderr, "Invalid expbits: %s\n", argv[2]);
249 		    return usage(argv[0]);
250 	    }
251 	    expbits2 = expbits;
252     }
253     if (argc >= 4) {
254 	    expbits2 = strtoul(argv[3], &p, 0);
255 	    if (!expbits2 || *p) {
256 		    fprintf(stderr, "Invalid expbits2: %s\n", argv[3]);
257 		    return usage(argv[0]);
258 	    }
259     }
260     if (argc >= 5) {
261 	    fprintf(stderr, "Too many arguments: %s\n", argv[4]);
262 	    return usage(argv[0]);
263     }
264 
265     /* B is a nice not-so-little prime */
266     lbnInsertBigBytes_16(B, prime, 0, sizeof(prime));
267     ((unsigned char *)c)[0] = 0;
268     lbnInsertBigBytes_16(B, (unsigned char *)c, sizeof(prime), 1);
269     lbnExtractBigBytes_16(B, (unsigned char *)c, 0, sizeof(prime)+1);
270     i = (sizeof(prime)-1)/(16/8)+1;        /* Size of array in words */
271     if (((unsigned char *)c)[0] ||
272 	memcmp(prime, (unsigned char *)c+1, sizeof(prime)) != 0)
273     {
274 	printf("Input != output!:\n   ");
275 	for (k = 0; k < sizeof(prime); k++)
276 	    printf("%02X ", prime[k]);
277 	putchar('\n');
278 	for (k = 0; k < sizeof(prime)+1; k++)
279 	    printf("%02X ", ((unsigned char *)c)[k]);
280 	putchar('\n');
281 	bnput16("p = ", B, i);
282 
283     }
284 
285     /* Timing test code - only if requested on the command line */
286     if (modbits) {
287 	timetype start, stop;
288 	unsigned long cursec, expsec, twoexpsec, dblexpsec;
289 	unsigned curms, expms, twoexpms, dblexpms;
290 
291 	expsec = twoexpsec = dblexpsec = 0;
292 	expms = twoexpms = dblexpms = 0;
293 
294 	lbnCopy_16(C,B,i);
295 	lbnSub1_16(C,i,1);        /* C is exponent: p-1 */
296 
297 	puts("Testing modexp with a known prime.  "
298 	     "All results should be 1.");
299 	bnput16("p   = ", B, i);
300 	bnput16("p-1 = ", C, i);
301 	z = lbnTwoExpMod_16(A, C, i, B, i);
302 	if (z < 0)
303 	    goto nomem;
304 	bnput16("2^(p-1) mod p = ", A, i);
305 	for (j = 0; j < 10; j++) {
306 	    randnum(A,i);
307 	    (void)lbnDiv_16(D,A,i,B,i);
308 
309 	    bnput16("a = ", A, i);
310 	    z = lbnExpMod_16(D, A, i, C, i, B, i);
311 	    if (z < 0)
312 		goto nomem;
313 	    bnput16("a^(p-1) mod p = ", D, i);
314 #if 0
315 	    z = lbnBasePrecompBegin_16(array, (sizeof(prime)*8+4)/5, 5,
316 				       A, i, B, i);
317 	    if (z < 0)
318 		goto nomem;
319 	    BIGLITTLE(D[-1],D[0]) = -1;
320 	    z = lbnBasePrecompExp_16(D, (BNWORD16 const * const *)array,
321 			   	     5, C, i, B, i);
322 	    if (z < 0)
323 		goto nomem;
324 	    bnput16("a^(p-1) mod p = ", D, i);
325 #endif
326 	    for (k = 0; k < 5; k++) {
327 		randnum(E,i);
328 		bnput16("e = ", E, i);
329 		z = lbnExpMod_16(D, A, i, E, i, B, i);
330 		if (z < 0)
331 		    goto nomem;
332 		bnput16("a^e mod p = ", D, i);
333 #if 0
334 		z = lbnBasePrecompExp_16(D, (BNWORD16 const * const *)array,
335 					 5, E, i, B, i);
336 		if (z < 0)
337 		    goto nomem;
338 		bnput16("a^e mod p = ", D, i);
339 #endif
340 	    }
341 	}
342 
343 	printf("\n"
344 	       "Timing exponentiations modulo a %d-bit modulus, i.e.\n"
345 	       "2^<%d> mod <%d> bits, <%d>^<%d> mod <%d> bits and\n"
346 	       "<%d>^<%d> * <%d>^<%d> mod <%d> bits\n",
347 	       (int)modbits, (int)expbits, (int)modbits,
348 	       (int)modbits, (int)expbits, (int)modbits,
349 	       (int)modbits, (int)expbits, (int)modbits, (int)expbits2,
350 	       (int)modbits);
351 
352 	i = ((int)modbits-1)/16+1;
353 	k = ((int)expbits-1)/16+1;
354 	l = ((int)expbits2-1)/16+1;
355 	for (j = 0; j < 25; j++) {
356 	    randnum(A,i);        /* Base */
357 	    randnum(B,k);        /* Exponent */
358 	    randnum(C,i);        /* Modulus */
359 	    randnum(D,i);        /* Base2 */
360 	    randnum(E,l);        /* Exponent */
361 	    /* Clip bases and mod to appropriate number of bits */
362 	    t = ((BNWORD16)2<<((modbits-1)%16)) - 1;
363 	    *(BIGLITTLE(A-i,A+i-1)) &= t;
364 	    *(BIGLITTLE(C-i,C+i-1)) &= t;
365 	    *(BIGLITTLE(D-i,D+i-1)) &= t;
366 	    /* Make modulus large (msbit set) and odd (lsbit set) */
367 	    *(BIGLITTLE(C-i,C+i-1)) |= (t >> 1) + 1;
368 	    BIGLITTLE(C[-1],C[0]) |= 1;
369 
370 	    /* Clip exponent to appropriate number of bits */
371 	    t = ((BNWORD16)2<<((expbits-1)%16)) - 1;
372 	    *(BIGLITTLE(B-k,B+k-1)) &= t;
373 	    /* Make exponent large (msbit set) */
374 	    *(BIGLITTLE(B-k,B+k-1)) |= (t >> 1) + 1;
375 	    /* The same for exponent 2 */
376 	    t = ((BNWORD16)2<<((expbits2-1)%16)) - 1;
377 	    *(BIGLITTLE(E-l,E+l-1)) &= t;
378 	    *(BIGLITTLE(E-l,E+l-1)) |= (t >> 1) + 1;
379 
380 	    m = lbnBits_16(A, i);
381 	    if (m > (unsigned)modbits) {
382 		bnput16("a = ", a, i);
383 		printf("%u bits, should be <= %d\n",
384 		       m, (int)modbits);
385 	    }
386 	    m = lbnBits_16(B, k);
387 	    if (m != (unsigned)expbits) {
388 		bnput16("b = ", b, i);
389 		printf("%u bits, should be %d\n",
390 		       m, (int)expbits);
391 	    }
392 	    m = lbnBits_16(C, i);
393 	    if (m != (unsigned)modbits) {
394 		bnput16("c = ", c, k);
395 		printf("%u bits, should be %d\n",
396 		       m, (int)modbits);
397 	    }
398 	    m = lbnBits_16(D, i);
399 	    if (m > (unsigned)modbits) {
400 		bnput16("d = ", d, i);
401 		printf("%u bits, should be <= %d\n",
402 		       m, (int)modbits);
403 	    }
404 	    m = lbnBits_16(E, l);
405 	    if (m != (unsigned)expbits2) {
406 		bnput16("e = ", e, i);
407 		printf("%u bits, should be %d\n",
408 		       m, (int)expbits2);
409 	    }
410 	    gettime(&start);
411 	    z = lbnTwoExpMod_16(A, B, k, C, i);
412 	    if (z < 0)
413 		goto nomem;
414 	    gettime(&stop);
415 	    subtime(stop, start);
416 	    twoexpsec += cursec = sec(stop);
417 	    twoexpms += curms = msec(stop);
418 
419 	    printf("2^<%d>:%4lu.%03u   ", (int)expbits, cursec, curms);
420 	    fflush(stdout);
421 
422 	    gettime(&start);
423 	    z = lbnExpMod_16(A, A, i, B, k, C, i);
424 	    if (z < 0)
425 		goto nomem;
426 	    gettime(&stop);
427 	    subtime(stop, start);
428 	    expsec += cursec = sec(stop);
429 	    expms += curms = msec(stop);
430 	    printf("<%d>^<%d>:%4lu.%03u   ",(int)modbits, (int)expbits,
431 			    cursec, curms);
432 	    fflush(stdout);
433 
434 #if 0
435 	    gettime(&start);
436 	    z = lbnDoubleExpMod_16(D, A, i, B, k, D, i, E, l,C,i);
437 	    if (z < 0)
438 		goto nomem;
439 	    gettime(&stop);
440 	    subtime(stop, start);
441 	    dblexpsec += cursec = sec(stop);
442 	    dblexpms += curms = msec(stop);
443 	    printf("<%d>^<%d>*<%d>^<%d>:%4lu.%03u\n",
444 		   (int)modbits, (int)expbits,
445 		   (int)modbits, (int)expbits2,
446 		   cursec, curms);
447 #else
448 	    putchar('\n');
449 #endif
450 	}
451 	twoexpms += (twoexpsec % j) * 1000;
452 	printf("2^<%d> mod <%d> bits AVERAGE: %4lu.%03u s\n",
453 	       (int)expbits, (int)modbits, twoexpsec/j, twoexpms/j);
454 	expms += (expsec % j) * 1000;
455 	printf("<%d>^<%d> mod <%d> bits AVERAGE: %4lu.%03u s\n",
456 	       (int)modbits, (int)expbits, (int)modbits, expsec/j, expms/j);
457 #if 0
458 	dblexpms += (dblexpsec % j) * 1000;
459 	printf("<%d>^<%d> * <%d>^<%d> mod <%d> bits AVERAGE:"
460 	       " %4lu.%03u s\n",
461 	       (int)modbits, (int)expbits, (int)modbits,
462 	       (int)expbits2,
463 	       (int)modbits, dblexpsec/j, dblexpms/j);
464 #endif
465 	putchar('\n');
466     }
467 
468     printf("Beginning 1000 interations of sanity checking.\n");
469     printf("Any output indicates a bug.  No output is very strong\n");
470     printf("evidence that all the important low-level bignum routines\n");
471     printf("are working properly.\n");
472 
473     /*
474      * If you change this loop to have an iteration 0, all results
475      * are primted on that iteration.  Useful to see what's going
476      * on in case of major wierdness, but it produces a *lot* of
477      * output.
478      */
479     for (j = 1; j <= 1000; j++) {
480 	/* Do the tests for lots of different number sizes. */
481 	for (i = 1; i <= SIZE/2; i++) {
482 	    /* Make a random number i words long */
483 	    do {
484 		randnum(A,i);
485 	    } while (lbnNorm_16(A,i) < i);
486 
487 	    /* Checl lbnCmp - does a == a? */
488 	    if (lbnCmp_16(A,A,i) || !j) {
489 		bnput16("a = ", A, i);
490 		printf("(a <=> a) = %d\n", lbnCmp_16(A,A,i));
491 	    }
492 
493 	    memcpy(c, a, sizeof(a));
494 
495 	    /* Check that the difference, after copy, is good. */
496 	    if (lbnCmp_16(A,C,i) || !j) {
497 		bnput16("a = ", A, i);
498 		bnput16("c = ", C, i);
499 		printf("(a <=> c) = %d\n", lbnCmp_16(A,C,i));
500 	    }
501 
502 	    /* Generate a non-zero random t */
503 	    do {
504 		t = rand16();
505 	    } while (!t);
506 
507 	    /*
508 	     * Add t to A.  Check that:
509 	     * - lbnCmp works in both directions, and
510 	     * - A + t is greater than A.  If there was a carry,
511 	     *   the result, less the carry, should be *less*
512 	     *   than A.
513 	     */
514 	    carry = lbnAdd1_16(A,i,t);
515 	    if (lbnCmp_16(A,C,i) + lbnCmp_16(C,A,i) != 0 ||
516 		lbnCmp_16(A,C,i) != (carry ? -1 : 1) || !j)
517 	    {
518 		bnput16("c       = ", C, i);
519 		printf("t = %lX\n", (unsigned long)t);
520 		bnput16("a = c+t = ", A, i);
521 		printf("carry = %lX\n", (unsigned long)carry);
522 		printf("(a <=> c) = %d\n", lbnCmp_16(A,C,i));
523 		printf("(c <=> a) = %d\n", lbnCmp_16(C,A,i));
524 	    }
525 
526 	    /* Subtract t again */
527 	    memcpy(d, a, sizeof(a));
528 	    borrow = lbnSub1_16(A,i,t);
529 
530 	    if (carry != borrow || lbnCmp_16(A,C,i) || !j) {
531 		bnput16("a = ", C, i);
532 		printf("t = %lX\n", (unsigned long)t);
533 		lbnAdd1_16(A,i,t);
534 		bnput16("a += t = ", A, i);
535 		printf("Carry = %lX\n", (unsigned long)carry);
536 		lbnSub1_16(A,i,t);
537 		bnput16("a -= t = ", A, i);
538 		printf("Borrow = %lX\n", (unsigned long)borrow);
539 		printf("(a <=> c) = %d\n", lbnCmp_16(A,C,i));
540 	    }
541 
542 	    /* Generate a random B */
543 	    do {
544 		randnum(B,i);
545 	    } while (lbnNorm_16(B,i) < i);
546 
547 	    carry = lbnAddN_16(A,B,i);
548 	    memcpy(d, a, sizeof(a));
549 	    borrow = lbnSubN_16(A,B,i);
550 
551 	    if (carry != borrow || lbnCmp_16(A,C,i) || !j) {
552 		bnput16("a = ", C, i);
553 		bnput16("b = ", B, i);
554 		bnput16("a += b = ", D, i);
555 		printf("Carry = %lX\n", (unsigned long)carry);
556 		bnput16("a -= b = ", A, i);
557 		printf("Borrow = %lX\n", (unsigned long)borrow);
558 		printf("(a <=> c) = %d\n", lbnCmp_16(A,C,i));
559 	    }
560 
561 	    /* D = B * t */
562 	    lbnMulN1_16(D, B, i, t);
563 	    memcpy(e, d, sizeof(e));
564 	    /* D = A + B * t, "carry" is overflow */
565 	    borrow = *(BIGLITTLE(D-i-1,D+i)) += lbnAddN_16(D,A,i);
566 
567 	    carry = lbnMulAdd1_16(A, B, i, t);
568 
569 	    /* Did MulAdd get the same answer as mul then add? */
570 	    if (carry != borrow || lbnCmp_16(A, D, i) || !j) {
571 		bnput16("a = ", C, i);
572 		bnput16("b = ", B, i);
573 		printf("t = %lX\n", (unsigned long)t);
574 		bnput16("e = b * t = ", E, i+1);
575 		bnput16("    a + e = ", D, i+1);
576 		bnput16("a + b * t = ", A, i);
577 		printf("carry = %lX\n", (unsigned long)carry);
578 	    }
579 
580 	    memcpy(d, a, sizeof(a));
581 	    borrow = lbnMulSub1_16(A, B, i, t);
582 
583 	    /* Did MulSub perform the inverse of MulAdd */
584 	    if (carry != borrow || lbnCmp_16(A,C,i) || !j) {
585 		bnput16("       a = ", C, i);
586 		bnput16("       b = ", B, i);
587 		bnput16("a += b*t = ", D, i);
588 		printf("Carry = %lX\n", (unsigned long)carry);
589 		bnput16("a -= b*t = ", A, i);
590 		printf("Borrow = %lX\n", (unsigned long)borrow);
591 		printf("(a <=> c) = %d\n", lbnCmp_16(A,C,i));
592 		bnput16("b*t = ", E, i+1);
593 	    }
594 	    /* At this point we're done with t, so it's scratch */
595 #if 0
596 /* Extra debug code */
597 	    lbnMulN1_16(C, A, i, BIGLITTLE(B[-1],B[0]));
598 	    bnput16("a * b[0] = ", C, i+1);
599 	    for (k = 1; k < i; k++) {
600 		carry = lbnMulAdd1_16(BIGLITTLE(C-k,C+k), A, i,
601 				      *(BIGLITTLE(B-1-k,B+k)));
602 		*(BIGLITTLE(C-i-k,C+i+k)) = carry;
603 		bnput16("a * b[x] = ", C, i+k+1);
604 	    }
605 
606 	    lbnMulN1_16(D, B, i, BIGLITTLE(A[-1],A[0]));
607 	    bnput16("b * a[0] = ", D, i+1);
608 	    for (k = 1; k < i; k++) {
609 		carry = lbnMulAdd1_16(BIGLITTLE(D-k,D+k), B, i,
610 				      *(BIGLITTLE(A-1-k,A+k)));
611 		*(BIGLITTLE(D-i-k,D+i+k)) = carry;
612 		bnput16("b * a[x] = ", D, i+k+1);
613 	    }
614 #endif
615 	    /* Does Mul work both ways symmetrically */
616 	    lbnMul_16(C,A,i,B,i);
617 	    lbnMul_16(D,B,i,A,i);
618 	    if (lbnCmp_16(C,D,i+i) || !j) {
619 		bnput16("a = ", A, i);
620 		bnput16("b = ", B, i);
621 		bnput16("a * b = ", C, i+i);
622 		bnput16("b * a = ", D, i+i);
623 		printf("(a*b <=> b*a) = %d\n",
624 		       lbnCmp_16(C,D,i+i));
625 	    }
626 	    /* Check multiplication modulo some small things */
627 	    /* 30030 = 2*3*5*11*13 */
628 	    k = lbnModQ_16(C, i+i, 30030);
629 	    for (l = 0;
630 		 l < sizeof(smallprimes)/sizeof(*smallprimes);
631 		 l++)
632 	    {
633 		m = smallprimes[l];
634 		t = lbnModQ_16(C, i+i, m);
635 		carry = lbnModQ_16(A, i, m);
636 		borrow = lbnModQ_16(B, i, m);
637 		if (t != (carry * borrow) % m) {
638 		    bnput16("a = ", A, i);
639 		    printf("a mod %u = %u\n", m,
640 			   (unsigned)carry);
641 		    bnput16("b = ", B, i);
642 		    printf("b mod %u = %u\n", m,
643 			   (unsigned)borrow);
644 		    bnput16("a*b = ", C, i+i);
645 		    printf("a*b mod %u = %u\n", m,
646 			   (unsigned)t);
647 		    printf("expected %u\n",
648 			   (unsigned)((carry*borrow)%m));
649 		}
650 				/* Verify that (C % 30030) % m == C % m */
651 		if (m <= 13 && t != k % m) {
652 		    printf("c mod 30030 = %u mod %u= %u\n",
653 			   k, m, k%m);
654 		    printf("c mod %u = %u\n",
655 			   m, (unsigned)t);
656 		}
657 	    }
658 
659 	    /* Generate an F less than A and B */
660 	    do {
661 		randnum(F,i);
662 	    } while (lbnCmp_16(F,A,i) >= 0 ||
663 		     lbnCmp_16(F,B,i) >= 0);
664 
665 	    /* Add F to D (remember, D = A*B) */
666 	    lbnAdd1_16(BIGLITTLE(D-i,D+i), i, lbnAddN_16(D, F, i));
667 	    memcpy(c, d, sizeof(d));
668 
669 	    /*
670 	     * Divide by A and check that quotient and remainder
671 	     * match (remainder should be F, quotient should be B)
672 	     */
673 	    t = lbnDiv_16(E,C,i+i,A,i);
674 	    if (t || lbnCmp_16(E,B,i) || lbnCmp_16(C, F, i) || !j) {
675 		bnput16("a = ", A, i);
676 		bnput16("b = ", B, i);
677 		bnput16("f = ", F, i);
678 		bnput16("a * b + f = ", D, i+i);
679 		printf("qhigh = %lX\n", (unsigned long)t);
680 		bnput16("(a*b+f) / a = ", E, i);
681 		bnput16("(a*b+f) % a = ", C, i);
682 	    }
683 
684 	    memcpy(c, d, sizeof(d));
685 
686 	    /* Divide by B and check similarly */
687 	    t = lbnDiv_16(E,C,i+i,B,i);
688 	    if (lbnCmp_16(E,A,i) || lbnCmp_16(C, F, i) || !j) {
689 		bnput16("a = ", A, i);
690 		bnput16("b = ", B, i);
691 		bnput16("f = ", F, i);
692 		bnput16("a * b + f = ", D, i+i);
693 		printf("qhigh = %lX\n", (unsigned long)t);
694 		bnput16("(a*b+f) / b = ", E, i);
695 		bnput16("(a*b+f) % b = ", C, i);
696 	    }
697 
698 	    /* Check that A*A == A^2 */
699 	    lbnMul_16(C,A,i,A,i);
700 	    lbnSquare_16(D,A,i);
701 	    if (lbnCmp_16(C,D,i+i) || !j) {
702 		bnput16("a*a = ", C, i+i);
703 		bnput16("a^2 = ", D, i+i);
704 		printf("(a * a == a^2) = %d\n",
705 		       lbnCmp_16(C,D,i+i));
706 	    }
707 #if 0
708 	    /* Compute a GCD */
709 	    lbnCopy_16(C,A,i);
710 	    lbnCopy_16(D,B,i);
711 	    z = lbnGcd_16(C, i, D, i, &k);
712 	    if (z < 0)
713 		goto nomem;
714 	    /* z = 1 if GCD in D; z = 0 if GCD in C */
715 
716 	    /* Approximate check that the GCD came out right */
717 	    for (l = 0;
718 		 l < sizeof(smallprimes)/sizeof(*smallprimes);
719 		 l++)
720 	    {
721 		m = smallprimes[l];
722 		t = lbnModQ_16(z ? D : C, k, m);
723 		carry = lbnModQ_16(A, i, m);
724 		borrow = lbnModQ_16(B, i, m);
725 		if (!t != (!carry && !borrow)) {
726 		    bnput16("a = ", A, i);
727 		    printf("a mod %u = %u\n", m,
728 			   (unsigned)carry);
729 		    bnput16("b = ", B, i);
730 		    printf("b mod %u = %u\n", m,
731 			   (unsigned)borrow);
732 		    bnput16("gcd(a,b) = ", z ? D : C, k);
733 		    printf("gcd(a,b) mod %u = %u\n", m,
734 			   (unsigned)t);
735 		}
736 	    }
737 #endif
738 
739 	    /*
740 	     * Do some Montgomery operations
741 	     * Start with A > B, and also place a copy of B into C.
742 	     * Then make A odd so it can be a Montgomery modulus.
743 	     */
744 	    if (lbnCmp_16(A, B, i) < 0) {
745 		memcpy(c, a, sizeof(c));
746 		memcpy(a, b, sizeof(a));
747 		memcpy(b, c, sizeof(b));
748 	    } else {
749 		memcpy(c, b, sizeof(c));
750 	    }
751 	    BIGLITTLE(A[-1],A[0]) |= 1;
752 
753 	    /* Convert to and from */
754 	    lbnToMont_16(B, i, A, i);
755 	    lbnFromMont_16(B, A, i);
756 	    if (lbnCmp_16(B, C, i)) {
757 		memcpy(b, c, sizeof(c));
758 		bnput16("mod = ", A, i);
759 		bnput16("input = ", B, i);
760 		lbnToMont_16(B, i, A, i);
761 		bnput16("mont = ", B, i);
762 		lbnFromMont_16(B, A, i);
763 		bnput16("output = ", B, i);
764 	    }
765 	    /* E = B^5 (mod A), no Montgomery ops */
766 	    lbnSquare_16(E, B, i);
767 	    (void)lbnDiv_16(BIGLITTLE(E-i,E+i),E,i+i,A,i);
768 	    lbnSquare_16(D, E, i);
769 	    (void)lbnDiv_16(BIGLITTLE(D-i,D+i),D,i+i,A,i);
770 	    lbnMul_16(E, D, i, B, i);
771 	    (void)lbnDiv_16(BIGLITTLE(E-i,E+i),E,i+i,A,i);
772 
773 	    /* D = B^5, using ExpMod */
774 	    BIGLITTLE(F[-1],F[0]) = 5;
775 	    z = lbnExpMod_16(D, B, i, F, 1, A, i);
776 	    if (z < 0)
777 		goto nomem;
778 	    if (lbnCmp_16(D, E, i)  || !j) {
779 		bnput16("mod = ", A, i);
780 		bnput16("input = ", B, i);
781 		bnput16("input^5 = ", E, i);
782 		bnput16("input^5 = ", D, i);
783 		printf("a>b (x <=> y) = %d\n",
784 		       lbnCmp_16(D,E,i));
785 	    }
786 	    /* TODO: Test lbnTwoExpMod, lbnDoubleExpMod */
787 	} /* for (i) */
788 	printf("\r%d ", j);
789 	fflush(stdout);
790     } /* for (j) */
791     printf("%d iterations of up to %d 16-bit words completed.\n",
792 	   j-1, i-1);
793     return 0;
794  nomem:
795     printf("Out of memory\n");
796     return 1;
797 }
798