1 /*
2  * This file is intended to be included into the uaf_encode.c source file
3  * via a #include preprocessor directive and will not compile if compiled
4  * directly.  Function must be thread safe.
5  *
6  * Revised: 22-JUL-2009			Re-factor exponent to improve speed.
7  * Revised: 14-AUG-2009			Re-locate 'P' initialization.
8  * Revised: 25-AUG-2011			Misc. code cleanup.
9  *
10  * Copyright (c) 2011 by David L. Jones <jonesd/at/columbus.rr.com>, and
11  * is hereby released to the general public under the following terms:
12  *    Redistribution and use in source and binary forms, with or without
13  *    modifications, are permitted.
14  *
15  * Original hash algorithm by Shawn Clifford in February 1993.
16  */
17 #ifndef _uaf_hash_
18 #define _uaf_hash_
19 #include "uaf_encode.c"
20 
21 #ifdef VMS
22 #include <ssdef.h>
23 #define SSs_ABORT SS$_ABORT
24 #define SSs_BADPARAM SS$_BADPARAM
25 #define SSs_NORMAL SS$_NORMAL
26 #else
27 /*
28  * Emulate symbols defined for VMS services.
29  */
30 #define SSs_ABORT        44
31 #define SSs_BADPARAM     20
32 #define SSs_NORMAL        1
33 #endif
34 
35 
36 /***************************************************************************/
37 /*
38 
39 Title:		hash_password (originaly LGI$HPWD)
40 Author:		Shawn A. Clifford		(sysop@robot.nuceng.ufl.edu)
41 Date:		19-FEB-1993
42 Revised:	7-JUL-2009	Rework by David Jones for John the Ripper
43 Purpose:	Portable C version of the last 3 encryption methods for DEC's
44 		password hashing algorithms.
45 
46 Usage:		status = lgi$hpwd(&out_buf, &uaf_buf, encrypt, salt, &unam_buf);
47 
48 		int lgi$hpwd2(
49 		    unsigned long long *output_hash,
50 		    string *password,
51 		    unsigned char *encrypt,
52 		    unsigned short *salt,
53 		    string *username)
54 
55 		Where:
56 			output_hash = 8 byte output buffer descriptor
57 			   password = n character password string descriptor
58 			    encrypt = 1 byte; value determines algorithm to use
59 				      0 -> CRC algorithm   (not supported)
60 				      1 -> Purdy algorithm
61 				      2 -> Purdy_V
62 				      3 -> Purdy_S (Hickory algorithm)
63 			       salt = 2 byte (word) random number
64 			   username = up to 31 character username descriptor
65 
66 			status is either SS$_NORMAL (1) or SS$_ABORT (44)
67 
68 Compilation:	VAXC compiler, or 'gcc -traditional -c' to get hpwd.o file
69 		on a Un*x machine.
70 
71 Comments:	The overall speed of this routine is not great.  This is
72 		dictated by the performance of the EMULQ routine which emulates
73 		the VAX EMULQ instruction (see notes in the EMULQ routine).
74 		If anyone can improve performance, or finds bugs in this
75 		program, please send me e-mail at the above address.
76 
77 */
78 
79 typedef struct dsc_descriptor_s string;
80 
81 
82 /*
83  *	Create a quadword data type as successive longwords.
84  */
85 #undef quad
86 #define quad john_quad
87 typedef union {
88     uaf_lword ulw[2];
89     uaf_qword uqw;
90     char b[8];
91 } quad;
92 #define l_low ulw[0]	/* Low order longword in the quadword */
93 #define l_high ulw[1]	/* High order longword in the quadword */
94 
95 
96 /*
97  *	The following table of coefficients is used by the Purdy polynmial
98  *	algorithm.  They are prime, but the algorithm does not require this.
99  */
100 static const struct {
101 	quad	C1,
102 		C2,
103 		C3,
104 		C4,
105 		C5;
106 	} C = { {{-83,  -1}}, {{-179, -1}}, {{-257, -1}}, {{-323, -1}},
107 		{{-363, -1}} };
108 
109 
110 
111 /** Function prototypes **/
112 
113 static void COLLAPSE_R2 (string *r3, quad *r4, char r7);
114 						/* r3 -> input descriptor
115 						   r4 -> output buffer
116 						   r7 : Method 3 flag */
117 static void Purdy (quad *U);			/* U -> output buffer */
118 static void PQEXP_pair (quad *U, int higbits, uaf_lword n0,
119 	uaf_lword n1, quad *result0, quad *result1 );
120 /* static void PQADD_R0 (quad *U,
121 		      quad *Y,
122 		      quad *result); */		/* U + Y MOD P   */
123 static void PQMUL_ADD (quad *U,
124 		      quad *Y, quad *X,
125 		      quad *result);		/* U * Y MOD P   */
126 static void EMULQ    (uaf_lword a,
127 		      uaf_lword b,
128 		      quad *result);		/*  (a * b)	 */
129 static void PQLSH_R0 (quad *U,
130 		      quad *result); 		/* 2^32*U MOD P  */
131 
132 
133 /** RELATIVELY GLOBAL variables **/
134 
135 static uaf_lword	MAXINT = 4294967295U;	/* Largest 32 bit number */
136 static uaf_lword	a = 59;	/* 2^64 - 59 is the biggest quadword prime */
137 static quad	P;	/* P = 2^64 - 59 */
138 
139 /*
140  * Initialize P as 2^64 - a = MAXINT.MAXINT - 1 + 1  Assume called from
141  * main thread.
142  */
uaf_init(void)143 int uaf_init ( void )
144 {
145     static int initialized = 0;
146     if ( !enc_map_ready ) init_enc_map ( );
147     if ( initialized ) return 0;
148     /* since MAXINT.MAXINT = 2^64 - 1		*/
149     P.l_high = MAXINT;
150     P.l_low = MAXINT - a + 1;
151     initialized = 1;
152     return 1;
153 }
154 
155 /** LGI$HPWD entry point **/
156 
hash_password(uaf_qword * output_hash,string * password,unsigned char encrypt,unsigned short salt,string * username)157 static int hash_password (
158 	uaf_qword *output_hash,
159 	string *password,
160 	unsigned char encrypt,
161 	unsigned short salt,
162 	string *username)
163 {
164     string	    *r3;	/* Holds descriptors for COLLAPSE_R2 */
165     quad	    *r4;	/* Address of the output buffer */
166     uaf_lword   r5;		/* Length of username */
167     char	r7 = 0,		/* Flag for encryption method # 3 */
168 		*bytptr;	/* Pointer for adding in random salt */
169     quad	qword;		/* Quadword form of the output buffer */
170     char	uname[13];	/* buffer for padded username (PURDY) */
171 
172     /* ------------------------------------------------------------------------ */
173 
174 
175     /* Check for invalid parameters */
176     if ((encrypt < 1) || (encrypt > 3)) {
177 	  puts("BAD BAD!");
178 	    return -1;
179 //         exit(SSs_BADPARAM);
180     }
181     if (username->dsc_w_length > 31) {
182 	    puts("2");
183 	printf("Internal coding error, username is more than 31 bytes long.\n");
184 	exit(SSs_ABORT);
185     }
186 
187 
188     /* Setup pointer references */
189     r3 = password;			/* 1st COLLAPSE uses the password desc.   */
190     r4 = &qword;			/* @r4..@r4+7 equals obuf */
191     r5 = username->dsc_w_length;
192     r7 = (encrypt == 3);
193 
194     /* Clear the output buffer (zero the quadword) */
195     r4->ulw[0] = 0;
196     r4->ulw[1] = 0;
197     UAF_QW_SET(*output_hash,0);
198 
199     /* Check for the null password and return zero as the hash value if so */
200     if (password->dsc_w_length == 0) {
201 	return SSs_NORMAL;
202     }
203 
204     switch (encrypt) {
205       int ulen;
206       case UAIsC_AD_II:		/* CRC algorithm with Autodin II poly */
207 	/* As yet unsupported */
208 	return SSs_BADPARAM;
209 
210       case UAIsC_PURDY:		/* Purdy algorithm */
211 
212 	/* Use a blank padded username */
213 	strncpy(uname,"            ",sizeof(uname));
214 	strncpy(uname, username->dsc_a_pointer, r5);
215 	username->dsc_a_pointer = (char *)&uname;
216 	username->dsc_w_length = 12;
217 	break;
218 
219       case UAIsC_PURDY_V:		/* Purdy with blanks stripped */
220       case UAIsC_PURDY_S:		/* Hickory algorithm; Purdy_V with rotation */
221 
222 	/* Check padding.  Don't count blanks in the string length.
223 	* Remember:  r6->username_descriptor   the first word is length, then
224 	* 2 bytes of class information (4 bytes total), then the address of the
225 	* buffer.  Usernames can not be longer than 31 characters.
226 	*/
227 	for ( ulen = username->dsc_w_length; ulen > 0; ulen-- ) {
228 	    if ( username->dsc_a_pointer[ulen-1] != ' ' ) break;
229 	    username->dsc_w_length--;
230 	}
231 
232 	/* If Purdy_S:  Bytes 0-1 => plaintext length */
233 	if (r7) {
234 	   r4->ulw[0] = password->dsc_w_length;
235 	}
236 
237 	break;
238     }
239 
240 
241     /* Collapse the password to a quadword U; buffer pointed to by r4 */
242     COLLAPSE_R2 (r3, r4, r7);
243 				/* r3 already points to password descriptor */
244 
245 
246     /* Add random salt into the middle of U */
247     /* This has to be done byte-wise because the Sun will not allow you */
248     /* to add unaligned words, or it will give you a bus error and core dump :) */
249     bytptr = &r4->b[3+1];
250     *bytptr += (char)(salt>>8);			/* Add the high byte */
251 
252     /* Check for carry out of the low byte */
253     bytptr--;
254     if ( (short)((unsigned char)*bytptr + (unsigned char)(salt & 0xff)) > 255) {
255 	*(bytptr + 1) += 1;			/* Account for the carry */
256     }
257     *bytptr += (char)(salt & 0xff);			/* Add the low byte */
258 
259 
260 
261     /* Collapse the username into the quadword */
262     r3 = username;		/* Point r3 to the valid username descriptor */
263     COLLAPSE_R2 (r3, r4, r7);
264 
265     /* U (qword) contains the 8 character output buffer in quadword format */
266 
267     /* Run U through the polynomial mod P */
268     Purdy (r4);
269 
270     /* Write qword (*r4) back into the output buffer */
271     *((quad *) output_hash) = qword;
272 
273     /* Normal exit */
274     return SSs_NORMAL;
275 
276 } /* LGI$HPWD */
277 
278 
279 
280 
281 /***************	Functions Section	*******************/
282 
283 
284 /***************
285    COLLAPSE_R2
286  ***************/
287 
COLLAPSE_R2(string * r3,quad * r4,char r7)288 static void COLLAPSE_R2 (string *r3, quad *r4, char r7)
289 /*
290  *    r3 :  input string descriptor
291  *    r4 :  output buffer (quadword)
292  *    r7 :  flag (1) if using Hickory method (encrypt = 3)
293  *
294  * This routine takes a string of bytes (the descriptor for which is pointed
295  * to by r3) and collapses them into a quadword (pointed to by r4).  It does
296  * this by cycling around the bytes of the output buffer adding in the bytes of
297  * the input string.  Additionally, after every 8 characters, each longword in
298  * the resultant hash is rotated by one bit (PURDY_S only).
299  *
300  */
301 
302 {
303     unsigned short r0, r1;
304     uaf_lword rotate;
305     char *r2, mask = -8;
306 
307     /* --------------------------------------------------------------------- */
308 
309     r0 = r3->dsc_w_length;		/* Obtain the number of input bytes */
310 
311     if (r0 == 0) return;		/* Do nothing with empty string */
312 
313     r2 = r3->dsc_a_pointer;		/* Obtain pointer to input string */
314 
315     for (; (r0 != 0); r0--) {		/* Loop until input string exhausted */
316 
317 	r1 = (~mask & r0);		/* Obtain cyclic index into out buff */
318 	r4->b[r1] += *r2++;		/* Add in this character */
319 
320 	if ((r7) && (r1 == 7))		/* If Purdy_S and last byte ... */
321 	{
322 	   /* Rotate first longword one bit */
323 	   rotate = r4->ulw[0] & 0x80000000;
324 	   r4->ulw[0] <<= 1;
325 	   rotate >>=31;
326 	   r4->ulw[0] |= rotate;
327 
328 	   /* Rotate second longword one bit */
329 	   rotate = r4->ulw[1] & 0x80000000;
330 	   r4->ulw[1] <<= 1;
331 	   rotate >>=31;
332 	   r4->ulw[1] |= rotate;
333 	} /* if Purdy_S */
334 
335      } /* for loop */
336 
337     return;
338 
339 } /* COLLAPSE_R2 */
340 
341 
342 /************
343    PQADD_R0
344  ************/
345 
PQADD_R0(quad * U,quad * Y,quad * result)346 static void PQADD_R0 (quad *U, quad *Y, quad *result)
347 
348 /*
349  * U, Y : quadwords that we want to add
350  *
351  *
352  * Computes the sum U + Y MOD P where P is of the form P = 2^64 - a.
353  * U, Y are quadwords less than P.
354  *
355  * Fixed with the help of the code written by Terence Lee (DEC/HKO).
356  */
357 
358 {
359     uaf_lword carry = 0;
360 #ifndef NOLONGLONG
361     static const unsigned long long maxqw = 0xffffffffffffffffLL;
362     static const unsigned long long modulus = (0xffffffffffffffffLL - 59 + 1);
363 
364     if ( (maxqw-U->uqw) < Y->uqw ) carry = 1;
365     result->uqw = U->uqw + Y->uqw;
366     if ( !carry ) {
367 	if ( result->uqw < modulus ) return;
368 	result->uqw -= (modulus);
369     } else {
370         UAF_QW_ADD(result->uqw,a);	/* missing case of uqw > max-a */
371     }
372 #else /* Compiler doesn't support long long */
373 
374 
375     /* Add the low longwords, checking for carry out */
376     if ( (MAXINT - U->l_low) < Y->l_low ) carry = 1;
377     result->l_low = U->l_low + Y->l_low;
378 
379     /* Add the high longwords */
380     result->l_high = U->l_high + Y->l_high + carry;
381 
382     /* Test for carry out of high bit in the quadword */
383     if ( (MAXINT - U->l_high) < (Y->l_high + carry) )
384 	carry = 1;
385     else
386 	carry = 0;
387 
388     /* Check if we have to MOD P the result */
389     if (!carry && Y->l_high != MAXINT) return;	/* Outta here? */
390     if ( Y->l_low > (MAXINT - a) )
391 	carry = 1;
392     else
393 	carry = 0;
394     result->l_low += a;				/* U + Y MOD P */
395     result->l_high += carry;
396 #endif
397 
398     return;					/* Outta here! */
399 } /* PQADD_R0 */
400 
401 
402 /*********
403    EMULQ
404  *********/
405 
EMULQ(uaf_lword a,uaf_lword b,quad * result)406 static void EMULQ (uaf_lword a, uaf_lword b, quad *result)
407 
408 /*
409  * a, b   : longwords that we want to multiply (quadword result)
410  * result : the quadword result
411  *
412  * This routine knows how to multiply two unsigned longwords, returning the
413  * unsigned quadword product.
414  *
415  * Originally I wrote this using a much faster routine based on a
416  * divide-and-conquer strategy put forth in Knuth's "The Art of Computer
417  * Programming, Vol. 2, Seminumerical Algorithms" but I could not make the routine
418  * reliable.  There is some sort of fixup for sign compensation, much like for the
419  * VAX EMULQ instruction (where for each signed argument you must add the other
420  * argument to the high longword).
421  *
422  * If anyone can improve this routine, please send me source code.
423  *
424  * The original idea behind this algorithm was to build a 2n-by-n matrix for the
425  * n-bit arguments to fill, shifting over each row like you would do by hand.  I
426  * found a simple way to account for the carries and then get the final result,
427  * but the routine was about 4 to 5 times slower than it is now.  Then I realized
428  * that if I made the variables global, that would save a little time on
429  * allocating space for the vectors and matrices.  Last, I removed the matrix, and
430  * added all the values to the 'temp' vector on the fly.  This greatly increased
431  * the speed, but still nowhere near Knuth or calling LIB$EMULQ.
432  */
433 
434 {
435 #ifndef NOLONGLONG
436     uaf_qword qw_a, qw_b;
437 
438     qw_a = a;		/* promote to long long */
439     qw_b = b;
440     result->uqw = qw_a * qw_b;
441 #else
442     char bin_a[32], bin_b[32];
443     char temp[64], i, j;
444     char retbuf[8];
445 
446 
447     /* Initialize */
448     for (i=0; i<=63; i++) temp[i] = 0;
449     for (i=0; i<=31; i++) bin_a[i] = bin_b[i] = 0;
450     for (i=0; i<=7; retbuf[i]=0, i++);
451 
452 
453     /* Store the binary representation of a & b */
454     for (i=0; i<=31; i++) {
455 	bin_a[i] = a & 1;
456 	bin_b[i] = b & 1;
457 	a >>= 1;
458 	b >>= 1;
459     }
460 
461 
462     /* Add in the shifted multiplicand */
463     for (i=0; i<=31; i++) {
464 
465         /* For each 1 in bin_b, add in bin_a, starting in the ith position */
466 	if (bin_b[i] == 1) {
467 		for (j=i; j<=i+31; j++)
468 			temp[j] += bin_a[j-i];
469 	}
470     }
471 
472 
473     /* Carry into the next position and set the binary value */
474     for (j=0; j<=62; j++) {
475 	temp[j+1] += temp[j] / 2;
476 	temp[j] = temp[j] % 2;
477     }
478     temp[63] = temp[63] % 2;
479 
480 
481     /* Convert binary bytes back into 8 packed bytes. */
482     /* LEAST SIGNIFICANT BYTE FIRST!!!  This is LITTLE ENDIAN format. */
483     for (i=0; i<=7; i++) {
484 	for (j=0; j<=7; j++) retbuf[i] += temp[i*8 + j] * (1<<j);
485     }
486 
487 
488     /* Copy the 8 byte buffer into result */
489     memcpy ((char *)result, retbuf, 8);
490 #endif
491 
492     return;
493 
494 } /* EMULQ */
495 
496 
497 
498 /************
499    PQMOD_R0
500  ************/
501 #ifdef NOLONGLONG
PQMOD_R0(quad * U)502 static void PQMOD_R0 (quad *U)
503 /*
504  * U : output buffer (quadword)
505  *
506  * This routine replaces the quadword U with U MOD P, where P is of the form
507  * P = 2^64 - a			(RELATIVELY GLOBAL a = 59)
508  *   = FFFFFFFF.FFFFFFFF - 3B + 1	(MAXINT = FFFFFFFF = 4,294,967,295)
509  *   = FFFFFFFF.FFFFFFC5
510  *
511  * Method:  Since P is very nearly the maximum integer you can specify in a
512  * quadword (ie. P = FFFFFFFFFFFFFFC5, there will be only 58 choices for
513  * U that are larger than P (ie. U MOD P > 0).  So we check the high longword in
514  * the quadword and see if all its bits are set (-1).  If not, then U can not
515  * possibly be larger than P, and U MOD P = U.  If U is larger than MAXINT - 59,
516  * then U MOD P is the differential between (MAXINT - 59) and U, else
517  * U MOD P = U.  If U equals P, then U MOD P = 0 = (P + 59).
518  */
519 
520 {
521 
522     /* Check if U is larger/equal to P.  If not, then leave U alone. */
523 
524     if (U->l_high == P.l_high && U->l_low >= P.l_low) {
525 	U->l_low += a;		/* This will have a carry out, and ...	*/
526 	U->l_high = 0;		/* the carry will make l_high go to 0   */
527     }
528 
529     return;
530 
531 } /* PQMOD_R0 */
532 #else			/* Replace function with macro */
533 #define PQMOD_R0(x) if ( (x)->uqw < P.uqw ); else (x)->uqw += a;
534 #endif
535 
536 
PQEXP_pair(quad * U,int highbit,uaf_lword n0,uaf_lword n1,quad * result0,quad * result1)537 static void PQEXP_pair (quad *U, int highbit, uaf_lword n0,
538 	uaf_lword n1, quad *result0, quad *result1 )
539 /*
540  * U        : pointer to output buffer (quadword)
541  * highbit  : Highest bit set in n0/n1 +1
542  * n0,n1    : unsigned longword (exponent for U)
543  *
544  * The routine returns U^n MOD P where P is of the form P = 2^64-a.
545  * U is a quadword, n is an unsigned longword, P is a RELATIVELY GLOBAL quad.
546  * We optimize operation by generating two results for a single U re-using
547  * the powers of 2 vector.
548  *
549  * The method comes from Knuth, "The Art of Computer Programing, Vol. 2", section
550  * 4.6.3, "Evaluation of Powers."  This algorithm is for calculating U^n with
551  * fewer than (n-1) multiplies.  The result is U^n MOD P only because the
552  * multiplication routine is MOD P.  Knuth's example is from Pingala's Hindu
553  * algorithm in the Chandah-sutra.
554  */
555 
556 {
557     quad b2[32], *b2ptr;		/* sucessive squaring of base.  */
558     int is_one;				/* True if  intermediate value== 1 */
559     /*
560      * Build series where b2[i] = U^(2^i) through repeated squaring.
561      */
562     b2[0] = *U;
563     for ( b2ptr = b2; highbit > 1; highbit-- ) {
564 	PQMUL_ADD (b2ptr, b2ptr, 0, b2ptr+1);	   /* b2[i] = U ^ (2^i) */
565 	b2ptr++;
566     }
567     /*
568      * Compute U^n0 by multiply the factors corresponding to set bits in the
569      * the exponent (U^(a+b) = U^a * U^b).  Assume exponent non-zero.
570      */
571     is_one = 1;			/* Skip initializing result, set flag instead.*/
572     for ( b2ptr = b2; n0 != 0; b2ptr++ ) {
573 	if ( n0 % 2 ) {		/* Test low bit */
574 	    if ( is_one ) {	/* Use assign instead of multiply */
575 		is_one = 0;
576 		*result0 = *b2ptr;
577 	    } else PQMUL_ADD ( result0, b2ptr, 0, result0 );
578 	}
579 	n0 /= 2;		/* Shift right to test next bit next round */
580     }
581     /*
582      * Compute U^n1, but only if supplied by caller.
583      */
584     if ( !result1 ) return;
585     is_one = 1;
586     for ( b2ptr = b2; n1 != 0; b2ptr++ ) {
587 	if ( n1 % 2 ) {
588 	    if ( is_one ) {
589 		is_one = 0;
590 		*result1 = *b2ptr;
591 	    } else PQMUL_ADD ( result1, b2ptr, 0, result1 );
592 	}
593 	n1 /= 2;
594     }
595     return;
596 
597 } /* PQEXP_pair */
598 
599 
600 
601 /************
602    PQMUL_ADD
603  ************/
604 
PQMUL_ADD(quad * U,quad * Y,quad * X,quad * result)605 static void PQMUL_ADD (quad *U, quad *Y, quad *X, quad *result)
606 
607 /*
608  * U, Y, A : Input values, A optional.
609  * result  : Output, result = (U*Y)+A
610  *
611  * Computes the product U*Y+X MOD P where P is of the form P = 2^64 - a.
612  * U, Y, X are quadwords less than P.  The result is returned in result.
613  *
614  * The product may be formed as the sum of four longword multiplications
615  * which are scaled by powers of 2^32 by evaluating:
616  *
617  *	2^64*v*z + 2^32*(v*y + u*z) + u*y
618  *      ^^^^^^^^   ^^^^^^^^^^^^^^^^   ^^^
619  *      part1       part2 & part3     part4
620  *
621  * The result is computed such that division by the modulus P is avoided.
622  *
623  * u is the low longword of  U;	u = U.l_low
624  * v is the high longword of U;	v = U.l_high
625  * y is the low longword of  Y;	y = Y.l_low
626  * z is the high longword of Y;	z = Y.l_high
627  */
628 
629 {
630     quad stack, part1, part2, part3;
631 
632     EMULQ(U->l_high, Y->l_high, &stack);	/* Multiply   v*z      */
633 
634     PQMOD_R0(&stack);				/* Get   (v*z) MOD P   */
635 
636 
637     /*** 1st term ***/
638     PQLSH_R0(&stack, &part1);			/* Get   2^32*(v*z) MOD P  */
639 
640 
641     EMULQ(U->l_high, Y->l_low, &stack);		/* Multiply   v*y      */
642 
643     PQMOD_R0(&stack);				/* Get   (v*y) MOD P   */
644 
645     EMULQ(U->l_low, Y->l_high, &part2);		/* Multiply   u*z      */
646 
647     PQMOD_R0(&part2);				/* Get   (u*z) MOD P   */
648 
649     PQADD_R0 (&stack, &part2, &part3);		/* Get   (v*y + u*z)   */
650 
651     PQADD_R0 (&part1, &part3, &stack);   /* Get   2^32*(v*z) + (v*y + u*z) */
652 
653 
654     /*** 1st & 2nd terms ***/
655     PQLSH_R0(&stack, &part1);	/* Get   2^64*(v*z) + 2^32*(v*y + u*z) */
656 
657 
658     EMULQ(U->l_low, Y->l_low, &stack);		/* Multiply   u*y      */
659 
660 
661     /*** Last term ***/
662     PQMOD_R0(&stack);
663 
664     if ( X ) PQADD_R0 ( &stack, X, &stack );
665 
666     PQADD_R0(&part1, &stack, result);		/* Whole thing */
667 
668 
669     return;
670 
671 } /* PQMUL_R2 */
672 
673 
674 
675 /************
676    PQLSH_R0
677  ************/
678 
PQLSH_R0(quad * U,quad * result)679 static void PQLSH_R0 (quad *U, quad *result)
680 
681 /*
682  * Computes the product 2^32*U MOD P where P is of the form P = 2^64 - a.
683  * U is a quadword less than P.
684  *
685  * This routine is used by PQMUL in the formation of quadword products in
686  * such a way as to avoid division by modulus the P.
687  * The product 2^64*v + 2^32*u is congruent a*v + 2^32*U MOD P.
688  *
689  * u is the low longword in U
690  * v is the high longword in U
691  */
692 
693 {
694     quad stack;
695 
696 
697     /* Get	a*v   */
698     EMULQ(U->l_high, a, &stack);
699 
700     /* Form  Y = 2^32*u  */
701     U->l_high = U->l_low;
702     U->l_low = 0;
703 
704     /* Get	U + Y MOD P  */
705     PQADD_R0 (U, &stack, result);
706 
707     return;
708 
709 } /* PQLSH_R0 */
710 
711 
712 /*********
713    Purdy
714  *********/
715 
Purdy(quad * U)716 static void Purdy (quad *U)
717 
718 /*
719  * U : input/output buffer (quadword)
720  *
721  * This routine computes  f(U) = p(U) MOD P.  Where P is a prime of the form
722  * P = 2^64 - a.  The function p is the following polynomial:
723  *
724  * 		X^n0 + X^n1*C1 + X^3*C2 + X^2*C3 + X*C4 + C5
725  *                ^^^^^^^^^^^^^^   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
726  *		    part1		  part2
727  *
728  * Note:  Part1 is calculated by its congruence  (X^(n0-n1) + C1)*X^n1
729  *        finding X^n1, X^(n0-n1), X^(n0-n1)+C1, then
730  *        (X^(n0-n1) + C1)*X^n1  which equals  X^n0 + X^n1*C1
731  *
732  *  To minimize the number of multiplications, we evaluate
733  *
734  *  f(U) =  ((U^(n0-n1) + C1)*U^(n1-1)*U + (U*C2 + C3)*U + C4)*U + C5
735  *          ^^^^^^^^^^^^^^^^^^^^^^^^^^     ^^^^^^^^^^^^^^^^^^^^^^^^^^
736  *                    part1                         part2
737  *
738  * The number of multiplies needed for an exponentiation is equal to
739  * the bit position of the most signficant bit + the number of other set
740  * bits in the binary expansion of the exponent.  By treating U^n1 as
741  * U*U^(n1-1), we can replace n1-1 with 448*37449 to compute the
742  * value in (1+15+5+8+2)=31 multiplies rather than (23+18)=41 multiplies used
743  * for a direct U^n1.  Note that because of the pqexp_pair optimization, the
744  * U^(n0-n1) calculation is done with just 3 multiplies.
745  *
746  */
747 
748 {
749     quad     *Cptr = (quad *)&C;	/* Address of table (C) */
750     static const uaf_lword n0 = 16777213,/* These exponents are prime, but this is  */
751 	      n1 = 16777153;	/* not required by the algorithm 	   */
752     static const uaf_lword na = 37449,
753 	      nb = 448;		/* U^n1 = (U^na)^nb * U  */
754 				/* n0 = 2^24 - 3;  n1 = 2^24 - 63;  n0-n1 = 60*/
755     quad      X,		/* The variable X in the polynomial */
756 	      X_n1, X_n0Mn1,
757 	      X_na, X_na_nb,	/* X raised to na, na*nb */
758 	      part1,		/* Collect the polynomial ... */
759 	      part2;		/* ... in two parts */
760 
761 
762     /* --------------------------------------------------------------------- */
763 
764 
765     /* Ensure U less than P by taking U MOD P  */
766     /* Save copy of result:  X = U MOD P       */
767     X.l_low = U->l_low;
768     X.l_high = U->l_high;
769 
770     if ( UAF_QW_GEQ(X.uqw, P.uqw) ) UAF_QW_ADD(X.uqw,a); /* Take X mod P    */
771 
772     PQEXP_pair ( &X, 16, (n0-n1), na, &X_n0Mn1, &X_na );
773     PQADD_R0 ( &X_n0Mn1, Cptr, &part1 );		/* X^(n1-n0) + C1   */
774 
775     PQEXP_pair ( &X_na, 9, nb, 0, &X_na_nb, 0 );	/* X^na^nb 	    */
776     PQMUL_ADD ( &X_na_nb, &X, 0, &X_n1 );		/* X^na^nb*X = X^n1 */
777 
778     /* Part 1 complete except for multiply by X^n1 */
779 
780     Cptr++;					/* Point to C2		*/
781 
782     PQMUL_ADD (&X, Cptr, Cptr+1, &part2);	/* part2= X*C2 + C3	*/
783 
784     Cptr += 2;				/* C4				*/
785     PQMUL_ADD (&X, &part2, Cptr, &part2);   	/* part2 = part2*X + C4 */
786 						/* (X^2*C2 + X*C3 + C4)	*/
787 
788     Cptr++;					/* C5			*/
789     PQMUL_ADD (&X, &part2, Cptr, &part2);/* part2=part2*X + C5       	*/
790 					/* X^3*C2 + X^2*C3 + X*C4 + C5	*/
791     /* Part 2 complete */
792 
793 
794    /* Do final multiply for part1 and add in part2 to get final result.  */
795 
796    PQMUL_ADD ( &part1, &X_n1, &part2, (quad *) U );
797 
798     /* Outta here... */
799     return;
800 
801 } /* Purdy */
802 #endif
803