1 /*
2 * This file is intended to be included into the uaf_encode.c source file
3 * via a #include preprocessor directive and will not compile if compiled
4 * directly. Function must be thread safe.
5 *
6 * Revised: 22-JUL-2009 Re-factor exponent to improve speed.
7 * Revised: 14-AUG-2009 Re-locate 'P' initialization.
8 * Revised: 25-AUG-2011 Misc. code cleanup.
9 *
10 * Copyright (c) 2011 by David L. Jones <jonesd/at/columbus.rr.com>, and
11 * is hereby released to the general public under the following terms:
12 * Redistribution and use in source and binary forms, with or without
13 * modifications, are permitted.
14 *
15 * Original hash algorithm by Shawn Clifford in February 1993.
16 */
17 #ifndef _uaf_hash_
18 #define _uaf_hash_
19 #include "uaf_encode.c"
20
21 #ifdef VMS
22 #include <ssdef.h>
23 #define SSs_ABORT SS$_ABORT
24 #define SSs_BADPARAM SS$_BADPARAM
25 #define SSs_NORMAL SS$_NORMAL
26 #else
27 /*
28 * Emulate symbols defined for VMS services.
29 */
30 #define SSs_ABORT 44
31 #define SSs_BADPARAM 20
32 #define SSs_NORMAL 1
33 #endif
34
35
36 /***************************************************************************/
37 /*
38
39 Title: hash_password (originaly LGI$HPWD)
40 Author: Shawn A. Clifford (sysop@robot.nuceng.ufl.edu)
41 Date: 19-FEB-1993
42 Revised: 7-JUL-2009 Rework by David Jones for John the Ripper
43 Purpose: Portable C version of the last 3 encryption methods for DEC's
44 password hashing algorithms.
45
46 Usage: status = lgi$hpwd(&out_buf, &uaf_buf, encrypt, salt, &unam_buf);
47
48 int lgi$hpwd2(
49 unsigned long long *output_hash,
50 string *password,
51 unsigned char *encrypt,
52 unsigned short *salt,
53 string *username)
54
55 Where:
56 output_hash = 8 byte output buffer descriptor
57 password = n character password string descriptor
58 encrypt = 1 byte; value determines algorithm to use
59 0 -> CRC algorithm (not supported)
60 1 -> Purdy algorithm
61 2 -> Purdy_V
62 3 -> Purdy_S (Hickory algorithm)
63 salt = 2 byte (word) random number
64 username = up to 31 character username descriptor
65
66 status is either SS$_NORMAL (1) or SS$_ABORT (44)
67
68 Compilation: VAXC compiler, or 'gcc -traditional -c' to get hpwd.o file
69 on a Un*x machine.
70
71 Comments: The overall speed of this routine is not great. This is
72 dictated by the performance of the EMULQ routine which emulates
73 the VAX EMULQ instruction (see notes in the EMULQ routine).
74 If anyone can improve performance, or finds bugs in this
75 program, please send me e-mail at the above address.
76
77 */
78
79 typedef struct dsc_descriptor_s string;
80
81
82 /*
83 * Create a quadword data type as successive longwords.
84 */
85 #undef quad
86 #define quad john_quad
87 typedef union {
88 uaf_lword ulw[2];
89 uaf_qword uqw;
90 char b[8];
91 } quad;
92 #define l_low ulw[0] /* Low order longword in the quadword */
93 #define l_high ulw[1] /* High order longword in the quadword */
94
95
96 /*
97 * The following table of coefficients is used by the Purdy polynmial
98 * algorithm. They are prime, but the algorithm does not require this.
99 */
100 static const struct {
101 quad C1,
102 C2,
103 C3,
104 C4,
105 C5;
106 } C = { {{-83, -1}}, {{-179, -1}}, {{-257, -1}}, {{-323, -1}},
107 {{-363, -1}} };
108
109
110
111 /** Function prototypes **/
112
113 static void COLLAPSE_R2 (string *r3, quad *r4, char r7);
114 /* r3 -> input descriptor
115 r4 -> output buffer
116 r7 : Method 3 flag */
117 static void Purdy (quad *U); /* U -> output buffer */
118 static void PQEXP_pair (quad *U, int higbits, uaf_lword n0,
119 uaf_lword n1, quad *result0, quad *result1 );
120 /* static void PQADD_R0 (quad *U,
121 quad *Y,
122 quad *result); */ /* U + Y MOD P */
123 static void PQMUL_ADD (quad *U,
124 quad *Y, quad *X,
125 quad *result); /* U * Y MOD P */
126 static void EMULQ (uaf_lword a,
127 uaf_lword b,
128 quad *result); /* (a * b) */
129 static void PQLSH_R0 (quad *U,
130 quad *result); /* 2^32*U MOD P */
131
132
133 /** RELATIVELY GLOBAL variables **/
134
135 static uaf_lword MAXINT = 4294967295U; /* Largest 32 bit number */
136 static uaf_lword a = 59; /* 2^64 - 59 is the biggest quadword prime */
137 static quad P; /* P = 2^64 - 59 */
138
139 /*
140 * Initialize P as 2^64 - a = MAXINT.MAXINT - 1 + 1 Assume called from
141 * main thread.
142 */
uaf_init(void)143 int uaf_init ( void )
144 {
145 static int initialized = 0;
146 if ( !enc_map_ready ) init_enc_map ( );
147 if ( initialized ) return 0;
148 /* since MAXINT.MAXINT = 2^64 - 1 */
149 P.l_high = MAXINT;
150 P.l_low = MAXINT - a + 1;
151 initialized = 1;
152 return 1;
153 }
154
155 /** LGI$HPWD entry point **/
156
hash_password(uaf_qword * output_hash,string * password,unsigned char encrypt,unsigned short salt,string * username)157 static int hash_password (
158 uaf_qword *output_hash,
159 string *password,
160 unsigned char encrypt,
161 unsigned short salt,
162 string *username)
163 {
164 string *r3; /* Holds descriptors for COLLAPSE_R2 */
165 quad *r4; /* Address of the output buffer */
166 uaf_lword r5; /* Length of username */
167 char r7 = 0, /* Flag for encryption method # 3 */
168 *bytptr; /* Pointer for adding in random salt */
169 quad qword; /* Quadword form of the output buffer */
170 char uname[13]; /* buffer for padded username (PURDY) */
171
172 /* ------------------------------------------------------------------------ */
173
174
175 /* Check for invalid parameters */
176 if ((encrypt < 1) || (encrypt > 3)) {
177 puts("BAD BAD!");
178 return -1;
179 // exit(SSs_BADPARAM);
180 }
181 if (username->dsc_w_length > 31) {
182 puts("2");
183 printf("Internal coding error, username is more than 31 bytes long.\n");
184 exit(SSs_ABORT);
185 }
186
187
188 /* Setup pointer references */
189 r3 = password; /* 1st COLLAPSE uses the password desc. */
190 r4 = &qword; /* @r4..@r4+7 equals obuf */
191 r5 = username->dsc_w_length;
192 r7 = (encrypt == 3);
193
194 /* Clear the output buffer (zero the quadword) */
195 r4->ulw[0] = 0;
196 r4->ulw[1] = 0;
197 UAF_QW_SET(*output_hash,0);
198
199 /* Check for the null password and return zero as the hash value if so */
200 if (password->dsc_w_length == 0) {
201 return SSs_NORMAL;
202 }
203
204 switch (encrypt) {
205 int ulen;
206 case UAIsC_AD_II: /* CRC algorithm with Autodin II poly */
207 /* As yet unsupported */
208 return SSs_BADPARAM;
209
210 case UAIsC_PURDY: /* Purdy algorithm */
211
212 /* Use a blank padded username */
213 strncpy(uname," ",sizeof(uname));
214 strncpy(uname, username->dsc_a_pointer, r5);
215 username->dsc_a_pointer = (char *)&uname;
216 username->dsc_w_length = 12;
217 break;
218
219 case UAIsC_PURDY_V: /* Purdy with blanks stripped */
220 case UAIsC_PURDY_S: /* Hickory algorithm; Purdy_V with rotation */
221
222 /* Check padding. Don't count blanks in the string length.
223 * Remember: r6->username_descriptor the first word is length, then
224 * 2 bytes of class information (4 bytes total), then the address of the
225 * buffer. Usernames can not be longer than 31 characters.
226 */
227 for ( ulen = username->dsc_w_length; ulen > 0; ulen-- ) {
228 if ( username->dsc_a_pointer[ulen-1] != ' ' ) break;
229 username->dsc_w_length--;
230 }
231
232 /* If Purdy_S: Bytes 0-1 => plaintext length */
233 if (r7) {
234 r4->ulw[0] = password->dsc_w_length;
235 }
236
237 break;
238 }
239
240
241 /* Collapse the password to a quadword U; buffer pointed to by r4 */
242 COLLAPSE_R2 (r3, r4, r7);
243 /* r3 already points to password descriptor */
244
245
246 /* Add random salt into the middle of U */
247 /* This has to be done byte-wise because the Sun will not allow you */
248 /* to add unaligned words, or it will give you a bus error and core dump :) */
249 bytptr = &r4->b[3+1];
250 *bytptr += (char)(salt>>8); /* Add the high byte */
251
252 /* Check for carry out of the low byte */
253 bytptr--;
254 if ( (short)((unsigned char)*bytptr + (unsigned char)(salt & 0xff)) > 255) {
255 *(bytptr + 1) += 1; /* Account for the carry */
256 }
257 *bytptr += (char)(salt & 0xff); /* Add the low byte */
258
259
260
261 /* Collapse the username into the quadword */
262 r3 = username; /* Point r3 to the valid username descriptor */
263 COLLAPSE_R2 (r3, r4, r7);
264
265 /* U (qword) contains the 8 character output buffer in quadword format */
266
267 /* Run U through the polynomial mod P */
268 Purdy (r4);
269
270 /* Write qword (*r4) back into the output buffer */
271 *((quad *) output_hash) = qword;
272
273 /* Normal exit */
274 return SSs_NORMAL;
275
276 } /* LGI$HPWD */
277
278
279
280
281 /*************** Functions Section *******************/
282
283
284 /***************
285 COLLAPSE_R2
286 ***************/
287
COLLAPSE_R2(string * r3,quad * r4,char r7)288 static void COLLAPSE_R2 (string *r3, quad *r4, char r7)
289 /*
290 * r3 : input string descriptor
291 * r4 : output buffer (quadword)
292 * r7 : flag (1) if using Hickory method (encrypt = 3)
293 *
294 * This routine takes a string of bytes (the descriptor for which is pointed
295 * to by r3) and collapses them into a quadword (pointed to by r4). It does
296 * this by cycling around the bytes of the output buffer adding in the bytes of
297 * the input string. Additionally, after every 8 characters, each longword in
298 * the resultant hash is rotated by one bit (PURDY_S only).
299 *
300 */
301
302 {
303 unsigned short r0, r1;
304 uaf_lword rotate;
305 char *r2, mask = -8;
306
307 /* --------------------------------------------------------------------- */
308
309 r0 = r3->dsc_w_length; /* Obtain the number of input bytes */
310
311 if (r0 == 0) return; /* Do nothing with empty string */
312
313 r2 = r3->dsc_a_pointer; /* Obtain pointer to input string */
314
315 for (; (r0 != 0); r0--) { /* Loop until input string exhausted */
316
317 r1 = (~mask & r0); /* Obtain cyclic index into out buff */
318 r4->b[r1] += *r2++; /* Add in this character */
319
320 if ((r7) && (r1 == 7)) /* If Purdy_S and last byte ... */
321 {
322 /* Rotate first longword one bit */
323 rotate = r4->ulw[0] & 0x80000000;
324 r4->ulw[0] <<= 1;
325 rotate >>=31;
326 r4->ulw[0] |= rotate;
327
328 /* Rotate second longword one bit */
329 rotate = r4->ulw[1] & 0x80000000;
330 r4->ulw[1] <<= 1;
331 rotate >>=31;
332 r4->ulw[1] |= rotate;
333 } /* if Purdy_S */
334
335 } /* for loop */
336
337 return;
338
339 } /* COLLAPSE_R2 */
340
341
342 /************
343 PQADD_R0
344 ************/
345
PQADD_R0(quad * U,quad * Y,quad * result)346 static void PQADD_R0 (quad *U, quad *Y, quad *result)
347
348 /*
349 * U, Y : quadwords that we want to add
350 *
351 *
352 * Computes the sum U + Y MOD P where P is of the form P = 2^64 - a.
353 * U, Y are quadwords less than P.
354 *
355 * Fixed with the help of the code written by Terence Lee (DEC/HKO).
356 */
357
358 {
359 uaf_lword carry = 0;
360 #ifndef NOLONGLONG
361 static const unsigned long long maxqw = 0xffffffffffffffffLL;
362 static const unsigned long long modulus = (0xffffffffffffffffLL - 59 + 1);
363
364 if ( (maxqw-U->uqw) < Y->uqw ) carry = 1;
365 result->uqw = U->uqw + Y->uqw;
366 if ( !carry ) {
367 if ( result->uqw < modulus ) return;
368 result->uqw -= (modulus);
369 } else {
370 UAF_QW_ADD(result->uqw,a); /* missing case of uqw > max-a */
371 }
372 #else /* Compiler doesn't support long long */
373
374
375 /* Add the low longwords, checking for carry out */
376 if ( (MAXINT - U->l_low) < Y->l_low ) carry = 1;
377 result->l_low = U->l_low + Y->l_low;
378
379 /* Add the high longwords */
380 result->l_high = U->l_high + Y->l_high + carry;
381
382 /* Test for carry out of high bit in the quadword */
383 if ( (MAXINT - U->l_high) < (Y->l_high + carry) )
384 carry = 1;
385 else
386 carry = 0;
387
388 /* Check if we have to MOD P the result */
389 if (!carry && Y->l_high != MAXINT) return; /* Outta here? */
390 if ( Y->l_low > (MAXINT - a) )
391 carry = 1;
392 else
393 carry = 0;
394 result->l_low += a; /* U + Y MOD P */
395 result->l_high += carry;
396 #endif
397
398 return; /* Outta here! */
399 } /* PQADD_R0 */
400
401
402 /*********
403 EMULQ
404 *********/
405
EMULQ(uaf_lword a,uaf_lword b,quad * result)406 static void EMULQ (uaf_lword a, uaf_lword b, quad *result)
407
408 /*
409 * a, b : longwords that we want to multiply (quadword result)
410 * result : the quadword result
411 *
412 * This routine knows how to multiply two unsigned longwords, returning the
413 * unsigned quadword product.
414 *
415 * Originally I wrote this using a much faster routine based on a
416 * divide-and-conquer strategy put forth in Knuth's "The Art of Computer
417 * Programming, Vol. 2, Seminumerical Algorithms" but I could not make the routine
418 * reliable. There is some sort of fixup for sign compensation, much like for the
419 * VAX EMULQ instruction (where for each signed argument you must add the other
420 * argument to the high longword).
421 *
422 * If anyone can improve this routine, please send me source code.
423 *
424 * The original idea behind this algorithm was to build a 2n-by-n matrix for the
425 * n-bit arguments to fill, shifting over each row like you would do by hand. I
426 * found a simple way to account for the carries and then get the final result,
427 * but the routine was about 4 to 5 times slower than it is now. Then I realized
428 * that if I made the variables global, that would save a little time on
429 * allocating space for the vectors and matrices. Last, I removed the matrix, and
430 * added all the values to the 'temp' vector on the fly. This greatly increased
431 * the speed, but still nowhere near Knuth or calling LIB$EMULQ.
432 */
433
434 {
435 #ifndef NOLONGLONG
436 uaf_qword qw_a, qw_b;
437
438 qw_a = a; /* promote to long long */
439 qw_b = b;
440 result->uqw = qw_a * qw_b;
441 #else
442 char bin_a[32], bin_b[32];
443 char temp[64], i, j;
444 char retbuf[8];
445
446
447 /* Initialize */
448 for (i=0; i<=63; i++) temp[i] = 0;
449 for (i=0; i<=31; i++) bin_a[i] = bin_b[i] = 0;
450 for (i=0; i<=7; retbuf[i]=0, i++);
451
452
453 /* Store the binary representation of a & b */
454 for (i=0; i<=31; i++) {
455 bin_a[i] = a & 1;
456 bin_b[i] = b & 1;
457 a >>= 1;
458 b >>= 1;
459 }
460
461
462 /* Add in the shifted multiplicand */
463 for (i=0; i<=31; i++) {
464
465 /* For each 1 in bin_b, add in bin_a, starting in the ith position */
466 if (bin_b[i] == 1) {
467 for (j=i; j<=i+31; j++)
468 temp[j] += bin_a[j-i];
469 }
470 }
471
472
473 /* Carry into the next position and set the binary value */
474 for (j=0; j<=62; j++) {
475 temp[j+1] += temp[j] / 2;
476 temp[j] = temp[j] % 2;
477 }
478 temp[63] = temp[63] % 2;
479
480
481 /* Convert binary bytes back into 8 packed bytes. */
482 /* LEAST SIGNIFICANT BYTE FIRST!!! This is LITTLE ENDIAN format. */
483 for (i=0; i<=7; i++) {
484 for (j=0; j<=7; j++) retbuf[i] += temp[i*8 + j] * (1<<j);
485 }
486
487
488 /* Copy the 8 byte buffer into result */
489 memcpy ((char *)result, retbuf, 8);
490 #endif
491
492 return;
493
494 } /* EMULQ */
495
496
497
498 /************
499 PQMOD_R0
500 ************/
501 #ifdef NOLONGLONG
PQMOD_R0(quad * U)502 static void PQMOD_R0 (quad *U)
503 /*
504 * U : output buffer (quadword)
505 *
506 * This routine replaces the quadword U with U MOD P, where P is of the form
507 * P = 2^64 - a (RELATIVELY GLOBAL a = 59)
508 * = FFFFFFFF.FFFFFFFF - 3B + 1 (MAXINT = FFFFFFFF = 4,294,967,295)
509 * = FFFFFFFF.FFFFFFC5
510 *
511 * Method: Since P is very nearly the maximum integer you can specify in a
512 * quadword (ie. P = FFFFFFFFFFFFFFC5, there will be only 58 choices for
513 * U that are larger than P (ie. U MOD P > 0). So we check the high longword in
514 * the quadword and see if all its bits are set (-1). If not, then U can not
515 * possibly be larger than P, and U MOD P = U. If U is larger than MAXINT - 59,
516 * then U MOD P is the differential between (MAXINT - 59) and U, else
517 * U MOD P = U. If U equals P, then U MOD P = 0 = (P + 59).
518 */
519
520 {
521
522 /* Check if U is larger/equal to P. If not, then leave U alone. */
523
524 if (U->l_high == P.l_high && U->l_low >= P.l_low) {
525 U->l_low += a; /* This will have a carry out, and ... */
526 U->l_high = 0; /* the carry will make l_high go to 0 */
527 }
528
529 return;
530
531 } /* PQMOD_R0 */
532 #else /* Replace function with macro */
533 #define PQMOD_R0(x) if ( (x)->uqw < P.uqw ); else (x)->uqw += a;
534 #endif
535
536
PQEXP_pair(quad * U,int highbit,uaf_lword n0,uaf_lword n1,quad * result0,quad * result1)537 static void PQEXP_pair (quad *U, int highbit, uaf_lword n0,
538 uaf_lword n1, quad *result0, quad *result1 )
539 /*
540 * U : pointer to output buffer (quadword)
541 * highbit : Highest bit set in n0/n1 +1
542 * n0,n1 : unsigned longword (exponent for U)
543 *
544 * The routine returns U^n MOD P where P is of the form P = 2^64-a.
545 * U is a quadword, n is an unsigned longword, P is a RELATIVELY GLOBAL quad.
546 * We optimize operation by generating two results for a single U re-using
547 * the powers of 2 vector.
548 *
549 * The method comes from Knuth, "The Art of Computer Programing, Vol. 2", section
550 * 4.6.3, "Evaluation of Powers." This algorithm is for calculating U^n with
551 * fewer than (n-1) multiplies. The result is U^n MOD P only because the
552 * multiplication routine is MOD P. Knuth's example is from Pingala's Hindu
553 * algorithm in the Chandah-sutra.
554 */
555
556 {
557 quad b2[32], *b2ptr; /* sucessive squaring of base. */
558 int is_one; /* True if intermediate value== 1 */
559 /*
560 * Build series where b2[i] = U^(2^i) through repeated squaring.
561 */
562 b2[0] = *U;
563 for ( b2ptr = b2; highbit > 1; highbit-- ) {
564 PQMUL_ADD (b2ptr, b2ptr, 0, b2ptr+1); /* b2[i] = U ^ (2^i) */
565 b2ptr++;
566 }
567 /*
568 * Compute U^n0 by multiply the factors corresponding to set bits in the
569 * the exponent (U^(a+b) = U^a * U^b). Assume exponent non-zero.
570 */
571 is_one = 1; /* Skip initializing result, set flag instead.*/
572 for ( b2ptr = b2; n0 != 0; b2ptr++ ) {
573 if ( n0 % 2 ) { /* Test low bit */
574 if ( is_one ) { /* Use assign instead of multiply */
575 is_one = 0;
576 *result0 = *b2ptr;
577 } else PQMUL_ADD ( result0, b2ptr, 0, result0 );
578 }
579 n0 /= 2; /* Shift right to test next bit next round */
580 }
581 /*
582 * Compute U^n1, but only if supplied by caller.
583 */
584 if ( !result1 ) return;
585 is_one = 1;
586 for ( b2ptr = b2; n1 != 0; b2ptr++ ) {
587 if ( n1 % 2 ) {
588 if ( is_one ) {
589 is_one = 0;
590 *result1 = *b2ptr;
591 } else PQMUL_ADD ( result1, b2ptr, 0, result1 );
592 }
593 n1 /= 2;
594 }
595 return;
596
597 } /* PQEXP_pair */
598
599
600
601 /************
602 PQMUL_ADD
603 ************/
604
PQMUL_ADD(quad * U,quad * Y,quad * X,quad * result)605 static void PQMUL_ADD (quad *U, quad *Y, quad *X, quad *result)
606
607 /*
608 * U, Y, A : Input values, A optional.
609 * result : Output, result = (U*Y)+A
610 *
611 * Computes the product U*Y+X MOD P where P is of the form P = 2^64 - a.
612 * U, Y, X are quadwords less than P. The result is returned in result.
613 *
614 * The product may be formed as the sum of four longword multiplications
615 * which are scaled by powers of 2^32 by evaluating:
616 *
617 * 2^64*v*z + 2^32*(v*y + u*z) + u*y
618 * ^^^^^^^^ ^^^^^^^^^^^^^^^^ ^^^
619 * part1 part2 & part3 part4
620 *
621 * The result is computed such that division by the modulus P is avoided.
622 *
623 * u is the low longword of U; u = U.l_low
624 * v is the high longword of U; v = U.l_high
625 * y is the low longword of Y; y = Y.l_low
626 * z is the high longword of Y; z = Y.l_high
627 */
628
629 {
630 quad stack, part1, part2, part3;
631
632 EMULQ(U->l_high, Y->l_high, &stack); /* Multiply v*z */
633
634 PQMOD_R0(&stack); /* Get (v*z) MOD P */
635
636
637 /*** 1st term ***/
638 PQLSH_R0(&stack, &part1); /* Get 2^32*(v*z) MOD P */
639
640
641 EMULQ(U->l_high, Y->l_low, &stack); /* Multiply v*y */
642
643 PQMOD_R0(&stack); /* Get (v*y) MOD P */
644
645 EMULQ(U->l_low, Y->l_high, &part2); /* Multiply u*z */
646
647 PQMOD_R0(&part2); /* Get (u*z) MOD P */
648
649 PQADD_R0 (&stack, &part2, &part3); /* Get (v*y + u*z) */
650
651 PQADD_R0 (&part1, &part3, &stack); /* Get 2^32*(v*z) + (v*y + u*z) */
652
653
654 /*** 1st & 2nd terms ***/
655 PQLSH_R0(&stack, &part1); /* Get 2^64*(v*z) + 2^32*(v*y + u*z) */
656
657
658 EMULQ(U->l_low, Y->l_low, &stack); /* Multiply u*y */
659
660
661 /*** Last term ***/
662 PQMOD_R0(&stack);
663
664 if ( X ) PQADD_R0 ( &stack, X, &stack );
665
666 PQADD_R0(&part1, &stack, result); /* Whole thing */
667
668
669 return;
670
671 } /* PQMUL_R2 */
672
673
674
675 /************
676 PQLSH_R0
677 ************/
678
PQLSH_R0(quad * U,quad * result)679 static void PQLSH_R0 (quad *U, quad *result)
680
681 /*
682 * Computes the product 2^32*U MOD P where P is of the form P = 2^64 - a.
683 * U is a quadword less than P.
684 *
685 * This routine is used by PQMUL in the formation of quadword products in
686 * such a way as to avoid division by modulus the P.
687 * The product 2^64*v + 2^32*u is congruent a*v + 2^32*U MOD P.
688 *
689 * u is the low longword in U
690 * v is the high longword in U
691 */
692
693 {
694 quad stack;
695
696
697 /* Get a*v */
698 EMULQ(U->l_high, a, &stack);
699
700 /* Form Y = 2^32*u */
701 U->l_high = U->l_low;
702 U->l_low = 0;
703
704 /* Get U + Y MOD P */
705 PQADD_R0 (U, &stack, result);
706
707 return;
708
709 } /* PQLSH_R0 */
710
711
712 /*********
713 Purdy
714 *********/
715
Purdy(quad * U)716 static void Purdy (quad *U)
717
718 /*
719 * U : input/output buffer (quadword)
720 *
721 * This routine computes f(U) = p(U) MOD P. Where P is a prime of the form
722 * P = 2^64 - a. The function p is the following polynomial:
723 *
724 * X^n0 + X^n1*C1 + X^3*C2 + X^2*C3 + X*C4 + C5
725 * ^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^
726 * part1 part2
727 *
728 * Note: Part1 is calculated by its congruence (X^(n0-n1) + C1)*X^n1
729 * finding X^n1, X^(n0-n1), X^(n0-n1)+C1, then
730 * (X^(n0-n1) + C1)*X^n1 which equals X^n0 + X^n1*C1
731 *
732 * To minimize the number of multiplications, we evaluate
733 *
734 * f(U) = ((U^(n0-n1) + C1)*U^(n1-1)*U + (U*C2 + C3)*U + C4)*U + C5
735 * ^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^
736 * part1 part2
737 *
738 * The number of multiplies needed for an exponentiation is equal to
739 * the bit position of the most signficant bit + the number of other set
740 * bits in the binary expansion of the exponent. By treating U^n1 as
741 * U*U^(n1-1), we can replace n1-1 with 448*37449 to compute the
742 * value in (1+15+5+8+2)=31 multiplies rather than (23+18)=41 multiplies used
743 * for a direct U^n1. Note that because of the pqexp_pair optimization, the
744 * U^(n0-n1) calculation is done with just 3 multiplies.
745 *
746 */
747
748 {
749 quad *Cptr = (quad *)&C; /* Address of table (C) */
750 static const uaf_lword n0 = 16777213,/* These exponents are prime, but this is */
751 n1 = 16777153; /* not required by the algorithm */
752 static const uaf_lword na = 37449,
753 nb = 448; /* U^n1 = (U^na)^nb * U */
754 /* n0 = 2^24 - 3; n1 = 2^24 - 63; n0-n1 = 60*/
755 quad X, /* The variable X in the polynomial */
756 X_n1, X_n0Mn1,
757 X_na, X_na_nb, /* X raised to na, na*nb */
758 part1, /* Collect the polynomial ... */
759 part2; /* ... in two parts */
760
761
762 /* --------------------------------------------------------------------- */
763
764
765 /* Ensure U less than P by taking U MOD P */
766 /* Save copy of result: X = U MOD P */
767 X.l_low = U->l_low;
768 X.l_high = U->l_high;
769
770 if ( UAF_QW_GEQ(X.uqw, P.uqw) ) UAF_QW_ADD(X.uqw,a); /* Take X mod P */
771
772 PQEXP_pair ( &X, 16, (n0-n1), na, &X_n0Mn1, &X_na );
773 PQADD_R0 ( &X_n0Mn1, Cptr, &part1 ); /* X^(n1-n0) + C1 */
774
775 PQEXP_pair ( &X_na, 9, nb, 0, &X_na_nb, 0 ); /* X^na^nb */
776 PQMUL_ADD ( &X_na_nb, &X, 0, &X_n1 ); /* X^na^nb*X = X^n1 */
777
778 /* Part 1 complete except for multiply by X^n1 */
779
780 Cptr++; /* Point to C2 */
781
782 PQMUL_ADD (&X, Cptr, Cptr+1, &part2); /* part2= X*C2 + C3 */
783
784 Cptr += 2; /* C4 */
785 PQMUL_ADD (&X, &part2, Cptr, &part2); /* part2 = part2*X + C4 */
786 /* (X^2*C2 + X*C3 + C4) */
787
788 Cptr++; /* C5 */
789 PQMUL_ADD (&X, &part2, Cptr, &part2);/* part2=part2*X + C5 */
790 /* X^3*C2 + X^2*C3 + X*C4 + C5 */
791 /* Part 2 complete */
792
793
794 /* Do final multiply for part1 and add in part2 to get final result. */
795
796 PQMUL_ADD ( &part1, &X_n1, &part2, (quad *) U );
797
798 /* Outta here... */
799 return;
800
801 } /* Purdy */
802 #endif
803