1 /* sha512.c - Functions to compute SHA512 and SHA384 message digest of files or
2    memory blocks according to the NIST specification FIPS-180-2.
3 
4    Copyright (C) 2005-2006, 2008-2014 Free Software Foundation, Inc.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation, either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18 
19 /* Written by David Madore, considerably copypasting from
20    Scott G. Miller's sha1.c
21 */
22 
23 #include <config.h>
24 
25 #if HAVE_OPENSSL_SHA512
26 # define GL_OPENSSL_INLINE _GL_EXTERN_INLINE
27 #endif
28 #include "sha512.h"
29 
30 #include <stdalign.h>
31 #include <stdint.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
35 #if USE_UNLOCKED_IO
36 # include "unlocked-io.h"
37 #endif
38 
/* SWAP (N) converts the 64-bit word N between host byte order and
   most-significant-byte-first order.  When WORDS_BIGENDIAN is defined
   the host already uses that order and SWAP is the identity.  Otherwise
   the eight bytes of N are mirrored: each byte is isolated with a shift
   and a mask (through the u64* helpers) and the pieces are OR-ed back
   together, here grouped as outermost pair first, innermost pair last.
   N is expanded several times, so it must be free of side effects.  */
#ifdef WORDS_BIGENDIAN
# define SWAP(n) (n)
#else
# define SWAP(n) \
    u64or (u64or (u64or (u64shr (n, 56),                                \
                         u64shl (n, 56)),                               \
                  u64or (u64and (u64shr (n, 40), u64lo (0x0000ff00)),   \
                         u64shl (u64and (n, u64lo (0x0000ff00)), 40))), \
           u64or (u64or (u64and (u64shr (n, 24), u64lo (0x00ff0000)),   \
                         u64shl (u64and (n, u64lo (0x00ff0000)), 24)),  \
                  u64or (u64and (u64shr (n,  8), u64lo (0xff000000)),   \
                         u64shl (u64and (n, u64lo (0xff000000)),  8))))
#endif
52 
/* Number of bytes requested from a stream per read pass.  A full
   buffer is handed straight to sha512_process_block, which requires a
   length that is a multiple of 128, hence the compile-time check.  */
#define BLOCKSIZE 32768
#if (BLOCKSIZE % 128) != 0
# error "invalid BLOCKSIZE"
#endif
57 
58 #if ! HAVE_OPENSSL_SHA512
/* Padding source used when a message is concluded: a single 0x80
   marker byte followed by zero bytes, enough to pad the buffer up to
   the next 128-byte boundary.  Only the first element is spelled out;
   the remaining 127 are zero-initialized by the language.  */
static const unsigned char fillbuf[128] = { 0x80 };
62 
63 
64 /*
65   Takes a pointer to a 512 bit block of data (eight 64 bit ints) and
66   initializes it to the start constants of the SHA512 algorithm.  This
67   must be called before using hash in the call to sha512_hash
68 */
69 void
sha512_init_ctx(struct sha512_ctx * ctx)70 sha512_init_ctx (struct sha512_ctx *ctx)
71 {
72   ctx->state[0] = u64hilo (0x6a09e667, 0xf3bcc908);
73   ctx->state[1] = u64hilo (0xbb67ae85, 0x84caa73b);
74   ctx->state[2] = u64hilo (0x3c6ef372, 0xfe94f82b);
75   ctx->state[3] = u64hilo (0xa54ff53a, 0x5f1d36f1);
76   ctx->state[4] = u64hilo (0x510e527f, 0xade682d1);
77   ctx->state[5] = u64hilo (0x9b05688c, 0x2b3e6c1f);
78   ctx->state[6] = u64hilo (0x1f83d9ab, 0xfb41bd6b);
79   ctx->state[7] = u64hilo (0x5be0cd19, 0x137e2179);
80 
81   ctx->total[0] = ctx->total[1] = u64lo (0);
82   ctx->buflen = 0;
83 }
84 
85 void
sha384_init_ctx(struct sha512_ctx * ctx)86 sha384_init_ctx (struct sha512_ctx *ctx)
87 {
88   ctx->state[0] = u64hilo (0xcbbb9d5d, 0xc1059ed8);
89   ctx->state[1] = u64hilo (0x629a292a, 0x367cd507);
90   ctx->state[2] = u64hilo (0x9159015a, 0x3070dd17);
91   ctx->state[3] = u64hilo (0x152fecd8, 0xf70e5939);
92   ctx->state[4] = u64hilo (0x67332667, 0xffc00b31);
93   ctx->state[5] = u64hilo (0x8eb44a87, 0x68581511);
94   ctx->state[6] = u64hilo (0xdb0c2e0d, 0x64f98fa7);
95   ctx->state[7] = u64hilo (0x47b5481d, 0xbefa4fa4);
96 
97   ctx->total[0] = ctx->total[1] = u64lo (0);
98   ctx->buflen = 0;
99 }
100 
101 /* Copy the value from V into the memory location pointed to by *CP,
102    If your architecture allows unaligned access, this is equivalent to
103    * (__typeof__ (v) *) cp = v  */
104 static void
set_uint64(char * cp,u64 v)105 set_uint64 (char *cp, u64 v)
106 {
107   memcpy (cp, &v, sizeof v);
108 }
109 
110 /* Put result from CTX in first 64 bytes following RESBUF.
111    The result must be in little endian byte order.  */
112 void *
sha512_read_ctx(const struct sha512_ctx * ctx,void * resbuf)113 sha512_read_ctx (const struct sha512_ctx *ctx, void *resbuf)
114 {
115   int i;
116   char *r = resbuf;
117 
118   for (i = 0; i < 8; i++)
119     set_uint64 (r + i * sizeof ctx->state[0], SWAP (ctx->state[i]));
120 
121   return resbuf;
122 }
123 
124 void *
sha384_read_ctx(const struct sha512_ctx * ctx,void * resbuf)125 sha384_read_ctx (const struct sha512_ctx *ctx, void *resbuf)
126 {
127   int i;
128   char *r = resbuf;
129 
130   for (i = 0; i < 6; i++)
131     set_uint64 (r + i * sizeof ctx->state[0], SWAP (ctx->state[i]));
132 
133   return resbuf;
134 }
135 
136 /* Process the remaining bytes in the internal buffer and the usual
137    prolog according to the standard and write the result to RESBUF.  */
138 static void
sha512_conclude_ctx(struct sha512_ctx * ctx)139 sha512_conclude_ctx (struct sha512_ctx *ctx)
140 {
141   /* Take yet unprocessed bytes into account.  */
142   size_t bytes = ctx->buflen;
143   size_t size = (bytes < 112) ? 128 / 8 : 128 * 2 / 8;
144 
145   /* Now count remaining bytes.  */
146   ctx->total[0] = u64plus (ctx->total[0], u64lo (bytes));
147   if (u64lt (ctx->total[0], u64lo (bytes)))
148     ctx->total[1] = u64plus (ctx->total[1], u64lo (1));
149 
150   /* Put the 128-bit file length in *bits* at the end of the buffer.
151      Use set_uint64 rather than a simple assignment, to avoid risk of
152      unaligned access.  */
153   set_uint64 ((char *) &ctx->buffer[size - 2],
154               SWAP (u64or (u64shl (ctx->total[1], 3),
155                            u64shr (ctx->total[0], 61))));
156   set_uint64 ((char *) &ctx->buffer[size - 1],
157               SWAP (u64shl (ctx->total[0], 3)));
158 
159   memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 8 - bytes);
160 
161   /* Process last bytes.  */
162   sha512_process_block (ctx->buffer, size * 8, ctx);
163 }
164 
/* Conclude the SHA-512 computation in CTX (padding and final block)
   and store the 64-byte digest at RESBUF.  Returns whatever
   sha512_read_ctx returns, which is RESBUF.  */
void *
sha512_finish_ctx (struct sha512_ctx *ctx, void *resbuf)
{
  void *digest;

  sha512_conclude_ctx (ctx);
  digest = sha512_read_ctx (ctx, resbuf);
  return digest;
}
171 
/* Conclude the SHA-384 computation in CTX (padding and final block)
   and store the 48-byte digest at RESBUF.  Returns whatever
   sha384_read_ctx returns, which is RESBUF.  */
void *
sha384_finish_ctx (struct sha512_ctx *ctx, void *resbuf)
{
  void *digest;

  sha512_conclude_ctx (ctx);
  digest = sha384_read_ctx (ctx, resbuf);
  return digest;
}
178 #endif
179 
180 /* Compute SHA512 message digest for bytes read from STREAM.  The
181    resulting message digest number will be written into the 64 bytes
182    beginning at RESBLOCK.  */
183 int
sha512_stream(FILE * stream,void * resblock)184 sha512_stream (FILE *stream, void *resblock)
185 {
186   struct sha512_ctx ctx;
187   size_t sum;
188 
189   char *buffer = malloc (BLOCKSIZE + 72);
190   if (!buffer)
191     return 1;
192 
193   /* Initialize the computation context.  */
194   sha512_init_ctx (&ctx);
195 
196   /* Iterate over full file contents.  */
197   while (1)
198     {
199       /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
200          computation function processes the whole buffer so that with the
201          next round of the loop another block can be read.  */
202       size_t n;
203       sum = 0;
204 
205       /* Read block.  Take care for partial reads.  */
206       while (1)
207         {
208           n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
209 
210           sum += n;
211 
212           if (sum == BLOCKSIZE)
213             break;
214 
215           if (n == 0)
216             {
217               /* Check for the error flag IFF N == 0, so that we don't
218                  exit the loop after a partial read due to e.g., EAGAIN
219                  or EWOULDBLOCK.  */
220               if (ferror (stream))
221                 {
222                   free (buffer);
223                   return 1;
224                 }
225               goto process_partial_block;
226             }
227 
228           /* We've read at least one byte, so ignore errors.  But always
229              check for EOF, since feof may be true even though N > 0.
230              Otherwise, we could end up calling fread after EOF.  */
231           if (feof (stream))
232             goto process_partial_block;
233         }
234 
235       /* Process buffer with BLOCKSIZE bytes.  Note that
236                         BLOCKSIZE % 128 == 0
237        */
238       sha512_process_block (buffer, BLOCKSIZE, &ctx);
239     }
240 
241  process_partial_block:;
242 
243   /* Process any remaining bytes.  */
244   if (sum > 0)
245     sha512_process_bytes (buffer, sum, &ctx);
246 
247   /* Construct result in desired memory.  */
248   sha512_finish_ctx (&ctx, resblock);
249   free (buffer);
250   return 0;
251 }
252 
253 /* FIXME: Avoid code duplication */
254 int
sha384_stream(FILE * stream,void * resblock)255 sha384_stream (FILE *stream, void *resblock)
256 {
257   struct sha512_ctx ctx;
258   size_t sum;
259 
260   char *buffer = malloc (BLOCKSIZE + 72);
261   if (!buffer)
262     return 1;
263 
264   /* Initialize the computation context.  */
265   sha384_init_ctx (&ctx);
266 
267   /* Iterate over full file contents.  */
268   while (1)
269     {
270       /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
271          computation function processes the whole buffer so that with the
272          next round of the loop another block can be read.  */
273       size_t n;
274       sum = 0;
275 
276       /* Read block.  Take care for partial reads.  */
277       while (1)
278         {
279           n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
280 
281           sum += n;
282 
283           if (sum == BLOCKSIZE)
284             break;
285 
286           if (n == 0)
287             {
288               /* Check for the error flag IFF N == 0, so that we don't
289                  exit the loop after a partial read due to e.g., EAGAIN
290                  or EWOULDBLOCK.  */
291               if (ferror (stream))
292                 {
293                   free (buffer);
294                   return 1;
295                 }
296               goto process_partial_block;
297             }
298 
299           /* We've read at least one byte, so ignore errors.  But always
300              check for EOF, since feof may be true even though N > 0.
301              Otherwise, we could end up calling fread after EOF.  */
302           if (feof (stream))
303             goto process_partial_block;
304         }
305 
306       /* Process buffer with BLOCKSIZE bytes.  Note that
307                         BLOCKSIZE % 128 == 0
308        */
309       sha512_process_block (buffer, BLOCKSIZE, &ctx);
310     }
311 
312  process_partial_block:;
313 
314   /* Process any remaining bytes.  */
315   if (sum > 0)
316     sha512_process_bytes (buffer, sum, &ctx);
317 
318   /* Construct result in desired memory.  */
319   sha384_finish_ctx (&ctx, resblock);
320   free (buffer);
321   return 0;
322 }
323 
324 #if ! HAVE_OPENSSL_SHA512
325 /* Compute SHA512 message digest for LEN bytes beginning at BUFFER.  The
326    result is always in little endian byte order, so that a byte-wise
327    output yields to the wanted ASCII representation of the message
328    digest.  */
329 void *
sha512_buffer(const char * buffer,size_t len,void * resblock)330 sha512_buffer (const char *buffer, size_t len, void *resblock)
331 {
332   struct sha512_ctx ctx;
333 
334   /* Initialize the computation context.  */
335   sha512_init_ctx (&ctx);
336 
337   /* Process whole buffer but last len % 128 bytes.  */
338   sha512_process_bytes (buffer, len, &ctx);
339 
340   /* Put result in desired memory area.  */
341   return sha512_finish_ctx (&ctx, resblock);
342 }
343 
344 void *
sha384_buffer(const char * buffer,size_t len,void * resblock)345 sha384_buffer (const char *buffer, size_t len, void *resblock)
346 {
347   struct sha512_ctx ctx;
348 
349   /* Initialize the computation context.  */
350   sha384_init_ctx (&ctx);
351 
352   /* Process whole buffer but last len % 128 bytes.  */
353   sha512_process_bytes (buffer, len, &ctx);
354 
355   /* Put result in desired memory area.  */
356   return sha384_finish_ctx (&ctx, resblock);
357 }
358 
359 void
sha512_process_bytes(const void * buffer,size_t len,struct sha512_ctx * ctx)360 sha512_process_bytes (const void *buffer, size_t len, struct sha512_ctx *ctx)
361 {
362   /* When we already have some bits in our internal buffer concatenate
363      both inputs first.  */
364   if (ctx->buflen != 0)
365     {
366       size_t left_over = ctx->buflen;
367       size_t add = 256 - left_over > len ? len : 256 - left_over;
368 
369       memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
370       ctx->buflen += add;
371 
372       if (ctx->buflen > 128)
373         {
374           sha512_process_block (ctx->buffer, ctx->buflen & ~127, ctx);
375 
376           ctx->buflen &= 127;
377           /* The regions in the following copy operation cannot overlap.  */
378           memcpy (ctx->buffer,
379                   &((char *) ctx->buffer)[(left_over + add) & ~127],
380                   ctx->buflen);
381         }
382 
383       buffer = (const char *) buffer + add;
384       len -= add;
385     }
386 
387   /* Process available complete blocks.  */
388   if (len >= 128)
389     {
390 #if !_STRING_ARCH_unaligned
391 # define UNALIGNED_P(p) ((uintptr_t) (p) % alignof (u64) != 0)
392       if (UNALIGNED_P (buffer))
393         while (len > 128)
394           {
395             sha512_process_block (memcpy (ctx->buffer, buffer, 128), 128, ctx);
396             buffer = (const char *) buffer + 128;
397             len -= 128;
398           }
399       else
400 #endif
401         {
402           sha512_process_block (buffer, len & ~127, ctx);
403           buffer = (const char *) buffer + (len & ~127);
404           len &= 127;
405         }
406     }
407 
408   /* Move remaining bytes in internal buffer.  */
409   if (len > 0)
410     {
411       size_t left_over = ctx->buflen;
412 
413       memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
414       left_over += len;
415       if (left_over >= 128)
416         {
417           sha512_process_block (ctx->buffer, 128, ctx);
418           left_over -= 128;
419           memcpy (ctx->buffer, &ctx->buffer[16], left_over);
420         }
421       ctx->buflen = left_over;
422     }
423 }
424 
425 /* --- Code below is the primary difference between sha1.c and sha512.c --- */
426 
/* SHA512 round constants: eighty 64-bit words, one per round of the
   compression loop in sha512_process_block.  K (I) selects the constant
   for round I, 0 <= I < 80.  Each entry is built with u64init from two
   32-bit halves so that the table can be a static initializer.  */
#define K(I) sha512_round_constants[I]
static u64 const sha512_round_constants[80] = {
  u64init (0x428a2f98, 0xd728ae22), u64init (0x71374491, 0x23ef65cd),
  u64init (0xb5c0fbcf, 0xec4d3b2f), u64init (0xe9b5dba5, 0x8189dbbc),
  u64init (0x3956c25b, 0xf348b538), u64init (0x59f111f1, 0xb605d019),
  u64init (0x923f82a4, 0xaf194f9b), u64init (0xab1c5ed5, 0xda6d8118),
  u64init (0xd807aa98, 0xa3030242), u64init (0x12835b01, 0x45706fbe),
  u64init (0x243185be, 0x4ee4b28c), u64init (0x550c7dc3, 0xd5ffb4e2),
  u64init (0x72be5d74, 0xf27b896f), u64init (0x80deb1fe, 0x3b1696b1),
  u64init (0x9bdc06a7, 0x25c71235), u64init (0xc19bf174, 0xcf692694),
  u64init (0xe49b69c1, 0x9ef14ad2), u64init (0xefbe4786, 0x384f25e3),
  u64init (0x0fc19dc6, 0x8b8cd5b5), u64init (0x240ca1cc, 0x77ac9c65),
  u64init (0x2de92c6f, 0x592b0275), u64init (0x4a7484aa, 0x6ea6e483),
  u64init (0x5cb0a9dc, 0xbd41fbd4), u64init (0x76f988da, 0x831153b5),
  u64init (0x983e5152, 0xee66dfab), u64init (0xa831c66d, 0x2db43210),
  u64init (0xb00327c8, 0x98fb213f), u64init (0xbf597fc7, 0xbeef0ee4),
  u64init (0xc6e00bf3, 0x3da88fc2), u64init (0xd5a79147, 0x930aa725),
  u64init (0x06ca6351, 0xe003826f), u64init (0x14292967, 0x0a0e6e70),
  u64init (0x27b70a85, 0x46d22ffc), u64init (0x2e1b2138, 0x5c26c926),
  u64init (0x4d2c6dfc, 0x5ac42aed), u64init (0x53380d13, 0x9d95b3df),
  u64init (0x650a7354, 0x8baf63de), u64init (0x766a0abb, 0x3c77b2a8),
  u64init (0x81c2c92e, 0x47edaee6), u64init (0x92722c85, 0x1482353b),
  u64init (0xa2bfe8a1, 0x4cf10364), u64init (0xa81a664b, 0xbc423001),
  u64init (0xc24b8b70, 0xd0f89791), u64init (0xc76c51a3, 0x0654be30),
  u64init (0xd192e819, 0xd6ef5218), u64init (0xd6990624, 0x5565a910),
  u64init (0xf40e3585, 0x5771202a), u64init (0x106aa070, 0x32bbd1b8),
  u64init (0x19a4c116, 0xb8d2d0c8), u64init (0x1e376c08, 0x5141ab53),
  u64init (0x2748774c, 0xdf8eeb99), u64init (0x34b0bcb5, 0xe19b48a8),
  u64init (0x391c0cb3, 0xc5c95a63), u64init (0x4ed8aa4a, 0xe3418acb),
  u64init (0x5b9cca4f, 0x7763e373), u64init (0x682e6ff3, 0xd6b2b8a3),
  u64init (0x748f82ee, 0x5defb2fc), u64init (0x78a5636f, 0x43172f60),
  u64init (0x84c87814, 0xa1f0ab72), u64init (0x8cc70208, 0x1a6439ec),
  u64init (0x90befffa, 0x23631e28), u64init (0xa4506ceb, 0xde82bde9),
  u64init (0xbef9a3f7, 0xb2c67915), u64init (0xc67178f2, 0xe372532b),
  u64init (0xca273ece, 0xea26619c), u64init (0xd186b8c7, 0x21c0c207),
  u64init (0xeada7dd6, 0xcde0eb1e), u64init (0xf57d4f7f, 0xee6ed178),
  u64init (0x06f067aa, 0x72176fba), u64init (0x0a637dc5, 0xa2c898a6),
  u64init (0x113f9804, 0xbef90dae), u64init (0x1b710b35, 0x131c471b),
  u64init (0x28db77f5, 0x23047d84), u64init (0x32caab7b, 0x40c72493),
  u64init (0x3c9ebe0a, 0x15c9bebc), u64init (0x431d67c4, 0x9c100d4c),
  u64init (0x4cc5d4be, 0xcb3e42b6), u64init (0x597f299c, 0xfc657e2a),
  u64init (0x5fcb6fab, 0x3ad6faec), u64init (0x6c44198c, 0x4a475817),
};
471 
/* Bitwise round functions used by the R macro in sha512_process_block,
   written with the u64 helper operations.

   F2 (A, B, C) is the majority function: a result bit is set when at
   least two of the corresponding bits of A, B and C are set.

   F1 (E, F, G) is the choice function: each result bit is taken from F
   where the corresponding bit of E is set and from G where it is
   clear.

   Both macros expand their arguments more than once.  */
#define F2(A, B, C) u64or (u64and (C, u64or (A, B)), u64and (A, B))
#define F1(E, F, G) u64xor (u64and (E, u64xor (F, G)), G)
475 
/* Process LEN bytes of BUFFER, accumulating context into CTX.
   It is assumed that LEN % 128 == 0.
   Most of this code comes from GnuPG's cipher/sha1.c.

   BUFFER is read through a u64 pointer, so it has to be suitably
   aligned for u64 access unless the platform tolerates unaligned
   loads; sha512_process_bytes copies unaligned input into CTX->buffer
   before calling this function.

   For every 128-byte block the eight state words are run through 80
   rounds and the outcome is added back into CTX->state.  CTX->total is
   advanced by LEN; CTX->buffer and CTX->buflen are not touched.  */

void
sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx)
{
  /* The input viewed as 64-bit words, and the end of the input.  */
  u64 const *words = buffer;
  u64 const *endp = words + len / sizeof (u64);
  /* Message schedule for the current block, kept as a 16-word circular
     window: the M macro below always indexes it modulo 16.  */
  u64 x[16];
  /* Working copies of the eight state words.  */
  u64 a = ctx->state[0];
  u64 b = ctx->state[1];
  u64 c = ctx->state[2];
  u64 d = ctx->state[3];
  u64 e = ctx->state[4];
  u64 f = ctx->state[5];
  u64 g = ctx->state[6];
  u64 h = ctx->state[7];
  u64 lolen = u64size (len);

  /* First increment the byte count.  FIPS PUB 180-2 specifies the possible
     length of the file up to 2^128 bits.  Here we only compute the
     number of bytes.  Do a double word increment.

     The low word receives LEN; the high word receives the part of LEN
     above 64 bits plus the carry out of the low word (detected by the
     new low word being smaller than the addend).  LEN is shifted right
     by 64 bits in three steps of 31, 31 and 2; presumably this keeps
     each individual shift count below the width of size_t.  */
  ctx->total[0] = u64plus (ctx->total[0], lolen);
  ctx->total[1] = u64plus (ctx->total[1],
                           u64plus (u64size (len >> 31 >> 31 >> 2),
                                    u64lo (u64lt (ctx->total[0], lolen))));

  /* S0 and S1 are the mixing functions used to extend the message
     schedule; SS0 and SS1 are the ones applied to the working variables
     A and E in each round.  They are written with u64rol; assuming that
     is a left rotation, rotating left by K is the same as rotating
     right by 64 - K.  */
#define S0(x) u64xor (u64rol(x, 63), u64xor (u64rol (x, 56), u64shr (x, 7)))
#define S1(x) u64xor (u64rol (x, 45), u64xor (u64rol (x, 3), u64shr (x, 6)))
#define SS0(x) u64xor (u64rol (x, 36), u64xor (u64rol (x, 30), u64rol (x, 25)))
#define SS1(x) u64xor (u64rol(x, 50), u64xor (u64rol (x, 46), u64rol (x, 23)))

  /* M (I) computes schedule word I (for I >= 16) from the words 2, 7,
     15 and 16 positions back, stores it over the oldest slot of the
     circular window, and yields the new value.  */
#define M(I) (x[(I) & 15]                                                 \
              = u64plus (x[(I) & 15],                                     \
                         u64plus (S1 (x[((I) - 2) & 15]),                 \
                                  u64plus (x[((I) - 7) & 15],             \
                                           S0 (x[((I) - 15) & 15])))))

  /* R performs one round.  Only D and H are assigned; instead of
     shifting values between the eight variables after each round, the
     invocations below rotate the order in which the variables are
     passed.  K is the round constant and M the schedule word.  */
#define R(A, B, C, D, E, F, G, H, K, M)                                   \
  do                                                                      \
    {                                                                     \
      u64 t0 = u64plus (SS0 (A), F2 (A, B, C));                           \
      u64 t1 =                                                            \
        u64plus (H, u64plus (SS1 (E),                                     \
                             u64plus (F1 (E, F, G), u64plus (K, M))));    \
      D = u64plus (D, t1);                                                \
      H = u64plus (t0, t1);                                               \
    }                                                                     \
  while (0)

  /* One iteration per 128-byte block.  */
  while (words < endp)
    {
      int t;
      /* Load the sixteen input words of this block, converting each
         with SWAP (a no-op on big-endian hosts).  */
      /* FIXME: see sha1.c for a better implementation.  */
      for (t = 0; t < 16; t++)
        {
          x[t] = SWAP (*words);
          words++;
        }

      /* Rounds 0-15 use the input words directly; rounds 16-79 extend
         the schedule on the fly with M.  */
      R( a, b, c, d, e, f, g, h, K( 0), x[ 0] );
      R( h, a, b, c, d, e, f, g, K( 1), x[ 1] );
      R( g, h, a, b, c, d, e, f, K( 2), x[ 2] );
      R( f, g, h, a, b, c, d, e, K( 3), x[ 3] );
      R( e, f, g, h, a, b, c, d, K( 4), x[ 4] );
      R( d, e, f, g, h, a, b, c, K( 5), x[ 5] );
      R( c, d, e, f, g, h, a, b, K( 6), x[ 6] );
      R( b, c, d, e, f, g, h, a, K( 7), x[ 7] );
      R( a, b, c, d, e, f, g, h, K( 8), x[ 8] );
      R( h, a, b, c, d, e, f, g, K( 9), x[ 9] );
      R( g, h, a, b, c, d, e, f, K(10), x[10] );
      R( f, g, h, a, b, c, d, e, K(11), x[11] );
      R( e, f, g, h, a, b, c, d, K(12), x[12] );
      R( d, e, f, g, h, a, b, c, K(13), x[13] );
      R( c, d, e, f, g, h, a, b, K(14), x[14] );
      R( b, c, d, e, f, g, h, a, K(15), x[15] );
      R( a, b, c, d, e, f, g, h, K(16), M(16) );
      R( h, a, b, c, d, e, f, g, K(17), M(17) );
      R( g, h, a, b, c, d, e, f, K(18), M(18) );
      R( f, g, h, a, b, c, d, e, K(19), M(19) );
      R( e, f, g, h, a, b, c, d, K(20), M(20) );
      R( d, e, f, g, h, a, b, c, K(21), M(21) );
      R( c, d, e, f, g, h, a, b, K(22), M(22) );
      R( b, c, d, e, f, g, h, a, K(23), M(23) );
      R( a, b, c, d, e, f, g, h, K(24), M(24) );
      R( h, a, b, c, d, e, f, g, K(25), M(25) );
      R( g, h, a, b, c, d, e, f, K(26), M(26) );
      R( f, g, h, a, b, c, d, e, K(27), M(27) );
      R( e, f, g, h, a, b, c, d, K(28), M(28) );
      R( d, e, f, g, h, a, b, c, K(29), M(29) );
      R( c, d, e, f, g, h, a, b, K(30), M(30) );
      R( b, c, d, e, f, g, h, a, K(31), M(31) );
      R( a, b, c, d, e, f, g, h, K(32), M(32) );
      R( h, a, b, c, d, e, f, g, K(33), M(33) );
      R( g, h, a, b, c, d, e, f, K(34), M(34) );
      R( f, g, h, a, b, c, d, e, K(35), M(35) );
      R( e, f, g, h, a, b, c, d, K(36), M(36) );
      R( d, e, f, g, h, a, b, c, K(37), M(37) );
      R( c, d, e, f, g, h, a, b, K(38), M(38) );
      R( b, c, d, e, f, g, h, a, K(39), M(39) );
      R( a, b, c, d, e, f, g, h, K(40), M(40) );
      R( h, a, b, c, d, e, f, g, K(41), M(41) );
      R( g, h, a, b, c, d, e, f, K(42), M(42) );
      R( f, g, h, a, b, c, d, e, K(43), M(43) );
      R( e, f, g, h, a, b, c, d, K(44), M(44) );
      R( d, e, f, g, h, a, b, c, K(45), M(45) );
      R( c, d, e, f, g, h, a, b, K(46), M(46) );
      R( b, c, d, e, f, g, h, a, K(47), M(47) );
      R( a, b, c, d, e, f, g, h, K(48), M(48) );
      R( h, a, b, c, d, e, f, g, K(49), M(49) );
      R( g, h, a, b, c, d, e, f, K(50), M(50) );
      R( f, g, h, a, b, c, d, e, K(51), M(51) );
      R( e, f, g, h, a, b, c, d, K(52), M(52) );
      R( d, e, f, g, h, a, b, c, K(53), M(53) );
      R( c, d, e, f, g, h, a, b, K(54), M(54) );
      R( b, c, d, e, f, g, h, a, K(55), M(55) );
      R( a, b, c, d, e, f, g, h, K(56), M(56) );
      R( h, a, b, c, d, e, f, g, K(57), M(57) );
      R( g, h, a, b, c, d, e, f, K(58), M(58) );
      R( f, g, h, a, b, c, d, e, K(59), M(59) );
      R( e, f, g, h, a, b, c, d, K(60), M(60) );
      R( d, e, f, g, h, a, b, c, K(61), M(61) );
      R( c, d, e, f, g, h, a, b, K(62), M(62) );
      R( b, c, d, e, f, g, h, a, K(63), M(63) );
      R( a, b, c, d, e, f, g, h, K(64), M(64) );
      R( h, a, b, c, d, e, f, g, K(65), M(65) );
      R( g, h, a, b, c, d, e, f, K(66), M(66) );
      R( f, g, h, a, b, c, d, e, K(67), M(67) );
      R( e, f, g, h, a, b, c, d, K(68), M(68) );
      R( d, e, f, g, h, a, b, c, K(69), M(69) );
      R( c, d, e, f, g, h, a, b, K(70), M(70) );
      R( b, c, d, e, f, g, h, a, K(71), M(71) );
      R( a, b, c, d, e, f, g, h, K(72), M(72) );
      R( h, a, b, c, d, e, f, g, K(73), M(73) );
      R( g, h, a, b, c, d, e, f, K(74), M(74) );
      R( f, g, h, a, b, c, d, e, K(75), M(75) );
      R( e, f, g, h, a, b, c, d, K(76), M(76) );
      R( d, e, f, g, h, a, b, c, K(77), M(77) );
      R( c, d, e, f, g, h, a, b, K(78), M(78) );
      R( b, c, d, e, f, g, h, a, K(79), M(79) );

      /* Add the result of this block into the running state, and
         reload the working variables from it for the next block.  */
      a = ctx->state[0] = u64plus (ctx->state[0], a);
      b = ctx->state[1] = u64plus (ctx->state[1], b);
      c = ctx->state[2] = u64plus (ctx->state[2], c);
      d = ctx->state[3] = u64plus (ctx->state[3], d);
      e = ctx->state[4] = u64plus (ctx->state[4], e);
      f = ctx->state[5] = u64plus (ctx->state[5], f);
      g = ctx->state[6] = u64plus (ctx->state[6], g);
      h = ctx->state[7] = u64plus (ctx->state[7], h);
    }
}
628 #endif
629