/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #ifdef FREEBL_NO_DEPEND
6 #include "stubs.h"
7 #endif
8 
9 #include <memory.h>
10 #include "blapi.h"
11 #include "sha_fast.h"
12 #include "prerror.h"
13 
14 #ifdef TRACING_SSL
15 #include "ssl.h"
16 #include "ssltrace.h"
17 #endif
18 
19 static void shaCompress(volatile SHA_HW_t *X, const PRUint32 *datain);
20 
21 #define W u.w
22 #define B u.b
23 
24 #define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
25 #define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
26 #define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
27 #define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))
28 
29 #define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1)
30 
31 /*
32  *  SHA: initialize context
33  */
34 void
SHA1_Begin(SHA1Context * ctx)35 SHA1_Begin(SHA1Context *ctx)
36 {
37     ctx->size = 0;
38     /*
39    *  Initialize H with constants from FIPS180-1.
40    */
41     ctx->H[0] = 0x67452301L;
42     ctx->H[1] = 0xefcdab89L;
43     ctx->H[2] = 0x98badcfeL;
44     ctx->H[3] = 0x10325476L;
45     ctx->H[4] = 0xc3d2e1f0L;
46 }
47 
/* Explanation of H array and index values:
 * The context's H array is actually the concatenation of two arrays
 * defined by SHA1, the H array of state variables (5 elements),
 * and the W array of intermediate values, of which there are 16 elements.
 * The W array starts at H[5], that is W[0] is H[5].
 * Although these values are defined as 32-bit values, we use 64-bit
 * variables to hold them because the AMD64 stores 64 bit values in
 * memory MUCH faster than it stores any smaller values.
 *
 * Rather than passing the context structure to shaCompress, we pass
 * this combined array of H and W values.  We do not pass the address
 * of the first element of this array, but rather pass the address of an
 * element in the middle of the array, element X.  Presently X[0] is H[11].
 * So we pass the address of H[11] as the address of array X to shaCompress.
 * Then shaCompress accesses the members of the array using positive AND
 * negative indexes.
 *
 * Pictorially: (each element is 8 bytes)
 * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
 * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
 *
 * The byte offset from X[0] to any member of H and W is always
 * representable in a signed 8-bit value, which will be encoded
 * as a single byte offset in the X86-64 instruction set.
 * If we didn't pass the address of H[11], and instead passed the
 * address of H[0], the offsets to elements H[16] and above would be
 * greater than 127, not representable in a signed 8-bit value, and the
 * x86-64 instruction set would encode every such offset as a 32-bit
 * signed number in each instruction that accessed element H[16] or
 * higher.  This results in much bigger and slower code.
 *
 * When SHA_PUT_W_IN_STACK is defined, shaCompress keeps the W values in
 * local variables instead, so X[0] is simply H[0] (H2X is 0) and W2X is
 * not defined.
 */
#if !defined(SHA_PUT_W_IN_STACK)
#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
#define W2X 6  /* X[0] is W[6],  and W[0] is X[-6]  */
#else
#define H2X 0 /* X[0] is H[0]; W lives in locals of shaCompress */
#endif
85 
86 /*
87  *  SHA: Add data to context.
88  */
89 void
SHA1_Update(SHA1Context * ctx,const unsigned char * dataIn,unsigned int len)90 SHA1_Update(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
91 {
92     register unsigned int lenB;
93     register unsigned int togo;
94 
95     if (!len)
96         return;
97 
98     /* accumulate the byte count. */
99     lenB = (unsigned int)(ctx->size) & 63U;
100 
101     ctx->size += len;
102 
103     /*
104    *  Read the data into W and process blocks as they get full
105    */
106     if (lenB > 0) {
107         togo = 64U - lenB;
108         if (len < togo)
109             togo = len;
110         memcpy(ctx->B + lenB, dataIn, togo);
111         len -= togo;
112         dataIn += togo;
113         lenB = (lenB + togo) & 63U;
114         if (!lenB) {
115             shaCompress(&ctx->H[H2X], ctx->W);
116         }
117     }
118 #if !defined(HAVE_UNALIGNED_ACCESS)
119     if ((ptrdiff_t)dataIn % sizeof(PRUint32)) {
120         while (len >= 64U) {
121             memcpy(ctx->B, dataIn, 64);
122             len -= 64U;
123             shaCompress(&ctx->H[H2X], ctx->W);
124             dataIn += 64U;
125         }
126     } else
127 #endif
128     {
129         while (len >= 64U) {
130             len -= 64U;
131             shaCompress(&ctx->H[H2X], (PRUint32 *)dataIn);
132             dataIn += 64U;
133         }
134     }
135     if (len) {
136         memcpy(ctx->B, dataIn, len);
137     }
138 }
139 
140 /*
141  *  SHA: Generate hash value from context
142  */
143 void NO_SANITIZE_ALIGNMENT
SHA1_End(SHA1Context * ctx,unsigned char * hashout,unsigned int * pDigestLen,unsigned int maxDigestLen)144 SHA1_End(SHA1Context *ctx, unsigned char *hashout,
145          unsigned int *pDigestLen, unsigned int maxDigestLen)
146 {
147     register PRUint64 size;
148     register PRUint32 lenB;
149 
150     static const unsigned char bulk_pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0,
151                                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
152                                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
153 #define tmp lenB
154 
155     PORT_Assert(maxDigestLen >= SHA1_LENGTH);
156 
157     /*
158    *  Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits
159    */
160     size = ctx->size;
161 
162     lenB = (PRUint32)size & 63;
163     SHA1_Update(ctx, bulk_pad, (((55 + 64) - lenB) & 63) + 1);
164     PORT_Assert(((PRUint32)ctx->size & 63) == 56);
165     /* Convert size from bytes to bits. */
166     size <<= 3;
167     ctx->W[14] = SHA_HTONL((PRUint32)(size >> 32));
168     ctx->W[15] = SHA_HTONL((PRUint32)size);
169     shaCompress(&ctx->H[H2X], ctx->W);
170 
171     /*
172      *  Output hash
173      */
174     SHA_STORE_RESULT;
175     if (pDigestLen) {
176         *pDigestLen = SHA1_LENGTH;
177     }
178 #undef tmp
179 }
180 
181 void
SHA1_EndRaw(SHA1Context * ctx,unsigned char * hashout,unsigned int * pDigestLen,unsigned int maxDigestLen)182 SHA1_EndRaw(SHA1Context *ctx, unsigned char *hashout,
183             unsigned int *pDigestLen, unsigned int maxDigestLen)
184 {
185 #if defined(SHA_NEED_TMP_VARIABLE)
186     register PRUint32 tmp;
187 #endif
188     PORT_Assert(maxDigestLen >= SHA1_LENGTH);
189 
190     SHA_STORE_RESULT;
191     if (pDigestLen)
192         *pDigestLen = SHA1_LENGTH;
193 }
194 
195 #undef B
196 /*
197  *  SHA: Compression function, unrolled.
198  *
199  * Some operations in shaCompress are done as 5 groups of 16 operations.
200  * Others are done as 4 groups of 20 operations.
201  * The code below shows that structure.
202  *
203  * The functions that compute the new values of the 5 state variables
204  * A-E are done in 4 groups of 20 operations (or you may also think
205  * of them as being done in 16 groups of 5 operations).  They are
206  * done by the SHA_RNDx macros below, in the right column.
207  *
208  * The functions that set the 16 values of the W array are done in
209  * 5 groups of 16 operations.  The first group is done by the
210  * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
211  * in the left column.
212  *
213  * gcc's optimizer observes that each member of the W array is assigned
214  * a value 5 times in this code.  It reduces the number of store
215  * operations done to the W array in the context (that is, in the X array)
216  * by creating a W array on the stack, and storing the W values there for
217  * the first 4 groups of operations on W, and storing the values in the
218  * context's W array only in the fifth group.  This is undesirable.
219  * It is MUCH bigger code than simply using the context's W array, because
220  * all the offsets to the W array in the stack are 32-bit signed offsets,
221  * and it is no faster than storing the values in the context's W array.
222  *
223  * The original code for sha_fast.c prevented this creation of a separate
224  * W array in the stack by creating a W array of 80 members, each of
225  * whose elements is assigned only once. It also separated the computations
226  * of the W array values and the computations of the values for the 5
227  * state variables into two separate passes, W's, then A-E's so that the
228  * second pass could be done all in registers (except for accessing the W
229  * array) on machines with fewer registers.  The method is suboptimal
230  * for machines with enough registers to do it all in one pass, and it
231  * necessitates using many instructions with 32-bit offsets.
232  *
233  * This code eliminates the separate W array on the stack by a completely
234  * different means: by declaring the X array volatile.  This prevents
235  * the optimizer from trying to reduce the use of the X array by the
236  * creation of a MORE expensive W array on the stack. The result is
237  * that all instructions use signed 8-bit offsets and not 32-bit offsets.
238  *
239  * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
240  * results in code that is 3 times faster than the previous NSS sha_fast
241  * code on AMD64.
242  */
243 static void NO_SANITIZE_ALIGNMENT
shaCompress(volatile SHA_HW_t * X,const PRUint32 * inbuf)244 shaCompress(volatile SHA_HW_t *X, const PRUint32 *inbuf)
245 {
246     register SHA_HW_t A, B, C, D, E;
247 
248 #if defined(SHA_NEED_TMP_VARIABLE)
249     register PRUint32 tmp;
250 #endif
251 
252 #if !defined(SHA_PUT_W_IN_STACK)
253 #define XH(n) X[n - H2X]
254 #define XW(n) X[n - W2X]
255 #else
256     SHA_HW_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7,
257         w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
258 #define XW(n) w_##n
259 #define XH(n) X[n]
260 #endif
261 
262 #define K0 0x5a827999L
263 #define K1 0x6ed9eba1L
264 #define K2 0x8f1bbcdcL
265 #define K3 0xca62c1d6L
266 
267 #define SHA_RND1(a, b, c, d, e, n)                         \
268     a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \
269     c = SHA_ROTL(c, 30)
270 #define SHA_RND2(a, b, c, d, e, n)                         \
271     a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \
272     c = SHA_ROTL(c, 30)
273 #define SHA_RND3(a, b, c, d, e, n)                         \
274     a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \
275     c = SHA_ROTL(c, 30)
276 #define SHA_RND4(a, b, c, d, e, n)                         \
277     a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \
278     c = SHA_ROTL(c, 30)
279 
280 #define LOAD(n) XW(n) = SHA_HTONL(inbuf[n])
281 
282     A = XH(0);
283     B = XH(1);
284     C = XH(2);
285     D = XH(3);
286     E = XH(4);
287 
288     LOAD(0);
289     SHA_RND1(E, A, B, C, D, 0);
290     LOAD(1);
291     SHA_RND1(D, E, A, B, C, 1);
292     LOAD(2);
293     SHA_RND1(C, D, E, A, B, 2);
294     LOAD(3);
295     SHA_RND1(B, C, D, E, A, 3);
296     LOAD(4);
297     SHA_RND1(A, B, C, D, E, 4);
298     LOAD(5);
299     SHA_RND1(E, A, B, C, D, 5);
300     LOAD(6);
301     SHA_RND1(D, E, A, B, C, 6);
302     LOAD(7);
303     SHA_RND1(C, D, E, A, B, 7);
304     LOAD(8);
305     SHA_RND1(B, C, D, E, A, 8);
306     LOAD(9);
307     SHA_RND1(A, B, C, D, E, 9);
308     LOAD(10);
309     SHA_RND1(E, A, B, C, D, 10);
310     LOAD(11);
311     SHA_RND1(D, E, A, B, C, 11);
312     LOAD(12);
313     SHA_RND1(C, D, E, A, B, 12);
314     LOAD(13);
315     SHA_RND1(B, C, D, E, A, 13);
316     LOAD(14);
317     SHA_RND1(A, B, C, D, E, 14);
318     LOAD(15);
319     SHA_RND1(E, A, B, C, D, 15);
320 
321     SHA_MIX(0, 13, 8, 2);
322     SHA_RND1(D, E, A, B, C, 0);
323     SHA_MIX(1, 14, 9, 3);
324     SHA_RND1(C, D, E, A, B, 1);
325     SHA_MIX(2, 15, 10, 4);
326     SHA_RND1(B, C, D, E, A, 2);
327     SHA_MIX(3, 0, 11, 5);
328     SHA_RND1(A, B, C, D, E, 3);
329 
330     SHA_MIX(4, 1, 12, 6);
331     SHA_RND2(E, A, B, C, D, 4);
332     SHA_MIX(5, 2, 13, 7);
333     SHA_RND2(D, E, A, B, C, 5);
334     SHA_MIX(6, 3, 14, 8);
335     SHA_RND2(C, D, E, A, B, 6);
336     SHA_MIX(7, 4, 15, 9);
337     SHA_RND2(B, C, D, E, A, 7);
338     SHA_MIX(8, 5, 0, 10);
339     SHA_RND2(A, B, C, D, E, 8);
340     SHA_MIX(9, 6, 1, 11);
341     SHA_RND2(E, A, B, C, D, 9);
342     SHA_MIX(10, 7, 2, 12);
343     SHA_RND2(D, E, A, B, C, 10);
344     SHA_MIX(11, 8, 3, 13);
345     SHA_RND2(C, D, E, A, B, 11);
346     SHA_MIX(12, 9, 4, 14);
347     SHA_RND2(B, C, D, E, A, 12);
348     SHA_MIX(13, 10, 5, 15);
349     SHA_RND2(A, B, C, D, E, 13);
350     SHA_MIX(14, 11, 6, 0);
351     SHA_RND2(E, A, B, C, D, 14);
352     SHA_MIX(15, 12, 7, 1);
353     SHA_RND2(D, E, A, B, C, 15);
354 
355     SHA_MIX(0, 13, 8, 2);
356     SHA_RND2(C, D, E, A, B, 0);
357     SHA_MIX(1, 14, 9, 3);
358     SHA_RND2(B, C, D, E, A, 1);
359     SHA_MIX(2, 15, 10, 4);
360     SHA_RND2(A, B, C, D, E, 2);
361     SHA_MIX(3, 0, 11, 5);
362     SHA_RND2(E, A, B, C, D, 3);
363     SHA_MIX(4, 1, 12, 6);
364     SHA_RND2(D, E, A, B, C, 4);
365     SHA_MIX(5, 2, 13, 7);
366     SHA_RND2(C, D, E, A, B, 5);
367     SHA_MIX(6, 3, 14, 8);
368     SHA_RND2(B, C, D, E, A, 6);
369     SHA_MIX(7, 4, 15, 9);
370     SHA_RND2(A, B, C, D, E, 7);
371 
372     SHA_MIX(8, 5, 0, 10);
373     SHA_RND3(E, A, B, C, D, 8);
374     SHA_MIX(9, 6, 1, 11);
375     SHA_RND3(D, E, A, B, C, 9);
376     SHA_MIX(10, 7, 2, 12);
377     SHA_RND3(C, D, E, A, B, 10);
378     SHA_MIX(11, 8, 3, 13);
379     SHA_RND3(B, C, D, E, A, 11);
380     SHA_MIX(12, 9, 4, 14);
381     SHA_RND3(A, B, C, D, E, 12);
382     SHA_MIX(13, 10, 5, 15);
383     SHA_RND3(E, A, B, C, D, 13);
384     SHA_MIX(14, 11, 6, 0);
385     SHA_RND3(D, E, A, B, C, 14);
386     SHA_MIX(15, 12, 7, 1);
387     SHA_RND3(C, D, E, A, B, 15);
388 
389     SHA_MIX(0, 13, 8, 2);
390     SHA_RND3(B, C, D, E, A, 0);
391     SHA_MIX(1, 14, 9, 3);
392     SHA_RND3(A, B, C, D, E, 1);
393     SHA_MIX(2, 15, 10, 4);
394     SHA_RND3(E, A, B, C, D, 2);
395     SHA_MIX(3, 0, 11, 5);
396     SHA_RND3(D, E, A, B, C, 3);
397     SHA_MIX(4, 1, 12, 6);
398     SHA_RND3(C, D, E, A, B, 4);
399     SHA_MIX(5, 2, 13, 7);
400     SHA_RND3(B, C, D, E, A, 5);
401     SHA_MIX(6, 3, 14, 8);
402     SHA_RND3(A, B, C, D, E, 6);
403     SHA_MIX(7, 4, 15, 9);
404     SHA_RND3(E, A, B, C, D, 7);
405     SHA_MIX(8, 5, 0, 10);
406     SHA_RND3(D, E, A, B, C, 8);
407     SHA_MIX(9, 6, 1, 11);
408     SHA_RND3(C, D, E, A, B, 9);
409     SHA_MIX(10, 7, 2, 12);
410     SHA_RND3(B, C, D, E, A, 10);
411     SHA_MIX(11, 8, 3, 13);
412     SHA_RND3(A, B, C, D, E, 11);
413 
414     SHA_MIX(12, 9, 4, 14);
415     SHA_RND4(E, A, B, C, D, 12);
416     SHA_MIX(13, 10, 5, 15);
417     SHA_RND4(D, E, A, B, C, 13);
418     SHA_MIX(14, 11, 6, 0);
419     SHA_RND4(C, D, E, A, B, 14);
420     SHA_MIX(15, 12, 7, 1);
421     SHA_RND4(B, C, D, E, A, 15);
422 
423     SHA_MIX(0, 13, 8, 2);
424     SHA_RND4(A, B, C, D, E, 0);
425     SHA_MIX(1, 14, 9, 3);
426     SHA_RND4(E, A, B, C, D, 1);
427     SHA_MIX(2, 15, 10, 4);
428     SHA_RND4(D, E, A, B, C, 2);
429     SHA_MIX(3, 0, 11, 5);
430     SHA_RND4(C, D, E, A, B, 3);
431     SHA_MIX(4, 1, 12, 6);
432     SHA_RND4(B, C, D, E, A, 4);
433     SHA_MIX(5, 2, 13, 7);
434     SHA_RND4(A, B, C, D, E, 5);
435     SHA_MIX(6, 3, 14, 8);
436     SHA_RND4(E, A, B, C, D, 6);
437     SHA_MIX(7, 4, 15, 9);
438     SHA_RND4(D, E, A, B, C, 7);
439     SHA_MIX(8, 5, 0, 10);
440     SHA_RND4(C, D, E, A, B, 8);
441     SHA_MIX(9, 6, 1, 11);
442     SHA_RND4(B, C, D, E, A, 9);
443     SHA_MIX(10, 7, 2, 12);
444     SHA_RND4(A, B, C, D, E, 10);
445     SHA_MIX(11, 8, 3, 13);
446     SHA_RND4(E, A, B, C, D, 11);
447     SHA_MIX(12, 9, 4, 14);
448     SHA_RND4(D, E, A, B, C, 12);
449     SHA_MIX(13, 10, 5, 15);
450     SHA_RND4(C, D, E, A, B, 13);
451     SHA_MIX(14, 11, 6, 0);
452     SHA_RND4(B, C, D, E, A, 14);
453     SHA_MIX(15, 12, 7, 1);
454     SHA_RND4(A, B, C, D, E, 15);
455 
456     XH(0) += A;
457     XH(1) += B;
458     XH(2) += C;
459     XH(3) += D;
460     XH(4) += E;
461 }
462 
463 /*************************************************************************
464 ** Code below this line added to make SHA code support BLAPI interface
465 */
466 
467 SHA1Context *
SHA1_NewContext(void)468 SHA1_NewContext(void)
469 {
470     SHA1Context *cx;
471 
472     /* no need to ZNew, SHA1_Begin will init the context */
473     cx = PORT_New(SHA1Context);
474     return cx;
475 }
476 
477 /* Zero and free the context */
478 void
SHA1_DestroyContext(SHA1Context * cx,PRBool freeit)479 SHA1_DestroyContext(SHA1Context *cx, PRBool freeit)
480 {
481     memset(cx, 0, sizeof *cx);
482     if (freeit) {
483         PORT_Free(cx);
484     }
485 }
486 
487 SECStatus
SHA1_HashBuf(unsigned char * dest,const unsigned char * src,PRUint32 src_length)488 SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
489 {
490     SHA1Context ctx;
491     unsigned int outLen;
492 
493     SHA1_Begin(&ctx);
494     SHA1_Update(&ctx, src, src_length);
495     SHA1_End(&ctx, dest, &outLen, SHA1_LENGTH);
496     memset(&ctx, 0, sizeof ctx);
497     return SECSuccess;
498 }
499 
500 /* Hash a null-terminated character string. */
501 SECStatus
SHA1_Hash(unsigned char * dest,const char * src)502 SHA1_Hash(unsigned char *dest, const char *src)
503 {
504     return SHA1_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
505 }
506 
507 /*
508  * need to support save/restore state in pkcs11. Stores all the info necessary
509  * for a structure into just a stream of bytes.
510  */
511 unsigned int
SHA1_FlattenSize(SHA1Context * cx)512 SHA1_FlattenSize(SHA1Context *cx)
513 {
514     return sizeof(SHA1Context);
515 }
516 
517 SECStatus
SHA1_Flatten(SHA1Context * cx,unsigned char * space)518 SHA1_Flatten(SHA1Context *cx, unsigned char *space)
519 {
520     PORT_Memcpy(space, cx, sizeof(SHA1Context));
521     return SECSuccess;
522 }
523 
524 SHA1Context *
SHA1_Resurrect(unsigned char * space,void * arg)525 SHA1_Resurrect(unsigned char *space, void *arg)
526 {
527     SHA1Context *cx = SHA1_NewContext();
528     if (cx == NULL)
529         return NULL;
530 
531     PORT_Memcpy(cx, space, sizeof(SHA1Context));
532     return cx;
533 }
534 
535 void
SHA1_Clone(SHA1Context * dest,SHA1Context * src)536 SHA1_Clone(SHA1Context *dest, SHA1Context *src)
537 {
538     memcpy(dest, src, sizeof *dest);
539 }
540 
541 void
SHA1_TraceState(SHA1Context * ctx)542 SHA1_TraceState(SHA1Context *ctx)
543 {
544     PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
545 }
546