/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #ifdef FREEBL_NO_DEPEND
6 #include "stubs.h"
7 #endif
8
9 #include <memory.h>
10 #include "blapi.h"
11 #include "sha_fast.h"
12 #include "prerror.h"
13
14 #ifdef TRACING_SSL
15 #include "ssl.h"
16 #include "ssltrace.h"
17 #endif
18
19 static void shaCompress(volatile SHA_HW_t *X, const PRUint32 *datain);
20
21 #define W u.w
22 #define B u.b
23
24 #define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
25 #define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
26 #define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
27 #define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))
28
29 #define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1)
30
31 /*
32 * SHA: initialize context
33 */
34 void
SHA1_Begin(SHA1Context * ctx)35 SHA1_Begin(SHA1Context *ctx)
36 {
37 ctx->size = 0;
38 /*
39 * Initialize H with constants from FIPS180-1.
40 */
41 ctx->H[0] = 0x67452301L;
42 ctx->H[1] = 0xefcdab89L;
43 ctx->H[2] = 0x98badcfeL;
44 ctx->H[3] = 0x10325476L;
45 ctx->H[4] = 0xc3d2e1f0L;
46 }
47
/* Explanation of H array and index values:
 * The context's H array is actually the concatenation of two arrays
 * defined by SHA1: the H array of state variables (5 elements),
 * and the W array of intermediate values, of which there are 16 elements.
 * The W array starts at H[5], that is, W[0] is H[5].
 * Although these values are defined as 32-bit values, we use 64-bit
 * variables to hold them because the AMD64 stores 64-bit values in
 * memory MUCH faster than it stores any smaller values.
 *
 * Rather than passing the context structure to shaCompress, we pass
 * this combined array of H and W values.  We do not pass the address
 * of the first element of this array, but rather pass the address of an
 * element in the middle of the array, element X.  Presently X[0] is H[11].
 * So we pass the address of H[11] as the address of array X to shaCompress.
 * Then shaCompress accesses the members of the array using positive AND
 * negative indexes.
 *
 * Pictorially: (each element is 8 bytes)
 * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
 * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
 *
 * The byte offset from X[0] to any member of H and W is always
 * representable in a signed 8-bit value, which will be encoded
 * as a single-byte offset in the x86-64 instruction set.
 * If we didn't pass the address of H[11], and instead passed the
 * address of H[0], the offsets to elements H[16] and above would be
 * greater than 127, not representable in a signed 8-bit value, and the
 * x86-64 instruction set would encode every such offset as a 32-bit
 * signed number in each instruction that accessed element H[16] or
 * higher.  This results in much bigger and slower code.
 */
/*
 * Index bias between the array pointer X handed to shaCompress and the
 * context's H (and, when W lives in the context, W) arrays.  When
 * SHA_PUT_W_IN_STACK is defined, shaCompress keeps W in local variables,
 * so X is simply H and only H2X is needed.
 */
#if !defined(SHA_PUT_W_IN_STACK)
#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
#define W2X 6  /* X[0] is W[6], and W[0] is X[-6] */
#else
#define H2X 0
#endif
85
86 /*
87 * SHA: Add data to context.
88 */
89 void
SHA1_Update(SHA1Context * ctx,const unsigned char * dataIn,unsigned int len)90 SHA1_Update(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
91 {
92 register unsigned int lenB;
93 register unsigned int togo;
94
95 if (!len)
96 return;
97
98 /* accumulate the byte count. */
99 lenB = (unsigned int)(ctx->size) & 63U;
100
101 ctx->size += len;
102
103 /*
104 * Read the data into W and process blocks as they get full
105 */
106 if (lenB > 0) {
107 togo = 64U - lenB;
108 if (len < togo)
109 togo = len;
110 memcpy(ctx->B + lenB, dataIn, togo);
111 len -= togo;
112 dataIn += togo;
113 lenB = (lenB + togo) & 63U;
114 if (!lenB) {
115 shaCompress(&ctx->H[H2X], ctx->W);
116 }
117 }
118 #if !defined(HAVE_UNALIGNED_ACCESS)
119 if ((ptrdiff_t)dataIn % sizeof(PRUint32)) {
120 while (len >= 64U) {
121 memcpy(ctx->B, dataIn, 64);
122 len -= 64U;
123 shaCompress(&ctx->H[H2X], ctx->W);
124 dataIn += 64U;
125 }
126 } else
127 #endif
128 {
129 while (len >= 64U) {
130 len -= 64U;
131 shaCompress(&ctx->H[H2X], (PRUint32 *)dataIn);
132 dataIn += 64U;
133 }
134 }
135 if (len) {
136 memcpy(ctx->B, dataIn, len);
137 }
138 }
139
140 /*
141 * SHA: Generate hash value from context
142 */
143 void NO_SANITIZE_ALIGNMENT
SHA1_End(SHA1Context * ctx,unsigned char * hashout,unsigned int * pDigestLen,unsigned int maxDigestLen)144 SHA1_End(SHA1Context *ctx, unsigned char *hashout,
145 unsigned int *pDigestLen, unsigned int maxDigestLen)
146 {
147 register PRUint64 size;
148 register PRUint32 lenB;
149
150 static const unsigned char bulk_pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0,
151 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
152 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
153 #define tmp lenB
154
155 PORT_Assert(maxDigestLen >= SHA1_LENGTH);
156
157 /*
158 * Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits
159 */
160 size = ctx->size;
161
162 lenB = (PRUint32)size & 63;
163 SHA1_Update(ctx, bulk_pad, (((55 + 64) - lenB) & 63) + 1);
164 PORT_Assert(((PRUint32)ctx->size & 63) == 56);
165 /* Convert size from bytes to bits. */
166 size <<= 3;
167 ctx->W[14] = SHA_HTONL((PRUint32)(size >> 32));
168 ctx->W[15] = SHA_HTONL((PRUint32)size);
169 shaCompress(&ctx->H[H2X], ctx->W);
170
171 /*
172 * Output hash
173 */
174 SHA_STORE_RESULT;
175 if (pDigestLen) {
176 *pDigestLen = SHA1_LENGTH;
177 }
178 #undef tmp
179 }
180
181 void
SHA1_EndRaw(SHA1Context * ctx,unsigned char * hashout,unsigned int * pDigestLen,unsigned int maxDigestLen)182 SHA1_EndRaw(SHA1Context *ctx, unsigned char *hashout,
183 unsigned int *pDigestLen, unsigned int maxDigestLen)
184 {
185 #if defined(SHA_NEED_TMP_VARIABLE)
186 register PRUint32 tmp;
187 #endif
188 PORT_Assert(maxDigestLen >= SHA1_LENGTH);
189
190 SHA_STORE_RESULT;
191 if (pDigestLen)
192 *pDigestLen = SHA1_LENGTH;
193 }
194
195 #undef B
196 /*
197 * SHA: Compression function, unrolled.
198 *
199 * Some operations in shaCompress are done as 5 groups of 16 operations.
200 * Others are done as 4 groups of 20 operations.
201 * The code below shows that structure.
202 *
203 * The functions that compute the new values of the 5 state variables
204 * A-E are done in 4 groups of 20 operations (or you may also think
205 * of them as being done in 16 groups of 5 operations). They are
206 * done by the SHA_RNDx macros below, in the right column.
207 *
208 * The functions that set the 16 values of the W array are done in
209 * 5 groups of 16 operations. The first group is done by the
210 * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
211 * in the left column.
212 *
213 * gcc's optimizer observes that each member of the W array is assigned
214 * a value 5 times in this code. It reduces the number of store
215 * operations done to the W array in the context (that is, in the X array)
216 * by creating a W array on the stack, and storing the W values there for
217 * the first 4 groups of operations on W, and storing the values in the
218 * context's W array only in the fifth group. This is undesirable.
219 * It is MUCH bigger code than simply using the context's W array, because
220 * all the offsets to the W array in the stack are 32-bit signed offsets,
221 * and it is no faster than storing the values in the context's W array.
222 *
223 * The original code for sha_fast.c prevented this creation of a separate
224 * W array in the stack by creating a W array of 80 members, each of
225 * whose elements is assigned only once. It also separated the computations
226 * of the W array values and the computations of the values for the 5
227 * state variables into two separate passes, W's, then A-E's so that the
228 * second pass could be done all in registers (except for accessing the W
229 * array) on machines with fewer registers. The method is suboptimal
230 * for machines with enough registers to do it all in one pass, and it
231 * necessitates using many instructions with 32-bit offsets.
232 *
233 * This code eliminates the separate W array on the stack by a completely
234 * different means: by declaring the X array volatile. This prevents
235 * the optimizer from trying to reduce the use of the X array by the
236 * creation of a MORE expensive W array on the stack. The result is
237 * that all instructions use signed 8-bit offsets and not 32-bit offsets.
238 *
239 * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
240 * results in code that is 3 times faster than the previous NSS sha_fast
241 * code on AMD64.
242 */
243 static void NO_SANITIZE_ALIGNMENT
shaCompress(volatile SHA_HW_t * X,const PRUint32 * inbuf)244 shaCompress(volatile SHA_HW_t *X, const PRUint32 *inbuf)
245 {
246 register SHA_HW_t A, B, C, D, E;
247
248 #if defined(SHA_NEED_TMP_VARIABLE)
249 register PRUint32 tmp;
250 #endif
251
252 #if !defined(SHA_PUT_W_IN_STACK)
253 #define XH(n) X[n - H2X]
254 #define XW(n) X[n - W2X]
255 #else
256 SHA_HW_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7,
257 w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
258 #define XW(n) w_##n
259 #define XH(n) X[n]
260 #endif
261
262 #define K0 0x5a827999L
263 #define K1 0x6ed9eba1L
264 #define K2 0x8f1bbcdcL
265 #define K3 0xca62c1d6L
266
267 #define SHA_RND1(a, b, c, d, e, n) \
268 a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \
269 c = SHA_ROTL(c, 30)
270 #define SHA_RND2(a, b, c, d, e, n) \
271 a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \
272 c = SHA_ROTL(c, 30)
273 #define SHA_RND3(a, b, c, d, e, n) \
274 a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \
275 c = SHA_ROTL(c, 30)
276 #define SHA_RND4(a, b, c, d, e, n) \
277 a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \
278 c = SHA_ROTL(c, 30)
279
280 #define LOAD(n) XW(n) = SHA_HTONL(inbuf[n])
281
282 A = XH(0);
283 B = XH(1);
284 C = XH(2);
285 D = XH(3);
286 E = XH(4);
287
288 LOAD(0);
289 SHA_RND1(E, A, B, C, D, 0);
290 LOAD(1);
291 SHA_RND1(D, E, A, B, C, 1);
292 LOAD(2);
293 SHA_RND1(C, D, E, A, B, 2);
294 LOAD(3);
295 SHA_RND1(B, C, D, E, A, 3);
296 LOAD(4);
297 SHA_RND1(A, B, C, D, E, 4);
298 LOAD(5);
299 SHA_RND1(E, A, B, C, D, 5);
300 LOAD(6);
301 SHA_RND1(D, E, A, B, C, 6);
302 LOAD(7);
303 SHA_RND1(C, D, E, A, B, 7);
304 LOAD(8);
305 SHA_RND1(B, C, D, E, A, 8);
306 LOAD(9);
307 SHA_RND1(A, B, C, D, E, 9);
308 LOAD(10);
309 SHA_RND1(E, A, B, C, D, 10);
310 LOAD(11);
311 SHA_RND1(D, E, A, B, C, 11);
312 LOAD(12);
313 SHA_RND1(C, D, E, A, B, 12);
314 LOAD(13);
315 SHA_RND1(B, C, D, E, A, 13);
316 LOAD(14);
317 SHA_RND1(A, B, C, D, E, 14);
318 LOAD(15);
319 SHA_RND1(E, A, B, C, D, 15);
320
321 SHA_MIX(0, 13, 8, 2);
322 SHA_RND1(D, E, A, B, C, 0);
323 SHA_MIX(1, 14, 9, 3);
324 SHA_RND1(C, D, E, A, B, 1);
325 SHA_MIX(2, 15, 10, 4);
326 SHA_RND1(B, C, D, E, A, 2);
327 SHA_MIX(3, 0, 11, 5);
328 SHA_RND1(A, B, C, D, E, 3);
329
330 SHA_MIX(4, 1, 12, 6);
331 SHA_RND2(E, A, B, C, D, 4);
332 SHA_MIX(5, 2, 13, 7);
333 SHA_RND2(D, E, A, B, C, 5);
334 SHA_MIX(6, 3, 14, 8);
335 SHA_RND2(C, D, E, A, B, 6);
336 SHA_MIX(7, 4, 15, 9);
337 SHA_RND2(B, C, D, E, A, 7);
338 SHA_MIX(8, 5, 0, 10);
339 SHA_RND2(A, B, C, D, E, 8);
340 SHA_MIX(9, 6, 1, 11);
341 SHA_RND2(E, A, B, C, D, 9);
342 SHA_MIX(10, 7, 2, 12);
343 SHA_RND2(D, E, A, B, C, 10);
344 SHA_MIX(11, 8, 3, 13);
345 SHA_RND2(C, D, E, A, B, 11);
346 SHA_MIX(12, 9, 4, 14);
347 SHA_RND2(B, C, D, E, A, 12);
348 SHA_MIX(13, 10, 5, 15);
349 SHA_RND2(A, B, C, D, E, 13);
350 SHA_MIX(14, 11, 6, 0);
351 SHA_RND2(E, A, B, C, D, 14);
352 SHA_MIX(15, 12, 7, 1);
353 SHA_RND2(D, E, A, B, C, 15);
354
355 SHA_MIX(0, 13, 8, 2);
356 SHA_RND2(C, D, E, A, B, 0);
357 SHA_MIX(1, 14, 9, 3);
358 SHA_RND2(B, C, D, E, A, 1);
359 SHA_MIX(2, 15, 10, 4);
360 SHA_RND2(A, B, C, D, E, 2);
361 SHA_MIX(3, 0, 11, 5);
362 SHA_RND2(E, A, B, C, D, 3);
363 SHA_MIX(4, 1, 12, 6);
364 SHA_RND2(D, E, A, B, C, 4);
365 SHA_MIX(5, 2, 13, 7);
366 SHA_RND2(C, D, E, A, B, 5);
367 SHA_MIX(6, 3, 14, 8);
368 SHA_RND2(B, C, D, E, A, 6);
369 SHA_MIX(7, 4, 15, 9);
370 SHA_RND2(A, B, C, D, E, 7);
371
372 SHA_MIX(8, 5, 0, 10);
373 SHA_RND3(E, A, B, C, D, 8);
374 SHA_MIX(9, 6, 1, 11);
375 SHA_RND3(D, E, A, B, C, 9);
376 SHA_MIX(10, 7, 2, 12);
377 SHA_RND3(C, D, E, A, B, 10);
378 SHA_MIX(11, 8, 3, 13);
379 SHA_RND3(B, C, D, E, A, 11);
380 SHA_MIX(12, 9, 4, 14);
381 SHA_RND3(A, B, C, D, E, 12);
382 SHA_MIX(13, 10, 5, 15);
383 SHA_RND3(E, A, B, C, D, 13);
384 SHA_MIX(14, 11, 6, 0);
385 SHA_RND3(D, E, A, B, C, 14);
386 SHA_MIX(15, 12, 7, 1);
387 SHA_RND3(C, D, E, A, B, 15);
388
389 SHA_MIX(0, 13, 8, 2);
390 SHA_RND3(B, C, D, E, A, 0);
391 SHA_MIX(1, 14, 9, 3);
392 SHA_RND3(A, B, C, D, E, 1);
393 SHA_MIX(2, 15, 10, 4);
394 SHA_RND3(E, A, B, C, D, 2);
395 SHA_MIX(3, 0, 11, 5);
396 SHA_RND3(D, E, A, B, C, 3);
397 SHA_MIX(4, 1, 12, 6);
398 SHA_RND3(C, D, E, A, B, 4);
399 SHA_MIX(5, 2, 13, 7);
400 SHA_RND3(B, C, D, E, A, 5);
401 SHA_MIX(6, 3, 14, 8);
402 SHA_RND3(A, B, C, D, E, 6);
403 SHA_MIX(7, 4, 15, 9);
404 SHA_RND3(E, A, B, C, D, 7);
405 SHA_MIX(8, 5, 0, 10);
406 SHA_RND3(D, E, A, B, C, 8);
407 SHA_MIX(9, 6, 1, 11);
408 SHA_RND3(C, D, E, A, B, 9);
409 SHA_MIX(10, 7, 2, 12);
410 SHA_RND3(B, C, D, E, A, 10);
411 SHA_MIX(11, 8, 3, 13);
412 SHA_RND3(A, B, C, D, E, 11);
413
414 SHA_MIX(12, 9, 4, 14);
415 SHA_RND4(E, A, B, C, D, 12);
416 SHA_MIX(13, 10, 5, 15);
417 SHA_RND4(D, E, A, B, C, 13);
418 SHA_MIX(14, 11, 6, 0);
419 SHA_RND4(C, D, E, A, B, 14);
420 SHA_MIX(15, 12, 7, 1);
421 SHA_RND4(B, C, D, E, A, 15);
422
423 SHA_MIX(0, 13, 8, 2);
424 SHA_RND4(A, B, C, D, E, 0);
425 SHA_MIX(1, 14, 9, 3);
426 SHA_RND4(E, A, B, C, D, 1);
427 SHA_MIX(2, 15, 10, 4);
428 SHA_RND4(D, E, A, B, C, 2);
429 SHA_MIX(3, 0, 11, 5);
430 SHA_RND4(C, D, E, A, B, 3);
431 SHA_MIX(4, 1, 12, 6);
432 SHA_RND4(B, C, D, E, A, 4);
433 SHA_MIX(5, 2, 13, 7);
434 SHA_RND4(A, B, C, D, E, 5);
435 SHA_MIX(6, 3, 14, 8);
436 SHA_RND4(E, A, B, C, D, 6);
437 SHA_MIX(7, 4, 15, 9);
438 SHA_RND4(D, E, A, B, C, 7);
439 SHA_MIX(8, 5, 0, 10);
440 SHA_RND4(C, D, E, A, B, 8);
441 SHA_MIX(9, 6, 1, 11);
442 SHA_RND4(B, C, D, E, A, 9);
443 SHA_MIX(10, 7, 2, 12);
444 SHA_RND4(A, B, C, D, E, 10);
445 SHA_MIX(11, 8, 3, 13);
446 SHA_RND4(E, A, B, C, D, 11);
447 SHA_MIX(12, 9, 4, 14);
448 SHA_RND4(D, E, A, B, C, 12);
449 SHA_MIX(13, 10, 5, 15);
450 SHA_RND4(C, D, E, A, B, 13);
451 SHA_MIX(14, 11, 6, 0);
452 SHA_RND4(B, C, D, E, A, 14);
453 SHA_MIX(15, 12, 7, 1);
454 SHA_RND4(A, B, C, D, E, 15);
455
456 XH(0) += A;
457 XH(1) += B;
458 XH(2) += C;
459 XH(3) += D;
460 XH(4) += E;
461 }
462
/*************************************************************************
** Code below this line was added to make the SHA code support the BLAPI
** interface.
*/
466
467 SHA1Context *
SHA1_NewContext(void)468 SHA1_NewContext(void)
469 {
470 SHA1Context *cx;
471
472 /* no need to ZNew, SHA1_Begin will init the context */
473 cx = PORT_New(SHA1Context);
474 return cx;
475 }
476
477 /* Zero and free the context */
478 void
SHA1_DestroyContext(SHA1Context * cx,PRBool freeit)479 SHA1_DestroyContext(SHA1Context *cx, PRBool freeit)
480 {
481 memset(cx, 0, sizeof *cx);
482 if (freeit) {
483 PORT_Free(cx);
484 }
485 }
486
487 SECStatus
SHA1_HashBuf(unsigned char * dest,const unsigned char * src,PRUint32 src_length)488 SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
489 {
490 SHA1Context ctx;
491 unsigned int outLen;
492
493 SHA1_Begin(&ctx);
494 SHA1_Update(&ctx, src, src_length);
495 SHA1_End(&ctx, dest, &outLen, SHA1_LENGTH);
496 memset(&ctx, 0, sizeof ctx);
497 return SECSuccess;
498 }
499
500 /* Hash a null-terminated character string. */
501 SECStatus
SHA1_Hash(unsigned char * dest,const char * src)502 SHA1_Hash(unsigned char *dest, const char *src)
503 {
504 return SHA1_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
505 }
506
507 /*
508 * need to support save/restore state in pkcs11. Stores all the info necessary
509 * for a structure into just a stream of bytes.
510 */
511 unsigned int
SHA1_FlattenSize(SHA1Context * cx)512 SHA1_FlattenSize(SHA1Context *cx)
513 {
514 return sizeof(SHA1Context);
515 }
516
517 SECStatus
SHA1_Flatten(SHA1Context * cx,unsigned char * space)518 SHA1_Flatten(SHA1Context *cx, unsigned char *space)
519 {
520 PORT_Memcpy(space, cx, sizeof(SHA1Context));
521 return SECSuccess;
522 }
523
524 SHA1Context *
SHA1_Resurrect(unsigned char * space,void * arg)525 SHA1_Resurrect(unsigned char *space, void *arg)
526 {
527 SHA1Context *cx = SHA1_NewContext();
528 if (cx == NULL)
529 return NULL;
530
531 PORT_Memcpy(cx, space, sizeof(SHA1Context));
532 return cx;
533 }
534
535 void
SHA1_Clone(SHA1Context * dest,SHA1Context * src)536 SHA1_Clone(SHA1Context *dest, SHA1Context *src)
537 {
538 memcpy(dest, src, sizeof *dest);
539 }
540
541 void
SHA1_TraceState(SHA1Context * ctx)542 SHA1_TraceState(SHA1Context *ctx)
543 {
544 PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
545 }
546