1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "mozilla/Assertions.h"
8 #include "mozilla/EndianUtils.h"
9 #include "mozilla/SHA1.h"
10 
11 #include <string.h>
12 
13 using mozilla::NativeEndian;
14 using mozilla::SHA1Sum;
15 
SHA_ROTL(uint32_t aT,uint32_t aN)16 static inline uint32_t SHA_ROTL(uint32_t aT, uint32_t aN) {
17   MOZ_ASSERT(aN < 32);
18   return (aT << aN) | (aT >> (32 - aN));
19 }
20 
21 static void shaCompress(volatile unsigned* aX, const uint32_t* aBuf);
22 
23 #define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
24 #define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
25 #define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
26 #define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))
27 
28 #define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1)
29 
SHA1Sum()30 SHA1Sum::SHA1Sum() : mSize(0), mDone(false) {
31   // Initialize H with constants from FIPS180-1.
32   mH[0] = 0x67452301L;
33   mH[1] = 0xefcdab89L;
34   mH[2] = 0x98badcfeL;
35   mH[3] = 0x10325476L;
36   mH[4] = 0xc3d2e1f0L;
37 }
38 
39 /*
40  * Explanation of H array and index values:
41  *
42  * The context's H array is actually the concatenation of two arrays
43  * defined by SHA1, the H array of state variables (5 elements),
44  * and the W array of intermediate values, of which there are 16 elements.
45  * The W array starts at H[5], that is W[0] is H[5].
46  * Although these values are defined as 32-bit values, we use 64-bit
47  * variables to hold them because the AMD64 stores 64 bit values in
48  * memory MUCH faster than it stores any smaller values.
49  *
50  * Rather than passing the context structure to shaCompress, we pass
51  * this combined array of H and W values.  We do not pass the address
52  * of the first element of this array, but rather pass the address of an
53  * element in the middle of the array, element X.  Presently X[0] is H[11].
54  * So we pass the address of H[11] as the address of array X to shaCompress.
55  * Then shaCompress accesses the members of the array using positive AND
56  * negative indexes.
57  *
58  * Pictorially: (each element is 8 bytes)
59  * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
60  * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
61  *
62  * The byte offset from X[0] to any member of H and W is always
63  * representable in a signed 8-bit value, which will be encoded
64  * as a single byte offset in the X86-64 instruction set.
65  * If we didn't pass the address of H[11], and instead passed the
66  * address of H[0], the offsets to elements H[16] and above would be
67  * greater than 127, not representable in a signed 8-bit value, and the
68  * x86-64 instruction set would encode every such offset as a 32-bit
69  * signed number in each instruction that accessed element H[16] or
70  * higher.  This results in much bigger and slower code.
71  */
72 #define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
73 #define W2X 6  /* X[0] is W[6],  and W[0] is X[-6]  */
74 
75 /*
76  *  SHA: Add data to context.
77  */
update(const void * aData,uint32_t aLen)78 void SHA1Sum::update(const void* aData, uint32_t aLen) {
79   MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
80 
81   const uint8_t* data = static_cast<const uint8_t*>(aData);
82 
83   if (aLen == 0) {
84     return;
85   }
86 
87   /* Accumulate the byte count. */
88   unsigned int lenB = static_cast<unsigned int>(mSize) & 63U;
89 
90   mSize += aLen;
91 
92   /* Read the data into W and process blocks as they get full. */
93   unsigned int togo;
94   if (lenB > 0) {
95     togo = 64U - lenB;
96     if (aLen < togo) {
97       togo = aLen;
98     }
99     memcpy(mU.mB + lenB, data, togo);
100     aLen -= togo;
101     data += togo;
102     lenB = (lenB + togo) & 63U;
103     if (!lenB) {
104       shaCompress(&mH[H2X], mU.mW);
105     }
106   }
107 
108   while (aLen >= 64U) {
109     aLen -= 64U;
110     shaCompress(&mH[H2X], reinterpret_cast<const uint32_t*>(data));
111     data += 64U;
112   }
113 
114   if (aLen > 0) {
115     memcpy(mU.mB, data, aLen);
116   }
117 }
118 
119 /*
120  *  SHA: Generate hash value
121  */
finish(SHA1Sum::Hash & aHashOut)122 void SHA1Sum::finish(SHA1Sum::Hash& aHashOut) {
123   MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
124 
125   uint64_t size = mSize;
126   uint32_t lenB = uint32_t(size) & 63;
127 
128   static const uint8_t bulk_pad[64] = {
129       0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130       0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
131       0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
132 
133   /* Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits. */
134   update(bulk_pad, (((55 + 64) - lenB) & 63) + 1);
135   MOZ_ASSERT((uint32_t(mSize) & 63) == 56);
136 
137   /* Convert size from bytes to bits. */
138   size <<= 3;
139   mU.mW[14] = NativeEndian::swapToBigEndian(uint32_t(size >> 32));
140   mU.mW[15] = NativeEndian::swapToBigEndian(uint32_t(size));
141   shaCompress(&mH[H2X], mU.mW);
142 
143   /* Output hash. */
144   mU.mW[0] = NativeEndian::swapToBigEndian(mH[0]);
145   mU.mW[1] = NativeEndian::swapToBigEndian(mH[1]);
146   mU.mW[2] = NativeEndian::swapToBigEndian(mH[2]);
147   mU.mW[3] = NativeEndian::swapToBigEndian(mH[3]);
148   mU.mW[4] = NativeEndian::swapToBigEndian(mH[4]);
149   memcpy(aHashOut, mU.mW, 20);
150   mDone = true;
151 }
152 
153 /*
154  *  SHA: Compression function, unrolled.
155  *
156  * Some operations in shaCompress are done as 5 groups of 16 operations.
157  * Others are done as 4 groups of 20 operations.
158  * The code below shows that structure.
159  *
160  * The functions that compute the new values of the 5 state variables
161  * A-E are done in 4 groups of 20 operations (or you may also think
162  * of them as being done in 16 groups of 5 operations).  They are
163  * done by the SHA_RNDx macros below, in the right column.
164  *
165  * The functions that set the 16 values of the W array are done in
166  * 5 groups of 16 operations.  The first group is done by the
167  * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
168  * in the left column.
169  *
170  * gcc's optimizer observes that each member of the W array is assigned
171  * a value 5 times in this code.  It reduces the number of store
172  * operations done to the W array in the context (that is, in the X array)
173  * by creating a W array on the stack, and storing the W values there for
174  * the first 4 groups of operations on W, and storing the values in the
175  * context's W array only in the fifth group.  This is undesirable.
176  * It is MUCH bigger code than simply using the context's W array, because
177  * all the offsets to the W array in the stack are 32-bit signed offsets,
178  * and it is no faster than storing the values in the context's W array.
179  *
180  * The original code for sha_fast.c prevented this creation of a separate
181  * W array in the stack by creating a W array of 80 members, each of
182  * whose elements is assigned only once. It also separated the computations
183  * of the W array values and the computations of the values for the 5
184  * state variables into two separate passes, W's, then A-E's so that the
185  * second pass could be done all in registers (except for accessing the W
186  * array) on machines with fewer registers.  The method is suboptimal
187  * for machines with enough registers to do it all in one pass, and it
188  * necessitates using many instructions with 32-bit offsets.
189  *
190  * This code eliminates the separate W array on the stack by a completely
191  * different means: by declaring the X array volatile.  This prevents
192  * the optimizer from trying to reduce the use of the X array by the
193  * creation of a MORE expensive W array on the stack. The result is
194  * that all instructions use signed 8-bit offsets and not 32-bit offsets.
195  *
196  * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
197  * results in code that is 3 times faster than the previous NSS sha_fast
198  * code on AMD64.
199  */
shaCompress(volatile unsigned * aX,const uint32_t * aBuf)200 static void shaCompress(volatile unsigned* aX, const uint32_t* aBuf) {
201   unsigned A, B, C, D, E;
202 
203 #define XH(n) aX[n - H2X]
204 #define XW(n) aX[n - W2X]
205 
206 #define K0 0x5a827999L
207 #define K1 0x6ed9eba1L
208 #define K2 0x8f1bbcdcL
209 #define K3 0xca62c1d6L
210 
211 #define SHA_RND1(a, b, c, d, e, n)                       \
212   a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \
213   c = SHA_ROTL(c, 30)
214 #define SHA_RND2(a, b, c, d, e, n)                       \
215   a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \
216   c = SHA_ROTL(c, 30)
217 #define SHA_RND3(a, b, c, d, e, n)                       \
218   a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \
219   c = SHA_ROTL(c, 30)
220 #define SHA_RND4(a, b, c, d, e, n)                       \
221   a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \
222   c = SHA_ROTL(c, 30)
223 
224 #define LOAD(n) XW(n) = NativeEndian::swapToBigEndian(aBuf[n])
225 
226   A = XH(0);
227   B = XH(1);
228   C = XH(2);
229   D = XH(3);
230   E = XH(4);
231 
232   LOAD(0);
233   SHA_RND1(E, A, B, C, D, 0);
234   LOAD(1);
235   SHA_RND1(D, E, A, B, C, 1);
236   LOAD(2);
237   SHA_RND1(C, D, E, A, B, 2);
238   LOAD(3);
239   SHA_RND1(B, C, D, E, A, 3);
240   LOAD(4);
241   SHA_RND1(A, B, C, D, E, 4);
242   LOAD(5);
243   SHA_RND1(E, A, B, C, D, 5);
244   LOAD(6);
245   SHA_RND1(D, E, A, B, C, 6);
246   LOAD(7);
247   SHA_RND1(C, D, E, A, B, 7);
248   LOAD(8);
249   SHA_RND1(B, C, D, E, A, 8);
250   LOAD(9);
251   SHA_RND1(A, B, C, D, E, 9);
252   LOAD(10);
253   SHA_RND1(E, A, B, C, D, 10);
254   LOAD(11);
255   SHA_RND1(D, E, A, B, C, 11);
256   LOAD(12);
257   SHA_RND1(C, D, E, A, B, 12);
258   LOAD(13);
259   SHA_RND1(B, C, D, E, A, 13);
260   LOAD(14);
261   SHA_RND1(A, B, C, D, E, 14);
262   LOAD(15);
263   SHA_RND1(E, A, B, C, D, 15);
264 
265   SHA_MIX(0, 13, 8, 2);
266   SHA_RND1(D, E, A, B, C, 0);
267   SHA_MIX(1, 14, 9, 3);
268   SHA_RND1(C, D, E, A, B, 1);
269   SHA_MIX(2, 15, 10, 4);
270   SHA_RND1(B, C, D, E, A, 2);
271   SHA_MIX(3, 0, 11, 5);
272   SHA_RND1(A, B, C, D, E, 3);
273 
274   SHA_MIX(4, 1, 12, 6);
275   SHA_RND2(E, A, B, C, D, 4);
276   SHA_MIX(5, 2, 13, 7);
277   SHA_RND2(D, E, A, B, C, 5);
278   SHA_MIX(6, 3, 14, 8);
279   SHA_RND2(C, D, E, A, B, 6);
280   SHA_MIX(7, 4, 15, 9);
281   SHA_RND2(B, C, D, E, A, 7);
282   SHA_MIX(8, 5, 0, 10);
283   SHA_RND2(A, B, C, D, E, 8);
284   SHA_MIX(9, 6, 1, 11);
285   SHA_RND2(E, A, B, C, D, 9);
286   SHA_MIX(10, 7, 2, 12);
287   SHA_RND2(D, E, A, B, C, 10);
288   SHA_MIX(11, 8, 3, 13);
289   SHA_RND2(C, D, E, A, B, 11);
290   SHA_MIX(12, 9, 4, 14);
291   SHA_RND2(B, C, D, E, A, 12);
292   SHA_MIX(13, 10, 5, 15);
293   SHA_RND2(A, B, C, D, E, 13);
294   SHA_MIX(14, 11, 6, 0);
295   SHA_RND2(E, A, B, C, D, 14);
296   SHA_MIX(15, 12, 7, 1);
297   SHA_RND2(D, E, A, B, C, 15);
298 
299   SHA_MIX(0, 13, 8, 2);
300   SHA_RND2(C, D, E, A, B, 0);
301   SHA_MIX(1, 14, 9, 3);
302   SHA_RND2(B, C, D, E, A, 1);
303   SHA_MIX(2, 15, 10, 4);
304   SHA_RND2(A, B, C, D, E, 2);
305   SHA_MIX(3, 0, 11, 5);
306   SHA_RND2(E, A, B, C, D, 3);
307   SHA_MIX(4, 1, 12, 6);
308   SHA_RND2(D, E, A, B, C, 4);
309   SHA_MIX(5, 2, 13, 7);
310   SHA_RND2(C, D, E, A, B, 5);
311   SHA_MIX(6, 3, 14, 8);
312   SHA_RND2(B, C, D, E, A, 6);
313   SHA_MIX(7, 4, 15, 9);
314   SHA_RND2(A, B, C, D, E, 7);
315 
316   SHA_MIX(8, 5, 0, 10);
317   SHA_RND3(E, A, B, C, D, 8);
318   SHA_MIX(9, 6, 1, 11);
319   SHA_RND3(D, E, A, B, C, 9);
320   SHA_MIX(10, 7, 2, 12);
321   SHA_RND3(C, D, E, A, B, 10);
322   SHA_MIX(11, 8, 3, 13);
323   SHA_RND3(B, C, D, E, A, 11);
324   SHA_MIX(12, 9, 4, 14);
325   SHA_RND3(A, B, C, D, E, 12);
326   SHA_MIX(13, 10, 5, 15);
327   SHA_RND3(E, A, B, C, D, 13);
328   SHA_MIX(14, 11, 6, 0);
329   SHA_RND3(D, E, A, B, C, 14);
330   SHA_MIX(15, 12, 7, 1);
331   SHA_RND3(C, D, E, A, B, 15);
332 
333   SHA_MIX(0, 13, 8, 2);
334   SHA_RND3(B, C, D, E, A, 0);
335   SHA_MIX(1, 14, 9, 3);
336   SHA_RND3(A, B, C, D, E, 1);
337   SHA_MIX(2, 15, 10, 4);
338   SHA_RND3(E, A, B, C, D, 2);
339   SHA_MIX(3, 0, 11, 5);
340   SHA_RND3(D, E, A, B, C, 3);
341   SHA_MIX(4, 1, 12, 6);
342   SHA_RND3(C, D, E, A, B, 4);
343   SHA_MIX(5, 2, 13, 7);
344   SHA_RND3(B, C, D, E, A, 5);
345   SHA_MIX(6, 3, 14, 8);
346   SHA_RND3(A, B, C, D, E, 6);
347   SHA_MIX(7, 4, 15, 9);
348   SHA_RND3(E, A, B, C, D, 7);
349   SHA_MIX(8, 5, 0, 10);
350   SHA_RND3(D, E, A, B, C, 8);
351   SHA_MIX(9, 6, 1, 11);
352   SHA_RND3(C, D, E, A, B, 9);
353   SHA_MIX(10, 7, 2, 12);
354   SHA_RND3(B, C, D, E, A, 10);
355   SHA_MIX(11, 8, 3, 13);
356   SHA_RND3(A, B, C, D, E, 11);
357 
358   SHA_MIX(12, 9, 4, 14);
359   SHA_RND4(E, A, B, C, D, 12);
360   SHA_MIX(13, 10, 5, 15);
361   SHA_RND4(D, E, A, B, C, 13);
362   SHA_MIX(14, 11, 6, 0);
363   SHA_RND4(C, D, E, A, B, 14);
364   SHA_MIX(15, 12, 7, 1);
365   SHA_RND4(B, C, D, E, A, 15);
366 
367   SHA_MIX(0, 13, 8, 2);
368   SHA_RND4(A, B, C, D, E, 0);
369   SHA_MIX(1, 14, 9, 3);
370   SHA_RND4(E, A, B, C, D, 1);
371   SHA_MIX(2, 15, 10, 4);
372   SHA_RND4(D, E, A, B, C, 2);
373   SHA_MIX(3, 0, 11, 5);
374   SHA_RND4(C, D, E, A, B, 3);
375   SHA_MIX(4, 1, 12, 6);
376   SHA_RND4(B, C, D, E, A, 4);
377   SHA_MIX(5, 2, 13, 7);
378   SHA_RND4(A, B, C, D, E, 5);
379   SHA_MIX(6, 3, 14, 8);
380   SHA_RND4(E, A, B, C, D, 6);
381   SHA_MIX(7, 4, 15, 9);
382   SHA_RND4(D, E, A, B, C, 7);
383   SHA_MIX(8, 5, 0, 10);
384   SHA_RND4(C, D, E, A, B, 8);
385   SHA_MIX(9, 6, 1, 11);
386   SHA_RND4(B, C, D, E, A, 9);
387   SHA_MIX(10, 7, 2, 12);
388   SHA_RND4(A, B, C, D, E, 10);
389   SHA_MIX(11, 8, 3, 13);
390   SHA_RND4(E, A, B, C, D, 11);
391   SHA_MIX(12, 9, 4, 14);
392   SHA_RND4(D, E, A, B, C, 12);
393   SHA_MIX(13, 10, 5, 15);
394   SHA_RND4(C, D, E, A, B, 13);
395   SHA_MIX(14, 11, 6, 0);
396   SHA_RND4(B, C, D, E, A, 14);
397   SHA_MIX(15, 12, 7, 1);
398   SHA_RND4(A, B, C, D, E, 15);
399 
400   XH(0) += A;
401   XH(1) += B;
402   XH(2) += C;
403   XH(3) += D;
404   XH(4) += E;
405 }
406