/* Sha256.c -- SHA-256 Hash
2021-04-01 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
4 
5 #include "Precomp.h"
6 
7 #include <string.h>
8 
9 #include "CpuArch.h"
10 #include "RotateDefs.h"
11 #include "Sha256.h"
12 
/* Placeholder for old MSVC (< 1900): the USE_MY_MM switch is commented out,
   so this conditional currently defines nothing. */
#if defined(_MSC_VER) && (_MSC_VER < 1900)
// #define USE_MY_MM
#endif

/* Define _SHA_SUPPORTED when both the target CPU family (x86/x64 or
   ARM/ARM64) and the compiler version pass the checks below.
   _SHA_SUPPORTED enables the run-time selectable Sha256_UpdateBlocks_HW
   path used by the rest of this file.
   The version thresholds carry the author's "fix that check" marker,
   i.e. they are provisional. */
#ifdef MY_CPU_X86_OR_AMD64
  #ifdef _MSC_VER
    #if _MSC_VER >= 1200
      #define _SHA_SUPPORTED
    #endif
  #elif defined(__clang__)
    #if (__clang_major__ >= 8) // fix that check
      #define _SHA_SUPPORTED
    #endif
  #elif defined(__GNUC__)
    #if (__GNUC__ >= 8) // fix that check
      #define _SHA_SUPPORTED
    #endif
  #elif defined(__INTEL_COMPILER)
    #if (__INTEL_COMPILER >= 1800) // fix that check
      #define _SHA_SUPPORTED
    #endif
  #endif
#elif defined(MY_CPU_ARM_OR_ARM64)
  #ifdef _MSC_VER
    #if _MSC_VER >= 1910
      #define _SHA_SUPPORTED
    #endif
  #elif defined(__clang__)
    #if (__clang_major__ >= 8) // fix that check
      #define _SHA_SUPPORTED
    #endif
  #elif defined(__GNUC__)
    #if (__GNUC__ >= 6) // fix that check
      #define _SHA_SUPPORTED
    #endif
  #endif
#endif
50 
51 void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
52 
53 #ifdef _SHA_SUPPORTED
54   void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
55 
56   static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
57   static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
58 
59   #define UPDATE_BLOCKS(p) p->func_UpdateBlocks
60 #else
61   #define UPDATE_BLOCKS(p) Sha256_UpdateBlocks
62 #endif
63 
64 
Sha256_SetFunction(CSha256 * p,unsigned algo)65 BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
66 {
67   SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;
68 
69   #ifdef _SHA_SUPPORTED
70     if (algo != SHA256_ALGO_SW)
71     {
72       if (algo == SHA256_ALGO_DEFAULT)
73         func = g_FUNC_UPDATE_BLOCKS;
74       else
75       {
76         if (algo != SHA256_ALGO_HW)
77           return False;
78         func = g_FUNC_UPDATE_BLOCKS_HW;
79         if (!func)
80           return False;
81       }
82     }
83   #else
84     if (algo > 1)
85       return False;
86   #endif
87 
88   p->func_UpdateBlocks = func;
89   return True;
90 }
91 
92 
/* define it for speed optimization */

/* STEP_PRE  : number of rounds expanded per iteration of the first loop
               (rounds 0..15) in Sha256_UpdateBlocks.
   STEP_MAIN : number of rounds expanded per iteration of the second loop
               (rounds 16..63).
   _SFX builds use 1 for both, i.e. no unrolling. */
#ifdef _SFX
  #define STEP_PRE 1
  #define STEP_MAIN 1
#else
  #define STEP_PRE 2
  #define STEP_MAIN 4
  // #define _SHA256_UNROLL
#endif

/* Unless the main loop is expanded 16 rounds at a time, keep the whole
   64-word message schedule (W[64]) instead of a 16-word ring buffer. */
#if STEP_MAIN != 16
  #define _SHA256_BIG_W
#endif
107 
108 
109 
110 
/*
  Sha256_InitState: reset the byte counter and load the eight SHA-256
  initial hash values into p->state.
  p->buffer and p->func_UpdateBlocks are not modified.
*/
void Sha256_InitState(CSha256 *p)
{
  p->count = 0;
  p->state[0] = 0x6a09e667;
  p->state[1] = 0xbb67ae85;
  p->state[2] = 0x3c6ef372;
  p->state[3] = 0xa54ff53a;
  p->state[4] = 0x510e527f;
  p->state[5] = 0x9b05688c;
  p->state[6] = 0x1f83d9ab;
  p->state[7] = 0x5be0cd19;
}
123 
/*
  Sha256_Init: prepare (p) for a new hash computation.

  Selects the default block function and then resets counter and state
  via Sha256_InitState().
  Without _SHA_SUPPORTED the pointer is set to NULL; that is harmless because
  UPDATE_BLOCKS(p) then expands to Sha256_UpdateBlocks and never reads it.
*/
void Sha256_Init(CSha256 *p)
{
  p->func_UpdateBlocks =
  #ifdef _SHA_SUPPORTED
      g_FUNC_UPDATE_BLOCKS;
  #else
      NULL;
  #endif
  Sha256_InitState(p);
}
134 
/* SHA-256 logical functions (FIPS 180-4 names: Sigma0, Sigma1, sigma0, sigma1).
   Arguments are expanded without extra parentheses: pass simple operands only. */
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))

/* Choose and majority functions written with fewer operations
   than the textbook forms. */
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))


/* All macros below are meant to be expanded inside Sha256_UpdateBlocks and
   rely on its locals: W, data, j, tmp and a..h, plus the K alias. */

/* Rounds 0..15: load word (j + i) of the input block with GetBe32, store it
   in W and yield its value. */
#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))

/* Message-schedule terms that depend on W[t-2], W[t-7] and W[t-15]. */
#define blk2_main(j, i)  s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))

#ifdef _SHA256_BIG_W
    // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
    #define w(j, i)     W[(size_t)(j) + i]
    /* full 64-word schedule: W[t] = W[t-16] + ... */
    #define blk2(j, i)  (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
    /* 16-word ring buffer: slot (t & 15) still holds W[t-16], so update in place */
    #if STEP_MAIN == 16
        #define w(j, i)  W[(i) & 15]
    #else
        #define w(j, i)  W[((size_t)(j) + (i)) & 15]
    #endif
    #define blk2(j, i)  (w(j, i) += blk2_main(j, i))
#endif

/* Rounds 16..63: compute, store and yield schedule word (j + i). */
#define W_MAIN(i)  blk2(j, i)


/* One round with explicit rotation of the working variables a..h.
   wx is W_PRE or W_MAIN; the round number is (j + i). */
#define T1(wx, i) \
    tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    h = g; \
    g = f; \
    f = e; \
    e = d + tmp; \
    tmp += S0(a) + Maj(a, b, c); \
    d = c; \
    c = b; \
    b = a; \
    a = tmp;

#define R1_PRE(i)  T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i)

/* Two consecutive main rounds. */
#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
#define R2_MAIN(i) \
    R1_MAIN(i) \
    R1_MAIN(i + 1)

#endif



#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8

/* Unrolled variants: instead of moving values between a..h, the variable
   names are permuted in the argument lists of successive rounds. */

#define T4( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    tmp = h; \
    h += d; \
    d = tmp + S0(a) + Maj(a, b, c);

#define R4( wx, i) \
    T4 ( a,b,c,d,e,f,g,h, wx, (i  )); \
    T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
    T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
    T4 ( b,c,d,a,f,g,h,e, wx, (i+3));

#define R4_PRE(i)  R4( W_PRE, i)
#define R4_MAIN(i) R4( W_MAIN, i)


#define T8( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    d += h; \
    h += S0(a) + Maj(a, b, c);

#define R8( wx, i) \
    T8 ( a,b,c,d,e,f,g,h, wx, i  ); \
    T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
    T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
    T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
    T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
    T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
    T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
    T8 ( b,c,d,e,f,g,h,a, wx, i+7);

#define R8_PRE(i)  R8( W_PRE, i)
#define R8_MAIN(i) R8( W_MAIN, i)

#endif
225 
226 void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
227 
228 // static
229 extern MY_ALIGN(64)
230 const UInt32 SHA256_K_ARRAY[64];
231 
232 MY_ALIGN(64)
233 const UInt32 SHA256_K_ARRAY[64] = {
234   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
235   0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
236   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
237   0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
238   0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
239   0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
240   0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
241   0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
242   0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
243   0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
244   0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
245   0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
246   0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
247   0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
248   0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
249   0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
250 };
251 
252 #define K SHA256_K_ARRAY
253 
254 
255 MY_NO_INLINE
Sha256_UpdateBlocks(UInt32 state[8],const Byte * data,size_t numBlocks)256 void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
257 {
258   UInt32 W
259   #ifdef _SHA256_BIG_W
260       [64];
261   #else
262       [16];
263   #endif
264 
265   unsigned j;
266 
267   UInt32 a,b,c,d,e,f,g,h;
268 
269   #if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
270   UInt32 tmp;
271   #endif
272 
273   a = state[0];
274   b = state[1];
275   c = state[2];
276   d = state[3];
277   e = state[4];
278   f = state[5];
279   g = state[6];
280   h = state[7];
281 
282   while (numBlocks)
283   {
284 
285   for (j = 0; j < 16; j += STEP_PRE)
286   {
287     #if STEP_PRE > 4
288 
289       #if STEP_PRE < 8
290       R4_PRE(0);
291       #else
292       R8_PRE(0);
293       #if STEP_PRE == 16
294       R8_PRE(8);
295       #endif
296       #endif
297 
298     #else
299 
300       R1_PRE(0);
301       #if STEP_PRE >= 2
302       R1_PRE(1);
303       #if STEP_PRE >= 4
304       R1_PRE(2);
305       R1_PRE(3);
306       #endif
307       #endif
308 
309     #endif
310   }
311 
312   for (j = 16; j < 64; j += STEP_MAIN)
313   {
314     #if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
315 
316       #if STEP_MAIN < 8
317       R4_MAIN(0);
318       #else
319       R8_MAIN(0);
320       #if STEP_MAIN == 16
321       R8_MAIN(8);
322       #endif
323       #endif
324 
325     #else
326 
327       R1_MAIN(0);
328       #if STEP_MAIN >= 2
329       R1_MAIN(1);
330       #if STEP_MAIN >= 4
331       R2_MAIN(2);
332       #if STEP_MAIN >= 8
333       R2_MAIN(4);
334       R2_MAIN(6);
335       #if STEP_MAIN >= 16
336       R2_MAIN(8);
337       R2_MAIN(10);
338       R2_MAIN(12);
339       R2_MAIN(14);
340       #endif
341       #endif
342       #endif
343       #endif
344     #endif
345   }
346 
347   a += state[0]; state[0] = a;
348   b += state[1]; state[1] = b;
349   c += state[2]; state[2] = c;
350   d += state[3]; state[3] = d;
351   e += state[4]; state[4] = e;
352   f += state[5]; state[5] = f;
353   g += state[6]; state[6] = g;
354   h += state[7]; state[7] = h;
355 
356   data += 64;
357   numBlocks--;
358   }
359 
360   /* Wipe variables */
361   /* memset(W, 0, sizeof(W)); */
362 }
363 
/* The round helpers are only needed by Sha256_UpdateBlocks above. */
#undef S0
#undef S1
#undef s0
#undef s1
#undef K

/* Process the single 64-byte block held in p->buffer with the block
   function selected by UPDATE_BLOCKS(p). */
#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
371 
/*
  Sha256_Update: feed (size) bytes from (data) into the hash.

  p->count is the total number of bytes fed so far; its low 6 bits give the
  fill level of the 64-byte p->buffer. Input is handled in three steps:
    1) complete and process a partially filled buffer, if there is one;
    2) process all remaining whole 64-byte blocks directly from (data);
    3) keep the remaining tail (< 64 bytes) in p->buffer.
  A call with size == 0 does nothing and does not touch (data).
*/
void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{
  if (size == 0)
    return;

  {
    unsigned pos = (unsigned)p->count & 0x3F;
    unsigned num;

    p->count += size;

    /* free space left in the buffer */
    num = 64 - pos;
    if (num > size)
    {
      /* not enough input to complete a block: just append */
      memcpy(p->buffer + pos, data, size);
      return;
    }

    if (pos != 0)
    {
      size -= num;
      memcpy(p->buffer + pos, data, num);
      data += num;
      Sha256_UpdateBlock(p);
    }
  }
  {
    /* numBlocks may be 0 here; the block function is still called */
    size_t numBlocks = size >> 6;
    UPDATE_BLOCKS(p)(p->state, data, numBlocks);
    size &= 0x3F;
    if (size == 0)
      return;
    data += (numBlocks << 6);
    memcpy(p->buffer, data, size);
  }
}
408 
409 
/*
  Sha256_Final: finish the computation and write the hash to (digest).

  Appends the padding (a 0x80 byte, zero bytes, and the 64-bit message length
  in bits stored with SetBe32 in the last 8 bytes of the final block),
  processes the final block(s), and stores the eight state words to (digest)
  with SetBe32: 8 stores at 4-byte offsets, so (digest) must have room for
  32 bytes.
  Afterwards the counter and state are reset with Sha256_InitState();
  p->func_UpdateBlocks is kept and p->buffer is not cleared.
*/
void Sha256_Final(CSha256 *p, Byte *digest)
{
  unsigned pos = (unsigned)p->count & 0x3F;
  unsigned i;

  p->buffer[pos++] = 0x80;

  /* no room left for the 8-byte length field: zero-fill and process this
     block, then continue with a fresh one */
  if (pos > (64 - 8))
  {
    while (pos != 64) { p->buffer[pos++] = 0; }
    Sha256_UpdateBlock(p);
    pos = 0;
  }

  /* zero everything between the data/0x80 marker and the length field */
  memset(&p->buffer[pos], 0, (64 - 8) - pos);

  {
    /* message length in bits, high word first */
    UInt64 numBits = (p->count << 3);
    SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
    SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
  }

  Sha256_UpdateBlock(p);

  /* store two state words per iteration */
  for (i = 0; i < 8; i += 2)
  {
    UInt32 v0 = p->state[i];
    UInt32 v1 = p->state[(size_t)i + 1];
    SetBe32(digest    , v0);
    SetBe32(digest + 4, v1);
    digest += 8;
  }

  Sha256_InitState(p);
}
461 
462 
/*
  Sha256Prepare: set up the file-level block-function pointers.

  With _SHA_SUPPORTED, checks the CPU (SHA + SSSE3 on x86/x64,
  CPU_IsSupported_SHA2() otherwise) and, if the check passes, makes
  Sha256_UpdateBlocks_HW both the default function (g_FUNC_UPDATE_BLOCKS)
  and the explicit HW function (g_FUNC_UPDATE_BLOCKS_HW). Otherwise the
  default stays Sha256_UpdateBlocks and the HW pointer is NULL.
  Objects initialized before this call keep the function they already hold.
  Without _SHA_SUPPORTED the function does nothing.

  Declared with (void): an empty parameter list in a C definition does not
  provide a prototype.
*/
void Sha256Prepare(void)
{
  #ifdef _SHA_SUPPORTED
  SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
  f = Sha256_UpdateBlocks;
  f_hw = NULL;
  #ifdef MY_CPU_X86_OR_AMD64
  /* when USE_MY_MM is defined the CPU check is omitted and the block below
     runs unconditionally */
  #ifndef USE_MY_MM
  if (CPU_IsSupported_SHA()
      && CPU_IsSupported_SSSE3()
      // && CPU_IsSupported_SSE41()
      )
  #endif
  #else
  if (CPU_IsSupported_SHA2())
  #endif
  {
    // printf("\n========== HW SHA256 ======== \n");
    f = f_hw = Sha256_UpdateBlocks_HW;
  }
  g_FUNC_UPDATE_BLOCKS    = f;
  g_FUNC_UPDATE_BLOCKS_HW = f_hw;
  #endif
}
487