1 /*
2    BLAKE2 reference source code package - optimized C implementations
3 
4    Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
5 
6    To the extent possible under law, the author(s) have dedicated all copyright
7    and related and neighboring rights to this software to the public domain
8    worldwide. This software is distributed without any warranty.
9 
10    You should have received a copy of the CC0 Public Domain Dedication along with
11    this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
12 */
13 
14 #include <stdint.h>
15 #include <string.h>
16 #include <stdio.h>
17 
18 #include "blake2.h"
19 #include "blake2-impl.h"
20 
21 #include "blake2-config.h"
22 
23 #if defined(_MSC_VER)
24 #include <intrin.h>
25 #endif
26 
27 #if defined(HAVE_SSE2)
28 #include <emmintrin.h>
29 // MSVC only defines  _mm_set_epi64x for x86_64...
30 #if defined(_MSC_VER) && !defined(_M_X64)
_mm_set_epi64x(const uint64_t u1,const uint64_t u0)31 static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
32 {
33   return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 );
34 }
35 #endif
36 #endif
37 
38 #if defined(HAVE_SSSE3)
39 #include <tmmintrin.h>
40 #endif
41 #if defined(HAVE_SSE4_1)
42 #include <smmintrin.h>
43 #endif
44 #if defined(HAVE_AVX)
45 #include <immintrin.h>
46 #endif
47 #if defined(HAVE_XOP) && !defined(_MSC_VER)
48 #include <x86intrin.h>
49 #endif
50 
51 
52 
53 #include "blake2b-round.h"
54 
/*
 * BLAKE2b initialization vector: eight 64-bit words.
 * blake2b_init0() copies them into the chaining value, blake2b_init_param()
 * XORs them byte-wise with the parameter block, and blake2b_compress()
 * loads them into the lower half of its working state.
 */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL, /* IV[0] */
  0xbb67ae8584caa73bULL, /* IV[1] */
  0x3c6ef372fe94f82bULL, /* IV[2] */
  0xa54ff53a5f1d36f1ULL, /* IV[3] */
  0x510e527fade682d1ULL, /* IV[4] */
  0x9b05688c2b3e6c1fULL, /* IV[5] */
  0x1f83d9abfb41bd6bULL, /* IV[6] */
  0x5be0cd19137e2179ULL  /* IV[7] */
};
62 
/*
 * Message word permutation table: 12 rows, one per round, each a
 * permutation of the indices 0..15. Rows 10 and 11 repeat rows 0 and 1.
 * NOTE(review): nothing in this file references the table by name; the
 * ROUND macros come from blake2b-round.h -- confirm whether it is still
 * needed by this SSE variant.
 */
static const uint8_t blake2b_sigma[12][16] =
{
  /* round  0 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* round  1 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  /* round  2 */ { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  /* round  3 */ {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  /* round  4 */ {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  /* round  5 */ {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  /* round  6 */ { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  /* round  7 */ { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  /* round  8 */ {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  /* round  9 */ { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
  /* round 10 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* round 11 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
78 
79 
80 /* Some helper functions, not necessarily useful */
/* Set the second finalization flag word, f[1], to all ones. Always returns 0. */
static inline int blake2b_set_lastnode( blake2b_state *S )
{
  S->f[1] = ~0ULL;

  return 0;
}
86 
/* Reset the second finalization flag word, f[1], to zero. Always returns 0. */
static inline int blake2b_clear_lastnode( blake2b_state *S )
{
  S->f[1] = 0ULL;

  return 0;
}
92 
/*
 * Mark the state as processing its final block: f[0] becomes all ones.
 * When S->last_node is set, the last-node flag (f[1]) is raised first.
 * Always returns 0.
 */
static inline int blake2b_set_lastblock( blake2b_state *S )
{
  if( S->last_node )
  {
    blake2b_set_lastnode( S );
  }

  S->f[0] = ~0ULL;
  return 0;
}
100 
/*
 * Undo blake2b_set_lastblock(): f[0] is zeroed, and when S->last_node is
 * set the last-node flag (f[1]) is cleared first. Always returns 0.
 */
static inline int blake2b_clear_lastblock( blake2b_state *S )
{
  if( S->last_node )
  {
    blake2b_clear_lastnode( S );
  }

  S->f[0] = 0ULL;
  return 0;
}
108 
109 
/*
 * Add `inc` to the 128-bit counter held in S->t, where t[0] is the low
 * word and t[1] the high word. Always returns 0.
 */
static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
  /* Do the addition in one 128-bit integer so the carry is handled for us. */
  __uint128_t counter = S->t[1];
  counter = ( counter << 64 ) | S->t[0];
  counter += inc;
  S->t[0] = ( uint64_t )counter;
  S->t[1] = ( uint64_t )( counter >> 64 );
#else
  /* Portable path: the low word wrapped around iff the sum is below inc. */
  const uint64_t low = S->t[0] + inc;
  S->t[0] = low;
  S->t[1] += ( low < inc );
#endif
  return 0;
}
124 
125 
126 // Parameter-related functions
/* Store `digest_length` in the parameter block. Always returns 0. */
static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
{
  P->digest_length = digest_length;

  return 0;
}
132 
/* Store `fanout` in the parameter block. Always returns 0. */
static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
{
  P->fanout = fanout;

  return 0;
}
138 
/* Store `depth` in the parameter block's depth field. Always returns 0. */
static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
{
  P->depth = depth;

  return 0;
}
144 
/* Store `leaf_length` in the parameter block. Always returns 0. */
static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
{
  P->leaf_length = leaf_length;

  return 0;
}
150 
/* Store `node_offset` in the parameter block. Always returns 0. */
static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
{
  P->node_offset = node_offset;

  return 0;
}
156 
/* Store `node_depth` in the parameter block. Always returns 0. */
static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
{
  P->node_depth = node_depth;

  return 0;
}
162 
/* Store `inner_length` in the parameter block. Always returns 0. */
static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
{
  P->inner_length = inner_length;

  return 0;
}
168 
/*
 * Copy BLAKE2B_SALTBYTES bytes from `salt` into the parameter block.
 * The caller must supply a buffer of at least that size. Always returns 0.
 */
static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
{
  memcpy( P->salt, salt, BLAKE2B_SALTBYTES );

  return 0;
}
174 
/*
 * Copy BLAKE2B_PERSONALBYTES bytes from `personal` into the parameter
 * block. The caller must supply a buffer of at least that size.
 * Always returns 0.
 */
static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
{
  memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );

  return 0;
}
180 
/*
 * Reset the whole state to zero and load the eight IV words into the
 * chaining value S->h. Always returns 0.
 */
static inline int blake2b_init0( blake2b_state *S )
{
  memset( S, 0, sizeof( *S ) );

  for( int word = 0; word < 8; ++word )
  {
    S->h[word] = blake2b_IV[word];
  }

  return 0;
}
189 
190 
191 
192 #if defined(__cplusplus)
193 extern "C" {
194 #endif
195   int blake2b_init( blake2b_state *S, size_t outlen );
196   int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
197   int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
198   int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
199   int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
200   int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
201 #if defined(__cplusplus)
202 }
203 #endif
204 
205 /* init xors IV with input parameter block */
/*
 * Initialize S from an explicit parameter block.
 *
 * The state is zeroed, then the first BLAKE2B_OUTBYTES bytes of the
 * chaining value S->h are set to the byte-wise XOR of the IV and the
 * parameter block. S->outlen is taken from P->digest_length.
 *
 * S  state to initialize
 * P  parameter block; read as raw bytes for BLAKE2B_OUTBYTES bytes
 *
 * Always returns 0.
 */
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
  const uint8_t *iv_bytes = ( const uint8_t * )blake2b_IV;
  const uint8_t *param_bytes = ( const uint8_t * )P;
  uint8_t *chain_bytes = ( uint8_t * )S->h;

  memset( S, 0, sizeof( *S ) );

  /* IV XOR ParamBlock */
  for( int idx = 0; idx < BLAKE2B_OUTBYTES; ++idx )
  {
    chain_bytes[idx] = iv_bytes[idx] ^ param_bytes[idx];
  }

  S->outlen = P->digest_length;
  return 0;
}
221 
222 
223 /* Some sort of default parameter block initialization, for sequential blake2b */
224 
/*
 * Initialize S for unkeyed hashing with a digest of `outlen` bytes.
 *
 * Builds a default parameter block and delegates to blake2b_init_param().
 * Returns -1 when outlen is 0 or exceeds BLAKE2B_OUTBYTES, otherwise the
 * result of blake2b_init_param().
 */
int blake2b_init( blake2b_state *S, size_t outlen )
{
  if( outlen == 0 || outlen > BLAKE2B_OUTBYTES )
  {
    return -1;
  }

  /*
   * Positional initializer: the field order is defined by blake2b_param in
   * blake2.h. Presumably the first entry is the digest length and the
   * second the key length (0 here, since no key is used) -- confirm
   * against the struct definition.
   */
  const blake2b_param P =
  {
    ( uint8_t ) outlen,
    0,
    1,
    1,
    0,
    0,
    0,
    0,
    {0},
    {0},
    {0}
  };

  return blake2b_init_param( S, &P );
}
245 
/*
 * Initialize S for keyed hashing with a digest of `outlen` bytes.
 *
 * After setting up the state from a parameter block, the key is copied
 * into a zero-padded block of BLAKE2B_BLOCKBYTES bytes and fed through
 * blake2b_update() as the first input block. The temporary copy of the
 * key is wiped before returning.
 *
 * S       state to initialize
 * outlen  digest length, 1..BLAKE2B_OUTBYTES
 * key     key bytes; must not be NULL
 * keylen  key length, 1..BLAKE2B_KEYBYTES
 *
 * Returns 0 on success, -1 on an invalid argument or if initialization or
 * absorbing the key block fails.
 */
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;

  if ( ( NULL == key ) || ( !keylen ) || ( keylen > BLAKE2B_KEYBYTES ) ) return -1;

  /*
   * Positional initializer: the field order is defined by blake2b_param in
   * blake2.h. Presumably the first two entries are the digest length and
   * the key length -- confirm against the struct definition.
   */
  const blake2b_param P =
  {
    ( uint8_t ) outlen,
    ( uint8_t ) keylen,
    1,
    1,
    0,
    0,
    0,
    0,
    {0},
    {0},
    {0}
  };

  /* A failed initialization must be reported as failure, not as success. */
  if( blake2b_init_param( S, &P ) < 0 )
    return -1;

  {
    uint8_t block[BLAKE2B_BLOCKBYTES];
    int status;

    memset( block, 0, BLAKE2B_BLOCKBYTES );
    memcpy( block, key, keylen );
    status = blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
    secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */

    if( status < 0 )
      return -1;
  }
  return 0;
}
279 
/*
 * Compression function: mixes one message block of BLAKE2B_BLOCKBYTES
 * bytes into the chaining value S->h, using the counter S->t and the
 * finalization flags S->f.
 *
 * The working state is kept as four rows, each split into a low and a
 * high __m128i half. The ROUND macro (from blake2b-round.h, not visible
 * here) operates directly on the local names declared below (row*, b0/b1,
 * t0/t1, m*, r16/r24), so those names must not be changed.
 *
 * Always returns 0.
 */
static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
  __m128i row1l, row1h;
  __m128i row2l, row2h;
  __m128i row3l, row3h;
  __m128i row4l, row4h;
  __m128i b0, b1;
  __m128i t0, t1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
  /* Byte-shuffle index vectors; presumably consumed by the rotation
     macros behind ROUND -- confirm in blake2b-round.h. */
  const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
  const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
#endif
#if defined(HAVE_SSE4_1)
  /* SSE4.1 path: load the message as eight 16-byte vectors. */
  const __m128i m0 = LOADU( block + 00 );
  const __m128i m1 = LOADU( block + 16 );
  const __m128i m2 = LOADU( block + 32 );
  const __m128i m3 = LOADU( block + 48 );
  const __m128i m4 = LOADU( block + 64 );
  const __m128i m5 = LOADU( block + 80 );
  const __m128i m6 = LOADU( block + 96 );
  const __m128i m7 = LOADU( block + 112 );
#else
  /* Fallback path: read the message as sixteen 64-bit words.
     NOTE(review): these reads go through a uint8_t* cast to uint64_t*,
     which relies on `block` being suitably aligned and on the compiler
     tolerating the type pun -- confirm, or switch to a memcpy-based load. */
  const uint64_t  m0 = ( ( uint64_t * )block )[ 0];
  const uint64_t  m1 = ( ( uint64_t * )block )[ 1];
  const uint64_t  m2 = ( ( uint64_t * )block )[ 2];
  const uint64_t  m3 = ( ( uint64_t * )block )[ 3];
  const uint64_t  m4 = ( ( uint64_t * )block )[ 4];
  const uint64_t  m5 = ( ( uint64_t * )block )[ 5];
  const uint64_t  m6 = ( ( uint64_t * )block )[ 6];
  const uint64_t  m7 = ( ( uint64_t * )block )[ 7];
  const uint64_t  m8 = ( ( uint64_t * )block )[ 8];
  const uint64_t  m9 = ( ( uint64_t * )block )[ 9];
  const uint64_t m10 = ( ( uint64_t * )block )[10];
  const uint64_t m11 = ( ( uint64_t * )block )[11];
  const uint64_t m12 = ( ( uint64_t * )block )[12];
  const uint64_t m13 = ( ( uint64_t * )block )[13];
  const uint64_t m14 = ( ( uint64_t * )block )[14];
  const uint64_t m15 = ( ( uint64_t * )block )[15];
#endif
  /* Rows 1 and 2: the current chaining value h[0..7]. */
  row1l = LOADU( &S->h[0] );
  row1h = LOADU( &S->h[2] );
  row2l = LOADU( &S->h[4] );
  row2h = LOADU( &S->h[6] );
  /* Row 3: IV[0..3]. */
  row3l = LOADU( &blake2b_IV[0] );
  row3h = LOADU( &blake2b_IV[2] );
  /* Row 4: IV[4..7] XORed with the data at S->t and S->f. */
  row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
  row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
  /* Twelve rounds. */
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );
  /* Feed-forward: h[0..3] ^= row1 ^ row3, h[4..7] ^= row2 ^ row4. */
  row1l = _mm_xor_si128( row3l, row1l );
  row1h = _mm_xor_si128( row3h, row1h );
  STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
  STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
  row2l = _mm_xor_si128( row4l, row2l );
  row2h = _mm_xor_si128( row4h, row2h );
  STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
  STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
  return 0;
}
349 
350 
/*
 * Absorb `inlen` bytes from `in` into the hash state.
 *
 * S->buf holds up to two blocks. Input is appended to it; a block is
 * compressed only when more input arrives than the buffer can still hold,
 * so the most recent data always stays buffered for blake2b_final().
 *
 * Always returns 0.
 */
int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
{
  while( inlen > 0 )
  {
    uint32_t used = S->buflen;
    uint32_t room = 2 * BLAKE2B_BLOCKBYTES - used;

    if( inlen <= room )
    {
      /* Everything that is left fits: buffer it and stop, no compression. */
      memcpy( S->buf + used, in, inlen );
      S->buflen += ( uint32_t ) inlen;
      in += inlen;
      inlen = 0;
      continue;
    }

    /* Top the buffer up to two full blocks. */
    memcpy( S->buf + used, in, room );
    S->buflen += room;

    /* Consume the first block. */
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );

    /* Move the second block to the front of the buffer. */
    memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );
    S->buflen -= BLAKE2B_BLOCKBYTES;

    in += room;
    inlen -= room;
  }

  return 0;
}
380 
381 
/*
 * Finish the hash and write `outlen` digest bytes to `out`.
 *
 * If more than one block is buffered, the first is compressed as an
 * ordinary block. The remaining bytes are zero-padded and compressed with
 * the last-block flag set, and the leading bytes of S->h are copied out.
 *
 * Returns -1 when `outlen` differs from the digest length stored in the
 * state, 0 otherwise.
 */
int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
{
  if( outlen != S->outlen )
  {
    return -1;
  }

  if( S->buflen > BLAKE2B_BLOCKBYTES )
  {
    /* Two blocks are pending: process the first, keep the remainder. */
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );
    S->buflen -= BLAKE2B_BLOCKBYTES;
    memmove( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
  }

  /* The counter covers only real input bytes, not the padding. */
  blake2b_increment_counter( S, S->buflen );
  blake2b_set_lastblock( S );

  /* Padding: zero everything after the buffered bytes. */
  memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen );
  blake2b_compress( S, S->buf );

  memcpy( out, &S->h[0], outlen );
  return 0;
}
401 
402 
blake2b(uint8_t * out,const void * in,const void * key,size_t outlen,size_t inlen,size_t keylen)403 int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
404 {
405   blake2b_state S[1];
406 
407   /* Verify parameters */
408   if ( NULL == in && inlen > 0 ) return -1;
409 
410   if ( NULL == out ) return -1;
411 
412   if( NULL == key && keylen > 0 ) return -1;
413 
414   if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
415 
416   if( keylen > BLAKE2B_KEYBYTES ) return -1;
417 
418   if( keylen )
419   {
420     if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
421   }
422   else
423   {
424     if( blake2b_init( S, outlen ) < 0 ) return -1;
425   }
426 
427   if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
428   return blake2b_final( S, out, outlen );
429 }
430 
431 #if defined(SUPERCOP)
crypto_hash(unsigned char * out,unsigned char * in,unsigned long long inlen)432 int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
433 {
434   return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
435 }
436 #endif
437