xref: /freebsd/sys/contrib/libb2/blake2s.c (revision 0e33efe4)
/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
130e33efe4SConrad Meyer 
140e33efe4SConrad Meyer #include <stdint.h>
150e33efe4SConrad Meyer #include <string.h>
160e33efe4SConrad Meyer #include <stdio.h>
170e33efe4SConrad Meyer 
180e33efe4SConrad Meyer #include "blake2.h"
190e33efe4SConrad Meyer #include "blake2-impl.h"
200e33efe4SConrad Meyer 
210e33efe4SConrad Meyer #include "blake2-config.h"
220e33efe4SConrad Meyer 
230e33efe4SConrad Meyer #if defined(_MSC_VER)
240e33efe4SConrad Meyer #include <intrin.h>
250e33efe4SConrad Meyer #endif
260e33efe4SConrad Meyer 
270e33efe4SConrad Meyer #if defined(HAVE_SSE2)
280e33efe4SConrad Meyer #include <emmintrin.h>
290e33efe4SConrad Meyer // MSVC only defines  _mm_set_epi64x for x86_64...
300e33efe4SConrad Meyer #if defined(_MSC_VER) && !defined(_M_X64)
_mm_set_epi64x(const uint64_t u1,const uint64_t u0)310e33efe4SConrad Meyer static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
320e33efe4SConrad Meyer {
330e33efe4SConrad Meyer   return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 );
340e33efe4SConrad Meyer }
350e33efe4SConrad Meyer #endif
360e33efe4SConrad Meyer #endif
370e33efe4SConrad Meyer 
380e33efe4SConrad Meyer 
390e33efe4SConrad Meyer #if defined(HAVE_SSSE3)
400e33efe4SConrad Meyer #include <tmmintrin.h>
410e33efe4SConrad Meyer #endif
420e33efe4SConrad Meyer #if defined(HAVE_SSE4_1)
430e33efe4SConrad Meyer #include <smmintrin.h>
440e33efe4SConrad Meyer #endif
450e33efe4SConrad Meyer #if defined(HAVE_AVX)
460e33efe4SConrad Meyer #include <immintrin.h>
470e33efe4SConrad Meyer #endif
480e33efe4SConrad Meyer #if defined(HAVE_XOP) && !defined(_MSC_VER)
490e33efe4SConrad Meyer #include <x86intrin.h>
500e33efe4SConrad Meyer #endif
510e33efe4SConrad Meyer 
520e33efe4SConrad Meyer #include "blake2s-round.h"
530e33efe4SConrad Meyer 
/* BLAKE2s initialization vector: eight 32-bit words. */
static const uint32_t blake2s_IV[8] =
{
  0x6a09e667U, 0xbb67ae85U, 0x3c6ef372U, 0xa54ff53aU,
  0x510e527fU, 0x9b05688cU, 0x1f83d9abU, 0x5be0cd19U
};
590e33efe4SConrad Meyer 
/*
 * Message-word permutation table: ten rows, each a permutation of 0..15.
 * NOTE(review): nothing in the visible part of this file indexes this table;
 * the rounds are driven by the ROUND macro instead - confirm before removing.
 */
static const uint8_t blake2s_sigma[10][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 }
};
730e33efe4SConrad Meyer 
740e33efe4SConrad Meyer 
750e33efe4SConrad Meyer /* Some helper functions, not necessarily useful */
/* Set every bit of the finalization word f[1]. Always returns 0. */
static inline int blake2s_set_lastnode( blake2s_state *S )
{
  S->f[1] = ~0U;

  return 0;
}
810e33efe4SConrad Meyer 
/* Reset the finalization word f[1] to zero. Always returns 0. */
static inline int blake2s_clear_lastnode( blake2s_state *S )
{
  S->f[1] = 0U;

  return 0;
}
870e33efe4SConrad Meyer 
/*
 * Set every bit of the finalization word f[0]; when the state's last_node
 * field is non-zero, f[1] is set to all ones as well. Always returns 0.
 */
static inline int blake2s_set_lastblock( blake2s_state *S )
{
  if( S->last_node )
  {
    S->f[1] = ~0U;
  }

  S->f[0] = ~0U;

  return 0;
}
950e33efe4SConrad Meyer 
/*
 * Reset the finalization word f[0] to zero; when the state's last_node field
 * is non-zero, f[1] is zeroed as well. Always returns 0.
 */
static inline int blake2s_clear_lastblock( blake2s_state *S )
{
  if( S->last_node )
  {
    S->f[1] = 0U;
  }

  S->f[0] = 0U;

  return 0;
}
1030e33efe4SConrad Meyer 
/*
 * Add inc to the 64-bit counter held as two 32-bit halves (t[0] is the low
 * half, t[1] the high half), carrying from low to high. Always returns 0.
 */
static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
  const uint64_t high = ( uint64_t )S->t[1] << 32;
  const uint64_t total = ( high | S->t[0] ) + inc;

  S->t[0] = ( uint32_t )total;
  S->t[1] = ( uint32_t )( total >> 32 );

  return 0;
}
1120e33efe4SConrad Meyer 
1130e33efe4SConrad Meyer 
1140e33efe4SConrad Meyer // Parameter-related functions
/* Store digest_length in the parameter block. Always returns 0. */
static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length )
{
  P->digest_length = digest_length;

  return 0;
}
1200e33efe4SConrad Meyer 
/* Store fanout in the parameter block. Always returns 0. */
static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout )
{
  P->fanout = fanout;

  return 0;
}
1260e33efe4SConrad Meyer 
/* Store depth in the parameter block's depth field. Always returns 0. */
static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth )
{
  P->depth = depth;

  return 0;
}
1320e33efe4SConrad Meyer 
/* Store leaf_length in the parameter block. Always returns 0. */
static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length )
{
  P->leaf_length = leaf_length;

  return 0;
}
1380e33efe4SConrad Meyer 
/*
 * Write node_offset into the parameter block's node_offset field through
 * store48. Always returns 0.
 */
static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset )
{
  store48( P->node_offset, node_offset );

  return 0;
}
1440e33efe4SConrad Meyer 
/* Store node_depth in the parameter block. Always returns 0. */
static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth )
{
  P->node_depth = node_depth;

  return 0;
}
1500e33efe4SConrad Meyer 
/* Store inner_length in the parameter block. Always returns 0. */
static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length )
{
  P->inner_length = inner_length;

  return 0;
}
1560e33efe4SConrad Meyer 
/*
 * Copy BLAKE2S_SALTBYTES bytes from salt into the parameter block.
 * Always returns 0.
 */
static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] )
{
  memcpy( P->salt, salt, BLAKE2S_SALTBYTES );

  return 0;
}
1620e33efe4SConrad Meyer 
/*
 * Copy BLAKE2S_PERSONALBYTES bytes from personal into the parameter block.
 * Always returns 0.
 */
static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] )
{
  memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES );

  return 0;
}
1680e33efe4SConrad Meyer 
/*
 * Zero the whole state, then load the eight IV words into the chaining value
 * h[0..7]. Always returns 0.
 */
static inline int blake2s_init0( blake2s_state *S )
{
  unsigned word;

  memset( S, 0, sizeof( *S ) );

  for( word = 0; word < 8; word++ )
  {
    S->h[word] = blake2s_IV[word];
  }

  return 0;
}
1770e33efe4SConrad Meyer 
1780e33efe4SConrad Meyer #define blake2s_init BLAKE2_IMPL_NAME(blake2s_init)
1790e33efe4SConrad Meyer #define blake2s_init_param BLAKE2_IMPL_NAME(blake2s_init_param)
1800e33efe4SConrad Meyer #define blake2s_init_key BLAKE2_IMPL_NAME(blake2s_init_key)
1810e33efe4SConrad Meyer #define blake2s_update BLAKE2_IMPL_NAME(blake2s_update)
1820e33efe4SConrad Meyer #define blake2s_final BLAKE2_IMPL_NAME(blake2s_final)
1830e33efe4SConrad Meyer #define blake2s BLAKE2_IMPL_NAME(blake2s)
1840e33efe4SConrad Meyer 
1850e33efe4SConrad Meyer #if defined(__cplusplus)
1860e33efe4SConrad Meyer extern "C" {
1870e33efe4SConrad Meyer #endif
1880e33efe4SConrad Meyer   int blake2s_init( blake2s_state *S, size_t outlen );
1890e33efe4SConrad Meyer   int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
1900e33efe4SConrad Meyer   int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
1910e33efe4SConrad Meyer   int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen );
1920e33efe4SConrad Meyer   int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen );
1930e33efe4SConrad Meyer   int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
1940e33efe4SConrad Meyer #if defined(__cplusplus)
1950e33efe4SConrad Meyer }
1960e33efe4SConrad Meyer #endif
1970e33efe4SConrad Meyer 
1980e33efe4SConrad Meyer 
1990e33efe4SConrad Meyer /* init2 xors IV with input parameter block */
blake2s_init_param(blake2s_state * S,const blake2s_param * P)2000e33efe4SConrad Meyer int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
2010e33efe4SConrad Meyer {
2020e33efe4SConrad Meyer   uint8_t *p, *h, *v;
2030e33efe4SConrad Meyer   //blake2s_init0( S );
2040e33efe4SConrad Meyer   v = ( uint8_t * )( blake2s_IV );
2050e33efe4SConrad Meyer   h = ( uint8_t * )( S->h );
2060e33efe4SConrad Meyer   p = ( uint8_t * )( P );
2070e33efe4SConrad Meyer   /* IV XOR ParamBlock */
2080e33efe4SConrad Meyer   memset( S, 0, sizeof( blake2s_state ) );
2090e33efe4SConrad Meyer 
2100e33efe4SConrad Meyer   for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
2110e33efe4SConrad Meyer 
2120e33efe4SConrad Meyer   S->outlen = P->digest_length;
2130e33efe4SConrad Meyer   return 0;
2140e33efe4SConrad Meyer }
2150e33efe4SConrad Meyer 
2160e33efe4SConrad Meyer 
2170e33efe4SConrad Meyer /* Some sort of default parameter block initialization, for sequential blake2s */
blake2s_init(blake2s_state * S,size_t outlen)2180e33efe4SConrad Meyer int blake2s_init( blake2s_state *S, size_t outlen )
2190e33efe4SConrad Meyer {
2200e33efe4SConrad Meyer   if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
2210e33efe4SConrad Meyer 
2220e33efe4SConrad Meyer   const blake2s_param P =
2230e33efe4SConrad Meyer   {
2240e33efe4SConrad Meyer     outlen,
2250e33efe4SConrad Meyer     0,
2260e33efe4SConrad Meyer     1,
2270e33efe4SConrad Meyer     1,
2280e33efe4SConrad Meyer     0,
2290e33efe4SConrad Meyer     {0},
2300e33efe4SConrad Meyer     0,
2310e33efe4SConrad Meyer     0,
2320e33efe4SConrad Meyer     {0},
2330e33efe4SConrad Meyer     {0}
2340e33efe4SConrad Meyer   };
2350e33efe4SConrad Meyer   return blake2s_init_param( S, &P );
2360e33efe4SConrad Meyer }
2370e33efe4SConrad Meyer 
2380e33efe4SConrad Meyer 
/*
 * Initialize S for keyed hashing with an outlen-byte digest.
 *
 * After the parameter block is applied, the key is zero-padded to one full
 * block and fed through blake2s_update as the first input; the padded copy is
 * then wiped from the stack.
 *
 * Returns -1 when outlen is 0 or exceeds BLAKE2S_OUTBYTES, when key is NULL,
 * when keylen is 0 or exceeds BLAKE2S_KEYBYTES, or when blake2s_init_param
 * reports failure; 0 otherwise.
 */
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
  {
    return -1;
  }

  if( key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES )
  {
    return -1;
  }

  /*
   * Positional initializer; outlen and keylen fill the first two positions.
   * NOTE(review): the field each position lands on depends on the
   * blake2s_param layout declared in blake2.h - confirm there.
   */
  const blake2s_param P = { outlen, keylen, 1, 1, 0, {0}, 0, 0, {0}, {0} };

  if( blake2s_init_param( S, &P ) < 0 )
  {
    return -1;
  }

  uint8_t padded_key[BLAKE2S_BLOCKBYTES];

  memset( padded_key, 0, sizeof( padded_key ) );
  memcpy( padded_key, key, keylen );
  blake2s_update( S, padded_key, BLAKE2S_BLOCKBYTES );
  secure_zero_memory( padded_key, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */

  return 0;
}
2710e33efe4SConrad Meyer 
2720e33efe4SConrad Meyer 
/*
 * Run the ten-round compression over one message block and fold the result
 * into the chaining value S->h.
 *
 * S     - hash state; h is read and overwritten. 16 bytes are loaded starting
 *         at S->t[0].
 * block - BLAKE2S_BLOCKBYTES bytes of message; no alignment is required.
 *
 * The locals row1..row4, buf1..buf4, t0..t2, r8, r16 and m0..m15 are not all
 * used by name in this function body. NOTE(review): they are presumably
 * consumed by the ROUND macro from blake2s-round.h, so their names must stay
 * as they are - confirm against that header.
 *
 * Always returns 0.
 */
static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
{
  __m128i row1, row2, row3, row4;
  __m128i buf1, buf2, buf3, buf4;
#if defined(HAVE_SSE4_1)
  __m128i t0, t1;
#if !defined(HAVE_XOP)
  __m128i t2;
#endif
#endif
  __m128i ff0, ff1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
  const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 );
  const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 );
#endif
#if defined(HAVE_SSE4_1)
  const __m128i m0 = LOADU( block +  00 );
  const __m128i m1 = LOADU( block +  16 );
  const __m128i m2 = LOADU( block +  32 );
  const __m128i m3 = LOADU( block +  48 );
#else
  /*
   * Copy the sixteen message words out with memcpy rather than reading them
   * through a ( uint32_t * ) cast: block is a byte pointer with no alignment
   * guarantee, and the cast also discarded const and broke the aliasing rules.
   * The words keep the host byte order, exactly as the cast-based reads did.
   */
  uint32_t msg_words[16];
  memcpy( msg_words, block, sizeof( msg_words ) );
  const uint32_t  m0 = msg_words[ 0];
  const uint32_t  m1 = msg_words[ 1];
  const uint32_t  m2 = msg_words[ 2];
  const uint32_t  m3 = msg_words[ 3];
  const uint32_t  m4 = msg_words[ 4];
  const uint32_t  m5 = msg_words[ 5];
  const uint32_t  m6 = msg_words[ 6];
  const uint32_t  m7 = msg_words[ 7];
  const uint32_t  m8 = msg_words[ 8];
  const uint32_t  m9 = msg_words[ 9];
  const uint32_t m10 = msg_words[10];
  const uint32_t m11 = msg_words[11];
  const uint32_t m12 = msg_words[12];
  const uint32_t m13 = msg_words[13];
  const uint32_t m14 = msg_words[14];
  const uint32_t m15 = msg_words[15];
#endif
  /* Rows 1 and 2 start as the chaining value; ff0/ff1 keep it for the final XOR. */
  row1 = ff0 = LOADU( &S->h[0] );
  row2 = ff1 = LOADU( &S->h[4] );
  /* Row 3 is the first four IV words. */
  row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A );
  /*
   * Row 4 is the last four IV words XORed with the 16 bytes at S->t[0].
   * NOTE(review): this presumably covers t[0..1] followed by f[0..1], which
   * relies on those fields being adjacent in blake2s_state - confirm in blake2.h.
   */
  row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) );
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  /* New chaining value: old h XOR (row1 ^ row3) and old h XOR (row2 ^ row4). */
  STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
  STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
  return 0;
}
3290e33efe4SConrad Meyer 
3300e33efe4SConrad Meyer 
/*
 * Absorb inlen bytes from in into the state.
 *
 * S->buf holds up to two blocks. While the remaining input is larger than the
 * free space, the buffer is topped up, its first block is compressed, and the
 * second block is shifted down to the front. Input that fits in the free
 * space is only buffered, not compressed.
 *
 * Always returns 0.
 */
int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen )
{
  const size_t capacity = 2 * BLAKE2S_BLOCKBYTES;

  while( inlen > capacity - S->buflen )
  {
    const size_t room = capacity - S->buflen;

    memcpy( S->buf + S->buflen, in, room ); /* Top the buffer up to two blocks */
    S->buflen += room;
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
    blake2s_compress( S, S->buf ); /* Consume the first block */
    memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); /* Move the second block to the front */
    S->buflen -= BLAKE2S_BLOCKBYTES;
    in += room;
    inlen -= room;
  }

  if( inlen > 0 )
  {
    /* The rest fits: buffer it and leave compression for later. */
    memcpy( S->buf + S->buflen, in, inlen );
    S->buflen += inlen;
  }

  return 0;
}
3600e33efe4SConrad Meyer 
3610e33efe4SConrad Meyer 
/*
 * Finish the hash and write the digest to out.
 *
 * S      - state previously set up by one of the init functions.
 * out    - destination for the digest; must not be NULL.
 * outlen - must equal the output length recorded in S at init time.
 *
 * Returns 0 on success, and -1 when out is NULL, when outlen differs from
 * S->outlen, or when outlen exceeds BLAKE2S_OUTBYTES.
 */
int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen )
{
  uint8_t buffer[BLAKE2S_OUTBYTES];

  if( out == NULL ) return -1;

  if( outlen != S->outlen ) return -1;

  /*
   * blake2s_init_param copies P->digest_length into S->outlen without a range
   * check, so a matching outlen can still exceed the temp buffer; refuse it
   * rather than read past the end of buffer in the final memcpy.
   */
  if( outlen > sizeof( buffer ) ) return -1;

  /* More than one block buffered: compress the first and move the rest to the front. */
  if( S->buflen > BLAKE2S_BLOCKBYTES )
  {
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
    blake2s_compress( S, S->buf );
    S->buflen -= BLAKE2S_BLOCKBYTES;
    memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
  }

  /* Count the remaining bytes, set the finalization word, zero-pad and compress. */
  blake2s_increment_counter( S, ( uint32_t )S->buflen );
  blake2s_set_lastblock( S );
  memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
  blake2s_compress( S, S->buf );

  for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
    store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );

  /* Only the first outlen bytes of the full hash are handed to the caller. */
  memcpy( out, buffer, outlen );
  return 0;
}
3870e33efe4SConrad Meyer 
blake2s(uint8_t * out,const void * in,const void * key,size_t outlen,size_t inlen,size_t keylen)3880e33efe4SConrad Meyer int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
3890e33efe4SConrad Meyer {
3900e33efe4SConrad Meyer   blake2s_state S[1];
3910e33efe4SConrad Meyer 
3920e33efe4SConrad Meyer   /* Verify parameters */
3930e33efe4SConrad Meyer   if ( NULL == in && inlen > 0 ) return -1;
3940e33efe4SConrad Meyer 
3950e33efe4SConrad Meyer   if ( NULL == out ) return -1;
3960e33efe4SConrad Meyer 
3970e33efe4SConrad Meyer   if ( NULL == key && keylen > 0) return -1;
3980e33efe4SConrad Meyer 
3990e33efe4SConrad Meyer   if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
4000e33efe4SConrad Meyer 
4010e33efe4SConrad Meyer   if( keylen > BLAKE2S_KEYBYTES ) return -1;
4020e33efe4SConrad Meyer 
4030e33efe4SConrad Meyer   if( keylen > 0 )
4040e33efe4SConrad Meyer   {
4050e33efe4SConrad Meyer     if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
4060e33efe4SConrad Meyer   }
4070e33efe4SConrad Meyer   else
4080e33efe4SConrad Meyer   {
4090e33efe4SConrad Meyer     if( blake2s_init( S, outlen ) < 0 ) return -1;
4100e33efe4SConrad Meyer   }
4110e33efe4SConrad Meyer 
4120e33efe4SConrad Meyer   if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
4130e33efe4SConrad Meyer   return blake2s_final( S, out, outlen );
4140e33efe4SConrad Meyer }
4150e33efe4SConrad Meyer 
4160e33efe4SConrad Meyer #if defined(SUPERCOP)
crypto_hash(unsigned char * out,unsigned char * in,unsigned long long inlen)4170e33efe4SConrad Meyer int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
4180e33efe4SConrad Meyer {
4190e33efe4SConrad Meyer   return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, (size_t)inlen, 0 );
4200e33efe4SConrad Meyer }
4210e33efe4SConrad Meyer #endif
4220e33efe4SConrad Meyer 
423