10e33efe4SConrad Meyer /*
20e33efe4SConrad Meyer BLAKE2 reference source code package - optimized C implementations
30e33efe4SConrad Meyer
40e33efe4SConrad Meyer Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
50e33efe4SConrad Meyer
60e33efe4SConrad Meyer To the extent possible under law, the author(s) have dedicated all copyright
70e33efe4SConrad Meyer and related and neighboring rights to this software to the public domain
80e33efe4SConrad Meyer worldwide. This software is distributed without any warranty.
90e33efe4SConrad Meyer
100e33efe4SConrad Meyer You should have received a copy of the CC0 Public Domain Dedication along with
110e33efe4SConrad Meyer this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
120e33efe4SConrad Meyer */
130e33efe4SConrad Meyer
140e33efe4SConrad Meyer #include <stdint.h>
150e33efe4SConrad Meyer #include <string.h>
160e33efe4SConrad Meyer #include <stdio.h>
170e33efe4SConrad Meyer
180e33efe4SConrad Meyer #include "blake2.h"
190e33efe4SConrad Meyer #include "blake2-impl.h"
200e33efe4SConrad Meyer
210e33efe4SConrad Meyer #include "blake2-config.h"
220e33efe4SConrad Meyer
230e33efe4SConrad Meyer #if defined(_MSC_VER)
240e33efe4SConrad Meyer #include <intrin.h>
250e33efe4SConrad Meyer #endif
260e33efe4SConrad Meyer
270e33efe4SConrad Meyer #if defined(HAVE_SSE2)
280e33efe4SConrad Meyer #include <emmintrin.h>
290e33efe4SConrad Meyer // MSVC only defines _mm_set_epi64x for x86_64...
300e33efe4SConrad Meyer #if defined(_MSC_VER) && !defined(_M_X64)
_mm_set_epi64x(const uint64_t u1,const uint64_t u0)310e33efe4SConrad Meyer static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
320e33efe4SConrad Meyer {
330e33efe4SConrad Meyer return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 );
340e33efe4SConrad Meyer }
350e33efe4SConrad Meyer #endif
360e33efe4SConrad Meyer #endif
370e33efe4SConrad Meyer
380e33efe4SConrad Meyer
390e33efe4SConrad Meyer #if defined(HAVE_SSSE3)
400e33efe4SConrad Meyer #include <tmmintrin.h>
410e33efe4SConrad Meyer #endif
420e33efe4SConrad Meyer #if defined(HAVE_SSE4_1)
430e33efe4SConrad Meyer #include <smmintrin.h>
440e33efe4SConrad Meyer #endif
450e33efe4SConrad Meyer #if defined(HAVE_AVX)
460e33efe4SConrad Meyer #include <immintrin.h>
470e33efe4SConrad Meyer #endif
480e33efe4SConrad Meyer #if defined(HAVE_XOP) && !defined(_MSC_VER)
490e33efe4SConrad Meyer #include <x86intrin.h>
500e33efe4SConrad Meyer #endif
510e33efe4SConrad Meyer
520e33efe4SConrad Meyer #include "blake2s-round.h"
530e33efe4SConrad Meyer
/*
 * BLAKE2s initialization vector: eight 32-bit words. blake2s_init0 copies
 * them into the chaining value and blake2s_init_param XORs them with the
 * parameter block.
 */
static const uint32_t blake2s_IV[8] =
{
  0x6A09E667UL,
  0xBB67AE85UL,
  0x3C6EF372UL,
  0xA54FF53AUL,
  0x510E527FUL,
  0x9B05688CUL,
  0x1F83D9ABUL,
  0x5BE0CD19UL
};
590e33efe4SConrad Meyer
/*
 * Message-word permutation table: ten rows, each a permutation of 0..15.
 * Nothing in the visible part of this file indexes it directly.
 */
static const uint8_t blake2s_sigma[10][16] =
{
  /* 0 */ {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  /* 1 */ { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  /* 2 */ { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  /* 3 */ {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  /* 4 */ {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  /* 5 */ {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  /* 6 */ { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  /* 7 */ { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  /* 8 */ {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  /* 9 */ { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 }
};
730e33efe4SConrad Meyer
740e33efe4SConrad Meyer
750e33efe4SConrad Meyer /* Some helper functions, not necessarily useful */
/* Sets the finalization word f[1] to all ones. Always returns 0. */
static inline int blake2s_set_lastnode(blake2s_state *S)
{
  S->f[1] = ~0U;

  return 0;
}
810e33efe4SConrad Meyer
/* Resets the finalization word f[1] to zero. Always returns 0. */
static inline int blake2s_clear_lastnode(blake2s_state *S)
{
  S->f[1] = 0U;

  return 0;
}
870e33efe4SConrad Meyer
/*
 * Flags the next compression as the final one by setting f[0] to all ones.
 * When the state's last_node field is non-zero, f[1] is set as well.
 * Always returns 0.
 */
static inline int blake2s_set_lastblock(blake2s_state *S)
{
  if (S->last_node) {
    blake2s_set_lastnode(S);
  }

  S->f[0] = ~0U;

  return 0;
}
950e33efe4SConrad Meyer
/*
 * Undoes blake2s_set_lastblock: zeroes f[0], and also f[1] when the state's
 * last_node field is non-zero. Always returns 0.
 */
static inline int blake2s_clear_lastblock(blake2s_state *S)
{
  if (S->last_node) {
    blake2s_clear_lastnode(S);
  }

  S->f[0] = 0U;

  return 0;
}
1030e33efe4SConrad Meyer
/*
 * Adds `inc` to the 64-bit counter stored as two 32-bit halves
 * (t[0] = low word, t[1] = high word). The addition wraps modulo 2^64.
 * Always returns 0.
 */
static inline int blake2s_increment_counter(blake2s_state *S, const uint32_t inc)
{
  const uint64_t high = (uint64_t)S->t[1] << 32;
  const uint64_t total = (high | S->t[0]) + inc;

  S->t[0] = (uint32_t)total;
  S->t[1] = (uint32_t)(total >> 32);

  return 0;
}
1120e33efe4SConrad Meyer
1130e33efe4SConrad Meyer
1140e33efe4SConrad Meyer // Parameter-related functions
/* Stores `digest_length` in the parameter block. Always returns 0. */
static inline int blake2s_param_set_digest_length(blake2s_param *P, const uint8_t digest_length)
{
  P->digest_length = digest_length;

  return 0;
}
1200e33efe4SConrad Meyer
/* Stores `fanout` in the parameter block. Always returns 0. */
static inline int blake2s_param_set_fanout(blake2s_param *P, const uint8_t fanout)
{
  P->fanout = fanout;

  return 0;
}
1260e33efe4SConrad Meyer
/* Stores `depth` in the parameter block's depth field. Always returns 0. */
static inline int blake2s_param_set_max_depth(blake2s_param *P, const uint8_t depth)
{
  P->depth = depth;

  return 0;
}
1320e33efe4SConrad Meyer
/* Stores `leaf_length` in the parameter block. Always returns 0. */
static inline int blake2s_param_set_leaf_length(blake2s_param *P, const uint32_t leaf_length)
{
  P->leaf_length = leaf_length;

  return 0;
}
1380e33efe4SConrad Meyer
/*
 * Writes `node_offset` into the parameter block through store48, which is
 * defined outside this file (presumably it keeps only the low 48 bits;
 * confirm against its definition). Always returns 0.
 */
static inline int blake2s_param_set_node_offset(blake2s_param *P, const uint64_t node_offset)
{
  store48(P->node_offset, node_offset);

  return 0;
}
1440e33efe4SConrad Meyer
/* Stores `node_depth` in the parameter block. Always returns 0. */
static inline int blake2s_param_set_node_depth(blake2s_param *P, const uint8_t node_depth)
{
  P->node_depth = node_depth;

  return 0;
}
1500e33efe4SConrad Meyer
/* Stores `inner_length` in the parameter block. Always returns 0. */
static inline int blake2s_param_set_inner_length(blake2s_param *P, const uint8_t inner_length)
{
  P->inner_length = inner_length;

  return 0;
}
1560e33efe4SConrad Meyer
/*
 * Copies BLAKE2S_SALTBYTES bytes from `salt` into the parameter block.
 * The caller must supply at least that many readable bytes.
 * Always returns 0.
 */
static inline int blake2s_param_set_salt(blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES])
{
  memcpy(P->salt, salt, BLAKE2S_SALTBYTES);

  return 0;
}
1620e33efe4SConrad Meyer
/*
 * Copies BLAKE2S_PERSONALBYTES bytes from `personal` into the parameter
 * block. The caller must supply at least that many readable bytes.
 * Always returns 0.
 */
static inline int blake2s_param_set_personal(blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES])
{
  memcpy(P->personal, personal, BLAKE2S_PERSONALBYTES);

  return 0;
}
1680e33efe4SConrad Meyer
/*
 * Zeroes the whole state and then loads the eight IV words into the
 * chaining value h[0..7]. Always returns 0.
 */
static inline int blake2s_init0(blake2s_state *S)
{
  int word = 0;

  memset(S, 0, sizeof(blake2s_state));

  while (word < 8) {
    S->h[word] = blake2s_IV[word];
    ++word;
  }

  return 0;
}
1770e33efe4SConrad Meyer
1780e33efe4SConrad Meyer #define blake2s_init BLAKE2_IMPL_NAME(blake2s_init)
1790e33efe4SConrad Meyer #define blake2s_init_param BLAKE2_IMPL_NAME(blake2s_init_param)
1800e33efe4SConrad Meyer #define blake2s_init_key BLAKE2_IMPL_NAME(blake2s_init_key)
1810e33efe4SConrad Meyer #define blake2s_update BLAKE2_IMPL_NAME(blake2s_update)
1820e33efe4SConrad Meyer #define blake2s_final BLAKE2_IMPL_NAME(blake2s_final)
1830e33efe4SConrad Meyer #define blake2s BLAKE2_IMPL_NAME(blake2s)
1840e33efe4SConrad Meyer
1850e33efe4SConrad Meyer #if defined(__cplusplus)
1860e33efe4SConrad Meyer extern "C" {
1870e33efe4SConrad Meyer #endif
1880e33efe4SConrad Meyer int blake2s_init( blake2s_state *S, size_t outlen );
1890e33efe4SConrad Meyer int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
1900e33efe4SConrad Meyer int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
1910e33efe4SConrad Meyer int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen );
1920e33efe4SConrad Meyer int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen );
1930e33efe4SConrad Meyer int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
1940e33efe4SConrad Meyer #if defined(__cplusplus)
1950e33efe4SConrad Meyer }
1960e33efe4SConrad Meyer #endif
1970e33efe4SConrad Meyer
1980e33efe4SConrad Meyer
/* blake2s_init_param XORs the IV with the input parameter block */
/*
 * Initializes `S` from the parameter block `P`.
 *
 * The state is zeroed, then the first BLAKE2S_OUTBYTES bytes of the chaining
 * value are set to the byte-wise XOR of the IV and the raw bytes of `*P`.
 * S->outlen is taken from P->digest_length without range validation.
 *
 * NOTE(review): the XOR works on the in-memory bytes of the IV words, so it
 * appears to assume a little-endian host and a parameter block of at least
 * BLAKE2S_OUTBYTES bytes with no padding; confirm against the struct
 * definition.
 *
 * Returns 0 (this function has no failure path).
 */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
  /* Read-only byte views; neither the IV nor the caller's block is written. */
  const uint8_t *iv_bytes = ( const uint8_t * )( blake2s_IV );
  const uint8_t *param_bytes = ( const uint8_t * )( P );
  uint8_t *h_bytes = ( uint8_t * )( S->h );

  memset( S, 0, sizeof( blake2s_state ) );

  /* IV XOR ParamBlock */
  for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h_bytes[i] = iv_bytes[i] ^ param_bytes[i];

  S->outlen = P->digest_length;
  return 0;
}
2150e33efe4SConrad Meyer
2160e33efe4SConrad Meyer
2170e33efe4SConrad Meyer /* Some sort of default parameter block initialization, for sequential blake2s */
/*
 * Initializes `S` for unkeyed hashing with an `outlen`-byte digest.
 *
 * Returns -1 when `outlen` is zero or larger than BLAKE2S_OUTBYTES;
 * otherwise returns the result of blake2s_init_param.
 */
int blake2s_init( blake2s_state *S, size_t outlen )
{
  if (outlen == 0 || outlen > BLAKE2S_OUTBYTES) {
    return -1;
  }

  /*
   * Positional initializer. Field names below are presumed from the setter
   * helpers in this file; confirm the order against the struct definition.
   */
  const blake2s_param params =
  {
    outlen, /* digest length */
    0,      /* key length: unkeyed */
    1,      /* fanout */
    1,      /* depth */
    0,      /* leaf length */
    {0},    /* node offset */
    0,      /* node depth */
    0,      /* inner length */
    {0},    /* salt */
    {0}     /* personalization */
  };

  return blake2s_init_param(S, &params);
}
2370e33efe4SConrad Meyer
2380e33efe4SConrad Meyer
/*
 * Initializes `S` for keyed hashing with an `outlen`-byte digest.
 *
 * The key is zero-padded to one full block and fed through blake2s_update;
 * the padded copy is wiped from the stack afterwards.
 *
 * Returns -1 when `outlen` is zero or exceeds BLAKE2S_OUTBYTES, when `key`
 * is NULL, when `keylen` is zero or exceeds BLAKE2S_KEYBYTES, or when
 * blake2s_init_param fails. Returns 0 otherwise.
 */
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
  if (outlen == 0 || outlen > BLAKE2S_OUTBYTES) {
    return -1;
  }

  if (key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES) {
    return -1;
  }

  /*
   * Positional initializer. Field names below are presumed from the setter
   * helpers in this file; confirm the order against the struct definition.
   */
  const blake2s_param params =
  {
    outlen, /* digest length */
    keylen, /* key length */
    1,      /* fanout */
    1,      /* depth */
    0,      /* leaf length */
    {0},    /* node offset */
    0,      /* node depth */
    0,      /* inner length */
    {0},    /* salt */
    {0}     /* personalization */
  };

  if (blake2s_init_param(S, &params) < 0) {
    return -1;
  }

  uint8_t key_block[BLAKE2S_BLOCKBYTES];

  memset(key_block, 0, BLAKE2S_BLOCKBYTES);
  memcpy(key_block, key, keylen);
  blake2s_update(S, key_block, BLAKE2S_BLOCKBYTES);
  secure_zero_memory(key_block, BLAKE2S_BLOCKBYTES); /* Burn the key from stack */

  return 0;
}
2710e33efe4SConrad Meyer
2720e33efe4SConrad Meyer
/*
 * Compresses one message block into the chaining value S->h.
 *
 * S     - hash state; h[0..7] is read and overwritten, and 16 bytes starting
 *         at t[0] are read.
 * block - BLAKE2S_BLOCKBYTES bytes of message input; no alignment required.
 *
 * Always returns 0.
 *
 * ROUND, LOADU and STOREU are macros defined outside this file. ROUND takes
 * only the round number, so it evidently refers to the locals declared here
 * (row1..row4, buf1..buf4, t0..t2, r8, r16, m0..m15) by name. Do not rename
 * them.
 */
static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
{
  __m128i row1, row2, row3, row4;
  __m128i buf1, buf2, buf3, buf4;
#if defined(HAVE_SSE4_1)
  __m128i t0, t1;
#if !defined(HAVE_XOP)
  __m128i t2;
#endif
#endif
  __m128i ff0, ff1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
  /* Byte-shuffle masks, presumably used by ROUND for the 8- and 16-bit rotations. */
  const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 );
  const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 );
#endif
#if defined(HAVE_SSE4_1)
  /* Message as four 128-bit vectors. */
  const __m128i m0 = LOADU( block + 0 );
  const __m128i m1 = LOADU( block + 16 );
  const __m128i m2 = LOADU( block + 32 );
  const __m128i m3 = LOADU( block + 48 );
#else
  /*
   * Message as sixteen native-endian 32-bit words. Copy through memcpy
   * rather than dereferencing (uint32_t *)block: the byte buffer carries no
   * alignment guarantee, and reading it through a uint32_t lvalue breaks the
   * effective-type rule.
   */
  uint32_t msg_words[16];
  memcpy( msg_words, block, sizeof( msg_words ) );
  const uint32_t m0 = msg_words[ 0];
  const uint32_t m1 = msg_words[ 1];
  const uint32_t m2 = msg_words[ 2];
  const uint32_t m3 = msg_words[ 3];
  const uint32_t m4 = msg_words[ 4];
  const uint32_t m5 = msg_words[ 5];
  const uint32_t m6 = msg_words[ 6];
  const uint32_t m7 = msg_words[ 7];
  const uint32_t m8 = msg_words[ 8];
  const uint32_t m9 = msg_words[ 9];
  const uint32_t m10 = msg_words[10];
  const uint32_t m11 = msg_words[11];
  const uint32_t m12 = msg_words[12];
  const uint32_t m13 = msg_words[13];
  const uint32_t m14 = msg_words[14];
  const uint32_t m15 = msg_words[15];
#endif
  /* Rows 1-2: current chaining value, also saved in ff0/ff1 for the feed-forward. */
  row1 = ff0 = LOADU( &S->h[0] );
  row2 = ff1 = LOADU( &S->h[4] );
  /* Row 3: first four IV words. */
  row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A );
  /*
   * Row 4: last four IV words XOR 16 bytes loaded from &S->t[0].
   * NOTE(review): presumably that load covers t[0..1] and f[0..1], which
   * requires the two arrays to be adjacent in blake2s_state; confirm against
   * the struct definition.
   */
  row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) );
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  /* Feed-forward: h = h ^ row1 ^ row3 (low half), h ^ row2 ^ row4 (high half). */
  STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
  STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
  return 0;
}
3290e33efe4SConrad Meyer
3300e33efe4SConrad Meyer
/*
 * Absorbs `inlen` bytes from `in` into the state.
 *
 * The state keeps a two-block buffer. Input is only compressed when it does
 * not fit in the remaining buffer space: the buffer is topped up, its first
 * block is compressed, and the second block is moved to the front. Input
 * that fits is just buffered, so some data always stays behind for
 * blake2s_final.
 *
 * Always returns 0.
 */
int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen )
{
  while (inlen > 0) {
    const size_t used = S->buflen;
    const size_t room = 2 * BLAKE2S_BLOCKBYTES - used;

    if (inlen <= room) {
      /* Everything left fits: buffer it lazily and stop. */
      memcpy(S->buf + used, in, inlen);
      S->buflen += inlen;
      break;
    }

    /* Top up the buffer, then consume its first block. */
    memcpy(S->buf + used, in, room);
    S->buflen += room;
    blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES);
    blake2s_compress(S, S->buf);

    /* Slide the second block down to the front. */
    memcpy(S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES);
    S->buflen -= BLAKE2S_BLOCKBYTES;

    in += room;
    inlen -= room;
  }

  return 0;
}
3600e33efe4SConrad Meyer
3610e33efe4SConrad Meyer
/*
 * Finishes the hash and writes `outlen` digest bytes to `out`.
 *
 * If more than one block is still buffered, the first block is compressed
 * as a regular block. The remaining bytes are counted, zero-padded, flagged
 * as the last block and compressed. The chaining value is then serialized
 * with store32 into a temporary buffer, of which the first `outlen` bytes
 * are copied out.
 *
 * Returns -1 when `out` is NULL, when `outlen` exceeds BLAKE2S_OUTBYTES, or
 * when `outlen` differs from the length fixed at initialization.
 * Returns 0 on success.
 */
int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen )
{
  uint8_t buffer[BLAKE2S_OUTBYTES];

  if( NULL == out ) return -1;

  /*
   * S->outlen comes straight from the parameter block in blake2s_init_param
   * with no range check, so bound the copy by the temporary buffer here.
   */
  if( outlen > BLAKE2S_OUTBYTES ) return -1;

  if( outlen != S->outlen ) return -1;

  if( S->buflen > BLAKE2S_BLOCKBYTES )
  {
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
    blake2s_compress( S, S->buf );
    S->buflen -= BLAKE2S_BLOCKBYTES;
    memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
  }

  blake2s_increment_counter( S, ( uint32_t )S->buflen );
  blake2s_set_lastblock( S );
  memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
  blake2s_compress( S, S->buf );

  for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
    store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );

  memcpy( out, buffer, outlen );
  /* The temp buffer holds the untruncated digest; do not leave it on the stack. */
  secure_zero_memory( buffer, sizeof( buffer ) );
  return 0;
}
3870e33efe4SConrad Meyer
blake2s(uint8_t * out,const void * in,const void * key,size_t outlen,size_t inlen,size_t keylen)3880e33efe4SConrad Meyer int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
3890e33efe4SConrad Meyer {
3900e33efe4SConrad Meyer blake2s_state S[1];
3910e33efe4SConrad Meyer
3920e33efe4SConrad Meyer /* Verify parameters */
3930e33efe4SConrad Meyer if ( NULL == in && inlen > 0 ) return -1;
3940e33efe4SConrad Meyer
3950e33efe4SConrad Meyer if ( NULL == out ) return -1;
3960e33efe4SConrad Meyer
3970e33efe4SConrad Meyer if ( NULL == key && keylen > 0) return -1;
3980e33efe4SConrad Meyer
3990e33efe4SConrad Meyer if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
4000e33efe4SConrad Meyer
4010e33efe4SConrad Meyer if( keylen > BLAKE2S_KEYBYTES ) return -1;
4020e33efe4SConrad Meyer
4030e33efe4SConrad Meyer if( keylen > 0 )
4040e33efe4SConrad Meyer {
4050e33efe4SConrad Meyer if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
4060e33efe4SConrad Meyer }
4070e33efe4SConrad Meyer else
4080e33efe4SConrad Meyer {
4090e33efe4SConrad Meyer if( blake2s_init( S, outlen ) < 0 ) return -1;
4100e33efe4SConrad Meyer }
4110e33efe4SConrad Meyer
4120e33efe4SConrad Meyer if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
4130e33efe4SConrad Meyer return blake2s_final( S, out, outlen );
4140e33efe4SConrad Meyer }
4150e33efe4SConrad Meyer
4160e33efe4SConrad Meyer #if defined(SUPERCOP)
crypto_hash(unsigned char * out,unsigned char * in,unsigned long long inlen)4170e33efe4SConrad Meyer int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
4180e33efe4SConrad Meyer {
4190e33efe4SConrad Meyer return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, (size_t)inlen, 0 );
4200e33efe4SConrad Meyer }
4210e33efe4SConrad Meyer #endif
4220e33efe4SConrad Meyer
423