1 // Based on public domain code written in 2012 by Samuel Neves
2 
3 #include "rar.hpp"
4 
5 #ifdef USE_SSE
6 #include "blake2s_sse.cpp"
7 #endif
8 
9 static void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth);
10 static void blake2s_update( blake2s_state *S, const byte *in, size_t inlen );
11 static void blake2s_final( blake2s_state *S, byte *digest );
12 
13 #include "blake2sp.cpp"
14 
15 static const uint32 blake2s_IV[8] =
16 {
17   0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
18   0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
19 };
20 
21 static const byte blake2s_sigma[10][16] =
22 {
23   {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
24   { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
25   { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
26   {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
27   {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
28   {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
29   { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
30   { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
31   {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
32   { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
33 };
34 
blake2s_set_lastnode(blake2s_state * S)35 static inline void blake2s_set_lastnode( blake2s_state *S )
36 {
37   S->f[1] = ~0U;
38 }
39 
40 
41 /* Some helper functions, not necessarily useful */
blake2s_set_lastblock(blake2s_state * S)42 static inline void blake2s_set_lastblock( blake2s_state *S )
43 {
44   if( S->last_node ) blake2s_set_lastnode( S );
45 
46   S->f[0] = ~0U;
47 }
48 
49 
blake2s_increment_counter(blake2s_state * S,const uint32 inc)50 static inline void blake2s_increment_counter( blake2s_state *S, const uint32 inc )
51 {
52   S->t[0] += inc;
53   S->t[1] += ( S->t[0] < inc );
54 }
55 
56 
/* blake2s_init_param XORs the IV with the BLAKE2sp parameter block words. */
blake2s_init_param(blake2s_state * S,uint32 node_offset,uint32 node_depth)58 void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth)
59 {
60 #ifdef USE_SSE
61   if (_SSE_Version>=SSE_SSE2)
62     blake2s_init_sse();
63 #endif
64 
65   S->init(); // Clean data.
66   for( int i = 0; i < 8; ++i )
67     S->h[i] = blake2s_IV[i];
68 
69   S->h[0] ^= 0x02080020; // We use BLAKE2sp parameters block.
70   S->h[2] ^= node_offset;
71   S->h[3] ^= (node_depth<<16)|0x20000000;
72 }
73 
74 
// Mixing step used by blake2s_compress().
//   r, i       - round number and step index; together they select the two
//                message words m[blake2s_sigma[r][2*i]] and
//                m[blake2s_sigma[r][2*i+1]].
//   m          - array of message words.
//   a, b, c, d - lvalues naming the four work vector words to update in place.
// Each half adds a message word into a, then folds the result through d, c
// and b using XOR and right rotations (by 16 and 12 in the first half, by 8
// and 7 in the second).
//
// The body is wrapped in do { } while (0) so that an invocation followed by a
// semicolon is a single statement, which is safe inside an unbraced if/else.
// Every argument is parenthesized so expression arguments expand correctly.
// a, b, c and d are evaluated several times, so they must not have side
// effects.
#define G(r,i,m,a,b,c,d) \
  do { \
    (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+0]]; \
    (d) = rotr32((d) ^ (a), 16); \
    (c) = (c) + (d); \
    (b) = rotr32((b) ^ (c), 12); \
    (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+1]]; \
    (d) = rotr32((d) ^ (a), 8); \
    (c) = (c) + (d); \
    (b) = rotr32((b) ^ (c), 7); \
  } while (0)
84 
85 
blake2s_compress(blake2s_state * S,const byte block[BLAKE2S_BLOCKBYTES])86 static void blake2s_compress( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] )
87 {
88   uint32 m[16];
89   uint32 v[16];
90 
91   for( size_t i = 0; i < 16; ++i )
92     m[i] = RawGet4( block + i * 4 );
93 
94   for( size_t i = 0; i < 8; ++i )
95     v[i] = S->h[i];
96 
97   v[ 8] = blake2s_IV[0];
98   v[ 9] = blake2s_IV[1];
99   v[10] = blake2s_IV[2];
100   v[11] = blake2s_IV[3];
101   v[12] = S->t[0] ^ blake2s_IV[4];
102   v[13] = S->t[1] ^ blake2s_IV[5];
103   v[14] = S->f[0] ^ blake2s_IV[6];
104   v[15] = S->f[1] ^ blake2s_IV[7];
105 
106   for ( uint r = 0; r <= 9; ++r ) // No gain on i7 if unrolled, but exe size grows.
107   {
108     G(r,0,m,v[ 0],v[ 4],v[ 8],v[12]);
109     G(r,1,m,v[ 1],v[ 5],v[ 9],v[13]);
110     G(r,2,m,v[ 2],v[ 6],v[10],v[14]);
111     G(r,3,m,v[ 3],v[ 7],v[11],v[15]);
112     G(r,4,m,v[ 0],v[ 5],v[10],v[15]);
113     G(r,5,m,v[ 1],v[ 6],v[11],v[12]);
114     G(r,6,m,v[ 2],v[ 7],v[ 8],v[13]);
115     G(r,7,m,v[ 3],v[ 4],v[ 9],v[14]);
116   }
117 
118   for( size_t i = 0; i < 8; ++i )
119     S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
120 }
121 
122 
blake2s_update(blake2s_state * S,const byte * in,size_t inlen)123 void blake2s_update( blake2s_state *S, const byte *in, size_t inlen )
124 {
125   while( inlen > 0 )
126   {
127     size_t left = S->buflen;
128     size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
129 
130     if( inlen > fill )
131     {
132       memcpy( S->buf + left, in, fill ); // Fill buffer
133       S->buflen += fill;
134       blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
135 
136 #ifdef USE_SSE
137 #ifdef _WIN_32 // We use SSSE3 _mm_shuffle_epi8 only in x64 mode.
138       if (_SSE_Version>=SSE_SSE2)
139 #else
140       if (_SSE_Version>=SSE_SSSE3)
141 #endif
142         blake2s_compress_sse( S, S->buf );
143       else
144         blake2s_compress( S, S->buf ); // Compress
145 #else
146       blake2s_compress( S, S->buf ); // Compress
147 #endif
148 
149       memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
150       S->buflen -= BLAKE2S_BLOCKBYTES;
151       in += fill;
152       inlen -= fill;
153     }
154     else // inlen <= fill
155     {
156       memcpy( S->buf + left, in, (size_t)inlen );
157       S->buflen += (size_t)inlen; // Be lazy, do not compress
158       in += inlen;
159       inlen = 0;
160     }
161   }
162 }
163 
164 
blake2s_final(blake2s_state * S,byte * digest)165 void blake2s_final( blake2s_state *S, byte *digest )
166 {
167   if( S->buflen > BLAKE2S_BLOCKBYTES )
168   {
169     blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
170     blake2s_compress( S, S->buf );
171     S->buflen -= BLAKE2S_BLOCKBYTES;
172     memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
173   }
174 
175   blake2s_increment_counter( S, ( uint32 )S->buflen );
176   blake2s_set_lastblock( S );
177   memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
178   blake2s_compress( S, S->buf );
179 
180   for( int i = 0; i < 8; ++i ) /* Output full hash  */
181     RawPut4( S->h[i], digest + 4 * i );
182 }
183 
184