1 // Based on public domain code written in 2012 by Samuel Neves
2
3 #include "rar.hpp"
4
5 #ifdef USE_SSE
6 #include "blake2s_sse.cpp"
7 #endif
8
// Forward declarations of the single-instance BLAKE2s routines defined
// further down in this file. They are placed ahead of the blake2sp.cpp
// include below, presumably because that file calls them (TODO confirm).
// The definitions omit 'static' and take their internal linkage from
// these declarations.
static void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth);
static void blake2s_update( blake2s_state *S, const byte *in, size_t inlen );
static void blake2s_final( blake2s_state *S, byte *digest );
12
13 #include "blake2sp.cpp"
14
// BLAKE2s initialization vector, eight words.
// blake2s_init_param() loads it into the chaining value h[], and
// blake2s_compress() uses it to fill words 8..15 of its working vector.
static const uint32 blake2s_IV[8] =
{
  0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
  0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
20
// Message word schedule: one row per round (10 rounds), 16 entries per row.
// In round r the G macro reads message words m[blake2s_sigma[r][2*i]] and
// m[blake2s_sigma[r][2*i+1]] for its i-th invocation.
static const byte blake2s_sigma[10][16] =
{
  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
  { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
  { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
  { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
  { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
  { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
  { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
  { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
  { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
34
blake2s_set_lastnode(blake2s_state * S)35 static inline void blake2s_set_lastnode( blake2s_state *S )
36 {
37 S->f[1] = ~0U;
38 }
39
40
/* Small helpers that update the finalization flags and the byte counter of the hash state. */
blake2s_set_lastblock(blake2s_state * S)42 static inline void blake2s_set_lastblock( blake2s_state *S )
43 {
44 if( S->last_node ) blake2s_set_lastnode( S );
45
46 S->f[0] = ~0U;
47 }
48
49
blake2s_increment_counter(blake2s_state * S,const uint32 inc)50 static inline void blake2s_increment_counter( blake2s_state *S, const uint32 inc )
51 {
52 S->t[0] += inc;
53 S->t[1] += ( S->t[0] < inc );
54 }
55
56
/* blake2s_init_param XORs the IV with the BLAKE2sp parameter block words. */
blake2s_init_param(blake2s_state * S,uint32 node_offset,uint32 node_depth)58 void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth)
59 {
60 #ifdef USE_SSE
61 if (_SSE_Version>=SSE_SSE2)
62 blake2s_init_sse();
63 #endif
64
65 S->init(); // Clean data.
66 for( int i = 0; i < 8; ++i )
67 S->h[i] = blake2s_IV[i];
68
69 S->h[0] ^= 0x02080020; // We use BLAKE2sp parameters block.
70 S->h[2] ^= node_offset;
71 S->h[3] ^= (node_depth<<16)|0x20000000;
72 }
73
74
// BLAKE2s mixing step for round 'r', position 'i' within the round.
// Mixes two message words, m[blake2s_sigma[r][2*i]] and
// m[blake2s_sigma[r][2*i+1]], into the four working words a, b, c, d.
// a, b, c and d must be modifiable lvalues; they are updated in place and
// evaluated several times, so they must not have side effects.
//
// Every argument is parenthesized, so a compound expression such as 'k+1'
// may be passed for 'i'. The body is wrapped in do { } while (0), so the
// macro behaves as one statement even in an unbraced if/else.
// Invoke it with a trailing semicolon: G(...);
#define G(r,i,m,a,b,c,d) \
  do { \
    (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+0]]; \
    (d) = rotr32((d) ^ (a), 16); \
    (c) = (c) + (d); \
    (b) = rotr32((b) ^ (c), 12); \
    (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+1]]; \
    (d) = rotr32((d) ^ (a), 8); \
    (c) = (c) + (d); \
    (b) = rotr32((b) ^ (c), 7); \
  } while (0)
84
85
blake2s_compress(blake2s_state * S,const byte block[BLAKE2S_BLOCKBYTES])86 static void blake2s_compress( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] )
87 {
88 uint32 m[16];
89 uint32 v[16];
90
91 for( size_t i = 0; i < 16; ++i )
92 m[i] = RawGet4( block + i * 4 );
93
94 for( size_t i = 0; i < 8; ++i )
95 v[i] = S->h[i];
96
97 v[ 8] = blake2s_IV[0];
98 v[ 9] = blake2s_IV[1];
99 v[10] = blake2s_IV[2];
100 v[11] = blake2s_IV[3];
101 v[12] = S->t[0] ^ blake2s_IV[4];
102 v[13] = S->t[1] ^ blake2s_IV[5];
103 v[14] = S->f[0] ^ blake2s_IV[6];
104 v[15] = S->f[1] ^ blake2s_IV[7];
105
106 for ( uint r = 0; r <= 9; ++r ) // No gain on i7 if unrolled, but exe size grows.
107 {
108 G(r,0,m,v[ 0],v[ 4],v[ 8],v[12]);
109 G(r,1,m,v[ 1],v[ 5],v[ 9],v[13]);
110 G(r,2,m,v[ 2],v[ 6],v[10],v[14]);
111 G(r,3,m,v[ 3],v[ 7],v[11],v[15]);
112 G(r,4,m,v[ 0],v[ 5],v[10],v[15]);
113 G(r,5,m,v[ 1],v[ 6],v[11],v[12]);
114 G(r,6,m,v[ 2],v[ 7],v[ 8],v[13]);
115 G(r,7,m,v[ 3],v[ 4],v[ 9],v[14]);
116 }
117
118 for( size_t i = 0; i < 8; ++i )
119 S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
120 }
121
122
blake2s_update(blake2s_state * S,const byte * in,size_t inlen)123 void blake2s_update( blake2s_state *S, const byte *in, size_t inlen )
124 {
125 while( inlen > 0 )
126 {
127 size_t left = S->buflen;
128 size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
129
130 if( inlen > fill )
131 {
132 memcpy( S->buf + left, in, fill ); // Fill buffer
133 S->buflen += fill;
134 blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
135
136 #ifdef USE_SSE
137 #ifdef _WIN_32 // We use SSSE3 _mm_shuffle_epi8 only in x64 mode.
138 if (_SSE_Version>=SSE_SSE2)
139 #else
140 if (_SSE_Version>=SSE_SSSE3)
141 #endif
142 blake2s_compress_sse( S, S->buf );
143 else
144 blake2s_compress( S, S->buf ); // Compress
145 #else
146 blake2s_compress( S, S->buf ); // Compress
147 #endif
148
149 memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
150 S->buflen -= BLAKE2S_BLOCKBYTES;
151 in += fill;
152 inlen -= fill;
153 }
154 else // inlen <= fill
155 {
156 memcpy( S->buf + left, in, (size_t)inlen );
157 S->buflen += (size_t)inlen; // Be lazy, do not compress
158 in += inlen;
159 inlen = 0;
160 }
161 }
162 }
163
164
blake2s_final(blake2s_state * S,byte * digest)165 void blake2s_final( blake2s_state *S, byte *digest )
166 {
167 if( S->buflen > BLAKE2S_BLOCKBYTES )
168 {
169 blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
170 blake2s_compress( S, S->buf );
171 S->buflen -= BLAKE2S_BLOCKBYTES;
172 memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
173 }
174
175 blake2s_increment_counter( S, ( uint32 )S->buflen );
176 blake2s_set_lastblock( S );
177 memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
178 blake2s_compress( S, S->buf );
179
180 for( int i = 0; i < 8; ++i ) /* Output full hash */
181 RawPut4( S->h[i], digest + 4 * i );
182 }
183
184