1 /*
2 The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
3 Michaël Peeters and Gilles Van Assche. For more information, feedback or
4 questions, please refer to our website: http://keccak.noekeon.org/
5
6 Implementation by the designers,
7 hereby denoted as "the implementer".
8
9 To the extent possible under law, the implementer has waived all copyright
10 and related or neighboring rights to the source code in this file.
11 http://creativecommons.org/publicdomain/zero/1.0/
12 */
13
14 #include <string.h>
15 #include "brg_endian.h"
16 #include "KeccakF-1600-opt64-settings.h"
17 #include "KeccakF-1600-interface.h"
18
/* Integer aliases used throughout this file. */
typedef unsigned char      UINT8;   /* a single byte of input/output data */
typedef unsigned long long UINT64;  /* word type used for the state lanes (at least 64 bits wide) */
21
/*
 * ALIGN: alignment qualifier placed in front of data used by the SIMD code.
 * When one of the 128-bit back ends (UseSSE or UseXOP) is selected it asks
 * for 32-byte alignment using the GCC or the MSVC spelling.  If nothing has
 * defined ALIGN after that, it is defined as an empty token sequence so it
 * can be written unconditionally.
 */
#if (defined(UseSSE) || defined(UseXOP)) && defined(__GNUC__)
    #define ALIGN __attribute__ ((aligned(32)))
#elif (defined(UseSSE) || defined(UseXOP)) && defined(_MSC_VER)
    #define ALIGN __declspec(align(32))
#endif

#if !defined(ALIGN)
    #define ALIGN
#endif
33
34 #if defined(UseSSE)
35 #include <x86intrin.h>
36 typedef __m128i V64;
37 typedef __m128i V128;
38 typedef union {
39 V128 v128;
40 UINT64 v64[2];
41 } V6464;
42
43 #define ANDnu64(a, b) _mm_andnot_si128(a, b)
44 #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
45 #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
46 #define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
47 #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
48 #define XOR64(a, b) _mm_xor_si128(a, b)
49 #define XOReq64(a, b) a = _mm_xor_si128(a, b)
50 #define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)
51
52 #define ANDnu128(a, b) _mm_andnot_si128(a, b)
53 #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
54 #define CONST128(a) _mm_load_si128((const V128 *)&(a))
55 #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
56 #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
57 #define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
58 #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
59 #define XOR128(a, b) _mm_xor_si128(a, b)
60 #define XOReq128(a, b) a = _mm_xor_si128(a, b)
61 #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
62 #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
63 #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
64 #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
65 #define ZERO128() _mm_setzero_si128()
66
67 #ifdef UseOnlySIMD64
68 #include "KeccakF-1600-simd64.macros"
69 #else
70 ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
71 #include "KeccakF-1600-simd128.macros"
72 #endif
73
74 #ifdef UseBebigokimisa
75 #error "UseBebigokimisa cannot be used in combination with UseSSE"
76 #endif
77 #elif defined(UseXOP)
78 #include <x86intrin.h>
79 typedef __m128i V64;
80 typedef __m128i V128;
81
82 #define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
83 #define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
84 #define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
85 #define XOR64(a, b) _mm_xor_si128(a, b)
86 #define XOReq64(a, b) a = _mm_xor_si128(a, b)
87
88 #define ANDnu128(a, b) _mm_andnot_si128(a, b)
89 #define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
90 #define CONST128(a) _mm_load_si128((const V128 *)&(a))
91 #define LOAD128(a) _mm_load_si128((const V128 *)&(a))
92 #define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
93 #define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
94 #define XOR128(a, b) _mm_xor_si128(a, b)
95 #define XOReq128(a, b) a = _mm_xor_si128(a, b)
96 #define ZERO128() _mm_setzero_si128()
97
98 #define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
99 #define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
100 #define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
101 #define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
102 #define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
103 #define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
104 #define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
105
106 #define ROL6464same(a, o) _mm_roti_epi64(a, o)
107 #define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
108 ALIGN const UINT64 rot_0_20[2] = { 0, 20};
109 ALIGN const UINT64 rot_44_3[2] = {44, 3};
110 ALIGN const UINT64 rot_43_45[2] = {43, 45};
111 ALIGN const UINT64 rot_21_61[2] = {21, 61};
112 ALIGN const UINT64 rot_14_28[2] = {14, 28};
113 ALIGN const UINT64 rot_1_36[2] = { 1, 36};
114 ALIGN const UINT64 rot_6_10[2] = { 6, 10};
115 ALIGN const UINT64 rot_25_15[2] = {25, 15};
116 ALIGN const UINT64 rot_8_56[2] = { 8, 56};
117 ALIGN const UINT64 rot_18_27[2] = {18, 27};
118 ALIGN const UINT64 rot_62_55[2] = {62, 55};
119 ALIGN const UINT64 rot_39_41[2] = {39, 41};
120
121 #if defined(UseSimulatedXOP)
122 // For debugging purposes, when XOP is not available
123 #undef ROL6464
124 #undef ROL6464same
125 #define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
ROL6464(V128 a,int r0,int r1)126 V128 ROL6464(V128 a, int r0, int r1)
127 {
128 V128 a0 = ROL64(a, r0);
129 V128 a1 = COPY64HI2LO(ROL64(a, r1));
130 return GET64LOLO(a0, a1);
131 }
132 #endif
133
134 #include "KeccakF-1600-xop.macros"
135
136 #ifdef UseBebigokimisa
137 #error "UseBebigokimisa cannot be used in combination with UseXOP"
138 #endif
139 #elif defined(UseMMX)
140 #include <mmintrin.h>
141 typedef __m64 V64;
142 #define ANDnu64(a, b) _mm_andnot_si64(a, b)
143
144 #if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
145 #define LOAD64(a) *(V64*)&(a)
146 #define CONST64(a) *(V64*)&(a)
147 #define STORE64(a, b) *(V64*)&(a) = b
148 #else
149 #define LOAD64(a) (V64)a
150 #define CONST64(a) (V64)a
151 #define STORE64(a, b) a = (UINT64)b
152 #endif
153 #define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
154 #define XOR64(a, b) _mm_xor_si64(a, b)
155 #define XOReq64(a, b) a = _mm_xor_si64(a, b)
156
157 #include "KeccakF-1600-simd64.macros"
158
159 #ifdef UseBebigokimisa
160 #error "UseBebigokimisa cannot be used in combination with UseMMX"
161 #endif
162 #else
163 #if defined(_MSC_VER)
164 #define ROL64(a, offset) _rotl64(a, offset)
165 #elif defined(UseSHLD)
166 #define ROL64(x,N) ({ \
167 register UINT64 __out; \
168 register UINT64 __in = x; \
169 __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
170 __out; \
171 })
172 #else
173 #define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset)))
174 #endif
175
176 #include "KeccakF-1600-64.macros"
177 #endif
178
179 #include "KeccakF-1600-unrolling.macros"
180
/*
 * Run the permutation on a state that is stored as an array of UINT64 words.
 *
 * The whole body is produced by macros from the included .macros files:
 * declareABCDE declares the working variables, copyFromState loads them from
 * `state`, and `rounds` expands to the round computation (presumably writing
 * the result back to `state` -- see the macro definitions).
 */
static void KeccakPermutationOnWords(UINT64 *state)
{
    declareABCDE
#if Unrolling != 24
    /* only declared when the rounds are not fully unrolled; presumably the loop counter used by `rounds` */
    unsigned int i;
#endif

    copyFromState(A, state)
    rounds
#ifdef UseMMX
    /* leave MMX mode so that later x87 floating-point code works */
    _mm_empty();
#endif
}
194
/*
 * XOR `laneCount` input words into the state, then run the permutation.
 *
 *   state      state stored as UINT64 words; modified in place
 *   input      at least `laneCount` words to combine into the state
 *   laneCount  number of leading words of `state` that receive input
 */
static void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
{
    declareABCDE
#if Unrolling != 24
    /* only declared when the rounds are not fully unrolled; presumably the loop counter used by `rounds` */
    unsigned int i;
#endif
    unsigned int j = 0;

    /* fold the input block into the first laneCount words of the state */
    while (j < laneCount) {
        state[j] ^= input[j];
        j++;
    }
    copyFromState(A, state)
    rounds
#ifdef UseMMX
    /* leave MMX mode so that later x87 floating-point code works */
    _mm_empty();
#endif
}
211
212 #ifdef ProvideFast576
/*
 * Permutation entry point specialised for a 576-bit input block.
 *
 * Instead of XORing into `state` first, the working variables are loaded by
 * the copyFromStateAndXor576bits macro from both `state` and `input`
 * (presumably combining the first 9 words of input while loading -- the
 * macro body is in the included .macros file); `rounds` then runs as usual.
 */
static void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* only declared when the rounds are not fully unrolled */
    unsigned int i;
#endif

    copyFromStateAndXor576bits(A, state, input)
    rounds
#ifdef UseMMX
    /* leave MMX mode so that later x87 floating-point code works */
    _mm_empty();
#endif
}
226 #endif
227
228 #ifdef ProvideFast832
/*
 * Permutation entry point specialised for an 832-bit input block.
 *
 * The working variables are loaded by the copyFromStateAndXor832bits macro
 * from both `state` and `input` (presumably combining the first 13 words of
 * input while loading -- the macro body is in the included .macros file);
 * `rounds` then runs as usual.
 */
static void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* only declared when the rounds are not fully unrolled */
    unsigned int i;
#endif

    copyFromStateAndXor832bits(A, state, input)
    rounds
#ifdef UseMMX
    /* leave MMX mode so that later x87 floating-point code works */
    _mm_empty();
#endif
}
242 #endif
243
244 #ifdef ProvideFast1024
/*
 * Permutation entry point specialised for a 1024-bit input block.
 *
 * The working variables are loaded by the copyFromStateAndXor1024bits macro
 * from both `state` and `input` (presumably combining the first 16 words of
 * input while loading -- the macro body is in the included .macros file);
 * `rounds` then runs as usual.
 */
static void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* only declared when the rounds are not fully unrolled */
    unsigned int i;
#endif

    copyFromStateAndXor1024bits(A, state, input)
    rounds
#ifdef UseMMX
    /* leave MMX mode so that later x87 floating-point code works */
    _mm_empty();
#endif
}
258 #endif
259
260 #ifdef ProvideFast1088
/*
 * Permutation entry point specialised for a 1088-bit input block.
 *
 * The working variables are loaded by the copyFromStateAndXor1088bits macro
 * from both `state` and `input` (presumably combining the first 17 words of
 * input while loading -- the macro body is in the included .macros file);
 * `rounds` then runs as usual.
 */
static void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* only declared when the rounds are not fully unrolled */
    unsigned int i;
#endif

    copyFromStateAndXor1088bits(A, state, input)
    rounds
#ifdef UseMMX
    /* leave MMX mode so that later x87 floating-point code works */
    _mm_empty();
#endif
}
274 #endif
275
276 #ifdef ProvideFast1152
/*
 * Permutation entry point specialised for a 1152-bit input block.
 *
 * The working variables are loaded by the copyFromStateAndXor1152bits macro
 * from both `state` and `input` (presumably combining the first 18 words of
 * input while loading -- the macro body is in the included .macros file);
 * `rounds` then runs as usual.
 */
static void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* only declared when the rounds are not fully unrolled */
    unsigned int i;
#endif

    copyFromStateAndXor1152bits(A, state, input)
    rounds
#ifdef UseMMX
    /* leave MMX mode so that later x87 floating-point code works */
    _mm_empty();
#endif
}
290 #endif
291
292 #ifdef ProvideFast1344
/*
 * Permutation entry point specialised for a 1344-bit input block.
 *
 * The working variables are loaded by the copyFromStateAndXor1344bits macro
 * from both `state` and `input` (presumably combining the first 21 words of
 * input while loading -- the macro body is in the included .macros file);
 * `rounds` then runs as usual.
 */
static void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if Unrolling != 24
    /* only declared when the rounds are not fully unrolled */
    unsigned int i;
#endif

    copyFromStateAndXor1344bits(A, state, input)
    rounds
#ifdef UseMMX
    /* leave MMX mode so that later x87 floating-point code works */
    _mm_empty();
#endif
}
306 #endif
307
/*
 * Global initialisation hook.  This implementation has no tables or other
 * global data to set up, so the function is intentionally empty.
 *
 * Declared with an explicit (void) parameter list so that it is a real
 * prototype: with the former empty list "()" the compiler would accept
 * calls carrying arbitrary arguments without a diagnostic.
 */
static void KeccakInitialize(void)
{
}
311
/*
 * Reset a 200-byte state buffer.
 *
 * All 200 bytes are cleared.  When UseBebigokimisa is defined, words 1, 2,
 * 8, 12, 17 and 20 are then set to all-ones (the buffer is accessed as an
 * array of UINT64 for that).
 *
 *   state  buffer of at least 200 bytes
 */
static void KeccakInitializeState(unsigned char *state)
{
#ifdef UseBebigokimisa
    /* indices of the words that start out as all-ones instead of zero */
    static const unsigned char invertedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    unsigned int k;
#endif

    memset(state, 0, 200);
#ifdef UseBebigokimisa
    for (k = 0; k < 6; k++)
        ((UINT64*)state)[invertedLanes[k]] = ~(UINT64)0;
#endif
}
324
KeccakPermutation(unsigned char * state)325 static void KeccakPermutation(unsigned char *state)
326 {
327 // We assume the state is always stored as words
328 KeccakPermutationOnWords((UINT64*)state);
329 }
330
331 #if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
/*
 * Assemble one 64-bit word from 8 bytes, least-significant byte first.
 *
 *   word   receives the result
 *   bytes  the 8 source bytes; bytes[0] ends up in bits 0..7 of *word
 */
static void fromBytesToWord(UINT64 *word, const UINT8 *bytes)
{
    unsigned int shift;

    *word = 0;
    /* byte k is placed at bit position 8*k */
    for (shift = 0; shift < 64; shift += 8) {
        *word |= (UINT64)(bytes[shift / 8]) << shift;
    }
}
340 #endif
341
342 #ifdef ProvideFast576
KeccakAbsorb576bits(unsigned char * state,const unsigned char * data)343 static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
344 {
345 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
346 KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, (const UINT64*)data);
347 #else
348 UINT64 dataAsWords[9];
349 unsigned int i;
350
351 for(i=0; i<9; i++)
352 fromBytesToWord(dataAsWords+i, data+(i*8));
353 KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, dataAsWords);
354 #endif
355 }
356 #endif
357
358 #ifdef ProvideFast832
KeccakAbsorb832bits(unsigned char * state,const unsigned char * data)359 static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
360 {
361 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
362 KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, (const UINT64*)data);
363 #else
364 UINT64 dataAsWords[13];
365 unsigned int i;
366
367 for(i=0; i<13; i++)
368 fromBytesToWord(dataAsWords+i, data+(i*8));
369 KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, dataAsWords);
370 #endif
371 }
372 #endif
373
374 #ifdef ProvideFast1024
KeccakAbsorb1024bits(unsigned char * state,const unsigned char * data)375 static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
376 {
377 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
378 KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, (const UINT64*)data);
379 #else
380 UINT64 dataAsWords[16];
381 unsigned int i;
382
383 for(i=0; i<16; i++)
384 fromBytesToWord(dataAsWords+i, data+(i*8));
385 KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, dataAsWords);
386 #endif
387 }
388 #endif
389
390 #ifdef ProvideFast1088
KeccakAbsorb1088bits(unsigned char * state,const unsigned char * data)391 static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
392 {
393 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
394 KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, (const UINT64*)data);
395 #else
396 UINT64 dataAsWords[17];
397 unsigned int i;
398
399 for(i=0; i<17; i++)
400 fromBytesToWord(dataAsWords+i, data+(i*8));
401 KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, dataAsWords);
402 #endif
403 }
404 #endif
405
406 #ifdef ProvideFast1152
KeccakAbsorb1152bits(unsigned char * state,const unsigned char * data)407 static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
408 {
409 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
410 KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, (const UINT64*)data);
411 #else
412 UINT64 dataAsWords[18];
413 unsigned int i;
414
415 for(i=0; i<18; i++)
416 fromBytesToWord(dataAsWords+i, data+(i*8));
417 KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, dataAsWords);
418 #endif
419 }
420 #endif
421
422 #ifdef ProvideFast1344
KeccakAbsorb1344bits(unsigned char * state,const unsigned char * data)423 static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
424 {
425 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
426 KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, (const UINT64*)data);
427 #else
428 UINT64 dataAsWords[21];
429 unsigned int i;
430
431 for(i=0; i<21; i++)
432 fromBytesToWord(dataAsWords+i, data+(i*8));
433 KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, dataAsWords);
434 #endif
435 }
436 #endif
437
KeccakAbsorb(unsigned char * state,const unsigned char * data,unsigned int laneCount)438 static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
439 {
440 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
441 KeccakPermutationOnWordsAfterXoring((UINT64*)state, (const UINT64*)data, laneCount);
442 #else
443 UINT64 dataAsWords[25];
444 unsigned int i;
445
446 for(i=0; i<laneCount; i++)
447 fromBytesToWord(dataAsWords+i, data+(i*8));
448 KeccakPermutationOnWordsAfterXoring((UINT64*)state, dataAsWords, laneCount);
449 #endif
450 }
451
452 #if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
/*
 * Split one 64-bit word into 8 bytes, least-significant byte first.
 *
 *   bytes  receives 8 bytes; bytes[0] gets bits 0..7 of `word`
 *   word   the value to serialise
 */
static void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    UINT64 rest = word;
    unsigned int k = 0;

    /* emit the low byte, then shift the next one down */
    while (k < (64/8)) {
        bytes[k] = rest & 0xFF;
        rest >>= 8;
        k++;
    }
}
460 #endif
461
462 #ifdef ProvideFast1024
/*
 * Copy the first 1024 bits (128 bytes, 16 words) of the state to `data`.
 *
 *   state  the state buffer, read as UINT64 words on non-little-endian
 *          platforms
 *   data   receives 128 bytes; no alignment is required
 *
 * On little-endian platforms this is a plain memcpy; otherwise every word is
 * serialised least-significant byte first with fromWordToBytes.
 *
 * With UseBebigokimisa, words 1, 2, 8 and 12 are bit-complemented in the
 * output (these are among the words KeccakInitializeState sets to all-ones
 * in that configuration).  The complement is applied byte by byte:
 * complementing a whole word flips every one of its bytes, so the result is
 * the same as the former UINT64 access, but `data` -- a caller-supplied byte
 * buffer -- no longer has to be 8-byte aligned.
 */
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, 128);
#else
    unsigned int i;

    for(i=0; i<16; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        /* words stored complemented that fall inside the first 16 */
        static const unsigned char complementedLanes[4] = { 1, 2, 8, 12 };
        unsigned int k, b;

        for (k = 0; k < 4; k++) {
            unsigned char *lane = data + 8*complementedLanes[k];

            for (b = 0; b < 8; b++)
                lane[b] = (unsigned char)~lane[b];
        }
    }
#endif
}
480 #endif
481
/*
 * Copy the first `laneCount` 64-bit words of the state to `data`.
 *
 *   state      the state buffer, read as UINT64 words on non-little-endian
 *              platforms
 *   data       receives laneCount*8 bytes; no alignment is required
 *   laneCount  number of words to extract
 *
 * On little-endian platforms this is a plain memcpy; otherwise every word is
 * serialised least-significant byte first with fromWordToBytes.
 *
 * With UseBebigokimisa, each of the words 1, 2, 8, 12, 17 and 20 that lies
 * below laneCount is bit-complemented in the output (these are the words
 * KeccakInitializeState sets to all-ones in that configuration).  The
 * complement is applied byte by byte: complementing a whole word flips every
 * one of its bytes, so the result is the same as the former UINT64 access,
 * but `data` -- a caller-supplied byte buffer -- no longer has to be 8-byte
 * aligned.
 */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;

    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef UseBebigokimisa
    {
        /* ascending order, so the scan can stop at the first word that was not extracted */
        static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        unsigned int k, b;

        for (k = 0; k < 6 && complementedLanes[k] < laneCount; k++) {
            unsigned char *lane = data + 8*complementedLanes[k];

            for (b = 0; b < 8; b++)
                lane[b] = (unsigned char)~lane[b];
        }
    }
#endif
}
513