1 /*
2 The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
3 Michaël Peeters and Gilles Van Assche. For more information, feedback or
4 questions, please refer to our website: http://keccak.noekeon.org/
5 
6 Implementation by the designers,
7 hereby denoted as "the implementer".
8 
9 To the extent possible under law, the implementer has waived all copyright
10 and related or neighboring rights to the source code in this file.
11 http://creativecommons.org/publicdomain/zero/1.0/
12 */
13 
14 #include <string.h>
15 #include "brg_endian.h"
16 #include "KeccakF-1600-opt64-settings.h"
17 #include "KeccakF-1600-interface.h"
18 
/* Integer aliases used throughout this file. */
typedef unsigned char      UINT8;   /* a single byte of message/state data */
typedef unsigned long long UINT64;  /* one state lane (at least 64 bits wide) */
21 
/*
 * ALIGN: storage-alignment qualifier for constants that are loaded with
 * aligned SIMD loads. It requests 32-byte alignment when one of the SIMD
 * back ends (UseSSE / UseXOP) is selected and the compiler is GCC-compatible
 * or MSVC; in every other case it expands to nothing.
 */
#if !defined(UseSSE) && !defined(UseXOP)
/* scalar / MMX builds: no special alignment requested here */
#elif defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#endif

/* Fallback: make sure ALIGN is always usable as a (possibly empty) qualifier. */
#if !defined(ALIGN)
# define ALIGN
#endif
33 
34 #if defined(UseSSE)
35     #include <x86intrin.h>
36     typedef __m128i V64;
37     typedef __m128i V128;
38     typedef union {
39         V128 v128;
40         UINT64 v64[2];
41     } V6464;
42 
43     #define ANDnu64(a, b)       _mm_andnot_si128(a, b)
44     #define LOAD64(a)           _mm_loadl_epi64((const V64 *)&(a))
45     #define CONST64(a)          _mm_loadl_epi64((const V64 *)&(a))
46     #define ROL64(a, o)         _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
47     #define STORE64(a, b)       _mm_storel_epi64((V64 *)&(a), b)
48     #define XOR64(a, b)         _mm_xor_si128(a, b)
49     #define XOReq64(a, b)       a = _mm_xor_si128(a, b)
50     #define SHUFFLEBYTES128(a, b)   _mm_shuffle_epi8(a, b)
51 
52     #define ANDnu128(a, b)      _mm_andnot_si128(a, b)
53     #define LOAD6464(a, b)      _mm_set_epi64((__m64)(a), (__m64)(b))
54     #define CONST128(a)         _mm_load_si128((const V128 *)&(a))
55     #define LOAD128(a)          _mm_load_si128((const V128 *)&(a))
56     #define LOAD128u(a)         _mm_loadu_si128((const V128 *)&(a))
57     #define ROL64in128(a, o)    _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
58     #define STORE128(a, b)      _mm_store_si128((V128 *)&(a), b)
59     #define XOR128(a, b)        _mm_xor_si128(a, b)
60     #define XOReq128(a, b)      a = _mm_xor_si128(a, b)
61     #define GET64LOLO(a, b)     _mm_unpacklo_epi64(a, b)
62     #define GET64HIHI(a, b)     _mm_unpackhi_epi64(a, b)
63     #define COPY64HI2LO(a)      _mm_shuffle_epi32(a, 0xEE)
64     #define COPY64LO2HI(a)      _mm_shuffle_epi32(a, 0x44)
65     #define ZERO128()           _mm_setzero_si128()
66 
67     #ifdef UseOnlySIMD64
68     #include "KeccakF-1600-simd64.macros"
69     #else
70 ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
71     #include "KeccakF-1600-simd128.macros"
72     #endif
73 
74     #ifdef UseBebigokimisa
75     #error "UseBebigokimisa cannot be used in combination with UseSSE"
76     #endif
77 #elif defined(UseXOP)
78     #include <x86intrin.h>
79     typedef __m128i V64;
80     typedef __m128i V128;
81 
82     #define LOAD64(a)           _mm_loadl_epi64((const V64 *)&(a))
83     #define CONST64(a)          _mm_loadl_epi64((const V64 *)&(a))
84     #define STORE64(a, b)       _mm_storel_epi64((V64 *)&(a), b)
85     #define XOR64(a, b)         _mm_xor_si128(a, b)
86     #define XOReq64(a, b)       a = _mm_xor_si128(a, b)
87 
88     #define ANDnu128(a, b)      _mm_andnot_si128(a, b)
89     #define LOAD6464(a, b)      _mm_set_epi64((__m64)(a), (__m64)(b))
90     #define CONST128(a)         _mm_load_si128((const V128 *)&(a))
91     #define LOAD128(a)          _mm_load_si128((const V128 *)&(a))
92     #define LOAD128u(a)         _mm_loadu_si128((const V128 *)&(a))
93     #define STORE128(a, b)      _mm_store_si128((V128 *)&(a), b)
94     #define XOR128(a, b)        _mm_xor_si128(a, b)
95     #define XOReq128(a, b)      a = _mm_xor_si128(a, b)
96     #define ZERO128()           _mm_setzero_si128()
97 
98     #define SWAP64(a)           _mm_shuffle_epi32(a, 0x4E)
99     #define GET64LOLO(a, b)     _mm_unpacklo_epi64(a, b)
100     #define GET64HIHI(a, b)     _mm_unpackhi_epi64(a, b)
101     #define GET64LOHI(a, b)     ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
102     #define GET64HILO(a, b)     SWAP64(GET64LOHI(b, a))
103     #define COPY64HI2LO(a)      _mm_shuffle_epi32(a, 0xEE)
104     #define COPY64LO2HI(a)      _mm_shuffle_epi32(a, 0x44)
105 
106     #define ROL6464same(a, o)   _mm_roti_epi64(a, o)
107     #define ROL6464(a, r1, r2)  _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
108 ALIGN const UINT64 rot_0_20[2]  = { 0, 20};
109 ALIGN const UINT64 rot_44_3[2]  = {44,  3};
110 ALIGN const UINT64 rot_43_45[2] = {43, 45};
111 ALIGN const UINT64 rot_21_61[2] = {21, 61};
112 ALIGN const UINT64 rot_14_28[2] = {14, 28};
113 ALIGN const UINT64 rot_1_36[2]  = { 1, 36};
114 ALIGN const UINT64 rot_6_10[2]  = { 6, 10};
115 ALIGN const UINT64 rot_25_15[2] = {25, 15};
116 ALIGN const UINT64 rot_8_56[2]  = { 8, 56};
117 ALIGN const UINT64 rot_18_27[2] = {18, 27};
118 ALIGN const UINT64 rot_62_55[2] = {62, 55};
119 ALIGN const UINT64 rot_39_41[2] = {39, 41};
120 
#if defined(UseSimulatedXOP)
    // For debugging purposes, when XOP is not available:
    // replace the two XOP rotate intrinsics by SSE2 shift/or sequences.
    #undef ROL6464
    #undef ROL6464same
    // Rotate both 64-bit lanes left by the same amount o.
    // A shift count of 64 (o == 0) yields zero with these intrinsics, so
    // a rotation by 0 returns the input unchanged.
    #define ROL6464same(a, o)   _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
    // Rotate the low lane of a left by r0 bits and the high lane by r1 bits.
    // Both lanes are rotated by r0 and by r1 separately, then the low lane of
    // the first result is combined with the high lane of the second.
    // The rotations use ROL6464same: the UseXOP branch of this file does not
    // define a ROL64 macro, so the former ROL64 calls could not be resolved.
    V128 ROL6464(V128 a, int r0, int r1)
    {
        V128 a0 = ROL6464same(a, r0);
        V128 a1 = COPY64HI2LO(ROL6464same(a, r1));
        return GET64LOLO(a0, a1);
    }
#endif
133 
134     #include "KeccakF-1600-xop.macros"
135 
136     #ifdef UseBebigokimisa
137     #error "UseBebigokimisa cannot be used in combination with UseXOP"
138     #endif
139 #elif defined(UseMMX)
140     #include <mmintrin.h>
141     typedef __m64 V64;
142     #define ANDnu64(a, b)       _mm_andnot_si64(a, b)
143 
144     #if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
145         #define LOAD64(a)       *(V64*)&(a)
146         #define CONST64(a)      *(V64*)&(a)
147         #define STORE64(a, b)   *(V64*)&(a) = b
148     #else
149         #define LOAD64(a)       (V64)a
150         #define CONST64(a)      (V64)a
151         #define STORE64(a, b)   a = (UINT64)b
152     #endif
153     #define ROL64(a, o)         _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
154     #define XOR64(a, b)         _mm_xor_si64(a, b)
155     #define XOReq64(a, b)       a = _mm_xor_si64(a, b)
156 
157     #include "KeccakF-1600-simd64.macros"
158 
159     #ifdef UseBebigokimisa
160     #error "UseBebigokimisa cannot be used in combination with UseMMX"
161     #endif
162 #else
/*
 * ROL64(a, offset): rotate the 64-bit value a left by offset bits.
 * Three variants: the MSVC intrinsic, an x86 SHLD inline-assembly version
 * (GCC statement expression, offset must be a compile-time constant), and a
 * portable shift-based fallback.
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(UseSHLD)
  #define ROL64(x,N) ({ \
    register UINT64 __out; \
    register UINT64 __in = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
    __out; \
  })
#else
/* Portable fallback. Both arguments are fully parenthesized so compound
   expressions expand correctly, and the shift counts are reduced modulo 64 so
   that an offset of 0 no longer shifts right by 64 (undefined behavior in C).
   The two halves never overlap, so OR gives the same result as the former XOR
   for offsets 1..63. Note that a and offset are each evaluated twice. */
#define ROL64(a, offset) ((((UINT64)(a)) << ((offset) & 63)) | (((UINT64)(a)) >> ((64 - (offset)) & 63)))
#endif
175 
176     #include "KeccakF-1600-64.macros"
177 #endif
178 
179 #include "KeccakF-1600-unrolling.macros"
180 
/*
 * Run the permutation on a state that is stored as an array of 64-bit lanes.
 * The working variables (declareABCDE), the load of the state (copyFromState)
 * and the round sequence (rounds) are macros supplied by the included
 * .macros files; `rounds` is presumably also what stores the result back
 * into state.
 */
static void KeccakPermutationOnWords(UINT64 *state)
{
    declareABCDE
#if !(Unrolling == 24)
    unsigned int i;     /* only declared when the rounds are not fully unrolled */
#endif

    copyFromState(A, state)
    rounds
#ifdef UseMMX
    _mm_empty();        /* clear the MMX state before returning to normal code */
#endif
}
194 
/*
 * XOR the first laneCount lanes of input into state, then run the
 * permutation on state.
 *
 * state     : state as 64-bit lanes, updated in place
 * input     : at least laneCount lanes of data to XOR in
 * laneCount : number of lanes taken from input (no bound check is done here)
 */
static void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
{
    declareABCDE
#if !(Unrolling == 24)
    unsigned int i;     /* only declared when the rounds are not fully unrolled */
#endif
    unsigned int lane;

    for (lane = 0; lane != laneCount; ++lane) {
        state[lane] ^= input[lane];
    }
    copyFromState(A, state)
    rounds
#ifdef UseMMX
    _mm_empty();        /* clear the MMX state before returning to normal code */
#endif
}
211 
212 #ifdef ProvideFast576
/*
 * Fixed-size variant for a 576-bit input block: the XOR of input into the
 * state is folded into the load macro copyFromStateAndXor576bits, after
 * which the rounds are run. All three macros come from the included
 * .macros files.
 */
static void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if !(Unrolling == 24)
    unsigned int i;     /* only declared when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor576bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* clear the MMX state before returning to normal code */
#endif
}
226 #endif
227 
228 #ifdef ProvideFast832
/*
 * Fixed-size variant for an 832-bit input block: the XOR of input into the
 * state is folded into the load macro copyFromStateAndXor832bits, after
 * which the rounds are run. All three macros come from the included
 * .macros files.
 */
static void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if !(Unrolling == 24)
    unsigned int i;     /* only declared when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor832bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* clear the MMX state before returning to normal code */
#endif
}
242 #endif
243 
244 #ifdef ProvideFast1024
/*
 * Fixed-size variant for a 1024-bit input block: the XOR of input into the
 * state is folded into the load macro copyFromStateAndXor1024bits, after
 * which the rounds are run. All three macros come from the included
 * .macros files.
 */
static void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if !(Unrolling == 24)
    unsigned int i;     /* only declared when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor1024bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* clear the MMX state before returning to normal code */
#endif
}
258 #endif
259 
260 #ifdef ProvideFast1088
/*
 * Fixed-size variant for a 1088-bit input block: the XOR of input into the
 * state is folded into the load macro copyFromStateAndXor1088bits, after
 * which the rounds are run. All three macros come from the included
 * .macros files.
 */
static void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if !(Unrolling == 24)
    unsigned int i;     /* only declared when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor1088bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* clear the MMX state before returning to normal code */
#endif
}
274 #endif
275 
276 #ifdef ProvideFast1152
/*
 * Fixed-size variant for a 1152-bit input block: the XOR of input into the
 * state is folded into the load macro copyFromStateAndXor1152bits, after
 * which the rounds are run. All three macros come from the included
 * .macros files.
 */
static void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if !(Unrolling == 24)
    unsigned int i;     /* only declared when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor1152bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* clear the MMX state before returning to normal code */
#endif
}
290 #endif
291 
292 #ifdef ProvideFast1344
/*
 * Fixed-size variant for a 1344-bit input block: the XOR of input into the
 * state is folded into the load macro copyFromStateAndXor1344bits, after
 * which the rounds are run. All three macros come from the included
 * .macros files.
 */
static void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
{
    declareABCDE
#if !(Unrolling == 24)
    unsigned int i;     /* only declared when the rounds are not fully unrolled */
#endif

    copyFromStateAndXor1344bits(A, state, input)
    rounds
#ifdef UseMMX
    _mm_empty();        /* clear the MMX state before returning to normal code */
#endif
}
306 #endif
307 
/*
 * One-time initialization hook. This implementation needs no set-up, so the
 * body is intentionally empty.
 * Declared with (void) so the definition is a real prototype and the compiler
 * can reject calls that pass arguments.
 */
static void KeccakInitialize(void)
{
}
311 
/*
 * Reset a state to its initial value.
 *
 * state : buffer of at least 200 bytes (1600 bits); when UseBebigokimisa is
 *         defined it is also written through a UINT64 pointer, so it must be
 *         suitably aligned for that.
 *
 * All 200 bytes are cleared. With UseBebigokimisa, lanes 1, 2, 8, 12, 17 and
 * 20 are then set to all-ones (presumably the complemented-lane
 * representation that option selects).
 */
static void KeccakInitializeState(unsigned char *state)
{
    enum { stateSizeInBytes = 1600 / 8 };

    memset(state, 0, stateSizeInBytes);
#ifdef UseBebigokimisa
    {
        static const unsigned int invertedLanes[] = { 1, 2, 8, 12, 17, 20 };
        UINT64 *lanes = (UINT64 *)state;
        unsigned int k;

        for (k = 0; k < sizeof(invertedLanes) / sizeof(invertedLanes[0]); k++)
            lanes[invertedLanes[k]] = ~(UINT64)0;
    }
#endif
}
324 
KeccakPermutation(unsigned char * state)325 static void KeccakPermutation(unsigned char *state)
326 {
327     // We assume the state is always stored as words
328     KeccakPermutationOnWords((UINT64*)state);
329 }
330 
331 #if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
/*
 * Assemble a 64-bit word from 8 bytes, least significant byte first.
 *
 * word  : receives the result (cleared first, then filled byte by byte)
 * bytes : 8 input bytes; bytes[0] becomes bits 0..7, bytes[7] bits 56..63
 */
static void fromBytesToWord(UINT64 *word, const UINT8 *bytes)
{
    unsigned int shift;

    *word = 0;
    for (shift = 0; shift < 64; shift += 8)
        *word |= (UINT64)(*bytes++) << shift;
}
340 #endif
341 
342 #ifdef ProvideFast576
KeccakAbsorb576bits(unsigned char * state,const unsigned char * data)343 static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
344 {
345 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
346     KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, (const UINT64*)data);
347 #else
348     UINT64 dataAsWords[9];
349     unsigned int i;
350 
351     for(i=0; i<9; i++)
352         fromBytesToWord(dataAsWords+i, data+(i*8));
353     KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, dataAsWords);
354 #endif
355 }
356 #endif
357 
358 #ifdef ProvideFast832
KeccakAbsorb832bits(unsigned char * state,const unsigned char * data)359 static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
360 {
361 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
362     KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, (const UINT64*)data);
363 #else
364     UINT64 dataAsWords[13];
365     unsigned int i;
366 
367     for(i=0; i<13; i++)
368         fromBytesToWord(dataAsWords+i, data+(i*8));
369     KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, dataAsWords);
370 #endif
371 }
372 #endif
373 
374 #ifdef ProvideFast1024
KeccakAbsorb1024bits(unsigned char * state,const unsigned char * data)375 static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
376 {
377 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
378     KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, (const UINT64*)data);
379 #else
380     UINT64 dataAsWords[16];
381     unsigned int i;
382 
383     for(i=0; i<16; i++)
384         fromBytesToWord(dataAsWords+i, data+(i*8));
385     KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, dataAsWords);
386 #endif
387 }
388 #endif
389 
390 #ifdef ProvideFast1088
KeccakAbsorb1088bits(unsigned char * state,const unsigned char * data)391 static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
392 {
393 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
394     KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, (const UINT64*)data);
395 #else
396     UINT64 dataAsWords[17];
397     unsigned int i;
398 
399     for(i=0; i<17; i++)
400         fromBytesToWord(dataAsWords+i, data+(i*8));
401     KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, dataAsWords);
402 #endif
403 }
404 #endif
405 
406 #ifdef ProvideFast1152
KeccakAbsorb1152bits(unsigned char * state,const unsigned char * data)407 static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
408 {
409 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
410     KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, (const UINT64*)data);
411 #else
412     UINT64 dataAsWords[18];
413     unsigned int i;
414 
415     for(i=0; i<18; i++)
416         fromBytesToWord(dataAsWords+i, data+(i*8));
417     KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, dataAsWords);
418 #endif
419 }
420 #endif
421 
422 #ifdef ProvideFast1344
KeccakAbsorb1344bits(unsigned char * state,const unsigned char * data)423 static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
424 {
425 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
426     KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, (const UINT64*)data);
427 #else
428     UINT64 dataAsWords[21];
429     unsigned int i;
430 
431     for(i=0; i<21; i++)
432         fromBytesToWord(dataAsWords+i, data+(i*8));
433     KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, dataAsWords);
434 #endif
435 }
436 #endif
437 
KeccakAbsorb(unsigned char * state,const unsigned char * data,unsigned int laneCount)438 static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
439 {
440 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
441     KeccakPermutationOnWordsAfterXoring((UINT64*)state, (const UINT64*)data, laneCount);
442 #else
443     UINT64 dataAsWords[25];
444     unsigned int i;
445 
446     for(i=0; i<laneCount; i++)
447         fromBytesToWord(dataAsWords+i, data+(i*8));
448     KeccakPermutationOnWordsAfterXoring((UINT64*)state, dataAsWords, laneCount);
449 #endif
450 }
451 
452 #if (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
/*
 * Serialize a 64-bit word into 8 bytes, least significant byte first.
 *
 * bytes : receives 8 bytes; bytes[0] gets bits 0..7, bytes[7] bits 56..63
 * word  : value to serialize
 */
static void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8)
        *bytes++ = (UINT8)((word >> shift) & 0xFF);
}
460 #endif
461 
462 #ifdef ProvideFast1024
/*
 * Copy the first 1024 bits (128 bytes, 16 lanes) of the state to data.
 *
 * state : state buffer (read only)
 * data  : receives 128 bytes
 *
 * Little-endian platforms copy the bytes directly; others serialize each
 * lane with fromWordToBytes. With UseBebigokimisa, output lanes 1, 2, 8 and
 * 12 are complemented afterwards through a UINT64 view of data.
 */
static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    const UINT64 *stateAsWords = (const UINT64 *)state;
    unsigned int lane;

    for (lane = 0; lane < 1024 / 64; lane++)
        fromWordToBytes(&data[lane * 8], stateAsWords[lane]);
#else
    memcpy(data, state, 1024 / 8);
#endif
#ifdef UseBebigokimisa
    {
        static const unsigned int invertedLanes[] = { 1, 2, 8, 12 };
        UINT64 *out = (UINT64 *)data;
        unsigned int k;

        for (k = 0; k < sizeof(invertedLanes) / sizeof(invertedLanes[0]); k++)
            out[invertedLanes[k]] = ~out[invertedLanes[k]];
    }
#endif
}
480 #endif
481 
/*
 * Copy the first laneCount lanes (laneCount * 8 bytes) of the state to data.
 *
 * state     : state buffer (read only)
 * data      : receives laneCount * 8 bytes
 * laneCount : number of 64-bit lanes to extract (not range-checked here)
 *
 * Little-endian platforms copy the bytes directly; others serialize each
 * lane with fromWordToBytes. With UseBebigokimisa, those of the output lanes
 * 1, 2, 8, 12, 17 and 20 that lie below laneCount are complemented
 * afterwards through a UINT64 view of data.
 */
static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    const UINT64 *stateAsWords = (const UINT64 *)state;
    unsigned int lane;

    for (lane = 0; lane < laneCount; lane++)
        fromWordToBytes(&data[lane * 8], stateAsWords[lane]);
#else
    memcpy(data, state, laneCount*8);
#endif
#ifdef UseBebigokimisa
    {
        /* Ascending order: the loop can stop at the first lane that was not extracted. */
        static const unsigned int invertedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        UINT64 *out = (UINT64 *)data;
        unsigned int k;

        for (k = 0; k < 6 && invertedLanes[k] < laneCount; k++)
            out[invertedLanes[k]] = ~out[invertedLanes[k]];
    }
#endif
}
513