1 /*
2 The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
3 Michaël Peeters and Gilles Van Assche. For more information, feedback or
4 questions, please refer to our website: http://keccak.noekeon.org/
5 
6 Implementation by the designers,
7 hereby denoted as "the implementer".
8 
9 To the extent possible under law, the implementer has waived all copyright
10 and related or neighboring rights to the source code in this file.
11 http://creativecommons.org/publicdomain/zero/1.0/
12 */
13 
14 #include <string.h>
15 #include "brg_endian.h"
16 #include "KeccakF-1600-opt32-settings.h"
17 #include "KeccakF-1600-interface.h"
18 
19 typedef unsigned char UINT8;
20 typedef unsigned short UINT16;
21 typedef unsigned int UINT32;
22 typedef unsigned long long int UINT64;
23 
24 #ifdef UseInterleaveTables
25 int interleaveTablesBuilt = 0;
26 UINT16 interleaveTable[65536];
27 UINT16 deinterleaveTable[65536];
28 
buildInterleaveTables()29 static void buildInterleaveTables()
30 {
31     UINT32 i, j;
32     UINT16 x;
33 
34     if (!interleaveTablesBuilt) {
35         for(i=0; i<65536; i++) {
36             x = 0;
37             for(j=0; j<16; j++) {
38                 if (i & (1 << j))
39                     x |= (1 << (j/2 + 8*(j%2)));
40             }
41             interleaveTable[i] = x;
42             deinterleaveTable[x] = (UINT16)i;
43         }
44         interleaveTablesBuilt = 1;
45     }
46 }
47 
48 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
49 
50 #define xor2bytesIntoInterleavedWords(even, odd, source, j) \
51     i##j = interleaveTable[((const UINT16*)source)[j]]; \
52     ((UINT8*)even)[j] ^= i##j & 0xFF; \
53     ((UINT8*)odd)[j] ^= i##j >> 8;
54 
55 #define setInterleavedWordsInto2bytes(dest, even, odd, j) \
56     d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \
57     ((UINT16*)dest)[j] = d##j;
58 
59 #else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
60 
61 #define xor2bytesIntoInterleavedWords(even, odd, source, j) \
62     i##j = interleaveTable[source[2*j] ^ ((UINT16)source[2*j+1] << 8)]; \
63     *even ^= (i##j & 0xFF) << (j*8); \
64     *odd ^= ((i##j >> 8) & 0xFF) << (j*8);
65 
66 #define setInterleavedWordsInto2bytes(dest, even, odd, j) \
67     d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \
68     dest[2*j] = d##j & 0xFF; \
69     dest[2*j+1] = d##j >> 8;
70 
71 #endif // Endianness
72 
xor8bytesIntoInterleavedWords(UINT32 * even,UINT32 * odd,const UINT8 * source)73 static void xor8bytesIntoInterleavedWords(UINT32 *even, UINT32 *odd, const UINT8* source)
74 {
75     UINT16 i0, i1, i2, i3;
76 
77     xor2bytesIntoInterleavedWords(even, odd, source, 0)
78     xor2bytesIntoInterleavedWords(even, odd, source, 1)
79     xor2bytesIntoInterleavedWords(even, odd, source, 2)
80     xor2bytesIntoInterleavedWords(even, odd, source, 3)
81 }
82 
83 #define xorLanesIntoState(laneCount, state, input) \
84     { \
85         int i; \
86         for(i=0; i<(laneCount); i++) \
87             xor8bytesIntoInterleavedWords(state+i*2, state+i*2+1, input+i*8); \
88     }
89 
setInterleavedWordsInto8bytes(UINT8 * dest,UINT32 even,UINT32 odd)90 static void setInterleavedWordsInto8bytes(UINT8* dest, UINT32 even, UINT32 odd)
91 {
92     UINT16 d0, d1, d2, d3;
93 
94     setInterleavedWordsInto2bytes(dest, even, odd, 0)
95     setInterleavedWordsInto2bytes(dest, even, odd, 1)
96     setInterleavedWordsInto2bytes(dest, even, odd, 2)
97     setInterleavedWordsInto2bytes(dest, even, odd, 3)
98 }
99 
100 #define extractLanes(laneCount, state, data) \
101     { \
102         int i; \
103         for(i=0; i<(laneCount); i++) \
104             setInterleavedWordsInto8bytes(data+i*8, ((UINT32*)state)[i*2], ((UINT32*)state)[i*2+1]); \
105     }
106 
107 #else // No interleaving tables
108 
109 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
110 
111 // Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
112 #define xorInterleavedLE(rateInLanes, state, input) \
113 	{ \
114 		const UINT32 * pI = (const UINT32 *)input; \
115 		UINT32 * pS = state; \
116 		UINT32 t, x0, x1; \
117         int i; \
118         for (i = (int)(rateInLanes)-1; i >= 0; --i) \
119 		{ \
120 			x0 = *(pI++); \
121 			t = (x0 ^ (x0 >>  1)) & 0x22222222UL;  x0 = x0 ^ t ^ (t <<  1); \
122 			t = (x0 ^ (x0 >>  2)) & 0x0C0C0C0CUL;  x0 = x0 ^ t ^ (t <<  2); \
123 			t = (x0 ^ (x0 >>  4)) & 0x00F000F0UL;  x0 = x0 ^ t ^ (t <<  4); \
124 			t = (x0 ^ (x0 >>  8)) & 0x0000FF00UL;  x0 = x0 ^ t ^ (t <<  8); \
125  			x1 = *(pI++); \
126 			t = (x1 ^ (x1 >>  1)) & 0x22222222UL;  x1 = x1 ^ t ^ (t <<  1); \
127 			t = (x1 ^ (x1 >>  2)) & 0x0C0C0C0CUL;  x1 = x1 ^ t ^ (t <<  2); \
128 			t = (x1 ^ (x1 >>  4)) & 0x00F000F0UL;  x1 = x1 ^ t ^ (t <<  4); \
129 			t = (x1 ^ (x1 >>  8)) & 0x0000FF00UL;  x1 = x1 ^ t ^ (t <<  8); \
130 			*(pS++) ^= (UINT16)x0 | (x1 << 16); \
131 			*(pS++) ^= (x0 >> 16) | (x1 & 0xFFFF0000); \
132 		} \
133 	}
134 
135 #define xorLanesIntoState(laneCount, state, input) \
136     xorInterleavedLE(laneCount, state, input)
137 
138 #else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
139 
140 // Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
toInterleaving(UINT64 x)141 static UINT64 toInterleaving(UINT64 x)
142 {
143    UINT64 t;
144 
145    t = (x ^ (x >>  1)) & 0x2222222222222222ULL;  x = x ^ t ^ (t <<  1);
146    t = (x ^ (x >>  2)) & 0x0C0C0C0C0C0C0C0CULL;  x = x ^ t ^ (t <<  2);
147    t = (x ^ (x >>  4)) & 0x00F000F000F000F0ULL;  x = x ^ t ^ (t <<  4);
148    t = (x ^ (x >>  8)) & 0x0000FF000000FF00ULL;  x = x ^ t ^ (t <<  8);
149    t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL;  x = x ^ t ^ (t << 16);
150 
151    return x;
152 }
153 
xor8bytesIntoInterleavedWords(UINT32 * evenAndOdd,const UINT8 * source)154 static void xor8bytesIntoInterleavedWords(UINT32* evenAndOdd, const UINT8* source)
155 {
156     // This can be optimized
157     UINT64 sourceWord =
158         (UINT64)source[0]
159         ^ (((UINT64)source[1]) <<  8)
160         ^ (((UINT64)source[2]) << 16)
161         ^ (((UINT64)source[3]) << 24)
162         ^ (((UINT64)source[4]) << 32)
163         ^ (((UINT64)source[5]) << 40)
164         ^ (((UINT64)source[6]) << 48)
165         ^ (((UINT64)source[7]) << 56);
166     UINT64 evenAndOddWord = toInterleaving(sourceWord);
167     evenAndOdd[0] ^= (UINT32)evenAndOddWord;
168     evenAndOdd[1] ^= (UINT32)(evenAndOddWord >> 32);
169 }
170 
171 #define xorLanesIntoState(laneCount, state, input) \
172     { \
173         unsigned i; \
174         for(i=0; i<(laneCount); i++) \
175             xor8bytesIntoInterleavedWords(state+i*2, input+i*8); \
176     }
177 
178 #endif // Endianness
179 
180 // Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
fromInterleaving(UINT64 x)181 static UINT64 fromInterleaving(UINT64 x)
182 {
183    UINT64 t;
184 
185    t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL;  x = x ^ t ^ (t << 16);
186    t = (x ^ (x >>  8)) & 0x0000FF000000FF00ULL;  x = x ^ t ^ (t <<  8);
187    t = (x ^ (x >>  4)) & 0x00F000F000F000F0ULL;  x = x ^ t ^ (t <<  4);
188    t = (x ^ (x >>  2)) & 0x0C0C0C0C0C0C0C0CULL;  x = x ^ t ^ (t <<  2);
189    t = (x ^ (x >>  1)) & 0x2222222222222222ULL;  x = x ^ t ^ (t <<  1);
190 
191    return x;
192 }
193 
setInterleavedWordsInto8bytes(UINT8 * dest,const UINT32 * evenAndOdd)194 static void setInterleavedWordsInto8bytes(UINT8* dest, const UINT32* evenAndOdd)
195 {
196 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
197     ((UINT64*)dest)[0] = fromInterleaving(*(const UINT64*)evenAndOdd);
198 #else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
199     // This can be optimized
200     UINT64 evenAndOddWord = (UINT64)evenAndOdd[0] ^ ((UINT64)evenAndOdd[1] << 32);
201     UINT64 destWord = fromInterleaving(evenAndOddWord);
202     dest[0] = destWord & 0xFF;
203     dest[1] = (destWord >> 8) & 0xFF;
204     dest[2] = (destWord >> 16) & 0xFF;
205     dest[3] = (destWord >> 24) & 0xFF;
206     dest[4] = (destWord >> 32) & 0xFF;
207     dest[5] = (destWord >> 40) & 0xFF;
208     dest[6] = (destWord >> 48) & 0xFF;
209     dest[7] = (destWord >> 56) & 0xFF;
210 #endif // Endianness
211 }
212 
213 #define extractLanes(laneCount, state, data) \
214     { \
215         unsigned i; \
216         for(i=0; i<(laneCount); i++) \
217             setInterleavedWordsInto8bytes(data+i*8, (const UINT32*)state+i*2); \
218     }
219 
220 #endif // With or without interleaving tables
221 
222 #if defined(_MSC_VER)
223 #define ROL32(a, offset) _rotl(a, offset)
224 #elif (defined (__arm__) && defined(__ARMCC_VERSION))
225 #define ROL32(a, offset) __ror(a, 32-(offset))
226 #else
227 #define ROL32(a, offset) ((((UINT32)a) << (offset)) ^ (((UINT32)a) >> (32-(offset))))
228 #endif
229 
230 #include "KeccakF-1600-unrolling.macros"
231 #include "KeccakF-1600-32.macros"
232 
233 #if (UseSchedule == 3)
234 
235 #ifdef UseBebigokimisa
236 #error "No lane complementing with schedule 3."
237 #endif
238 
239 #if (Unrolling != 2)
240 #error "Only unrolling 2 is supported by schedule 3."
241 #endif
242 
KeccakPermutationOnWords(UINT32 * state)243 static void KeccakPermutationOnWords(UINT32 *state)
244 {
245     rounds
246 }
247 
KeccakPermutationOnWordsAfterXoring(UINT32 * state,const UINT8 * input,unsigned int laneCount)248 static void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount)
249 {
250     xorLanesIntoState(laneCount, state, input)
251     rounds
252 }
253 
254 #ifdef ProvideFast576
KeccakPermutationOnWordsAfterXoring576bits(UINT32 * state,const UINT8 * input)255 static void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input)
256 {
257     xorLanesIntoState(9, state, input)
258     rounds
259 }
260 #endif
261 
262 #ifdef ProvideFast832
KeccakPermutationOnWordsAfterXoring832bits(UINT32 * state,const UINT8 * input)263 static void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input)
264 {
265     xorLanesIntoState(13, state, input)
266     rounds
267 }
268 #endif
269 
270 #ifdef ProvideFast1024
KeccakPermutationOnWordsAfterXoring1024bits(UINT32 * state,const UINT8 * input)271 static void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input)
272 {
273     xorLanesIntoState(16, state, input)
274     rounds
275 }
276 #endif
277 
278 #ifdef ProvideFast1088
KeccakPermutationOnWordsAfterXoring1088bits(UINT32 * state,const UINT8 * input)279 static void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input)
280 {
281     xorLanesIntoState(17, state, input)
282     rounds
283 }
284 #endif
285 
286 #ifdef ProvideFast1152
KeccakPermutationOnWordsAfterXoring1152bits(UINT32 * state,const UINT8 * input)287 static void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input)
288 {
289     xorLanesIntoState(18, state, input)
290     rounds
291 }
292 #endif
293 
294 #ifdef ProvideFast1344
KeccakPermutationOnWordsAfterXoring1344bits(UINT32 * state,const UINT8 * input)295 static void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input)
296 {
297     xorLanesIntoState(21, state, input)
298     rounds
299 }
300 #endif
301 
302 #else // (Schedule != 3)
303 
KeccakPermutationOnWords(UINT32 * state)304 static void KeccakPermutationOnWords(UINT32 *state)
305 {
306     declareABCDE
307 #if (Unrolling != 24)
308     unsigned int i;
309 #endif
310 
311     copyFromState(A, state)
312     rounds
313 }
314 
KeccakPermutationOnWordsAfterXoring(UINT32 * state,const UINT8 * input,unsigned int laneCount)315 static void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount)
316 {
317     declareABCDE
318     unsigned int i;
319 
320     xorLanesIntoState(laneCount, state, input)
321     copyFromState(A, state)
322     rounds
323 }
324 
325 #ifdef ProvideFast576
KeccakPermutationOnWordsAfterXoring576bits(UINT32 * state,const UINT8 * input)326 static void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input)
327 {
328     declareABCDE
329     unsigned int i;
330 
331     xorLanesIntoState(9, state, input)
332     copyFromState(A, state)
333     rounds
334 }
335 #endif
336 
337 #ifdef ProvideFast832
KeccakPermutationOnWordsAfterXoring832bits(UINT32 * state,const UINT8 * input)338 static void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input)
339 {
340     declareABCDE
341     unsigned int i;
342 
343     xorLanesIntoState(13, state, input)
344     copyFromState(A, state)
345     rounds
346 }
347 #endif
348 
349 #ifdef ProvideFast1024
KeccakPermutationOnWordsAfterXoring1024bits(UINT32 * state,const UINT8 * input)350 static void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input)
351 {
352     declareABCDE
353     unsigned int i;
354 
355     xorLanesIntoState(16, state, input)
356     copyFromState(A, state)
357     rounds
358 }
359 #endif
360 
361 #ifdef ProvideFast1088
KeccakPermutationOnWordsAfterXoring1088bits(UINT32 * state,const UINT8 * input)362 static void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input)
363 {
364     declareABCDE
365     unsigned int i;
366 
367     xorLanesIntoState(17, state, input)
368     copyFromState(A, state)
369     rounds
370 }
371 #endif
372 
373 #ifdef ProvideFast1152
KeccakPermutationOnWordsAfterXoring1152bits(UINT32 * state,const UINT8 * input)374 static void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input)
375 {
376     declareABCDE
377     unsigned int i;
378 
379     xorLanesIntoState(18, state, input)
380     copyFromState(A, state)
381     rounds
382 }
383 #endif
384 
385 #ifdef ProvideFast1344
KeccakPermutationOnWordsAfterXoring1344bits(UINT32 * state,const UINT8 * input)386 static void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input)
387 {
388     declareABCDE
389     unsigned int i;
390 
391     xorLanesIntoState(21, state, input)
392     copyFromState(A, state)
393     rounds
394 }
395 #endif
396 
397 #endif
398 
KeccakInitialize()399 static void KeccakInitialize()
400 {
401 #ifdef UseInterleaveTables
402     buildInterleaveTables();
403 #endif
404 }
405 
KeccakInitializeState(unsigned char * state)406 static void KeccakInitializeState(unsigned char *state)
407 {
408     memset(state, 0, 200);
409 #ifdef UseBebigokimisa
410     ((UINT32*)state)[ 2] = ~(UINT32)0;
411     ((UINT32*)state)[ 3] = ~(UINT32)0;
412     ((UINT32*)state)[ 4] = ~(UINT32)0;
413     ((UINT32*)state)[ 5] = ~(UINT32)0;
414     ((UINT32*)state)[16] = ~(UINT32)0;
415     ((UINT32*)state)[17] = ~(UINT32)0;
416     ((UINT32*)state)[24] = ~(UINT32)0;
417     ((UINT32*)state)[25] = ~(UINT32)0;
418     ((UINT32*)state)[34] = ~(UINT32)0;
419     ((UINT32*)state)[35] = ~(UINT32)0;
420     ((UINT32*)state)[40] = ~(UINT32)0;
421     ((UINT32*)state)[41] = ~(UINT32)0;
422 #endif
423 }
424 
KeccakPermutation(unsigned char * state)425 static void KeccakPermutation(unsigned char *state)
426 {
427     // We assume the state is always stored as interleaved 32-bit words
428     KeccakPermutationOnWords((UINT32*)state);
429 }
430 
431 #ifdef ProvideFast576
KeccakAbsorb576bits(unsigned char * state,const unsigned char * data)432 static void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
433 {
434     KeccakPermutationOnWordsAfterXoring576bits((UINT32*)state, data);
435 }
436 #endif
437 
438 #ifdef ProvideFast832
KeccakAbsorb832bits(unsigned char * state,const unsigned char * data)439 static void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
440 {
441     KeccakPermutationOnWordsAfterXoring832bits((UINT32*)state, data);
442 }
443 #endif
444 
445 #ifdef ProvideFast1024
KeccakAbsorb1024bits(unsigned char * state,const unsigned char * data)446 static void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
447 {
448     KeccakPermutationOnWordsAfterXoring1024bits((UINT32*)state, data);
449 }
450 #endif
451 
452 #ifdef ProvideFast1088
KeccakAbsorb1088bits(unsigned char * state,const unsigned char * data)453 static void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
454 {
455     KeccakPermutationOnWordsAfterXoring1088bits((UINT32*)state, data);
456 }
457 #endif
458 
459 #ifdef ProvideFast1152
KeccakAbsorb1152bits(unsigned char * state,const unsigned char * data)460 static void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
461 {
462     KeccakPermutationOnWordsAfterXoring1152bits((UINT32*)state, data);
463 }
464 #endif
465 
466 #ifdef ProvideFast1344
KeccakAbsorb1344bits(unsigned char * state,const unsigned char * data)467 static void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
468 {
469     KeccakPermutationOnWordsAfterXoring1344bits((UINT32*)state, data);
470 }
471 #endif
472 
KeccakAbsorb(unsigned char * state,const unsigned char * data,unsigned int laneCount)473 static void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
474 {
475     KeccakPermutationOnWordsAfterXoring((UINT32*)state, data, laneCount);
476 }
477 
478 #ifdef ProvideFast1024
KeccakExtract1024bits(const unsigned char * state,unsigned char * data)479 static void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
480 {
481     extractLanes(16, state, data)
482 #ifdef UseBebigokimisa
483     ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2];
484     ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3];
485     ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4];
486     ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5];
487     ((UINT32*)data)[16] = ~((UINT32*)data)[16];
488     ((UINT32*)data)[17] = ~((UINT32*)data)[17];
489     ((UINT32*)data)[24] = ~((UINT32*)data)[24];
490     ((UINT32*)data)[25] = ~((UINT32*)data)[25];
491 #endif
492 }
493 #endif
494 
KeccakExtract(const unsigned char * state,unsigned char * data,unsigned int laneCount)495 static void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
496 {
497     extractLanes(laneCount, state, data)
498 #ifdef UseBebigokimisa
499     if (laneCount > 1) {
500         ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2];
501         ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3];
502         if (laneCount > 2) {
503             ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4];
504             ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5];
505             if (laneCount > 8) {
506                 ((UINT32*)data)[16] = ~((UINT32*)data)[16];
507                 ((UINT32*)data)[17] = ~((UINT32*)data)[17];
508                 if (laneCount > 12) {
509                     ((UINT32*)data)[24] = ~((UINT32*)data)[24];
510                     ((UINT32*)data)[25] = ~((UINT32*)data)[25];
511                     if (laneCount > 17) {
512                         ((UINT32*)data)[34] = ~((UINT32*)data)[34];
513                         ((UINT32*)data)[35] = ~((UINT32*)data)[35];
514                         if (laneCount > 20) {
515                             ((UINT32*)data)[40] = ~((UINT32*)data)[40];
516                             ((UINT32*)data)[41] = ~((UINT32*)data)[41];
517                         }
518                     }
519                 }
520             }
521         }
522     }
523 #endif
524 }
525