/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".

For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
15 
16 #include <string.h>
17 #include <stdlib.h>
18 /* #include "brg_endian.h" */
19 #include "KeccakP-1600-opt64-config.h"
20 
21 #if NOT_PYTHON
22 typedef unsigned char UINT8;
23 /* typedef unsigned long long int UINT64; */
24 #endif
25 
26 #if defined(KeccakP1600_useLaneComplementing)
27 #define UseBebigokimisa
28 #endif
29 
/*
 * ROL64(a, offset): rotate the 64-bit value a left by offset bits.
 *
 * One of three variants is selected at compile time:
 *   - _rotl64 when _MSC_VER is defined;
 *   - an inline-assembly SHLD form (GNU statement expression) when
 *     KeccakP1600_useSHLD is defined; the offset must then be a compile-time
 *     constant because it is passed as an "i" (immediate) operand;
 *   - a portable shift/xor form otherwise.
 *
 * The portable form evaluates a twice and shifts right by (64 - offset), so
 * offset must be strictly between 0 and 64 there.
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(KeccakP1600_useSHLD)
    #define ROL64(x,N) ({ \
    register UINT64 __out; \
    register UINT64 __in = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
    __out; \
    })
#else
/* Both arguments are parenthesized so that expression arguments such as
 * an offset of (1 + 2) expand with the intended precedence. */
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64 - (offset))))
#endif
42 
43 #include "KeccakP-1600-64.macros"
44 #ifdef KeccakP1600_fullUnrolling
45 #define FullUnrolling
46 #else
47 #define Unrolling KeccakP1600_unrolling
48 #endif
49 #include "KeccakP-1600-unrolling.macros"
50 #include "SnP-Relaned.h"
51 
52 static const UINT64 KeccakF1600RoundConstants[24] = {
53     0x0000000000000001ULL,
54     0x0000000000008082ULL,
55     0x800000000000808aULL,
56     0x8000000080008000ULL,
57     0x000000000000808bULL,
58     0x0000000080000001ULL,
59     0x8000000080008081ULL,
60     0x8000000000008009ULL,
61     0x000000000000008aULL,
62     0x0000000000000088ULL,
63     0x0000000080008009ULL,
64     0x000000008000000aULL,
65     0x000000008000808bULL,
66     0x800000000000008bULL,
67     0x8000000000008089ULL,
68     0x8000000000008003ULL,
69     0x8000000000008002ULL,
70     0x8000000000000080ULL,
71     0x000000000000800aULL,
72     0x800000008000000aULL,
73     0x8000000080008081ULL,
74     0x8000000000008080ULL,
75     0x0000000080000001ULL,
76     0x8000000080008008ULL };
77 
78 /* ---------------------------------------------------------------- */
79 
/*
 * Puts the 200-byte state into its initial value.
 *
 * All 200 bytes are cleared. When KeccakP1600_useLaneComplementing is
 * defined, lanes 1, 2, 8, 12, 17 and 20 are then set to all ones, i.e. they
 * are stored in complemented form.
 *
 * state - pointer to at least 200 writable bytes; it is accessed as an array
 *         of UINT64 lanes when lane complementing is enabled.
 */
void KeccakP1600_Initialize(void *state)
{
    memset(state, 0, 200);
#ifdef KeccakP1600_useLaneComplementing
    ((UINT64*)state)[ 1] = ~(UINT64)0;
    ((UINT64*)state)[ 2] = ~(UINT64)0;
    ((UINT64*)state)[ 8] = ~(UINT64)0;
    ((UINT64*)state)[12] = ~(UINT64)0;
    ((UINT64*)state)[17] = ~(UINT64)0;
    ((UINT64*)state)[20] = ~(UINT64)0;
#endif
}
92 
93 /* ---------------------------------------------------------------- */
94 
KeccakP1600_AddBytesInLane(void * state,unsigned int lanePosition,const unsigned char * data,unsigned int offset,unsigned int length)95 void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
96 {
97 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
98     UINT64 lane;
99     if (length == 0)
100         return;
101     if (length == 1)
102         lane = data[0];
103     else {
104         lane = 0;
105         memcpy(&lane, data, length);
106     }
107     lane <<= offset*8;
108 #else
109     UINT64 lane = 0;
110     unsigned int i;
111     for(i=0; i<length; i++)
112         lane |= ((UINT64)data[i]) << ((i+offset)*8);
113 #endif
114     ((UINT64*)state)[lanePosition] ^= lane;
115 }
116 
117 /* ---------------------------------------------------------------- */
118 
KeccakP1600_AddLanes(void * state,const unsigned char * data,unsigned int laneCount)119 void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
120 {
121 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
122     unsigned int i = 0;
123 #ifdef NO_MISALIGNED_ACCESSES
124     /* If either pointer is misaligned, fall back to byte-wise xor. */
125 
126     if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) {
127       for (i = 0; i < laneCount * 8; i++) {
128         ((unsigned char*)state)[i] ^= data[i];
129       }
130     }
131     else
132 #endif
133     {
134       /* Otherwise... */
135 
136       for( ; (i+8)<=laneCount; i+=8) {
137           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
138           ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
139           ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
140           ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
141           ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4];
142           ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5];
143           ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6];
144           ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7];
145       }
146       for( ; (i+4)<=laneCount; i+=4) {
147           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
148           ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
149           ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
150           ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
151       }
152       for( ; (i+2)<=laneCount; i+=2) {
153           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
154           ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
155       }
156       if (i<laneCount) {
157           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
158       }
159     }
160 #else
161     unsigned int i;
162     UINT8 *curData = data;
163     for(i=0; i<laneCount; i++, curData+=8) {
164         UINT64 lane = (UINT64)curData[0]
165             | ((UINT64)curData[1] << 8)
166             | ((UINT64)curData[2] << 16)
167             | ((UINT64)curData[3] << 24)
168             | ((UINT64)curData[4] <<32)
169             | ((UINT64)curData[5] << 40)
170             | ((UINT64)curData[6] << 48)
171             | ((UINT64)curData[7] << 56);
172         ((UINT64*)state)[i] ^= lane;
173     }
174 #endif
175 }
176 
177 /* ---------------------------------------------------------------- */
178 
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * XORs a single byte into the state at byte position offset.
 * Only compiled when the platform is not little-endian.
 *
 * state  - state, accessed as an array of UINT64 lanes.
 * byte   - value to add.
 * offset - byte position in the state; offset/8 selects the lane and
 *          offset%8 the byte inside it (byte 0 is the least significant).
 */
void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
{
    UINT64 lane = byte;
    lane <<= (offset%8)*8;
    ((UINT64*)state)[offset/8] ^= lane;
}
#endif
187 
188 /* ---------------------------------------------------------------- */
189 
KeccakP1600_AddBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)190 void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
191 {
192     SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
193 }
194 
195 /* ---------------------------------------------------------------- */
196 
/*
 * Overwrites length bytes inside one lane of the state with bytes from data.
 *
 * state        - state, addressed here as raw bytes.
 * lanePosition - index of the lane to modify.
 * data         - source bytes; length bytes are read.
 * offset       - first byte inside the lane to overwrite.
 * length       - number of bytes to overwrite.
 *
 * With KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are
 * kept complemented, so the bitwise complement of each byte is stored there.
 * Only little-endian platforms are supported; other builds stop with #error.
 * No bounds are checked; the caller must keep offset + length <= 8.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) {
        unsigned int i;
        for(i=0; i<length; i++)
            ((unsigned char*)state)[lanePosition*8+offset+i] = ~data[i];
    }
    else
#endif
    {
        memcpy((unsigned char*)state+lanePosition*8+offset, data, length);
    }
#else
#error "Not yet implemented"
#endif
}
215 
216 /* ---------------------------------------------------------------- */
217 
/*
 * Overwrites the first laneCount lanes of the state with laneCount*8 bytes
 * from data.
 *
 * state     - state to modify.
 * data      - source bytes; laneCount*8 bytes are read.
 * laneCount - number of whole lanes to overwrite.
 *
 * With KeccakP1600_useLaneComplementing, data is read as UINT64 words and
 * lanes 1, 2, 8, 12, 17 and 20 are stored complemented; otherwise the bytes
 * are copied verbatim. Only little-endian platforms are supported; other
 * builds stop with #error.
 */
void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    unsigned int lanePosition;

    for(lanePosition=0; lanePosition<laneCount; lanePosition++)
        if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
            ((UINT64*)state)[lanePosition] = ~((const UINT64*)data)[lanePosition];
        else
            ((UINT64*)state)[lanePosition] = ((const UINT64*)data)[lanePosition];
#else
    memcpy(state, data, laneCount*8);
#endif
#else
#error "Not yet implemented"
#endif
}
236 
237 /* ---------------------------------------------------------------- */
238 
KeccakP1600_OverwriteBytes(void * state,const unsigned char * data,unsigned int offset,unsigned int length)239 void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
240 {
241     SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
242 }
243 
244 /* ---------------------------------------------------------------- */
245 
/*
 * Sets the first byteCount bytes of the state to the logical value zero.
 *
 * state     - state to modify.
 * byteCount - number of leading state bytes to clear.
 *
 * With KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are
 * stored complemented, so a logical zero is written as all-one bits there:
 * whole lanes first, then the byteCount%8 leading bytes of the next lane.
 * Without it, a plain memset is used. Only little-endian platforms are
 * supported; other builds stop with #error.
 */
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    unsigned int lanePosition;

    for(lanePosition=0; lanePosition<byteCount/8; lanePosition++)
        if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
            ((UINT64*)state)[lanePosition] = ~(UINT64)0;
        else
            ((UINT64*)state)[lanePosition] = 0;
    if (byteCount%8 != 0) {
        /* Partial trailing lane: only its first byteCount%8 bytes are touched. */
        lanePosition = byteCount/8;
        if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
            memset((unsigned char*)state+lanePosition*8, 0xFF, byteCount%8);
        else
            memset((unsigned char*)state+lanePosition*8, 0, byteCount%8);
    }
#else
    memset(state, 0, byteCount);
#endif
#else
#error "Not yet implemented"
#endif
}
271 
272 /* ---------------------------------------------------------------- */
273 
KeccakP1600_Permute_24rounds(void * state)274 void KeccakP1600_Permute_24rounds(void *state)
275 {
276     declareABCDE
277     #ifndef KeccakP1600_fullUnrolling
278     unsigned int i;
279     #endif
280     UINT64 *stateAsLanes = (UINT64*)state;
281 
282     copyFromState(A, stateAsLanes)
283     rounds24
284     copyToState(stateAsLanes, A)
285 }
286 
287 /* ---------------------------------------------------------------- */
288 
KeccakP1600_Permute_12rounds(void * state)289 void KeccakP1600_Permute_12rounds(void *state)
290 {
291     declareABCDE
292     #ifndef KeccakP1600_fullUnrolling
293     unsigned int i;
294     #endif
295     UINT64 *stateAsLanes = (UINT64*)state;
296 
297     copyFromState(A, stateAsLanes)
298     rounds12
299     copyToState(stateAsLanes, A)
300 }
301 
302 /* ---------------------------------------------------------------- */
303 
KeccakP1600_ExtractBytesInLane(const void * state,unsigned int lanePosition,unsigned char * data,unsigned int offset,unsigned int length)304 void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
305 {
306     UINT64 lane = ((UINT64*)state)[lanePosition];
307 #ifdef KeccakP1600_useLaneComplementing
308     if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
309         lane = ~lane;
310 #endif
311 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
312     {
313         UINT64 lane1[1];
314         lane1[0] = lane;
315         memcpy(data, (UINT8*)lane1+offset, length);
316     }
317 #else
318     unsigned int i;
319     lane >>= offset*8;
320     for(i=0; i<length; i++) {
321         data[i] = lane & 0xFF;
322         lane >>= 8;
323     }
324 #endif
325 }
326 
327 /* ---------------------------------------------------------------- */
328 
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * Writes the 64-bit value word into bytes[0..7], least-significant byte
 * first. Only compiled when the platform is not little-endian.
 *
 * bytes - destination; 8 bytes are written.
 * word  - value to serialize.
 */
void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    unsigned int i;

    for(i=0; i<(64/8); i++)
        bytes[i] = (word >> (8*i)) & 0xFF;
}
#endif
338 
/*
 * Copies the first laneCount lanes of the state into data.
 *
 * state     - state to read.
 * data      - destination; laneCount*8 bytes are written.
 * laneCount - number of whole lanes to extract.
 *
 * Little-endian builds copy the bytes directly; other builds serialize each
 * lane with fromWordToBytes. With KeccakP1600_useLaneComplementing, output
 * lanes 1, 2, 8, 12, 17 and 20 (those below laneCount) are then complemented
 * in place, accessing data as UINT64 words.
 */
void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int i;

    for(i=0; i<laneCount; i++)
        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
#endif
#ifdef KeccakP1600_useLaneComplementing
    /* The complemented lane indices are ascending, so each test only
     * matters once the previous one has passed. */
    if (laneCount > 1) {
        ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1];
        if (laneCount > 2) {
            ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2];
            if (laneCount > 8) {
                ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8];
                if (laneCount > 12) {
                    ((UINT64*)data)[12] = ~((UINT64*)data)[12];
                    if (laneCount > 17) {
                        ((UINT64*)data)[17] = ~((UINT64*)data)[17];
                        if (laneCount > 20) {
                            ((UINT64*)data)[20] = ~((UINT64*)data)[20];
                        }
                    }
                }
            }
        }
    }
#endif
}
370 
371 /* ---------------------------------------------------------------- */
372 
KeccakP1600_ExtractBytes(const void * state,unsigned char * data,unsigned int offset,unsigned int length)373 void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
374 {
375     SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
376 }
377 
378 /* ---------------------------------------------------------------- */
379 
KeccakP1600_ExtractAndAddBytesInLane(const void * state,unsigned int lanePosition,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)380 void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
381 {
382     UINT64 lane = ((UINT64*)state)[lanePosition];
383 #ifdef KeccakP1600_useLaneComplementing
384     if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
385         lane = ~lane;
386 #endif
387 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
388     {
389         unsigned int i;
390         UINT64 lane1[1];
391         lane1[0] = lane;
392         for(i=0; i<length; i++)
393             output[i] = input[i] ^ ((UINT8*)lane1)[offset+i];
394     }
395 #else
396     unsigned int i;
397     lane >>= offset*8;
398     for(i=0; i<length; i++) {
399         output[i] = input[i] ^ (lane & 0xFF);
400         lane >>= 8;
401     }
402 #endif
403 }
404 
405 /* ---------------------------------------------------------------- */
406 
KeccakP1600_ExtractAndAddLanes(const void * state,const unsigned char * input,unsigned char * output,unsigned int laneCount)407 void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
408 {
409     unsigned int i;
410 #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
411     unsigned char temp[8];
412     unsigned int j;
413 #endif
414 
415     for(i=0; i<laneCount; i++) {
416 #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
417         ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i];
418 #else
419         fromWordToBytes(temp, ((const UINT64*)state)[i]);
420         for(j=0; j<8; j++)
421             output[i*8+j] = input[i*8+j] ^ temp[j];
422 #endif
423     }
424 #ifdef KeccakP1600_useLaneComplementing
425     if (laneCount > 1) {
426         ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1];
427         if (laneCount > 2) {
428             ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2];
429             if (laneCount > 8) {
430                 ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8];
431                 if (laneCount > 12) {
432                     ((UINT64*)output)[12] = ~((UINT64*)output)[12];
433                     if (laneCount > 17) {
434                         ((UINT64*)output)[17] = ~((UINT64*)output)[17];
435                         if (laneCount > 20) {
436                             ((UINT64*)output)[20] = ~((UINT64*)output)[20];
437                         }
438                     }
439                 }
440             }
441         }
442     }
443 #endif
444 }
445 
446 /* ---------------------------------------------------------------- */
447 
KeccakP1600_ExtractAndAddBytes(const void * state,const unsigned char * input,unsigned char * output,unsigned int offset,unsigned int length)448 void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
449 {
450     SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
451 }
452 
453 /* ---------------------------------------------------------------- */
454 
KeccakF1600_FastLoop_Absorb(void * state,unsigned int laneCount,const unsigned char * data,size_t dataByteLen)455 size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen)
456 {
457     size_t originalDataByteLen = dataByteLen;
458     declareABCDE
459     #ifndef KeccakP1600_fullUnrolling
460     unsigned int i;
461     #endif
462     UINT64 *stateAsLanes = (UINT64*)state;
463     UINT64 *inDataAsLanes = (UINT64*)data;
464 
465     copyFromState(A, stateAsLanes)
466     while(dataByteLen >= laneCount*8) {
467         addInput(A, inDataAsLanes, laneCount)
468         rounds24
469         inDataAsLanes += laneCount;
470         dataByteLen -= laneCount*8;
471     }
472     copyToState(stateAsLanes, A)
473     return originalDataByteLen - dataByteLen;
474 }
475