1 /* $Id: skein.c 254 2011-06-07 19:38:58Z tp $ */
2 /*
3 * Skein implementation.
4 *
5 * ==========================(LICENSE BEGIN)============================
6 *
7 * Copyright (c) 2007-2010 Projet RNRT SAPHIR
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be
18 * included in all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 *
28 * ===========================(LICENSE END)=============================
29 *
30 * @author Thomas Pornin <thomas.pornin@cryptolog.com>
31 */
32
33 #include <stddef.h>
34 #include <string.h>
35
36 #include "sph_skein.h"
37
38
/*
 * The library-wide SPH_SMALL_FOOTPRINT switch turns on the compact Skein
 * code path, unless SPH_SMALL_FOOTPRINT_SKEIN was already defined by the
 * build to choose explicitly.
 */
#if !defined(SPH_SMALL_FOOTPRINT_SKEIN) && SPH_SMALL_FOOTPRINT
#define SPH_SMALL_FOOTPRINT_SKEIN   1
#endif

/*
 * MSVC only: disable warning C4146 (unary minus applied to an unsigned
 * operand); the bit-padding code in this file negates an unsigned value
 * on purpose to build a mask.
 */
#if defined(_MSC_VER)
#pragma warning (disable: 4146)
#endif
46
47 #if SPH_64
48
49 #if 0
50 /* obsolete */
51 /*
52 * M5_ ## s ## _ ## i evaluates to s+i mod 5 (0 <= s <= 18, 0 <= i <= 3).
53 */
54
55 #define M5_0_0 0
56 #define M5_0_1 1
57 #define M5_0_2 2
58 #define M5_0_3 3
59
60 #define M5_1_0 1
61 #define M5_1_1 2
62 #define M5_1_2 3
63 #define M5_1_3 4
64
65 #define M5_2_0 2
66 #define M5_2_1 3
67 #define M5_2_2 4
68 #define M5_2_3 0
69
70 #define M5_3_0 3
71 #define M5_3_1 4
72 #define M5_3_2 0
73 #define M5_3_3 1
74
75 #define M5_4_0 4
76 #define M5_4_1 0
77 #define M5_4_2 1
78 #define M5_4_3 2
79
80 #define M5_5_0 0
81 #define M5_5_1 1
82 #define M5_5_2 2
83 #define M5_5_3 3
84
85 #define M5_6_0 1
86 #define M5_6_1 2
87 #define M5_6_2 3
88 #define M5_6_3 4
89
90 #define M5_7_0 2
91 #define M5_7_1 3
92 #define M5_7_2 4
93 #define M5_7_3 0
94
95 #define M5_8_0 3
96 #define M5_8_1 4
97 #define M5_8_2 0
98 #define M5_8_3 1
99
100 #define M5_9_0 4
101 #define M5_9_1 0
102 #define M5_9_2 1
103 #define M5_9_3 2
104
105 #define M5_10_0 0
106 #define M5_10_1 1
107 #define M5_10_2 2
108 #define M5_10_3 3
109
110 #define M5_11_0 1
111 #define M5_11_1 2
112 #define M5_11_2 3
113 #define M5_11_3 4
114
115 #define M5_12_0 2
116 #define M5_12_1 3
117 #define M5_12_2 4
118 #define M5_12_3 0
119
120 #define M5_13_0 3
121 #define M5_13_1 4
122 #define M5_13_2 0
123 #define M5_13_3 1
124
125 #define M5_14_0 4
126 #define M5_14_1 0
127 #define M5_14_2 1
128 #define M5_14_3 2
129
130 #define M5_15_0 0
131 #define M5_15_1 1
132 #define M5_15_2 2
133 #define M5_15_3 3
134
135 #define M5_16_0 1
136 #define M5_16_1 2
137 #define M5_16_2 3
138 #define M5_16_3 4
139
140 #define M5_17_0 2
141 #define M5_17_1 3
142 #define M5_17_2 4
143 #define M5_17_3 0
144
145 #define M5_18_0 3
146 #define M5_18_1 4
147 #define M5_18_2 0
148 #define M5_18_3 1
149 #endif
150
/*
 * Modulo-9 index table: M9_<s>_<i> expands to the literal value of
 * (s + i) mod 9, for 0 <= s <= 18 and 0 <= i <= 7. The literals are
 * meant to be glued onto an identifier prefix with XCAT().
 *
 * The table has period 9 in s. Rows 0..8 spell the values out; rows
 * 9..18 are aliases of the matching earlier row. XCAT() macro-expands
 * its arguments completely before pasting, so an alias yields exactly
 * the same final token as a literal would.
 */

/* s = 0 */
#define M9_0_0   0
#define M9_0_1   1
#define M9_0_2   2
#define M9_0_3   3
#define M9_0_4   4
#define M9_0_5   5
#define M9_0_6   6
#define M9_0_7   7

/* s = 1 */
#define M9_1_0   1
#define M9_1_1   2
#define M9_1_2   3
#define M9_1_3   4
#define M9_1_4   5
#define M9_1_5   6
#define M9_1_6   7
#define M9_1_7   8

/* s = 2 */
#define M9_2_0   2
#define M9_2_1   3
#define M9_2_2   4
#define M9_2_3   5
#define M9_2_4   6
#define M9_2_5   7
#define M9_2_6   8
#define M9_2_7   0

/* s = 3 */
#define M9_3_0   3
#define M9_3_1   4
#define M9_3_2   5
#define M9_3_3   6
#define M9_3_4   7
#define M9_3_5   8
#define M9_3_6   0
#define M9_3_7   1

/* s = 4 */
#define M9_4_0   4
#define M9_4_1   5
#define M9_4_2   6
#define M9_4_3   7
#define M9_4_4   8
#define M9_4_5   0
#define M9_4_6   1
#define M9_4_7   2

/* s = 5 */
#define M9_5_0   5
#define M9_5_1   6
#define M9_5_2   7
#define M9_5_3   8
#define M9_5_4   0
#define M9_5_5   1
#define M9_5_6   2
#define M9_5_7   3

/* s = 6 */
#define M9_6_0   6
#define M9_6_1   7
#define M9_6_2   8
#define M9_6_3   0
#define M9_6_4   1
#define M9_6_5   2
#define M9_6_6   3
#define M9_6_7   4

/* s = 7 */
#define M9_7_0   7
#define M9_7_1   8
#define M9_7_2   0
#define M9_7_3   1
#define M9_7_4   2
#define M9_7_5   3
#define M9_7_6   4
#define M9_7_7   5

/* s = 8 */
#define M9_8_0   8
#define M9_8_1   0
#define M9_8_2   1
#define M9_8_3   2
#define M9_8_4   3
#define M9_8_5   4
#define M9_8_6   5
#define M9_8_7   6

/* s = 9: same row as s = 0 */
#define M9_9_0   M9_0_0
#define M9_9_1   M9_0_1
#define M9_9_2   M9_0_2
#define M9_9_3   M9_0_3
#define M9_9_4   M9_0_4
#define M9_9_5   M9_0_5
#define M9_9_6   M9_0_6
#define M9_9_7   M9_0_7

/* s = 10: same row as s = 1 */
#define M9_10_0   M9_1_0
#define M9_10_1   M9_1_1
#define M9_10_2   M9_1_2
#define M9_10_3   M9_1_3
#define M9_10_4   M9_1_4
#define M9_10_5   M9_1_5
#define M9_10_6   M9_1_6
#define M9_10_7   M9_1_7

/* s = 11: same row as s = 2 */
#define M9_11_0   M9_2_0
#define M9_11_1   M9_2_1
#define M9_11_2   M9_2_2
#define M9_11_3   M9_2_3
#define M9_11_4   M9_2_4
#define M9_11_5   M9_2_5
#define M9_11_6   M9_2_6
#define M9_11_7   M9_2_7

/* s = 12: same row as s = 3 */
#define M9_12_0   M9_3_0
#define M9_12_1   M9_3_1
#define M9_12_2   M9_3_2
#define M9_12_3   M9_3_3
#define M9_12_4   M9_3_4
#define M9_12_5   M9_3_5
#define M9_12_6   M9_3_6
#define M9_12_7   M9_3_7

/* s = 13: same row as s = 4 */
#define M9_13_0   M9_4_0
#define M9_13_1   M9_4_1
#define M9_13_2   M9_4_2
#define M9_13_3   M9_4_3
#define M9_13_4   M9_4_4
#define M9_13_5   M9_4_5
#define M9_13_6   M9_4_6
#define M9_13_7   M9_4_7

/* s = 14: same row as s = 5 */
#define M9_14_0   M9_5_0
#define M9_14_1   M9_5_1
#define M9_14_2   M9_5_2
#define M9_14_3   M9_5_3
#define M9_14_4   M9_5_4
#define M9_14_5   M9_5_5
#define M9_14_6   M9_5_6
#define M9_14_7   M9_5_7

/* s = 15: same row as s = 6 */
#define M9_15_0   M9_6_0
#define M9_15_1   M9_6_1
#define M9_15_2   M9_6_2
#define M9_15_3   M9_6_3
#define M9_15_4   M9_6_4
#define M9_15_5   M9_6_5
#define M9_15_6   M9_6_6
#define M9_15_7   M9_6_7

/* s = 16: same row as s = 7 */
#define M9_16_0   M9_7_0
#define M9_16_1   M9_7_1
#define M9_16_2   M9_7_2
#define M9_16_3   M9_7_3
#define M9_16_4   M9_7_4
#define M9_16_5   M9_7_5
#define M9_16_6   M9_7_6
#define M9_16_7   M9_7_7

/* s = 17: same row as s = 8 */
#define M9_17_0   M9_8_0
#define M9_17_1   M9_8_1
#define M9_17_2   M9_8_2
#define M9_17_3   M9_8_3
#define M9_17_4   M9_8_4
#define M9_17_5   M9_8_5
#define M9_17_6   M9_8_6
#define M9_17_7   M9_8_7

/* s = 18: same row as s = 0 */
#define M9_18_0   M9_0_0
#define M9_18_1   M9_0_1
#define M9_18_2   M9_0_2
#define M9_18_3   M9_0_3
#define M9_18_4   M9_0_4
#define M9_18_5   M9_0_5
#define M9_18_6   M9_0_6
#define M9_18_7   M9_0_7
325
/*
 * Modulo-3 index table: M3_<s>_<i> expands to the literal value of
 * (s + i) mod 3, for 0 <= s <= 18 and 0 <= i <= 1.
 *
 * The table has period 3 in s. Rows 0..2 spell the values out; every
 * later row is an alias of the row with the same s mod 3. XCAT()
 * expands its arguments fully before pasting, so the resulting token
 * is the same as with literal entries.
 */

/* s = 0, 1, 2 */
#define M3_0_0   0
#define M3_0_1   1
#define M3_1_0   1
#define M3_1_1   2
#define M3_2_0   2
#define M3_2_1   0

/* s = 3, 4, 5 */
#define M3_3_0   M3_0_0
#define M3_3_1   M3_0_1
#define M3_4_0   M3_1_0
#define M3_4_1   M3_1_1
#define M3_5_0   M3_2_0
#define M3_5_1   M3_2_1

/* s = 6, 7, 8 */
#define M3_6_0   M3_0_0
#define M3_6_1   M3_0_1
#define M3_7_0   M3_1_0
#define M3_7_1   M3_1_1
#define M3_8_0   M3_2_0
#define M3_8_1   M3_2_1

/* s = 9, 10, 11 */
#define M3_9_0   M3_0_0
#define M3_9_1   M3_0_1
#define M3_10_0   M3_1_0
#define M3_10_1   M3_1_1
#define M3_11_0   M3_2_0
#define M3_11_1   M3_2_1

/* s = 12, 13, 14 */
#define M3_12_0   M3_0_0
#define M3_12_1   M3_0_1
#define M3_13_0   M3_1_0
#define M3_13_1   M3_1_1
#define M3_14_0   M3_2_0
#define M3_14_1   M3_2_1

/* s = 15, 16, 17 */
#define M3_15_0   M3_0_0
#define M3_15_1   M3_0_1
#define M3_16_0   M3_1_0
#define M3_16_1   M3_1_1
#define M3_17_0   M3_2_0
#define M3_17_1   M3_2_1

/* s = 18 */
#define M3_18_0   M3_0_0
#define M3_18_1   M3_0_1
368
/*
 * Two-level token pasting. XCAT() exists so that its arguments are
 * macro-expanded first; XCAT_() then glues the expanded results into a
 * single token with the ## operator.
 */
#define XCAT_(lhs, rhs)   lhs ## rhs
#define XCAT(lhs, rhs)    XCAT_(lhs, rhs)
371
372 #if 0
373 /* obsolete */
374 #define SKSI(k, s, i) XCAT(k, XCAT(XCAT(XCAT(M5_, s), _), i))
375 #define SKST(t, s, v) XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v))
376 #endif
377
/*
 * Name builders for the unrolled key schedule.
 *
 * SKBI(fam, sub, off) pastes the value of M9_<sub>_<off>, that is
 * (sub + off) mod 9, onto the identifier prefix "fam" (for instance
 * prefix h with sub 3 and off 7 gives h1).
 * SKBT(fam, sub, off) does the same with M3_<sub>_<off>, that is
 * (sub + off) mod 3 (for instance prefix t with sub 4 and off 1 gives
 * t2).
 *
 * "sub" and "off" must be literal integers for which a table entry
 * exists; anything else pastes into an undefined M9_ or M3_ name.
 */
#define SKBT(fam, sub, off)   XCAT(fam, XCAT(XCAT(XCAT(M3_, sub), _), off))
#define SKBI(fam, sub, off)   XCAT(fam, XCAT(XCAT(XCAT(M9_, sub), _), off))
380
381 #if 0
382 /* obsolete */
383 #define TFSMALL_KINIT(k0, k1, k2, k3, k4, t0, t1, t2) do { \
384 k4 = (k0 ^ k1) ^ (k2 ^ k3) ^ SPH_C64(0x1BD11BDAA9FC1A22); \
385 t2 = t0 ^ t1; \
386 } while (0)
387 #endif
388
/*
 * Derive the extra key and tweak words:
 *   wx  = w0 ^ w1 ^ ... ^ w7 ^ 0x1BD11BDAA9FC1A22
 *   twx = tw0 ^ tw1
 * "wx" and "twx" must be assignable; the other arguments are only read.
 * Arguments are expanded as given, so pass plain variables or array
 * elements.
 */
#define TFBIG_KINIT(w0, w1, w2, w3, w4, w5, w6, w7, wx, tw0, tw1, twx) \
	do { \
		wx = ((w0 ^ w1) ^ (w2 ^ w3)) \
			^ ((w4 ^ w5) ^ (w6 ^ w7)) \
			^ SPH_C64(0x1BD11BDAA9FC1A22); \
		twx = tw0 ^ tw1; \
	} while (0)
394
395 #if 0
396 /* obsolete */
397 #define TFSMALL_ADDKEY(w0, w1, w2, w3, k, t, s) do { \
398 w0 = SPH_T64(w0 + SKSI(k, s, 0)); \
399 w1 = SPH_T64(w1 + SKSI(k, s, 1) + SKST(t, s, 0)); \
400 w2 = SPH_T64(w2 + SKSI(k, s, 2) + SKST(t, s, 1)); \
401 w3 = SPH_T64(w3 + SKSI(k, s, 3) + (sph_u64)s); \
402 } while (0)
403 #endif
404
#if SPH_SMALL_FOOTPRINT_SKEIN

/*
 * Key injection, compact variant. Adds eight consecutive key words
 * h[s] .. h[s + 7] to p0 .. p7, plus tweak word "tt0" on p5, tweak word
 * "tt1" on p6 and the index "s" itself on p7; every sum is reduced with
 * SPH_T64. The names p0..p7 and h are taken from the expansion site.
 *
 * Every use of a parameter is parenthesized: this macro is invoked with
 * an expression ("s + 1") as index, and must not depend on how that
 * expression happens to associate with the surrounding operators.
 */
#define TFBIG_ADDKEY(s, tt0, tt1)   do { \
		p0 = SPH_T64(p0 + h[(s) + 0]); \
		p1 = SPH_T64(p1 + h[(s) + 1]); \
		p2 = SPH_T64(p2 + h[(s) + 2]); \
		p3 = SPH_T64(p3 + h[(s) + 3]); \
		p4 = SPH_T64(p4 + h[(s) + 4]); \
		p5 = SPH_T64(p5 + h[(s) + 5] + (tt0)); \
		p6 = SPH_T64(p6 + h[(s) + 6] + (tt1)); \
		p7 = SPH_T64(p7 + h[(s) + 7] + (sph_u64)(s)); \
	} while (0)

#else

/*
 * Key injection, unrolled variant. Adds to w0 .. w7 the key words
 * selected by SKBI(k, s, 0..7), plus the tweak words selected by
 * SKBT(t, s, 0) on w5 and SKBT(t, s, 1) on w6, plus the index "s" on
 * w7; every sum is reduced with SPH_T64.
 *
 * "k" and "t" are identifier prefixes and "s" must be a literal integer
 * because all three are token-pasted by SKBI/SKBT.
 */
#define TFBIG_ADDKEY(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s)   do { \
		w0 = SPH_T64(w0 + SKBI(k, s, 0)); \
		w1 = SPH_T64(w1 + SKBI(k, s, 1)); \
		w2 = SPH_T64(w2 + SKBI(k, s, 2)); \
		w3 = SPH_T64(w3 + SKBI(k, s, 3)); \
		w4 = SPH_T64(w4 + SKBI(k, s, 4)); \
		w5 = SPH_T64(w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \
		w6 = SPH_T64(w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \
		w7 = SPH_T64(w7 + SKBI(k, s, 7) + (sph_u64)(s)); \
	} while (0)

#endif
432
433 #if 0
434 /* obsolete */
435 #define TFSMALL_MIX(x0, x1, rc) do { \
436 x0 = SPH_T64(x0 + x1); \
437 x1 = SPH_ROTL64(x1, rc) ^ x0; \
438 } while (0)
439 #endif
440
/*
 * Mix one pair of words in place:
 *   a = a + b                 (reduced with SPH_T64)
 *   b = (b rotated left by "rot" bits) ^ a, using the updated a
 * Both "a" and "b" must be assignable.
 */
#define TFBIG_MIX(a, b, rot)   do { \
		a = SPH_T64(a + b); \
		b = SPH_ROTL64(b, rot) ^ a; \
	} while (0)
445
446 #if 0
447 /* obsolete */
448 #define TFSMALL_MIX4(w0, w1, w2, w3, rc0, rc1) do { \
449 TFSMALL_MIX(w0, w1, rc0); \
450 TFSMALL_MIX(w2, w3, rc1); \
451 } while (0)
452 #endif
453
/*
 * Apply TFBIG_MIX to four word pairs, in this order:
 * (a0, a1) with r0, (b0, b1) with r1, (c0, c1) with r2 and
 * (d0, d1) with r3.
 */
#define TFBIG_MIX8(a0, a1, b0, b1, c0, c1, d0, d1, r0, r1, r2, r3) \
	do { \
		TFBIG_MIX(a0, a1, r0); \
		TFBIG_MIX(b0, b1, r1); \
		TFBIG_MIX(c0, c1, r2); \
		TFBIG_MIX(d0, d1, r3); \
	} while (0)
460
461 #if 0
462 /* obsolete */
463 #define TFSMALL_4e(s) do { \
464 TFSMALL_ADDKEY(p0, p1, p2, p3, h, t, s); \
465 TFSMALL_MIX4(p0, p1, p2, p3, 14, 16); \
466 TFSMALL_MIX4(p0, p3, p2, p1, 52, 57); \
467 TFSMALL_MIX4(p0, p1, p2, p3, 23, 40); \
468 TFSMALL_MIX4(p0, p3, p2, p1, 5, 37); \
469 } while (0)
470
471 #define TFSMALL_4o(s) do { \
472 TFSMALL_ADDKEY(p0, p1, p2, p3, h, t, s); \
473 TFSMALL_MIX4(p0, p1, p2, p3, 25, 33); \
474 TFSMALL_MIX4(p0, p3, p2, p1, 46, 12); \
475 TFSMALL_MIX4(p0, p1, p2, p3, 58, 22); \
476 TFSMALL_MIX4(p0, p3, p2, p1, 32, 32); \
477 } while (0)
478 #endif
479
/*
 * Mixing part of a four-layer group, shared by both build variants.
 * Each layer is one TFBIG_MIX8 over p0..p7; the order in which the
 * words are paired changes from layer to layer. TFBIG_4e_MIX carries
 * the rotation amounts used by even-numbered groups, TFBIG_4o_MIX those
 * used by odd-numbered groups (presumably the Threefish-512 rotation
 * table -- confirm against the Skein specification before editing).
 */
#define TFBIG_4e_MIX   do { \
		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 46, 36, 19, 37); \
		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 33, 27, 14, 42); \
		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 17, 49, 36, 39); \
		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 44, 9, 54, 56); \
	} while (0)

#define TFBIG_4o_MIX   do { \
		TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 39, 30, 34, 24); \
		TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 13, 50, 10, 17); \
		TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 25, 29, 39, 43); \
		TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 8, 35, 56, 22); \
	} while (0)

/*
 * TFBIG_4e(rnd) / TFBIG_4o(rnd): key injection number "rnd" followed by
 * the even / odd mixing layers. Only the key injection differs between
 * the two build variants.
 */
#if SPH_SMALL_FOOTPRINT_SKEIN

/* Compact build: the even group adds t0/t1, the odd group adds t1/t2. */
#define TFBIG_4e(rnd)   do { \
		TFBIG_ADDKEY(rnd, t0, t1); \
		TFBIG_4e_MIX; \
	} while (0)

#define TFBIG_4o(rnd)   do { \
		TFBIG_ADDKEY(rnd, t1, t2); \
		TFBIG_4o_MIX; \
	} while (0)

#else

/* Unrolled build: "rnd" must be a literal, it is token-pasted. */
#define TFBIG_4e(rnd)   do { \
		TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, rnd); \
		TFBIG_4e_MIX; \
	} while (0)

#define TFBIG_4o(rnd)   do { \
		TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, rnd); \
		TFBIG_4o_MIX; \
	} while (0)

#endif
517
518 #if 0
519 /* obsolete */
520 #define UBI_SMALL(etype, extra) do { \
521 sph_u64 h4, t0, t1, t2; \
522 sph_u64 m0 = sph_dec64le(buf + 0); \
523 sph_u64 m1 = sph_dec64le(buf + 8); \
524 sph_u64 m2 = sph_dec64le(buf + 16); \
525 sph_u64 m3 = sph_dec64le(buf + 24); \
526 sph_u64 p0 = m0; \
527 sph_u64 p1 = m1; \
528 sph_u64 p2 = m2; \
529 sph_u64 p3 = m3; \
530 t0 = SPH_T64(bcount << 5) + (sph_u64)(extra); \
531 t1 = (bcount >> 59) + ((sph_u64)(etype) << 55); \
532 TFSMALL_KINIT(h0, h1, h2, h3, h4, t0, t1, t2); \
533 TFSMALL_4e(0); \
534 TFSMALL_4o(1); \
535 TFSMALL_4e(2); \
536 TFSMALL_4o(3); \
537 TFSMALL_4e(4); \
538 TFSMALL_4o(5); \
539 TFSMALL_4e(6); \
540 TFSMALL_4o(7); \
541 TFSMALL_4e(8); \
542 TFSMALL_4o(9); \
543 TFSMALL_4e(10); \
544 TFSMALL_4o(11); \
545 TFSMALL_4e(12); \
546 TFSMALL_4o(13); \
547 TFSMALL_4e(14); \
548 TFSMALL_4o(15); \
549 TFSMALL_4e(16); \
550 TFSMALL_4o(17); \
551 TFSMALL_ADDKEY(p0, p1, p2, p3, h, t, 18); \
552 h0 = m0 ^ p0; \
553 h1 = m1 ^ p1; \
554 h2 = m2 ^ p2; \
555 h3 = m3 ^ p3; \
556 } while (0)
557 #endif
558
#if SPH_SMALL_FOOTPRINT_SKEIN

/*
 * Process the 64-byte block found at "buf" and update the state; compact
 * variant, where the state lives in the array h[] (27 words).
 *
 * Names taken from the expansion site: buf, bcount, h[].
 *
 *  - m0..m7 are the eight little-endian 64-bit words of the block;
 *    p0..p7 start as a copy of them.
 *  - t0 is the low 64 bits of (bcount * 64) plus "extra"; t1 is the
 *    part of bcount shifted out of t0 (bcount >> 58) plus "etype"
 *    placed at bit 55 and above.
 *  - TFBIG_KINIT fills h[8] and t2; the first loop then repeats
 *    h[0..8] periodically into h[9..26], so that h[s + 0 .. s + 7] can
 *    be indexed directly for every s up to 18.
 *  - The second loop runs nine even/odd group pairs (s = 0, 2, ..., 16
 *    and s + 1); after each pair the tweak words are rotated so that
 *    (t0, t1, t2) becomes (old t2, old t0, old t1).
 *  - After the final key injection (index 18), the new state is
 *    h[i] = m_i ^ p_i for i = 0..7.
 */
#define UBI_BIG(etype, extra) do { \
		sph_u64 t0, t1, t2; \
		unsigned u; \
		sph_u64 m0 = sph_dec64le_aligned(buf + 0); \
		sph_u64 m1 = sph_dec64le_aligned(buf + 8); \
		sph_u64 m2 = sph_dec64le_aligned(buf + 16); \
		sph_u64 m3 = sph_dec64le_aligned(buf + 24); \
		sph_u64 m4 = sph_dec64le_aligned(buf + 32); \
		sph_u64 m5 = sph_dec64le_aligned(buf + 40); \
		sph_u64 m6 = sph_dec64le_aligned(buf + 48); \
		sph_u64 m7 = sph_dec64le_aligned(buf + 56); \
		sph_u64 p0 = m0; \
		sph_u64 p1 = m1; \
		sph_u64 p2 = m2; \
		sph_u64 p3 = m3; \
		sph_u64 p4 = m4; \
		sph_u64 p5 = m5; \
		sph_u64 p6 = m6; \
		sph_u64 p7 = m7; \
		t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
		t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
		TFBIG_KINIT(h[0], h[1], h[2], h[3], h[4], h[5], \
			h[6], h[7], h[8], t0, t1, t2); \
		for (u = 0; u <= 15; u += 3) { \
			h[u + 9] = h[u + 0]; \
			h[u + 10] = h[u + 1]; \
			h[u + 11] = h[u + 2]; \
		} \
		for (u = 0; u < 9; u ++) { \
			sph_u64 s = u << 1; \
			sph_u64 tmp; \
			TFBIG_4e(s); \
			TFBIG_4o(s + 1); \
			tmp = t2; \
			t2 = t1; \
			t1 = t0; \
			t0 = tmp; \
		} \
		TFBIG_ADDKEY(18, t0, t1); \
		h[0] = m0 ^ p0; \
		h[1] = m1 ^ p1; \
		h[2] = m2 ^ p2; \
		h[3] = m3 ^ p3; \
		h[4] = m4 ^ p4; \
		h[5] = m5 ^ p5; \
		h[6] = m6 ^ p6; \
		h[7] = m7 ^ p7; \
	} while (0)

#else

/*
 * Process the 64-byte block found at "buf" and update the state;
 * unrolled variant, where the state lives in the scalars h0..h7.
 *
 * Names taken from the expansion site: buf, bcount, h0..h7.
 *
 *  - m0..m7 are the eight little-endian 64-bit words of the block;
 *    p0..p7 start as a copy of them.
 *  - t0 is the low 64 bits of (bcount * 64) plus "extra"; t1 is the
 *    part of bcount shifted out of t0 (bcount >> 58) plus "etype"
 *    placed at bit 55 and above.
 *  - TFBIG_KINIT computes the local h8 and t2.
 *  - Groups 0..17 alternate TFBIG_4e and TFBIG_4o with literal indices
 *    (the indices are token-pasted, hence no loop), followed by the
 *    final key injection with index 18.
 *  - The new state is h_i = m_i ^ p_i for i = 0..7.
 */
#define UBI_BIG(etype, extra) do { \
		sph_u64 h8, t0, t1, t2; \
		sph_u64 m0 = sph_dec64le_aligned(buf + 0); \
		sph_u64 m1 = sph_dec64le_aligned(buf + 8); \
		sph_u64 m2 = sph_dec64le_aligned(buf + 16); \
		sph_u64 m3 = sph_dec64le_aligned(buf + 24); \
		sph_u64 m4 = sph_dec64le_aligned(buf + 32); \
		sph_u64 m5 = sph_dec64le_aligned(buf + 40); \
		sph_u64 m6 = sph_dec64le_aligned(buf + 48); \
		sph_u64 m7 = sph_dec64le_aligned(buf + 56); \
		sph_u64 p0 = m0; \
		sph_u64 p1 = m1; \
		sph_u64 p2 = m2; \
		sph_u64 p3 = m3; \
		sph_u64 p4 = m4; \
		sph_u64 p5 = m5; \
		sph_u64 p6 = m6; \
		sph_u64 p7 = m7; \
		t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
		t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
		TFBIG_KINIT(h0, h1, h2, h3, h4, h5, h6, h7, h8, t0, t1, t2); \
		TFBIG_4e(0); \
		TFBIG_4o(1); \
		TFBIG_4e(2); \
		TFBIG_4o(3); \
		TFBIG_4e(4); \
		TFBIG_4o(5); \
		TFBIG_4e(6); \
		TFBIG_4o(7); \
		TFBIG_4e(8); \
		TFBIG_4o(9); \
		TFBIG_4e(10); \
		TFBIG_4o(11); \
		TFBIG_4e(12); \
		TFBIG_4o(13); \
		TFBIG_4e(14); \
		TFBIG_4o(15); \
		TFBIG_4e(16); \
		TFBIG_4o(17); \
		TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, 18); \
		h0 = m0 ^ p0; \
		h1 = m1 ^ p1; \
		h2 = m2 ^ p2; \
		h3 = m3 ^ p3; \
		h4 = m4 ^ p4; \
		h5 = m5 ^ p5; \
		h6 = m6 ^ p6; \
		h7 = m7 ^ p7; \
	} while (0)

#endif
663
664 #if 0
665 /* obsolete */
666 #define DECL_STATE_SMALL \
667 sph_u64 h0, h1, h2, h3; \
668 sph_u64 bcount;
669
670 #define READ_STATE_SMALL(sc) do { \
671 h0 = (sc)->h0; \
672 h1 = (sc)->h1; \
673 h2 = (sc)->h2; \
674 h3 = (sc)->h3; \
675 bcount = sc->bcount; \
676 } while (0)
677
678 #define WRITE_STATE_SMALL(sc) do { \
679 (sc)->h0 = h0; \
680 (sc)->h1 = h1; \
681 (sc)->h2 = h2; \
682 (sc)->h3 = h3; \
683 sc->bcount = bcount; \
684 } while (0)
685 #endif
686
/*
 * Local working copy of the hash state.
 *
 * DECL_STATE_BIG declares the locals (it ends with its own semicolon, so
 * it is written without one), READ_STATE_BIG(sc) loads them from the
 * context and WRITE_STATE_BIG(sc) stores them back. Only the eight
 * state words and the block counter are transferred.
 *
 * The "sc" argument is parenthesized at every use, including the bcount
 * access, so any pointer expression may be passed.
 */
#if SPH_SMALL_FOOTPRINT_SKEIN

/* Compact build: h[0..7] hold the state, h[8..26] the key schedule. */
#define DECL_STATE_BIG \
	sph_u64 h[27]; \
	sph_u64 bcount;

#define READ_STATE_BIG(sc)   do { \
		h[0] = (sc)->h0; \
		h[1] = (sc)->h1; \
		h[2] = (sc)->h2; \
		h[3] = (sc)->h3; \
		h[4] = (sc)->h4; \
		h[5] = (sc)->h5; \
		h[6] = (sc)->h6; \
		h[7] = (sc)->h7; \
		bcount = (sc)->bcount; \
	} while (0)

#define WRITE_STATE_BIG(sc)   do { \
		(sc)->h0 = h[0]; \
		(sc)->h1 = h[1]; \
		(sc)->h2 = h[2]; \
		(sc)->h3 = h[3]; \
		(sc)->h4 = h[4]; \
		(sc)->h5 = h[5]; \
		(sc)->h6 = h[6]; \
		(sc)->h7 = h[7]; \
		(sc)->bcount = bcount; \
	} while (0)

#else

/* Unrolled build: the state is kept in eight scalar locals. */
#define DECL_STATE_BIG \
	sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; \
	sph_u64 bcount;

#define READ_STATE_BIG(sc)   do { \
		h0 = (sc)->h0; \
		h1 = (sc)->h1; \
		h2 = (sc)->h2; \
		h3 = (sc)->h3; \
		h4 = (sc)->h4; \
		h5 = (sc)->h5; \
		h6 = (sc)->h6; \
		h7 = (sc)->h7; \
		bcount = (sc)->bcount; \
	} while (0)

#define WRITE_STATE_BIG(sc)   do { \
		(sc)->h0 = h0; \
		(sc)->h1 = h1; \
		(sc)->h2 = h2; \
		(sc)->h3 = h3; \
		(sc)->h4 = h4; \
		(sc)->h5 = h5; \
		(sc)->h6 = h6; \
		(sc)->h7 = h7; \
		(sc)->bcount = bcount; \
	} while (0)

#endif
748
749 #if 0
750 /* obsolete */
751 static void
752 skein_small_init(sph_skein_small_context *sc, const sph_u64 *iv)
753 {
754 sc->h0 = iv[0];
755 sc->h1 = iv[1];
756 sc->h2 = iv[2];
757 sc->h3 = iv[3];
758 sc->bcount = 0;
759 sc->ptr = 0;
760 }
761 #endif
762
763 static void
skein_big_init(sph_skein_big_context * sc,const sph_u64 * iv)764 skein_big_init(sph_skein_big_context *sc, const sph_u64 *iv)
765 {
766 sc->h0 = iv[0];
767 sc->h1 = iv[1];
768 sc->h2 = iv[2];
769 sc->h3 = iv[3];
770 sc->h4 = iv[4];
771 sc->h5 = iv[5];
772 sc->h6 = iv[6];
773 sc->h7 = iv[7];
774 sc->bcount = 0;
775 sc->ptr = 0;
776 }
777
778 #if 0
779 /* obsolete */
780 static void
781 skein_small_core(sph_skein_small_context *sc, const void *data, size_t len)
782 {
783 unsigned char *buf;
784 size_t ptr, clen;
785 unsigned first;
786 DECL_STATE_SMALL
787
788 buf = sc->buf;
789 ptr = sc->ptr;
790 clen = (sizeof sc->buf) - ptr;
791 if (len <= clen) {
792 memcpy(buf + ptr, data, len);
793 sc->ptr = ptr + len;
794 return;
795 }
796 if (clen != 0) {
797 memcpy(buf + ptr, data, clen);
798 data = (const unsigned char *)data + clen;
799 len -= clen;
800 }
801
802 #if SPH_SMALL_FOOTPRINT_SKEIN
803
804 READ_STATE_SMALL(sc);
805 first = (bcount == 0) << 7;
806 for (;;) {
807 bcount ++;
808 UBI_SMALL(96 + first, 0);
809 if (len <= sizeof sc->buf)
810 break;
811 first = 0;
812 memcpy(buf, data, sizeof sc->buf);
813 data = (const unsigned char *)data + sizeof sc->buf;
814 len -= sizeof sc->buf;
815 }
816 WRITE_STATE_SMALL(sc);
817 sc->ptr = len;
818 memcpy(buf, data, len);
819
820 #else
821
822 /*
823 * Unrolling the loop yields a slight performance boost, while
824 * keeping the code size around 24 kB on 32-bit x86.
825 */
826 READ_STATE_SMALL(sc);
827 first = (bcount == 0) << 7;
828 for (;;) {
829 bcount ++;
830 UBI_SMALL(96 + first, 0);
831 if (len <= sizeof sc->buf)
832 break;
833 buf = (unsigned char *)data;
834 bcount ++;
835 UBI_SMALL(96, 0);
836 if (len <= 2 * sizeof sc->buf) {
837 data = buf + sizeof sc->buf;
838 len -= sizeof sc->buf;
839 break;
840 }
841 buf += sizeof sc->buf;
842 data = buf + sizeof sc->buf;
843 first = 0;
844 len -= 2 * sizeof sc->buf;
845 }
846 WRITE_STATE_SMALL(sc);
847 sc->ptr = len;
848 memcpy(sc->buf, data, len);
849
850 #endif
851 }
852 #endif
853
854 static void
skein_big_core(sph_skein_big_context * sc,const void * data,size_t len)855 skein_big_core(sph_skein_big_context *sc, const void *data, size_t len)
856 {
857 /*
858 * The Skein "final bit" in the tweak is troublesome here,
859 * because if the input has a length which is a multiple of the
860 * block size (512 bits) then that bit must be set for the
861 * final block, which is full of message bits (padding in
862 * Skein can be reduced to no extra bit at all). However, this
863 * function cannot know whether it processes the last chunks of
864 * the message or not. Hence we may keep a full block of buffered
865 * data (64 bytes).
866 */
867 unsigned char *buf;
868 size_t ptr;
869 unsigned first;
870 DECL_STATE_BIG
871
872 buf = sc->buf;
873 ptr = sc->ptr;
874 if (len <= (sizeof sc->buf) - ptr) {
875 memcpy(buf + ptr, data, len);
876 ptr += len;
877 sc->ptr = ptr;
878 return;
879 }
880
881 READ_STATE_BIG(sc);
882 first = (bcount == 0) << 7;
883 do {
884 size_t clen;
885
886 if (ptr == sizeof sc->buf) {
887 bcount ++;
888 UBI_BIG(96 + first, 0);
889 first = 0;
890 ptr = 0;
891 }
892 clen = (sizeof sc->buf) - ptr;
893 if (clen > len)
894 clen = len;
895 memcpy(buf + ptr, data, clen);
896 ptr += clen;
897 data = (const unsigned char *)data + clen;
898 len -= clen;
899 } while (len > 0);
900 WRITE_STATE_BIG(sc);
901 sc->ptr = ptr;
902 }
903
904 #if 0
905 /* obsolete */
906 static void
907 skein_small_close(sph_skein_small_context *sc, unsigned ub, unsigned n,
908 void *dst, size_t out_len)
909 {
910 unsigned char *buf;
911 size_t ptr;
912 unsigned et;
913 int i;
914 DECL_STATE_SMALL
915
916 if (n != 0) {
917 unsigned z;
918 unsigned char x;
919
920 z = 0x80 >> n;
921 x = ((ub & -z) | z) & 0xFF;
922 skein_small_core(sc, &x, 1);
923 }
924
925 buf = sc->buf;
926 ptr = sc->ptr;
927 READ_STATE_SMALL(sc);
928 memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
929 et = 352 + ((bcount == 0) << 7) + (n != 0);
930 for (i = 0; i < 2; i ++) {
931 UBI_SMALL(et, ptr);
932 if (i == 0) {
933 memset(buf, 0, sizeof sc->buf);
934 bcount = 0;
935 et = 510;
936 ptr = 8;
937 }
938 }
939
940 sph_enc64le_aligned(buf + 0, h0);
941 sph_enc64le_aligned(buf + 8, h1);
942 sph_enc64le_aligned(buf + 16, h2);
943 sph_enc64le_aligned(buf + 24, h3);
944 memcpy(dst, buf, out_len);
945 }
946 #endif
947
948 static void
skein_big_close(sph_skein_big_context * sc,unsigned ub,unsigned n,void * dst,size_t out_len)949 skein_big_close(sph_skein_big_context *sc, unsigned ub, unsigned n,
950 void *dst, size_t out_len)
951 {
952 unsigned char *buf;
953 size_t ptr;
954 unsigned et;
955 int i;
956 #if SPH_SMALL_FOOTPRINT_SKEIN
957 size_t u;
958 #endif
959 DECL_STATE_BIG
960
961 /*
962 * Add bit padding if necessary.
963 */
964 if (n != 0) {
965 unsigned z;
966 unsigned char x;
967
968 z = 0x80 >> n;
969 x = ((ub & -z) | z) & 0xFF;
970 skein_big_core(sc, &x, 1);
971 }
972
973 buf = sc->buf;
974 ptr = sc->ptr;
975
976 /*
977 * At that point, if ptr == 0, then the message was empty;
978 * otherwise, there is between 1 and 64 bytes (inclusive) which
979 * are yet to be processed. Either way, we complete the buffer
980 * to a full block with zeros (the Skein specification mandates
981 * that an empty message is padded so that there is at least
982 * one block to process).
983 *
984 * Once this block has been processed, we do it again, with
985 * a block full of zeros, for the output (that block contains
986 * the encoding of "0", over 8 bytes, then padded with zeros).
987 */
988 READ_STATE_BIG(sc);
989 memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
990 et = 352 + ((bcount == 0) << 7) + (n != 0);
991 for (i = 0; i < 2; i ++) {
992 UBI_BIG(et, ptr);
993 if (i == 0) {
994 memset(buf, 0, sizeof sc->buf);
995 bcount = 0;
996 et = 510;
997 ptr = 8;
998 }
999 }
1000
1001 #if SPH_SMALL_FOOTPRINT_SKEIN
1002
1003 /*
1004 * We use a temporary buffer because we must support the case
1005 * where output size is not a multiple of 64 (namely, a 224-bit
1006 * output).
1007 */
1008 for (u = 0; u < out_len; u += 8)
1009 sph_enc64le_aligned(buf + u, h[u >> 3]);
1010 memcpy(dst, buf, out_len);
1011
1012 #else
1013
1014 sph_enc64le_aligned(buf + 0, h0);
1015 sph_enc64le_aligned(buf + 8, h1);
1016 sph_enc64le_aligned(buf + 16, h2);
1017 sph_enc64le_aligned(buf + 24, h3);
1018 sph_enc64le_aligned(buf + 32, h4);
1019 sph_enc64le_aligned(buf + 40, h5);
1020 sph_enc64le_aligned(buf + 48, h6);
1021 sph_enc64le_aligned(buf + 56, h7);
1022 memcpy(dst, buf, out_len);
1023
1024 #endif
1025 }
1026
1027 #if 0
1028 /* obsolete */
1029 static const sph_u64 IV224[] = {
1030 SPH_C64(0xC6098A8C9AE5EA0B), SPH_C64(0x876D568608C5191C),
1031 SPH_C64(0x99CB88D7D7F53884), SPH_C64(0x384BDDB1AEDDB5DE)
1032 };
1033
1034 static const sph_u64 IV256[] = {
1035 SPH_C64(0xFC9DA860D048B449), SPH_C64(0x2FCA66479FA7D833),
1036 SPH_C64(0xB33BC3896656840F), SPH_C64(0x6A54E920FDE8DA69)
1037 };
1038 #endif
1039
1040 static const sph_u64 IV224[] = {
1041 SPH_C64(0xCCD0616248677224), SPH_C64(0xCBA65CF3A92339EF),
1042 SPH_C64(0x8CCD69D652FF4B64), SPH_C64(0x398AED7B3AB890B4),
1043 SPH_C64(0x0F59D1B1457D2BD0), SPH_C64(0x6776FE6575D4EB3D),
1044 SPH_C64(0x99FBC70E997413E9), SPH_C64(0x9E2CFCCFE1C41EF7)
1045 };
1046
1047 static const sph_u64 IV256[] = {
1048 SPH_C64(0xCCD044A12FDB3E13), SPH_C64(0xE83590301A79A9EB),
1049 SPH_C64(0x55AEA0614F816E6F), SPH_C64(0x2A2767A4AE9B94DB),
1050 SPH_C64(0xEC06025E74DD7683), SPH_C64(0xE7A436CDC4746251),
1051 SPH_C64(0xC36FBAF9393AD185), SPH_C64(0x3EEDBA1833EDFC13)
1052 };
1053
1054 static const sph_u64 IV384[] = {
1055 SPH_C64(0xA3F6C6BF3A75EF5F), SPH_C64(0xB0FEF9CCFD84FAA4),
1056 SPH_C64(0x9D77DD663D770CFE), SPH_C64(0xD798CBF3B468FDDA),
1057 SPH_C64(0x1BC4A6668A0E4465), SPH_C64(0x7ED7D434E5807407),
1058 SPH_C64(0x548FC1ACD4EC44D6), SPH_C64(0x266E17546AA18FF8)
1059 };
1060
1061 static const sph_u64 IV512[] = {
1062 SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03),
1063 SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1),
1064 SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
1065 SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
1066 };
1067
1068 #if 0
1069 /* obsolete */
1070 /* see sph_skein.h */
1071 void
1072 sph_skein224_init(void *cc)
1073 {
1074 skein_small_init(cc, IV224);
1075 }
1076
1077 /* see sph_skein.h */
1078 void
1079 sph_skein224(void *cc, const void *data, size_t len)
1080 {
1081 skein_small_core(cc, data, len);
1082 }
1083
1084 /* see sph_skein.h */
1085 void
1086 sph_skein224_close(void *cc, void *dst)
1087 {
1088 sph_skein224_addbits_and_close(cc, 0, 0, dst);
1089 }
1090
1091 /* see sph_skein.h */
1092 void
1093 sph_skein224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1094 {
1095 skein_small_close(cc, ub, n, dst, 28);
1096 sph_skein224_init(cc);
1097 }
1098
1099 /* see sph_skein.h */
1100 void
1101 sph_skein256_init(void *cc)
1102 {
1103 skein_small_init(cc, IV256);
1104 }
1105
1106 /* see sph_skein.h */
1107 void
1108 sph_skein256(void *cc, const void *data, size_t len)
1109 {
1110 skein_small_core(cc, data, len);
1111 }
1112
1113 /* see sph_skein.h */
1114 void
1115 sph_skein256_close(void *cc, void *dst)
1116 {
1117 sph_skein256_addbits_and_close(cc, 0, 0, dst);
1118 }
1119
1120 /* see sph_skein.h */
1121 void
1122 sph_skein256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1123 {
1124 skein_small_close(cc, ub, n, dst, 32);
1125 sph_skein256_init(cc);
1126 }
1127 #endif
1128
1129 /* see sph_skein.h */
1130 void
sph_skein224_init(void * cc)1131 sph_skein224_init(void *cc)
1132 {
1133 skein_big_init(cc, IV224);
1134 }
1135
/*
 * see sph_skein.h
 *
 * Feed "len" bytes from "data" into the Skein-224 context "cc"; this is
 * a direct call to skein_big_core().
 */
void
sph_skein224(void *cc, const void *data, size_t len)
{
	skein_big_core(cc, data, len);
}
1142
/*
 * see sph_skein.h
 *
 * Finish a Skein-224 computation with no extra trailing bits:
 * equivalent to sph_skein224_addbits_and_close(cc, 0, 0, dst).
 */
void
sph_skein224_close(void *cc, void *dst)
{
	sph_skein224_addbits_and_close(cc, 0, 0, dst);
}
1149
/*
 * see sph_skein.h
 *
 * Append the "n" extra bits given in "ub" (none when n is 0), write the
 * 28-byte digest to "dst", then reinitialize the context for a new
 * Skein-224 computation.
 */
void
sph_skein224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	skein_big_close(cc, ub, n, dst, 28);
	sph_skein224_init(cc);
}
1157
1158 /* see sph_skein.h */
1159 void
sph_skein256_init(void * cc)1160 sph_skein256_init(void *cc)
1161 {
1162 skein_big_init(cc, IV256);
1163 }
1164
/*
 * see sph_skein.h
 *
 * Feed "len" bytes from "data" into the Skein-256 context "cc"; this is
 * a direct call to skein_big_core().
 */
void
sph_skein256(void *cc, const void *data, size_t len)
{
	skein_big_core(cc, data, len);
}
1171
/*
 * see sph_skein.h
 *
 * Finish a Skein-256 computation with no extra trailing bits:
 * equivalent to sph_skein256_addbits_and_close(cc, 0, 0, dst).
 */
void
sph_skein256_close(void *cc, void *dst)
{
	sph_skein256_addbits_and_close(cc, 0, 0, dst);
}
1178
/*
 * see sph_skein.h
 *
 * Append the "n" extra bits given in "ub" (none when n is 0), write the
 * 32-byte digest to "dst", then reinitialize the context for a new
 * Skein-256 computation.
 */
void
sph_skein256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	skein_big_close(cc, ub, n, dst, 32);
	sph_skein256_init(cc);
}
1186
1187 /* see sph_skein.h */
1188 void
sph_skein384_init(void * cc)1189 sph_skein384_init(void *cc)
1190 {
1191 skein_big_init(cc, IV384);
1192 }
1193
/*
 * see sph_skein.h
 *
 * Feed "len" bytes from "data" into the Skein-384 context "cc"; this is
 * a direct call to skein_big_core().
 */
void
sph_skein384(void *cc, const void *data, size_t len)
{
	skein_big_core(cc, data, len);
}
1200
/*
 * see sph_skein.h
 *
 * Finish a Skein-384 computation with no extra trailing bits:
 * equivalent to sph_skein384_addbits_and_close(cc, 0, 0, dst).
 */
void
sph_skein384_close(void *cc, void *dst)
{
	sph_skein384_addbits_and_close(cc, 0, 0, dst);
}
1207
/*
 * see sph_skein.h
 *
 * Append the "n" extra bits given in "ub" (none when n is 0), write the
 * 48-byte digest to "dst", then reinitialize the context for a new
 * Skein-384 computation.
 */
void
sph_skein384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	skein_big_close(cc, ub, n, dst, 48);
	sph_skein384_init(cc);
}
1215
1216 /* see sph_skein.h */
1217 void
sph_skein512_init(void * cc)1218 sph_skein512_init(void *cc)
1219 {
1220 skein_big_init(cc, IV512);
1221 }
1222
/*
 * see sph_skein.h
 *
 * Feed "len" bytes from "data" into the Skein-512 context "cc"; this is
 * a direct call to skein_big_core().
 */
void
sph_skein512(void *cc, const void *data, size_t len)
{
	skein_big_core(cc, data, len);
}
1229
/*
 * see sph_skein.h
 *
 * Finish a Skein-512 computation with no extra trailing bits:
 * equivalent to sph_skein512_addbits_and_close(cc, 0, 0, dst).
 */
void
sph_skein512_close(void *cc, void *dst)
{
	sph_skein512_addbits_and_close(cc, 0, 0, dst);
}
1236
/*
 * see sph_skein.h
 *
 * Append the "n" extra bits given in "ub" (none when n is 0), write the
 * 64-byte digest to "dst", then reinitialize the context for a new
 * Skein-512 computation.
 */
void
sph_skein512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	skein_big_close(cc, ub, n, dst, 64);
	sph_skein512_init(cc);
}
1244
1245 #endif
1246