1 /***********************************************************************
2 **
3 ** Implementation of the Skein block functions.
4 **
5 ** Source code author: Doug Whiting, 2008.
6 **
7 ** This algorithm and source code is released to the public domain.
8 **
9 ** Compile-time switches:
10 **
11 ** SKEIN_USE_ASM -- set bits (256/512/1024) to select which
12 ** versions use ASM code for block processing
13 ** [default: use C for all block sizes]
14 **
15 ************************************************************************/
16
17 #include <string.h>
18 #include <dieharder/skein.h>
19 #include <stdio.h>
20
21 /* 64-bit rotate left -- defined in skein_port.h as macro
22 u64b_t RotL_64(u64b_t x,uint_t N)
23 {
24 return (x << (N & 63)) | (x >> ((64-N) & 63));
25 }
26 */
27
/* Block size in bits: WCNT 64-bit words (WCNT must be in scope where this is expanded). */
#define BLK_BITS ((WCNT) * 64)
29
/*
 * InjectKey(r): add subkey number r into the state (same for all block sizes).
 *
 * The macro is not self-contained. It must be expanded in a scope that
 * provides:
 *   WCNT  - number of 64-bit state words
 *   X     - the state, at least WCNT words
 *   ks    - key schedule, WCNT+1 words
 *   ts    - tweak schedule, 3 words
 *   i     - an integer scratch loop index (overwritten)
 *
 * r is evaluated several times, so it must have no side effects.
 *
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement and stays correct under an unbraced if/else or loop.
 */
#define InjectKey(r)                                              \
    do {                                                          \
        for (i = 0; i < WCNT; i++) {                              \
            X[i] += ks[((r) + i) % (WCNT + 1)];                   \
        }                                                         \
        X[WCNT - 3] += ts[((r) + 0) % 3];                         \
        X[WCNT - 2] += ts[((r) + 1) % 3];                         \
        X[WCNT - 1] += (r); /* avoid slide attacks */             \
    } while (0)
38
39
/*
 * Threefish_512_Process_Blocks
 *
 * Runs the 512-bit block function over blkCnt consecutive blocks of input
 * and stores the resulting state words in output.
 *
 *   ctx     - supplies the key words (ctx->Key) and the two tweak words
 *             (ctx->T); it is only read, never modified.
 *   input   - blkCnt blocks of SKEIN_512_BLOCK_BYTES bytes each, decoded
 *             with Skein_Get64_LSB_First.
 *   output  - receives blkCnt * SKEIN_512_STATE_WORDS u64b_t values. It is
 *             accessed through a u64b_t pointer, so it must be suitably
 *             aligned for u64b_t and large enough for all blocks.
 *   blkCnt  - number of blocks to process. Zero is a caller error (it still
 *             triggers Skein_assert); if the assertion is compiled out the
 *             function returns without touching input or output.
 *
 * The key schedule is computed once, so the same key and tweak are applied
 * to every block. Unlike the hashing block function this is derived from,
 * there is no final feed-forward XOR with the input block and no chaining
 * state is written back to ctx.
 *
 * NOTE(review): the input is decoded LSB-first but the output is stored as
 * native u64b_t words, so the output byte order presumably follows the host
 * endianness -- confirm this is what callers expect.
 */
void Threefish_512_Process_Blocks(Threefish_512_Ctxt_t *ctx, const u08b_t *input,
                                  void *output, size_t blkCnt) {
    enum { WCNT = SKEIN_512_STATE_WORDS };

    size_t i, r;
    u64b_t ts[3];                     /* key schedule: tweak */
    u64b_t ks[WCNT+1];                /* key schedule: key words plus parity word */
    u64b_t *X = (u64b_t *) output;    /* working state; aliases the caller's output buffer */
    u64b_t w[WCNT];                   /* local copy of the current input block */

    Skein_assert(blkCnt != 0);        /* never call with blkCnt == 0! */
    if (blkCnt == 0) {
        /* The do/while below pre-decrements blkCnt; starting from zero the
         * counter would wrap around and run far past both buffers. */
        return;
    }

    /* precompute the key schedule once; it is shared by all blocks */
    ks[WCNT] = SKEIN_KS_PARITY;
    for (i = 0; i < WCNT; i++) {
        ks[i] = ctx->Key[i];
        ks[WCNT] ^= ctx->Key[i];      /* compute overall parity */
    }
    ts[0] = ctx->T[0];                /* tweak words */
    ts[1] = ctx->T[1];
    ts[2] = ts[0] ^ ts[1];

    do {
        Skein_Get64_LSB_First(w, input, WCNT);  /* get input block in little-endian format */
        for (i = 0; i < WCNT; i++) {            /* do the first full key injection */
            X[i] = w[i] + ks[i];
        }
        X[WCNT-3] += ts[0];
        X[WCNT-2] += ts[1];

        /* each iteration performs 8 unrolled rounds with a key injection after every 4 */
        for (r = 1; r <= SKEIN_512_ROUNDS_TOTAL/8; r++) {
            X[0] += X[1]; X[1] = RotL_64(X[1],R_512_0_0); X[1] ^= X[0];
            X[2] += X[3]; X[3] = RotL_64(X[3],R_512_0_1); X[3] ^= X[2];
            X[4] += X[5]; X[5] = RotL_64(X[5],R_512_0_2); X[5] ^= X[4];
            X[6] += X[7]; X[7] = RotL_64(X[7],R_512_0_3); X[7] ^= X[6];

            X[2] += X[1]; X[1] = RotL_64(X[1],R_512_1_0); X[1] ^= X[2];
            X[4] += X[7]; X[7] = RotL_64(X[7],R_512_1_1); X[7] ^= X[4];
            X[6] += X[5]; X[5] = RotL_64(X[5],R_512_1_2); X[5] ^= X[6];
            X[0] += X[3]; X[3] = RotL_64(X[3],R_512_1_3); X[3] ^= X[0];

            X[4] += X[1]; X[1] = RotL_64(X[1],R_512_2_0); X[1] ^= X[4];
            X[6] += X[3]; X[3] = RotL_64(X[3],R_512_2_1); X[3] ^= X[6];
            X[0] += X[5]; X[5] = RotL_64(X[5],R_512_2_2); X[5] ^= X[0];
            X[2] += X[7]; X[7] = RotL_64(X[7],R_512_2_3); X[7] ^= X[2];

            X[6] += X[1]; X[1] = RotL_64(X[1],R_512_3_0); X[1] ^= X[6];
            X[0] += X[7]; X[7] = RotL_64(X[7],R_512_3_1); X[7] ^= X[0];
            X[2] += X[5]; X[5] = RotL_64(X[5],R_512_3_2); X[5] ^= X[2];
            X[4] += X[3]; X[3] = RotL_64(X[3],R_512_3_3); X[3] ^= X[4];
            InjectKey(2*r-1);

            X[0] += X[1]; X[1] = RotL_64(X[1],R_512_4_0); X[1] ^= X[0];
            X[2] += X[3]; X[3] = RotL_64(X[3],R_512_4_1); X[3] ^= X[2];
            X[4] += X[5]; X[5] = RotL_64(X[5],R_512_4_2); X[5] ^= X[4];
            X[6] += X[7]; X[7] = RotL_64(X[7],R_512_4_3); X[7] ^= X[6];

            X[2] += X[1]; X[1] = RotL_64(X[1],R_512_5_0); X[1] ^= X[2];
            X[4] += X[7]; X[7] = RotL_64(X[7],R_512_5_1); X[7] ^= X[4];
            X[6] += X[5]; X[5] = RotL_64(X[5],R_512_5_2); X[5] ^= X[6];
            X[0] += X[3]; X[3] = RotL_64(X[3],R_512_5_3); X[3] ^= X[0];

            X[4] += X[1]; X[1] = RotL_64(X[1],R_512_6_0); X[1] ^= X[4];
            X[6] += X[3]; X[3] = RotL_64(X[3],R_512_6_1); X[3] ^= X[6];
            X[0] += X[5]; X[5] = RotL_64(X[5],R_512_6_2); X[5] ^= X[0];
            X[2] += X[7]; X[7] = RotL_64(X[7],R_512_6_3); X[7] ^= X[2];

            X[6] += X[1]; X[1] = RotL_64(X[1],R_512_7_0); X[1] ^= X[6];
            X[0] += X[7]; X[7] = RotL_64(X[7],R_512_7_1); X[7] ^= X[0];
            X[2] += X[5]; X[5] = RotL_64(X[5],R_512_7_2); X[5] ^= X[2];
            X[4] += X[3]; X[3] = RotL_64(X[3],R_512_7_3); X[3] ^= X[4];
            InjectKey(2*r);
        }
        /* Deliberately NO final "feed-forward" XOR with w[] and no update of
         * chaining variables or flags in ctx: the state after the last key
         * injection is the result for this block. */

        input += SKEIN_512_BLOCK_BYTES;   /* advance to the next input block */
        X += WCNT;                        /* advance to the next output block */
    }
    while (--blkCnt);
}
123
#ifdef TSC_TIME
/* 64-bit unsigned type used by the timestamp helper below. */
typedef unsigned long long uint64;

#if 0 /* disabled: nothing inside this #if 0 region is compiled */
/* Constant subtracted from every value returned by time(). */
uint64 timingAdjust = 200;

/*
 * Reads a 64-bit counter value with x86-64 GCC-style inline assembly and
 * returns it minus timingAdjust. The assembly stores EAX into the low
 * 32 bits of temp and EDX into the high 32 bits after executing rdtsc.
 *
 * NOTE(review): the name collides with the standard library's time() if
 * <time.h> is ever included in this translation unit -- confirm before
 * re-enabling.
 * NOTE(review): temp is listed as an input operand ("m") although the
 * assembly stores into it, and there is no "memory" clobber; this looks
 * like it relies on temp being volatile -- confirm against the compiler's
 * extended-asm rules before re-enabling.
 */
uint64 time() {
    volatile uint64 temp = 4;
    __asm__ __volatile__ (
        "cpuid\n\t"                 /* presumably used to serialize before rdtsc -- confirm */
        "rdtsc\n\t"
        "leaq %0, %%rcx\n\t"        /* rcx = address of temp */
        "movl %%eax, (%%rcx)\n\t"   /* low 32 bits of temp */
        "movl %%edx, 4(%%rcx)\n\t"  /* high 32 bits of temp */
        : : "m" (temp) : "%eax", "%ebx", "%rcx", "%edx");
    return temp - timingAdjust;
}

#endif
#endif
144
145