1 /***********************************************************************
2 **
3 ** Implementation of the Skein block functions.
4 **
5 ** Source code author: Doug Whiting, 2008.
6 **
7 ** This algorithm and source code is released to the public domain.
8 **
9 ** Compile-time switches:
10 **
11 **  SKEIN_USE_ASM  -- set bits (256/512/1024) to select which
12 **                    versions use ASM code for block processing
13 **                    [default: use C for all block sizes]
14 **
15 ************************************************************************/
16 
17 #include <string.h>
18 #include <dieharder/skein.h>
19 
/*
 * Build-time configuration and shorthand used by the block function below.
 */

/* Bit mask (256/512/1024) of block sizes whose block function is supplied in
 * assembly; the C version of a size is compiled only when its bit is clear. */
#ifndef SKEIN_USE_ASM
#define SKEIN_USE_ASM   (0)                     /* default is all C code (no ASM) */
#endif

/*
 * Loop/unroll selector.  The 512-bit code reads its setting from the tens
 * decimal digit, (SKEIN_LOOP/10)%10; a digit of 0 means "fully unrolled".
 * The default is written without leading zeros on purpose: a leading 0 would
 * make the constant an octal literal, while the digits are extracted with
 * decimal arithmetic.
 */
#ifndef SKEIN_LOOP
#define SKEIN_LOOP 1                            /* default: unroll 256 and 512, but not 1024 */
#endif

#define BLK_BITS        (WCNT*64)               /* block size in bits; WCNT is a per-function enum */
#define KW_TWK_BASE     (0)                     /* offset of the tweak words inside kw[] */
#define KW_KEY_BASE     (3)                     /* offset of the key words inside kw[]   */
#define ks              (kw + KW_KEY_BASE)      /* key schedule view of the local kw[]   */
#define ts              (kw + KW_TWK_BASE)      /* tweak schedule view of the local kw[] */

/* In debug builds, copy the first two tweak words back into the context;
 * otherwise expands to nothing. */
#ifdef SKEIN_DEBUG
#define DebugSaveTweak(ctx) { (ctx)->h.T[0] = ts[0]; (ctx)->h.T[1] = ts[1]; }
#else
#define DebugSaveTweak(ctx)
#endif
39 
40 
41 /*****************************  Skein_512 ******************************/
42 #if !(SKEIN_USE_ASM & 512)
/*
 * Threefish_512_Process_Blocks64
 *
 * Runs the Threefish-512 rounds over blkCnt consecutive blocks of
 * SKEIN_512_BLOCK_BYTES bytes each, using the key and tweak stored in ctx.
 *
 *   ctx     supplies the key words ctx->Key[0..7] and tweak words
 *           ctx->T[0..1]; the code visible here only reads them.
 *   input   source blocks; each is loaded with Skein_Get64_LSB_First.
 *   output  receives WCNT (8) u64b_t words per block, stored through a
 *           u64b_t pointer in host word order.
 *           NOTE(review): this presumably requires output to be suitably
 *           aligned for u64b_t -- confirm against callers.
 *   blkCnt  number of blocks; must be non-zero because the block loop is a
 *           do/while that always executes once.
 *
 * Every block is processed with the same key and tweak; nothing is fed
 * forward from one block to the next inside this routine.
 */
void Threefish_512_Process_Blocks64(Threefish_512_Ctxt_t *ctx, const u08b_t *input,
                                    void *output, size_t blkCnt) {
    enum { WCNT = SKEIN_512_STATE_WORDS };
#undef  RCNT
#define RCNT  (SKEIN_512_ROUNDS_TOTAL/8)

#ifdef  SKEIN_LOOP                              /* configure how much to unroll the loop */
#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10)
#else
#define SKEIN_UNROLL_512 (0)
#endif

#if SKEIN_UNROLL_512
#if (RCNT % SKEIN_UNROLL_512)
#error "Invalid SKEIN_UNROLL_512"               /* sanity check on unroll count */
#endif
    size_t  r;
    u64b_t  kw[WCNT+4+RCNT*2];                  /* key schedule words : chaining vars + tweak + "rotation"*/
#else
    u64b_t  kw[WCNT+4];                         /* key schedule words : chaining vars + tweak */
#endif
    u64b_t  X0,X1,X2,X3,X4,X5,X6,X7;            /* local copy of vars, for speed */
    u64b_t  w [WCNT];                           /* local copy of input block */
#ifdef SKEIN_DEBUG
    const u64b_t *Xptr[8];                      /* use for debugging (help compiler put Xn in registers) */
    Xptr[0] = &X0;  Xptr[1] = &X1;  Xptr[2] = &X2;  Xptr[3] = &X3;
    Xptr[4] = &X4;  Xptr[5] = &X5;  Xptr[6] = &X6;  Xptr[7] = &X7;
#endif

    Skein_assert(blkCnt != 0);                  /* never call with blkCnt == 0! */

    /* The three base tweak words are never overwritten below (the looping
     * code only writes ts[3] and up), so they are set up once. */
    ts[0] = ctx->T[0];
    ts[1] = ctx->T[1];
    ts[2] = ts[0] ^ ts[1];

    do  {
        /*
         * Build the key schedule for this block.  This has to happen per
         * block: ts and ks are views of the same kw[] array (ts = kw+0,
         * ks = kw+3), and the looping variant of I512 stores the rotated
         * tweak into ts[r+R+2], which is the very cell ks[r+R-1].  After one
         * block in that configuration ks[0..8] no longer hold the key.
         */
        ks[0] = ctx->Key[0];
        ks[1] = ctx->Key[1];
        ks[2] = ctx->Key[2];
        ks[3] = ctx->Key[3];
        ks[4] = ctx->Key[4];
        ks[5] = ctx->Key[5];
        ks[6] = ctx->Key[6];
        ks[7] = ctx->Key[7];
        ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
                ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;

        Skein_Get64_LSB_First(w,input,WCNT);    /* get input block in little-endian format */

        X0   = w[0] + ks[0];                    /* do the first full key injection */
        X1   = w[1] + ks[1];
        X2   = w[2] + ks[2];
        X3   = w[3] + ks[3];
        X4   = w[4] + ks[4];
        X5   = w[5] + ks[5] + ts[0];
        X6   = w[6] + ks[6] + ts[1];
        X7   = w[7] + ks[7];

        input += SKEIN_512_BLOCK_BYTES;

        Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
        /* run the rounds */
#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                  \
    X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
    X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
    X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
    X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6;

#if SKEIN_UNROLL_512 == 0
#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)      /* unrolled */  \
    Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);

#define I512(R)                                                     \
    X0   += ks[((R)+1) % 9];   /* inject the key schedule value */  \
    X1   += ks[((R)+2) % 9];                                        \
    X2   += ks[((R)+3) % 9];                                        \
    X3   += ks[((R)+4) % 9];                                        \
    X4   += ks[((R)+5) % 9];                                        \
    X5   += ks[((R)+6) % 9] + ts[((R)+1) % 3];                      \
    X6   += ks[((R)+7) % 9] + ts[((R)+2) % 3];                      \
    X7   += ks[((R)+8) % 9] +     (R)+1;                            \
    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
#else                                       /* looping version */
#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
    Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);

    /* Instead of reducing indices modulo 9 and 3, the looping form appends
     * the rotated key/tweak words to kw[] as it goes, so indices only grow. */
#define I512(R)                                                     \
    X0   += ks[r+(R)+0];        /* inject the key schedule value */ \
    X1   += ks[r+(R)+1];                                            \
    X2   += ks[r+(R)+2];                                            \
    X3   += ks[r+(R)+3];                                            \
    X4   += ks[r+(R)+4];                                            \
    X5   += ks[r+(R)+5] + ts[r+(R)+0];                              \
    X6   += ks[r+(R)+6] + ts[r+(R)+1];                              \
    X7   += ks[r+(R)+7] +    r+(R)   ;                              \
    ks[r +       (R)+8] = ks[r+(R)-1];  /* rotate key schedule */   \
    ts[r +       (R)+2] = ts[r+(R)-1];                              \
    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);

    for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512)   /* loop thru it */
#endif                         /* end of looped code definitions */
        {
#define R512_8_rounds(R)  /* do 8 full rounds */  \
        R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1);   \
        R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2);   \
        R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3);   \
        R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4);   \
        I512(2*(R));                              \
        R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5);   \
        R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6);   \
        R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7);   \
        R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8);   \
        I512(2*(R)+1);        /* and key injection */

        R512_8_rounds( 0);

#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN)))

  #if   R512_Unroll_R( 1)
        R512_8_rounds( 1);
  #endif
  #if   R512_Unroll_R( 2)
        R512_8_rounds( 2);
  #endif
  #if   R512_Unroll_R( 3)
        R512_8_rounds( 3);
  #endif
  #if   R512_Unroll_R( 4)
        R512_8_rounds( 4);
  #endif
  #if   R512_Unroll_R( 5)
        R512_8_rounds( 5);
  #endif
  #if   R512_Unroll_R( 6)
        R512_8_rounds( 6);
  #endif
  #if   R512_Unroll_R( 7)
        R512_8_rounds( 7);
  #endif
  #if   R512_Unroll_R( 8)
        R512_8_rounds( 8);
  #endif
  #if   R512_Unroll_R( 9)
        R512_8_rounds( 9);
  #endif
  #if   R512_Unroll_R(10)
        R512_8_rounds(10);
  #endif
  #if   R512_Unroll_R(11)
        R512_8_rounds(11);
  #endif
  #if   R512_Unroll_R(12)
        R512_8_rounds(12);
  #endif
  #if   R512_Unroll_R(13)
        R512_8_rounds(13);
  #endif
  #if   R512_Unroll_R(14)
        R512_8_rounds(14);
  #endif
  #if  (SKEIN_UNROLL_512 > 14)
#error  "need more unrolling in Skein_512_Process_Block"
  #endif
        }

        /* store the resulting state words for this block */
        ((u64b_t *) output)[0] = X0;
        ((u64b_t *) output)[1] = X1;
        ((u64b_t *) output)[2] = X2;
        ((u64b_t *) output)[3] = X3;
        ((u64b_t *) output)[4] = X4;
        ((u64b_t *) output)[5] = X5;
        ((u64b_t *) output)[6] = X6;
        ((u64b_t *) output)[7] = X7;

        /*
         * Step to the next output block.  Arithmetic on void * is not
         * standard C, so the step is taken through a byte pointer; this
         * avoids converting the pointer to an integer type and back.
         */
        output = (u08b_t *) output + SKEIN_512_BLOCK_BYTES;
    } while (--blkCnt);
}
231 
232 #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
Skein_512_Process_Block_CodeSize(void)233 size_t Skein_512_Process_Block_CodeSize(void)
234     {
235     return ((u08b_t *) Skein_512_Process_Block_CodeSize) -
236            ((u08b_t *) Skein_512_Process_Block);
237     }
Skein_512_Unroll_Cnt(void)238 uint_t Skein_512_Unroll_Cnt(void)
239     {
240     return SKEIN_UNROLL_512;
241     }
242 #endif
243 #endif
244