1 /***********************************************************************
2 **
3 ** Implementation of the Skein block functions.
4 **
5 ** Source code author: Doug Whiting, 2008.
6 **
7 ** This algorithm and source code is released to the public domain.
8 **
9 ** Compile-time switches:
10 **
11 **  SKEIN_USE_ASM             -- set bits (256/512/1024) to select which
12 **                               versions use ASM code for block processing
13 **                               [default: use C for all block sizes]
14 **
15 ************************************************************************/
16 
17 #include <string.h>
18 #include <dieharder/skein.h>
19 #include <stdio.h>
20 
21 /* 64-bit rotate left --  defined in skein_port.h as macro
22 u64b_t RotL_64(u64b_t x,uint_t N)
23     {
24     return (x << (N & 63)) | (x >> ((64-N) & 63));
25     }
26 */
27 
/* Number of bits in one block.  WCNT (64-bit words per block) is not defined
 * here; it must be in scope wherever this macro is expanded. */
#define BLK_BITS    (WCNT*64)

/*
 * Key injection for subkey number r (same for all block sizes).
 *
 * Adds key-schedule words ks[(r+i) % (WCNT+1)] to every state word X[i],
 * adds two tweak words to X[WCNT-3] and X[WCNT-2], and adds r itself to the
 * last state word.
 *
 * The macro expands inside a block-processing function and relies on these
 * names being in scope there: WCNT, X[], ks[], ts[] and a scratch index i,
 * which is overwritten.  The argument r is evaluated several times, so it
 * must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the whole injection is a
 * single statement: it can be used as the unbraced body of an if/else and
 * still requires the usual trailing semicolon at the call site.
 */
#define InjectKey(r)                                                \
    do {                                                            \
        for (i=0;i < WCNT;i++)                                      \
            X[i] += ks[((r)+i) % (WCNT+1)];                         \
        X[WCNT-3] += ts[((r)+0) % 3];                               \
        X[WCNT-2] += ts[((r)+1) % 3];                               \
        X[WCNT-1] += (r);                /* avoid slide attacks */  \
    } while (0)
37 
38 
39 
/*
 * Apply the Threefish-512 round function to blkCnt consecutive blocks.
 *
 *   ctx     source of the key words ctx->Key[0..WCNT-1] and of the two tweak
 *           words ctx->T[0] and ctx->T[1]; it is only read, never updated.
 *   input   blkCnt blocks of SKEIN_512_BLOCK_BYTES bytes each, loaded with
 *           Skein_Get64_LSB_First.
 *   output  receives WCNT 64-bit words per block.  It is written through a
 *           u64b_t pointer and is also used as the working state while a
 *           block is being processed.
 *           NOTE(review): presumably callers pass storage suitably aligned
 *           for u64b_t -- confirm.
 *   blkCnt  number of blocks to process; must be nonzero.
 *
 * The key schedule and tweak are computed once and reused unchanged for
 * every block.  No feed-forward XOR with the input is applied and no
 * chaining state is written back to ctx: the words left in output are the
 * state after the last key injection.
 */
void Threefish_512_Process_Blocks(Threefish_512_Ctxt_t *ctx, const u08b_t *input,
		void *output, size_t blkCnt) {
	enum { WCNT = SKEIN_512_STATE_WORDS };

	size_t  i,r;                              /* i is also the scratch index used by InjectKey */
	u64b_t  ts[3];                            /* key schedule: tweak */
	u64b_t  ks[WCNT+1];                       /* key schedule: key words + parity word */
	u64b_t  *X = (u64b_t *) output;           /* working state, stored directly in output */
	u64b_t  w [WCNT];                         /* local copy of input block */

	Skein_assert(blkCnt != 0);                /* never call with blkCnt == 0! */
	/*
	 * If the assertion above is compiled out, a zero count would make the
	 * --blkCnt loop test below wrap around and run far past both buffers.
	 * Treat it as "nothing to do" instead.
	 */
	if (blkCnt == 0) {
		return;
	}

	/* precompute the key schedule; it is the same for every block */
	ks[WCNT] = SKEIN_KS_PARITY;
	for (i=0;i < WCNT; i++) {
		ks[i]     = ctx->Key[i];
		ks[WCNT] ^= ctx->Key[i];            /* compute overall parity */
	}
	ts[0] = ctx->T[0];	/* Tweak words */
	ts[1] = ctx->T[1];
	ts[2] = ts[0] ^ ts[1];

	do  {
		Skein_Get64_LSB_First(w,input,WCNT); /* get input block in little-endian format */

		/* subkey 0: first full key injection (the round-number term is 0) */
		for (i=0;i < WCNT; i++) {
			X[i]  = w[i] + ks[i];
		}
		X[WCNT-3] += ts[0];
		X[WCNT-2] += ts[1];

		/*
		 * Each pass of this loop is 8 unrolled rounds.  One round is four
		 * add / rotate / xor mixes on disjoint word pairs; the rotation
		 * constant R_512_d_j belongs to round d of the pass and mix j of
		 * that round.  A subkey is injected after every fourth round.
		 */
		for (r=1;r <= SKEIN_512_ROUNDS_TOTAL/8; r++) {
			/* round 0 */
			X[0] += X[1]; X[1] = RotL_64(X[1],R_512_0_0); X[1] ^= X[0];
			X[2] += X[3]; X[3] = RotL_64(X[3],R_512_0_1); X[3] ^= X[2];
			X[4] += X[5]; X[5] = RotL_64(X[5],R_512_0_2); X[5] ^= X[4];
			X[6] += X[7]; X[7] = RotL_64(X[7],R_512_0_3); X[7] ^= X[6];

			/* round 1 */
			X[2] += X[1]; X[1] = RotL_64(X[1],R_512_1_0); X[1] ^= X[2];
			X[4] += X[7]; X[7] = RotL_64(X[7],R_512_1_1); X[7] ^= X[4];
			X[6] += X[5]; X[5] = RotL_64(X[5],R_512_1_2); X[5] ^= X[6];
			X[0] += X[3]; X[3] = RotL_64(X[3],R_512_1_3); X[3] ^= X[0];

			/* round 2 */
			X[4] += X[1]; X[1] = RotL_64(X[1],R_512_2_0); X[1] ^= X[4];
			X[6] += X[3]; X[3] = RotL_64(X[3],R_512_2_1); X[3] ^= X[6];
			X[0] += X[5]; X[5] = RotL_64(X[5],R_512_2_2); X[5] ^= X[0];
			X[2] += X[7]; X[7] = RotL_64(X[7],R_512_2_3); X[7] ^= X[2];

			/* round 3 */
			X[6] += X[1]; X[1] = RotL_64(X[1],R_512_3_0); X[1] ^= X[6];
			X[0] += X[7]; X[7] = RotL_64(X[7],R_512_3_1); X[7] ^= X[0];
			X[2] += X[5]; X[5] = RotL_64(X[5],R_512_3_2); X[5] ^= X[2];
			X[4] += X[3]; X[3] = RotL_64(X[3],R_512_3_3); X[3] ^= X[4];
			InjectKey(2*r-1);                 /* odd-numbered subkey */

			/* round 4 */
			X[0] += X[1]; X[1] = RotL_64(X[1],R_512_4_0); X[1] ^= X[0];
			X[2] += X[3]; X[3] = RotL_64(X[3],R_512_4_1); X[3] ^= X[2];
			X[4] += X[5]; X[5] = RotL_64(X[5],R_512_4_2); X[5] ^= X[4];
			X[6] += X[7]; X[7] = RotL_64(X[7],R_512_4_3); X[7] ^= X[6];

			/* round 5 */
			X[2] += X[1]; X[1] = RotL_64(X[1],R_512_5_0); X[1] ^= X[2];
			X[4] += X[7]; X[7] = RotL_64(X[7],R_512_5_1); X[7] ^= X[4];
			X[6] += X[5]; X[5] = RotL_64(X[5],R_512_5_2); X[5] ^= X[6];
			X[0] += X[3]; X[3] = RotL_64(X[3],R_512_5_3); X[3] ^= X[0];

			/* round 6 */
			X[4] += X[1]; X[1] = RotL_64(X[1],R_512_6_0); X[1] ^= X[4];
			X[6] += X[3]; X[3] = RotL_64(X[3],R_512_6_1); X[3] ^= X[6];
			X[0] += X[5]; X[5] = RotL_64(X[5],R_512_6_2); X[5] ^= X[0];
			X[2] += X[7]; X[7] = RotL_64(X[7],R_512_6_3); X[7] ^= X[2];

			/* round 7 */
			X[6] += X[1]; X[1] = RotL_64(X[1],R_512_7_0); X[1] ^= X[6];
			X[0] += X[7]; X[7] = RotL_64(X[7],R_512_7_1); X[7] ^= X[0];
			X[2] += X[5]; X[5] = RotL_64(X[5],R_512_7_2); X[5] ^= X[2];
			X[4] += X[3]; X[3] = RotL_64(X[3],R_512_7_3); X[3] ^= X[4];
			InjectKey(2*r);                   /* even-numbered subkey */
		}

		/*
		 * Deliberately no final feed-forward XOR with w[] and no update of
		 * any state in ctx: the block is left in output as computed above.
		 */
		input += SKEIN_512_BLOCK_BYTES;
		X += WCNT;                            /* next block's slot in output */
	}
	while (--blkCnt);
}
123 
#ifdef TSC_TIME
typedef unsigned long long uint64;

#if 0	/* disabled: nothing below is compiled */
/* Constant subtracted from every counter reading returned by time().
 * NOTE(review): presumably the measurement overhead in cycles -- confirm. */
uint64 timingAdjust = 200;

/*
 * Read the x86 time-stamp counter and return it minus timingAdjust.
 *
 * cpuid is executed first (with leaf 0 selected explicitly) and rdtsc then
 * leaves the low half of the counter in eax and the high half in edx.  Both
 * halves are returned through output operands, so the compiler knows which
 * registers the asm writes; cpuid additionally overwrites ebx and ecx, which
 * are listed as clobbers.  Uses GCC-style extended asm with 64-bit register
 * names.
 *
 * NOTE(review): the name collides with the standard library's time() if
 * <time.h> is ever included in this translation unit.
 */
uint64 time() {
	unsigned int lo, hi;
	__asm__ __volatile__ (
			"cpuid\n\t"
			"rdtsc\n\t"
			: "=a" (lo), "=d" (hi)
			: "a" (0)
			: "%rbx", "%rcx");
	return (((uint64) hi << 32) | lo) - timingAdjust;
}

#endif
#endif
144 
145