1 // cipher SPECK -- 128 bit block size -- 256 bit key size -- CTR mode
// taken from (and modified: removed pure crypto-stream generation and separated key expansion)
3 // https://github.com/nsacyber/simon-speck-supercop/blob/master/crypto_stream/speck128256ctr/
4 
5 #include <stdlib.h>
6 #include "portable_endian.h"
7 
8 #include "speck.h"
9 
10 #if defined (__AVX2__)	// AVX support ----------------------------------------------------
11 
12 
// scalar 64-bit circular shifts (left / right) used by the scalar round macros and the key schedule;
// r is expected to be a constant with 0 < r < 64
#define LCS(x,r) (((x)<<r)|((x)>>(64-r)))
#define RCS(x,r) (((x)>>r)|((x)<<(64-r)))

// short aliases for the AVX2 intrinsics operating on four packed 64-bit lanes
#define XOR _mm256_xor_si256
#define AND _mm256_and_si256
#define ADD _mm256_add_epi64
#define SL  _mm256_slli_epi64
#define SR  _mm256_srli_epi64

// per-lane counter offsets; _mm256_set_epi64x lists lanes from highest to lowest, so the lanes
// (low to high) receive 0, 2, 1, 3 -- presumably chosen so that the LOW/HIGH interleaving in
// XOR_STORE emits the four blocks in ascending counter order (TODO confirm against test vectors)
#define _q SET(0x3,0x1,0x2,0x0)
// counter increment between two consecutive vectors (four blocks per vector)
#define _four SET(0x4,0x4,0x4,0x4)

#define SET _mm256_set_epi64x
// SET1: broadcast c into all four lanes of X
#define SET1(X,c) (X=SET(c,c,c,c))
// SET4: broadcast c and add the per-lane offsets _q, i.e. four distinct counter values
#define SET4(X,c) (X=SET(c,c,c,c), X=ADD(X,_q))

// unaligned load/store and the interleaving stores; each call handles 64 bytes (out and out+32),
// XOR_STORE additionally XORs the interleaved (Y,X) words with 64 bytes read from in
#define LOW  _mm256_unpacklo_epi64
#define HIGH _mm256_unpackhi_epi64
#define LD(ip) _mm256_loadu_si256((__m256i *)(ip))
#define ST(ip,X) _mm256_storeu_si256((__m256i *)(ip),X)
#define STORE(out,X,Y) (ST(out,LOW(Y,X)), ST(out+32,HIGH(Y,X)))
#define STORE_ALT(out,X,Y) (ST(out,LOW(X,Y)), ST(out+32,HIGH(X,Y)))
#define XOR_STORE(in,out,X,Y) (ST(out,XOR(LD(in),LOW(Y,X))), ST(out+32,XOR(LD(in+32),HIGH(Y,X))))
#define XOR_STORE_ALT(in,out,X,Y) (ST(out,XOR(LD(in),LOW(X,Y))), ST(out+32,XOR(LD(in+32),HIGH(X,Y))))

// R8 / L8 are byte-shuffle controls that move every byte of a 64-bit lane by one position,
// i.e. rotate each lane right / left by 8 bits with a single shuffle
#define SHFL _mm256_shuffle_epi8
#define R8 SET(0x080f0e0d0c0b0a09LL,0x0007060504030201LL,0x080f0e0d0c0b0a09LL,0x0007060504030201LL)
#define L8 SET(0x0e0d0c0b0a09080fLL,0x0605040302010007LL,0x0e0d0c0b0a09080fLL,0x0605040302010007LL)
#define ROL8(X)  (SHFL(X,L8))
#define ROR8(X)  (SHFL(X,R8))
// generic per-lane rotations built from two shifts; the shifted parts do not overlap,
// so XOR combines them just like OR would
#define ROL(X,r) (XOR(SL(X,r),SR(X,(64-r))))
#define ROR(X,r) (XOR(SR(X,r),SL(X,(64-r))))
45 
// number of rounds / round keys and number of 64-bit key words
#define numrounds   34
#define numkeywords 4

// one round on a pair of vectors: X = ((X ror 8) + Y) ^ k,  Y = (Y rol 3) ^ X
#define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X))

// RxN: one round applied to N blocks; a vector pair (X[j],Y[j]) carries four blocks
#define Rx4(X,Y,k)  (R(X[0],Y[0],k))
#define Rx8(X,Y,k)  (R(X[0],Y[0],k), R(X[1],Y[1],k))
#define Rx12(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k))

// 16-block round, written out with the four vector pairs interleaved;
// note: uses an array named Z as scratch space, which must exist in the calling scope
#define Rx16(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[1]=ROR8(X[1]), X[1]=ADD(X[1],Y[1]), \
		     X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[3]=ROR8(X[3]), X[3]=ADD(X[3],Y[3]), \
		     X[0]=XOR(X[0],k), X[1]=XOR(X[1],k), X[2]=XOR(X[2],k), X[3]=XOR(X[3],k), \
		     Z[0]=Y[0], Z[1]=Y[1], Z[2]=Y[2], Z[3]=Y[3],	\
		     Z[0]=SL(Z[0],3),  Y[0]=SR(Y[0],61), Z[1]=SL(Z[1],3), Y[1]=SR(Y[1],61), \
		     Z[2]=SL(Z[2],3),  Y[2]=SR(Y[2],61), Z[3]=SL(Z[3],3), Y[3]=SR(Y[3],61), \
		     Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2]), Y[3]=XOR(Y[3],Z[3]), \
		     Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2]), Y[3]=XOR(X[3],Y[3]))

// scalar rounds: two blocks held in x[0..1], y[0..1]
#define Rx2(x,y,k) (x[0]=RCS(x[0],8), x[1]=RCS(x[1],8), x[0]+=y[0], x[1]+=y[1],	\
                    x[0]^=k, x[1]^=k, y[0]=LCS(y[0],3), y[1]=LCS(y[1],3), y[0]^=x[0], y[1]^=x[1])

// scalar round: one block held in x[0], y[0]
#define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0]^=x[0])

// scalar round on plain (non-array) variables
#define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x)

// full encryption: applies Rx<n> with the round keys k[0] .. k[33] in order;
// n is token-pasted onto "Rx" and therefore has to be a literal 1, 2, 4, 8, 12 or 16
#define Encrypt(X,Y,k,n) (Rx##n(X,Y,k[0]),  Rx##n(X,Y,k[1]),  Rx##n(X,Y,k[2]),  Rx##n(X,Y,k[3]),  Rx##n(X,Y,k[4]),  Rx##n(X,Y,k[5]),  Rx##n(X,Y,k[6]),  Rx##n(X,Y,k[7]), \
			  Rx##n(X,Y,k[8]),  Rx##n(X,Y,k[9]),  Rx##n(X,Y,k[10]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y,k[15]), \
			  Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y,k[23]), \
			  Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y,k[31]), \
			  Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33]))

// key schedule step i: stores the current key word Y broadcast into k[i] (vector form) and
// as a scalar into key[i], then advances (X,Y) by one round that uses i as round constant
#define RK(X,Y,k,key,i)   (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS(Y,3), Y^=X)

// complete key expansion: 34 steps, cycling through B, C, D as the second word while A
// always is the word that gets stored as round key
#define EK(A,B,C,D,k,key) (RK(B,A,k,key,0),  RK(C,A,k,key,1),  RK(D,A,k,key,2),  RK(B,A,k,key,3),  RK(C,A,k,key,4),  RK(D,A,k,key,5),  RK(B,A,k,key,6), \
			   RK(C,A,k,key,7),  RK(D,A,k,key,8),  RK(B,A,k,key,9),  RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \
			   RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \
			   RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \
			   RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33))
84 
// AVX2 worker: processes exactly numbytes bytes of CTR mode and advances the block counter nonce[0]
// by one per 16-byte block; nonce[1] is used unchanged as the other half of every counter block.
//   numbytes == 16           : scalar, writes the raw keystream block to out (in is not read)
//   numbytes == 32           : scalar, out = in XOR keystream
//   numbytes == 64/128/192   : vectorized with 1/2/3 vector pairs, out = in XOR keystream
//   any other value          : handled like 256 for the encryption itself
// always returns 0
static int speck_encrypt_xor(unsigned char *out, const unsigned char *in, u64 nonce[], speck_context_t *ctx, int numbytes) {

  u64  x[2], y[2];
  u256 X[4], Y[4], Z[4];	// Z is not used directly here: the Rx16 round macro refers to it by name

  switch (numbytes) {
    case 16:
      y[0] = nonce[0]++;
      x[0] = nonce[1];
      Encrypt (x, y, ctx->key, 1);
      ((u64 *)out)[1] = x[0];
      ((u64 *)out)[0] = y[0];
      return 0;

    case 32:
      y[0] = nonce[0]++;
      x[0] = nonce[1];
      y[1] = nonce[0]++;
      x[1] = nonce[1];
      Encrypt (x, y, ctx->key, 2);
      ((u64 *)out)[1] = x[0] ^ ((u64 *)in)[1];
      ((u64 *)out)[0] = y[0] ^ ((u64 *)in)[0];
      ((u64 *)out)[3] = x[1] ^ ((u64 *)in)[3];
      ((u64 *)out)[2] = y[1] ^ ((u64 *)in)[2];
      return 0;

    default:
      break;
  }

  // first vector pair: constant nonce half in X, four consecutive counters in Y
  SET1 (X[0], nonce[1]);
  SET4 (Y[0], nonce[0]);

  // every further vector pair continues the counter sequence by four
  switch (numbytes) {
    case 64:
      Encrypt (X, Y, ctx->rk, 4);
      break;

    case 128:
      X[1] = X[0];
      Y[1] = ADD (Y[0], _four);
      Encrypt (X, Y, ctx->rk, 8);
      break;

    case 192:
      X[1] = X[0];
      Y[1] = ADD (Y[0], _four);
      X[2] = X[0];
      Y[2] = ADD (Y[1], _four);
      Encrypt (X, Y, ctx->rk, 12);
      break;

    default:
      X[1] = X[0];
      Y[1] = ADD (Y[0], _four);
      X[2] = X[0];
      Y[2] = ADD (Y[1], _four);
      X[3] = X[0];
      Y[3] = ADD (Y[2], _four);
      Encrypt (X, Y, ctx->rk, 16);
      break;
  }

  // one counter step per 16-byte block
  nonce[0] += (numbytes >> 4);

  XOR_STORE (in, out, X[0], Y[0]);
  if (numbytes >= 128) {
    XOR_STORE (in +  64, out +  64, X[1], Y[1]);
    if (numbytes >= 192) {
      XOR_STORE (in + 128, out + 128, X[2], Y[2]);
      if (numbytes >= 256)
        XOR_STORE (in + 192, out + 192, X[3], Y[3]);
    }
  }

  return 0;
}
140 
141 
// SPECK CTR mode (AVX2 build): XORs inlen bytes from in with the keystream and writes them to out.
// n supplies the 16-byte initial counter block, read as two native 64-bit words;
// the first word is the running block counter. Always returns 0.
int speck_ctr( unsigned char *out, const unsigned char *in, unsigned long long inlen,
	       const unsigned char *n, speck_context_t *ctx) {

  // chunk sizes tried once each, in this order, after the 256-byte main loop
  static const int chunk_sizes[] = { 192, 128, 64, 32 };

  unsigned char block[16];
  u64 * const keystream = (u64 *)block;
  u64 nonce[2];
  int i;

  if (inlen == 0)
    return 0;

  nonce[0] = ((u64 *)n)[0];
  nonce[1] = ((u64 *)n)[1];

  for (; inlen >= 256; inlen -= 256) {
    speck_encrypt_xor (out, in, nonce, ctx, 256);
    in += 256;
    out += 256;
  }

  for (i = 0; i < 4; i++) {
    const int chunk = chunk_sizes[i];

    if (inlen < (unsigned long long)chunk)
      continue;
    speck_encrypt_xor (out, in, nonce, ctx, chunk);
    in += chunk;
    out += chunk;
    inlen -= chunk;
  }

  // a single remaining full block: fetch the raw keystream and XOR it in here
  if (inlen >= 16) {
    u64 *dst = (u64 *)out;
    const u64 *src = (const u64 *)in;

    speck_encrypt_xor (block, in, nonce, ctx, 16);
    dst[0] = keystream[0] ^ src[0];
    dst[1] = keystream[1] ^ src[1];
    in += 16;
    out += 16;
    inlen -= 16;
  }

  // trailing partial block: only the first inlen keystream bytes are used
  if (inlen > 0) {
    speck_encrypt_xor (block, in, nonce, ctx, 16);
    for (i = 0; i < inlen; i++)
      out[i] = block[i] ^ in[i];
  }

  return 0;
}
196 
197 
// key expansion (AVX2 build): reads four native 64-bit words from k and lets the EK macro
// fill ctx->rk and ctx->key with the 34 round keys; always returns 0
int speck_expand_key (const unsigned char *k, speck_context_t *ctx) {

  const u64 *words = (const u64 *)k;
  u64 K[4];

  K[0] = words[0];
  K[1] = words[1];
  K[2] = words[2];
  K[3] = words[3];

  EK (K[0], K[1], K[2], K[3], ctx->rk, ctx->key);

  return 0;
}
209 
210 
211 #elif defined (__SSE4_2__) // SSE support -------------------------------------------------
212 
213 
// scalar 64-bit circular shifts (left / right) used by the scalar round macros and the key schedule;
// r is expected to be a constant with 0 < r < 64
#define LCS(x,r) (((x)<<r)|((x)>>(64-r)))
#define RCS(x,r) (((x)>>r)|((x)<<(64-r)))

// short aliases for the SSE intrinsics operating on two packed 64-bit lanes
#define XOR _mm_xor_si128
#define AND _mm_and_si128
#define ADD _mm_add_epi64
#define SL  _mm_slli_epi64
#define SR  _mm_srli_epi64

// per-lane counter offsets (_mm_set_epi64x takes the high lane first: low lane +0, high lane +1)
#define _q SET(0x1,0x0)
// counter increment between two consecutive vectors (two blocks per vector)
#define _two SET(0x2,0x2)

#define SET _mm_set_epi64x
// SET1: broadcast c into both lanes of X
#define SET1(X,c) (X=SET(c,c))
// SET2: broadcast c and add the per-lane offsets _q, i.e. two consecutive counter values
#define SET2(X,c) (X=SET(c,c), X=ADD(X,_q))

// unaligned load/store and the interleaving stores; each call handles 32 bytes (out and out+16),
// XOR_STORE additionally XORs the interleaved (Y,X) words with 32 bytes read from in
#define LOW _mm_unpacklo_epi64
#define HIGH _mm_unpackhi_epi64
#define LD(ip) _mm_loadu_si128((__m128i *)(ip))
#define ST(ip,X) _mm_storeu_si128((__m128i *)(ip),X)
#define STORE(out,X,Y) (ST(out,LOW(Y,X)), ST(out+16,HIGH(Y,X)))
#define STORE_ALT(out,X,Y) (ST(out,LOW(X,Y)), ST(out+16,HIGH(X,Y)))
#define XOR_STORE(in,out,X,Y) (ST(out,XOR(LD(in),LOW(Y,X))), ST(out+16,XOR(LD(in+16),HIGH(Y,X))))
#define XOR_STORE_ALT(in,out,X,Y) (ST(out,XOR(LD(in),LOW(X,Y))), ST(out+16,XOR(LD(in+16),HIGH(X,Y))))

// R8 / L8 are byte-shuffle controls that move every byte of a 64-bit lane by one position,
// i.e. rotate each lane right / left by 8 bits with a single shuffle
#define SHFL _mm_shuffle_epi8
#define R8   _mm_set_epi64x(0x080f0e0d0c0b0a09LL,0x0007060504030201LL)
#define L8   _mm_set_epi64x(0x0e0d0c0b0a09080fLL,0x0605040302010007LL)
#define ROL8(X)  (SHFL(X,L8))
#define ROR8(X)  (SHFL(X,R8))
// generic per-lane rotations built from two shifts; the shifted parts do not overlap,
// so XOR combines them just like OR would
#define ROL(X,r) (XOR(SL(X,r),SR(X,(64-r))))
#define ROR(X,r) (XOR(SR(X,r),SL(X,(64-r))))
246 
// number of rounds / round keys and number of 64-bit key words
#define numrounds   34
#define numkeywords 4

// one round on a pair of vectors: X = ((X ror 8) + Y) ^ k,  Y = (Y rol 3) ^ X
#define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X))

// RxN: one round applied to N blocks; a vector pair (X[j],Y[j]) carries two blocks
#define Rx2(X,Y,k) (R(X[0],Y[0],k))
#define Rx4(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k))
#define Rx6(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k))

// 8-block round, written out with the four vector pairs interleaved;
// note: uses an array named Z as scratch space, which must exist in the calling scope
#define Rx8(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[1]=ROR8(X[1]), X[1]=ADD(X[1],Y[1]), \
                    X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[3]=ROR8(X[3]), X[3]=ADD(X[3],Y[3]), \
                    X[0]=XOR(X[0],k), X[1]=XOR(X[1],k), X[2]=XOR(X[2],k), X[3]=XOR(X[3],k), \
                    Z[0]=Y[0], Z[1]=Y[1], Z[2]=Y[2], Z[3]=Y[3],         \
                    Z[0]=SL(Z[0],3),  Y[0]=SR(Y[0],61), Z[1]=SL(Z[1],3), Y[1]=SR(Y[1],61), \
                    Z[2]=SL(Z[2],3),  Y[2]=SR(Y[2],61), Z[3]=SL(Z[3],3), Y[3]=SR(Y[3],61), \
                    Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2]), Y[3]=XOR(Y[3],Z[3]), \
                    Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2]), Y[3]=XOR(X[3],Y[3]))

// scalar round: one block held in x[0], y[0]
#define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0]^=x[0])

// scalar round on plain (non-array) variables
#define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x)

// full encryption: applies Rx<n> with the round keys k[0] .. k[33] in order;
// n is token-pasted onto "Rx" and therefore has to be a literal 1, 2, 4, 6 or 8
#define Encrypt(X,Y,k,n) (Rx##n(X,Y,k[0]),  Rx##n(X,Y,k[1]),  Rx##n(X,Y,k[2]),  Rx##n(X,Y,k[3]),  Rx##n(X,Y,k[4]),  Rx##n(X,Y,k[5]),  Rx##n(X,Y,k[6]),  Rx##n(X,Y,k[7]), \
                          Rx##n(X,Y,k[8]),  Rx##n(X,Y,k[9]),  Rx##n(X,Y,k[10]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y,k[15]), \
                          Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y,k[23]), \
                          Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y,k[31]), \
                          Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33]))

// key schedule step i: stores the current key word Y broadcast into k[i] (vector form) and
// as a scalar into key[i], then advances (X,Y) by one round that uses i as round constant
#define RK(X,Y,k,key,i)   (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS(Y,3), Y^=X)

// complete key expansion: 34 steps, cycling through B, C, D as the second word while A
// always is the word that gets stored as round key
#define EK(A,B,C,D,k,key) (RK(B,A,k,key,0),  RK(C,A,k,key,1),  RK(D,A,k,key,2),  RK(B,A,k,key,3),  RK(C,A,k,key,4),  RK(D,A,k,key,5),  RK(B,A,k,key,6), \
                           RK(C,A,k,key,7),  RK(D,A,k,key,8),  RK(B,A,k,key,9),  RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \
                           RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \
                           RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \
                           RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33))
282 
283 
// SSE worker: processes exactly numbytes bytes of CTR mode and advances the block counter nonce[0]
// by one per 16-byte block; nonce[1] is used unchanged as the other half of every counter block.
//   numbytes == 16        : scalar, writes the raw keystream block to out (in is not read)
//   numbytes == 32/64/96  : vectorized with 1/2/3 vector pairs, out = in XOR keystream
//   any other value       : handled like 128 for the encryption itself
// the context is received by value; always returns 0
static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 nonce[], const speck_context_t ctx, int numbytes) {

  u64  x[2], y[2];
  u128 X[4], Y[4], Z[4];	// Z is not used directly here: the Rx8 round macro refers to it by name

  if (numbytes == 16) {
    y[0] = nonce[0]++;
    x[0] = nonce[1];
    Encrypt (x, y, ctx.key, 1);
    ((u64 *)out)[1] = x[0];
    ((u64 *)out)[0] = y[0];
    return 0;
  }

  // first vector pair: constant nonce half in X, two consecutive counters in Y
  SET1 (X[0], nonce[1]);
  SET2 (Y[0], nonce[0]);

  // every further vector pair continues the counter sequence by two
  switch (numbytes) {
    case 32:
      Encrypt (X, Y, ctx.rk, 2);
      break;

    case 64:
      X[1] = X[0];
      Y[1] = ADD (Y[0], _two);
      Encrypt (X, Y, ctx.rk, 4);
      break;

    case 96:
      X[1] = X[0];
      Y[1] = ADD (Y[0], _two);
      X[2] = X[0];
      Y[2] = ADD (Y[1], _two);
      Encrypt (X, Y, ctx.rk, 6);
      break;

    default:
      X[1] = X[0];
      Y[1] = ADD (Y[0], _two);
      X[2] = X[0];
      Y[2] = ADD (Y[1], _two);
      X[3] = X[0];
      Y[3] = ADD (Y[2], _two);
      Encrypt (X, Y, ctx.rk, 8);
      break;
  }

  // one counter step per 16-byte block
  nonce[0] += (numbytes >> 4);

  XOR_STORE (in, out, X[0], Y[0]);
  if (numbytes >= 64) {
    XOR_STORE (in + 32, out + 32, X[1], Y[1]);
    if (numbytes >= 96) {
      XOR_STORE (in + 64, out + 64, X[2], Y[2]);
      if (numbytes >= 128)
        XOR_STORE (in + 96, out + 96, X[3], Y[3]);
    }
  }

  return 0;
}
327 
328 
speck_ctr(unsigned char * out,const unsigned char * in,unsigned long long inlen,const unsigned char * n,const speck_context_t ctx)329 int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long inlen,
330 	       const unsigned char *n, const speck_context_t ctx) {
331 
332   int i;
333   u64 nonce[2];
334   unsigned char block[16];
335   u64 * const block64 = (u64 *)block;
336 
337   if (!inlen)
338     return 0;
339 
340   nonce[0] = ((u64 *)n)[0];
341   nonce[1] = ((u64 *)n)[1];
342 
343   while (inlen >= 128) {
344     speck_encrypt_xor (out, in, nonce, ctx, 128);
345     in += 128; inlen -= 128; out += 128;
346   }
347 
348   if (inlen >= 96) {
349     speck_encrypt_xor (out, in, nonce, ctx, 96);
350     in += 96; inlen -= 96; out += 96;
351   }
352 
353   if (inlen >= 64) {
354     speck_encrypt_xor (out, in, nonce, ctx, 64);
355     in += 64; inlen -= 64; out += 64;
356   }
357 
358   if (inlen >= 32) {
359     speck_encrypt_xor (out, in, nonce, ctx, 32);
360     in += 32; inlen -= 32; out += 32;
361   }
362 
363   if (inlen >= 16) {
364     speck_encrypt_xor (block, in, nonce, ctx, 16);
365     ((u64 *)out)[0] = block64[0] ^ ((u64 *)in)[0];
366     ((u64 *)out)[1] = block64[1] ^ ((u64 *)in)[1];
367     in += 16; inlen -= 16; out += 16;
368   }
369 
370   if (inlen > 0) {
371     speck_encrypt_xor (block, in, nonce, ctx, 16);
372     for (i = 0; i < inlen; i++)
373       out[i] = block[i] ^ in[i];
374   }
375 
376   return 0;
377 }
378 
379 
// key expansion (SSE build): reads four native 64-bit words from k and lets the EK macro
// fill ctx->rk and ctx->key with the 34 round keys; always returns 0
int speck_expand_key (const unsigned char *k, speck_context_t *ctx) {

  const u64 *words = (const u64 *)k;
  u64 K[4];

  K[0] = words[0];
  K[1] = words[1];
  K[2] = words[2];
  K[3] = words[3];

  EK (K[0], K[1], K[2], K[3], ctx->rk, ctx->key);

  return 0;
}
391 
392 
393 #elif defined (__ARM_NEON)	// NEON support -------------------------------------------
394 
395 
// scalar 64-bit circular shifts (left / right) used by the scalar round macros and the key schedule;
// r is expected to be a constant with 0 < r < 64
#define LCS(x,r) (((x)<<r)|((x)>>(64-r)))
#define RCS(x,r) (((x)>>r)|((x)<<(64-r)))

// short aliases for the NEON intrinsics operating on two packed 64-bit lanes
#define XOR veorq_u64
#define AND vandq_u64
#define ADD vaddq_u64
#define SL vshlq_n_u64
#define SR vshrq_n_u64

// SET(a,b): build a 128-bit vector from two 64-bit values, a in the low and b in the high lane
#define SET(a,b) vcombine_u64((uint64x1_t)(a),(uint64x1_t)(b))
// SET1: broadcast c into both lanes of X
#define SET1(X,c) (X=SET(c,c))
// SET2: X gets the counters c+1 (low lane) and c (high lane);
// side effect: c itself is advanced by 2, so the caller's counter is updated right here
#define SET2(X,c) (SET1(X,c), X=ADD(X,SET(0x1ll,0x0ll)),c+=2)

// lane extraction and stores; STORE writes the high-lane block first, then the low-lane block,
// XOR_STORE first XORs X and Y (modifying them) with the matching words of in and then stores
#define LOW(Z) vgetq_lane_u64(Z,0)
#define HIGH(Z) vgetq_lane_u64(Z,1)
#define STORE(ip,X,Y) (((u64 *)(ip))[0]=HIGH(Y), ((u64 *)(ip))[1]=HIGH(X), ((u64 *)(ip))[2]=LOW(Y), ((u64 *)(ip))[3]=LOW(X))
#define XOR_STORE(in,out,X,Y) (Y=XOR(Y,SET(((u64 *)(in))[2],((u64 *)(in))[0])), X=XOR(X,SET(((u64 *)(in))[3],((u64 *)(in))[1])), STORE(out,X,Y))

// per-lane rotation: shift left by 64-r, then shift-right-and-insert the original by r
#define ROR(X,r) vsriq_n_u64(SL(X,(64-r)),X,r)
#define ROL(X,r) ROR(X,(64-r))

// byte lookup tables that move every byte of a 64-bit half by one position,
// i.e. rotate each lane right / left by 8 bits via table lookup
#define tableR vcreate_u8(0x0007060504030201LL)
#define tableL vcreate_u8(0x0605040302010007LL)
#define ROR8(X) SET(vtbl1_u8((uint8x8_t)vget_low_u64(X),tableR), vtbl1_u8((uint8x8_t)vget_high_u64(X),tableR))
#define ROL8(X) SET(vtbl1_u8((uint8x8_t)vget_low_u64(X),tableL), vtbl1_u8((uint8x8_t)vget_high_u64(X),tableL))
421 
// number of rounds / round keys and number of 64-bit key words
#define numrounds 34
#define numkeywords 4

// one round on a pair of vectors: X = ((X ror 8) + Y) ^ k,  Y = (Y rol 3) ^ X
#define R(X,Y,k) (X=XOR(ADD(ROR8(X),Y),k), Y=XOR(ROL(Y,3),X))

// RxN: one round applied to N blocks; a vector pair (X[j],Y[j]) carries two blocks
#define Rx2(X,Y,k) (R(X[0],Y[0],k))

#define Rx4(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k))
#define Rx6(X,Y,k) (R(X[0],Y[0],k), R(X[1],Y[1],k), R(X[2],Y[2],k))
// 8-block round, written out with the four vector pairs interleaved;
// note: uses an array named Z as scratch space, which must exist in the calling scope
#define Rx8(X,Y,k) (X[0]=ROR8(X[0]), X[0]=ADD(X[0],Y[0]), X[0]=XOR(X[0],k), X[1]=ROR8(X[1]), X[1]=ADD(X[1],Y[1]), X[1]=XOR(X[1],k), \
		    X[2]=ROR8(X[2]), X[2]=ADD(X[2],Y[2]), X[2]=XOR(X[2],k), X[3]=ROR8(X[3]), X[3]=ADD(X[3],Y[3]), X[3]=XOR(X[3],k), \
                    Z[0]=SL(Y[0],3), Z[1]=SL(Y[1],3), Z[2]=SL(Y[2],3), Z[3]=SL(Y[3],3),	\
                    Y[0]=SR(Y[0],61), Y[1]=SR(Y[1],61), Y[2]=SR(Y[2],61), Y[3]=SR(Y[3],61), \
                    Y[0]=XOR(Y[0],Z[0]), Y[1]=XOR(Y[1],Z[1]), Y[2]=XOR(Y[2],Z[2]), Y[3]=XOR(Y[3],Z[3]),	\
                    Y[0]=XOR(X[0],Y[0]), Y[1]=XOR(X[1],Y[1]), Y[2]=XOR(X[2],Y[2]), Y[3]=XOR(X[3],Y[3]))

// scalar round: one block held in x[0], y[0]
#define Rx1(x,y,k) (x[0]=RCS(x[0],8), x[0]+=y[0], x[0]^=k, y[0]=LCS(y[0],3), y[0]^=x[0])

// scalar round on plain (non-array) variables
#define Rx1b(x,y,k) (x=RCS(x,8), x+=y, x^=k, y=LCS(y,3), y^=x)

// full encryption: applies Rx<n> with the round keys k[0] .. k[33] in order;
// n is token-pasted onto "Rx" and therefore has to be a literal 1, 2, 4, 6 or 8
#define Encrypt(X,Y,k,n) (Rx##n(X,Y,k[0]),  Rx##n(X,Y,k[1]),  Rx##n(X,Y,k[2]),  Rx##n(X,Y,k[3]),  Rx##n(X,Y,k[4]),  Rx##n(X,Y,k[5]),  Rx##n(X,Y,k[6]),  Rx##n(X,Y,k[7]), \
			  Rx##n(X,Y,k[8]),  Rx##n(X,Y,k[9]),  Rx##n(X,Y,k[10]), Rx##n(X,Y,k[11]), Rx##n(X,Y,k[12]), Rx##n(X,Y,k[13]), Rx##n(X,Y,k[14]), Rx##n(X,Y,k[15]), \
			  Rx##n(X,Y,k[16]), Rx##n(X,Y,k[17]), Rx##n(X,Y,k[18]), Rx##n(X,Y,k[19]), Rx##n(X,Y,k[20]), Rx##n(X,Y,k[21]), Rx##n(X,Y,k[22]), Rx##n(X,Y,k[23]), \
			  Rx##n(X,Y,k[24]), Rx##n(X,Y,k[25]), Rx##n(X,Y,k[26]), Rx##n(X,Y,k[27]), Rx##n(X,Y,k[28]), Rx##n(X,Y,k[29]), Rx##n(X,Y,k[30]), Rx##n(X,Y,k[31]), \
			  Rx##n(X,Y,k[32]), Rx##n(X,Y,k[33]))

// key schedule step i: stores the current key word Y broadcast into k[i] (vector form) and
// as a scalar into key[i], then advances (X,Y) by one round that uses i as round constant
#define RK(X,Y,k,key,i) (SET1(k[i],Y), key[i]=Y, X=RCS(X,8), X+=Y, X^=i, Y=LCS(Y,3), Y^=X)

// complete key expansion: 34 steps, cycling through B, C, D as the second word while A
// always is the word that gets stored as round key
#define EK(A,B,C,D,k,key) (RK(B,A,k,key,0),  RK(C,A,k,key,1),  RK(D,A,k,key,2),  RK(B,A,k,key,3),  RK(C,A,k,key,4),  RK(D,A,k,key,5),  RK(B,A,k,key,6),	\
			   RK(C,A,k,key,7),  RK(D,A,k,key,8),  RK(B,A,k,key,9),  RK(C,A,k,key,10), RK(D,A,k,key,11), RK(B,A,k,key,12), RK(C,A,k,key,13), \
			   RK(D,A,k,key,14), RK(B,A,k,key,15), RK(C,A,k,key,16), RK(D,A,k,key,17), RK(B,A,k,key,18), RK(C,A,k,key,19), RK(D,A,k,key,20), \
			   RK(B,A,k,key,21), RK(C,A,k,key,22), RK(D,A,k,key,23), RK(B,A,k,key,24), RK(C,A,k,key,25), RK(D,A,k,key,26), RK(B,A,k,key,27), \
			   RK(C,A,k,key,28), RK(D,A,k,key,29), RK(B,A,k,key,30), RK(C,A,k,key,31), RK(D,A,k,key,32), RK(B,A,k,key,33))
455 
456 
// NEON worker: processes exactly numbytes bytes of CTR mode; nonce[1] is used unchanged as the
// other half of every counter block. The block counter nonce[0] is advanced by 1 in the scalar
// case and by 2 inside every SET2 in the vector cases (SET2 modifies its second argument).
//   numbytes == 16        : scalar, writes the raw keystream block to out (in is not read)
//   numbytes == 32/64/96  : vectorized with 1/2/3 vector pairs, out = in XOR keystream
//   any other value       : handled like 128 for the encryption itself
// always returns 0
static int speck_encrypt_xor (unsigned char *out, const unsigned char *in, u64 nonce[], speck_context_t *ctx, int numbytes) {

  u64  x[2], y[2];
  u128 X[4], Y[4], Z[4];	// Z is not used directly here: the Rx8 round macro refers to it by name

  if (numbytes == 16) {
    y[0] = nonce[0]++;
    x[0] = nonce[1];
    Encrypt (x, y, ctx->key, 1);
    ((u64 *)out)[1] = x[0];
    ((u64 *)out)[0] = y[0];
    return 0;
  }

  // first vector pair: constant nonce half in X, two consecutive counters in Y
  SET1 (X[0], nonce[1]);
  SET2 (Y[0], nonce[0]);

  // the SET2 calls have to stay in ascending order as each one moves the counter on
  switch (numbytes) {
    case 32:
      Encrypt (X, Y, ctx->rk, 2);
      break;

    case 64:
      X[1] = X[0];
      SET2 (Y[1], nonce[0]);
      Encrypt (X, Y, ctx->rk, 4);
      break;

    case 96:
      X[1] = X[0];
      SET2 (Y[1], nonce[0]);
      X[2] = X[0];
      SET2 (Y[2], nonce[0]);
      Encrypt (X, Y, ctx->rk, 6);
      break;

    default:
      X[1] = X[0];
      SET2 (Y[1], nonce[0]);
      X[2] = X[0];
      SET2 (Y[2], nonce[0]);
      X[3] = X[0];
      SET2 (Y[3], nonce[0]);
      Encrypt (X, Y, ctx->rk, 8);
      break;
  }

  XOR_STORE (in, out, X[0], Y[0]);
  if (numbytes >= 64) {
    XOR_STORE (in +  32, out +  32, X[1], Y[1]);
    if (numbytes >= 96) {
      XOR_STORE (in +  64, out +  64, X[2], Y[2]);
      if (numbytes >= 128)
        XOR_STORE (in +  96, out +  96, X[3], Y[3]);
    }
  }

  return 0;
}
498 
499 
// SPECK CTR mode (NEON build): XORs inlen bytes from in with the keystream and writes them to out.
// n supplies the 16-byte initial counter block, read as two native 64-bit words;
// the first word is the running block counter. Always returns 0.
int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long inlen,
	       const unsigned char *n, speck_context_t *ctx) {

  // chunk sizes tried once each, in this order, after the 128-byte main loop
  static const int chunk_sizes[] = { 96, 64, 32 };

  unsigned char block[16];
  u64 *const keystream = (u64 *)block;
  u64 nonce[2];
  int i;

  if (inlen == 0)
    return 0;

  nonce[0] = ((u64 *)n)[0];
  nonce[1] = ((u64 *)n)[1];

  for (; inlen >= 128; inlen -= 128) {
    speck_encrypt_xor (out, in, nonce, ctx, 128);
    in += 128;
    out += 128;
  }

  for (i = 0; i < 3; i++) {
    const int chunk = chunk_sizes[i];

    if (inlen < (unsigned long long)chunk)
      continue;
    speck_encrypt_xor (out, in, nonce, ctx, chunk);
    in += chunk;
    out += chunk;
    inlen -= chunk;
  }

  // a single remaining full block: fetch the raw keystream and XOR it in here
  if (inlen >= 16) {
    u64 *dst = (u64 *)out;
    const u64 *src = (const u64 *)in;

    speck_encrypt_xor (block, in, nonce, ctx, 16);
    dst[0] = keystream[0] ^ src[0];
    dst[1] = keystream[1] ^ src[1];
    in += 16;
    out += 16;
    inlen -= 16;
  }

  // trailing partial block: only the first inlen keystream bytes are used
  if (inlen > 0) {
    speck_encrypt_xor (block, in, nonce, ctx, 16);
    for (i = 0; i < inlen; i++)
      out[i] = block[i] ^ in[i];
  }

  return 0;
}
549 
550 
// key expansion (NEON build): reads four native 64-bit words from k and lets the EK macro
// fill ctx->rk and ctx->key with the 34 round keys; always returns 0
int speck_expand_key (const unsigned char *k, speck_context_t *ctx) {

  const u64 *words = (const u64 *)k;
  u64 K[4];

  K[0] = words[0];
  K[1] = words[1];
  K[2] = words[2];
  K[3] = words[3];

  EK (K[0], K[1], K[2], K[3], ctx->rk, ctx->key);

  return 0;
}
562 
563 
564 #else 		// plain C ----------------------------------------------------------------
565 
566 
// 64-bit rotate right / left by r bits (0 < r < 64)
#define ROR(x,r) (((x)<<(64-(r))) | ((x)>>(r)))
#define ROL(x,r) (((x)>>(64-(r))) | ((x)<<(r)))
// one SPECK round, updates both x and y in place: x = ((x ror 8) + y) ^ k,  y = (y rol 3) ^ x
#define R(x,y,k) (x = (ROR(x,8) + (y)) ^ (k), y = ROL(y,3) ^ (x))
570 
571 
// encrypts one block in place: *u and *v are the two 64-bit halves which are run through
// 34 rounds using ctx->key[0] .. ctx->key[33]; always returns 0
static int speck_encrypt (u64 *u, u64 *v, speck_context_t *ctx) {

  u64 hi = *u;
  u64 lo = *v;
  int round = 0;

  while (round < 34) {
    R (hi, lo, ctx->key[round]);
    round++;
  }

  *u = hi;
  *v = lo;

  return 0;
}
583 
584 
// SPECK CTR mode (plain C build): XORs inlen bytes from in with the keystream and writes them
// to out; in and out may be the same buffer.
//   n   : 16-byte initial counter block, read as two little-endian 64-bit words;
//         the first word is the running block counter, the second one stays constant
//   ctx : expanded key as prepared by speck_expand_key
// Every keystream block is serialized little-endian (counter half first) and applied byte by byte.
// That keeps full and partial blocks consistent on any host byte order, and it avoids
// both heap allocation and 64-bit accesses to possibly unaligned caller buffers.
// Always returns 0.
int speck_ctr (unsigned char *out, const unsigned char *in, unsigned long long inlen,
	       const unsigned char *n, speck_context_t *ctx) {

  u64 nonce[2] = { 0, 0 };
  u64 x, y;
  unsigned char block[16];
  unsigned int i, take;

  if (!inlen)
    return 0;

  // assemble both nonce words from their little-endian byte representation
  for (i = 8; i-- > 0; ) {
    nonce[0] = (nonce[0] << 8) | n[i];
    nonce[1] = (nonce[1] << 8) | n[i + 8];
  }

  while (inlen > 0) {
    x = nonce[1]; y = nonce[0]; nonce[0]++;
    speck_encrypt (&x, &y, ctx);

    // keystream block: y in bytes 0..7, x in bytes 8..15, both little-endian
    for (i = 0; i < 8; i++) {
      block[i]     = (unsigned char)(y >> (8 * i));
      block[i + 8] = (unsigned char)(x >> (8 * i));
    }

    // the last block may be a partial one
    take = (inlen < 16) ? (unsigned int)inlen : 16;
    for (i = 0; i < take; i++)
      out[i] = block[i] ^ in[i];

    in += take;
    out += take;
    inlen -= take;
  }

  return 0;
}
618 
619 
// key expansion (plain C build): derives the 34 round keys ctx->key[0..33] from the 32 byte key k,
// which is read as four 64-bit words passed through htole64.
// K[0] always is the word stored as round key while K[1], K[2], K[3] take turns as second word
// of the round function, with the round number serving as round constant.
// Note: returns 1.
int speck_expand_key (const unsigned char *k, speck_context_t *ctx) {

  const u64 *words = (const u64 *)k;
  u64 K[4];
  u64 round;

  K[0] = htole64 ( words[0] );
  K[1] = htole64 ( words[1] );
  K[2] = htole64 ( words[2] );
  K[3] = htole64 ( words[3] );

  for (round = 0; round < 33; round++) {
    ctx->key[round] = K[0];
    R (K[1 + round % 3], K[0], round);
  }
  ctx->key[33] = K[0];

  return 1;
}
639 
640 
641 #endif		// AVX, SSE, NEON, plain C ------------------------------------------------
642 
643 
644 // cipher SPECK -- 128 bit block size -- 128 bit key size -- CTR mode
645 // used for header encryption, thus the prefix 'he_'
646 // for now: just plain C -- AVX, SSE, NEON might follow
647 
// 64-bit rotate right / left by r bits (0 < r < 64)
#define ROR64(x,r) (((x)<<(64-(r))) | ((x)>>(r)))
#define ROL64(x,r) (((x)>>(64-(r))) | ((x)<<(r)))
// one SPECK round, updates both x and y in place: x = ((x ror 8) + y) ^ k,  y = (y rol 3) ^ x
#define R64(x,y,k) (x = (ROR64(x,8) + (y)) ^ (k), y = ROL64(y,3) ^ (x))
651 
652 
// encrypts one block in place for the header encryption: *u and *v are the two 64-bit halves
// which are run through 32 rounds using ctx->key[0] .. ctx->key[31]; always returns 0
static int speck_encrypt_he (u64 *u, u64 *v, speck_context_t *ctx) {

  u64 hi = *u;
  u64 lo = *v;
  int round = 0;

  while (round < 32) {
    R64 (hi, lo, ctx->key[round]);
    round++;
  }

  *u = hi;
  *v = lo;

  return 0;
}
664 
665 
// SPECK CTR mode for header encryption: XORs inlen bytes from in with the keystream and
// writes them to out; in and out may be the same buffer.
//   n   : 16-byte initial counter block, read as two little-endian 64-bit words;
//         the first word is the running block counter, the second one stays constant
//   ctx : expanded key as prepared by speck_expand_key_he
// Every keystream block is serialized little-endian (counter half first) and applied byte by byte.
// That keeps full and partial blocks consistent on any host byte order, and it avoids
// both heap allocation and 64-bit accesses to possibly unaligned caller buffers.
// Always returns 0.
int speck_he (unsigned char *out, const unsigned char *in, unsigned long long inlen,
	      const unsigned char *n, speck_context_t *ctx) {

  u64 nonce[2] = { 0, 0 };
  u64 x, y;
  unsigned char block[16];
  unsigned int i, take;

  if (!inlen)
    return 0;

  // assemble both nonce words from their little-endian byte representation
  for (i = 8; i-- > 0; ) {
    nonce[0] = (nonce[0] << 8) | n[i];
    nonce[1] = (nonce[1] << 8) | n[i + 8];
  }

  while (inlen > 0) {
    x = nonce[1]; y = nonce[0]; nonce[0]++;
    speck_encrypt_he (&x, &y, ctx);

    // keystream block: y in bytes 0..7, x in bytes 8..15, both little-endian
    for (i = 0; i < 8; i++) {
      block[i]     = (unsigned char)(y >> (8 * i));
      block[i + 8] = (unsigned char)(x >> (8 * i));
    }

    // the last block may be a partial one
    take = (inlen < 16) ? (unsigned int)inlen : 16;
    for (i = 0; i < take; i++)
      out[i] = block[i] ^ in[i];

    in += take;
    out += take;
    inlen -= take;
  }

  return 0;
}
700 
701 
// key expansion for header encryption: derives the 32 round keys ctx->key[0..31] from the
// first 16 bytes of k, read as two 64-bit words passed through htole64; the round number
// serves as round constant.
// Note: returns 1.
int speck_expand_key_he (const unsigned char *k, speck_context_t *ctx) {

  const u64 *words = (const u64 *)k;
  u64 current = htole64 ( words[0] );
  u64 other   = htole64 ( words[1] );
  u64 round   = 0;

  while (round < 32) {
    ctx->key[round] = current;
    R64 (other, current, round);
    round++;
  }

  return 1;
}
716 
717 
718 // ----------------------------------------------------------------------------------------
719 
720 
721 // cipher SPECK -- 96 bit block size -- 96 bit key size -- ECB mode
// follows endianness rules as used in official implementation guide and NOT as in original 2013 cipher presentation
723 // used for IV in header encryption, thus the prefix 'he_iv_'
724 // for now: just plain C -- probably no need for AVX, SSE, NEON
725 
726 // prerequisite: lower 16 bit reset
// 48-bit rotations on values kept in the upper 48 bits of a 64-bit word; the lower 16 bits
// of x must be zero on entry and are zero again in the result (0 < r < 48)
#define ROTL48(x,r) (((((x)<<(r)) | ((x)>>(48-(r)))) >> 16) << 16)
#define ROTR48(x,r) (((((x)>>(r)) | ((x)<<(48-(r)))) >> 16) << 16)
// encryption round, updates x and y in place: x = ((x ror 8) + y) ^ k,  y = (y rol 3) ^ x
#define ER96(x,y,k) (x=ROTR48(x,8), x+=y, x^=k, y=ROTL48(y,3), y^=x)
// decryption round, exact inverse of ER96 for the same k
#define DR96(x,y,k) (y^=x, y=ROTR48(y,3), x^=k, x-=y, x=ROTL48(x,8))
731 
732 
// encrypts the 12 byte (96 bit) block at inout in place using ctx->key[0..27] (28 rounds).
// The block is handled as two little-endian 48-bit words (bytes 0..5 and bytes 6..11), each
// kept in the upper 48 bits of a 64-bit variable as ROTL48/ROTR48 require.
// Bytes are loaded and stored one by one so that neither alignment nor host byte order matters.
// Always returns 0.
int speck_he_iv_encrypt (unsigned char *inout, speck_context_t *ctx) {

  u64 x = 0, y = 0;
  int i;

  for (i = 5; i >= 0; i--) {
    x = (x << 8) | inout[i];
    y = (y << 8) | inout[i + 6];
  }
  // move both words to the upper 48 bits, lower 16 bits are zero
  x <<= 16;
  y <<= 16;

  for (i = 0; i < 28; i++)
    ER96 (y, x, ctx->key[i]);

  x >>= 16;
  y >>= 16;
  for (i = 0; i < 6; i++) {
    inout[i]     = (unsigned char)(x >> (8 * i));
    inout[i + 6] = (unsigned char)(y >> (8 * i));
  }

  return 0;
}
752 
753 
// decrypts the 12 byte (96 bit) block at inout in place, applying the inverse rounds with
// ctx->key[27] down to ctx->key[0].
// The block is handled as two little-endian 48-bit words (bytes 0..5 and bytes 6..11), each
// kept in the upper 48 bits of a 64-bit variable as ROTL48/ROTR48 require.
// Bytes are loaded and stored one by one so that neither alignment nor host byte order matters.
// Always returns 0.
int speck_he_iv_decrypt (unsigned char *inout, speck_context_t *ctx) {

  u64 x = 0, y = 0;
  int i;

  for (i = 5; i >= 0; i--) {
    x = (x << 8) | inout[i];
    y = (y << 8) | inout[i + 6];
  }
  // move both words to the upper 48 bits, lower 16 bits are zero
  x <<= 16;
  y <<= 16;

  for (i = 27; i >= 0; i--)
    DR96 (y, x, ctx->key[i]);

  x >>= 16;
  y >>= 16;
  for (i = 0; i < 6; i++) {
    inout[i]     = (unsigned char)(x >> (8 * i));
    inout[i + 6] = (unsigned char)(y >> (8 * i));
  }

  return 0;
}
773 
774 
// key expansion for the 96 bit cipher: derives the 28 round keys ctx->key[0..27] from the
// 12 byte key k, handled as two little-endian 48-bit words (bytes 0..5 and bytes 6..11) kept
// in the upper 48 bits of a 64-bit variable; the round number, shifted into the same position,
// serves as round constant.
// Key bytes are read one by one so that neither alignment nor host byte order matters.
// Note: returns 1.
int speck_expand_key_he_iv (const unsigned char *k, speck_context_t *ctx) {

  u64 A = 0, B = 0;
  int i;

  for (i = 5; i >= 0; i--) {
    A = (A << 8) | k[i];
    B = (B << 8) | k[i + 6];
  }
  // move both words to the upper 48 bits, lower 16 bits are zero
  A <<= 16;
  B <<= 16;

  for (i = 0; i < 28; i++) {
    ctx->key[i] = A;
    ER96 ( B, A, i << 16);
  }

  return 1;
}
790 
791 
792 // ----------------------------------------------------------------------------------------
793 
794 /*
795 // code for testing -- to be removed when finished
796 #include <stdio.h> // for testing
797 #include <string.h>
798 
799 int speck_test () {
800 
801   uint8_t key[32] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
802 		      0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
803 		      0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
804 		      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F };
805 
806   uint8_t k96[12] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
807 		      0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D };
808 
809   uint8_t iv[16]  = { 0x70, 0x6f, 0x6f, 0x6e, 0x65, 0x72, 0x2e, 0x20,
810 		      0x49, 0x6e, 0x20, 0x74, 0x68, 0x6f, 0x73, 0x65 };
811 
812   uint8_t xv[16]  = { 0x20, 0x6d, 0x61, 0x64, 0x65, 0x20, 0x69, 0x74,
813 		      0x20, 0x65, 0x71, 0x75, 0x69, 0x76, 0x61, 0x6c };
814 
815   uint8_t p96[12] = { 0x20, 0x75, 0x73, 0x61, 0x67, 0x65,
816 		      0x2C, 0x20, 0x68, 0x6F, 0x77, 0x65 };
817 
818   uint8_t pt[16]  = { 0x00 };
819 
820   // expected outcome (according to pp. 35 & 36 of Implementation Guide 1.1 as of 2019) and
821   // original cipher presentation as of 2013 in which notably a different endianess is used
822   uint8_t ct[16]  = { 0x43, 0x8f, 0x18, 0x9c, 0x8d, 0xb4, 0xee, 0x4e,
823 		      0x3e, 0xf5, 0xc0, 0x05, 0x04, 0x01, 0x09, 0x41 };
824 
825   uint8_t xt[16]  = { 0x18, 0x0d, 0x57, 0x5c, 0xdf, 0xfe, 0x60, 0x78,
826 		      0x65, 0x32, 0x78, 0x79, 0x51, 0x98, 0x5d, 0xa6 };
827 
828   uint8_t x96[12] = { 0xAA, 0x79, 0x8F, 0xDE, 0xBD, 0x62,
829 		      0x78, 0x71, 0xAB, 0x09, 0x4D, 0x9E };
830   speck_context_t ctx;
831 
832   speck_expand_key (key, &ctx);
833 #if defined (SPECK_CTX_BYVAL)
834   speck_ctr (pt, pt, 16, iv, ctx);
835 #else
836   speck_ctr (pt, pt, 16, iv, &ctx);
837 #endif
838 
839   u64 i;
840    fprintf (stderr, "rk00: %016llx\n",  ctx.key[0]);
841    fprintf (stderr, "rk33: %016llx\n",  ctx.key[33]);
842    fprintf (stderr, "out : %016lx\n", *(uint64_t*)pt);
843    fprintf (stderr, "mem : " ); for (i=0; i < 16; i++) fprintf (stderr, "%02x ", pt[i]); fprintf (stderr, "\n");
844 
845   int ret = 1;
846   for (i=0; i < 16; i++)
847     if (pt[i] != ct[i]) ret = 0;
848 
849   memset (pt, 0, 16);
850   speck_expand_key_he (key, &ctx);
851   speck_he (pt, pt, 16, xv, &ctx);
852 
853    fprintf (stderr, "rk00: %016llx\n",  ctx.key[0]);
854    fprintf (stderr, "rk31: %016llx\n",  ctx.key[31]);
855    fprintf (stderr, "out : %016lx\n", *(uint64_t*)pt);
856    fprintf (stderr, "mem : " ); for (i=0; i < 16; i++) fprintf (stderr, "%02x ", pt[i]); fprintf (stderr, "\n");
857 
858   for (i=0; i < 16; i++)
859     if (pt[i] != xt[i]) ret = 0;
860 
861   speck_expand_key_he_iv (k96, &ctx);
862   speck_he_iv_encrypt (p96, &ctx);
863 //  speck_he_iv_decrypt (p96, &ctx);
864 //  speck_he_iv_encrypt (p96, &ctx);
865 
866    fprintf (stderr, "rk00: %016llx\n",  ctx.key[0]);
867    fprintf (stderr, "rk27: %016llx\n",  ctx.key[27]);
868    fprintf (stderr, "out : %016lx\n", *(uint64_t*)p96);
869    fprintf (stderr, "mem : " ); for (i=0; i < 12; i++) fprintf (stderr, "%02x ", p96[i]); fprintf (stderr, "\n");
870 
871   for (i=0; i < 12; i++)
872     if (p96[i] != x96[i]) ret = 0;
873 
874   return (ret);
875 }
876 
877 
878 int main (int argc, char* argv[]) {
879 
880   fprintf (stdout, "SPECK SELF TEST RESULT: %u\n", speck_test (0,NULL));
881 }
882 
883 */
884