1/* This is an independent implementation of the encryption algorithm:   */
2/*                                                                      */
3/*         Serpent by Ross Anderson, Eli Biham and Lars Knudsen         */
4/*                                                                      */
5/* which is a candidate algorithm in the Advanced Encryption Standard   */
6/* programme of the US National Institute of Standards and Technology.  */
7/*                                                                      */
8/* Copyright in this implementation is held by Dr B R Gladman but I     */
9/* hereby give permission for its free direct or derivative use subject */
10/* to acknowledgment of its origin and compliance with any conditions   */
11/* that the originators of the algorithm place on its exploitation.     */
12/*                                                                      */
13/* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999     */
14/*                                                                      */
15/* -------------------------------------------------------------------- */
16/*                                                                      */
17/* Cleaned and optimized for GPU use with hashcat by Jens Steube        */
18/* Added 192-bit functions by Gabriele Gristina                         */
19
20#include "inc_vendor.h"
21#include "inc_types.h"
22#include "inc_platform.h"
23#include "inc_common.h"
24#include "inc_cipher_serpent.h"
25
26/* 15 terms */
27
/* sb0: forward S-box 0 as a straight-line network of word-wide bitwise
 * operations (every bit position is processed independently).
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 3 8 15 1 10 6 5 11 14 13 4 2 7 0 9 12.
 * Contract: the expansion site must declare the scratch variables t<N>
 * used below; outputs must not alias inputs because outputs are assigned
 * while inputs are still being read.  Expands to several statements and
 * leaves the final ';' to the caller. */
#define sb0(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = i0 ^ i3;   \
    t2  = i0 & i3;   \
    t3  = i2 ^ t1;   \
    t6  = i1 & t1;   \
    t4  = i1 ^ t3;   \
    t10 = ~t3;       \
    o3  = t2 ^ t4;   \
    t7  = i0 ^ t6;   \
    t14 = ~t7;       \
    t8  = i2 | t7;   \
    t11 = t3 ^ t7;   \
    o2  = t4 ^ t8;   \
    t12 = o3 & t11;  \
    o1  = t10 ^ t12; \
    o0  = t12 ^ t14
44
45/* 15 terms */
46
/* ib0: inverse of sb0, same word-wide bitwise form.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 13 3 11 0 10 6 5 12 1 14 4 7 15 9 8 2.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define ib0(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = ~i0;       \
    t2  = i0 ^ i1;   \
    t3  = t1 | t2;   \
    t4  = i3 ^ t3;   \
    t7  = i3 & t2;   \
    t5  = i2 ^ t4;   \
    t8  = t1 ^ t7;   \
    o2  = t2 ^ t5;   \
    t11 = i0 & t4;   \
    t9  = o2 & t8;   \
    t14 = t5 ^ t8;   \
    o1  = t4 ^ t9;   \
    t12 = t5 | o1;   \
    o3  = t11 ^ t12; \
    o0  = o3 ^ t14
63
64/* 14 terms!  */
65
/* sb1: forward S-box 1 as word-wide bitwise logic.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 15 12 2 7 9 0 5 10 1 11 14 8 6 13 3 4.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define sb1(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = ~i0;       \
    t2  = i1 ^ t1;   \
    t3  = i0 | t2;   \
    t4  = i3 | t2;   \
    t5  = i2 ^ t3;   \
    o2  = i3 ^ t5;   \
    t7  = i1 ^ t4;   \
    t8  = t2 ^ o2;   \
    t9  = t5 & t7;   \
    o3  = t8 ^ t9;   \
    t11 = t5 ^ t7;   \
    o1  = o3 ^ t11;  \
    t13 = t8 & t11;  \
    o0  = t5 ^ t13
81
82/* 17 terms */
83
/* ib1: inverse of sb1, same word-wide bitwise form.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 5 8 2 14 15 6 12 3 11 4 7 9 1 13 10 0.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define ib1(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = i0 ^ i3;   \
    t2  = i0 & i1;   \
    t3  = i1 ^ i2;   \
    t4  = i0 ^ t3;   \
    t5  = i1 | i3;   \
    t7  = i2 | t1;   \
    o3  = t4 ^ t5;   \
    t8  = i1 ^ t7;   \
    t11 = ~t2;       \
    t9  = t4 & t8;   \
    o1  = t1 ^ t9;   \
    t13 = t9 ^ t11;  \
    t12 = o3 & o1;   \
    o2  = t12 ^ t13; \
    t15 = i0 & i3;   \
    t16 = i2 ^ t13;  \
    o0  = t15 ^ t16
102
103/* 16 terms */
104
/* sb2: forward S-box 2 as word-wide bitwise logic.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 8 6 7 9 3 12 10 15 13 1 14 4 0 11 5 2.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define sb2(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = ~i0;       \
    t2  = i1 ^ i3;   \
    t3  = i2 & t1;   \
    t13 = i3 | t1;   \
    o0  = t2 ^ t3;   \
    t5  = i2 ^ t1;   \
    t6  = i2 ^ o0;   \
    t7  = i1 & t6;   \
    t10 = o0 | t5;   \
    o3  = t5 ^ t7;   \
    t9  = i3 | t7;   \
    t11 = t9 & t10;  \
    t14 = t2 ^ o3;   \
    o2  = i0 ^ t11;  \
    t15 = o2 ^ t13;  \
    o1  = t14 ^ t15
122
123/* 16 terms */
124
/* ib2: inverse of sb2, same word-wide bitwise form.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 12 9 15 4 11 14 1 2 0 3 6 13 5 8 10 7.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define ib2(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = i1 ^ i3;   \
    t2  = ~t1;       \
    t3  = i0 ^ i2;   \
    t4  = i2 ^ t1;   \
    t7  = i0 | t2;   \
    t5  = i1 & t4;   \
    t8  = i3 ^ t7;   \
    t11 = ~t4;       \
    o0  = t3 ^ t5;   \
    t9  = t3 | t8;   \
    t14 = i3 & t11;  \
    o3  = t1 ^ t9;   \
    t12 = o0 | o3;   \
    o1  = t11 ^ t12; \
    t15 = t3 ^ t12;  \
    o2  = t14 ^ t15
142
143/* 17 terms */
144
/* sb3: forward S-box 3 as word-wide bitwise logic.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 0 15 11 8 12 9 6 3 13 1 2 4 10 7 5 14.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define sb3(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = i0 ^ i2;   \
    t2  = i3 ^ t1;   \
    t3  = i0 & t2;   \
    t4  = i3 ^ t3;   \
    t5  = i1 & t4;   \
    o2  = t2 ^ t5;   \
    t7  = i0 | o2;   \
    t8  = i1 | i3;   \
    t11 = i0 | i3;   \
    t9  = t4 & t7;   \
    o1  = t8 ^ t9;   \
    t12 = i1 ^ t11;  \
    t13 = o2 ^ t9;   \
    t15 = t3 ^ t8;   \
    o3  = t12 ^ t13; \
    t16 = i2 & t15;  \
    o0  = t12 ^ t16
163
164/* 16 term solution that performs less well than 17 term one
165   in my environment (PPro/PII)
166
167#define sb3(a,b,c,d,e,f,g,h)    \
168    t1 = a ^ b;     \
169    t2 = a & c;     \
170    t3 = a | d;     \
171    t4 = c ^ d;     \
172    t5 = t1 & t3;   \
173    t6 = t2 | t5;   \
174    g = t4 ^ t6;    \
175    t8 = b ^ t3;    \
176    t9 = t6 ^ t8;   \
177    t10 = t4 & t9;  \
178    e = t1 ^ t10;   \
179    t12 = g & e;    \
180    f = t9 ^ t12;   \
181    t14 = b | d;    \
182    t15 = t4 ^ t12; \
183    h = t14 ^ t15
184*/
185
186/* 17 terms */
187
/* ib3: inverse of sb3, same word-wide bitwise form.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 0 9 10 7 11 14 6 13 3 5 12 2 4 8 15 1.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define ib3(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = i1 ^ i2;   \
    t2  = i1 | i2;   \
    t3  = i0 ^ i2;   \
    t7  = i0 ^ i3;   \
    t4  = t2 ^ t3;   \
    t5  = i3 | t4;   \
    t9  = t2 ^ t7;   \
    o0  = t1 ^ t5;   \
    t8  = t1 | t5;   \
    t11 = i0 & t4;   \
    o2  = t8 ^ t9;   \
    t12 = o0 | t9;   \
    o1  = t11 ^ t12; \
    t14 = i0 & o2;   \
    t15 = t2 ^ t14;  \
    t16 = o0 & t15;  \
    o3  = t4 ^ t16
206
207/* 15 terms */
208
/* sb4: forward S-box 4 as word-wide bitwise logic.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 1 15 8 3 12 0 11 6 2 5 4 10 9 14 7 13.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define sb4(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = i0 ^ i3;   \
    t2  = i3 & t1;   \
    t3  = i2 ^ t2;   \
    t4  = i1 | t3;   \
    o3  = t1 ^ t4;   \
    t6  = ~i1;       \
    t7  = t1 | t6;   \
    o0  = t3 ^ t7;   \
    t9  = i0 & o0;   \
    t10 = t1 ^ t6;   \
    t11 = t4 & t10;  \
    o2  = t9 ^ t11;  \
    t13 = i0 ^ t3;   \
    t14 = t10 & o2;  \
    o1  = t13 ^ t14
225
226/* 17 terms */
227
/* ib4: inverse of sb4, same word-wide bitwise form.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 5 0 8 3 10 9 7 14 2 12 11 6 4 15 13 1.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define ib4(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = i2 ^ i3;   \
    t2  = i2 | i3;   \
    t3  = i1 ^ t2;   \
    t4  = i0 & t3;   \
    o1  = t1 ^ t4;   \
    t6  = i0 ^ i3;   \
    t7  = i1 | i3;   \
    t8  = t6 & t7;   \
    o3  = t3 ^ t8;   \
    t10 = ~i0;       \
    t11 = i2 ^ o3;   \
    t12 = t10 | t11; \
    o0  = t3 ^ t12;  \
    t14 = i2 | t4;   \
    t15 = t7 ^ t14;  \
    t16 = o3 | t10;  \
    o2  = t15 ^ t16
246
247/* 16 terms */
248
/* sb5: forward S-box 5 as word-wide bitwise logic.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 15 5 2 11 4 10 9 12 0 3 14 8 13 6 7 1.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define sb5(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = ~i0;       \
    t2  = i0 ^ i1;   \
    t3  = i0 ^ i3;   \
    t4  = i2 ^ t1;   \
    t5  = t2 | t3;   \
    o0  = t4 ^ t5;   \
    t7  = i3 & o0;   \
    t8  = t2 ^ o0;   \
    t10 = t1 | o0;   \
    o1  = t7 ^ t8;   \
    t11 = t2 | t7;   \
    t12 = t3 ^ t10;  \
    t14 = i1 ^ t7;   \
    o2  = t11 ^ t12; \
    t15 = o1 & t12;  \
    o3  = t14 ^ t15
266
267/* 16 terms */
268
/* ib5: inverse of sb5, same word-wide bitwise form.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 8 15 2 9 4 1 13 14 11 6 5 3 7 12 10 0.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define ib5(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = ~i2;       \
    t2  = i1 & t1;   \
    t3  = i3 ^ t2;   \
    t4  = i0 & t3;   \
    t5  = i1 ^ t1;   \
    o3  = t4 ^ t5;   \
    t7  = i1 | o3;   \
    t8  = i0 & t7;   \
    o1  = t3 ^ t8;   \
    t10 = i0 | i3;   \
    t11 = t1 ^ t7;   \
    o0  = t10 ^ t11; \
    t13 = i0 ^ i2;   \
    t14 = i1 & t10;  \
    t15 = t4 | t13;  \
    o2  = t14 ^ t15
286
287/* 15 terms */
288
/* sb6: forward S-box 6 as word-wide bitwise logic.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 7 2 12 5 8 4 6 11 14 9 1 15 13 3 10 0.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define sb6(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = ~i0;       \
    t2  = i0 ^ i3;   \
    t3  = i1 ^ t2;   \
    t4  = t1 | t2;   \
    t5  = i2 ^ t4;   \
    o1  = i1 ^ t5;   \
    t13 = ~t5;       \
    t7  = t2 | o1;   \
    t8  = i3 ^ t7;   \
    t9  = t5 & t8;   \
    o2  = t3 ^ t9;   \
    t11 = t5 ^ t8;   \
    o0  = o2 ^ t11;  \
    t14 = t3 & t11;  \
    o3  = t13 ^ t14
305
306/* 15 terms */
307
/* ib6: inverse of sb6, same word-wide bitwise form.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 15 10 1 13 5 3 6 0 4 9 14 7 2 12 8 11.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define ib6(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = ~i0;       \
    t2  = i0 ^ i1;   \
    t3  = i2 ^ t2;   \
    t4  = i2 | t1;   \
    t5  = i3 ^ t4;   \
    t13 = i3 & t1;   \
    o1  = t3 ^ t5;   \
    t7  = t3 & t5;   \
    t8  = t2 ^ t7;   \
    t9  = i1 | t8;   \
    o3  = t5 ^ t9;   \
    t11 = i1 | o3;   \
    o0  = t8 ^ t11;  \
    t14 = t3 ^ t11;  \
    o2  = t13 ^ t14
324
325/* 17 terms */
326
/* sb7: forward S-box 7 as word-wide bitwise logic.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 1 13 15 0 14 8 2 11 7 4 12 10 9 3 5 6.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define sb7(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = ~i2;       \
    t2  = i1 ^ i2;   \
    t3  = i1 | t1;   \
    t4  = i3 ^ t3;   \
    t5  = i0 & t4;   \
    t7  = i0 ^ i3;   \
    o3  = t2 ^ t5;   \
    t8  = i1 ^ t5;   \
    t9  = t2 | t8;   \
    t11 = i3 & t3;   \
    o1  = t7 ^ t9;   \
    t12 = t5 ^ o1;   \
    t15 = t1 | t4;   \
    t13 = o3 & t12;  \
    o2  = t11 ^ t13; \
    t16 = t12 ^ o2;  \
    o0  = t15 ^ t16
345
346/* 17 terms */
347
/* ib7: inverse of sb7, same word-wide bitwise form.
 * Inputs i0..i3, outputs o0..o3; i0/o0 carry the least significant bit.
 * Nibble map for inputs 0..15: 3 0 6 13 9 14 15 8 5 12 11 7 10 1 4 2.
 * Contract: scratch variables t<N> must exist at the expansion site;
 * outputs must not alias inputs; the caller supplies the final ';'. */
#define ib7(i0,i1,i2,i3,o0,o1,o2,o3) \
    t1  = i0 & i1;   \
    t2  = i0 | i1;   \
    t3  = i2 | t1;   \
    t4  = i3 & t2;   \
    o3  = t3 ^ t4;   \
    t6  = ~i3;       \
    t7  = i1 ^ t4;   \
    t8  = o3 ^ t6;   \
    t11 = i2 ^ t7;   \
    t9  = t7 | t8;   \
    o1  = i0 ^ t9;   \
    t12 = i3 | o1;   \
    o0  = t11 ^ t12; \
    t14 = i0 & o3;   \
    t15 = t3 ^ o1;   \
    t16 = o0 ^ t14;  \
    o2  = t15 ^ t16
366
/* k_xor: XOR the four subkey words of round r into a, b, c, d.
 * Reads ks[4r+8 .. 4r+11] from an array/pointer named `ks` that must be in
 * scope at the expansion site (the first eight words of ks are skipped).
 * r is parenthesised so that an expression argument such as `i + 1`
 * selects the intended round instead of being split by `*` precedence.
 * Expands to several statements; the caller supplies the final ';'. */
#define k_xor(r,a,b,c,d)      \
    a ^= ks[4 * (r) +  8];    \
    b ^= ks[4 * (r) +  9];    \
    c ^= ks[4 * (r) + 10];    \
    d ^= ks[4 * (r) + 11]
372
/* k_set: load the four words of round r from the key array into a, b, c, d
 * (despite the name, this macro reads ks and writes the variables).
 * Reads ks[4r+8 .. 4r+11] from an array/pointer named `ks` in scope.
 * r is parenthesised so that an expression argument such as `i + 1`
 * selects the intended round instead of being split by `*` precedence.
 * Expands to several statements; the caller supplies the final ';'. */
#define k_set(r,a,b,c,d)      \
    a = ks[4 * (r) +  8];     \
    b = ks[4 * (r) +  9];     \
    c = ks[4 * (r) + 10];     \
    d = ks[4 * (r) + 11]
378
/* k_get: store a, b, c, d as the four words of round r in the key array
 * (despite the name, this macro writes ks and reads the variables).
 * Writes ks[4r+8 .. 4r+11] of an array/pointer named `ks` in scope.
 * r is parenthesised so that an expression argument such as `i + 1`
 * selects the intended round instead of being split by `*` precedence.
 * Expands to several statements; the caller supplies the final ';'. */
#define k_get(r,a,b,c,d)      \
    ks[4 * (r) +  8] = a;     \
    ks[4 * (r) +  9] = b;     \
    ks[4 * (r) + 10] = c;     \
    ks[4 * (r) + 11] = d
384
385/* the linear transformation and its inverse    */
386
/* rot: the forward linear transformation, applied in place to the four
 * state words x0..x3.  Mixes the words with rotations (hc_rotl32_S),
 * plain left shifts and XORs; irot() undoes it.
 * Each argument is evaluated several times and assigned to, so pass plain
 * variables.  Expands to several statements; the caller supplies the
 * final ';'. */
#define rot(x0,x1,x2,x3)         \
    x0 = hc_rotl32_S (x0, 13);   \
    x2 = hc_rotl32_S (x2, 3);    \
    x1 ^= x0 ^ x2;               \
    x3 ^= x2 ^ (x0 << 3);        \
    x1 = hc_rotl32_S (x1, 1);    \
    x3 = hc_rotl32_S (x3, 7);    \
    x0 ^= x1 ^ x3;               \
    x2 ^= x3 ^ (x1 << 7);        \
    x0 = hc_rotl32_S (x0, 5);    \
    x2 = hc_rotl32_S (x2, 22)
398
/* irot: inverse of rot(), applied in place to the four state words
 * x0..x3.  Runs the steps of rot() backwards with right rotations
 * (hc_rotr32_S) in place of left rotations.
 * Each argument is evaluated several times and assigned to, so pass plain
 * variables.  Expands to several statements; the caller supplies the
 * final ';'. */
#define irot(x0,x1,x2,x3)        \
    x0 = hc_rotr32_S (x0, 5);    \
    x2 = hc_rotr32_S (x2, 22);   \
    x0 ^= x1 ^ x3;               \
    x2 ^= x3 ^ (x1 << 7);        \
    x1 = hc_rotr32_S (x1, 1);    \
    x3 = hc_rotr32_S (x3, 7);    \
    x1 ^= x0 ^ x2;               \
    x3 ^= x2 ^ (x0 << 3);        \
    x0 = hc_rotr32_S (x0, 13);   \
    x2 = hc_rotr32_S (x2, 3)
410
411// 128 bit key
412
413DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey)
414{
415  ks[  0] = ukey[0];
416  ks[  1] = ukey[1];
417  ks[  2] = ukey[2];
418  ks[  3] = ukey[3];
419  ks[  4] = 1;
420  ks[  5] = 0;
421  ks[  6] = 0;
422  ks[  7] = 0;
423  ks[  8] = hc_rotl32_S ((ks[  7] ^ ks[  5] ^ ks[  3] ^ ks[  0] ^ 0x9e3779b9 ^   0), 11);
424  ks[  9] = hc_rotl32_S ((ks[  8] ^ ks[  6] ^ ks[  4] ^ ks[  1] ^ 0x9e3779b9 ^   1), 11);
425  ks[ 10] = hc_rotl32_S ((ks[  9] ^ ks[  7] ^ ks[  5] ^ ks[  2] ^ 0x9e3779b9 ^   2), 11);
426  ks[ 11] = hc_rotl32_S ((ks[ 10] ^ ks[  8] ^ ks[  6] ^ ks[  3] ^ 0x9e3779b9 ^   3), 11);
427  ks[ 12] = hc_rotl32_S ((ks[ 11] ^ ks[  9] ^ ks[  7] ^ ks[  4] ^ 0x9e3779b9 ^   4), 11);
428  ks[ 13] = hc_rotl32_S ((ks[ 12] ^ ks[ 10] ^ ks[  8] ^ ks[  5] ^ 0x9e3779b9 ^   5), 11);
429  ks[ 14] = hc_rotl32_S ((ks[ 13] ^ ks[ 11] ^ ks[  9] ^ ks[  6] ^ 0x9e3779b9 ^   6), 11);
430  ks[ 15] = hc_rotl32_S ((ks[ 14] ^ ks[ 12] ^ ks[ 10] ^ ks[  7] ^ 0x9e3779b9 ^   7), 11);
431  ks[ 16] = hc_rotl32_S ((ks[ 15] ^ ks[ 13] ^ ks[ 11] ^ ks[  8] ^ 0x9e3779b9 ^   8), 11);
432  ks[ 17] = hc_rotl32_S ((ks[ 16] ^ ks[ 14] ^ ks[ 12] ^ ks[  9] ^ 0x9e3779b9 ^   9), 11);
433  ks[ 18] = hc_rotl32_S ((ks[ 17] ^ ks[ 15] ^ ks[ 13] ^ ks[ 10] ^ 0x9e3779b9 ^  10), 11);
434  ks[ 19] = hc_rotl32_S ((ks[ 18] ^ ks[ 16] ^ ks[ 14] ^ ks[ 11] ^ 0x9e3779b9 ^  11), 11);
435  ks[ 20] = hc_rotl32_S ((ks[ 19] ^ ks[ 17] ^ ks[ 15] ^ ks[ 12] ^ 0x9e3779b9 ^  12), 11);
436  ks[ 21] = hc_rotl32_S ((ks[ 20] ^ ks[ 18] ^ ks[ 16] ^ ks[ 13] ^ 0x9e3779b9 ^  13), 11);
437  ks[ 22] = hc_rotl32_S ((ks[ 21] ^ ks[ 19] ^ ks[ 17] ^ ks[ 14] ^ 0x9e3779b9 ^  14), 11);
438  ks[ 23] = hc_rotl32_S ((ks[ 22] ^ ks[ 20] ^ ks[ 18] ^ ks[ 15] ^ 0x9e3779b9 ^  15), 11);
439  ks[ 24] = hc_rotl32_S ((ks[ 23] ^ ks[ 21] ^ ks[ 19] ^ ks[ 16] ^ 0x9e3779b9 ^  16), 11);
440  ks[ 25] = hc_rotl32_S ((ks[ 24] ^ ks[ 22] ^ ks[ 20] ^ ks[ 17] ^ 0x9e3779b9 ^  17), 11);
441  ks[ 26] = hc_rotl32_S ((ks[ 25] ^ ks[ 23] ^ ks[ 21] ^ ks[ 18] ^ 0x9e3779b9 ^  18), 11);
442  ks[ 27] = hc_rotl32_S ((ks[ 26] ^ ks[ 24] ^ ks[ 22] ^ ks[ 19] ^ 0x9e3779b9 ^  19), 11);
443  ks[ 28] = hc_rotl32_S ((ks[ 27] ^ ks[ 25] ^ ks[ 23] ^ ks[ 20] ^ 0x9e3779b9 ^  20), 11);
444  ks[ 29] = hc_rotl32_S ((ks[ 28] ^ ks[ 26] ^ ks[ 24] ^ ks[ 21] ^ 0x9e3779b9 ^  21), 11);
445  ks[ 30] = hc_rotl32_S ((ks[ 29] ^ ks[ 27] ^ ks[ 25] ^ ks[ 22] ^ 0x9e3779b9 ^  22), 11);
446  ks[ 31] = hc_rotl32_S ((ks[ 30] ^ ks[ 28] ^ ks[ 26] ^ ks[ 23] ^ 0x9e3779b9 ^  23), 11);
447  ks[ 32] = hc_rotl32_S ((ks[ 31] ^ ks[ 29] ^ ks[ 27] ^ ks[ 24] ^ 0x9e3779b9 ^  24), 11);
448  ks[ 33] = hc_rotl32_S ((ks[ 32] ^ ks[ 30] ^ ks[ 28] ^ ks[ 25] ^ 0x9e3779b9 ^  25), 11);
449  ks[ 34] = hc_rotl32_S ((ks[ 33] ^ ks[ 31] ^ ks[ 29] ^ ks[ 26] ^ 0x9e3779b9 ^  26), 11);
450  ks[ 35] = hc_rotl32_S ((ks[ 34] ^ ks[ 32] ^ ks[ 30] ^ ks[ 27] ^ 0x9e3779b9 ^  27), 11);
451  ks[ 36] = hc_rotl32_S ((ks[ 35] ^ ks[ 33] ^ ks[ 31] ^ ks[ 28] ^ 0x9e3779b9 ^  28), 11);
452  ks[ 37] = hc_rotl32_S ((ks[ 36] ^ ks[ 34] ^ ks[ 32] ^ ks[ 29] ^ 0x9e3779b9 ^  29), 11);
453  ks[ 38] = hc_rotl32_S ((ks[ 37] ^ ks[ 35] ^ ks[ 33] ^ ks[ 30] ^ 0x9e3779b9 ^  30), 11);
454  ks[ 39] = hc_rotl32_S ((ks[ 38] ^ ks[ 36] ^ ks[ 34] ^ ks[ 31] ^ 0x9e3779b9 ^  31), 11);
455  ks[ 40] = hc_rotl32_S ((ks[ 39] ^ ks[ 37] ^ ks[ 35] ^ ks[ 32] ^ 0x9e3779b9 ^  32), 11);
456  ks[ 41] = hc_rotl32_S ((ks[ 40] ^ ks[ 38] ^ ks[ 36] ^ ks[ 33] ^ 0x9e3779b9 ^  33), 11);
457  ks[ 42] = hc_rotl32_S ((ks[ 41] ^ ks[ 39] ^ ks[ 37] ^ ks[ 34] ^ 0x9e3779b9 ^  34), 11);
458  ks[ 43] = hc_rotl32_S ((ks[ 42] ^ ks[ 40] ^ ks[ 38] ^ ks[ 35] ^ 0x9e3779b9 ^  35), 11);
459  ks[ 44] = hc_rotl32_S ((ks[ 43] ^ ks[ 41] ^ ks[ 39] ^ ks[ 36] ^ 0x9e3779b9 ^  36), 11);
460  ks[ 45] = hc_rotl32_S ((ks[ 44] ^ ks[ 42] ^ ks[ 40] ^ ks[ 37] ^ 0x9e3779b9 ^  37), 11);
461  ks[ 46] = hc_rotl32_S ((ks[ 45] ^ ks[ 43] ^ ks[ 41] ^ ks[ 38] ^ 0x9e3779b9 ^  38), 11);
462  ks[ 47] = hc_rotl32_S ((ks[ 46] ^ ks[ 44] ^ ks[ 42] ^ ks[ 39] ^ 0x9e3779b9 ^  39), 11);
463  ks[ 48] = hc_rotl32_S ((ks[ 47] ^ ks[ 45] ^ ks[ 43] ^ ks[ 40] ^ 0x9e3779b9 ^  40), 11);
464  ks[ 49] = hc_rotl32_S ((ks[ 48] ^ ks[ 46] ^ ks[ 44] ^ ks[ 41] ^ 0x9e3779b9 ^  41), 11);
465  ks[ 50] = hc_rotl32_S ((ks[ 49] ^ ks[ 47] ^ ks[ 45] ^ ks[ 42] ^ 0x9e3779b9 ^  42), 11);
466  ks[ 51] = hc_rotl32_S ((ks[ 50] ^ ks[ 48] ^ ks[ 46] ^ ks[ 43] ^ 0x9e3779b9 ^  43), 11);
467  ks[ 52] = hc_rotl32_S ((ks[ 51] ^ ks[ 49] ^ ks[ 47] ^ ks[ 44] ^ 0x9e3779b9 ^  44), 11);
468  ks[ 53] = hc_rotl32_S ((ks[ 52] ^ ks[ 50] ^ ks[ 48] ^ ks[ 45] ^ 0x9e3779b9 ^  45), 11);
469  ks[ 54] = hc_rotl32_S ((ks[ 53] ^ ks[ 51] ^ ks[ 49] ^ ks[ 46] ^ 0x9e3779b9 ^  46), 11);
470  ks[ 55] = hc_rotl32_S ((ks[ 54] ^ ks[ 52] ^ ks[ 50] ^ ks[ 47] ^ 0x9e3779b9 ^  47), 11);
471  ks[ 56] = hc_rotl32_S ((ks[ 55] ^ ks[ 53] ^ ks[ 51] ^ ks[ 48] ^ 0x9e3779b9 ^  48), 11);
472  ks[ 57] = hc_rotl32_S ((ks[ 56] ^ ks[ 54] ^ ks[ 52] ^ ks[ 49] ^ 0x9e3779b9 ^  49), 11);
473  ks[ 58] = hc_rotl32_S ((ks[ 57] ^ ks[ 55] ^ ks[ 53] ^ ks[ 50] ^ 0x9e3779b9 ^  50), 11);
474  ks[ 59] = hc_rotl32_S ((ks[ 58] ^ ks[ 56] ^ ks[ 54] ^ ks[ 51] ^ 0x9e3779b9 ^  51), 11);
475  ks[ 60] = hc_rotl32_S ((ks[ 59] ^ ks[ 57] ^ ks[ 55] ^ ks[ 52] ^ 0x9e3779b9 ^  52), 11);
476  ks[ 61] = hc_rotl32_S ((ks[ 60] ^ ks[ 58] ^ ks[ 56] ^ ks[ 53] ^ 0x9e3779b9 ^  53), 11);
477  ks[ 62] = hc_rotl32_S ((ks[ 61] ^ ks[ 59] ^ ks[ 57] ^ ks[ 54] ^ 0x9e3779b9 ^  54), 11);
478  ks[ 63] = hc_rotl32_S ((ks[ 62] ^ ks[ 60] ^ ks[ 58] ^ ks[ 55] ^ 0x9e3779b9 ^  55), 11);
479  ks[ 64] = hc_rotl32_S ((ks[ 63] ^ ks[ 61] ^ ks[ 59] ^ ks[ 56] ^ 0x9e3779b9 ^  56), 11);
480  ks[ 65] = hc_rotl32_S ((ks[ 64] ^ ks[ 62] ^ ks[ 60] ^ ks[ 57] ^ 0x9e3779b9 ^  57), 11);
481  ks[ 66] = hc_rotl32_S ((ks[ 65] ^ ks[ 63] ^ ks[ 61] ^ ks[ 58] ^ 0x9e3779b9 ^  58), 11);
482  ks[ 67] = hc_rotl32_S ((ks[ 66] ^ ks[ 64] ^ ks[ 62] ^ ks[ 59] ^ 0x9e3779b9 ^  59), 11);
483  ks[ 68] = hc_rotl32_S ((ks[ 67] ^ ks[ 65] ^ ks[ 63] ^ ks[ 60] ^ 0x9e3779b9 ^  60), 11);
484  ks[ 69] = hc_rotl32_S ((ks[ 68] ^ ks[ 66] ^ ks[ 64] ^ ks[ 61] ^ 0x9e3779b9 ^  61), 11);
485  ks[ 70] = hc_rotl32_S ((ks[ 69] ^ ks[ 67] ^ ks[ 65] ^ ks[ 62] ^ 0x9e3779b9 ^  62), 11);
486  ks[ 71] = hc_rotl32_S ((ks[ 70] ^ ks[ 68] ^ ks[ 66] ^ ks[ 63] ^ 0x9e3779b9 ^  63), 11);
487  ks[ 72] = hc_rotl32_S ((ks[ 71] ^ ks[ 69] ^ ks[ 67] ^ ks[ 64] ^ 0x9e3779b9 ^  64), 11);
488  ks[ 73] = hc_rotl32_S ((ks[ 72] ^ ks[ 70] ^ ks[ 68] ^ ks[ 65] ^ 0x9e3779b9 ^  65), 11);
489  ks[ 74] = hc_rotl32_S ((ks[ 73] ^ ks[ 71] ^ ks[ 69] ^ ks[ 66] ^ 0x9e3779b9 ^  66), 11);
490  ks[ 75] = hc_rotl32_S ((ks[ 74] ^ ks[ 72] ^ ks[ 70] ^ ks[ 67] ^ 0x9e3779b9 ^  67), 11);
491  ks[ 76] = hc_rotl32_S ((ks[ 75] ^ ks[ 73] ^ ks[ 71] ^ ks[ 68] ^ 0x9e3779b9 ^  68), 11);
492  ks[ 77] = hc_rotl32_S ((ks[ 76] ^ ks[ 74] ^ ks[ 72] ^ ks[ 69] ^ 0x9e3779b9 ^  69), 11);
493  ks[ 78] = hc_rotl32_S ((ks[ 77] ^ ks[ 75] ^ ks[ 73] ^ ks[ 70] ^ 0x9e3779b9 ^  70), 11);
494  ks[ 79] = hc_rotl32_S ((ks[ 78] ^ ks[ 76] ^ ks[ 74] ^ ks[ 71] ^ 0x9e3779b9 ^  71), 11);
495  ks[ 80] = hc_rotl32_S ((ks[ 79] ^ ks[ 77] ^ ks[ 75] ^ ks[ 72] ^ 0x9e3779b9 ^  72), 11);
496  ks[ 81] = hc_rotl32_S ((ks[ 80] ^ ks[ 78] ^ ks[ 76] ^ ks[ 73] ^ 0x9e3779b9 ^  73), 11);
497  ks[ 82] = hc_rotl32_S ((ks[ 81] ^ ks[ 79] ^ ks[ 77] ^ ks[ 74] ^ 0x9e3779b9 ^  74), 11);
498  ks[ 83] = hc_rotl32_S ((ks[ 82] ^ ks[ 80] ^ ks[ 78] ^ ks[ 75] ^ 0x9e3779b9 ^  75), 11);
499  ks[ 84] = hc_rotl32_S ((ks[ 83] ^ ks[ 81] ^ ks[ 79] ^ ks[ 76] ^ 0x9e3779b9 ^  76), 11);
500  ks[ 85] = hc_rotl32_S ((ks[ 84] ^ ks[ 82] ^ ks[ 80] ^ ks[ 77] ^ 0x9e3779b9 ^  77), 11);
501  ks[ 86] = hc_rotl32_S ((ks[ 85] ^ ks[ 83] ^ ks[ 81] ^ ks[ 78] ^ 0x9e3779b9 ^  78), 11);
502  ks[ 87] = hc_rotl32_S ((ks[ 86] ^ ks[ 84] ^ ks[ 82] ^ ks[ 79] ^ 0x9e3779b9 ^  79), 11);
503  ks[ 88] = hc_rotl32_S ((ks[ 87] ^ ks[ 85] ^ ks[ 83] ^ ks[ 80] ^ 0x9e3779b9 ^  80), 11);
504  ks[ 89] = hc_rotl32_S ((ks[ 88] ^ ks[ 86] ^ ks[ 84] ^ ks[ 81] ^ 0x9e3779b9 ^  81), 11);
505  ks[ 90] = hc_rotl32_S ((ks[ 89] ^ ks[ 87] ^ ks[ 85] ^ ks[ 82] ^ 0x9e3779b9 ^  82), 11);
506  ks[ 91] = hc_rotl32_S ((ks[ 90] ^ ks[ 88] ^ ks[ 86] ^ ks[ 83] ^ 0x9e3779b9 ^  83), 11);
507  ks[ 92] = hc_rotl32_S ((ks[ 91] ^ ks[ 89] ^ ks[ 87] ^ ks[ 84] ^ 0x9e3779b9 ^  84), 11);
508  ks[ 93] = hc_rotl32_S ((ks[ 92] ^ ks[ 90] ^ ks[ 88] ^ ks[ 85] ^ 0x9e3779b9 ^  85), 11);
509  ks[ 94] = hc_rotl32_S ((ks[ 93] ^ ks[ 91] ^ ks[ 89] ^ ks[ 86] ^ 0x9e3779b9 ^  86), 11);
510  ks[ 95] = hc_rotl32_S ((ks[ 94] ^ ks[ 92] ^ ks[ 90] ^ ks[ 87] ^ 0x9e3779b9 ^  87), 11);
511  ks[ 96] = hc_rotl32_S ((ks[ 95] ^ ks[ 93] ^ ks[ 91] ^ ks[ 88] ^ 0x9e3779b9 ^  88), 11);
512  ks[ 97] = hc_rotl32_S ((ks[ 96] ^ ks[ 94] ^ ks[ 92] ^ ks[ 89] ^ 0x9e3779b9 ^  89), 11);
513  ks[ 98] = hc_rotl32_S ((ks[ 97] ^ ks[ 95] ^ ks[ 93] ^ ks[ 90] ^ 0x9e3779b9 ^  90), 11);
514  ks[ 99] = hc_rotl32_S ((ks[ 98] ^ ks[ 96] ^ ks[ 94] ^ ks[ 91] ^ 0x9e3779b9 ^  91), 11);
515  ks[100] = hc_rotl32_S ((ks[ 99] ^ ks[ 97] ^ ks[ 95] ^ ks[ 92] ^ 0x9e3779b9 ^  92), 11);
516  ks[101] = hc_rotl32_S ((ks[100] ^ ks[ 98] ^ ks[ 96] ^ ks[ 93] ^ 0x9e3779b9 ^  93), 11);
517  ks[102] = hc_rotl32_S ((ks[101] ^ ks[ 99] ^ ks[ 97] ^ ks[ 94] ^ 0x9e3779b9 ^  94), 11);
518  ks[103] = hc_rotl32_S ((ks[102] ^ ks[100] ^ ks[ 98] ^ ks[ 95] ^ 0x9e3779b9 ^  95), 11);
519  ks[104] = hc_rotl32_S ((ks[103] ^ ks[101] ^ ks[ 99] ^ ks[ 96] ^ 0x9e3779b9 ^  96), 11);
520  ks[105] = hc_rotl32_S ((ks[104] ^ ks[102] ^ ks[100] ^ ks[ 97] ^ 0x9e3779b9 ^  97), 11);
521  ks[106] = hc_rotl32_S ((ks[105] ^ ks[103] ^ ks[101] ^ ks[ 98] ^ 0x9e3779b9 ^  98), 11);
522  ks[107] = hc_rotl32_S ((ks[106] ^ ks[104] ^ ks[102] ^ ks[ 99] ^ 0x9e3779b9 ^  99), 11);
523  ks[108] = hc_rotl32_S ((ks[107] ^ ks[105] ^ ks[103] ^ ks[100] ^ 0x9e3779b9 ^ 100), 11);
524  ks[109] = hc_rotl32_S ((ks[108] ^ ks[106] ^ ks[104] ^ ks[101] ^ 0x9e3779b9 ^ 101), 11);
525  ks[110] = hc_rotl32_S ((ks[109] ^ ks[107] ^ ks[105] ^ ks[102] ^ 0x9e3779b9 ^ 102), 11);
526  ks[111] = hc_rotl32_S ((ks[110] ^ ks[108] ^ ks[106] ^ ks[103] ^ 0x9e3779b9 ^ 103), 11);
527  ks[112] = hc_rotl32_S ((ks[111] ^ ks[109] ^ ks[107] ^ ks[104] ^ 0x9e3779b9 ^ 104), 11);
528  ks[113] = hc_rotl32_S ((ks[112] ^ ks[110] ^ ks[108] ^ ks[105] ^ 0x9e3779b9 ^ 105), 11);
529  ks[114] = hc_rotl32_S ((ks[113] ^ ks[111] ^ ks[109] ^ ks[106] ^ 0x9e3779b9 ^ 106), 11);
530  ks[115] = hc_rotl32_S ((ks[114] ^ ks[112] ^ ks[110] ^ ks[107] ^ 0x9e3779b9 ^ 107), 11);
531  ks[116] = hc_rotl32_S ((ks[115] ^ ks[113] ^ ks[111] ^ ks[108] ^ 0x9e3779b9 ^ 108), 11);
532  ks[117] = hc_rotl32_S ((ks[116] ^ ks[114] ^ ks[112] ^ ks[109] ^ 0x9e3779b9 ^ 109), 11);
533  ks[118] = hc_rotl32_S ((ks[117] ^ ks[115] ^ ks[113] ^ ks[110] ^ 0x9e3779b9 ^ 110), 11);
534  ks[119] = hc_rotl32_S ((ks[118] ^ ks[116] ^ ks[114] ^ ks[111] ^ 0x9e3779b9 ^ 111), 11);
535  ks[120] = hc_rotl32_S ((ks[119] ^ ks[117] ^ ks[115] ^ ks[112] ^ 0x9e3779b9 ^ 112), 11);
536  ks[121] = hc_rotl32_S ((ks[120] ^ ks[118] ^ ks[116] ^ ks[113] ^ 0x9e3779b9 ^ 113), 11);
537  ks[122] = hc_rotl32_S ((ks[121] ^ ks[119] ^ ks[117] ^ ks[114] ^ 0x9e3779b9 ^ 114), 11);
538  ks[123] = hc_rotl32_S ((ks[122] ^ ks[120] ^ ks[118] ^ ks[115] ^ 0x9e3779b9 ^ 115), 11);
539  ks[124] = hc_rotl32_S ((ks[123] ^ ks[121] ^ ks[119] ^ ks[116] ^ 0x9e3779b9 ^ 116), 11);
540  ks[125] = hc_rotl32_S ((ks[124] ^ ks[122] ^ ks[120] ^ ks[117] ^ 0x9e3779b9 ^ 117), 11);
541  ks[126] = hc_rotl32_S ((ks[125] ^ ks[123] ^ ks[121] ^ ks[118] ^ 0x9e3779b9 ^ 118), 11);
542  ks[127] = hc_rotl32_S ((ks[126] ^ ks[124] ^ ks[122] ^ ks[119] ^ 0x9e3779b9 ^ 119), 11);
543  ks[128] = hc_rotl32_S ((ks[127] ^ ks[125] ^ ks[123] ^ ks[120] ^ 0x9e3779b9 ^ 120), 11);
544  ks[129] = hc_rotl32_S ((ks[128] ^ ks[126] ^ ks[124] ^ ks[121] ^ 0x9e3779b9 ^ 121), 11);
545  ks[130] = hc_rotl32_S ((ks[129] ^ ks[127] ^ ks[125] ^ ks[122] ^ 0x9e3779b9 ^ 122), 11);
546  ks[131] = hc_rotl32_S ((ks[130] ^ ks[128] ^ ks[126] ^ ks[123] ^ 0x9e3779b9 ^ 123), 11);
547  ks[132] = hc_rotl32_S ((ks[131] ^ ks[129] ^ ks[127] ^ ks[124] ^ 0x9e3779b9 ^ 124), 11);
548  ks[133] = hc_rotl32_S ((ks[132] ^ ks[130] ^ ks[128] ^ ks[125] ^ 0x9e3779b9 ^ 125), 11);
549  ks[134] = hc_rotl32_S ((ks[133] ^ ks[131] ^ ks[129] ^ ks[126] ^ 0x9e3779b9 ^ 126), 11);
550  ks[135] = hc_rotl32_S ((ks[134] ^ ks[132] ^ ks[130] ^ ks[127] ^ 0x9e3779b9 ^ 127), 11);
551  ks[136] = hc_rotl32_S ((ks[135] ^ ks[133] ^ ks[131] ^ ks[128] ^ 0x9e3779b9 ^ 128), 11);
552  ks[137] = hc_rotl32_S ((ks[136] ^ ks[134] ^ ks[132] ^ ks[129] ^ 0x9e3779b9 ^ 129), 11);
553  ks[138] = hc_rotl32_S ((ks[137] ^ ks[135] ^ ks[133] ^ ks[130] ^ 0x9e3779b9 ^ 130), 11);
554  ks[139] = hc_rotl32_S ((ks[138] ^ ks[136] ^ ks[134] ^ ks[131] ^ 0x9e3779b9 ^ 131), 11);
555
556  u32  a,b,c,d,e,f,g,h;
557  u32  t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
558
559  k_set( 0,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 0,e,f,g,h);
560  k_set( 1,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 1,e,f,g,h);
561  k_set( 2,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get( 2,e,f,g,h);
562  k_set( 3,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get( 3,e,f,g,h);
563  k_set( 4,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get( 4,e,f,g,h);
564  k_set( 5,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get( 5,e,f,g,h);
565  k_set( 6,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get( 6,e,f,g,h);
566  k_set( 7,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get( 7,e,f,g,h);
567  k_set( 8,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 8,e,f,g,h);
568  k_set( 9,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 9,e,f,g,h);
569  k_set(10,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(10,e,f,g,h);
570  k_set(11,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(11,e,f,g,h);
571  k_set(12,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(12,e,f,g,h);
572  k_set(13,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(13,e,f,g,h);
573  k_set(14,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(14,e,f,g,h);
574  k_set(15,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(15,e,f,g,h);
575  k_set(16,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(16,e,f,g,h);
576  k_set(17,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(17,e,f,g,h);
577  k_set(18,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(18,e,f,g,h);
578  k_set(19,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(19,e,f,g,h);
579  k_set(20,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(20,e,f,g,h);
580  k_set(21,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(21,e,f,g,h);
581  k_set(22,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(22,e,f,g,h);
582  k_set(23,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(23,e,f,g,h);
583  k_set(24,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(24,e,f,g,h);
584  k_set(25,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(25,e,f,g,h);
585  k_set(26,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(26,e,f,g,h);
586  k_set(27,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(27,e,f,g,h);
587  k_set(28,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(28,e,f,g,h);
588  k_set(29,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(29,e,f,g,h);
589  k_set(30,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(30,e,f,g,h);
590  k_set(31,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(31,e,f,g,h);
591  k_set(32,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(32,e,f,g,h);
592}
593
594DECLSPEC void serpent128_encrypt (const u32 *ks, const u32 *in, u32 *out)
595{
596  u32  a,b,c,d,e,f,g,h;
597  u32  t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
598
599  a = in[0];
600  b = in[1];
601  c = in[2];
602  d = in[3];
603
604  k_xor( 0,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
605  k_xor( 1,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
606  k_xor( 2,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
607  k_xor( 3,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
608  k_xor( 4,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
609  k_xor( 5,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
610  k_xor( 6,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
611  k_xor( 7,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
612  k_xor( 8,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
613  k_xor( 9,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
614  k_xor(10,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
615  k_xor(11,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
616  k_xor(12,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
617  k_xor(13,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
618  k_xor(14,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
619  k_xor(15,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
620  k_xor(16,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
621  k_xor(17,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
622  k_xor(18,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
623  k_xor(19,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
624  k_xor(20,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
625  k_xor(21,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
626  k_xor(22,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
627  k_xor(23,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
628  k_xor(24,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
629  k_xor(25,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
630  k_xor(26,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
631  k_xor(27,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
632  k_xor(28,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
633  k_xor(29,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
634  k_xor(30,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
635  k_xor(31,e,f,g,h); sb7(e,f,g,h,a,b,c,d);
636  k_xor(32,a,b,c,d);
637
638  out[0] = a;
639  out[1] = b;
640  out[2] = c;
641  out[3] = d;
642}
643
644DECLSPEC void serpent128_decrypt (const u32 *ks, const u32 *in, u32 *out)
645{
646  u32  a,b,c,d,e,f,g,h;
647  u32  t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
648
649  a = in[0];
650  b = in[1];
651  c = in[2];
652  d = in[3];
653
654                                       k_xor(32,a,b,c,d);
655                 ib7(a,b,c,d,e,f,g,h); k_xor(31,e,f,g,h);
656  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(30,a,b,c,d);
657  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(29,e,f,g,h);
658  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(28,a,b,c,d);
659  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(27,e,f,g,h);
660  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(26,a,b,c,d);
661  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(25,e,f,g,h);
662  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(24,a,b,c,d);
663  irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(23,e,f,g,h);
664  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(22,a,b,c,d);
665  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(21,e,f,g,h);
666  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(20,a,b,c,d);
667  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(19,e,f,g,h);
668  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(18,a,b,c,d);
669  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(17,e,f,g,h);
670  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(16,a,b,c,d);
671  irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(15,e,f,g,h);
672  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(14,a,b,c,d);
673  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(13,e,f,g,h);
674  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(12,a,b,c,d);
675  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(11,e,f,g,h);
676  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(10,a,b,c,d);
677  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 9,e,f,g,h);
678  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 8,a,b,c,d);
679  irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor( 7,e,f,g,h);
680  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor( 6,a,b,c,d);
681  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor( 5,e,f,g,h);
682  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor( 4,a,b,c,d);
683  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor( 3,e,f,g,h);
684  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor( 2,a,b,c,d);
685  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 1,e,f,g,h);
686  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 0,a,b,c,d);
687
688  out[0] = a;
689  out[1] = b;
690  out[2] = c;
691  out[3] = d;
692}
693
694// 192 bit key
695
696DECLSPEC void serpent192_set_key (u32 *ks, const u32 *ukey)
697{
698  ks[  0] = ukey[0];
699  ks[  1] = ukey[1];
700  ks[  2] = ukey[2];
701  ks[  3] = ukey[3];
702  ks[  4] = ukey[4];
703  ks[  5] = ukey[5];
704  ks[  6] = 1;
705  ks[  7] = 0;
706  ks[  8] = hc_rotl32_S ((ks[  7] ^ ks[  5] ^ ks[  3] ^ ks[  0] ^ 0x9e3779b9 ^   0), 11);
707  ks[  9] = hc_rotl32_S ((ks[  8] ^ ks[  6] ^ ks[  4] ^ ks[  1] ^ 0x9e3779b9 ^   1), 11);
708  ks[ 10] = hc_rotl32_S ((ks[  9] ^ ks[  7] ^ ks[  5] ^ ks[  2] ^ 0x9e3779b9 ^   2), 11);
709  ks[ 11] = hc_rotl32_S ((ks[ 10] ^ ks[  8] ^ ks[  6] ^ ks[  3] ^ 0x9e3779b9 ^   3), 11);
710  ks[ 12] = hc_rotl32_S ((ks[ 11] ^ ks[  9] ^ ks[  7] ^ ks[  4] ^ 0x9e3779b9 ^   4), 11);
711  ks[ 13] = hc_rotl32_S ((ks[ 12] ^ ks[ 10] ^ ks[  8] ^ ks[  5] ^ 0x9e3779b9 ^   5), 11);
712  ks[ 14] = hc_rotl32_S ((ks[ 13] ^ ks[ 11] ^ ks[  9] ^ ks[  6] ^ 0x9e3779b9 ^   6), 11);
713  ks[ 15] = hc_rotl32_S ((ks[ 14] ^ ks[ 12] ^ ks[ 10] ^ ks[  7] ^ 0x9e3779b9 ^   7), 11);
714  ks[ 16] = hc_rotl32_S ((ks[ 15] ^ ks[ 13] ^ ks[ 11] ^ ks[  8] ^ 0x9e3779b9 ^   8), 11);
715  ks[ 17] = hc_rotl32_S ((ks[ 16] ^ ks[ 14] ^ ks[ 12] ^ ks[  9] ^ 0x9e3779b9 ^   9), 11);
716  ks[ 18] = hc_rotl32_S ((ks[ 17] ^ ks[ 15] ^ ks[ 13] ^ ks[ 10] ^ 0x9e3779b9 ^  10), 11);
717  ks[ 19] = hc_rotl32_S ((ks[ 18] ^ ks[ 16] ^ ks[ 14] ^ ks[ 11] ^ 0x9e3779b9 ^  11), 11);
718  ks[ 20] = hc_rotl32_S ((ks[ 19] ^ ks[ 17] ^ ks[ 15] ^ ks[ 12] ^ 0x9e3779b9 ^  12), 11);
719  ks[ 21] = hc_rotl32_S ((ks[ 20] ^ ks[ 18] ^ ks[ 16] ^ ks[ 13] ^ 0x9e3779b9 ^  13), 11);
720  ks[ 22] = hc_rotl32_S ((ks[ 21] ^ ks[ 19] ^ ks[ 17] ^ ks[ 14] ^ 0x9e3779b9 ^  14), 11);
721  ks[ 23] = hc_rotl32_S ((ks[ 22] ^ ks[ 20] ^ ks[ 18] ^ ks[ 15] ^ 0x9e3779b9 ^  15), 11);
722  ks[ 24] = hc_rotl32_S ((ks[ 23] ^ ks[ 21] ^ ks[ 19] ^ ks[ 16] ^ 0x9e3779b9 ^  16), 11);
723  ks[ 25] = hc_rotl32_S ((ks[ 24] ^ ks[ 22] ^ ks[ 20] ^ ks[ 17] ^ 0x9e3779b9 ^  17), 11);
724  ks[ 26] = hc_rotl32_S ((ks[ 25] ^ ks[ 23] ^ ks[ 21] ^ ks[ 18] ^ 0x9e3779b9 ^  18), 11);
725  ks[ 27] = hc_rotl32_S ((ks[ 26] ^ ks[ 24] ^ ks[ 22] ^ ks[ 19] ^ 0x9e3779b9 ^  19), 11);
726  ks[ 28] = hc_rotl32_S ((ks[ 27] ^ ks[ 25] ^ ks[ 23] ^ ks[ 20] ^ 0x9e3779b9 ^  20), 11);
727  ks[ 29] = hc_rotl32_S ((ks[ 28] ^ ks[ 26] ^ ks[ 24] ^ ks[ 21] ^ 0x9e3779b9 ^  21), 11);
728  ks[ 30] = hc_rotl32_S ((ks[ 29] ^ ks[ 27] ^ ks[ 25] ^ ks[ 22] ^ 0x9e3779b9 ^  22), 11);
729  ks[ 31] = hc_rotl32_S ((ks[ 30] ^ ks[ 28] ^ ks[ 26] ^ ks[ 23] ^ 0x9e3779b9 ^  23), 11);
730  ks[ 32] = hc_rotl32_S ((ks[ 31] ^ ks[ 29] ^ ks[ 27] ^ ks[ 24] ^ 0x9e3779b9 ^  24), 11);
731  ks[ 33] = hc_rotl32_S ((ks[ 32] ^ ks[ 30] ^ ks[ 28] ^ ks[ 25] ^ 0x9e3779b9 ^  25), 11);
732  ks[ 34] = hc_rotl32_S ((ks[ 33] ^ ks[ 31] ^ ks[ 29] ^ ks[ 26] ^ 0x9e3779b9 ^  26), 11);
733  ks[ 35] = hc_rotl32_S ((ks[ 34] ^ ks[ 32] ^ ks[ 30] ^ ks[ 27] ^ 0x9e3779b9 ^  27), 11);
734  ks[ 36] = hc_rotl32_S ((ks[ 35] ^ ks[ 33] ^ ks[ 31] ^ ks[ 28] ^ 0x9e3779b9 ^  28), 11);
735  ks[ 37] = hc_rotl32_S ((ks[ 36] ^ ks[ 34] ^ ks[ 32] ^ ks[ 29] ^ 0x9e3779b9 ^  29), 11);
736  ks[ 38] = hc_rotl32_S ((ks[ 37] ^ ks[ 35] ^ ks[ 33] ^ ks[ 30] ^ 0x9e3779b9 ^  30), 11);
737  ks[ 39] = hc_rotl32_S ((ks[ 38] ^ ks[ 36] ^ ks[ 34] ^ ks[ 31] ^ 0x9e3779b9 ^  31), 11);
738  ks[ 40] = hc_rotl32_S ((ks[ 39] ^ ks[ 37] ^ ks[ 35] ^ ks[ 32] ^ 0x9e3779b9 ^  32), 11);
739  ks[ 41] = hc_rotl32_S ((ks[ 40] ^ ks[ 38] ^ ks[ 36] ^ ks[ 33] ^ 0x9e3779b9 ^  33), 11);
740  ks[ 42] = hc_rotl32_S ((ks[ 41] ^ ks[ 39] ^ ks[ 37] ^ ks[ 34] ^ 0x9e3779b9 ^  34), 11);
741  ks[ 43] = hc_rotl32_S ((ks[ 42] ^ ks[ 40] ^ ks[ 38] ^ ks[ 35] ^ 0x9e3779b9 ^  35), 11);
742  ks[ 44] = hc_rotl32_S ((ks[ 43] ^ ks[ 41] ^ ks[ 39] ^ ks[ 36] ^ 0x9e3779b9 ^  36), 11);
743  ks[ 45] = hc_rotl32_S ((ks[ 44] ^ ks[ 42] ^ ks[ 40] ^ ks[ 37] ^ 0x9e3779b9 ^  37), 11);
744  ks[ 46] = hc_rotl32_S ((ks[ 45] ^ ks[ 43] ^ ks[ 41] ^ ks[ 38] ^ 0x9e3779b9 ^  38), 11);
745  ks[ 47] = hc_rotl32_S ((ks[ 46] ^ ks[ 44] ^ ks[ 42] ^ ks[ 39] ^ 0x9e3779b9 ^  39), 11);
746  ks[ 48] = hc_rotl32_S ((ks[ 47] ^ ks[ 45] ^ ks[ 43] ^ ks[ 40] ^ 0x9e3779b9 ^  40), 11);
747  ks[ 49] = hc_rotl32_S ((ks[ 48] ^ ks[ 46] ^ ks[ 44] ^ ks[ 41] ^ 0x9e3779b9 ^  41), 11);
748  ks[ 50] = hc_rotl32_S ((ks[ 49] ^ ks[ 47] ^ ks[ 45] ^ ks[ 42] ^ 0x9e3779b9 ^  42), 11);
749  ks[ 51] = hc_rotl32_S ((ks[ 50] ^ ks[ 48] ^ ks[ 46] ^ ks[ 43] ^ 0x9e3779b9 ^  43), 11);
750  ks[ 52] = hc_rotl32_S ((ks[ 51] ^ ks[ 49] ^ ks[ 47] ^ ks[ 44] ^ 0x9e3779b9 ^  44), 11);
751  ks[ 53] = hc_rotl32_S ((ks[ 52] ^ ks[ 50] ^ ks[ 48] ^ ks[ 45] ^ 0x9e3779b9 ^  45), 11);
752  ks[ 54] = hc_rotl32_S ((ks[ 53] ^ ks[ 51] ^ ks[ 49] ^ ks[ 46] ^ 0x9e3779b9 ^  46), 11);
753  ks[ 55] = hc_rotl32_S ((ks[ 54] ^ ks[ 52] ^ ks[ 50] ^ ks[ 47] ^ 0x9e3779b9 ^  47), 11);
754  ks[ 56] = hc_rotl32_S ((ks[ 55] ^ ks[ 53] ^ ks[ 51] ^ ks[ 48] ^ 0x9e3779b9 ^  48), 11);
755  ks[ 57] = hc_rotl32_S ((ks[ 56] ^ ks[ 54] ^ ks[ 52] ^ ks[ 49] ^ 0x9e3779b9 ^  49), 11);
756  ks[ 58] = hc_rotl32_S ((ks[ 57] ^ ks[ 55] ^ ks[ 53] ^ ks[ 50] ^ 0x9e3779b9 ^  50), 11);
757  ks[ 59] = hc_rotl32_S ((ks[ 58] ^ ks[ 56] ^ ks[ 54] ^ ks[ 51] ^ 0x9e3779b9 ^  51), 11);
758  ks[ 60] = hc_rotl32_S ((ks[ 59] ^ ks[ 57] ^ ks[ 55] ^ ks[ 52] ^ 0x9e3779b9 ^  52), 11);
759  ks[ 61] = hc_rotl32_S ((ks[ 60] ^ ks[ 58] ^ ks[ 56] ^ ks[ 53] ^ 0x9e3779b9 ^  53), 11);
760  ks[ 62] = hc_rotl32_S ((ks[ 61] ^ ks[ 59] ^ ks[ 57] ^ ks[ 54] ^ 0x9e3779b9 ^  54), 11);
761  ks[ 63] = hc_rotl32_S ((ks[ 62] ^ ks[ 60] ^ ks[ 58] ^ ks[ 55] ^ 0x9e3779b9 ^  55), 11);
762  ks[ 64] = hc_rotl32_S ((ks[ 63] ^ ks[ 61] ^ ks[ 59] ^ ks[ 56] ^ 0x9e3779b9 ^  56), 11);
763  ks[ 65] = hc_rotl32_S ((ks[ 64] ^ ks[ 62] ^ ks[ 60] ^ ks[ 57] ^ 0x9e3779b9 ^  57), 11);
764  ks[ 66] = hc_rotl32_S ((ks[ 65] ^ ks[ 63] ^ ks[ 61] ^ ks[ 58] ^ 0x9e3779b9 ^  58), 11);
765  ks[ 67] = hc_rotl32_S ((ks[ 66] ^ ks[ 64] ^ ks[ 62] ^ ks[ 59] ^ 0x9e3779b9 ^  59), 11);
766  ks[ 68] = hc_rotl32_S ((ks[ 67] ^ ks[ 65] ^ ks[ 63] ^ ks[ 60] ^ 0x9e3779b9 ^  60), 11);
767  ks[ 69] = hc_rotl32_S ((ks[ 68] ^ ks[ 66] ^ ks[ 64] ^ ks[ 61] ^ 0x9e3779b9 ^  61), 11);
768  ks[ 70] = hc_rotl32_S ((ks[ 69] ^ ks[ 67] ^ ks[ 65] ^ ks[ 62] ^ 0x9e3779b9 ^  62), 11);
769  ks[ 71] = hc_rotl32_S ((ks[ 70] ^ ks[ 68] ^ ks[ 66] ^ ks[ 63] ^ 0x9e3779b9 ^  63), 11);
770  ks[ 72] = hc_rotl32_S ((ks[ 71] ^ ks[ 69] ^ ks[ 67] ^ ks[ 64] ^ 0x9e3779b9 ^  64), 11);
771  ks[ 73] = hc_rotl32_S ((ks[ 72] ^ ks[ 70] ^ ks[ 68] ^ ks[ 65] ^ 0x9e3779b9 ^  65), 11);
772  ks[ 74] = hc_rotl32_S ((ks[ 73] ^ ks[ 71] ^ ks[ 69] ^ ks[ 66] ^ 0x9e3779b9 ^  66), 11);
773  ks[ 75] = hc_rotl32_S ((ks[ 74] ^ ks[ 72] ^ ks[ 70] ^ ks[ 67] ^ 0x9e3779b9 ^  67), 11);
774  ks[ 76] = hc_rotl32_S ((ks[ 75] ^ ks[ 73] ^ ks[ 71] ^ ks[ 68] ^ 0x9e3779b9 ^  68), 11);
775  ks[ 77] = hc_rotl32_S ((ks[ 76] ^ ks[ 74] ^ ks[ 72] ^ ks[ 69] ^ 0x9e3779b9 ^  69), 11);
776  ks[ 78] = hc_rotl32_S ((ks[ 77] ^ ks[ 75] ^ ks[ 73] ^ ks[ 70] ^ 0x9e3779b9 ^  70), 11);
777  ks[ 79] = hc_rotl32_S ((ks[ 78] ^ ks[ 76] ^ ks[ 74] ^ ks[ 71] ^ 0x9e3779b9 ^  71), 11);
778  ks[ 80] = hc_rotl32_S ((ks[ 79] ^ ks[ 77] ^ ks[ 75] ^ ks[ 72] ^ 0x9e3779b9 ^  72), 11);
779  ks[ 81] = hc_rotl32_S ((ks[ 80] ^ ks[ 78] ^ ks[ 76] ^ ks[ 73] ^ 0x9e3779b9 ^  73), 11);
780  ks[ 82] = hc_rotl32_S ((ks[ 81] ^ ks[ 79] ^ ks[ 77] ^ ks[ 74] ^ 0x9e3779b9 ^  74), 11);
781  ks[ 83] = hc_rotl32_S ((ks[ 82] ^ ks[ 80] ^ ks[ 78] ^ ks[ 75] ^ 0x9e3779b9 ^  75), 11);
782  ks[ 84] = hc_rotl32_S ((ks[ 83] ^ ks[ 81] ^ ks[ 79] ^ ks[ 76] ^ 0x9e3779b9 ^  76), 11);
783  ks[ 85] = hc_rotl32_S ((ks[ 84] ^ ks[ 82] ^ ks[ 80] ^ ks[ 77] ^ 0x9e3779b9 ^  77), 11);
784  ks[ 86] = hc_rotl32_S ((ks[ 85] ^ ks[ 83] ^ ks[ 81] ^ ks[ 78] ^ 0x9e3779b9 ^  78), 11);
785  ks[ 87] = hc_rotl32_S ((ks[ 86] ^ ks[ 84] ^ ks[ 82] ^ ks[ 79] ^ 0x9e3779b9 ^  79), 11);
786  ks[ 88] = hc_rotl32_S ((ks[ 87] ^ ks[ 85] ^ ks[ 83] ^ ks[ 80] ^ 0x9e3779b9 ^  80), 11);
787  ks[ 89] = hc_rotl32_S ((ks[ 88] ^ ks[ 86] ^ ks[ 84] ^ ks[ 81] ^ 0x9e3779b9 ^  81), 11);
788  ks[ 90] = hc_rotl32_S ((ks[ 89] ^ ks[ 87] ^ ks[ 85] ^ ks[ 82] ^ 0x9e3779b9 ^  82), 11);
789  ks[ 91] = hc_rotl32_S ((ks[ 90] ^ ks[ 88] ^ ks[ 86] ^ ks[ 83] ^ 0x9e3779b9 ^  83), 11);
790  ks[ 92] = hc_rotl32_S ((ks[ 91] ^ ks[ 89] ^ ks[ 87] ^ ks[ 84] ^ 0x9e3779b9 ^  84), 11);
791  ks[ 93] = hc_rotl32_S ((ks[ 92] ^ ks[ 90] ^ ks[ 88] ^ ks[ 85] ^ 0x9e3779b9 ^  85), 11);
792  ks[ 94] = hc_rotl32_S ((ks[ 93] ^ ks[ 91] ^ ks[ 89] ^ ks[ 86] ^ 0x9e3779b9 ^  86), 11);
793  ks[ 95] = hc_rotl32_S ((ks[ 94] ^ ks[ 92] ^ ks[ 90] ^ ks[ 87] ^ 0x9e3779b9 ^  87), 11);
794  ks[ 96] = hc_rotl32_S ((ks[ 95] ^ ks[ 93] ^ ks[ 91] ^ ks[ 88] ^ 0x9e3779b9 ^  88), 11);
795  ks[ 97] = hc_rotl32_S ((ks[ 96] ^ ks[ 94] ^ ks[ 92] ^ ks[ 89] ^ 0x9e3779b9 ^  89), 11);
796  ks[ 98] = hc_rotl32_S ((ks[ 97] ^ ks[ 95] ^ ks[ 93] ^ ks[ 90] ^ 0x9e3779b9 ^  90), 11);
797  ks[ 99] = hc_rotl32_S ((ks[ 98] ^ ks[ 96] ^ ks[ 94] ^ ks[ 91] ^ 0x9e3779b9 ^  91), 11);
798  ks[100] = hc_rotl32_S ((ks[ 99] ^ ks[ 97] ^ ks[ 95] ^ ks[ 92] ^ 0x9e3779b9 ^  92), 11);
799  ks[101] = hc_rotl32_S ((ks[100] ^ ks[ 98] ^ ks[ 96] ^ ks[ 93] ^ 0x9e3779b9 ^  93), 11);
800  ks[102] = hc_rotl32_S ((ks[101] ^ ks[ 99] ^ ks[ 97] ^ ks[ 94] ^ 0x9e3779b9 ^  94), 11);
801  ks[103] = hc_rotl32_S ((ks[102] ^ ks[100] ^ ks[ 98] ^ ks[ 95] ^ 0x9e3779b9 ^  95), 11);
802  ks[104] = hc_rotl32_S ((ks[103] ^ ks[101] ^ ks[ 99] ^ ks[ 96] ^ 0x9e3779b9 ^  96), 11);
803  ks[105] = hc_rotl32_S ((ks[104] ^ ks[102] ^ ks[100] ^ ks[ 97] ^ 0x9e3779b9 ^  97), 11);
804  ks[106] = hc_rotl32_S ((ks[105] ^ ks[103] ^ ks[101] ^ ks[ 98] ^ 0x9e3779b9 ^  98), 11);
805  ks[107] = hc_rotl32_S ((ks[106] ^ ks[104] ^ ks[102] ^ ks[ 99] ^ 0x9e3779b9 ^  99), 11);
806  ks[108] = hc_rotl32_S ((ks[107] ^ ks[105] ^ ks[103] ^ ks[100] ^ 0x9e3779b9 ^ 100), 11);
807  ks[109] = hc_rotl32_S ((ks[108] ^ ks[106] ^ ks[104] ^ ks[101] ^ 0x9e3779b9 ^ 101), 11);
808  ks[110] = hc_rotl32_S ((ks[109] ^ ks[107] ^ ks[105] ^ ks[102] ^ 0x9e3779b9 ^ 102), 11);
809  ks[111] = hc_rotl32_S ((ks[110] ^ ks[108] ^ ks[106] ^ ks[103] ^ 0x9e3779b9 ^ 103), 11);
810  ks[112] = hc_rotl32_S ((ks[111] ^ ks[109] ^ ks[107] ^ ks[104] ^ 0x9e3779b9 ^ 104), 11);
811  ks[113] = hc_rotl32_S ((ks[112] ^ ks[110] ^ ks[108] ^ ks[105] ^ 0x9e3779b9 ^ 105), 11);
812  ks[114] = hc_rotl32_S ((ks[113] ^ ks[111] ^ ks[109] ^ ks[106] ^ 0x9e3779b9 ^ 106), 11);
813  ks[115] = hc_rotl32_S ((ks[114] ^ ks[112] ^ ks[110] ^ ks[107] ^ 0x9e3779b9 ^ 107), 11);
814  ks[116] = hc_rotl32_S ((ks[115] ^ ks[113] ^ ks[111] ^ ks[108] ^ 0x9e3779b9 ^ 108), 11);
815  ks[117] = hc_rotl32_S ((ks[116] ^ ks[114] ^ ks[112] ^ ks[109] ^ 0x9e3779b9 ^ 109), 11);
816  ks[118] = hc_rotl32_S ((ks[117] ^ ks[115] ^ ks[113] ^ ks[110] ^ 0x9e3779b9 ^ 110), 11);
817  ks[119] = hc_rotl32_S ((ks[118] ^ ks[116] ^ ks[114] ^ ks[111] ^ 0x9e3779b9 ^ 111), 11);
818  ks[120] = hc_rotl32_S ((ks[119] ^ ks[117] ^ ks[115] ^ ks[112] ^ 0x9e3779b9 ^ 112), 11);
819  ks[121] = hc_rotl32_S ((ks[120] ^ ks[118] ^ ks[116] ^ ks[113] ^ 0x9e3779b9 ^ 113), 11);
820  ks[122] = hc_rotl32_S ((ks[121] ^ ks[119] ^ ks[117] ^ ks[114] ^ 0x9e3779b9 ^ 114), 11);
821  ks[123] = hc_rotl32_S ((ks[122] ^ ks[120] ^ ks[118] ^ ks[115] ^ 0x9e3779b9 ^ 115), 11);
822  ks[124] = hc_rotl32_S ((ks[123] ^ ks[121] ^ ks[119] ^ ks[116] ^ 0x9e3779b9 ^ 116), 11);
823  ks[125] = hc_rotl32_S ((ks[124] ^ ks[122] ^ ks[120] ^ ks[117] ^ 0x9e3779b9 ^ 117), 11);
824  ks[126] = hc_rotl32_S ((ks[125] ^ ks[123] ^ ks[121] ^ ks[118] ^ 0x9e3779b9 ^ 118), 11);
825  ks[127] = hc_rotl32_S ((ks[126] ^ ks[124] ^ ks[122] ^ ks[119] ^ 0x9e3779b9 ^ 119), 11);
826  ks[128] = hc_rotl32_S ((ks[127] ^ ks[125] ^ ks[123] ^ ks[120] ^ 0x9e3779b9 ^ 120), 11);
827  ks[129] = hc_rotl32_S ((ks[128] ^ ks[126] ^ ks[124] ^ ks[121] ^ 0x9e3779b9 ^ 121), 11);
828  ks[130] = hc_rotl32_S ((ks[129] ^ ks[127] ^ ks[125] ^ ks[122] ^ 0x9e3779b9 ^ 122), 11);
829  ks[131] = hc_rotl32_S ((ks[130] ^ ks[128] ^ ks[126] ^ ks[123] ^ 0x9e3779b9 ^ 123), 11);
830  ks[132] = hc_rotl32_S ((ks[131] ^ ks[129] ^ ks[127] ^ ks[124] ^ 0x9e3779b9 ^ 124), 11);
831  ks[133] = hc_rotl32_S ((ks[132] ^ ks[130] ^ ks[128] ^ ks[125] ^ 0x9e3779b9 ^ 125), 11);
832  ks[134] = hc_rotl32_S ((ks[133] ^ ks[131] ^ ks[129] ^ ks[126] ^ 0x9e3779b9 ^ 126), 11);
833  ks[135] = hc_rotl32_S ((ks[134] ^ ks[132] ^ ks[130] ^ ks[127] ^ 0x9e3779b9 ^ 127), 11);
834  ks[136] = hc_rotl32_S ((ks[135] ^ ks[133] ^ ks[131] ^ ks[128] ^ 0x9e3779b9 ^ 128), 11);
835  ks[137] = hc_rotl32_S ((ks[136] ^ ks[134] ^ ks[132] ^ ks[129] ^ 0x9e3779b9 ^ 129), 11);
836  ks[138] = hc_rotl32_S ((ks[137] ^ ks[135] ^ ks[133] ^ ks[130] ^ 0x9e3779b9 ^ 130), 11);
837  ks[139] = hc_rotl32_S ((ks[138] ^ ks[136] ^ ks[134] ^ ks[131] ^ 0x9e3779b9 ^ 131), 11);
838
839  u32  a,b,c,d,e,f,g,h;
840  u32  t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
841
842  k_set( 0,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 0,e,f,g,h);
843  k_set( 1,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 1,e,f,g,h);
844  k_set( 2,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get( 2,e,f,g,h);
845  k_set( 3,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get( 3,e,f,g,h);
846  k_set( 4,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get( 4,e,f,g,h);
847  k_set( 5,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get( 5,e,f,g,h);
848  k_set( 6,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get( 6,e,f,g,h);
849  k_set( 7,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get( 7,e,f,g,h);
850  k_set( 8,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 8,e,f,g,h);
851  k_set( 9,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 9,e,f,g,h);
852  k_set(10,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(10,e,f,g,h);
853  k_set(11,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(11,e,f,g,h);
854  k_set(12,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(12,e,f,g,h);
855  k_set(13,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(13,e,f,g,h);
856  k_set(14,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(14,e,f,g,h);
857  k_set(15,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(15,e,f,g,h);
858  k_set(16,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(16,e,f,g,h);
859  k_set(17,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(17,e,f,g,h);
860  k_set(18,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(18,e,f,g,h);
861  k_set(19,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(19,e,f,g,h);
862  k_set(20,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(20,e,f,g,h);
863  k_set(21,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(21,e,f,g,h);
864  k_set(22,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(22,e,f,g,h);
865  k_set(23,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(23,e,f,g,h);
866  k_set(24,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(24,e,f,g,h);
867  k_set(25,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(25,e,f,g,h);
868  k_set(26,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(26,e,f,g,h);
869  k_set(27,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(27,e,f,g,h);
870  k_set(28,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(28,e,f,g,h);
871  k_set(29,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(29,e,f,g,h);
872  k_set(30,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(30,e,f,g,h);
873  k_set(31,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(31,e,f,g,h);
874  k_set(32,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(32,e,f,g,h);
875}
876
877DECLSPEC void serpent192_encrypt (const u32 *ks, const u32 *in, u32 *out)
878{
879  u32  a,b,c,d,e,f,g,h;
880  u32  t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
881
882  a = in[0];
883  b = in[1];
884  c = in[2];
885  d = in[3];
886
887  k_xor( 0,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
888  k_xor( 1,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
889  k_xor( 2,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
890  k_xor( 3,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
891  k_xor( 4,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
892  k_xor( 5,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
893  k_xor( 6,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
894  k_xor( 7,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
895  k_xor( 8,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
896  k_xor( 9,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
897  k_xor(10,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
898  k_xor(11,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
899  k_xor(12,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
900  k_xor(13,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
901  k_xor(14,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
902  k_xor(15,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
903  k_xor(16,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
904  k_xor(17,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
905  k_xor(18,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
906  k_xor(19,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
907  k_xor(20,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
908  k_xor(21,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
909  k_xor(22,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
910  k_xor(23,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
911  k_xor(24,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
912  k_xor(25,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
913  k_xor(26,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
914  k_xor(27,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
915  k_xor(28,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
916  k_xor(29,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
917  k_xor(30,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
918  k_xor(31,e,f,g,h); sb7(e,f,g,h,a,b,c,d);
919  k_xor(32,a,b,c,d);
920
921  out[0] = a;
922  out[1] = b;
923  out[2] = c;
924  out[3] = d;
925}
926
927DECLSPEC void serpent192_decrypt (const u32 *ks, const u32 *in, u32 *out)
928{
929  u32  a,b,c,d,e,f,g,h;
930  u32  t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
931
932  a = in[0];
933  b = in[1];
934  c = in[2];
935  d = in[3];
936
937                                       k_xor(32,a,b,c,d);
938                 ib7(a,b,c,d,e,f,g,h); k_xor(31,e,f,g,h);
939  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(30,a,b,c,d);
940  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(29,e,f,g,h);
941  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(28,a,b,c,d);
942  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(27,e,f,g,h);
943  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(26,a,b,c,d);
944  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(25,e,f,g,h);
945  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(24,a,b,c,d);
946  irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(23,e,f,g,h);
947  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(22,a,b,c,d);
948  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(21,e,f,g,h);
949  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(20,a,b,c,d);
950  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(19,e,f,g,h);
951  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(18,a,b,c,d);
952  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(17,e,f,g,h);
953  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(16,a,b,c,d);
954  irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(15,e,f,g,h);
955  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(14,a,b,c,d);
956  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(13,e,f,g,h);
957  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(12,a,b,c,d);
958  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(11,e,f,g,h);
959  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(10,a,b,c,d);
960  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 9,e,f,g,h);
961  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 8,a,b,c,d);
962  irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor( 7,e,f,g,h);
963  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor( 6,a,b,c,d);
964  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor( 5,e,f,g,h);
965  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor( 4,a,b,c,d);
966  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor( 3,e,f,g,h);
967  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor( 2,a,b,c,d);
968  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 1,e,f,g,h);
969  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 0,a,b,c,d);
970
971  out[0] = a;
972  out[1] = b;
973  out[2] = c;
974  out[3] = d;
975}
976
977// 256 bit key
978
979DECLSPEC void serpent256_set_key (u32 *ks, const u32 *ukey)
980{
981  ks[  0] = ukey[0];
982  ks[  1] = ukey[1];
983  ks[  2] = ukey[2];
984  ks[  3] = ukey[3];
985  ks[  4] = ukey[4];
986  ks[  5] = ukey[5];
987  ks[  6] = ukey[6];
988  ks[  7] = ukey[7];
989  ks[  8] = hc_rotl32_S ((ks[  7] ^ ks[  5] ^ ks[  3] ^ ks[  0] ^ 0x9e3779b9 ^   0), 11);
990  ks[  9] = hc_rotl32_S ((ks[  8] ^ ks[  6] ^ ks[  4] ^ ks[  1] ^ 0x9e3779b9 ^   1), 11);
991  ks[ 10] = hc_rotl32_S ((ks[  9] ^ ks[  7] ^ ks[  5] ^ ks[  2] ^ 0x9e3779b9 ^   2), 11);
992  ks[ 11] = hc_rotl32_S ((ks[ 10] ^ ks[  8] ^ ks[  6] ^ ks[  3] ^ 0x9e3779b9 ^   3), 11);
993  ks[ 12] = hc_rotl32_S ((ks[ 11] ^ ks[  9] ^ ks[  7] ^ ks[  4] ^ 0x9e3779b9 ^   4), 11);
994  ks[ 13] = hc_rotl32_S ((ks[ 12] ^ ks[ 10] ^ ks[  8] ^ ks[  5] ^ 0x9e3779b9 ^   5), 11);
995  ks[ 14] = hc_rotl32_S ((ks[ 13] ^ ks[ 11] ^ ks[  9] ^ ks[  6] ^ 0x9e3779b9 ^   6), 11);
996  ks[ 15] = hc_rotl32_S ((ks[ 14] ^ ks[ 12] ^ ks[ 10] ^ ks[  7] ^ 0x9e3779b9 ^   7), 11);
997  ks[ 16] = hc_rotl32_S ((ks[ 15] ^ ks[ 13] ^ ks[ 11] ^ ks[  8] ^ 0x9e3779b9 ^   8), 11);
998  ks[ 17] = hc_rotl32_S ((ks[ 16] ^ ks[ 14] ^ ks[ 12] ^ ks[  9] ^ 0x9e3779b9 ^   9), 11);
999  ks[ 18] = hc_rotl32_S ((ks[ 17] ^ ks[ 15] ^ ks[ 13] ^ ks[ 10] ^ 0x9e3779b9 ^  10), 11);
1000  ks[ 19] = hc_rotl32_S ((ks[ 18] ^ ks[ 16] ^ ks[ 14] ^ ks[ 11] ^ 0x9e3779b9 ^  11), 11);
1001  ks[ 20] = hc_rotl32_S ((ks[ 19] ^ ks[ 17] ^ ks[ 15] ^ ks[ 12] ^ 0x9e3779b9 ^  12), 11);
1002  ks[ 21] = hc_rotl32_S ((ks[ 20] ^ ks[ 18] ^ ks[ 16] ^ ks[ 13] ^ 0x9e3779b9 ^  13), 11);
1003  ks[ 22] = hc_rotl32_S ((ks[ 21] ^ ks[ 19] ^ ks[ 17] ^ ks[ 14] ^ 0x9e3779b9 ^  14), 11);
1004  ks[ 23] = hc_rotl32_S ((ks[ 22] ^ ks[ 20] ^ ks[ 18] ^ ks[ 15] ^ 0x9e3779b9 ^  15), 11);
1005  ks[ 24] = hc_rotl32_S ((ks[ 23] ^ ks[ 21] ^ ks[ 19] ^ ks[ 16] ^ 0x9e3779b9 ^  16), 11);
1006  ks[ 25] = hc_rotl32_S ((ks[ 24] ^ ks[ 22] ^ ks[ 20] ^ ks[ 17] ^ 0x9e3779b9 ^  17), 11);
1007  ks[ 26] = hc_rotl32_S ((ks[ 25] ^ ks[ 23] ^ ks[ 21] ^ ks[ 18] ^ 0x9e3779b9 ^  18), 11);
1008  ks[ 27] = hc_rotl32_S ((ks[ 26] ^ ks[ 24] ^ ks[ 22] ^ ks[ 19] ^ 0x9e3779b9 ^  19), 11);
1009  ks[ 28] = hc_rotl32_S ((ks[ 27] ^ ks[ 25] ^ ks[ 23] ^ ks[ 20] ^ 0x9e3779b9 ^  20), 11);
1010  ks[ 29] = hc_rotl32_S ((ks[ 28] ^ ks[ 26] ^ ks[ 24] ^ ks[ 21] ^ 0x9e3779b9 ^  21), 11);
1011  ks[ 30] = hc_rotl32_S ((ks[ 29] ^ ks[ 27] ^ ks[ 25] ^ ks[ 22] ^ 0x9e3779b9 ^  22), 11);
1012  ks[ 31] = hc_rotl32_S ((ks[ 30] ^ ks[ 28] ^ ks[ 26] ^ ks[ 23] ^ 0x9e3779b9 ^  23), 11);
1013  ks[ 32] = hc_rotl32_S ((ks[ 31] ^ ks[ 29] ^ ks[ 27] ^ ks[ 24] ^ 0x9e3779b9 ^  24), 11);
1014  ks[ 33] = hc_rotl32_S ((ks[ 32] ^ ks[ 30] ^ ks[ 28] ^ ks[ 25] ^ 0x9e3779b9 ^  25), 11);
1015  ks[ 34] = hc_rotl32_S ((ks[ 33] ^ ks[ 31] ^ ks[ 29] ^ ks[ 26] ^ 0x9e3779b9 ^  26), 11);
1016  ks[ 35] = hc_rotl32_S ((ks[ 34] ^ ks[ 32] ^ ks[ 30] ^ ks[ 27] ^ 0x9e3779b9 ^  27), 11);
1017  ks[ 36] = hc_rotl32_S ((ks[ 35] ^ ks[ 33] ^ ks[ 31] ^ ks[ 28] ^ 0x9e3779b9 ^  28), 11);
1018  ks[ 37] = hc_rotl32_S ((ks[ 36] ^ ks[ 34] ^ ks[ 32] ^ ks[ 29] ^ 0x9e3779b9 ^  29), 11);
1019  ks[ 38] = hc_rotl32_S ((ks[ 37] ^ ks[ 35] ^ ks[ 33] ^ ks[ 30] ^ 0x9e3779b9 ^  30), 11);
1020  ks[ 39] = hc_rotl32_S ((ks[ 38] ^ ks[ 36] ^ ks[ 34] ^ ks[ 31] ^ 0x9e3779b9 ^  31), 11);
1021  ks[ 40] = hc_rotl32_S ((ks[ 39] ^ ks[ 37] ^ ks[ 35] ^ ks[ 32] ^ 0x9e3779b9 ^  32), 11);
1022  ks[ 41] = hc_rotl32_S ((ks[ 40] ^ ks[ 38] ^ ks[ 36] ^ ks[ 33] ^ 0x9e3779b9 ^  33), 11);
1023  ks[ 42] = hc_rotl32_S ((ks[ 41] ^ ks[ 39] ^ ks[ 37] ^ ks[ 34] ^ 0x9e3779b9 ^  34), 11);
1024  ks[ 43] = hc_rotl32_S ((ks[ 42] ^ ks[ 40] ^ ks[ 38] ^ ks[ 35] ^ 0x9e3779b9 ^  35), 11);
1025  ks[ 44] = hc_rotl32_S ((ks[ 43] ^ ks[ 41] ^ ks[ 39] ^ ks[ 36] ^ 0x9e3779b9 ^  36), 11);
1026  ks[ 45] = hc_rotl32_S ((ks[ 44] ^ ks[ 42] ^ ks[ 40] ^ ks[ 37] ^ 0x9e3779b9 ^  37), 11);
1027  ks[ 46] = hc_rotl32_S ((ks[ 45] ^ ks[ 43] ^ ks[ 41] ^ ks[ 38] ^ 0x9e3779b9 ^  38), 11);
1028  ks[ 47] = hc_rotl32_S ((ks[ 46] ^ ks[ 44] ^ ks[ 42] ^ ks[ 39] ^ 0x9e3779b9 ^  39), 11);
1029  ks[ 48] = hc_rotl32_S ((ks[ 47] ^ ks[ 45] ^ ks[ 43] ^ ks[ 40] ^ 0x9e3779b9 ^  40), 11);
1030  ks[ 49] = hc_rotl32_S ((ks[ 48] ^ ks[ 46] ^ ks[ 44] ^ ks[ 41] ^ 0x9e3779b9 ^  41), 11);
1031  ks[ 50] = hc_rotl32_S ((ks[ 49] ^ ks[ 47] ^ ks[ 45] ^ ks[ 42] ^ 0x9e3779b9 ^  42), 11);
1032  ks[ 51] = hc_rotl32_S ((ks[ 50] ^ ks[ 48] ^ ks[ 46] ^ ks[ 43] ^ 0x9e3779b9 ^  43), 11);
1033  ks[ 52] = hc_rotl32_S ((ks[ 51] ^ ks[ 49] ^ ks[ 47] ^ ks[ 44] ^ 0x9e3779b9 ^  44), 11);
1034  ks[ 53] = hc_rotl32_S ((ks[ 52] ^ ks[ 50] ^ ks[ 48] ^ ks[ 45] ^ 0x9e3779b9 ^  45), 11);
1035  ks[ 54] = hc_rotl32_S ((ks[ 53] ^ ks[ 51] ^ ks[ 49] ^ ks[ 46] ^ 0x9e3779b9 ^  46), 11);
1036  ks[ 55] = hc_rotl32_S ((ks[ 54] ^ ks[ 52] ^ ks[ 50] ^ ks[ 47] ^ 0x9e3779b9 ^  47), 11);
1037  ks[ 56] = hc_rotl32_S ((ks[ 55] ^ ks[ 53] ^ ks[ 51] ^ ks[ 48] ^ 0x9e3779b9 ^  48), 11);
1038  ks[ 57] = hc_rotl32_S ((ks[ 56] ^ ks[ 54] ^ ks[ 52] ^ ks[ 49] ^ 0x9e3779b9 ^  49), 11);
1039  ks[ 58] = hc_rotl32_S ((ks[ 57] ^ ks[ 55] ^ ks[ 53] ^ ks[ 50] ^ 0x9e3779b9 ^  50), 11);
1040  ks[ 59] = hc_rotl32_S ((ks[ 58] ^ ks[ 56] ^ ks[ 54] ^ ks[ 51] ^ 0x9e3779b9 ^  51), 11);
1041  ks[ 60] = hc_rotl32_S ((ks[ 59] ^ ks[ 57] ^ ks[ 55] ^ ks[ 52] ^ 0x9e3779b9 ^  52), 11);
1042  ks[ 61] = hc_rotl32_S ((ks[ 60] ^ ks[ 58] ^ ks[ 56] ^ ks[ 53] ^ 0x9e3779b9 ^  53), 11);
1043  ks[ 62] = hc_rotl32_S ((ks[ 61] ^ ks[ 59] ^ ks[ 57] ^ ks[ 54] ^ 0x9e3779b9 ^  54), 11);
1044  ks[ 63] = hc_rotl32_S ((ks[ 62] ^ ks[ 60] ^ ks[ 58] ^ ks[ 55] ^ 0x9e3779b9 ^  55), 11);
1045  ks[ 64] = hc_rotl32_S ((ks[ 63] ^ ks[ 61] ^ ks[ 59] ^ ks[ 56] ^ 0x9e3779b9 ^  56), 11);
1046  ks[ 65] = hc_rotl32_S ((ks[ 64] ^ ks[ 62] ^ ks[ 60] ^ ks[ 57] ^ 0x9e3779b9 ^  57), 11);
1047  ks[ 66] = hc_rotl32_S ((ks[ 65] ^ ks[ 63] ^ ks[ 61] ^ ks[ 58] ^ 0x9e3779b9 ^  58), 11);
1048  ks[ 67] = hc_rotl32_S ((ks[ 66] ^ ks[ 64] ^ ks[ 62] ^ ks[ 59] ^ 0x9e3779b9 ^  59), 11);
1049  ks[ 68] = hc_rotl32_S ((ks[ 67] ^ ks[ 65] ^ ks[ 63] ^ ks[ 60] ^ 0x9e3779b9 ^  60), 11);
1050  ks[ 69] = hc_rotl32_S ((ks[ 68] ^ ks[ 66] ^ ks[ 64] ^ ks[ 61] ^ 0x9e3779b9 ^  61), 11);
1051  ks[ 70] = hc_rotl32_S ((ks[ 69] ^ ks[ 67] ^ ks[ 65] ^ ks[ 62] ^ 0x9e3779b9 ^  62), 11);
1052  ks[ 71] = hc_rotl32_S ((ks[ 70] ^ ks[ 68] ^ ks[ 66] ^ ks[ 63] ^ 0x9e3779b9 ^  63), 11);
1053  ks[ 72] = hc_rotl32_S ((ks[ 71] ^ ks[ 69] ^ ks[ 67] ^ ks[ 64] ^ 0x9e3779b9 ^  64), 11);
1054  ks[ 73] = hc_rotl32_S ((ks[ 72] ^ ks[ 70] ^ ks[ 68] ^ ks[ 65] ^ 0x9e3779b9 ^  65), 11);
1055  ks[ 74] = hc_rotl32_S ((ks[ 73] ^ ks[ 71] ^ ks[ 69] ^ ks[ 66] ^ 0x9e3779b9 ^  66), 11);
1056  ks[ 75] = hc_rotl32_S ((ks[ 74] ^ ks[ 72] ^ ks[ 70] ^ ks[ 67] ^ 0x9e3779b9 ^  67), 11);
1057  ks[ 76] = hc_rotl32_S ((ks[ 75] ^ ks[ 73] ^ ks[ 71] ^ ks[ 68] ^ 0x9e3779b9 ^  68), 11);
1058  ks[ 77] = hc_rotl32_S ((ks[ 76] ^ ks[ 74] ^ ks[ 72] ^ ks[ 69] ^ 0x9e3779b9 ^  69), 11);
1059  ks[ 78] = hc_rotl32_S ((ks[ 77] ^ ks[ 75] ^ ks[ 73] ^ ks[ 70] ^ 0x9e3779b9 ^  70), 11);
1060  ks[ 79] = hc_rotl32_S ((ks[ 78] ^ ks[ 76] ^ ks[ 74] ^ ks[ 71] ^ 0x9e3779b9 ^  71), 11);
1061  ks[ 80] = hc_rotl32_S ((ks[ 79] ^ ks[ 77] ^ ks[ 75] ^ ks[ 72] ^ 0x9e3779b9 ^  72), 11);
1062  ks[ 81] = hc_rotl32_S ((ks[ 80] ^ ks[ 78] ^ ks[ 76] ^ ks[ 73] ^ 0x9e3779b9 ^  73), 11);
1063  ks[ 82] = hc_rotl32_S ((ks[ 81] ^ ks[ 79] ^ ks[ 77] ^ ks[ 74] ^ 0x9e3779b9 ^  74), 11);
1064  ks[ 83] = hc_rotl32_S ((ks[ 82] ^ ks[ 80] ^ ks[ 78] ^ ks[ 75] ^ 0x9e3779b9 ^  75), 11);
1065  ks[ 84] = hc_rotl32_S ((ks[ 83] ^ ks[ 81] ^ ks[ 79] ^ ks[ 76] ^ 0x9e3779b9 ^  76), 11);
1066  ks[ 85] = hc_rotl32_S ((ks[ 84] ^ ks[ 82] ^ ks[ 80] ^ ks[ 77] ^ 0x9e3779b9 ^  77), 11);
1067  ks[ 86] = hc_rotl32_S ((ks[ 85] ^ ks[ 83] ^ ks[ 81] ^ ks[ 78] ^ 0x9e3779b9 ^  78), 11);
1068  ks[ 87] = hc_rotl32_S ((ks[ 86] ^ ks[ 84] ^ ks[ 82] ^ ks[ 79] ^ 0x9e3779b9 ^  79), 11);
1069  ks[ 88] = hc_rotl32_S ((ks[ 87] ^ ks[ 85] ^ ks[ 83] ^ ks[ 80] ^ 0x9e3779b9 ^  80), 11);
1070  ks[ 89] = hc_rotl32_S ((ks[ 88] ^ ks[ 86] ^ ks[ 84] ^ ks[ 81] ^ 0x9e3779b9 ^  81), 11);
1071  ks[ 90] = hc_rotl32_S ((ks[ 89] ^ ks[ 87] ^ ks[ 85] ^ ks[ 82] ^ 0x9e3779b9 ^  82), 11);
1072  ks[ 91] = hc_rotl32_S ((ks[ 90] ^ ks[ 88] ^ ks[ 86] ^ ks[ 83] ^ 0x9e3779b9 ^  83), 11);
1073  ks[ 92] = hc_rotl32_S ((ks[ 91] ^ ks[ 89] ^ ks[ 87] ^ ks[ 84] ^ 0x9e3779b9 ^  84), 11);
1074  ks[ 93] = hc_rotl32_S ((ks[ 92] ^ ks[ 90] ^ ks[ 88] ^ ks[ 85] ^ 0x9e3779b9 ^  85), 11);
1075  ks[ 94] = hc_rotl32_S ((ks[ 93] ^ ks[ 91] ^ ks[ 89] ^ ks[ 86] ^ 0x9e3779b9 ^  86), 11);
1076  ks[ 95] = hc_rotl32_S ((ks[ 94] ^ ks[ 92] ^ ks[ 90] ^ ks[ 87] ^ 0x9e3779b9 ^  87), 11);
1077  ks[ 96] = hc_rotl32_S ((ks[ 95] ^ ks[ 93] ^ ks[ 91] ^ ks[ 88] ^ 0x9e3779b9 ^  88), 11);
1078  ks[ 97] = hc_rotl32_S ((ks[ 96] ^ ks[ 94] ^ ks[ 92] ^ ks[ 89] ^ 0x9e3779b9 ^  89), 11);
1079  ks[ 98] = hc_rotl32_S ((ks[ 97] ^ ks[ 95] ^ ks[ 93] ^ ks[ 90] ^ 0x9e3779b9 ^  90), 11);
1080  ks[ 99] = hc_rotl32_S ((ks[ 98] ^ ks[ 96] ^ ks[ 94] ^ ks[ 91] ^ 0x9e3779b9 ^  91), 11);
1081  ks[100] = hc_rotl32_S ((ks[ 99] ^ ks[ 97] ^ ks[ 95] ^ ks[ 92] ^ 0x9e3779b9 ^  92), 11);
1082  ks[101] = hc_rotl32_S ((ks[100] ^ ks[ 98] ^ ks[ 96] ^ ks[ 93] ^ 0x9e3779b9 ^  93), 11);
1083  ks[102] = hc_rotl32_S ((ks[101] ^ ks[ 99] ^ ks[ 97] ^ ks[ 94] ^ 0x9e3779b9 ^  94), 11);
1084  ks[103] = hc_rotl32_S ((ks[102] ^ ks[100] ^ ks[ 98] ^ ks[ 95] ^ 0x9e3779b9 ^  95), 11);
1085  ks[104] = hc_rotl32_S ((ks[103] ^ ks[101] ^ ks[ 99] ^ ks[ 96] ^ 0x9e3779b9 ^  96), 11);
1086  ks[105] = hc_rotl32_S ((ks[104] ^ ks[102] ^ ks[100] ^ ks[ 97] ^ 0x9e3779b9 ^  97), 11);
1087  ks[106] = hc_rotl32_S ((ks[105] ^ ks[103] ^ ks[101] ^ ks[ 98] ^ 0x9e3779b9 ^  98), 11);
1088  ks[107] = hc_rotl32_S ((ks[106] ^ ks[104] ^ ks[102] ^ ks[ 99] ^ 0x9e3779b9 ^  99), 11);
1089  ks[108] = hc_rotl32_S ((ks[107] ^ ks[105] ^ ks[103] ^ ks[100] ^ 0x9e3779b9 ^ 100), 11);
1090  ks[109] = hc_rotl32_S ((ks[108] ^ ks[106] ^ ks[104] ^ ks[101] ^ 0x9e3779b9 ^ 101), 11);
1091  ks[110] = hc_rotl32_S ((ks[109] ^ ks[107] ^ ks[105] ^ ks[102] ^ 0x9e3779b9 ^ 102), 11);
1092  ks[111] = hc_rotl32_S ((ks[110] ^ ks[108] ^ ks[106] ^ ks[103] ^ 0x9e3779b9 ^ 103), 11);
1093  ks[112] = hc_rotl32_S ((ks[111] ^ ks[109] ^ ks[107] ^ ks[104] ^ 0x9e3779b9 ^ 104), 11);
1094  ks[113] = hc_rotl32_S ((ks[112] ^ ks[110] ^ ks[108] ^ ks[105] ^ 0x9e3779b9 ^ 105), 11);
1095  ks[114] = hc_rotl32_S ((ks[113] ^ ks[111] ^ ks[109] ^ ks[106] ^ 0x9e3779b9 ^ 106), 11);
1096  ks[115] = hc_rotl32_S ((ks[114] ^ ks[112] ^ ks[110] ^ ks[107] ^ 0x9e3779b9 ^ 107), 11);
1097  ks[116] = hc_rotl32_S ((ks[115] ^ ks[113] ^ ks[111] ^ ks[108] ^ 0x9e3779b9 ^ 108), 11);
1098  ks[117] = hc_rotl32_S ((ks[116] ^ ks[114] ^ ks[112] ^ ks[109] ^ 0x9e3779b9 ^ 109), 11);
1099  ks[118] = hc_rotl32_S ((ks[117] ^ ks[115] ^ ks[113] ^ ks[110] ^ 0x9e3779b9 ^ 110), 11);
1100  ks[119] = hc_rotl32_S ((ks[118] ^ ks[116] ^ ks[114] ^ ks[111] ^ 0x9e3779b9 ^ 111), 11);
1101  ks[120] = hc_rotl32_S ((ks[119] ^ ks[117] ^ ks[115] ^ ks[112] ^ 0x9e3779b9 ^ 112), 11);
1102  ks[121] = hc_rotl32_S ((ks[120] ^ ks[118] ^ ks[116] ^ ks[113] ^ 0x9e3779b9 ^ 113), 11);
1103  ks[122] = hc_rotl32_S ((ks[121] ^ ks[119] ^ ks[117] ^ ks[114] ^ 0x9e3779b9 ^ 114), 11);
1104  ks[123] = hc_rotl32_S ((ks[122] ^ ks[120] ^ ks[118] ^ ks[115] ^ 0x9e3779b9 ^ 115), 11);
1105  ks[124] = hc_rotl32_S ((ks[123] ^ ks[121] ^ ks[119] ^ ks[116] ^ 0x9e3779b9 ^ 116), 11);
1106  ks[125] = hc_rotl32_S ((ks[124] ^ ks[122] ^ ks[120] ^ ks[117] ^ 0x9e3779b9 ^ 117), 11);
1107  ks[126] = hc_rotl32_S ((ks[125] ^ ks[123] ^ ks[121] ^ ks[118] ^ 0x9e3779b9 ^ 118), 11);
1108  ks[127] = hc_rotl32_S ((ks[126] ^ ks[124] ^ ks[122] ^ ks[119] ^ 0x9e3779b9 ^ 119), 11);
1109  ks[128] = hc_rotl32_S ((ks[127] ^ ks[125] ^ ks[123] ^ ks[120] ^ 0x9e3779b9 ^ 120), 11);
1110  ks[129] = hc_rotl32_S ((ks[128] ^ ks[126] ^ ks[124] ^ ks[121] ^ 0x9e3779b9 ^ 121), 11);
1111  ks[130] = hc_rotl32_S ((ks[129] ^ ks[127] ^ ks[125] ^ ks[122] ^ 0x9e3779b9 ^ 122), 11);
1112  ks[131] = hc_rotl32_S ((ks[130] ^ ks[128] ^ ks[126] ^ ks[123] ^ 0x9e3779b9 ^ 123), 11);
1113  ks[132] = hc_rotl32_S ((ks[131] ^ ks[129] ^ ks[127] ^ ks[124] ^ 0x9e3779b9 ^ 124), 11);
1114  ks[133] = hc_rotl32_S ((ks[132] ^ ks[130] ^ ks[128] ^ ks[125] ^ 0x9e3779b9 ^ 125), 11);
1115  ks[134] = hc_rotl32_S ((ks[133] ^ ks[131] ^ ks[129] ^ ks[126] ^ 0x9e3779b9 ^ 126), 11);
1116  ks[135] = hc_rotl32_S ((ks[134] ^ ks[132] ^ ks[130] ^ ks[127] ^ 0x9e3779b9 ^ 127), 11);
1117  ks[136] = hc_rotl32_S ((ks[135] ^ ks[133] ^ ks[131] ^ ks[128] ^ 0x9e3779b9 ^ 128), 11);
1118  ks[137] = hc_rotl32_S ((ks[136] ^ ks[134] ^ ks[132] ^ ks[129] ^ 0x9e3779b9 ^ 129), 11);
1119  ks[138] = hc_rotl32_S ((ks[137] ^ ks[135] ^ ks[133] ^ ks[130] ^ 0x9e3779b9 ^ 130), 11);
1120  ks[139] = hc_rotl32_S ((ks[138] ^ ks[136] ^ ks[134] ^ ks[131] ^ 0x9e3779b9 ^ 131), 11);
1121
1122  u32  a,b,c,d,e,f,g,h;
1123  u32  t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
1124
1125  k_set( 0,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 0,e,f,g,h);
1126  k_set( 1,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 1,e,f,g,h);
1127  k_set( 2,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get( 2,e,f,g,h);
1128  k_set( 3,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get( 3,e,f,g,h);
1129  k_set( 4,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get( 4,e,f,g,h);
1130  k_set( 5,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get( 5,e,f,g,h);
1131  k_set( 6,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get( 6,e,f,g,h);
1132  k_set( 7,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get( 7,e,f,g,h);
1133  k_set( 8,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get( 8,e,f,g,h);
1134  k_set( 9,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get( 9,e,f,g,h);
1135  k_set(10,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(10,e,f,g,h);
1136  k_set(11,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(11,e,f,g,h);
1137  k_set(12,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(12,e,f,g,h);
1138  k_set(13,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(13,e,f,g,h);
1139  k_set(14,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(14,e,f,g,h);
1140  k_set(15,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(15,e,f,g,h);
1141  k_set(16,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(16,e,f,g,h);
1142  k_set(17,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(17,e,f,g,h);
1143  k_set(18,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(18,e,f,g,h);
1144  k_set(19,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(19,e,f,g,h);
1145  k_set(20,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(20,e,f,g,h);
1146  k_set(21,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(21,e,f,g,h);
1147  k_set(22,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(22,e,f,g,h);
1148  k_set(23,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(23,e,f,g,h);
1149  k_set(24,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(24,e,f,g,h);
1150  k_set(25,a,b,c,d); sb2(a,b,c,d,e,f,g,h); k_get(25,e,f,g,h);
1151  k_set(26,a,b,c,d); sb1(a,b,c,d,e,f,g,h); k_get(26,e,f,g,h);
1152  k_set(27,a,b,c,d); sb0(a,b,c,d,e,f,g,h); k_get(27,e,f,g,h);
1153  k_set(28,a,b,c,d); sb7(a,b,c,d,e,f,g,h); k_get(28,e,f,g,h);
1154  k_set(29,a,b,c,d); sb6(a,b,c,d,e,f,g,h); k_get(29,e,f,g,h);
1155  k_set(30,a,b,c,d); sb5(a,b,c,d,e,f,g,h); k_get(30,e,f,g,h);
1156  k_set(31,a,b,c,d); sb4(a,b,c,d,e,f,g,h); k_get(31,e,f,g,h);
1157  k_set(32,a,b,c,d); sb3(a,b,c,d,e,f,g,h); k_get(32,e,f,g,h);
1158}
1159
/*
 * Encrypt one block with Serpent.
 *
 * ks  - key schedule. It is never referenced directly in this body, so it
 *       is presumably picked up by name inside the k_xor macro (defined
 *       outside this section) - keep the parameter name unchanged and
 *       confirm against the macro definition.
 * in  - input block, read as the four words in[0..3]
 * out - output block, written as the four words out[0..3]
 *
 * All four input words are loaded into locals before anything is stored
 * to out.
 */
DECLSPEC void serpent256_encrypt (const u32 *ks, const u32 *in, u32 *out)
{
  /* Two banks of four words; every S-box macro reads one bank and writes the other. */
  u32  a,b,c,d,e,f,g,h;
  /* Scratch temporaries assigned inside the S-box macros (sb0, for example, uses t1..t14). */
  u32  t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;

  a = in[0];
  b = in[1];
  c = in[2];
  d = in[3];

  /*
   * Rounds 0..31, one per line: k_xor with the round index, then the S-box
   * macro numbered (round mod 8), then rot on the S-box output. Even rounds
   * map a..d -> e..h, odd rounds map e..h -> a..d, so the statement order
   * and the argument order must not be changed.
   * NOTE(review): k_xor presumably mixes the round subkey from ks into the
   * four words and rot is presumably Serpent's linear transformation; both
   * macros are defined outside this section - confirm there.
   */
  k_xor( 0,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor( 1,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor( 2,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor( 3,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor( 4,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor( 5,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor( 6,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor( 7,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor( 8,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor( 9,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(10,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(11,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(12,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(13,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(14,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(15,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(16,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(17,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(18,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(19,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(20,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(21,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(22,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(23,e,f,g,h); sb7(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(24,a,b,c,d); sb0(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(25,e,f,g,h); sb1(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(26,a,b,c,d); sb2(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(27,e,f,g,h); sb3(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(28,a,b,c,d); sb4(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  k_xor(29,e,f,g,h); sb5(e,f,g,h,a,b,c,d); rot(a,b,c,d);
  k_xor(30,a,b,c,d); sb6(a,b,c,d,e,f,g,h); rot(e,f,g,h);
  /* Round 31 is not followed by rot; a 33rd k_xor (index 32) is applied instead. */
  k_xor(31,e,f,g,h); sb7(e,f,g,h,a,b,c,d);
  k_xor(32,a,b,c,d);

  /* After the last step the result is held in the a..d bank. */
  out[0] = a;
  out[1] = b;
  out[2] = c;
  out[3] = d;
}
1209
/*
 * Decrypt one block with Serpent.
 *
 * ks  - key schedule. It is never referenced directly in this body, so it
 *       is presumably picked up by name inside the k_xor macro (defined
 *       outside this section) - keep the parameter name unchanged and
 *       confirm against the macro definition.
 * in  - input block, read as the four words in[0..3]
 * out - output block, written as the four words out[0..3]
 *
 * All four input words are loaded into locals before anything is stored
 * to out.
 */
DECLSPEC void serpent256_decrypt (const u32 *ks, const u32 *in, u32 *out)
{
  /* Two banks of four words; every inverse S-box macro reads one bank and writes the other. */
  u32  a,b,c,d,e,f,g,h;
  /* Scratch temporaries assigned inside the inverse S-box macros. */
  u32  t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;

  a = in[0];
  b = in[1];
  c = in[2];
  d = in[3];

  /*
   * The steps below run in the reverse order of serpent256_encrypt:
   * k_xor indices count down from 32 to 0, and each k_xor(r) for r <= 31
   * is preceded by the inverse S-box macro numbered (r mod 8). irot is
   * applied before every inverse S-box except the very first one. The
   * working bank alternates between a..d and e..h on every line, so the
   * statement order and the argument order must not be changed.
   * NOTE(review): irot is presumably the inverse of the rot macro used by
   * the encrypt path; it is defined outside this section - confirm there.
   */
                                       k_xor(32,a,b,c,d);
                 ib7(a,b,c,d,e,f,g,h); k_xor(31,e,f,g,h);
  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(30,a,b,c,d);
  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(29,e,f,g,h);
  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(28,a,b,c,d);
  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(27,e,f,g,h);
  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(26,a,b,c,d);
  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(25,e,f,g,h);
  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(24,a,b,c,d);
  irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(23,e,f,g,h);
  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(22,a,b,c,d);
  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(21,e,f,g,h);
  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(20,a,b,c,d);
  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(19,e,f,g,h);
  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(18,a,b,c,d);
  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor(17,e,f,g,h);
  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor(16,a,b,c,d);
  irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor(15,e,f,g,h);
  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor(14,a,b,c,d);
  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor(13,e,f,g,h);
  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor(12,a,b,c,d);
  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor(11,e,f,g,h);
  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor(10,a,b,c,d);
  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 9,e,f,g,h);
  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 8,a,b,c,d);
  irot(a,b,c,d); ib7(a,b,c,d,e,f,g,h); k_xor( 7,e,f,g,h);
  irot(e,f,g,h); ib6(e,f,g,h,a,b,c,d); k_xor( 6,a,b,c,d);
  irot(a,b,c,d); ib5(a,b,c,d,e,f,g,h); k_xor( 5,e,f,g,h);
  irot(e,f,g,h); ib4(e,f,g,h,a,b,c,d); k_xor( 4,a,b,c,d);
  irot(a,b,c,d); ib3(a,b,c,d,e,f,g,h); k_xor( 3,e,f,g,h);
  irot(e,f,g,h); ib2(e,f,g,h,a,b,c,d); k_xor( 2,a,b,c,d);
  irot(a,b,c,d); ib1(a,b,c,d,e,f,g,h); k_xor( 1,e,f,g,h);
  irot(e,f,g,h); ib0(e,f,g,h,a,b,c,d); k_xor( 0,a,b,c,d);

  /* After the last step the result is held in the a..d bank. */
  out[0] = a;
  out[1] = b;
  out[2] = c;
  out[3] = d;
}
1259
/*
 * Drop the helper macros used by the functions above (S-box and inverse
 * S-box macros, the k_xor/k_set/k_get key-schedule accessors, and the
 * rot/irot macros) so that these short names are no longer defined for
 * any code that follows.
 */
#undef sb0
#undef ib0
#undef sb1
#undef ib1
#undef sb2
#undef ib2
#undef sb3
#undef ib3
#undef sb4
#undef ib4
#undef sb5
#undef ib5
#undef sb6
#undef ib6
#undef sb7
#undef ib7
#undef k_xor
#undef k_set
#undef k_get
#undef rot
#undef irot
1281