xref: /freebsd/crypto/openssl/crypto/aes/aes_core.c (revision 1f474190)
1 /*
2  * Copyright 2002-2020 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the OpenSSL license (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 /**
11  * rijndael-alg-fst.c
12  *
13  * @version 3.0 (December 2000)
14  *
15  * Optimised ANSI C code for the Rijndael cipher (now AES)
16  *
17  * @author Vincent Rijmen
18  * @author Antoon Bosselaers
19  * @author Paulo Barreto
20  *
21  * This code is hereby placed in the public domain.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
24  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /* Note: rewritten a little bit to provide error control and an OpenSSL-
37    compatible API */
38 
39 #include <assert.h>
40 
41 #include <stdlib.h>
42 #include <openssl/crypto.h>
43 #include <openssl/aes.h>
44 #include "aes_local.h"
45 
46 #if defined(OPENSSL_AES_CONST_TIME) && !defined(AES_ASM)
/*
 * Eight bytes of storage that can be accessed as individual bytes, as two
 * 32-bit words or as one 64-bit word.  The column-mixing and key-expansion
 * code in this file writes through one member and reads through another.
 */
47 typedef union {
48     unsigned char b[8];   /* byte-wise view */
49     u32 w[2];             /* view as two 32-bit words */
50     u64 d;                /* view as a single 64-bit word */
51 } uni;
52 
53 /*
54  * Compute w := (w * x) mod (x^8 + x^4 + x^3 + x^1 + 1)
55  * Therefore the name "xtime".
56  */
57 static void XtimeWord(u32 *w)
58 {
59     u32 a, b;
60 
61     a = *w;
62     b = a & 0x80808080u;
63     a ^= b;
64     b -= b >> 7;
65     b &= 0x1B1B1B1Bu;
66     b ^= a << 1;
67     *w = b;
68 }
69 
70 static void XtimeLong(u64 *w)
71 {
72     u64 a, b;
73 
74     a = *w;
75     b = a & 0x8080808080808080uLL;
76     a ^= b;
77     b -= b >> 7;
78     b &= 0x1B1B1B1B1B1B1B1BuLL;
79     b ^= a << 1;
80     *w = b;
81 }
82 
83 /*
84  * This computes w := S * w ^ -1 + c, where c = {01100011}.
85  * Instead of using GF(2^8) mod (x^8+x^4+x^3+x+1) we do the inversion
86  * in GF(GF(GF(2^2)^2)^2) mod (X^2+X+8)
87  * and GF(GF(2^2)^2) mod (X^2+X+2)
88  * and GF(2^2) mod (X^2+X+1)
89  * The first part of the algorithm below transfers the coordinates
90  * {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80} =>
91  * {1,Y,Y^2,Y^3,Y^4,Y^5,Y^6,Y^7} with Y=0x41:
92  * {0x01,0x41,0x66,0x6c,0x56,0x9a,0x58,0xc4}
93  * The last part undoes the coordinate transfer and the final affine
94  * transformation S:
95  * b[i] = b[i] + b[(i+4)%8] + b[(i+5)%8] + b[(i+6)%8] + b[(i+7)%8] + c[i]
96  * in one step.
97  * The multiplication in GF(2^2^2^2) is done in ordinary coords:
98  * A = (a0*1 + a1*x^4)
99  * B = (b0*1 + b1*x^4)
100  * AB = ((a0*b0 + 8*a1*b1)*1 + (a1*b0 + (a0+a1)*b1)*x^4)
101  * When A = (a0,a1) is given we want to solve AB = 1:
102  * (a) 1 = a0*b0 + 8*a1*b1
103  * (b) 0 = a1*b0 + (a0+a1)*b1
104  * => multiply (a) by a1 and (b) by a0
105  * (c) a1 = a1*a0*b0 + (8*a1*a1)*b1
106  * (d) 0 = a1*a0*b0 + (a0*a0+a1*a0)*b1
107  * => add (c) + (d)
108  * (e) a1 = (a0*a0 + a1*a0 + 8*a1*a1)*b1
109  * => therefore
110  * b1 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * a1
111  * => and adding (a1*b0) to (b) we get
112  * (f) a1*b0 = (a0+a1)*b1
113  * => therefore
114  * b0 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * (a0+a1)
115  * Note this formula also works for the case
116  * (a0+a1)*a0 + 8*a1*a1 = 0
117  * if the inverse element for 0^-1 is mapped to 0.
118  * Repeat the same for GF(2^2^2) and GF(2^2).
119  * We get the following algorithm:
120  * inv8(a0,a1):
121  *   x0 = a0^a1
122  *   [y0,y1] = mul4([x0,a1],[a0,a1]); (*)
123  *   y1 = mul4(8,y1);
124  *   t = inv4(y0^y1);
125  *   [b0,b1] = mul4([x0,a1],[t,t]); (*)
126  *   return [b0,b1];
127  * The non-linear multiplies (*) can be done in parallel at no extra cost.
128  */
129 static void SubWord(u32 *w)
130 {
131     u32 x, y, a1, a2, a3, a4, a5, a6;
132 
133     x = *w;
134     y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
135     x &= 0xDDDDDDDDu;
136     x ^= y & 0x57575757u;
137     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
138     x ^= y & 0x1C1C1C1Cu;
139     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
140     x ^= y & 0x4A4A4A4Au;
141     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
142     x ^= y & 0x42424242u;
143     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
144     x ^= y & 0x64646464u;
145     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
146     x ^= y & 0xE0E0E0E0u;
147     a1 = x;
148     a1 ^= (x & 0xF0F0F0F0u) >> 4;
149     a2 = ((x & 0xCCCCCCCCu) >> 2) | ((x & 0x33333333u) << 2);
150     a3 = x & a1;
151     a3 ^= (a3 & 0xAAAAAAAAu) >> 1;
152     a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAu;
153     a4 = a2 & a1;
154     a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
155     a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAu;
156     a5 = (a3 & 0xCCCCCCCCu) >> 2;
157     a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
158     a4 = a5 & 0x22222222u;
159     a4 |= a4 >> 1;
160     a4 ^= (a5 << 1) & 0x22222222u;
161     a3 ^= a4;
162     a5 = a3 & 0xA0A0A0A0u;
163     a5 |= a5 >> 1;
164     a5 ^= (a3 << 1) & 0xA0A0A0A0u;
165     a4 = a5 & 0xC0C0C0C0u;
166     a6 = a4 >> 2;
167     a4 ^= (a5 << 2) & 0xC0C0C0C0u;
168     a5 = a6 & 0x20202020u;
169     a5 |= a5 >> 1;
170     a5 ^= (a6 << 1) & 0x20202020u;
171     a4 |= a5;
172     a3 ^= a4 >> 4;
173     a3 &= 0x0F0F0F0Fu;
174     a2 = a3;
175     a2 ^= (a3 & 0x0C0C0C0Cu) >> 2;
176     a4 = a3 & a2;
177     a4 ^= (a4 & 0x0A0A0A0A0Au) >> 1;
178     a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0Au;
179     a5 = a4 & 0x08080808u;
180     a5 |= a5 >> 1;
181     a5 ^= (a4 << 1) & 0x08080808u;
182     a4 ^= a5 >> 2;
183     a4 &= 0x03030303u;
184     a4 ^= (a4 & 0x02020202u) >> 1;
185     a4 |= a4 << 2;
186     a3 = a2 & a4;
187     a3 ^= (a3 & 0x0A0A0A0Au) >> 1;
188     a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0Au;
189     a3 |= a3 << 4;
190     a2 = ((a1 & 0xCCCCCCCCu) >> 2) | ((a1 & 0x33333333u) << 2);
191     x = a1 & a3;
192     x ^= (x & 0xAAAAAAAAu) >> 1;
193     x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAu;
194     a4 = a2 & a3;
195     a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
196     a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAu;
197     a5 = (x & 0xCCCCCCCCu) >> 2;
198     x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
199     a4 = a5 & 0x22222222u;
200     a4 |= a4 >> 1;
201     a4 ^= (a5 << 1) & 0x22222222u;
202     x ^= a4;
203     y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
204     x &= 0x39393939u;
205     x ^= y & 0x3F3F3F3Fu;
206     y = ((y & 0xFCFCFCFCu) >> 2) | ((y & 0x03030303u) << 6);
207     x ^= y & 0x97979797u;
208     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
209     x ^= y & 0x9B9B9B9Bu;
210     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
211     x ^= y & 0x3C3C3C3Cu;
212     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
213     x ^= y & 0xDDDDDDDDu;
214     y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
215     x ^= y & 0x72727272u;
216     x ^= 0x63636363u;
217     *w = x;
218 }
219 
/*
 * Apply the AES S-box to each of the eight bytes packed in *w, in place.
 * This is the 64-bit twin of SubWord: the statements are the same, with
 * every mask widened to eight byte lanes.  The body is a straight-line
 * sequence of shifts, ANDs, ORs and XORs with no table lookup and no branch.
 */
220 static void SubLong(u64 *w)
221 {
222     u64 x, y, a1, a2, a3, a4, a5, a6;
223
224     x = *w;
    /*
     * Input linear map (change of coordinates): y is x with every byte
     * rotated right by one more bit per step; each mask selects which of
     * the rotated bits are folded into x.
     */
225     y = ((x & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((x & 0x0101010101010101uLL) << 7);
226     x &= 0xDDDDDDDDDDDDDDDDuLL;
227     x ^= y & 0x5757575757575757uLL;
228     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
229     x ^= y & 0x1C1C1C1C1C1C1C1CuLL;
230     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
231     x ^= y & 0x4A4A4A4A4A4A4A4AuLL;
232     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
233     x ^= y & 0x4242424242424242uLL;
234     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
235     x ^= y & 0x6464646464646464uLL;
236     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
237     x ^= y & 0xE0E0E0E0E0E0E0E0uLL;
    /*
     * Per-byte field inversion in the tower-field representation.  a1..a6
     * are reused as scratch, so the statement order must not be changed.
     */
238     a1 = x;
239     a1 ^= (x & 0xF0F0F0F0F0F0F0F0uLL) >> 4;
240     a2 = ((x & 0xCCCCCCCCCCCCCCCCuLL) >> 2) | ((x & 0x3333333333333333uLL) << 2);
241     a3 = x & a1;
242     a3 ^= (a3 & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
243     a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAAAAAAAAAuLL;
244     a4 = a2 & a1;
245     a4 ^= (a4 & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
246     a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAAAAAAAAAuLL;
247     a5 = (a3 & 0xCCCCCCCCCCCCCCCCuLL) >> 2;
248     a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCCCCCCCCCuLL;
249     a4 = a5 & 0x2222222222222222uLL;
250     a4 |= a4 >> 1;
251     a4 ^= (a5 << 1) & 0x2222222222222222uLL;
252     a3 ^= a4;
253     a5 = a3 & 0xA0A0A0A0A0A0A0A0uLL;
254     a5 |= a5 >> 1;
255     a5 ^= (a3 << 1) & 0xA0A0A0A0A0A0A0A0uLL;
256     a4 = a5 & 0xC0C0C0C0C0C0C0C0uLL;
257     a6 = a4 >> 2;
258     a4 ^= (a5 << 2) & 0xC0C0C0C0C0C0C0C0uLL;
259     a5 = a6 & 0x2020202020202020uLL;
260     a5 |= a5 >> 1;
261     a5 ^= (a6 << 1) & 0x2020202020202020uLL;
262     a4 |= a5;
263     a3 ^= a4 >> 4;
264     a3 &= 0x0F0F0F0F0F0F0F0FuLL;
    /* From here on the working value lives in the low nibble of each byte. */
265     a2 = a3;
266     a2 ^= (a3 & 0x0C0C0C0C0C0C0C0CuLL) >> 2;
267     a4 = a3 & a2;
268     a4 ^= (a4 & 0x0A0A0A0A0A0A0A0AuLL) >> 1;
269     a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0A0A0A0A0AuLL;
270     a5 = a4 & 0x0808080808080808uLL;
271     a5 |= a5 >> 1;
272     a5 ^= (a4 << 1) & 0x0808080808080808uLL;
273     a4 ^= a5 >> 2;
274     a4 &= 0x0303030303030303uLL;
275     a4 ^= (a4 & 0x0202020202020202uLL) >> 1;
276     a4 |= a4 << 2;
277     a3 = a2 & a4;
278     a3 ^= (a3 & 0x0A0A0A0A0A0A0A0AuLL) >> 1;
279     a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0A0A0A0A0AuLL;
    /* Copy the low-nibble result into the high nibble of each byte. */
280     a3 |= a3 << 4;
281     a2 = ((a1 & 0xCCCCCCCCCCCCCCCCuLL) >> 2) | ((a1 & 0x3333333333333333uLL) << 2);
282     x = a1 & a3;
283     x ^= (x & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
284     x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAAAAAAAAAuLL;
285     a4 = a2 & a3;
286     a4 ^= (a4 & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
287     a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAAAAAAAAAuLL;
288     a5 = (x & 0xCCCCCCCCCCCCCCCCuLL) >> 2;
289     x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCCCCCCCCCuLL;
290     a4 = a5 & 0x2222222222222222uLL;
291     a4 |= a4 >> 1;
292     a4 ^= (a5 << 1) & 0x2222222222222222uLL;
293     x ^= a4;
    /*
     * Output linear map: back to the standard coordinates combined with the
     * linear part of the S-box affine transformation.  The second step
     * rotates each byte by two bits, all the others by one.
     */
294     y = ((x & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((x & 0x0101010101010101uLL) << 7);
295     x &= 0x3939393939393939uLL;
296     x ^= y & 0x3F3F3F3F3F3F3F3FuLL;
297     y = ((y & 0xFCFCFCFCFCFCFCFCuLL) >> 2) | ((y & 0x0303030303030303uLL) << 6);
298     x ^= y & 0x9797979797979797uLL;
299     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
300     x ^= y & 0x9B9B9B9B9B9B9B9BuLL;
301     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
302     x ^= y & 0x3C3C3C3C3C3C3C3CuLL;
303     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
304     x ^= y & 0xDDDDDDDDDDDDDDDDuLL;
305     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
306     x ^= y & 0x7272727272727272uLL;
    /* Add the affine constant c = 0x63 to every byte. */
307     x ^= 0x6363636363636363uLL;
308     *w = x;
309 }
310 
311 /*
312  * This computes w := (S^-1 * (w + c))^-1
313  */
/*
 * Apply the inverse AES S-box to each of the eight bytes packed in *w, in
 * place.  Like SubLong, this is a straight-line sequence of shifts, ANDs,
 * ORs and XORs with no table lookup and no branch; the middle (inversion)
 * section is statement-for-statement the same as in SubLong.
 */
314 static void InvSubLong(u64 *w)
315 {
316     u64 x, y, a1, a2, a3, a4, a5, a6;
317
318     x = *w;
    /* Remove the affine constant c = 0x63 from every byte first. */
319     x ^= 0x6363636363636363uLL;
    /*
     * Input linear map: y is x with every byte rotated right by one more
     * bit per step; each mask selects the rotated bits folded into x.
     * NOTE(review): presumably this merges the inverse affine matrix S^-1
     * with the change of coordinates used by the inversion - confirm.
     */
320     y = ((x & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((x & 0x0101010101010101uLL) << 7);
321     x &= 0xFDFDFDFDFDFDFDFDuLL;
322     x ^= y & 0x5E5E5E5E5E5E5E5EuLL;
323     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
324     x ^= y & 0xF3F3F3F3F3F3F3F3uLL;
325     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
326     x ^= y & 0xF5F5F5F5F5F5F5F5uLL;
327     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
328     x ^= y & 0x7878787878787878uLL;
329     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
330     x ^= y & 0x7777777777777777uLL;
331     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
332     x ^= y & 0x1515151515151515uLL;
333     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
334     x ^= y & 0xA5A5A5A5A5A5A5A5uLL;
    /*
     * Per-byte field inversion in the tower-field representation.  a1..a6
     * are reused as scratch, so the statement order must not be changed.
     */
335     a1 = x;
336     a1 ^= (x & 0xF0F0F0F0F0F0F0F0uLL) >> 4;
337     a2 = ((x & 0xCCCCCCCCCCCCCCCCuLL) >> 2) | ((x & 0x3333333333333333uLL) << 2);
338     a3 = x & a1;
339     a3 ^= (a3 & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
340     a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAAAAAAAAAuLL;
341     a4 = a2 & a1;
342     a4 ^= (a4 & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
343     a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAAAAAAAAAuLL;
344     a5 = (a3 & 0xCCCCCCCCCCCCCCCCuLL) >> 2;
345     a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCCCCCCCCCuLL;
346     a4 = a5 & 0x2222222222222222uLL;
347     a4 |= a4 >> 1;
348     a4 ^= (a5 << 1) & 0x2222222222222222uLL;
349     a3 ^= a4;
350     a5 = a3 & 0xA0A0A0A0A0A0A0A0uLL;
351     a5 |= a5 >> 1;
352     a5 ^= (a3 << 1) & 0xA0A0A0A0A0A0A0A0uLL;
353     a4 = a5 & 0xC0C0C0C0C0C0C0C0uLL;
354     a6 = a4 >> 2;
355     a4 ^= (a5 << 2) & 0xC0C0C0C0C0C0C0C0uLL;
356     a5 = a6 & 0x2020202020202020uLL;
357     a5 |= a5 >> 1;
358     a5 ^= (a6 << 1) & 0x2020202020202020uLL;
359     a4 |= a5;
360     a3 ^= a4 >> 4;
361     a3 &= 0x0F0F0F0F0F0F0F0FuLL;
    /* From here on the working value lives in the low nibble of each byte. */
362     a2 = a3;
363     a2 ^= (a3 & 0x0C0C0C0C0C0C0C0CuLL) >> 2;
364     a4 = a3 & a2;
365     a4 ^= (a4 & 0x0A0A0A0A0A0A0A0AuLL) >> 1;
366     a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0A0A0A0A0AuLL;
367     a5 = a4 & 0x0808080808080808uLL;
368     a5 |= a5 >> 1;
369     a5 ^= (a4 << 1) & 0x0808080808080808uLL;
370     a4 ^= a5 >> 2;
371     a4 &= 0x0303030303030303uLL;
372     a4 ^= (a4 & 0x0202020202020202uLL) >> 1;
373     a4 |= a4 << 2;
374     a3 = a2 & a4;
375     a3 ^= (a3 & 0x0A0A0A0A0A0A0A0AuLL) >> 1;
376     a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0A0A0A0A0AuLL;
    /* Copy the low-nibble result into the high nibble of each byte. */
377     a3 |= a3 << 4;
378     a2 = ((a1 & 0xCCCCCCCCCCCCCCCCuLL) >> 2) | ((a1 & 0x3333333333333333uLL) << 2);
379     x = a1 & a3;
380     x ^= (x & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
381     x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAAAAAAAAAuLL;
382     a4 = a2 & a3;
383     a4 ^= (a4 & 0xAAAAAAAAAAAAAAAAuLL) >> 1;
384     a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAAAAAAAAAuLL;
385     a5 = (x & 0xCCCCCCCCCCCCCCCCuLL) >> 2;
386     x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCCCCCCCCCuLL;
387     a4 = a5 & 0x2222222222222222uLL;
388     a4 |= a4 >> 1;
389     a4 ^= (a5 << 1) & 0x2222222222222222uLL;
390     x ^= a4;
    /*
     * Output linear map, again built from one-bit byte rotations and masks.
     * NOTE(review): presumably the change back to standard coordinates -
     * confirm.  No constant is added here; c was already removed on entry.
     */
391     y = ((x & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((x & 0x0101010101010101uLL) << 7);
392     x &= 0xB5B5B5B5B5B5B5B5uLL;
393     x ^= y & 0x4040404040404040uLL;
394     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
395     x ^= y & 0x8080808080808080uLL;
396     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
397     x ^= y & 0x1616161616161616uLL;
398     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
399     x ^= y & 0xEBEBEBEBEBEBEBEBuLL;
400     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
401     x ^= y & 0x9797979797979797uLL;
402     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
403     x ^= y & 0xFBFBFBFBFBFBFBFBuLL;
404     y = ((y & 0xFEFEFEFEFEFEFEFEuLL) >> 1) | ((y & 0x0101010101010101uLL) << 7);
405     x ^= y & 0x7D7D7D7D7D7D7D7DuLL;
406     *w = x;
407 }
408 
409 static void ShiftRows(u64 *state)
410 {
411     unsigned char s[4];
412     unsigned char *s0;
413     int r;
414 
415     s0 = (unsigned char *)state;
416     for (r = 0; r < 4; r++) {
417         s[0] = s0[0*4 + r];
418         s[1] = s0[1*4 + r];
419         s[2] = s0[2*4 + r];
420         s[3] = s0[3*4 + r];
421         s0[0*4 + r] = s[(r+0) % 4];
422         s0[1*4 + r] = s[(r+1) % 4];
423         s0[2*4 + r] = s[(r+2) % 4];
424         s0[3*4 + r] = s[(r+3) % 4];
425     }
426 }
427 
428 static void InvShiftRows(u64 *state)
429 {
430     unsigned char s[4];
431     unsigned char *s0;
432     int r;
433 
434     s0 = (unsigned char *)state;
435     for (r = 0; r < 4; r++) {
436         s[0] = s0[0*4 + r];
437         s[1] = s0[1*4 + r];
438         s[2] = s0[2*4 + r];
439         s[3] = s0[3*4 + r];
440         s0[0*4 + r] = s[(4-r) % 4];
441         s0[1*4 + r] = s[(5-r) % 4];
442         s0[2*4 + r] = s[(6-r) % 4];
443         s0[3*4 + r] = s[(7-r) % 4];
444     }
445 }
446 
447 static void MixColumns(u64 *state)
448 {
449     uni s1;
450     uni s;
451     int c;
452 
453     for (c = 0; c < 2; c++) {
454         s1.d = state[c];
455         s.d = s1.d;
456         s.d ^= ((s.d & 0xFFFF0000FFFF0000uLL) >> 16)
457                | ((s.d & 0x0000FFFF0000FFFFuLL) << 16);
458         s.d ^= ((s.d & 0xFF00FF00FF00FF00uLL) >> 8)
459                | ((s.d & 0x00FF00FF00FF00FFuLL) << 8);
460         s.d ^= s1.d;
461         XtimeLong(&s1.d);
462         s.d ^= s1.d;
463         s.b[0] ^= s1.b[1];
464         s.b[1] ^= s1.b[2];
465         s.b[2] ^= s1.b[3];
466         s.b[3] ^= s1.b[0];
467         s.b[4] ^= s1.b[5];
468         s.b[5] ^= s1.b[6];
469         s.b[6] ^= s1.b[7];
470         s.b[7] ^= s1.b[4];
471         state[c] = s.d;
472     }
473 }
474 
475 static void InvMixColumns(u64 *state)
476 {
477     uni s1;
478     uni s;
479     int c;
480 
481     for (c = 0; c < 2; c++) {
482         s1.d = state[c];
483         s.d = s1.d;
484         s.d ^= ((s.d & 0xFFFF0000FFFF0000uLL) >> 16)
485                | ((s.d & 0x0000FFFF0000FFFFuLL) << 16);
486         s.d ^= ((s.d & 0xFF00FF00FF00FF00uLL) >> 8)
487                | ((s.d & 0x00FF00FF00FF00FFuLL) << 8);
488         s.d ^= s1.d;
489         XtimeLong(&s1.d);
490         s.d ^= s1.d;
491         s.b[0] ^= s1.b[1];
492         s.b[1] ^= s1.b[2];
493         s.b[2] ^= s1.b[3];
494         s.b[3] ^= s1.b[0];
495         s.b[4] ^= s1.b[5];
496         s.b[5] ^= s1.b[6];
497         s.b[6] ^= s1.b[7];
498         s.b[7] ^= s1.b[4];
499         XtimeLong(&s1.d);
500         s1.d ^= ((s1.d & 0xFFFF0000FFFF0000uLL) >> 16)
501                 | ((s1.d & 0x0000FFFF0000FFFFuLL) << 16);
502         s.d ^= s1.d;
503         XtimeLong(&s1.d);
504         s1.d ^= ((s1.d & 0xFF00FF00FF00FF00uLL) >> 8)
505                 | ((s1.d & 0x00FF00FF00FF00FFuLL) << 8);
506         s.d ^= s1.d;
507         state[c] = s.d;
508     }
509 }
510 
511 static void AddRoundKey(u64 *state, const u64 *w)
512 {
513     state[0] ^= w[0];
514     state[1] ^= w[1];
515 }
516 
517 static void Cipher(const unsigned char *in, unsigned char *out,
518                    const u64 *w, int nr)
519 {
520     u64 state[2];
521     int i;
522 
523     memcpy(state, in, 16);
524 
525     AddRoundKey(state, w);
526 
527     for (i = 1; i < nr; i++) {
528         SubLong(&state[0]);
529         SubLong(&state[1]);
530         ShiftRows(state);
531         MixColumns(state);
532         AddRoundKey(state, w + i*2);
533     }
534 
535     SubLong(&state[0]);
536     SubLong(&state[1]);
537     ShiftRows(state);
538     AddRoundKey(state, w + nr*2);
539 
540     memcpy(out, state, 16);
541 }
542 
543 static void InvCipher(const unsigned char *in, unsigned char *out,
544                       const u64 *w, int nr)
545 
546 {
547     u64 state[2];
548     int i;
549 
550     memcpy(state, in, 16);
551 
552     AddRoundKey(state, w + nr*2);
553 
554     for (i = nr - 1; i > 0; i--) {
555         InvShiftRows(state);
556         InvSubLong(&state[0]);
557         InvSubLong(&state[1]);
558         AddRoundKey(state, w + i*2);
559         InvMixColumns(state);
560     }
561 
562     InvShiftRows(state);
563     InvSubLong(&state[0]);
564     InvSubLong(&state[1]);
565     AddRoundKey(state, w);
566 
567     memcpy(out, state, 16);
568 }
569 
570 static void RotWord(u32 *x)
571 {
572     unsigned char *w0;
573     unsigned char tmp;
574 
575     w0 = (unsigned char *)x;
576     tmp = w0[0];
577     w0[0] = w0[1];
578     w0[1] = w0[2];
579     w0[2] = w0[3];
580     w0[3] = tmp;
581 }
582 
583 static void KeyExpansion(const unsigned char *key, u64 *w,
584                          int nr, int nk)
585 {
586     u32 rcon;
587     uni prev;
588     u32 temp;
589     int i, n;
590 
591     memcpy(w, key, nk*4);
592     memcpy(&rcon, "\1\0\0\0", 4);
593     n = nk/2;
594     prev.d = w[n-1];
595     for (i = n; i < (nr+1)*2; i++) {
596         temp = prev.w[1];
597         if (i % n == 0) {
598             RotWord(&temp);
599             SubWord(&temp);
600             temp ^= rcon;
601             XtimeWord(&rcon);
602         } else if (nk > 6 && i % n == 2) {
603             SubWord(&temp);
604         }
605         prev.d = w[i-n];
606         prev.w[0] ^= temp;
607         prev.w[1] ^= prev.w[0];
608         w[i] = prev.d;
609     }
610 }
611 
612 /**
613  * Expand the cipher key into the encryption key schedule.
614  */
615 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
616                         AES_KEY *key)
617 {
618     u64 *rk;
619 
620     if (!userKey || !key)
621         return -1;
622     if (bits != 128 && bits != 192 && bits != 256)
623         return -2;
624 
625     rk = (u64*)key->rd_key;
626 
627     if (bits == 128)
628         key->rounds = 10;
629     else if (bits == 192)
630         key->rounds = 12;
631     else
632         key->rounds = 14;
633 
634     KeyExpansion(userKey, rk, key->rounds, bits/32);
635     return 0;
636 }
637 
638 /**
639  * Expand the cipher key into the decryption key schedule.
640  */
641 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
642                         AES_KEY *key)
643 {
644     return AES_set_encrypt_key(userKey, bits, key);
645 }
646 
647 /*
648  * Encrypt a single block
649  * in and out can overlap
650  */
651 void AES_encrypt(const unsigned char *in, unsigned char *out,
652                  const AES_KEY *key)
653 {
654     const u64 *rk;
655 
656     assert(in && out && key);
657     rk = (u64*)key->rd_key;
658 
659     Cipher(in, out, rk, key->rounds);
660 }
661 
662 /*
663  * Decrypt a single block
664  * in and out can overlap
665  */
666 void AES_decrypt(const unsigned char *in, unsigned char *out,
667                  const AES_KEY *key)
668 {
669     const u64 *rk;
670 
671     assert(in && out && key);
672     rk = (u64*)key->rd_key;
673 
674     InvCipher(in, out, rk, key->rounds);
675 }
676 #elif !defined(AES_ASM)
677 /*-
678 Te0[x] = S [x].[02, 01, 01, 03];
679 Te1[x] = S [x].[03, 02, 01, 01];
680 Te2[x] = S [x].[01, 03, 02, 01];
681 Te3[x] = S [x].[01, 01, 03, 02];
682 
683 Td0[x] = Si[x].[0e, 09, 0d, 0b];
684 Td1[x] = Si[x].[0b, 0e, 09, 0d];
685 Td2[x] = Si[x].[0d, 0b, 0e, 09];
686 Td3[x] = Si[x].[09, 0d, 0b, 0e];
687 Td4[x] = Si[x].[01];
688 */
689 
/*
 * Te0[x] = S[x].[02, 01, 01, 03]: entry x packs, from the most to the least
 * significant byte, 02*S[x], S[x], S[x] and 03*S[x], where S is the AES
 * S-box and the products are taken in GF(2^8).  For example S[0] = 0x63
 * gives Te0[0] = 0xc66363a5.
 */
690 static const u32 Te0[256] = {
691     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
692     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
693     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
694     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
695     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
696     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
697     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
698     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
699     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
700     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
701     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
702     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
703     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
704     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
705     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
706     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
707     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
708     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
709     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
710     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
711     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
712     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
713     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
714     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
715     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
716     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
717     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
718     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
719     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
720     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
721     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
722     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
723     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
724     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
725     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
726     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
727     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
728     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
729     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
730     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
731     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
732     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
733     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
734     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
735     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
736     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
737     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
738     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
739     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
740     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
741     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
742     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
743     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
744     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
745     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
746     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
747     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
748     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
749     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
750     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
751     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
752     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
753     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
754     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
755 };
/*
 * Te1[x] = S[x].[03, 02, 01, 01]: entry x packs, from the most to the least
 * significant byte, 03*S[x], 02*S[x], S[x] and S[x] (S is the AES S-box,
 * products in GF(2^8)).  Each entry equals the matching Te0 entry rotated
 * right by 8 bits, e.g. Te1[0] = 0xa5c66363.
 */
756 static const u32 Te1[256] = {
757     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
758     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
759     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
760     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
761     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
762     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
763     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
764     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
765     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
766     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
767     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
768     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
769     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
770     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
771     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
772     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
773     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
774     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
775     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
776     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
777     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
778     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
779     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
780     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
781     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
782     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
783     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
784     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
785     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
786     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
787     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
788     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
789     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
790     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
791     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
792     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
793     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
794     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
795     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
796     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
797     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
798     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
799     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
800     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
801     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
802     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
803     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
804     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
805     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
806     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
807     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
808     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
809     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
810     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
811     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
812     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
813     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
814     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
815     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
816     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
817     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
818     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
819     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
820     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
821 };
/*
 * Te2: 256-entry encryption lookup table of 32-bit words, indexed by one
 * state byte.  Reading the entries, each word packs the S-box output s for
 * that index as the byte sequence [s, 3*s, 2*s, s] (most- to
 * least-significant byte, multiples taken in GF(2^8)); for example
 * Te2[0x00] = 0x63a5c663 and Te2[0x01] = 0x7c84f87c.  The table is the Te3
 * layout rotated left by one byte.
 *
 * The key schedule masks an entry with 0xff000000 to recover the plain
 * S-box byte in the top lane.
 */
static const u32 Te2[256] = {
    0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
    0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
    0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
    0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
    0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
    0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
    0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
    0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
    0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
    0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
    0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
    0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
    0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
    0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
    0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
    0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
    0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
    0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
    0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
    0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
    0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
    0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
    0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
    0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
    0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
    0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
    0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
    0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
    0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
    0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
    0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
    0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
    0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
    0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
    0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
    0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
    0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
    0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
    0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
    0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
    0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
    0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
    0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
    0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
    0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
    0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
    0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
    0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
    0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
    0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
    0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
    0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
    0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
    0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
    0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
    0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
    0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
    0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
    0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
    0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
    0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
    0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
    0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
    0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
};
/*
 * Te3: 256-entry encryption lookup table of 32-bit words, indexed by one
 * state byte.  Reading the entries, each word packs the S-box output s for
 * that index as the byte sequence [s, s, 3*s, 2*s] (most- to
 * least-significant byte, multiples taken in GF(2^8)); for example
 * Te3[0x00] = 0x6363a5c6 and Te3[0x01] = 0x7c7c84f8.
 *
 * The key schedule masks an entry with 0x00ff0000 to recover the plain
 * S-box byte in the second-highest lane.
 */
static const u32 Te3[256] = {
    0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
    0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
    0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
    0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
    0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
    0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
    0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
    0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
    0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
    0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
    0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
    0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
    0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
    0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
    0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
    0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
    0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
    0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
    0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
    0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
    0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
    0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
    0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
    0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
    0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
    0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
    0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
    0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
    0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
    0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
    0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
    0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
    0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
    0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
    0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
    0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
    0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
    0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
    0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
    0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
    0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
    0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
    0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
    0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
    0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
    0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
    0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
    0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
    0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
    0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
    0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
    0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
    0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
    0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
    0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
    0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
    0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
    0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
    0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
    0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
    0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
    0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
    0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
    0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
};
954 
/*
 * Td0: 256-entry decryption lookup table of 32-bit words, indexed by one
 * byte.  Reading the entries, each word packs the inverse S-box output t
 * for that index as [0e*t, 09*t, 0d*t, 0b*t] (most- to least-significant
 * byte, GF(2^8) multiples); for example Td0[0x00] = 0x51f4a750 with
 * t = 0x52, and Td0[0x63] = 0 because the inverse S-box maps 0x63 to 0.
 *
 * AES_set_decrypt_key indexes this table with an S-box output so the two
 * substitutions cancel and only the inverse-MixColumn multiples remain.
 */
static const u32 Td0[256] = {
    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
};
/*
 * Td1: 256-entry decryption lookup table of 32-bit words, indexed by one
 * byte.  Reading the entries, each word packs the inverse S-box output t
 * for that index as [0b*t, 0e*t, 09*t, 0d*t] (most- to least-significant
 * byte, GF(2^8) multiples), i.e. the Td0 word rotated right by 8 bits;
 * for example Td1[0x00] = 0x5051f4a7.
 *
 * AES_set_decrypt_key uses it for the second-highest byte of each
 * round-key word.
 */
static const u32 Td1[256] = {
    0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
    0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
    0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
    0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
    0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
    0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
    0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
    0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
    0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
    0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
    0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
    0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
    0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
    0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
    0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
    0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
    0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
    0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
    0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
    0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
    0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
    0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
    0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
    0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
    0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
    0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
    0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
    0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
    0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
    0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
    0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
    0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
    0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
    0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
    0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
    0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
    0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
    0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
    0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
    0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
    0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
    0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
    0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
    0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
    0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
    0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
    0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
    0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
    0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
    0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
    0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
    0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
    0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
    0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
    0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
    0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
    0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
    0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
    0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
    0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
    0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
    0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
    0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
    0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
};
/*
 * Td2: 256-entry decryption lookup table of 32-bit words, indexed by one
 * byte.  Reading the entries, each word packs the inverse S-box output t
 * for that index as [0d*t, 0b*t, 0e*t, 09*t] (most- to least-significant
 * byte, GF(2^8) multiples), i.e. the Td0 word rotated right by 16 bits;
 * for example Td2[0x00] = 0xa75051f4.
 *
 * AES_set_decrypt_key uses it for the second-lowest byte of each
 * round-key word.
 */
static const u32 Td2[256] = {
    0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
    0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
    0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
    0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
    0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
    0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
    0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
    0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
    0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
    0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
    0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
    0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
    0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
    0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
    0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
    0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
    0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
    0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
    0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
    0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
    0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
    0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
    0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
    0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
    0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
    0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
    0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
    0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
    0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
    0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
    0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
    0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
    0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
    0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
    0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
    0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
    0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
    0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
    0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
    0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
    0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
    0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
    0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
    0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
    0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
    0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
    0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
    0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
    0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
    0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
    0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
    0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
    0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
    0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
    0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
    0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
    0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
    0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
    0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
    0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
    0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
    0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
    0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
    0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
};
/*
 * Td3: 256-entry decryption lookup table of 32-bit words, indexed by one
 * byte.  Reading the entries, each word packs the inverse S-box output t
 * for that index as [09*t, 0d*t, 0b*t, 0e*t] (most- to least-significant
 * byte, GF(2^8) multiples), i.e. the Td0 word rotated right by 24 bits;
 * for example Td3[0x00] = 0xf4a75051.
 *
 * AES_set_decrypt_key uses it for the lowest byte of each round-key word.
 */
static const u32 Td3[256] = {
    0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
    0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
    0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
    0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
    0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
    0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
    0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
    0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
    0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
    0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
    0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
    0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
    0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
    0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
    0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
    0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
    0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
    0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
    0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
    0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
    0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
    0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
    0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
    0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
    0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
    0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
    0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
    0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
    0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
    0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
    0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
    0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
    0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
    0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
    0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
    0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
    0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
    0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
    0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
    0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
    0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
    0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
    0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
    0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
    0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
    0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
    0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
    0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
    0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
    0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
    0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
    0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
    0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
    0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
    0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
    0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
    0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
    0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
    0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
    0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
    0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
    0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
    0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
    0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
};
/*
 * Td4: 256-entry byte table.  The entries match the AES inverse S-box:
 * Td4[0x00] = 0x52 and Td4[0x63] = 0x00, the reverse of the forward S-box
 * mapping 0x00 -> 0x63 seen in the top byte of Te2[0].
 * NOTE(review): its users are outside this part of the file; presumably it
 * serves the final decryption round, which has no MixColumn step -- confirm.
 */
static const u8 Td4[256] = {
    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU,
};
1253 static const u32 rcon[] = {
1254     0x01000000, 0x02000000, 0x04000000, 0x08000000,
1255     0x10000000, 0x20000000, 0x40000000, 0x80000000,
1256     0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1257 };
1258 
1259 /**
1260  * Expand the cipher key into the encryption key schedule.
1261  */
1262 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1263                         AES_KEY *key)
1264 {
1265 
1266     u32 *rk;
1267     int i = 0;
1268     u32 temp;
1269 
1270     if (!userKey || !key)
1271         return -1;
1272     if (bits != 128 && bits != 192 && bits != 256)
1273         return -2;
1274 
1275     rk = key->rd_key;
1276 
1277     if (bits == 128)
1278         key->rounds = 10;
1279     else if (bits == 192)
1280         key->rounds = 12;
1281     else
1282         key->rounds = 14;
1283 
1284     rk[0] = GETU32(userKey     );
1285     rk[1] = GETU32(userKey +  4);
1286     rk[2] = GETU32(userKey +  8);
1287     rk[3] = GETU32(userKey + 12);
1288     if (bits == 128) {
1289         while (1) {
1290             temp  = rk[3];
1291             rk[4] = rk[0] ^
1292                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
1293                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
1294                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
1295                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
1296                 rcon[i];
1297             rk[5] = rk[1] ^ rk[4];
1298             rk[6] = rk[2] ^ rk[5];
1299             rk[7] = rk[3] ^ rk[6];
1300             if (++i == 10) {
1301                 return 0;
1302             }
1303             rk += 4;
1304         }
1305     }
1306     rk[4] = GETU32(userKey + 16);
1307     rk[5] = GETU32(userKey + 20);
1308     if (bits == 192) {
1309         while (1) {
1310             temp = rk[ 5];
1311             rk[ 6] = rk[ 0] ^
1312                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
1313                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
1314                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
1315                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
1316                 rcon[i];
1317             rk[ 7] = rk[ 1] ^ rk[ 6];
1318             rk[ 8] = rk[ 2] ^ rk[ 7];
1319             rk[ 9] = rk[ 3] ^ rk[ 8];
1320             if (++i == 8) {
1321                 return 0;
1322             }
1323             rk[10] = rk[ 4] ^ rk[ 9];
1324             rk[11] = rk[ 5] ^ rk[10];
1325             rk += 6;
1326         }
1327     }
1328     rk[6] = GETU32(userKey + 24);
1329     rk[7] = GETU32(userKey + 28);
1330     if (bits == 256) {
1331         while (1) {
1332             temp = rk[ 7];
1333             rk[ 8] = rk[ 0] ^
1334                 (Te2[(temp >> 16) & 0xff] & 0xff000000) ^
1335                 (Te3[(temp >>  8) & 0xff] & 0x00ff0000) ^
1336                 (Te0[(temp      ) & 0xff] & 0x0000ff00) ^
1337                 (Te1[(temp >> 24)       ] & 0x000000ff) ^
1338                 rcon[i];
1339             rk[ 9] = rk[ 1] ^ rk[ 8];
1340             rk[10] = rk[ 2] ^ rk[ 9];
1341             rk[11] = rk[ 3] ^ rk[10];
1342             if (++i == 7) {
1343                 return 0;
1344             }
1345             temp = rk[11];
1346             rk[12] = rk[ 4] ^
1347                 (Te2[(temp >> 24)       ] & 0xff000000) ^
1348                 (Te3[(temp >> 16) & 0xff] & 0x00ff0000) ^
1349                 (Te0[(temp >>  8) & 0xff] & 0x0000ff00) ^
1350                 (Te1[(temp      ) & 0xff] & 0x000000ff);
1351             rk[13] = rk[ 5] ^ rk[12];
1352             rk[14] = rk[ 6] ^ rk[13];
1353             rk[15] = rk[ 7] ^ rk[14];
1354 
1355             rk += 8;
1356             }
1357     }
1358     return 0;
1359 }
1360 
1361 /**
1362  * Expand the cipher key into the decryption key schedule.
1363  */
1364 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1365                         AES_KEY *key)
1366 {
1367 
1368     u32 *rk;
1369     int i, j, status;
1370     u32 temp;
1371 
1372     /* first, start with an encryption schedule */
1373     status = AES_set_encrypt_key(userKey, bits, key);
1374     if (status < 0)
1375         return status;
1376 
1377     rk = key->rd_key;
1378 
1379     /* invert the order of the round keys: */
1380     for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
1381         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1382         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1383         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1384         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1385     }
1386     /* apply the inverse MixColumn transform to all round keys but the first and the last: */
1387     for (i = 1; i < (key->rounds); i++) {
1388         rk += 4;
1389         rk[0] =
1390             Td0[Te1[(rk[0] >> 24)       ] & 0xff] ^
1391             Td1[Te1[(rk[0] >> 16) & 0xff] & 0xff] ^
1392             Td2[Te1[(rk[0] >>  8) & 0xff] & 0xff] ^
1393             Td3[Te1[(rk[0]      ) & 0xff] & 0xff];
1394         rk[1] =
1395             Td0[Te1[(rk[1] >> 24)       ] & 0xff] ^
1396             Td1[Te1[(rk[1] >> 16) & 0xff] & 0xff] ^
1397             Td2[Te1[(rk[1] >>  8) & 0xff] & 0xff] ^
1398             Td3[Te1[(rk[1]      ) & 0xff] & 0xff];
1399         rk[2] =
1400             Td0[Te1[(rk[2] >> 24)       ] & 0xff] ^
1401             Td1[Te1[(rk[2] >> 16) & 0xff] & 0xff] ^
1402             Td2[Te1[(rk[2] >>  8) & 0xff] & 0xff] ^
1403             Td3[Te1[(rk[2]      ) & 0xff] & 0xff];
1404         rk[3] =
1405             Td0[Te1[(rk[3] >> 24)       ] & 0xff] ^
1406             Td1[Te1[(rk[3] >> 16) & 0xff] & 0xff] ^
1407             Td2[Te1[(rk[3] >>  8) & 0xff] & 0xff] ^
1408             Td3[Te1[(rk[3]      ) & 0xff] & 0xff];
1409     }
1410     return 0;
1411 }
1412 
1413 /*
1414  * Encrypt a single block
1415  * in and out can overlap
      *
      * Loads 16 bytes from |in| as four u32 words via GETU32, XORs them with
      * rk[0..3], runs the table-driven rounds over key->rd_key (key->rounds
      * selects how many) and stores 16 bytes to |out| via PUTU32.
      * Overlap is safe because all four input words are read before the first
      * PUTU32 to |out|.
      *
      * Arguments are checked only by assert(); there is no return value.
      * Every Te0..Te3 index is derived from the cipher state, so the memory
      * access pattern depends on the key and data being processed.
      * NOTE(review): Te0..Te3 and the GETU32/PUTU32 byte order are defined
      * outside this excerpt - confirm there before relying on lane layout.
1416  */
1417 void AES_encrypt(const unsigned char *in, unsigned char *out,
1418                  const AES_KEY *key) {
1419 
1420     const u32 *rk; /* cursor into key->rd_key, four words per round key */
1421     u32 s0, s1, s2, s3, t0, t1, t2, t3; /* s* and t* alternate as round input/output */
1422 #ifndef FULL_UNROLL
1423     int r; /* passes left in the round loop */
1424 #endif /* ?FULL_UNROLL */
1425 
1426     assert(in && out && key); /* the only argument check; no error is reported */
1427     rk = key->rd_key;
1428 
1429     /*
1430      * map byte array block to cipher state
1431      * and add initial round key:
1432      */
1433     s0 = GETU32(in     ) ^ rk[0];
1434     s1 = GETU32(in +  4) ^ rk[1];
1435     s2 = GETU32(in +  8) ^ rk[2];
1436     s3 = GETU32(in + 12) ^ rk[3];
1437 #ifdef FULL_UNROLL
1438     /* round 1: */
1439     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
1440     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
1441     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
1442     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
1443     /* round 2: */
1444     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
1445     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
1446     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
1447     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
1448     /* round 3: */
1449     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
1450     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
1451     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
1452     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
1453     /* round 4: */
1454     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
1455     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
1456     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
1457     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
1458     /* round 5: */
1459     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
1460     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
1461     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
1462     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
1463     /* round 6: */
1464     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
1465     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
1466     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
1467     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
1468     /* round 7: */
1469     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
1470     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
1471     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
1472     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
1473     /* round 8: */
1474     s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
1475     s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
1476     s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
1477     s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
1478     /* round 9: */
1479     t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
1480     t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
1481     t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
1482     t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
1483     if (key->rounds > 10) { /* rounds 1-9 always run; this adds rounds 10 and 11 */
1484         /* round 10: */
1485         s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
1486         s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
1487         s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
1488         s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
1489         /* round 11: */
1490         t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
1491         t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
1492         t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
1493         t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
1494         if (key->rounds > 12) { /* adds rounds 12 and 13 */
1495             /* round 12: */
1496             s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
1497             s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
1498             s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
1499             s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
1500             /* round 13: */
1501             t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
1502             t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
1503             t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
1504             t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
1505         }
1506     }
1507     rk += key->rounds << 2; /* 4 words per round: rk[0..3] is now the last round key */
1508 #else  /* !FULL_UNROLL */
1509     /*
1510      * Nr - 1 full rounds:
               * each pass runs one s->t round and, unless it was the last pass,
               * one t->s round, so the loop always exits with the state in t0..t3.
1511      */
1512     r = key->rounds >> 1; /* number of passes: half the round count */
1513     for (;;) {
1514         t0 =
1515             Te0[(s0 >> 24)       ] ^
1516             Te1[(s1 >> 16) & 0xff] ^
1517             Te2[(s2 >>  8) & 0xff] ^
1518             Te3[(s3      ) & 0xff] ^
1519             rk[4];
1520         t1 =
1521             Te0[(s1 >> 24)       ] ^
1522             Te1[(s2 >> 16) & 0xff] ^
1523             Te2[(s3 >>  8) & 0xff] ^
1524             Te3[(s0      ) & 0xff] ^
1525             rk[5];
1526         t2 =
1527             Te0[(s2 >> 24)       ] ^
1528             Te1[(s3 >> 16) & 0xff] ^
1529             Te2[(s0 >>  8) & 0xff] ^
1530             Te3[(s1      ) & 0xff] ^
1531             rk[6];
1532         t3 =
1533             Te0[(s3 >> 24)       ] ^
1534             Te1[(s0 >> 16) & 0xff] ^
1535             Te2[(s1 >>  8) & 0xff] ^
1536             Te3[(s2      ) & 0xff] ^
1537             rk[7];
1538 
1539         rk += 8; /* rk[0..3] now names the key for the next round */
1540         if (--r == 0) { /* leave after an s->t round; the last round reads t0..t3 */
1541             break;
1542         }
1543 
1544         s0 =
1545             Te0[(t0 >> 24)       ] ^
1546             Te1[(t1 >> 16) & 0xff] ^
1547             Te2[(t2 >>  8) & 0xff] ^
1548             Te3[(t3      ) & 0xff] ^
1549             rk[0];
1550         s1 =
1551             Te0[(t1 >> 24)       ] ^
1552             Te1[(t2 >> 16) & 0xff] ^
1553             Te2[(t3 >>  8) & 0xff] ^
1554             Te3[(t0      ) & 0xff] ^
1555             rk[1];
1556         s2 =
1557             Te0[(t2 >> 24)       ] ^
1558             Te1[(t3 >> 16) & 0xff] ^
1559             Te2[(t0 >>  8) & 0xff] ^
1560             Te3[(t1      ) & 0xff] ^
1561             rk[2];
1562         s3 =
1563             Te0[(t3 >> 24)       ] ^
1564             Te1[(t0 >> 16) & 0xff] ^
1565             Te2[(t1 >>  8) & 0xff] ^
1566             Te3[(t2      ) & 0xff] ^
1567             rk[3];
1568     }
1569 #endif /* ?FULL_UNROLL */
1570     /*
1571      * apply last round and
1572      * map cipher state to byte array block:
               * each table lookup is masked down to one byte lane, so the four
               * terms fill disjoint bytes of the word before rk[] is XORed in.
1573      */
1574     s0 =
1575         (Te2[(t0 >> 24)       ] & 0xff000000) ^
1576         (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
1577         (Te0[(t2 >>  8) & 0xff] & 0x0000ff00) ^
1578         (Te1[(t3      ) & 0xff] & 0x000000ff) ^
1579         rk[0];
1580     PUTU32(out     , s0);
1581     s1 =
1582         (Te2[(t1 >> 24)       ] & 0xff000000) ^
1583         (Te3[(t2 >> 16) & 0xff] & 0x00ff0000) ^
1584         (Te0[(t3 >>  8) & 0xff] & 0x0000ff00) ^
1585         (Te1[(t0      ) & 0xff] & 0x000000ff) ^
1586         rk[1];
1587     PUTU32(out +  4, s1);
1588     s2 =
1589         (Te2[(t2 >> 24)       ] & 0xff000000) ^
1590         (Te3[(t3 >> 16) & 0xff] & 0x00ff0000) ^
1591         (Te0[(t0 >>  8) & 0xff] & 0x0000ff00) ^
1592         (Te1[(t1      ) & 0xff] & 0x000000ff) ^
1593         rk[2];
1594     PUTU32(out +  8, s2);
1595     s3 =
1596         (Te2[(t3 >> 24)       ] & 0xff000000) ^
1597         (Te3[(t0 >> 16) & 0xff] & 0x00ff0000) ^
1598         (Te0[(t1 >>  8) & 0xff] & 0x0000ff00) ^
1599         (Te1[(t2      ) & 0xff] & 0x000000ff) ^
1600         rk[3];
1601     PUTU32(out + 12, s3);
1602 }
1603 
1604 /*
1605  * Decrypt a single block
1606  * in and out can overlap
      *
      * Loads 16 bytes from |in| as four u32 words via GETU32, XORs them with
      * rk[0..3], runs the table-driven rounds (Td0..Td3, then Td4 for the last
      * round) over key->rd_key and stores 16 bytes to |out| via PUTU32.
      * Overlap is safe because all four input words are read before the first
      * PUTU32 to |out|.
      *
      * The round keys are consumed front to back starting at rd_key[0].
      * NOTE(review): presumably |key| must come from AES_set_decrypt_key,
      * which stores the schedule in that order - confirm against callers.
      *
      * Arguments are checked only by assert(); there is no return value.
      * Every table index is derived from the cipher state, so the memory
      * access pattern depends on the key and data being processed.
1607  */
1608 void AES_decrypt(const unsigned char *in, unsigned char *out,
1609                  const AES_KEY *key)
1610 {
1611 
1612     const u32 *rk; /* cursor into key->rd_key, four words per round key */
1613     u32 s0, s1, s2, s3, t0, t1, t2, t3; /* s* and t* alternate as round input/output */
1614 #ifndef FULL_UNROLL
1615     int r; /* passes left in the round loop */
1616 #endif /* ?FULL_UNROLL */
1617 
1618     assert(in && out && key); /* the only argument check; no error is reported */
1619     rk = key->rd_key;
1620 
1621     /*
1622      * map byte array block to cipher state
1623      * and add initial round key:
1624      */
1625     s0 = GETU32(in     ) ^ rk[0];
1626     s1 = GETU32(in +  4) ^ rk[1];
1627     s2 = GETU32(in +  8) ^ rk[2];
1628     s3 = GETU32(in + 12) ^ rk[3];
1629 #ifdef FULL_UNROLL
1630     /* round 1: */
1631     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
1632     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
1633     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
1634     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
1635     /* round 2: */
1636     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1637     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1638     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1639     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1640     /* round 3: */
1641     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1642     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1643     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1644     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1645     /* round 4: */
1646     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1647     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1648     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1649     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1650     /* round 5: */
1651     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1652     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1653     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1654     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1655     /* round 6: */
1656     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1657     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1658     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1659     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1660     /* round 7: */
1661     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1662     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1663     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1664     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1665     /* round 8: */
1666     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1667     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1668     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1669     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1670     /* round 9: */
1671     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1672     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1673     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1674     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1675     if (key->rounds > 10) { /* rounds 1-9 always run; this adds rounds 10 and 11 */
1676         /* round 10: */
1677         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1678         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1679         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1680         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1681         /* round 11: */
1682         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1683         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1684         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1685         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1686         if (key->rounds > 12) { /* adds rounds 12 and 13 */
1687             /* round 12: */
1688             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1689             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1690             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1691             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1692             /* round 13: */
1693             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1694             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1695             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1696             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1697         }
1698     }
1699     rk += key->rounds << 2; /* 4 words per round: rk[0..3] is now the last round key */
1700 #else  /* !FULL_UNROLL */
1701     /*
1702      * Nr - 1 full rounds:
               * each pass runs one s->t round and, unless it was the last pass,
               * one t->s round, so the loop always exits with the state in t0..t3.
1703      */
1704     r = key->rounds >> 1; /* number of passes: half the round count */
1705     for (;;) {
1706         t0 =
1707             Td0[(s0 >> 24)       ] ^
1708             Td1[(s3 >> 16) & 0xff] ^
1709             Td2[(s2 >>  8) & 0xff] ^
1710             Td3[(s1      ) & 0xff] ^
1711             rk[4];
1712         t1 =
1713             Td0[(s1 >> 24)       ] ^
1714             Td1[(s0 >> 16) & 0xff] ^
1715             Td2[(s3 >>  8) & 0xff] ^
1716             Td3[(s2      ) & 0xff] ^
1717             rk[5];
1718         t2 =
1719             Td0[(s2 >> 24)       ] ^
1720             Td1[(s1 >> 16) & 0xff] ^
1721             Td2[(s0 >>  8) & 0xff] ^
1722             Td3[(s3      ) & 0xff] ^
1723             rk[6];
1724         t3 =
1725             Td0[(s3 >> 24)       ] ^
1726             Td1[(s2 >> 16) & 0xff] ^
1727             Td2[(s1 >>  8) & 0xff] ^
1728             Td3[(s0      ) & 0xff] ^
1729             rk[7];
1730 
1731         rk += 8; /* rk[0..3] now names the key for the next round */
1732         if (--r == 0) { /* leave after an s->t round; the last round reads t0..t3 */
1733             break;
1734         }
1735 
1736         s0 =
1737             Td0[(t0 >> 24)       ] ^
1738             Td1[(t3 >> 16) & 0xff] ^
1739             Td2[(t2 >>  8) & 0xff] ^
1740             Td3[(t1      ) & 0xff] ^
1741             rk[0];
1742         s1 =
1743             Td0[(t1 >> 24)       ] ^
1744             Td1[(t0 >> 16) & 0xff] ^
1745             Td2[(t3 >>  8) & 0xff] ^
1746             Td3[(t2      ) & 0xff] ^
1747             rk[1];
1748         s2 =
1749             Td0[(t2 >> 24)       ] ^
1750             Td1[(t1 >> 16) & 0xff] ^
1751             Td2[(t0 >>  8) & 0xff] ^
1752             Td3[(t3      ) & 0xff] ^
1753             rk[2];
1754         s3 =
1755             Td0[(t3 >> 24)       ] ^
1756             Td1[(t2 >> 16) & 0xff] ^
1757             Td2[(t1 >>  8) & 0xff] ^
1758             Td3[(t0      ) & 0xff] ^
1759             rk[3];
1760     }
1761 #endif /* ?FULL_UNROLL */
1762     /*
1763      * apply last round and
1764      * map cipher state to byte array block:
               * each Td4 entry is widened to u32 and shifted into its own byte
               * lane, so the four terms fill disjoint bytes before rk[] is XORed in.
1765      */
1766     s0 =
1767         ((u32)Td4[(t0 >> 24)       ] << 24) ^
1768         ((u32)Td4[(t3 >> 16) & 0xff] << 16) ^
1769         ((u32)Td4[(t2 >>  8) & 0xff] <<  8) ^
1770         ((u32)Td4[(t1      ) & 0xff])       ^
1771         rk[0];
1772     PUTU32(out     , s0);
1773     s1 =
1774         ((u32)Td4[(t1 >> 24)       ] << 24) ^
1775         ((u32)Td4[(t0 >> 16) & 0xff] << 16) ^
1776         ((u32)Td4[(t3 >>  8) & 0xff] <<  8) ^
1777         ((u32)Td4[(t2      ) & 0xff])       ^
1778         rk[1];
1779     PUTU32(out +  4, s1);
1780     s2 =
1781         ((u32)Td4[(t2 >> 24)       ] << 24) ^
1782         ((u32)Td4[(t1 >> 16) & 0xff] << 16) ^
1783         ((u32)Td4[(t0 >>  8) & 0xff] <<  8) ^
1784         ((u32)Td4[(t3      ) & 0xff])       ^
1785         rk[2];
1786     PUTU32(out +  8, s2);
1787     s3 =
1788         ((u32)Td4[(t3 >> 24)       ] << 24) ^
1789         ((u32)Td4[(t2 >> 16) & 0xff] << 16) ^
1790         ((u32)Td4[(t1 >>  8) & 0xff] <<  8) ^
1791         ((u32)Td4[(t0      ) & 0xff])       ^
1792         rk[3];
1793     PUTU32(out + 12, s3);
1794 }
1795 
1796 #else /* AES_ASM */
1797 
1798 static const u8 Te4[256] = { /* AES S-box: one substituted byte per input byte, used by the key expansion below */
1799     0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
1800     0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
1801     0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
1802     0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
1803     0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
1804     0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
1805     0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
1806     0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
1807     0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
1808     0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
1809     0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
1810     0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
1811     0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
1812     0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
1813     0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
1814     0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
1815     0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
1816     0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
1817     0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
1818     0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
1819     0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
1820     0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
1821     0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
1822     0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
1823     0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
1824     0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
1825     0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
1826     0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
1827     0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
1828     0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
1829     0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
1830     0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
1831 };
1832 static const u32 rcon[] = { /* key-expansion round constants, held in the top byte; indexed by the step counter i */
1833     0x01000000, 0x02000000, 0x04000000, 0x08000000,
1834     0x10000000, 0x20000000, 0x40000000, 0x80000000,
1835     0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1836 };
1837 
1838 /**
1839  * Expand the cipher key into the encryption key schedule.
      *
      * userKey: bits/8 bytes of key material, read as 4-byte words via GETU32.
      * bits:    key length; must be 128, 192 or 256.
      * key:     receives key->rounds (10, 12 or 14) and 4 * (rounds + 1)
      *          words of round keys in key->rd_key.
      *
      * Returns 0 on success, -1 if userKey or key is NULL, and -2 for any
      * other value of bits.  Nothing is written to |key| on failure.
1840  */
1841 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1842                         AES_KEY *key)
1843 {
1844     u32 *rk; /* sliding window over key->rd_key */
1845     int i = 0; /* expansion step counter; also the index into rcon[] */
1846     u32 temp;
1847 
1848     if (!userKey || !key)
1849         return -1;
1850     if (bits != 128 && bits != 192 && bits != 256)
1851         return -2;
1852 
1853     rk = key->rd_key;
1854 
1855     if (bits == 128)
1856         key->rounds = 10;
1857     else if (bits == 192)
1858         key->rounds = 12;
1859     else
1860         key->rounds = 14;
1861 
1862     rk[0] = GETU32(userKey     );
1863     rk[1] = GETU32(userKey +  4);
1864     rk[2] = GETU32(userKey +  8);
1865     rk[3] = GETU32(userKey + 12);
1866     if (bits == 128) {
1867         while (1) {
1868             temp  = rk[3]; /* last word produced so far */
1869             rk[4] = rk[0] ^ /* bytes of temp rotated by one position, substituted through Te4, plus rcon[i] */
1870                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1871                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1872                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1873                 ((u32)Te4[(temp >> 24)       ]) ^
1874                 rcon[i];
1875             rk[5] = rk[1] ^ rk[4];
1876             rk[6] = rk[2] ^ rk[5];
1877             rk[7] = rk[3] ^ rk[6];
1878             if (++i == 10) { /* 4 key words + 10 steps of 4 words = 44 words */
1879                 return 0;
1880             }
1881             rk += 4;
1882         }
1883     }
1884     rk[4] = GETU32(userKey + 16);
1885     rk[5] = GETU32(userKey + 20);
1886     if (bits == 192) {
1887         while (1) {
1888             temp = rk[ 5]; /* last word produced so far */
1889             rk[ 6] = rk[ 0] ^ /* same rotate + Te4 + rcon[i] step as the 128-bit case */
1890                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1891                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1892                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1893                 ((u32)Te4[(temp >> 24)       ]) ^
1894                 rcon[i];
1895             rk[ 7] = rk[ 1] ^ rk[ 6];
1896             rk[ 8] = rk[ 2] ^ rk[ 7];
1897             rk[ 9] = rk[ 3] ^ rk[ 8];
1898             if (++i == 8) { /* the 8th step stops after 4 words: 6 + 7*6 + 4 = 52 words */
1899                 return 0;
1900             }
1901             rk[10] = rk[ 4] ^ rk[ 9];
1902             rk[11] = rk[ 5] ^ rk[10];
1903             rk += 6;
1904         }
1905     }
1906     rk[6] = GETU32(userKey + 24);
1907     rk[7] = GETU32(userKey + 28);
1908     if (bits == 256) {
1909         while (1) {
1910             temp = rk[ 7]; /* last word produced so far */
1911             rk[ 8] = rk[ 0] ^ /* same rotate + Te4 + rcon[i] step as the 128-bit case */
1912                 ((u32)Te4[(temp >> 16) & 0xff] << 24) ^
1913                 ((u32)Te4[(temp >>  8) & 0xff] << 16) ^
1914                 ((u32)Te4[(temp      ) & 0xff] << 8) ^
1915                 ((u32)Te4[(temp >> 24)       ]) ^
1916                 rcon[i];
1917             rk[ 9] = rk[ 1] ^ rk[ 8];
1918             rk[10] = rk[ 2] ^ rk[ 9];
1919             rk[11] = rk[ 3] ^ rk[10];
1920             if (++i == 7) { /* the 7th step stops after 4 words: 8 + 6*8 + 4 = 60 words */
1921                 return 0;
1922             }
1923             temp = rk[11];
1924             rk[12] = rk[ 4] ^ /* second half of the step: Te4 on each byte in place, no rotation, no rcon */
1925                 ((u32)Te4[(temp >> 24)       ] << 24) ^
1926                 ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
1927                 ((u32)Te4[(temp >>  8) & 0xff] << 8) ^
1928                 ((u32)Te4[(temp      ) & 0xff]);
1929             rk[13] = rk[ 5] ^ rk[12];
1930             rk[14] = rk[ 6] ^ rk[13];
1931             rk[15] = rk[ 7] ^ rk[14];
1932 
1933             rk += 8;
1934         }
1935     }
1936     return 0; /* not reached: bits was validated above, so one of the loops has already returned */
1937 }
1938 
1939 /**
1940  * Expand the cipher key into the decryption key schedule.
      *
      * Builds the encryption schedule with AES_set_encrypt_key(), reverses the
      * order of the 4-word round keys in place, then transforms every round
      * key except the first and the last.
      *
      * userKey, bits, key: passed straight to AES_set_encrypt_key().
      *
      * Returns 0 on success, or the negative status from
      * AES_set_encrypt_key() if that call fails.
1941  */
1942 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1943                         AES_KEY *key)
1944 {
1945 
1946     u32 *rk;
1947     int i, j, status;
1948     u32 temp;
1949 
1950     /* first, start with an encryption schedule */
1951     status = AES_set_encrypt_key(userKey, bits, key);
1952     if (status < 0)
1953         return status;
1954 
1955     rk = key->rd_key;
1956 
1957     /* invert the order of the round keys: */
1958     for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { /* i and j index the first word of the two round keys being swapped */
1959         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1960         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1961         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1962         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1963     }
1964     /* apply the inverse MixColumn transform to all round keys but the first and the last: */
1965     for (i = 1; i < (key->rounds); i++) {
1966         rk += 4; /* step to round key i */
1967         for (j = 0; j < 4; j++) {
1968             u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; /* tpN = word with every byte multiplied by N in GF(2^8) */
1969 
1970             tp1 = rk[j];
1971             m = tp1 & 0x80808080; /* top bit of each of the four packed bytes */
1972             tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ /* double each byte; the mask keeps bits from crossing byte boundaries */
1973                 ((m - (m >> 7)) & 0x1b1b1b1b); /* 0x1b in exactly those bytes whose top bit was set */
1974             m = tp2 & 0x80808080;
1975             tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
1976                 ((m - (m >> 7)) & 0x1b1b1b1b);
1977             m = tp4 & 0x80808080;
1978             tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1979                 ((m - (m >> 7)) & 0x1b1b1b1b);
1980             tp9 = tp8 ^ tp1;
1981             tpb = tp9 ^ tp2;
1982             tpd = tp9 ^ tp4;
1983             tpe = tp8 ^ tp4 ^ tp2;
1984 #if defined(ROTATE)
1985             rk[j] = tpe ^ ROTATE(tpd,16) ^
1986                 ROTATE(tp9,24) ^ ROTATE(tpb,8);
1987 #else
                  /* NOTE(review): same rotations spelled out with shifts, assuming ROTATE(x,n) rotates left by n - confirm in aes_local.h */
1988             rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1989                 (tp9 >> 8) ^ (tp9 << 24) ^
1990                 (tpb >> 24) ^ (tpb << 8);
1991 #endif
1992         }
1993     }
1994     return 0;
1995 }
1996 
1997 #endif /* AES_ASM */
1998