/*
* AES using POWER8/POWER9 crypto extensions
*
* Contributed by Jeffrey Walton
*
* Further changes
* (C) 2018,2019 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/aes.h>
#include <botan/cpuid.h>

#include <altivec.h>
#undef vector
#undef bool

namespace Botan {

typedef __vector unsigned long long Altivec64x2;
typedef __vector unsigned int Altivec32x4;
typedef __vector unsigned char Altivec8x16;

namespace {

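/*
* Reverse the bytes of a vector on little-endian targets so the AES state
* has the same in-register layout as on big-endian; on big-endian this is
* a no-op.
*/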
inline Altivec8x16 reverse_vec(Altivec8x16 src)
   {
   if(CPUID::is_little_endian())
      {
      const Altivec8x16 mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
      const Altivec8x16 zero = {0};
      return vec_perm(src, zero, mask);
      }
   else
      {
      return src;
      }
   }

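// Load four consecutive 32-bit words of the key schedule as one 128-bit
// round key, applying the endian fixup above.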
inline Altivec64x2 load_key(const uint32_t key[])
   {
   return (Altivec64x2)reverse_vec((Altivec8x16)vec_vsx_ld(0, key));
   }

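// Load a 16-byte input block, applying the endian fixup above.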
inline Altivec64x2 load_block(const uint8_t src[])
   {
   return (Altivec64x2)reverse_vec(vec_vsx_ld(0, src));
   }

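// Store a processed block back to memory in the original byte order.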
inline void store_block(Altivec64x2 src, uint8_t dest[])
   {
   vec_vsx_st(reverse_vec((Altivec8x16)src), 0, dest);
   }

inline void store_blocks(Altivec64x2 B0, Altivec64x2 B1,
                         Altivec64x2 B2, Altivec64x2 B3,
                         uint8_t out[])
   {
   store_block(B0, out);
   store_block(B1, out+16);
   store_block(B2, out+16*2);
   store_block(B3, out+16*3);
   }

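/*
* These macros apply one step of AES to four blocks at a time. vcipher
* performs a middle encryption round (including the round key addition),
* vcipherlast performs the final round, and vncipher/vncipherlast are the
* corresponding decryption rounds.
*/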
#define AES_XOR_4(B0, B1, B2, B3, K) do {      \
   B0 = vec_xor(B0, K);                        \
   B1 = vec_xor(B1, K);                        \
   B2 = vec_xor(B2, K);                        \
   B3 = vec_xor(B3, K);                        \
   } while(0)

#define AES_ENCRYPT_4(B0, B1, B2, B3, K) do {                   \
   B0 = __builtin_crypto_vcipher(B0, K);                        \
   B1 = __builtin_crypto_vcipher(B1, K);                        \
   B2 = __builtin_crypto_vcipher(B2, K);                        \
   B3 = __builtin_crypto_vcipher(B3, K);                        \
   } while(0)

#define AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K) do {              \
   B0 = __builtin_crypto_vcipherlast(B0, K);                    \
   B1 = __builtin_crypto_vcipherlast(B1, K);                    \
   B2 = __builtin_crypto_vcipherlast(B2, K);                    \
   B3 = __builtin_crypto_vcipherlast(B3, K);                    \
   } while(0)

#define AES_DECRYPT_4(B0, B1, B2, B3, K) do {                   \
   B0 = __builtin_crypto_vncipher(B0, K);                       \
   B1 = __builtin_crypto_vncipher(B1, K);                       \
   B2 = __builtin_crypto_vncipher(B2, K);                       \
   B3 = __builtin_crypto_vncipher(B3, K);                       \
   } while(0)

#define AES_DECRYPT_4_LAST(B0, B1, B2, B3, K) do {              \
   B0 = __builtin_crypto_vncipherlast(B0, K);                   \
   B1 = __builtin_crypto_vncipherlast(B1, K);                   \
   B2 = __builtin_crypto_vncipherlast(B2, K);                   \
   B3 = __builtin_crypto_vncipherlast(B3, K);                   \
   } while(0)

}

BOTAN_FUNC_ISA("crypto")
void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const Altivec64x2 K0  = load_key(&m_EK[0]);
   const Altivec64x2 K1  = load_key(&m_EK[4]);
   const Altivec64x2 K2  = load_key(&m_EK[8]);
   const Altivec64x2 K3  = load_key(&m_EK[12]);
   const Altivec64x2 K4  = load_key(&m_EK[16]);
   const Altivec64x2 K5  = load_key(&m_EK[20]);
   const Altivec64x2 K6  = load_key(&m_EK[24]);
   const Altivec64x2 K7  = load_key(&m_EK[28]);
   const Altivec64x2 K8  = load_key(&m_EK[32]);
   const Altivec64x2 K9  = load_key(&m_EK[36]);
   const Altivec64x2 K10 = load_key(&m_EK[40]);

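   // Encrypt four blocks at a time while enough input remains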
   while(blocks >= 4)
      {
      Altivec64x2 B0 = load_block(in);
      Altivec64x2 B1 = load_block(in+16);
      Altivec64x2 B2 = load_block(in+16*2);
      Altivec64x2 B3 = load_block(in+16*3);

      AES_XOR_4(B0, B1, B2, B3, K0);
      AES_ENCRYPT_4(B0, B1, B2, B3, K1);
      AES_ENCRYPT_4(B0, B1, B2, B3, K2);
      AES_ENCRYPT_4(B0, B1, B2, B3, K3);
      AES_ENCRYPT_4(B0, B1, B2, B3, K4);
      AES_ENCRYPT_4(B0, B1, B2, B3, K5);
      AES_ENCRYPT_4(B0, B1, B2, B3, K6);
      AES_ENCRYPT_4(B0, B1, B2, B3, K7);
      AES_ENCRYPT_4(B0, B1, B2, B3, K8);
      AES_ENCRYPT_4(B0, B1, B2, B3, K9);
      AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K10);

      store_blocks(B0, B1, B2, B3, out);

      out += 4*16;
      in += 4*16;
      blocks -= 4;
      }

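   // Handle any remaining blocks one at a time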
   for(size_t i = 0; i != blocks; ++i)
      {
      Altivec64x2 B = load_block(in);

      B = vec_xor(B, K0);
      B = __builtin_crypto_vcipher(B, K1);
      B = __builtin_crypto_vcipher(B, K2);
      B = __builtin_crypto_vcipher(B, K3);
      B = __builtin_crypto_vcipher(B, K4);
      B = __builtin_crypto_vcipher(B, K5);
      B = __builtin_crypto_vcipher(B, K6);
      B = __builtin_crypto_vcipher(B, K7);
      B = __builtin_crypto_vcipher(B, K8);
      B = __builtin_crypto_vcipher(B, K9);
      B = __builtin_crypto_vcipherlast(B, K10);

      store_block(B, out);

      out += 16;
      in += 16;
      }
   }

BOTAN_FUNC_ISA("crypto")
void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
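   // Decryption walks the encryption key schedule (m_EK) in reverse order;
   // the vncipher instructions take the untransformed round keys directly.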
   const Altivec64x2 K0  = load_key(&m_EK[40]);
   const Altivec64x2 K1  = load_key(&m_EK[36]);
   const Altivec64x2 K2  = load_key(&m_EK[32]);
   const Altivec64x2 K3  = load_key(&m_EK[28]);
   const Altivec64x2 K4  = load_key(&m_EK[24]);
   const Altivec64x2 K5  = load_key(&m_EK[20]);
   const Altivec64x2 K6  = load_key(&m_EK[16]);
   const Altivec64x2 K7  = load_key(&m_EK[12]);
   const Altivec64x2 K8  = load_key(&m_EK[8]);
   const Altivec64x2 K9  = load_key(&m_EK[4]);
   const Altivec64x2 K10 = load_key(&m_EK[0]);

   while(blocks >= 4)
      {
      Altivec64x2 B0 = load_block(in);
      Altivec64x2 B1 = load_block(in+16);
      Altivec64x2 B2 = load_block(in+16*2);
      Altivec64x2 B3 = load_block(in+16*3);

      AES_XOR_4(B0, B1, B2, B3, K0);
      AES_DECRYPT_4(B0, B1, B2, B3, K1);
      AES_DECRYPT_4(B0, B1, B2, B3, K2);
      AES_DECRYPT_4(B0, B1, B2, B3, K3);
      AES_DECRYPT_4(B0, B1, B2, B3, K4);
      AES_DECRYPT_4(B0, B1, B2, B3, K5);
      AES_DECRYPT_4(B0, B1, B2, B3, K6);
      AES_DECRYPT_4(B0, B1, B2, B3, K7);
      AES_DECRYPT_4(B0, B1, B2, B3, K8);
      AES_DECRYPT_4(B0, B1, B2, B3, K9);
      AES_DECRYPT_4_LAST(B0, B1, B2, B3, K10);

      store_blocks(B0, B1, B2, B3, out);

      out += 4*16;
      in += 4*16;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      Altivec64x2 B = load_block(in);

      B = vec_xor(B, K0);
      B = __builtin_crypto_vncipher(B, K1);
      B = __builtin_crypto_vncipher(B, K2);
      B = __builtin_crypto_vncipher(B, K3);
      B = __builtin_crypto_vncipher(B, K4);
      B = __builtin_crypto_vncipher(B, K5);
      B = __builtin_crypto_vncipher(B, K6);
      B = __builtin_crypto_vncipher(B, K7);
      B = __builtin_crypto_vncipher(B, K8);
      B = __builtin_crypto_vncipher(B, K9);
      B = __builtin_crypto_vncipherlast(B, K10);

      store_block(B, out);

      out += 16;
      in += 16;
      }
   }

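/*
* AES-192: same structure as AES-128 above, but with 12 rounds and a
* 13-entry round key schedule.
*/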
BOTAN_FUNC_ISA("crypto")
void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const Altivec64x2 K0  = load_key(&m_EK[0]);
   const Altivec64x2 K1  = load_key(&m_EK[4]);
   const Altivec64x2 K2  = load_key(&m_EK[8]);
   const Altivec64x2 K3  = load_key(&m_EK[12]);
   const Altivec64x2 K4  = load_key(&m_EK[16]);
   const Altivec64x2 K5  = load_key(&m_EK[20]);
   const Altivec64x2 K6  = load_key(&m_EK[24]);
   const Altivec64x2 K7  = load_key(&m_EK[28]);
   const Altivec64x2 K8  = load_key(&m_EK[32]);
   const Altivec64x2 K9  = load_key(&m_EK[36]);
   const Altivec64x2 K10 = load_key(&m_EK[40]);
   const Altivec64x2 K11 = load_key(&m_EK[44]);
   const Altivec64x2 K12 = load_key(&m_EK[48]);

   while(blocks >= 4)
      {
      Altivec64x2 B0 = load_block(in);
      Altivec64x2 B1 = load_block(in+16);
      Altivec64x2 B2 = load_block(in+16*2);
      Altivec64x2 B3 = load_block(in+16*3);

      AES_XOR_4(B0, B1, B2, B3, K0);
      AES_ENCRYPT_4(B0, B1, B2, B3, K1);
      AES_ENCRYPT_4(B0, B1, B2, B3, K2);
      AES_ENCRYPT_4(B0, B1, B2, B3, K3);
      AES_ENCRYPT_4(B0, B1, B2, B3, K4);
      AES_ENCRYPT_4(B0, B1, B2, B3, K5);
      AES_ENCRYPT_4(B0, B1, B2, B3, K6);
      AES_ENCRYPT_4(B0, B1, B2, B3, K7);
      AES_ENCRYPT_4(B0, B1, B2, B3, K8);
      AES_ENCRYPT_4(B0, B1, B2, B3, K9);
      AES_ENCRYPT_4(B0, B1, B2, B3, K10);
      AES_ENCRYPT_4(B0, B1, B2, B3, K11);
      AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K12);

      store_blocks(B0, B1, B2, B3, out);

      out += 4*16;
      in += 4*16;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      Altivec64x2 B = load_block(in);

      B = vec_xor(B, K0);
      B = __builtin_crypto_vcipher(B, K1);
      B = __builtin_crypto_vcipher(B, K2);
      B = __builtin_crypto_vcipher(B, K3);
      B = __builtin_crypto_vcipher(B, K4);
      B = __builtin_crypto_vcipher(B, K5);
      B = __builtin_crypto_vcipher(B, K6);
      B = __builtin_crypto_vcipher(B, K7);
      B = __builtin_crypto_vcipher(B, K8);
      B = __builtin_crypto_vcipher(B, K9);
      B = __builtin_crypto_vcipher(B, K10);
      B = __builtin_crypto_vcipher(B, K11);
      B = __builtin_crypto_vcipherlast(B, K12);

      store_block(B, out);

      out += 16;
      in += 16;
      }
   }

BOTAN_FUNC_ISA("crypto")
void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const Altivec64x2 K0  = load_key(&m_EK[48]);
   const Altivec64x2 K1  = load_key(&m_EK[44]);
   const Altivec64x2 K2  = load_key(&m_EK[40]);
   const Altivec64x2 K3  = load_key(&m_EK[36]);
   const Altivec64x2 K4  = load_key(&m_EK[32]);
   const Altivec64x2 K5  = load_key(&m_EK[28]);
   const Altivec64x2 K6  = load_key(&m_EK[24]);
   const Altivec64x2 K7  = load_key(&m_EK[20]);
   const Altivec64x2 K8  = load_key(&m_EK[16]);
   const Altivec64x2 K9  = load_key(&m_EK[12]);
   const Altivec64x2 K10 = load_key(&m_EK[8]);
   const Altivec64x2 K11 = load_key(&m_EK[4]);
   const Altivec64x2 K12 = load_key(&m_EK[0]);

   while(blocks >= 4)
      {
      Altivec64x2 B0 = load_block(in);
      Altivec64x2 B1 = load_block(in+16);
      Altivec64x2 B2 = load_block(in+16*2);
      Altivec64x2 B3 = load_block(in+16*3);

      AES_XOR_4(B0, B1, B2, B3, K0);
      AES_DECRYPT_4(B0, B1, B2, B3, K1);
      AES_DECRYPT_4(B0, B1, B2, B3, K2);
      AES_DECRYPT_4(B0, B1, B2, B3, K3);
      AES_DECRYPT_4(B0, B1, B2, B3, K4);
      AES_DECRYPT_4(B0, B1, B2, B3, K5);
      AES_DECRYPT_4(B0, B1, B2, B3, K6);
      AES_DECRYPT_4(B0, B1, B2, B3, K7);
      AES_DECRYPT_4(B0, B1, B2, B3, K8);
      AES_DECRYPT_4(B0, B1, B2, B3, K9);
      AES_DECRYPT_4(B0, B1, B2, B3, K10);
      AES_DECRYPT_4(B0, B1, B2, B3, K11);
      AES_DECRYPT_4_LAST(B0, B1, B2, B3, K12);

      store_blocks(B0, B1, B2, B3, out);

      out += 4*16;
      in += 4*16;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      Altivec64x2 B = load_block(in);

      B = vec_xor(B, K0);
      B = __builtin_crypto_vncipher(B, K1);
      B = __builtin_crypto_vncipher(B, K2);
      B = __builtin_crypto_vncipher(B, K3);
      B = __builtin_crypto_vncipher(B, K4);
      B = __builtin_crypto_vncipher(B, K5);
      B = __builtin_crypto_vncipher(B, K6);
      B = __builtin_crypto_vncipher(B, K7);
      B = __builtin_crypto_vncipher(B, K8);
      B = __builtin_crypto_vncipher(B, K9);
      B = __builtin_crypto_vncipher(B, K10);
      B = __builtin_crypto_vncipher(B, K11);
      B = __builtin_crypto_vncipherlast(B, K12);

      store_block(B, out);

      out += 16;
      in += 16;
      }
   }

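/*
* AES-256: same structure again, with 14 rounds and a 15-entry round key
* schedule.
*/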
BOTAN_FUNC_ISA("crypto")
void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const Altivec64x2 K0  = load_key(&m_EK[0]);
   const Altivec64x2 K1  = load_key(&m_EK[4]);
   const Altivec64x2 K2  = load_key(&m_EK[8]);
   const Altivec64x2 K3  = load_key(&m_EK[12]);
   const Altivec64x2 K4  = load_key(&m_EK[16]);
   const Altivec64x2 K5  = load_key(&m_EK[20]);
   const Altivec64x2 K6  = load_key(&m_EK[24]);
   const Altivec64x2 K7  = load_key(&m_EK[28]);
   const Altivec64x2 K8  = load_key(&m_EK[32]);
   const Altivec64x2 K9  = load_key(&m_EK[36]);
   const Altivec64x2 K10 = load_key(&m_EK[40]);
   const Altivec64x2 K11 = load_key(&m_EK[44]);
   const Altivec64x2 K12 = load_key(&m_EK[48]);
   const Altivec64x2 K13 = load_key(&m_EK[52]);
   const Altivec64x2 K14 = load_key(&m_EK[56]);

   while(blocks >= 4)
      {
      Altivec64x2 B0 = load_block(in);
      Altivec64x2 B1 = load_block(in+16);
      Altivec64x2 B2 = load_block(in+16*2);
      Altivec64x2 B3 = load_block(in+16*3);

      AES_XOR_4(B0, B1, B2, B3, K0);
      AES_ENCRYPT_4(B0, B1, B2, B3, K1);
      AES_ENCRYPT_4(B0, B1, B2, B3, K2);
      AES_ENCRYPT_4(B0, B1, B2, B3, K3);
      AES_ENCRYPT_4(B0, B1, B2, B3, K4);
      AES_ENCRYPT_4(B0, B1, B2, B3, K5);
      AES_ENCRYPT_4(B0, B1, B2, B3, K6);
      AES_ENCRYPT_4(B0, B1, B2, B3, K7);
      AES_ENCRYPT_4(B0, B1, B2, B3, K8);
      AES_ENCRYPT_4(B0, B1, B2, B3, K9);
      AES_ENCRYPT_4(B0, B1, B2, B3, K10);
      AES_ENCRYPT_4(B0, B1, B2, B3, K11);
      AES_ENCRYPT_4(B0, B1, B2, B3, K12);
      AES_ENCRYPT_4(B0, B1, B2, B3, K13);
      AES_ENCRYPT_4_LAST(B0, B1, B2, B3, K14);

      store_blocks(B0, B1, B2, B3, out);

      out += 4*16;
      in += 4*16;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      Altivec64x2 B = load_block(in);

      B = vec_xor(B, K0);
      B = __builtin_crypto_vcipher(B, K1);
      B = __builtin_crypto_vcipher(B, K2);
      B = __builtin_crypto_vcipher(B, K3);
      B = __builtin_crypto_vcipher(B, K4);
      B = __builtin_crypto_vcipher(B, K5);
      B = __builtin_crypto_vcipher(B, K6);
      B = __builtin_crypto_vcipher(B, K7);
      B = __builtin_crypto_vcipher(B, K8);
      B = __builtin_crypto_vcipher(B, K9);
      B = __builtin_crypto_vcipher(B, K10);
      B = __builtin_crypto_vcipher(B, K11);
      B = __builtin_crypto_vcipher(B, K12);
      B = __builtin_crypto_vcipher(B, K13);
      B = __builtin_crypto_vcipherlast(B, K14);

      store_block(B, out);

      out += 16;
      in += 16;
      }
   }

BOTAN_FUNC_ISA("crypto")
void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const Altivec64x2 K0  = load_key(&m_EK[56]);
   const Altivec64x2 K1  = load_key(&m_EK[52]);
   const Altivec64x2 K2  = load_key(&m_EK[48]);
   const Altivec64x2 K3  = load_key(&m_EK[44]);
   const Altivec64x2 K4  = load_key(&m_EK[40]);
   const Altivec64x2 K5  = load_key(&m_EK[36]);
   const Altivec64x2 K6  = load_key(&m_EK[32]);
   const Altivec64x2 K7  = load_key(&m_EK[28]);
   const Altivec64x2 K8  = load_key(&m_EK[24]);
   const Altivec64x2 K9  = load_key(&m_EK[20]);
   const Altivec64x2 K10 = load_key(&m_EK[16]);
   const Altivec64x2 K11 = load_key(&m_EK[12]);
   const Altivec64x2 K12 = load_key(&m_EK[8]);
   const Altivec64x2 K13 = load_key(&m_EK[4]);
   const Altivec64x2 K14 = load_key(&m_EK[0]);

   while(blocks >= 4)
      {
      Altivec64x2 B0 = load_block(in);
      Altivec64x2 B1 = load_block(in+16);
      Altivec64x2 B2 = load_block(in+16*2);
      Altivec64x2 B3 = load_block(in+16*3);

      AES_XOR_4(B0, B1, B2, B3, K0);
      AES_DECRYPT_4(B0, B1, B2, B3, K1);
      AES_DECRYPT_4(B0, B1, B2, B3, K2);
      AES_DECRYPT_4(B0, B1, B2, B3, K3);
      AES_DECRYPT_4(B0, B1, B2, B3, K4);
      AES_DECRYPT_4(B0, B1, B2, B3, K5);
      AES_DECRYPT_4(B0, B1, B2, B3, K6);
      AES_DECRYPT_4(B0, B1, B2, B3, K7);
      AES_DECRYPT_4(B0, B1, B2, B3, K8);
      AES_DECRYPT_4(B0, B1, B2, B3, K9);
      AES_DECRYPT_4(B0, B1, B2, B3, K10);
      AES_DECRYPT_4(B0, B1, B2, B3, K11);
      AES_DECRYPT_4(B0, B1, B2, B3, K12);
      AES_DECRYPT_4(B0, B1, B2, B3, K13);
      AES_DECRYPT_4_LAST(B0, B1, B2, B3, K14);

      store_blocks(B0, B1, B2, B3, out);

      out += 4*16;
      in += 4*16;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      Altivec64x2 B = load_block(in);

      B = vec_xor(B, K0);
      B = __builtin_crypto_vncipher(B, K1);
      B = __builtin_crypto_vncipher(B, K2);
      B = __builtin_crypto_vncipher(B, K3);
      B = __builtin_crypto_vncipher(B, K4);
      B = __builtin_crypto_vncipher(B, K5);
      B = __builtin_crypto_vncipher(B, K6);
      B = __builtin_crypto_vncipher(B, K7);
      B = __builtin_crypto_vncipher(B, K8);
      B = __builtin_crypto_vncipher(B, K9);
      B = __builtin_crypto_vncipher(B, K10);
      B = __builtin_crypto_vncipher(B, K11);
      B = __builtin_crypto_vncipher(B, K12);
      B = __builtin_crypto_vncipher(B, K13);
      B = __builtin_crypto_vncipherlast(B, K14);

      store_block(B, out);

      out += 16;
      in += 16;
      }
   }

#undef AES_XOR_4
#undef AES_ENCRYPT_4
#undef AES_ENCRYPT_4_LAST
#undef AES_DECRYPT_4
#undef AES_DECRYPT_4_LAST

}