1;;
2;; Copyright (c) 2012-2020, Intel Corporation
3;;
4;; Redistribution and use in source and binary forms, with or without
5;; modification, are permitted provided that the following conditions are met:
6;;
7;;     * Redistributions of source code must retain the above copyright notice,
8;;       this list of conditions and the following disclaimer.
9;;     * Redistributions in binary form must reproduce the above copyright
10;;       notice, this list of conditions and the following disclaimer in the
11;;       documentation and/or other materials provided with the distribution.
12;;     * Neither the name of Intel Corporation nor the names of its contributors
13;;       may be used to endorse or promote products derived from this software
14;;       without specific prior written permission.
15;;
16;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26;;
27
28; Routine to do AES key expansion
29%include "include/os.asm"
30%define NO_AESNI_RENAME
31%include "include/aesni_emu.inc"
32%include "include/clear_regs.asm"
33
;; key_expansion_256_sse
;; Produces the next even-numbered AES-256 round key in xmm1 (SSE encoding).
;;   In:   xmm1 = round key two positions back, dwords k0..k3 (low to high)
;;         xmm2 = aeskeygenassist result; dword 3 (the "f()" value) is used
;;         xmm3 = scratch whose LOW DWORD must be zero (the upper dwords are
;;                overwritten, so they do not need to be zero)
;;   Out:  xmm1 = { k0^t, k0^k1^t, k0^k1^k2^t, k0^k1^k2^k3^t },
;;                t = dword 3 of the incoming xmm2
;;   Clobbers: xmm2 (t broadcast to all dwords), xmm3 (low dword stays zero)
%macro key_expansion_256_sse 0
        pshufd  xmm2, xmm2, 0xFF        ; broadcast dword 3 of xmm2
        shufps  xmm3, xmm1, 0x10        ; xmm3 = { 0, 0, k1, k0 }
        pxor    xmm1, xmm3              ; xmm1 = { k0, k1, k1^k2, k0^k3 }
        shufps  xmm3, xmm1, 0x8C        ; xmm3 = { 0, k0, k0, k1^k2 }
        pxor    xmm1, xmm3              ; xmm1 = running XOR of k0..k3
        pxor    xmm1, xmm2              ; fold t into every dword
%endmacro
44
;; key_expansion_256_sse_2
;; Produces the next odd-numbered AES-256 round key in xmm4 (SSE encoding).
;;   In:   xmm4 = round key two positions back, dwords k0..k3 (low to high)
;;         xmm2 = aeskeygenassist result; dword 2 (the SubWord value) is used
;;         xmm3 = scratch whose LOW DWORD must be zero (the upper dwords are
;;                overwritten, so they do not need to be zero)
;;   Out:  xmm4 = { k0^s, k0^k1^s, k0^k1^k2^s, k0^k1^k2^k3^s },
;;                s = dword 2 of the incoming xmm2
;;   Clobbers: xmm2 (s broadcast to all dwords), xmm3 (low dword stays zero)
%macro key_expansion_256_sse_2 0
        pshufd  xmm2, xmm2, 0xAA        ; broadcast dword 2 of xmm2
        shufps  xmm3, xmm4, 0x10        ; xmm3 = { 0, 0, k1, k0 }
        pxor    xmm4, xmm3              ; xmm4 = { k0, k1, k1^k2, k0^k3 }
        shufps  xmm3, xmm4, 0x8C        ; xmm3 = { 0, k0, k0, k1^k2 }
        pxor    xmm4, xmm3              ; xmm4 = running XOR of k0..k3
        pxor    xmm4, xmm2              ; fold s into every dword
%endmacro
55
;; key_expansion_256_avx
;; Produces the next even-numbered AES-256 round key in xmm1 (VEX encoding).
;;   In:   xmm1 = round key two positions back, dwords k0..k3 (low to high)
;;         xmm2 = vaeskeygenassist result; dword 3 (the "f()" value) is used
;;         xmm3 = scratch whose LOW DWORD must be zero (the upper dwords are
;;                overwritten, so they do not need to be zero)
;;   Out:  xmm1 = { k0^t, k0^k1^t, k0^k1^k2^t, k0^k1^k2^k3^t },
;;                t = dword 3 of the incoming xmm2
;;   Clobbers: xmm2 (t broadcast to all dwords), xmm3 (low dword stays zero)
%macro key_expansion_256_avx 0
        vpshufd xmm2, xmm2, 0xFF                ; broadcast dword 3 of xmm2
        vshufps xmm3, xmm3, xmm1, 0x10          ; xmm3 = { 0, 0, k1, k0 }
        vpxor   xmm1, xmm1, xmm3                ; xmm1 = { k0, k1, k1^k2, k0^k3 }
        vshufps xmm3, xmm3, xmm1, 0x8C          ; xmm3 = { 0, k0, k0, k1^k2 }
        vpxor   xmm1, xmm1, xmm3                ; xmm1 = running XOR of k0..k3
        vpxor   xmm1, xmm1, xmm2                ; fold t into every dword
%endmacro
66
;; key_expansion_256_avx_2
;; Produces the next odd-numbered AES-256 round key in xmm4 (VEX encoding).
;;   In:   xmm4 = round key two positions back, dwords k0..k3 (low to high)
;;         xmm2 = vaeskeygenassist result; dword 2 (the SubWord value) is used
;;         xmm3 = scratch whose LOW DWORD must be zero (the upper dwords are
;;                overwritten, so they do not need to be zero)
;;   Out:  xmm4 = { k0^s, k0^k1^s, k0^k1^k2^s, k0^k1^k2^k3^s },
;;                s = dword 2 of the incoming xmm2
;;   Clobbers: xmm2 (s broadcast to all dwords), xmm3 (low dword stays zero)
%macro key_expansion_256_avx_2 0
        vpshufd xmm2, xmm2, 0xAA                ; broadcast dword 2 of xmm2
        vshufps xmm3, xmm3, xmm4, 0x10          ; xmm3 = { 0, 0, k1, k0 }
        vpxor   xmm4, xmm4, xmm3                ; xmm4 = { k0, k1, k1^k2, k0^k3 }
        vshufps xmm3, xmm3, xmm4, 0x8C          ; xmm3 = { 0, k0, k0, k1^k2 }
        vpxor   xmm4, xmm4, xmm3                ; xmm4 = running XOR of k0..k3
        vpxor   xmm4, xmm4, xmm2                ; fold s into every dword
%endmacro
77
;; Symbolic names for the three pointer arguments.
;; With LINUX defined they map to rdi/rsi/rdx (System V AMD64 argument order);
;; otherwise to rcx/rdx/r8 (Microsoft x64 argument order).
%ifdef LINUX
%define KEY             rdi
%define EXP_ENC_KEYS    rsi
%define EXP_DEC_KEYS    rdx
%else
%define KEY             rcx
%define EXP_ENC_KEYS    rdx
%define EXP_DEC_KEYS    r8
%endif

section .text
89
;; void aes_keyexp_256_sse(UINT128 *key,
;;                         UINT128 *enc_exp_keys,
;;                         UINT128 *dec_exp_keys);
;;
;; Expands a 256-bit AES key into 15 encryption round keys (indices 0..14)
;; and the matching 15 decryption round keys.
;;
;; arg 1: KEY          (rdi with LINUX, rcx otherwise)
;;        32 bytes of key material; read with unaligned loads.
;; arg 2: EXP_ENC_KEYS (rsi with LINUX, rdx otherwise)
;;        15 x 16 bytes; written with movdqa, so it must be 16-byte aligned.
;; arg 3: EXP_DEC_KEYS (rdx with LINUX, r8 otherwise)
;;        15 x 16 bytes; written with movdqa, so it must be 16-byte aligned.
;;
;; Decryption key j is encryption key (14 - j); for j = 1..13 it is passed
;; through aesimc first, while j = 0 and j = 14 are stored unchanged.
;;
;; Registers written directly here: xmm0-xmm5 and flags (SAFE_PARAM only).
;; SAFE_PARAM: returns without touching memory if any pointer is NULL.
;; SAFE_DATA:  invokes clear_scratch_gps_asm / clear_scratch_xmms_sse_asm
;;             before returning (not on the NULL-pointer early exit, where no
;;             key material has been loaded yet).
MKGLOBAL(aes_keyexp_256_sse,function,)
aes_keyexp_256_sse:

%ifdef SAFE_PARAM
        test    KEY, KEY
        jz      aes_keyexp_256_sse_return
        test    EXP_ENC_KEYS, EXP_ENC_KEYS
        jz      aes_keyexp_256_sse_return
        test    EXP_DEC_KEYS, EXP_DEC_KEYS
        jz      aes_keyexp_256_sse_return
%endif

        ;; Round keys 0 and 1 are the two halves of the cipher key itself.
        movdqu  xmm1, [KEY]
        movdqa  [EXP_ENC_KEYS + 16*0], xmm1
        movdqa  [EXP_DEC_KEYS + 16*14], xmm1    ; last decrypt key: no aesimc

        movdqu  xmm4, [KEY + 16]
        movdqa  [EXP_ENC_KEYS + 16*1], xmm4
        aesimc  xmm0, xmm4
        movdqa  [EXP_DEC_KEYS + 16*13], xmm0

        pxor    xmm3, xmm3                      ; low dword must be 0 for the macros

        ;; Round keys 2..13, generated two at a time.
        ;; KEYEXP_IDX  = index of the even key of the pair (2, 4, ..., 12)
        ;; KEYEXP_RCON = round constant for the pair (0x01, 0x02, ..., 0x20)
%assign KEYEXP_IDX      2
%assign KEYEXP_RCON     0x01
%rep 6
        ;; even key -> xmm1, derived from the previous odd key in xmm4
        aeskeygenassist xmm2, xmm4, KEYEXP_RCON
        key_expansion_256_sse
        movdqa  [EXP_ENC_KEYS + 16*KEYEXP_IDX], xmm1
        aesimc  xmm5, xmm1
        movdqa  [EXP_DEC_KEYS + 16*(14 - KEYEXP_IDX)], xmm5

        ;; odd key -> xmm4, derived from the even key just produced in xmm1
        aeskeygenassist xmm2, xmm1, KEYEXP_RCON
        key_expansion_256_sse_2
        movdqa  [EXP_ENC_KEYS + 16*(KEYEXP_IDX + 1)], xmm4
        aesimc  xmm0, xmm4
        movdqa  [EXP_DEC_KEYS + 16*(13 - KEYEXP_IDX)], xmm0
%assign KEYEXP_IDX      KEYEXP_IDX + 2
%assign KEYEXP_RCON     KEYEXP_RCON * 2
%endrep
%undef KEYEXP_IDX
%undef KEYEXP_RCON

        ;; Round key 14 closes the schedule; it is also decrypt key 0 as-is.
        aeskeygenassist xmm2, xmm4, 0x40
        key_expansion_256_sse
        movdqa  [EXP_ENC_KEYS + 16*14], xmm1
        movdqa  [EXP_DEC_KEYS + 16*0], xmm1

%ifdef SAFE_DATA
        clear_scratch_gps_asm
        clear_scratch_xmms_sse_asm
%endif

aes_keyexp_256_sse_return:
        ret
205
;; void aes_keyexp_256_sse_no_aesni(UINT128 *key,
;;                                  UINT128 *enc_exp_keys,
;;                                  UINT128 *dec_exp_keys);
;;
;; Same key schedule as the AESNI-based SSE routine, but every aesimc /
;; aeskeygenassist is replaced by the EMULATE_AESIMC / EMULATE_AESKEYGENASSIST
;; macros (presumably supplied by include/aesni_emu.inc - confirm there which
;; registers, if any, those macros clobber).
;;
;; arg 1: KEY          (rdi with LINUX, rcx otherwise)
;;        32 bytes of key material; read with unaligned loads.
;; arg 2: EXP_ENC_KEYS (rsi with LINUX, rdx otherwise)
;;        15 x 16 bytes; written with movdqa, so it must be 16-byte aligned.
;; arg 3: EXP_DEC_KEYS (rdx with LINUX, r8 otherwise)
;;        15 x 16 bytes; written with movdqa, so it must be 16-byte aligned.
;;
;; Decryption key j is encryption key (14 - j); for j = 1..13 it is passed
;; through the inverse-mix-columns emulation first, while j = 0 and j = 14
;; are stored unchanged.
;;
;; SAFE_PARAM: returns without touching memory if any pointer is NULL.
;; SAFE_DATA:  invokes clear_scratch_gps_asm / clear_scratch_xmms_sse_asm
;;             before returning (not on the NULL-pointer early exit).
MKGLOBAL(aes_keyexp_256_sse_no_aesni,function,)
aes_keyexp_256_sse_no_aesni:

%ifdef SAFE_PARAM
        test    KEY, KEY
        jz      aes_keyexp_256_sse_no_aesni_return
        test    EXP_ENC_KEYS, EXP_ENC_KEYS
        jz      aes_keyexp_256_sse_no_aesni_return
        test    EXP_DEC_KEYS, EXP_DEC_KEYS
        jz      aes_keyexp_256_sse_no_aesni_return
%endif

        ;; Round keys 0 and 1 are the two halves of the cipher key itself.
        movdqu  xmm1, [KEY]
        movdqa  [EXP_ENC_KEYS + 16*0], xmm1
        movdqa  [EXP_DEC_KEYS + 16*14], xmm1    ; last decrypt key: stored as-is

        movdqu  xmm4, [KEY + 16]
        movdqa  [EXP_ENC_KEYS + 16*1], xmm4
        EMULATE_AESIMC  xmm0, xmm4
        movdqa  [EXP_DEC_KEYS + 16*13], xmm0

        pxor    xmm3, xmm3                      ; low dword must be 0 for the macros

        ;; Round keys 2..13, generated two at a time.
        ;; KEYEXP_IDX  = index of the even key of the pair (2, 4, ..., 12)
        ;; KEYEXP_RCON = round constant for the pair (0x01, 0x02, ..., 0x20)
%assign KEYEXP_IDX      2
%assign KEYEXP_RCON     0x01
%rep 6
        ;; even key -> xmm1, derived from the previous odd key in xmm4
        EMULATE_AESKEYGENASSIST xmm2, xmm4, KEYEXP_RCON
        key_expansion_256_sse
        movdqa  [EXP_ENC_KEYS + 16*KEYEXP_IDX], xmm1
        EMULATE_AESIMC  xmm5, xmm1
        movdqa  [EXP_DEC_KEYS + 16*(14 - KEYEXP_IDX)], xmm5

        ;; odd key -> xmm4, derived from the even key just produced in xmm1
        EMULATE_AESKEYGENASSIST xmm2, xmm1, KEYEXP_RCON
        key_expansion_256_sse_2
        movdqa  [EXP_ENC_KEYS + 16*(KEYEXP_IDX + 1)], xmm4
        EMULATE_AESIMC  xmm0, xmm4
        movdqa  [EXP_DEC_KEYS + 16*(13 - KEYEXP_IDX)], xmm0
%assign KEYEXP_IDX      KEYEXP_IDX + 2
%assign KEYEXP_RCON     KEYEXP_RCON * 2
%endrep
%undef KEYEXP_IDX
%undef KEYEXP_RCON

        ;; Round key 14 closes the schedule; it is also decrypt key 0 as-is.
        EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x40
        key_expansion_256_sse
        movdqa  [EXP_ENC_KEYS + 16*14], xmm1
        movdqa  [EXP_DEC_KEYS + 16*0], xmm1

%ifdef SAFE_DATA
        clear_scratch_gps_asm
        clear_scratch_xmms_sse_asm
%endif

aes_keyexp_256_sse_no_aesni_return:
        ret
313
;; void aes_keyexp_256_avx(UINT128 *key,
;;                         UINT128 *enc_exp_keys,
;;                         UINT128 *dec_exp_keys);
;; aes_keyexp_256_avx2 and aes_keyexp_256_avx512 are aliases of the same code.
;;
;; Expands a 256-bit AES key into 15 encryption round keys (indices 0..14)
;; and the matching 15 decryption round keys, using VEX-encoded instructions
;; on xmm registers only.
;;
;; arg 1: KEY          (rdi with LINUX, rcx otherwise)
;;        32 bytes of key material; read with unaligned loads.
;; arg 2: EXP_ENC_KEYS (rsi with LINUX, rdx otherwise)
;;        15 x 16 bytes; written with vmovdqa, so it must be 16-byte aligned.
;; arg 3: EXP_DEC_KEYS (rdx with LINUX, r8 otherwise)
;;        15 x 16 bytes; written with vmovdqa, so it must be 16-byte aligned.
;;
;; Decryption key j is encryption key (14 - j); for j = 1..13 it is passed
;; through vaesimc first, while j = 0 and j = 14 are stored unchanged.
;;
;; Registers written directly here: xmm0-xmm5 and flags (SAFE_PARAM only).
;; SAFE_PARAM: returns without touching memory if any pointer is NULL.
;; SAFE_DATA:  invokes clear_scratch_gps_asm / clear_scratch_xmms_avx_asm
;;             before returning (not on the NULL-pointer early exit).
MKGLOBAL(aes_keyexp_256_avx,function,)
MKGLOBAL(aes_keyexp_256_avx2,function,)
MKGLOBAL(aes_keyexp_256_avx512,function,)
aes_keyexp_256_avx:
aes_keyexp_256_avx2:
aes_keyexp_256_avx512:

%ifdef SAFE_PARAM
        test    KEY, KEY
        jz      aes_keyexp_256_avx_return
        test    EXP_ENC_KEYS, EXP_ENC_KEYS
        jz      aes_keyexp_256_avx_return
        test    EXP_DEC_KEYS, EXP_DEC_KEYS
        jz      aes_keyexp_256_avx_return
%endif

        ;; Round keys 0 and 1 are the two halves of the cipher key itself.
        vmovdqu xmm1, [KEY]
        vmovdqa [EXP_ENC_KEYS + 16*0], xmm1
        vmovdqa [EXP_DEC_KEYS + 16*14], xmm1    ; last decrypt key: no vaesimc

        vmovdqu xmm4, [KEY + 16]
        vmovdqa [EXP_ENC_KEYS + 16*1], xmm4
        vaesimc xmm0, xmm4
        vmovdqa [EXP_DEC_KEYS + 16*13], xmm0

        vpxor   xmm3, xmm3, xmm3                ; low dword must be 0 for the macros

        ;; Round keys 2..13, generated two at a time.
        ;; KEYEXP_IDX  = index of the even key of the pair (2, 4, ..., 12)
        ;; KEYEXP_RCON = round constant for the pair (0x01, 0x02, ..., 0x20)
%assign KEYEXP_IDX      2
%assign KEYEXP_RCON     0x01
%rep 6
        ;; even key -> xmm1, derived from the previous odd key in xmm4
        vaeskeygenassist xmm2, xmm4, KEYEXP_RCON
        key_expansion_256_avx
        vmovdqa [EXP_ENC_KEYS + 16*KEYEXP_IDX], xmm1
        vaesimc xmm5, xmm1
        vmovdqa [EXP_DEC_KEYS + 16*(14 - KEYEXP_IDX)], xmm5

        ;; odd key -> xmm4, derived from the even key just produced in xmm1
        vaeskeygenassist xmm2, xmm1, KEYEXP_RCON
        key_expansion_256_avx_2
        vmovdqa [EXP_ENC_KEYS + 16*(KEYEXP_IDX + 1)], xmm4
        vaesimc xmm0, xmm4
        vmovdqa [EXP_DEC_KEYS + 16*(13 - KEYEXP_IDX)], xmm0
%assign KEYEXP_IDX      KEYEXP_IDX + 2
%assign KEYEXP_RCON     KEYEXP_RCON * 2
%endrep
%undef KEYEXP_IDX
%undef KEYEXP_RCON

        ;; Round key 14 closes the schedule; it is also decrypt key 0 as-is.
        vaeskeygenassist xmm2, xmm4, 0x40
        key_expansion_256_avx
        vmovdqa [EXP_ENC_KEYS + 16*14], xmm1
        vmovdqa [EXP_DEC_KEYS + 16*0], xmm1

%ifdef SAFE_DATA
        clear_scratch_gps_asm
        clear_scratch_xmms_avx_asm
%endif

aes_keyexp_256_avx_return:
        ret
425
426;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
427;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
428;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
429;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
430
;; void aes_keyexp_256_enc_sse(UINT128 *key,
;;                             UINT128 *enc_exp_keys);
;;
;; Expands a 256-bit AES key into the 15 encryption round keys only
;; (indices 0..14); no decryption keys are produced.
;;
;; arg 1: KEY          (rdi with LINUX, rcx otherwise)
;;        32 bytes of key material; read with unaligned loads.
;; arg 2: EXP_ENC_KEYS (rsi with LINUX, rdx otherwise)
;;        15 x 16 bytes; written with movdqa, so it must be 16-byte aligned.
;;
;; Registers written directly here: xmm1-xmm4 and flags (SAFE_PARAM only).
;; SAFE_PARAM: returns without touching memory if either pointer is NULL.
;; SAFE_DATA:  invokes clear_scratch_gps_asm / clear_scratch_xmms_sse_asm
;;             before returning (not on the NULL-pointer early exit).
MKGLOBAL(aes_keyexp_256_enc_sse,function,)
aes_keyexp_256_enc_sse:

%ifdef SAFE_PARAM
        test    KEY, KEY
        jz      aes_keyexp_256_enc_sse_return
        test    EXP_ENC_KEYS, EXP_ENC_KEYS
        jz      aes_keyexp_256_enc_sse_return
%endif

        ;; Round keys 0 and 1 are the two halves of the cipher key itself.
        movdqu  xmm1, [KEY]
        movdqa  [EXP_ENC_KEYS + 16*0], xmm1

        movdqu  xmm4, [KEY + 16]
        movdqa  [EXP_ENC_KEYS + 16*1], xmm4

        pxor    xmm3, xmm3                      ; low dword must be 0 for the macros

        ;; Round keys 2..13, generated two at a time.
        ;; KEYEXP_IDX  = index of the even key of the pair (2, 4, ..., 12)
        ;; KEYEXP_RCON = round constant for the pair (0x01, 0x02, ..., 0x20)
%assign KEYEXP_IDX      2
%assign KEYEXP_RCON     0x01
%rep 6
        ;; even key -> xmm1, derived from the previous odd key in xmm4
        aeskeygenassist xmm2, xmm4, KEYEXP_RCON
        key_expansion_256_sse
        movdqa  [EXP_ENC_KEYS + 16*KEYEXP_IDX], xmm1

        ;; odd key -> xmm4, derived from the even key just produced in xmm1
        aeskeygenassist xmm2, xmm1, KEYEXP_RCON
        key_expansion_256_sse_2
        movdqa  [EXP_ENC_KEYS + 16*(KEYEXP_IDX + 1)], xmm4
%assign KEYEXP_IDX      KEYEXP_IDX + 2
%assign KEYEXP_RCON     KEYEXP_RCON * 2
%endrep
%undef KEYEXP_IDX
%undef KEYEXP_RCON

        ;; Round key 14 closes the schedule.
        aeskeygenassist xmm2, xmm4, 0x40
        key_expansion_256_sse
        movdqa  [EXP_ENC_KEYS + 16*14], xmm1

%ifdef SAFE_DATA
        clear_scratch_gps_asm
        clear_scratch_xmms_sse_asm
%endif

aes_keyexp_256_enc_sse_return:
        ret
514
;; void aes_keyexp_256_enc_sse_no_aesni(UINT128 *key,
;;                                      UINT128 *enc_exp_keys);
;;
;; Expands a 256-bit AES key into the 15 encryption round keys only
;; (indices 0..14), with aeskeygenassist replaced by the
;; EMULATE_AESKEYGENASSIST macro (presumably supplied by
;; include/aesni_emu.inc - confirm there which registers, if any, it clobbers).
;;
;; arg 1: KEY          (rdi with LINUX, rcx otherwise)
;;        32 bytes of key material; read with unaligned loads.
;; arg 2: EXP_ENC_KEYS (rsi with LINUX, rdx otherwise)
;;        15 x 16 bytes; written with movdqa, so it must be 16-byte aligned.
;;
;; SAFE_PARAM: returns without touching memory if either pointer is NULL.
;; SAFE_DATA:  invokes clear_scratch_gps_asm / clear_scratch_xmms_sse_asm
;;             before returning (not on the NULL-pointer early exit).
MKGLOBAL(aes_keyexp_256_enc_sse_no_aesni,function,)
aes_keyexp_256_enc_sse_no_aesni:

%ifdef SAFE_PARAM
        test    KEY, KEY
        jz      aes_keyexp_256_enc_sse_no_aesni_return
        test    EXP_ENC_KEYS, EXP_ENC_KEYS
        jz      aes_keyexp_256_enc_sse_no_aesni_return
%endif

        ;; Round keys 0 and 1 are the two halves of the cipher key itself.
        movdqu  xmm1, [KEY]
        movdqa  [EXP_ENC_KEYS + 16*0], xmm1

        movdqu  xmm4, [KEY + 16]
        movdqa  [EXP_ENC_KEYS + 16*1], xmm4

        pxor    xmm3, xmm3                      ; low dword must be 0 for the macros

        ;; Round keys 2..13, generated two at a time.
        ;; KEYEXP_IDX  = index of the even key of the pair (2, 4, ..., 12)
        ;; KEYEXP_RCON = round constant for the pair (0x01, 0x02, ..., 0x20)
%assign KEYEXP_IDX      2
%assign KEYEXP_RCON     0x01
%rep 6
        ;; even key -> xmm1, derived from the previous odd key in xmm4
        EMULATE_AESKEYGENASSIST xmm2, xmm4, KEYEXP_RCON
        key_expansion_256_sse
        movdqa  [EXP_ENC_KEYS + 16*KEYEXP_IDX], xmm1

        ;; odd key -> xmm4, derived from the even key just produced in xmm1
        EMULATE_AESKEYGENASSIST xmm2, xmm1, KEYEXP_RCON
        key_expansion_256_sse_2
        movdqa  [EXP_ENC_KEYS + 16*(KEYEXP_IDX + 1)], xmm4
%assign KEYEXP_IDX      KEYEXP_IDX + 2
%assign KEYEXP_RCON     KEYEXP_RCON * 2
%endrep
%undef KEYEXP_IDX
%undef KEYEXP_RCON

        ;; Round key 14 closes the schedule.
        EMULATE_AESKEYGENASSIST xmm2, xmm4, 0x40
        key_expansion_256_sse
        movdqa  [EXP_ENC_KEYS + 16*14], xmm1

%ifdef SAFE_DATA
        clear_scratch_gps_asm
        clear_scratch_xmms_sse_asm
%endif

aes_keyexp_256_enc_sse_no_aesni_return:
        ret
592
;; void aes_keyexp_256_enc_avx(UINT128 *key,
;;                             UINT128 *enc_exp_keys);
;; aes_keyexp_256_enc_avx2 and aes_keyexp_256_enc_avx512 are aliases of the
;; same code.
;;
;; Expands a 256-bit AES key into the 15 encryption round keys only
;; (indices 0..14), using VEX-encoded instructions on xmm registers only.
;;
;; arg 1: KEY          (rdi with LINUX, rcx otherwise)
;;        32 bytes of key material; read with unaligned loads.
;; arg 2: EXP_ENC_KEYS (rsi with LINUX, rdx otherwise)
;;        15 x 16 bytes; written with vmovdqa, so it must be 16-byte aligned.
;;
;; Registers written directly here: xmm1-xmm4 and flags (SAFE_PARAM only).
;; SAFE_PARAM: returns without touching memory if either pointer is NULL.
;; SAFE_DATA:  invokes clear_scratch_gps_asm / clear_scratch_xmms_avx_asm
;;             before returning (not on the NULL-pointer early exit).
MKGLOBAL(aes_keyexp_256_enc_avx,function,)
MKGLOBAL(aes_keyexp_256_enc_avx2,function,)
MKGLOBAL(aes_keyexp_256_enc_avx512,function,)
aes_keyexp_256_enc_avx:
aes_keyexp_256_enc_avx2:
aes_keyexp_256_enc_avx512:

%ifdef SAFE_PARAM
        test    KEY, KEY
        jz      aes_keyexp_256_enc_avx_return
        test    EXP_ENC_KEYS, EXP_ENC_KEYS
        jz      aes_keyexp_256_enc_avx_return
%endif

        ;; Round keys 0 and 1 are the two halves of the cipher key itself.
        vmovdqu xmm1, [KEY]
        vmovdqa [EXP_ENC_KEYS + 16*0], xmm1

        vmovdqu xmm4, [KEY + 16]
        vmovdqa [EXP_ENC_KEYS + 16*1], xmm4

        vpxor   xmm3, xmm3, xmm3                ; low dword must be 0 for the macros

        ;; Round keys 2..13, generated two at a time.
        ;; KEYEXP_IDX  = index of the even key of the pair (2, 4, ..., 12)
        ;; KEYEXP_RCON = round constant for the pair (0x01, 0x02, ..., 0x20)
%assign KEYEXP_IDX      2
%assign KEYEXP_RCON     0x01
%rep 6
        ;; even key -> xmm1, derived from the previous odd key in xmm4
        vaeskeygenassist xmm2, xmm4, KEYEXP_RCON
        key_expansion_256_avx
        vmovdqa [EXP_ENC_KEYS + 16*KEYEXP_IDX], xmm1

        ;; odd key -> xmm4, derived from the even key just produced in xmm1
        vaeskeygenassist xmm2, xmm1, KEYEXP_RCON
        key_expansion_256_avx_2
        vmovdqa [EXP_ENC_KEYS + 16*(KEYEXP_IDX + 1)], xmm4
%assign KEYEXP_IDX      KEYEXP_IDX + 2
%assign KEYEXP_RCON     KEYEXP_RCON * 2
%endrep
%undef KEYEXP_IDX
%undef KEYEXP_RCON

        ;; Round key 14 closes the schedule.
        vaeskeygenassist xmm2, xmm4, 0x40
        key_expansion_256_avx
        vmovdqa [EXP_ENC_KEYS + 16*14], xmm1

%ifdef SAFE_DATA
        clear_scratch_gps_asm
        clear_scratch_xmms_avx_asm
%endif

aes_keyexp_256_enc_avx_return:
        ret
674
;; LINUX builds only: emit an empty, non-allocated .note.GNU-stack section.
;; NOTE(review): this is the conventional ELF marker telling the linker the
;; object does not need an executable stack - confirm against the toolchain docs.
%ifdef LINUX
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
678