/*-
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008-2010, Intel Corporation
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the
 *   distribution.
 *
 * - Neither the name of Intel Corporation nor the names of its
 *   contributors may be used to endorse or promote products
 *   derived from this software without specific prior written
 *   permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/crypto/aesni/aeskeys_amd64.S,v 1.2 2011/03/02 14:56:58 kib Exp $
 */

#include <machine/asmacros.h>

/*
 * Everything below is AT&T-syntax x86-64 code placed in the text section.
 * The ENTRY() and END() macros used around each routine are not defined in
 * this file; presumably they come from the header included above.
 */
	.text

/*
 * _key_expansion_128 / _key_expansion_256a
 *
 * Derive the next 128-bit round key from the previous one and append it
 * to the key schedule.  Both names label the same code.
 *
 * In:    %xmm0 = previous round key, four 32-bit words w0..w3
 *        %xmm1 = AESKEYGENASSIST result; only its dword 3 is used
 *        %xmm4 = low dword must be zero (the caller clears it once)
 *        %rsi  = schedule slot to fill; stored with movaps, so it must be
 *                16-byte aligned
 * Out:   %xmm0 = new round key, also stored at the incoming (%rsi)
 *        %rsi  = advanced by 16 bytes
 * Clobb: %xmm1, %xmm4 (its low dword is still zero on return)
 *
 * The two shufps/pxor pairs turn %xmm0 into the running XOR
 * (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3); the broadcast dword from %xmm1 is
 * then XORed into every lane.
 */
ENTRY(_key_expansion_128)
_key_expansion_256a:
	.cfi_startproc
	pshufd	$0b11111111,%xmm1,%xmm1	# broadcast dword 3 of xmm1
	shufps	$0b00010000,%xmm0,%xmm4	# xmm4 = (0, 0, w1, w0)
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4	# xmm4 = (0, w0, w0, w1^w2)
	pxor	%xmm4,%xmm0		# xmm0 = running xor of w0..w3
	pxor	%xmm1,%xmm0		# mix in the key-assist word
	movaps	%xmm0,(%rsi)		# append round key to the schedule
	addq	$0x10,%rsi
	retq
	.cfi_endproc
END(_key_expansion_128)

/*
 * _key_expansion_192a
 *
 * AES-192 key expansion step that advances the 192-bit key state by one
 * iteration and writes two schedule slots (32 bytes).
 *
 * In:    %xmm0 = words 0-3 of the current key state
 *        %xmm2 = words 4-5 of the key state in its low 64 bits
 *        %xmm1 = AESKEYGENASSIST result; only its dword 1 is used
 *        %xmm4 = low dword must be zero (the caller clears it once)
 *        %rsi  = schedule write pointer; stored with movaps, so it must
 *                be 16-byte aligned
 * Out:   (%rsi)     = incoming words 4-5 followed by new words 0-1
 *        0x10(%rsi) = new words 2-3 followed by new words 4-5
 *        %xmm0      = new words 0-3
 *        %xmm2      = new words 4-5 in its low 64 bits
 *        %rsi       = advanced by 32 bytes
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
ENTRY(_key_expansion_192a)
	.cfi_startproc
	pshufd	$0b01010101,%xmm1,%xmm1	# broadcast dword 1 of xmm1
	shufps	$0b00010000,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0		# xmm0 = running xor of words 0-3
	pxor	%xmm1,%xmm0		# xmm0 = new words 0-3
	movaps	%xmm2,%xmm5
	movaps	%xmm2,%xmm6		# xmm6 keeps the incoming words 4-5
	pslldq	$4,%xmm5		# xmm5 = xmm2 shifted up one dword
	pshufd	$0b11111111,%xmm0,%xmm3	# broadcast new word 3
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2		# low 64 bits = new words 4-5
	movaps	%xmm0,%xmm1
	shufps	$0b01000100,%xmm0,%xmm6	# xmm6 = (old 4, old 5, new 0, new 1)
	movaps	%xmm6,(%rsi)
	shufps	$0b01001110,%xmm2,%xmm1	# xmm1 = (new 2, new 3, new 4, new 5)
	movaps	%xmm1,0x10(%rsi)
	addq	$0x20,%rsi
	retq
	.cfi_endproc
END(_key_expansion_192a)

/*
 * _key_expansion_192b
 *
 * AES-192 key expansion step that advances the 192-bit key state by one
 * iteration and writes one schedule slot (16 bytes).  The new words 4-5
 * are only left in %xmm2; this routine does not store them.
 *
 * In:    %xmm0 = words 0-3 of the current key state
 *        %xmm2 = words 4-5 of the key state in its low 64 bits
 *        %xmm1 = AESKEYGENASSIST result; only its dword 1 is used
 *        %xmm4 = low dword must be zero (the caller clears it once)
 *        %rsi  = schedule write pointer; stored with movaps, so it must
 *                be 16-byte aligned
 * Out:   (%rsi) = new words 0-3
 *        %xmm0  = new words 0-3
 *        %xmm2  = new words 4-5 in its low 64 bits
 *        %rsi   = advanced by 16 bytes
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5
 */
ENTRY(_key_expansion_192b)
	.cfi_startproc
	pshufd	$0b01010101,%xmm1,%xmm1	# broadcast dword 1 of xmm1
	shufps	$0b00010000,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0		# xmm0 = running xor of words 0-3
	pxor	%xmm1,%xmm0		# xmm0 = new words 0-3
	movaps	%xmm2,%xmm5
	pslldq	$4,%xmm5		# xmm5 = xmm2 shifted up one dword
	pshufd	$0b11111111,%xmm0,%xmm3	# broadcast new word 3
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2		# low 64 bits = new words 4-5
	movaps	%xmm0,(%rsi)
	addq	$0x10,%rsi
	retq
	.cfi_endproc
END(_key_expansion_192b)

/*
 * _key_expansion_256b
 *
 * Second half of an AES-256 key expansion iteration: same lane mixing as
 * _key_expansion_256a, but applied to %xmm2 and using dword 2 of the
 * AESKEYGENASSIST result instead of dword 3.
 *
 * In:    %xmm2 = round key to update, four 32-bit words
 *        %xmm1 = AESKEYGENASSIST result; only its dword 2 is used
 *        %xmm4 = low dword must be zero (the caller clears it once)
 *        %rsi  = schedule slot to fill; stored with movaps, so it must be
 *                16-byte aligned
 * Out:   %xmm2 = new round key, also stored at the incoming (%rsi)
 *        %rsi  = advanced by 16 bytes
 * Clobb: %xmm1, %xmm4 (its low dword is still zero on return)
 */
ENTRY(_key_expansion_256b)
	.cfi_startproc
	pshufd	$0b10101010,%xmm1,%xmm1	# broadcast dword 2 of xmm1
	shufps	$0b00010000,%xmm2,%xmm4
	pxor	%xmm4,%xmm2
	shufps	$0b10001100,%xmm2,%xmm4
	pxor	%xmm4,%xmm2		# xmm2 = running xor of its four words
	pxor	%xmm1,%xmm2		# mix in the key-assist word
	movaps	%xmm2,(%rsi)		# append round key to the schedule
	addq	$0x10,%rsi
	retq
	.cfi_endproc
END(_key_expansion_256b)

/*
 * aesni_set_enckey
 *
 * Expand a user-supplied AES key into the encryption key schedule.
 *
 * In:    %rdi = user key (read with unaligned loads)
 *        %rsi = key schedule output; written with movaps, so it must be
 *               16-byte aligned
 *        %edx = selects the key size by unsigned compare against 12:
 *               below -> 128-bit, equal -> 192-bit, above -> 256-bit.
 *               Presumably the AES round count (10/12/14) -- confirm
 *               against the C prototype.
 * Out:   the schedule holds 11 (128-bit), 13 (192-bit) or 15 (256-bit)
 *        round keys of 16 bytes each; the first is the user key itself
 * Clobb: %rsi, %xmm0-%xmm6 (the exact set depends on the path), flags
 *
 * The AES-NI instructions are emitted as raw opcode bytes; the mnemonic
 * that each .byte line encodes is given in the comment above it.  The
 * immediate of each AESKEYGENASSIST is the round constant for that step.
 */
ENTRY(aesni_set_enckey)
	.cfi_startproc
	movups	(%rdi),%xmm0		# user key (first 16 bytes)
	movaps	%xmm0,(%rsi)
	addq	$0x10,%rsi		# key addr
	pxor	%xmm4,%xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmpl	$12,%edx
	jb	.Lenc_key128
	je	.Lenc_key192
	/* 256-bit key: the second 16 bytes form round key 1. */
	movups	0x10(%rdi),%xmm2	# other user key
	movaps	%xmm2,(%rsi)
	addq	$0x10,%rsi
//	aeskeygenassist $0x1,%xmm2,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_256a
//	aeskeygenassist $0x1,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_256b
//	aeskeygenassist $0x2,%xmm2,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_256a
//	aeskeygenassist $0x2,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_256b
//	aeskeygenassist $0x4,%xmm2,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_256a
//	aeskeygenassist $0x4,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_256b
//	aeskeygenassist $0x8,%xmm2,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_256a
//	aeskeygenassist $0x8,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_256b
//	aeskeygenassist $0x10,%xmm2,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_256a
//	aeskeygenassist $0x10,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_256b
//	aeskeygenassist $0x20,%xmm2,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_256a
//	aeskeygenassist $0x20,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_256b
//	aeskeygenassist $0x40,%xmm2,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_256a
	retq
.Lenc_key192:
	/* 192-bit key: movq loads the last 8 key bytes, upper half zeroed. */
	movq	0x10(%rdi),%xmm2		# other user key
//	aeskeygenassist $0x1,%xmm2,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_192a
//	aeskeygenassist $0x2,%xmm2,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_192b
//	aeskeygenassist $0x4,%xmm2,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_192a
//	aeskeygenassist $0x8,%xmm2,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_192b
//	aeskeygenassist $0x10,%xmm2,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_192a
//	aeskeygenassist $0x20,%xmm2,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_192b
//	aeskeygenassist $0x40,%xmm2,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_192a
//	aeskeygenassist $0x80,%xmm2,%xmm1	# round 8
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x80
	call	_key_expansion_192b
	retq
.Lenc_key128:
	/* 128-bit key: ten expansion steps, one round key each. */
//	aeskeygenassist $0x1,%xmm0,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_128
//	aeskeygenassist $0x2,%xmm0,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_128
//	aeskeygenassist $0x4,%xmm0,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_128
//	aeskeygenassist $0x8,%xmm0,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_128
//	aeskeygenassist $0x10,%xmm0,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_128
//	aeskeygenassist $0x20,%xmm0,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_128
//	aeskeygenassist $0x40,%xmm0,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x40
	call	_key_expansion_128
//	aeskeygenassist $0x80,%xmm0,%xmm1	# round 8
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x80
	call	_key_expansion_128
//	aeskeygenassist $0x1b,%xmm0,%xmm1	# round 9
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x1b
	call	_key_expansion_128
//	aeskeygenassist $0x36,%xmm0,%xmm1	# round 10
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x36
	call	_key_expansion_128
	retq
	.cfi_endproc
END(aesni_set_enckey)

/*
 * aesni_set_deckey
 *
 * Build the decryption key schedule from an encryption key schedule: the
 * round keys are copied in reverse order, and AESIMC is applied to every
 * key except the first and the last one written.
 *
 * In:    %rdi = encryption schedule; read with movdqa, so it must be
 *               16-byte aligned
 *        %rsi = decryption schedule output, 16-byte aligned
 *        %edx = index of the last round key in the encryption schedule
 *               (presumably the AES round count -- confirm with the
 *               caller); the loop below assumes it is at least 2
 * Out:   %edx + 1 round keys of 16 bytes each stored at the incoming %rsi
 * Clobb: %rax, %rdx, %rdi, %rsi, %xmm0, %xmm1, flags
 */
ENTRY(aesni_set_deckey)
	.cfi_startproc
	movslq	%edx,%rax
	shlq	$4,%rax			# byte offset of the last round key
	addq	%rax,%rdi		# rdi = last encryption round key
	movdqa	(%rdi),%xmm0
	movdqa	%xmm0,(%rsi)		# copied unchanged to the first slot
	decl	%edx			# edx = number of keys needing aesimc
1:
	addq	$0x10,%rsi		# output walks forward
	subq	$0x10,%rdi		# input walks backward
//	aesimc	(%rdi),%xmm1
	.byte	0x66,0x0f,0x38,0xdb,0x0f
	movdqa	%xmm1,(%rsi)
	decl	%edx
	jne	1b

	addq	$0x10,%rsi
	subq	$0x10,%rdi		# rdi is back at the first round key
	movdqa	(%rdi),%xmm0
	movdqa	%xmm0,(%rsi)		# copied unchanged to the last slot
	retq
	.cfi_endproc
END(aesni_set_deckey)
