1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "textflag.h"
6
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// encryptBlockAsm runs the AES-NI encryption rounds over the 16-byte block at
// src, using the 16-byte round keys stored consecutively at xk, and writes the
// 16-byte result to dst.
//
// nr selects the entry point into the unrolled round sequence (unsigned
// comparison against 12): nr < 12 runs 10 rounds, nr == 12 runs 12 rounds and
// nr > 12 runs 14 rounds. Round keys are consumed in ascending address order,
// starting with the one at xk+0.
//
// Register use: AX = round-key cursor, BX = src, DX = dst, CX = nr,
// X0 = cipher state, X1 = round key currently being applied.
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVQ src+24(FP), BX
	MOVQ dst+16(FP), DX
	MOVQ xk+8(FP), AX
	MOVQ nr+0(FP), CX
	// Round 0: XOR the input block with the first round key.
	MOVUPS (BX), X0
	MOVUPS (AX), X1
	PXOR X1, X0
	ADDQ $16, AX
	// Pick the entry point. Only the flags matter; CX is not read again.
	CMPQ CX, $12
	JE Lenc192
	JB Lenc128
Lenc256:
	// Two extra rounds, reached only when nr > 12.
	MOVUPS (AX), X1
	AESENC X1, X0
	MOVUPS 16(AX), X1
	AESENC X1, X0
	ADDQ $32, AX
Lenc192:
	// Two extra rounds, reached when nr >= 12.
	MOVUPS (AX), X1
	AESENC X1, X0
	MOVUPS 16(AX), X1
	AESENC X1, X0
	ADDQ $32, AX
Lenc128:
	// Common tail: nine AESENC rounds followed by the AESENCLAST round.
	MOVUPS (AX), X1
	AESENC X1, X0
	MOVUPS 16(AX), X1
	AESENC X1, X0
	MOVUPS 32(AX), X1
	AESENC X1, X0
	MOVUPS 48(AX), X1
	AESENC X1, X0
	MOVUPS 64(AX), X1
	AESENC X1, X0
	MOVUPS 80(AX), X1
	AESENC X1, X0
	MOVUPS 96(AX), X1
	AESENC X1, X0
	MOVUPS 112(AX), X1
	AESENC X1, X0
	MOVUPS 128(AX), X1
	AESENC X1, X0
	MOVUPS 144(AX), X1
	AESENCLAST X1, X0
	// Store the finished block.
	MOVUPS X0, (DX)
	RET
55
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// decryptBlockAsm runs the AES-NI decryption rounds over the 16-byte block at
// src, using the 16-byte round keys stored consecutively at xk, and writes the
// 16-byte result to dst.
//
// nr selects the entry point into the unrolled round sequence (unsigned
// comparison against 12): nr < 12 runs 10 rounds, nr == 12 runs 12 rounds and
// nr > 12 runs 14 rounds. Round keys are consumed in ascending address order,
// so xk is presumably the decryption schedule (the "dec" output of
// expandKeyAsm) rather than the encryption one — confirm against the caller.
//
// Register use: AX = round-key cursor, BX = src, DX = dst, CX = nr,
// X0 = cipher state, X1 = round key currently being applied.
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVQ src+24(FP), BX
	MOVQ dst+16(FP), DX
	MOVQ xk+8(FP), AX
	MOVQ nr+0(FP), CX
	// Round 0: XOR the input block with the first round key.
	MOVUPS (BX), X0
	MOVUPS (AX), X1
	PXOR X1, X0
	ADDQ $16, AX
	// Pick the entry point. Only the flags matter; CX is not read again.
	CMPQ CX, $12
	JE Ldec192
	JB Ldec128
Ldec256:
	// Two extra rounds, reached only when nr > 12.
	MOVUPS (AX), X1
	AESDEC X1, X0
	MOVUPS 16(AX), X1
	AESDEC X1, X0
	ADDQ $32, AX
Ldec192:
	// Two extra rounds, reached when nr >= 12.
	MOVUPS (AX), X1
	AESDEC X1, X0
	MOVUPS 16(AX), X1
	AESDEC X1, X0
	ADDQ $32, AX
Ldec128:
	// Common tail: nine AESDEC rounds followed by the AESDECLAST round.
	MOVUPS (AX), X1
	AESDEC X1, X0
	MOVUPS 16(AX), X1
	AESDEC X1, X0
	MOVUPS 32(AX), X1
	AESDEC X1, X0
	MOVUPS 48(AX), X1
	AESDEC X1, X0
	MOVUPS 64(AX), X1
	AESDEC X1, X0
	MOVUPS 80(AX), X1
	AESDEC X1, X0
	MOVUPS 96(AX), X1
	AESDEC X1, X0
	MOVUPS 112(AX), X1
	AESDEC X1, X0
	MOVUPS 128(AX), X1
	AESDEC X1, X0
	MOVUPS 144(AX), X1
	AESDECLAST X1, X0
	// Store the finished block.
	MOVUPS X0, (DX)
	RET
104
// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
// Note that round keys are stored in uint128 format, not uint32
//
// expandKeyAsm builds both AES key schedules from the raw key at key.
//
// nr selects the key size (unsigned comparison against 12):
//   nr <  12: 16 key bytes are read, 11 round keys are written to enc
//   nr == 12: 24 key bytes are read, 13 round keys are written to enc
//   nr >  12: 32 key bytes are read, 15 round keys are written to enc
// Each round key is 16 bytes. For nr = 10, 12 or 14 that is nr+1 round keys,
// and dec receives the same number.
//
// dec is derived from enc in reverse order: dec[0] is the last enc round key,
// dec[nr] is enc[0], and every key in between is passed through AESIMC.
//
// Register use: AX = key, BX = enc write cursor (later the enc read cursor
// while building dec), DX = dec write cursor, CX = nr / loop counter,
// X0 and X2 = working key words, X1 = AESKEYGENASSIST result, X4 = helper
// scratch.
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVQ nr+0(FP), CX
	MOVQ key+8(FP), AX
	MOVQ enc+16(FP), BX
	MOVQ dec+24(FP), DX
	MOVUPS (AX), X0
	// enc: round key 0 is the first 16 bytes of the raw key.
	MOVUPS X0, (BX)
	ADDQ $16, BX
	PXOR X4, X4 // _expand_key_* expect X4 to be zero
	// nr was loaded as a 64-bit value, so compare all 64 bits (this matches
	// the 64-bit test used by encryptBlockAsm and decryptBlockAsm).
	CMPQ CX, $12
	JE Lexp_enc192
	JB Lexp_enc128
Lexp_enc256:
	// 256-bit key: the second 16 key bytes form round key 1. The a/b helpers
	// then alternate, each appending one round key.
	MOVUPS 16(AX), X2
	MOVUPS X2, (BX)
	ADDQ $16, BX
	AESKEYGENASSIST $0x01, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x01, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x02, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x02, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x04, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x04, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x08, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x08, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x10, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x10, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x20, X2, X1
	CALL _expand_key_256a<>(SB)
	AESKEYGENASSIST $0x20, X0, X1
	CALL _expand_key_256b<>(SB)
	AESKEYGENASSIST $0x40, X2, X1
	CALL _expand_key_256a<>(SB)
	JMP Lexp_dec
Lexp_enc192:
	// 192-bit key: the remaining 8 key bytes go in the low half of X2 (MOVQ
	// clears the high half). The a helper appends 32 bytes of schedule and the
	// b helper appends 16.
	MOVQ 16(AX), X2
	AESKEYGENASSIST $0x01, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x02, X2, X1
	CALL _expand_key_192b<>(SB)
	AESKEYGENASSIST $0x04, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x08, X2, X1
	CALL _expand_key_192b<>(SB)
	AESKEYGENASSIST $0x10, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x20, X2, X1
	CALL _expand_key_192b<>(SB)
	AESKEYGENASSIST $0x40, X2, X1
	CALL _expand_key_192a<>(SB)
	AESKEYGENASSIST $0x80, X2, X1
	CALL _expand_key_192b<>(SB)
	JMP Lexp_dec
Lexp_enc128:
	// 128-bit key: ten helper calls, each appending one round key.
	AESKEYGENASSIST $0x01, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x02, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x04, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x08, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x10, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x20, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x40, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x80, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x1b, X0, X1
	CALL _expand_key_128<>(SB)
	AESKEYGENASSIST $0x36, X0, X1
	CALL _expand_key_128<>(SB)
Lexp_dec:
	// dec: BX is one past the last enc round key. Step back onto it and copy
	// it unchanged as dec[0].
	SUBQ $16, BX
	MOVUPS (BX), X1
	MOVUPS X1, (DX)
	DECQ CX // nr-1 middle keys still need AESIMC
Lexp_dec_loop:
	// Walk enc backwards and dec forwards, transforming each middle key.
	MOVUPS -16(BX), X1
	AESIMC X1, X0
	MOVUPS X0, 16(DX)
	SUBQ $16, BX
	ADDQ $16, DX
	DECQ CX
	JNZ Lexp_dec_loop
	// The first enc round key is copied unchanged as the last dec round key.
	MOVUPS -16(BX), X0
	MOVUPS X0, 16(DX)
	RET
208
// _expand_key_128 derives one 16-byte round key and appends it at (BX).
//
// In:  X0 = previous round key
//      X1 = AESKEYGENASSIST result for this step
//      X4 = scratch; expandKeyAsm zeroes it before the first helper call
//      BX = output cursor
// Out: X0 = new round key (also stored at (BX)); BX advanced by 16.
// Overwrites X1 and X4.
TEXT _expand_key_128<>(SB),NOSPLIT,$0
	// Turn X0 into the running XOR of its four dwords:
	// [w0, w0^w1, w0^w1^w2, w0^w1^w2^w3].
	SHUFPS $0x10, X0, X4
	PXOR X4, X0
	SHUFPS $0x8c, X0, X4
	PXOR X4, X0
	// Broadcast dword 3 of the key-assist value and fold it into every lane.
	PSHUFD $0xff, X1, X1
	PXOR X1, X0
	// Emit the round key.
	MOVUPS X0, 0(BX)
	ADDQ $16, BX
	RET
219
// _expand_key_192a advances the 192-bit key state by one step and appends
// 32 bytes of schedule at (BX).
//
// In:  X0 = low four dwords of the key state
//      X2 = remaining key dwords (expandKeyAsm loads only the low 8 bytes)
//      X1 = AESKEYGENASSIST result for this step
//      X4 = scratch; expandKeyAsm zeroes it before the first helper call
//      BX = output cursor
// Out: X0 and X2 updated; BX advanced by 32.
//      (BX)+0  = previous X2 dwords 0-1 followed by new X0 dwords 0-1
//      (BX)+16 = new X0 dwords 2-3 followed by new X2 dwords 0-1
// Overwrites X1, X3, X4, X5 and X6.
TEXT _expand_key_192a<>(SB),NOSPLIT,$0
	// Snapshot X2 before it changes: X6 keeps it for the first output word,
	// X5 holds it shifted left by one dword.
	MOVAPS X2, X6
	MOVAPS X2, X5
	PSLLDQ $4, X5

	// Update X0: running XOR of its dwords, then XOR in the broadcast of
	// dword 1 of the key-assist value.
	PSHUFD $0x55, X1, X1
	SHUFPS $0x10, X0, X4
	PXOR X4, X0
	SHUFPS $0x8c, X0, X4
	PXOR X4, X0
	PXOR X1, X0

	// Update X2 with the broadcast of the new X0 dword 3 and the shifted copy.
	PSHUFD $0xff, X0, X3
	PXOR X3, X2
	PXOR X5, X2

	// Assemble and emit the two 16-byte output words.
	SHUFPS $0x44, X0, X6
	MOVUPS X6, 0(BX)
	MOVAPS X0, X1
	SHUFPS $0x4e, X2, X1
	MOVUPS X1, 16(BX)
	ADDQ $32, BX
	RET
242
// _expand_key_192b advances the 192-bit key state by one step and appends
// the new X0 (16 bytes) at (BX).
//
// In:  X0 = low four dwords of the key state
//      X2 = remaining key dwords
//      X1 = AESKEYGENASSIST result for this step
//      X4 = scratch; expandKeyAsm zeroes it before the first helper call
//      BX = output cursor
// Out: X0 and X2 updated; X0 stored at (BX); BX advanced by 16.
// Overwrites X1, X3, X4 and X5.
TEXT _expand_key_192b<>(SB),NOSPLIT,$0
	// X5 = X2 shifted left by one dword, taken before X2 is modified.
	MOVAPS X2, X5
	PSLLDQ $4, X5

	// Update X0: running XOR of its dwords, then XOR in the broadcast of
	// dword 1 of the key-assist value.
	PSHUFD $0x55, X1, X1
	SHUFPS $0x10, X0, X4
	PXOR X4, X0
	SHUFPS $0x8c, X0, X4
	PXOR X4, X0
	PXOR X1, X0

	// Update X2 with the broadcast of the new X0 dword 3 and the shifted copy.
	PSHUFD $0xff, X0, X3
	PXOR X3, X2
	PXOR X5, X2

	// Emit the round key.
	MOVUPS X0, 0(BX)
	ADDQ $16, BX
	RET
260
// _expand_key_256a performs the X0 half of a 256-bit expansion step. It is
// exactly the 128-bit step, so it tail-jumps to _expand_key_128, whose RET
// returns directly to this function's caller.
TEXT _expand_key_256a<>(SB),NOSPLIT,$0
	JMP _expand_key_128<>(SB)
263
// _expand_key_256b performs the X2 half of a 256-bit expansion step and
// appends the resulting 16-byte round key at (BX).
//
// In:  X2 = round key from two steps back
//      X1 = AESKEYGENASSIST result for this step
//      X4 = scratch; expandKeyAsm zeroes it before the first helper call
//      BX = output cursor
// Out: X2 = new round key (also stored at (BX)); BX advanced by 16.
// Overwrites X1 and X4.
TEXT _expand_key_256b<>(SB),NOSPLIT,$0
	// Turn X2 into the running XOR of its four dwords.
	SHUFPS $0x10, X2, X4
	PXOR X4, X2
	SHUFPS $0x8c, X2, X4
	PXOR X4, X2
	// Broadcast dword 2 of the key-assist value and fold it into every lane.
	PSHUFD $0xaa, X1, X1
	PXOR X1, X2

	// Emit the round key.
	MOVUPS X2, 0(BX)
	ADDQ $16, BX
	RET
275