1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Based on CRYPTOGAMS code with the following comment:
6// # ====================================================================
7// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
8// # project. The module is, however, dual licensed under OpenSSL and
9// # CRYPTOGAMS licenses depending on where you obtain it. For further
10// # details see http://www.openssl.org/~appro/cryptogams/.
11// # ====================================================================
12
13// Original code can be found at the link below:
14// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl
15
// Some function names were changed to be consistent with Go function
// names. For instance, the functions aes_p8_set_{en,de}crypt_key became
// set{En,De}cryptKeyAsm. setEncryptKeyAsm was also split in two parts
// and a new section was created (doEncryptKeyAsm). This was necessary to
// avoid overwriting the arguments when setDecryptKeyAsm calls setEncryptKeyAsm.
// There were other modifications as well, but the functionality was kept the same.
22
23#include "textflag.h"
24
// Register aliases for set{En,De}cryptKeyAsm and doEncryptKeyAsm.
//
// General-purpose registers:
//   INP    - key pointer on entry; later a cursor and the exit code
//   BITS   - key length in bits
//   OUT    - cursor into the expanded key schedule
//   PTR    - pending exit code, then address inside ·rcon
//   CNT    - loop trip count copied into CTR
//   ROUNDS - round count written after the schedule
//   TEMP   - scratch (R19 is used because R0 is kept as the zero index)
#define INP     R3
#define BITS    R4
#define OUT     R5
#define PTR     R6
#define CNT     R7
#define ROUNDS  R8
#define TEMP    R19

// Vector registers:
//   ZERO             - cleared with VXOR and used as the zero operand
//   IN0, IN1         - key material being expanded
//   KEY, TMP, STAGE  - per-round temporaries
//   RCON, MASK       - constants loaded from ·rcon
//   OUTPERM, OUTMASK - permute vector and select mask derived from OUT
//   OUTHEAD, OUTTAIL - previous/current halves merged by VSEL before each store
#define ZERO    V0
#define IN0     V1
#define IN1     V2
#define KEY     V3
#define RCON    V4
#define MASK    V5
#define TMP     V6
#define STAGE   V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// Register aliases for {en,de}cryptBlockAsm.
//   BLK_INP    - source block pointer
//   BLK_OUT    - destination block pointer
//   BLK_KEY    - expanded key schedule pointer
//   BLK_ROUNDS - round count read from offset 240 of the schedule
//   BLK_IDX    - byte index used with the indexed vector loads/stores
#define BLK_INP    R3
#define BLK_OUT    R4
#define BLK_KEY    R5
#define BLK_ROUNDS R6
#define BLK_IDX    R7
52
// ·rcon is the 64-byte read-only constant table used by doEncryptKeyAsm.
//   +0x00  first RCON vector, loaded before the key-schedule loops
//   +0x10  second RCON vector (0x1b), loaded for the last two AES-128 round keys
//   +0x20  byte-permute MASK (rotate-and-splat pattern)
//   +0x30  zero padding
DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000 // RCON (0x1b)
DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000 // RCON (0x1b)
DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x30(SB)/8, $0x0000000000000000 // padding
DATA ·rcon+0x38(SB)/8, $0x0000000000000000 // padding
GLOBL ·rcon(SB), RODATA, $64
62
// func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
//
// Go entry point for key expansion. It only moves the stack arguments into
// the registers doEncryptKeyAsm expects and then jumps there; the result is
// written to ret+24(FP) by doEncryptKeyAsm, which also performs the RET.
TEXT ·setEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	enc+16(FP), OUT     // destination key schedule
	MOVD	keylen+8(FP), BITS  // key length (compared against 128/192/256)
	MOVD	key+0(FP), INP      // user key
	JMP	·doEncryptKeyAsm(SB) // tail jump: no return to this function
70
// doEncryptKeyAsm is the shared key-expansion body used by both
// setEncryptKeyAsm (via JMP) and setDecryptKeyAsm (via CALL).
//
// It is not called with Go stack arguments; callers pre-load:
//   INP  (R3) = pointer to the user key
//   BITS (R4) = key length in bits; only 128, 192 and 256 pass the checks
//   OUT  (R5) = pointer to the round-key buffer to fill
//
// Result (in INP/R3 and also stored to ret+24(FP)):
//    0  success
//   -1  INP or OUT is nil
//   -2  BITS is < 128, > 256, or not a multiple of 64
//
// On success ROUNDS (R8) holds the round count (10/12/14), that value has
// been stored as a 32-bit word at byte offset 240 of the schedule, and OUT
// points at that word. setDecryptKeyAsm relies on both ROUNDS and OUT.
//
// Clobbers R6-R9, R19, CTR and V0-V11. R10 must stay untouched.
TEXT ·doEncryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Do not change R10 since it's storing the LR value in setDecryptKeyAsm

	// Check arguments
	MOVD	$-1, PTR               // li    6,-1       exit code to -1 (255)
	CMPU	INP, $0                // cmpldi r3,0      input key pointer set?
	BC	0x0E, 2, enc_key_abort // beq-  .Lenc_key_abort
	CMPU	OUT, $0                // cmpldi r5,0      output key pointer set?
	BC	0x0E, 2, enc_key_abort // beq-  .Lenc_key_abort
	MOVD	$-2, PTR               // li    6,-2       exit code to -2 (254)
	CMPW	BITS, $128             // cmpwi 4,128      greater or equal to 128
	BC	0x0E, 0, enc_key_abort // blt-  .Lenc_key_abort
	CMPW	BITS, $256             // cmpwi 4,256      lesser or equal to 256
	BC	0x0E, 1, enc_key_abort // bgt-  .Lenc_key_abort
	ANDCC	$0x3f, BITS, TEMP      // andi. 0,4,0x3f   multiple of 64
	BC	0x06, 2, enc_key_abort // bne-  .Lenc_key_abort

	MOVD	$·rcon(SB), PTR // PTR points to the rcon table

	// Get key from memory and write aligned into VR
	NEG	INP, R9            // neg   9,3        R9 is ~INP + 1
	LVX	(INP)(R0), IN0     // lvx   1,0,3      Load key inside IN0
	ADD	$15, INP, INP      // addi  3,3,15     Add 15B to INP addr
	LVSR	(R9)(R0), KEY      // lvsr  3,0,9
	MOVD	$0x20, R8          // li    8,0x20     R8 = 32 (index of MASK in rcon)
	CMPW	BITS, $192         // cmpwi 4,192      Key size == 192? (consumed by BLT/BEQ below)
	LVX	(INP)(R0), IN1     // lvx   2,0,3
	VSPLTISB	$0x0f, MASK // vspltisb 5,0x0f  0x0f0f0f0f... mask
	LVX	(PTR)(R0), RCON    // lvx   4,0,6      Load first 16 bytes into RCON
	VXOR	KEY, MASK, KEY     // vxor  3,3,5      Adjust for byte swap
	LVX	(PTR)(R8), MASK    // lvx   5,8,6
	ADD	$0x10, PTR, PTR    // addi  6,6,0x10   PTR to next 16 bytes of RCON
	VPERM	IN0, IN1, KEY, IN0 // vperm 1,1,2,3    Align
	MOVD	$8, CNT            // li    7,8        CNT = 8
	VXOR	ZERO, ZERO, ZERO   // vxor  0,0,0      Zero to be zero :)
	MOVD	CNT, CTR           // mtctr 7          Set the counter to 8 (rounds)

	// Prepare the permute/mask pair used to merge each round key into the
	// (possibly unaligned) output with aligned STVX stores.
	LVSL	(OUT)(R0), OUTPERM              // lvsl  8,0,5
	VSPLTISB	$-1, OUTMASK            // vspltisb      9,-1
	LVX	(OUT)(R0), OUTHEAD              // lvx   10,0,5
	VPERM	OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8

	// Dispatch on the CMPW BITS, $192 result above.
	BLT	loop128      // blt   .Loop128
	ADD	$8, INP, INP // addi  3,3,8
	BEQ	l192         // beq   .L192
	ADD	$8, INP, INP // addi  3,3,8
	JMP	l256         // b     .L256

loop128:
	// Key schedule (Round 1 to 8)
	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5         Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8    Rotate
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5        Write to output
	ADD	$16, OUT, OUT                    // addi 5,5,16       Point to the next round

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	BC	0x10, 0, loop128    // bdnz .Loop128

	LVX	(PTR)(R0), RCON // lvx 4,0,6     Last two round keys

	// Key schedule (Round 9)
	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8  Rotate
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5   Round 9
	ADD	$16, OUT, OUT                    // addi 5,5,16

	// Key schedule (Round 10)
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3

	VPERM	IN0, IN0, MASK, KEY              // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8  Rotate
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5    Round 10
	ADD	$16, OUT, OUT                    // addi 5,5,16

	// Key schedule (Round 11)
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5  Round 11

	ADD	$15, OUT, INP   // addi  3,5,15
	ADD	$0x50, OUT, OUT // addi  5,5,0x50   OUT now at schedule+240

	MOVD	$10, ROUNDS // li    8,10
	JMP	done        // b     .Ldone

l192:
	LVX	(INP)(R0), TMP                   // lvx 6,0,3
	MOVD	$4, CNT                          // li 7,4
	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
	ADD	$16, OUT, OUT                    // addi 5,5,16
	VPERM	IN1, TMP, KEY, IN1               // vperm 2,2,6,3
	VSPLTISB	$8, KEY                  // vspltisb 3,8
	MOVD	CNT, CTR                         // mtctr 7
	VSUBUBM	MASK, KEY, MASK                  // vsububm 5,5,3

loop192:
	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6

	VSLDOI	$8, ZERO, IN1, STAGE  // vsldoi 7,0,2,8
	VSPLTW	$3, IN0, TMP          // vspltw 6,1,3
	VXOR	TMP, IN1, TMP         // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON      // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1         // vxor 2,2,6
	VXOR	IN0, KEY, IN0         // vxor 1,1,3
	VXOR	IN1, KEY, IN1         // vxor 2,2,3
	VSLDOI	$8, STAGE, IN0, STAGE // vsldoi 7,7,1,8

	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	VPERM	STAGE, STAGE, OUTPERM, OUTTAIL   // vperm 11,7,7,8
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
	ADD	$16, OUT, OUT                    // addi 5,5,16

	VSLDOI	$8, IN0, IN1, STAGE              // vsldoi 7,1,2,8
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VPERM	STAGE, STAGE, OUTPERM, OUTTAIL   // vperm 11,7,7,8
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
	ADD	$16, OUT, OUT                    // addi 5,5,16

	VSPLTW	$3, IN0, TMP                     // vspltw 6,1,3
	VXOR	TMP, IN1, TMP                    // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1              // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1                    // vxor 2,2,6
	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
	VXOR	IN1, KEY, IN1                    // vxor 2,2,3
	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
	ADD	$15, OUT, INP                    // addi 3,5,15
	ADD	$16, OUT, OUT                    // addi 5,5,16
	BC	0x10, 0, loop192                 // bdnz .Loop192

	MOVD	$12, ROUNDS     // li 8,12
	ADD	$0x20, OUT, OUT // addi 5,5,0x20   OUT now at schedule+240
	JMP	done            // b .Ldone

l256:
	LVX	(INP)(R0), TMP                   // lvx 6,0,3
	MOVD	$7, CNT                          // li 7,7
	MOVD	$14, ROUNDS                      // li 8,14
	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
	ADD	$16, OUT, OUT                    // addi 5,5,16
	VPERM	IN1, TMP, KEY, IN1               // vperm 2,2,6,3
	MOVD	CNT, CTR                         // mtctr 7

loop256:
	VPERM	IN1, IN1, MASK, KEY              // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP              // vsldoi 6,0,1,12
	VPERM	IN1, IN1, OUTPERM, OUTTAIL       // vperm 11,2,2,8
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	VCIPHERLAST	KEY, RCON, KEY           // vcipherlast 3,3,4
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
	ADD	$16, OUT, OUT                    // addi 5,5,16

	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP              // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0                    // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0                    // vxor 1,1,3
	VPERM	IN0, IN0, OUTPERM, OUTTAIL       // vperm 11,1,1,8
	VSEL	OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR	OUTTAIL, OUTTAIL, OUTHEAD        // vor 10,11,11
	STVX	STAGE, (OUT+R0)                  // stvx 7,0,5
	ADD	$15, OUT, INP                    // addi 3,5,15
	ADD	$16, OUT, OUT                    // addi 5,5,16
	BC	0x12, 0, done                    // bdz .Ldone

	VSPLTW	$3, IN0, KEY        // vspltw 3,1,3
	VSLDOI	$12, ZERO, IN1, TMP // vsldoi 6,0,2,12
	VSBOX	KEY, KEY            // vsbox 3,3

	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6

	VXOR	IN1, KEY, IN1 // vxor 2,2,3
	JMP	loop256       // b .Loop256

done:
	// Flush the pending tail of the last round key, preserving the bytes of
	// that quadword that lie outside the schedule, then record the round count.
	LVX	(INP)(R0), IN1             // lvx   2,0,3
	VSEL	OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9
	STVX	IN1, (INP+R0)              // stvx  2,0,3
	MOVD	$0, PTR                    // li    6,0    set PTR to 0 (exit code 0)
	MOVW	ROUNDS, 0(OUT)             // stw   8,0(5)

enc_key_abort:
	MOVD	PTR, INP        // mr    3,6    set exit code with PTR value
	MOVD	INP, ret+24(FP) // Put return value into the FP
	RET                     // blr
323
// func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
//
// Builds the decryption key schedule: runs doEncryptKeyAsm to expand the
// key into dec, then reverses the order of the 16-byte round keys in place.
// Returns whatever doEncryptKeyAsm reported if that is non-zero, else 0.
TEXT ·setDecryptKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Marshal the stack arguments into the registers doEncryptKeyAsm reads.
	MOVD	dec+16(FP), OUT
	MOVD	keylen+8(FP), BITS
	MOVD	key+0(FP), INP

	// CALL overwrites LR, so park it in R10 (which doEncryptKeyAsm leaves alone).
	MOVD	LR, R10              // mflr 10
	CALL	·doEncryptKeyAsm(SB)
	MOVD	R10, LR              // mtlr 10

	// A non-zero exit code in INP is passed straight back to the caller.
	CMPW	INP, $0                // cmpwi 3,0
	BC	0x06, 2, dec_key_abort // bne- .Ldec_key_abort

	// After a successful expansion OUT is schedule+240 and ROUNDS is the
	// round count. Point INP at the first round key and OUT at the last
	// one (first + 16*rounds), then iterate rounds/2 times.
	SUB	$240, OUT, INP     // subi 3,5,240   first round key
	SLW	$4, ROUNDS, CNT    // slwi 7,8,4     CNT = 16 * rounds
	ADD	CNT, INP, OUT      // add 5,3,7      last round key
	SRW	$1, ROUNDS, ROUNDS // srwi 8,8,1
	MOVD	ROUNDS, CTR        // mtctr 8

	// Swap the round key at INP with the one at OUT, walking the two
	// cursors toward each other. R6-R12 and TEMP are plain scratch here.
dec_key:
	// Read the 16 bytes at the front cursor.
	MOVWZ	0(INP), TEMP  // lwz 0, 0(3)
	MOVWZ	4(INP), R6    // lwz 6, 4(3)
	MOVWZ	8(INP), R7    // lwz 7, 8(3)
	MOVWZ	12(INP), R8   // lwz 8, 12(3)

	// Read the 16 bytes at the back cursor.
	MOVWZ	0(OUT), R9    // lwz 9, 0(5)
	MOVWZ	4(OUT), R10   // lwz 10,4(5)
	MOVWZ	8(OUT), R11   // lwz 11,8(5)
	MOVWZ	12(OUT), R12  // lwz 12,12(5)

	// Front words go to the back slot.
	MOVW	TEMP, 0(OUT)  // stw 0, 0(5)
	MOVW	R6, 4(OUT)    // stw 6, 4(5)
	MOVW	R7, 8(OUT)    // stw 7, 8(5)
	MOVW	R8, 12(OUT)   // stw 8, 12(5)

	// Back words go to the front slot.
	MOVW	R9, 0(INP)
	MOVW	R10, 4(INP)
	MOVW	R11, 8(INP)
	MOVW	R12, 12(INP)

	// Advance both cursors by one round key.
	ADD	$16, INP, INP    // addi 3,3,16
	SUB	$16, OUT, OUT    // subi 5,5,16
	BC	0x10, 0, dec_key // bdnz .Ldeckey

	XOR	INP, INP, INP // xor 3,3,3      exit code 0

dec_key_abort:
	MOVD	INP, ret+24(FP) // publish the exit code as the Go return value
	RET                     // blr
372
// func encryptBlockAsm(dst, src *byte, enc *uint32)
//
// Encrypts the 16-byte block at src with the expanded key enc and writes the
// result to dst. The round count is read from byte offset 240 of enc.
// The key-schedule register aliases (ZERO, IN0, IN1, KEY, RCON, MASK) are
// reused here purely as V0-V5; their names do not describe their role:
//   ZERO = cipher state, IN0/IN1 = round keys in flight, MASK = key permute,
//   KEY  = output permute, RCON = 0x0f splat (later the second dst quadword).
TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	enc+16(FP), BLK_KEY
	MOVD	src+8(FP), BLK_INP
	MOVD	dst+0(FP), BLK_OUT

	// CTR = rounds/2 - 1: the loop below performs two rounds per pass and
	// the final two rounds are done after it.
	MOVWZ	240(BLK_KEY), BLK_ROUNDS   // lwz 6,240(5)
	SRW	$1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
	SUB	$1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
	MOVD	BLK_ROUNDS, CTR            // mtctr 6

	// Fetch the two quadwords covering src and permute them into one block.
	MOVD	$15, BLK_IDX            // li 7,15
	LVX	(BLK_INP)(R0), ZERO     // lvx 0,0,3
	LVX	(BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
	LVSL	(BLK_INP)(R0), IN1      // lvsl 2,0,3
	VSPLTISB	$0x0f, RCON     // vspltisb 4,0x0f
	VXOR	IN1, RCON, IN1          // vxor 2,2,4
	VPERM	ZERO, IN0, IN1, ZERO    // vperm 0,0,1,2

	// Permute vector for the store to dst, derived from -dst.
	NEG	BLK_OUT, R11   // neg 11,4
	LVSR	(R11)(R0), KEY // lvsr 3,0,11

	// Load the first round keys, XOR key 0 into the state and prime IN0/IN1
	// for the loop.
	MOVD	$16, BLK_IDX            // li 7,16
	LVX	(BLK_KEY)(R0), IN0      // lvx 1,0,5
	LVSR	(BLK_KEY)(R0), MASK     // lvsr 5,0,5
	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
	VXOR	ZERO, IN0, ZERO         // vxor 0,0,1
	LVX	(BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16

	// Two cipher rounds per iteration, fetching the next key each time.
loop_enc:
	VPERM	IN0, IN1, MASK, IN1     // vperm 2,1,2,5
	VCIPHER	ZERO, IN1, ZERO         // vcipher 0,0,2
	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
	VCIPHER	ZERO, IN0, ZERO         // vcipher 0,0,1
	LVX	(BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
	BC	0x10, 0, loop_enc       // bdnz .Loop_enc

	// Last regular round followed by the final round.
	VPERM	IN0, IN1, MASK, IN1     // vperm 2,1,2,5
	VCIPHER	ZERO, IN1, ZERO         // vcipher 0,0,2
	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
	VCIPHERLAST	ZERO, IN0, ZERO // vcipherlast 0,0,1

	// Build the select mask from all-ones/all-zero through the dst permute.
	VSPLTISB	$-1, IN1   // vspltisb 2,-1
	VXOR	IN0, IN0, IN0      // vxor 1,1,1
	VPERM	IN1, IN0, KEY, IN1 // vperm 2,2,1,3

	// Rotate the result into dst's byte alignment.
	VXOR	KEY, RCON, KEY        // vxor 3,3,4
	VPERM	ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3

	// Read both quadwords covering dst, merge the result into them under
	// the mask, and write them back. Both loads precede both stores.
	MOVD	$15, BLK_IDX             // li 7,15
	LVX	(BLK_OUT)(R0), IN0       // lvx 1,0,4
	LVX	(BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
	VSEL	IN0, ZERO, IN1, IN0      // vsel 1,1,0,2
	STVX	IN0, (BLK_OUT+R0)        // stvx 1,0,4
	VSEL	ZERO, RCON, IN1, ZERO    // vsel 0,0,4,2
	STVX	ZERO, (BLK_OUT+BLK_IDX)  // stvx 0,7,4

	RET // blr
436
// func decryptBlockAsm(dst, src *byte, dec *uint32)
//
// Decrypts the 16-byte block at src with the expanded key dec and writes the
// result to dst. The round count is read from byte offset 240 of dec.
// Same structure as encryptBlockAsm, with VNCIPHER/VNCIPHERLAST as the round
// instructions. The key-schedule register aliases are reused purely as V0-V5:
//   ZERO = cipher state, IN0/IN1 = round keys in flight, MASK = key permute,
//   KEY  = output permute, RCON = 0x0f splat (later the second dst quadword).
TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	dec+16(FP), BLK_KEY
	MOVD	src+8(FP), BLK_INP
	MOVD	dst+0(FP), BLK_OUT

	// CTR = rounds/2 - 1: the loop below performs two rounds per pass and
	// the final two rounds are done after it.
	MOVWZ	240(BLK_KEY), BLK_ROUNDS   // lwz 6,240(5)
	SRW	$1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
	SUB	$1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
	MOVD	BLK_ROUNDS, CTR            // mtctr 6

	// Fetch the two quadwords covering src and permute them into one block.
	MOVD	$15, BLK_IDX            // li 7,15
	LVX	(BLK_INP)(R0), ZERO     // lvx 0,0,3
	LVX	(BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
	LVSL	(BLK_INP)(R0), IN1      // lvsl 2,0,3
	VSPLTISB	$0x0f, RCON     // vspltisb 4,0x0f
	VXOR	IN1, RCON, IN1          // vxor 2,2,4
	VPERM	ZERO, IN0, IN1, ZERO    // vperm 0,0,1,2

	// Permute vector for the store to dst, derived from -dst.
	NEG	BLK_OUT, R11   // neg 11,4
	LVSR	(R11)(R0), KEY // lvsr 3,0,11

	// Load the first round keys, XOR key 0 into the state and prime IN0/IN1
	// for the loop.
	MOVD	$16, BLK_IDX            // li 7,16
	LVX	(BLK_KEY)(R0), IN0      // lvx 1,0,5
	LVSR	(BLK_KEY)(R0), MASK     // lvsr 5,0,5
	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
	VXOR	ZERO, IN0, ZERO         // vxor 0,0,1
	LVX	(BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16

	// Two inverse-cipher rounds per iteration, fetching the next key each time.
loop_dec:
	VPERM	IN0, IN1, MASK, IN1     // vperm 2,1,2,5
	VNCIPHER	ZERO, IN1, ZERO // vncipher 0,0,2
	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
	VNCIPHER	ZERO, IN0, ZERO // vncipher 0,0,1
	LVX	(BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD	$16, BLK_IDX, BLK_IDX   // addi 7,7,16
	BC	0x10, 0, loop_dec       // bdnz .Loop_dec

	// Last regular round followed by the final round.
	VPERM	IN0, IN1, MASK, IN1     // vperm 2,1,2,5
	VNCIPHER	ZERO, IN1, ZERO // vncipher 0,0,2
	LVX	(BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	VPERM	IN1, IN0, MASK, IN0     // vperm 1,2,1,5
	VNCIPHERLAST	ZERO, IN0, ZERO // vncipherlast 0,0,1

	// Build the select mask from all-ones/all-zero through the dst permute.
	VSPLTISB	$-1, IN1   // vspltisb 2,-1
	VXOR	IN0, IN0, IN0      // vxor 1,1,1
	VPERM	IN1, IN0, KEY, IN1 // vperm 2,2,1,3

	// Rotate the result into dst's byte alignment.
	VXOR	KEY, RCON, KEY        // vxor 3,3,4
	VPERM	ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3

	// Read both quadwords covering dst, merge the result into them under
	// the mask, and write them back. Both loads precede both stores.
	MOVD	$15, BLK_IDX             // li 7,15
	LVX	(BLK_OUT)(R0), IN0       // lvx 1,0,4
	LVX	(BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
	VSEL	IN0, ZERO, IN1, IN0      // vsel 1,1,0,2
	STVX	IN0, (BLK_OUT+R0)        // stvx 1,0,4
	VSEL	ZERO, RCON, IN1, ZERO    // vsel 0,0,4,2
	STVX	ZERO, (BLK_OUT+BLK_IDX)  // stvx 0,7,4

	RET // blr
500
501