1#! /usr/bin/env perl
2# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# [Endian-neutral] AES for C64x+.
18#
# Even though SPLOOPs are scheduled for 13 cycles, and thus expected
# performance is ~8.5 cycles per byte processed with 128-bit key,
# measured performance turned out to be ~10 cycles per byte. The
# discrepancy must be caused by limitations of L1D memory banking(*),
# see SPRU871 TI publication for further details. If it's any
# consolation, it's still ~20% faster than TI's linear assembly module
# anyway... Compared to aes_core.c compiled with cl6x 6.0 with
# -mv6400+ -o2 options this code is 3.75x faster and almost 3x smaller
# (tables included).
27#
28# (*)	This means that there might be subtle correlation between data
29#	and timing and one can wonder if it can be ... attacked:-(
30#	On the other hand this also means that *if* one chooses to
31#	implement *4* T-tables variant [instead of 1 T-table as in
32#	this implementation, or in addition to], then one ought to
33#	*interleave* them. Even though it complicates addressing,
34#	references to interleaved tables would be guaranteed not to
35#	clash. I reckon that it should be possible to break 8 cycles
36#	per byte "barrier," i.e. improve by ~20%, naturally at the
37#	cost of 8x increased pressure on L1D. 8x because you'd have
38#	to interleave both Te and Td tables...
39
# Select the output file.  Command-line arguments are consumed from the
# front until one looks like a file name ("name.ext"); that argument, if
# any, becomes the destination for everything printed to STDOUT.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Redirect only when an output name was actually supplied.  The
# three-argument form keeps the mode separate from the file name, and a
# failed open is reported instead of leaving STDOUT closed, which would
# silently discard the generated assembly.
if (defined($output) && $output ne "") {
    open(STDOUT,">",$output) or die "can't open $output: $!";
}
42
# Register allocation shared by the assembly templates below.  These are
# deliberately package variables: the later blocks read them by name.

# Table base pointers, one per register file (A side / B side).
$TEA = "A5";
$TEB = "B5";

# Key-schedule pointers, one per register file.
$KPA = "A3";
$KPB = "B1";

# Current round key words and the four state words.
@K = qw(A6 B6 A7 B7);
@s = qw(A8 B8 A9 B9);

# Scratch registers for the table look-ups.  The decryption names are
# plain aliases of the encryption ones, since both use the same registers.
@Te0 = qw(A16 B16 A17 B17);
@Te1 = qw(A18 B18 A19 B19);
@Te2 = qw(A20 B20 A21 B21);
@Te3 = qw(A22 B22 A23 B23);
@Td0 = @Te0;
@Td1 = @Te1;
@Td2 = @Te2;
@Td3 = @Te3;
51
# Assembly template, part 1: common preamble, AES_encrypt and AES_decrypt.
#
# The here-document is interpolated by Perl, so $TEA, $KPA, $K[i], $s[i],
# $Te0[i]..$Te3[i] and $Td0[i]..$Td3[i] expand to the register names
# assigned above.  A dollar sign meant for the assembler itself is written
# as \$ (see \$PCR_OFFSET).
#
# Preamble:
#   - .asg aliases name the argument/return registers used throughout
#     (RA=B3, INP=A4, OUT=B4, KEY=A6, RET=A4, SP=B15).
#   - EXT0..EXT3 are the EXTU shift counts and TBL1..TBL3 the ROTL amounts
#     used by the round code.  Under .BIG_ENDIAN they are recomputed as
#     24-EXTn and 32-TBLn so the same instruction stream serves both byte
#     orders.
#
# AES_encrypt / AES_decrypt:
#   - The public entry sets B2=1 and falls into __encrypt/__decrypt.  B2
#     predicates the LDNDW of the input block and the STNDW of the result.
#     AES_ctr32_encrypt enters at __encrypt with B2=0, passing the block in
#     A9:A8/B9:B8 and reading the result back from the same registers.
#   - The round count is loaded from word 60 of the key schedule; the
#     SPLOOPD 13 kernel is started with ILC = rounds-2.
#   - After the kernel the table pointers are advanced by 1024 bytes
#     ("point to Te4"/"point to Td4") and the last round uses byte loads
#     (LDBU) followed by PACK2/PACKL4 to reassemble the words.
$code=<<___;
	.text

	.if	.ASSEMBLER_VERSION<7000000
	.asg	0,__TI_EABI__
	.endif
	.if	__TI_EABI__
	.nocmp
	.asg	AES_encrypt,_AES_encrypt
	.asg	AES_decrypt,_AES_decrypt
	.asg	AES_set_encrypt_key,_AES_set_encrypt_key
	.asg	AES_set_decrypt_key,_AES_set_decrypt_key
	.asg	AES_ctr32_encrypt,_AES_ctr32_encrypt
	.endif

	.asg	B3,RA
	.asg	A4,INP
	.asg	B4,OUT
	.asg	A6,KEY
	.asg	A4,RET
	.asg	B15,SP

	.eval	24,EXT0
	.eval	16,EXT1
	.eval	8,EXT2
	.eval	0,EXT3
	.eval	8,TBL1
	.eval	16,TBL2
	.eval	24,TBL3

	.if	.BIG_ENDIAN
	.eval	24-EXT0,EXT0
	.eval	24-EXT1,EXT1
	.eval	24-EXT2,EXT2
	.eval	24-EXT3,EXT3
	.eval	32-TBL1,TBL1
	.eval	32-TBL2,TBL2
	.eval	32-TBL3,TBL3
	.endif

	.global	_AES_encrypt
_AES_encrypt:
	.asmfunc
	MVK	1,B2
__encrypt:
	.if	__TI_EABI__
   [B2]	LDNDW	*INP++,A9:A8			; load input
||	MVKL	\$PCR_OFFSET(AES_Te,__encrypt),$TEA
||	ADDKPC	__encrypt,B0
   [B2]	LDNDW	*INP++,B9:B8
||	MVKH	\$PCR_OFFSET(AES_Te,__encrypt),$TEA
||	ADD	0,KEY,$KPA
||	ADD	4,KEY,$KPB
	.else
   [B2]	LDNDW	*INP++,A9:A8			; load input
||	MVKL	(AES_Te-__encrypt),$TEA
||	ADDKPC	__encrypt,B0
   [B2]	LDNDW	*INP++,B9:B8
||	MVKH	(AES_Te-__encrypt),$TEA
||	ADD	0,KEY,$KPA
||	ADD	4,KEY,$KPB
	.endif
	LDW	*$KPA++[2],$Te0[0]		; zero round key
||	LDW	*$KPB++[2],$Te0[1]
||	MVK	60,A0
||	ADD	B0,$TEA,$TEA			; AES_Te
	LDW	*KEY[A0],B0			; rounds
||	MVK	1024,A0				; sizeof(AES_Te)
	LDW	*$KPA++[2],$Te0[2]
||	LDW	*$KPB++[2],$Te0[3]
||	MV	$TEA,$TEB
	NOP
	.if	.BIG_ENDIAN
	MV	A9,$s[0]
||	MV	A8,$s[1]
||	MV	B9,$s[2]
||	MV	B8,$s[3]
	.else
	MV	A8,$s[0]
||	MV	A9,$s[1]
||	MV	B8,$s[2]
||	MV	B9,$s[3]
	.endif
	XOR	$Te0[0],$s[0],$s[0]
||	XOR	$Te0[1],$s[1],$s[1]
||	LDW	*$KPA++[2],$K[0]		; 1st round key
||	LDW	*$KPB++[2],$K[1]
	SUB	B0,2,B0

	SPLOOPD	13
||	MVC	B0,ILC
||	LDW	*$KPA++[2],$K[2]
||	LDW	*$KPB++[2],$K[3]
;;====================================================================
	EXTU	$s[1],EXT1,24,$Te1[1]
||	EXTU	$s[0],EXT3,24,$Te3[0]
	LDW	*${TEB}[$Te1[1]],$Te1[1]	; Te1[s1>>8],	t0
||	LDW	*${TEA}[$Te3[0]],$Te3[0]	; Te3[s0>>24],	t1
||	XOR	$s[2],$Te0[2],$s[2]		; modulo-scheduled
||	XOR	$s[3],$Te0[3],$s[3]		; modulo-scheduled
||	EXTU	$s[1],EXT3,24,$Te3[1]
||	EXTU	$s[0],EXT1,24,$Te1[0]
	LDW	*${TEB}[$Te3[1]],$Te3[1]	; Te3[s1>>24],	t2
||	LDW	*${TEA}[$Te1[0]],$Te1[0]	; Te1[s0>>8],	t3
||	EXTU	$s[2],EXT2,24,$Te2[2]
||	EXTU	$s[3],EXT2,24,$Te2[3]
	LDW	*${TEA}[$Te2[2]],$Te2[2]	; Te2[s2>>16],	t0
||	LDW	*${TEB}[$Te2[3]],$Te2[3]	; Te2[s3>>16],	t1
||	EXTU	$s[3],EXT3,24,$Te3[3]
||	EXTU	$s[2],EXT1,24,$Te1[2]
	LDW	*${TEB}[$Te3[3]],$Te3[3]	; Te3[s3>>24],	t0
||	LDW	*${TEA}[$Te1[2]],$Te1[2]	; Te1[s2>>8],	t1
||	EXTU	$s[0],EXT2,24,$Te2[0]
||	EXTU	$s[1],EXT2,24,$Te2[1]
	LDW	*${TEA}[$Te2[0]],$Te2[0]	; Te2[s0>>16],	t2
||	LDW	*${TEB}[$Te2[1]],$Te2[1]	; Te2[s1>>16],	t3
||	EXTU	$s[3],EXT1,24,$Te1[3]
||	EXTU	$s[2],EXT3,24,$Te3[2]
	LDW	*${TEB}[$Te1[3]],$Te1[3]	; Te1[s3>>8],	t2
||	LDW	*${TEA}[$Te3[2]],$Te3[2]	; Te3[s2>>24],	t3
||	ROTL	$Te1[1],TBL1,$Te3[0]		; t0
||	ROTL	$Te3[0],TBL3,$Te1[1]		; t1
||	EXTU	$s[0],EXT0,24,$Te0[0]
||	EXTU	$s[1],EXT0,24,$Te0[1]
	LDW	*${TEA}[$Te0[0]],$Te0[0]	; Te0[s0],	t0
||	LDW	*${TEB}[$Te0[1]],$Te0[1]	; Te0[s1],	t1
||	ROTL	$Te3[1],TBL3,$Te1[0]		; t2
||	ROTL	$Te1[0],TBL1,$Te3[1]		; t3
||	EXTU	$s[2],EXT0,24,$Te0[2]
||	EXTU	$s[3],EXT0,24,$Te0[3]
	LDW	*${TEA}[$Te0[2]],$Te0[2]	; Te0[s2],	t2
||	LDW	*${TEB}[$Te0[3]],$Te0[3]	; Te0[s3],	t3
||	ROTL	$Te2[2],TBL2,$Te2[2]		; t0
||	ROTL	$Te2[3],TBL2,$Te2[3]		; t1
||	XOR	$K[0],$Te3[0],$s[0]
||	XOR	$K[1],$Te1[1],$s[1]
	ROTL	$Te3[3],TBL3,$Te1[2]		; t0
||	ROTL	$Te1[2],TBL1,$Te3[3]		; t1
||	XOR	$K[2],$Te1[0],$s[2]
||	XOR	$K[3],$Te3[1],$s[3]
||	LDW	*$KPA++[2],$K[0]		; next round key
||	LDW	*$KPB++[2],$K[1]
	ROTL	$Te2[0],TBL2,$Te2[0]		; t2
||	ROTL	$Te2[1],TBL2,$Te2[1]		; t3
||	XOR	$s[0],$Te2[2],$s[0]
||	XOR	$s[1],$Te2[3],$s[1]
||	LDW	*$KPA++[2],$K[2]
||	LDW	*$KPB++[2],$K[3]
	ROTL	$Te1[3],TBL1,$Te3[2]		; t2
||	ROTL	$Te3[2],TBL3,$Te1[3]		; t3
||	XOR	$s[0],$Te1[2],$s[0]
||	XOR	$s[1],$Te3[3],$s[1]
	XOR	$s[2],$Te2[0],$s[2]
||	XOR	$s[3],$Te2[1],$s[3]
||	XOR	$s[0],$Te0[0],$s[0]
||	XOR	$s[1],$Te0[1],$s[1]
	SPKERNEL
||	XOR.L	$s[2],$Te3[2],$s[2]
||	XOR.L	$s[3],$Te1[3],$s[3]
;;====================================================================
	ADD.D	${TEA},A0,${TEA}		; point to Te4
||	ADD.D	${TEB},A0,${TEB}
||	EXTU	$s[1],EXT1,24,$Te1[1]
||	EXTU	$s[0],EXT3,24,$Te3[0]
	LDBU	*${TEB}[$Te1[1]],$Te1[1]	; Te1[s1>>8],	t0
||	LDBU	*${TEA}[$Te3[0]],$Te3[0]	; Te3[s0>>24],	t1
||	XOR	$s[2],$Te0[2],$s[2]		; modulo-scheduled
||	XOR	$s[3],$Te0[3],$s[3]		; modulo-scheduled
||	EXTU	$s[0],EXT0,24,$Te0[0]
||	EXTU	$s[1],EXT0,24,$Te0[1]
	LDBU	*${TEA}[$Te0[0]],$Te0[0]	; Te0[s0],	t0
||	LDBU	*${TEB}[$Te0[1]],$Te0[1]	; Te0[s1],	t1
||	EXTU	$s[3],EXT3,24,$Te3[3]
||	EXTU	$s[2],EXT1,24,$Te1[2]
	LDBU	*${TEB}[$Te3[3]],$Te3[3]	; Te3[s3>>24],	t0
||	LDBU	*${TEA}[$Te1[2]],$Te1[2]	; Te1[s2>>8],	t1
||	EXTU	$s[2],EXT2,24,$Te2[2]
||	EXTU	$s[3],EXT2,24,$Te2[3]
	LDBU	*${TEA}[$Te2[2]],$Te2[2]	; Te2[s2>>16],	t0
||	LDBU	*${TEB}[$Te2[3]],$Te2[3]	; Te2[s3>>16],	t1
||	EXTU	$s[1],EXT3,24,$Te3[1]
||	EXTU	$s[0],EXT1,24,$Te1[0]
	LDBU	*${TEB}[$Te3[1]],$Te3[1]	; Te3[s1>>24],	t2
||	LDBU	*${TEA}[$Te1[0]],$Te1[0]	; Te1[s0>>8],	t3
||	EXTU	$s[3],EXT1,24,$Te1[3]
||	EXTU	$s[2],EXT3,24,$Te3[2]
	LDBU	*${TEB}[$Te1[3]],$Te1[3]	; Te1[s3>>8],	t2
||	LDBU	*${TEA}[$Te3[2]],$Te3[2]	; Te3[s2>>24],	t3
||	EXTU	$s[2],EXT0,24,$Te0[2]
||	EXTU	$s[3],EXT0,24,$Te0[3]
	LDBU	*${TEA}[$Te0[2]],$Te0[2]	; Te0[s2],	t2
||	LDBU	*${TEB}[$Te0[3]],$Te0[3]	; Te0[s3],	t3
||	EXTU	$s[0],EXT2,24,$Te2[0]
||	EXTU	$s[1],EXT2,24,$Te2[1]
	LDBU	*${TEA}[$Te2[0]],$Te2[0]	; Te2[s0>>16],	t2
||	LDBU	*${TEB}[$Te2[1]],$Te2[1]	; Te2[s1>>16],	t3

	.if	.BIG_ENDIAN
	PACK2	$Te0[0],$Te1[1],$Te0[0]
||	PACK2	$Te0[1],$Te1[2],$Te0[1]
	PACK2	$Te2[2],$Te3[3],$Te2[2]
||	PACK2	$Te2[3],$Te3[0],$Te2[3]
	PACKL4	$Te0[0],$Te2[2],$Te0[0]
||	PACKL4	$Te0[1],$Te2[3],$Te0[1]
	XOR	$K[0],$Te0[0],$Te0[0]		; s[0]
||	XOR	$K[1],$Te0[1],$Te0[1]		; s[1]

	PACK2	$Te0[2],$Te1[3],$Te0[2]
||	PACK2	$Te0[3],$Te1[0],$Te0[3]
	PACK2	$Te2[0],$Te3[1],$Te2[0]
||	PACK2	$Te2[1],$Te3[2],$Te2[1]
||	BNOP	RA
	PACKL4	$Te0[2],$Te2[0],$Te0[2]
||	PACKL4	$Te0[3],$Te2[1],$Te0[3]
	XOR	$K[2],$Te0[2],$Te0[2]		; s[2]
||	XOR	$K[3],$Te0[3],$Te0[3]		; s[3]

	MV	$Te0[0],A9
||	MV	$Te0[1],A8
	MV	$Te0[2],B9
||	MV	$Te0[3],B8
|| [B2]	STNDW	A9:A8,*OUT++
   [B2]	STNDW	B9:B8,*OUT++
	.else
	PACK2	$Te1[1],$Te0[0],$Te1[1]
||	PACK2	$Te1[2],$Te0[1],$Te1[2]
	PACK2	$Te3[3],$Te2[2],$Te3[3]
||	PACK2	$Te3[0],$Te2[3],$Te3[0]
	PACKL4	$Te3[3],$Te1[1],$Te1[1]
||	PACKL4	$Te3[0],$Te1[2],$Te1[2]
	XOR	$K[0],$Te1[1],$Te1[1]		; s[0]
||	XOR	$K[1],$Te1[2],$Te1[2]		; s[1]

	PACK2	$Te1[3],$Te0[2],$Te1[3]
||	PACK2	$Te1[0],$Te0[3],$Te1[0]
	PACK2	$Te3[1],$Te2[0],$Te3[1]
||	PACK2	$Te3[2],$Te2[1],$Te3[2]
||	BNOP	RA
	PACKL4	$Te3[1],$Te1[3],$Te1[3]
||	PACKL4	$Te3[2],$Te1[0],$Te1[0]
	XOR	$K[2],$Te1[3],$Te1[3]		; s[2]
||	XOR	$K[3],$Te1[0],$Te1[0]		; s[3]

	MV	$Te1[1],A8
||	MV	$Te1[2],A9
	MV	$Te1[3],B8
||	MV	$Te1[0],B9
|| [B2]	STNDW	A9:A8,*OUT++
   [B2]	STNDW	B9:B8,*OUT++
	.endif
	.endasmfunc

	.global	_AES_decrypt
_AES_decrypt:
	.asmfunc
	MVK	1,B2
__decrypt:
	.if	__TI_EABI__
   [B2]	LDNDW	*INP++,A9:A8			; load input
||	MVKL	\$PCR_OFFSET(AES_Td,__decrypt),$TEA
||	ADDKPC	__decrypt,B0
   [B2]	LDNDW	*INP++,B9:B8
||	MVKH	\$PCR_OFFSET(AES_Td,__decrypt),$TEA
||	ADD	0,KEY,$KPA
||	ADD	4,KEY,$KPB
	.else
   [B2]	LDNDW	*INP++,A9:A8			; load input
||	MVKL	(AES_Td-__decrypt),$TEA
||	ADDKPC	__decrypt,B0
   [B2]	LDNDW	*INP++,B9:B8
||	MVKH	(AES_Td-__decrypt),$TEA
||	ADD	0,KEY,$KPA
||	ADD	4,KEY,$KPB
	.endif
	LDW	*$KPA++[2],$Td0[0]		; zero round key
||	LDW	*$KPB++[2],$Td0[1]
||	MVK	60,A0
||	ADD	B0,$TEA,$TEA			; AES_Td
	LDW	*KEY[A0],B0			; rounds
||	MVK	1024,A0				; sizeof(AES_Td)
	LDW	*$KPA++[2],$Td0[2]
||	LDW	*$KPB++[2],$Td0[3]
||	MV	$TEA,$TEB
	NOP
	.if	.BIG_ENDIAN
	MV	A9,$s[0]
||	MV	A8,$s[1]
||	MV	B9,$s[2]
||	MV	B8,$s[3]
	.else
	MV	A8,$s[0]
||	MV	A9,$s[1]
||	MV	B8,$s[2]
||	MV	B9,$s[3]
	.endif
	XOR	$Td0[0],$s[0],$s[0]
||	XOR	$Td0[1],$s[1],$s[1]
||	LDW	*$KPA++[2],$K[0]		; 1st round key
||	LDW	*$KPB++[2],$K[1]
	SUB	B0,2,B0

	SPLOOPD	13
||	MVC	B0,ILC
||	LDW	*$KPA++[2],$K[2]
||	LDW	*$KPB++[2],$K[3]
;;====================================================================
	EXTU	$s[1],EXT3,24,$Td3[1]
||	EXTU	$s[0],EXT1,24,$Td1[0]
	LDW	*${TEB}[$Td3[1]],$Td3[1]	; Td3[s1>>24],	t0
||	LDW	*${TEA}[$Td1[0]],$Td1[0]	; Td1[s0>>8],	t1
||	XOR	$s[2],$Td0[2],$s[2]		; modulo-scheduled
||	XOR	$s[3],$Td0[3],$s[3]		; modulo-scheduled
||	EXTU	$s[1],EXT1,24,$Td1[1]
||	EXTU	$s[0],EXT3,24,$Td3[0]
	LDW	*${TEB}[$Td1[1]],$Td1[1]	; Td1[s1>>8],	t2
||	LDW	*${TEA}[$Td3[0]],$Td3[0]	; Td3[s0>>24],	t3
||	EXTU	$s[2],EXT2,24,$Td2[2]
||	EXTU	$s[3],EXT2,24,$Td2[3]
	LDW	*${TEA}[$Td2[2]],$Td2[2]	; Td2[s2>>16],	t0
||	LDW	*${TEB}[$Td2[3]],$Td2[3]	; Td2[s3>>16],	t1
||	EXTU	$s[3],EXT1,24,$Td1[3]
||	EXTU	$s[2],EXT3,24,$Td3[2]
	LDW	*${TEB}[$Td1[3]],$Td1[3]	; Td1[s3>>8],	t0
||	LDW	*${TEA}[$Td3[2]],$Td3[2]	; Td3[s2>>24],	t1
||	EXTU	$s[0],EXT2,24,$Td2[0]
||	EXTU	$s[1],EXT2,24,$Td2[1]
	LDW	*${TEA}[$Td2[0]],$Td2[0]	; Td2[s0>>16],	t2
||	LDW	*${TEB}[$Td2[1]],$Td2[1]	; Td2[s1>>16],	t3
||	EXTU	$s[3],EXT3,24,$Td3[3]
||	EXTU	$s[2],EXT1,24,$Td1[2]
	LDW	*${TEB}[$Td3[3]],$Td3[3]	; Td3[s3>>24],	t2
||	LDW	*${TEA}[$Td1[2]],$Td1[2]	; Td1[s2>>8],	t3
||	ROTL	$Td3[1],TBL3,$Td1[0]		; t0
||	ROTL	$Td1[0],TBL1,$Td3[1]		; t1
||	EXTU	$s[0],EXT0,24,$Td0[0]
||	EXTU	$s[1],EXT0,24,$Td0[1]
	LDW	*${TEA}[$Td0[0]],$Td0[0]	; Td0[s0],	t0
||	LDW	*${TEB}[$Td0[1]],$Td0[1]	; Td0[s1],	t1
||	ROTL	$Td1[1],TBL1,$Td3[0]		; t2
||	ROTL	$Td3[0],TBL3,$Td1[1]		; t3
||	EXTU	$s[2],EXT0,24,$Td0[2]
||	EXTU	$s[3],EXT0,24,$Td0[3]
	LDW	*${TEA}[$Td0[2]],$Td0[2]	; Td0[s2],	t2
||	LDW	*${TEB}[$Td0[3]],$Td0[3]	; Td0[s3],	t3
||	ROTL	$Td2[2],TBL2,$Td2[2]		; t0
||	ROTL	$Td2[3],TBL2,$Td2[3]		; t1
||	XOR	$K[0],$Td1[0],$s[0]
||	XOR	$K[1],$Td3[1],$s[1]
	ROTL	$Td1[3],TBL1,$Td3[2]		; t0
||	ROTL	$Td3[2],TBL3,$Td1[3]		; t1
||	XOR	$K[2],$Td3[0],$s[2]
||	XOR	$K[3],$Td1[1],$s[3]
||	LDW	*$KPA++[2],$K[0]		; next round key
||	LDW	*$KPB++[2],$K[1]
	ROTL	$Td2[0],TBL2,$Td2[0]		; t2
||	ROTL	$Td2[1],TBL2,$Td2[1]		; t3
||	XOR	$s[0],$Td2[2],$s[0]
||	XOR	$s[1],$Td2[3],$s[1]
||	LDW	*$KPA++[2],$K[2]
||	LDW	*$KPB++[2],$K[3]
	ROTL	$Td3[3],TBL3,$Td1[2]		; t2
||	ROTL	$Td1[2],TBL1,$Td3[3]		; t3
||	XOR	$s[0],$Td3[2],$s[0]
||	XOR	$s[1],$Td1[3],$s[1]
	XOR	$s[2],$Td2[0],$s[2]
||	XOR	$s[3],$Td2[1],$s[3]
||	XOR	$s[0],$Td0[0],$s[0]
||	XOR	$s[1],$Td0[1],$s[1]
	SPKERNEL
||	XOR.L	$s[2],$Td1[2],$s[2]
||	XOR.L	$s[3],$Td3[3],$s[3]
;;====================================================================
	ADD.D	${TEA},A0,${TEA}		; point to Td4
||	ADD.D	${TEB},A0,${TEB}
||	EXTU	$s[1],EXT3,24,$Td3[1]
||	EXTU	$s[0],EXT1,24,$Td1[0]
	LDBU	*${TEB}[$Td3[1]],$Td3[1]	; Td3[s1>>24],	t0
||	LDBU	*${TEA}[$Td1[0]],$Td1[0]	; Td1[s0>>8],	t1
||	XOR	$s[2],$Td0[2],$s[2]		; modulo-scheduled
||	XOR	$s[3],$Td0[3],$s[3]		; modulo-scheduled
||	EXTU	$s[0],EXT0,24,$Td0[0]
||	EXTU	$s[1],EXT0,24,$Td0[1]
	LDBU	*${TEA}[$Td0[0]],$Td0[0]	; Td0[s0],	t0
||	LDBU	*${TEB}[$Td0[1]],$Td0[1]	; Td0[s1],	t1
||	EXTU	$s[2],EXT2,24,$Td2[2]
||	EXTU	$s[3],EXT2,24,$Td2[3]
	LDBU	*${TEA}[$Td2[2]],$Td2[2]	; Td2[s2>>16],	t0
||	LDBU	*${TEB}[$Td2[3]],$Td2[3]	; Td2[s3>>16],	t1
||	EXTU	$s[3],EXT1,24,$Td1[3]
||	EXTU	$s[2],EXT3,24,$Td3[2]
	LDBU	*${TEB}[$Td1[3]],$Td1[3]	; Td1[s3>>8],	t0
||	LDBU	*${TEA}[$Td3[2]],$Td3[2]	; Td3[s2>>24],	t1
||	EXTU	$s[1],EXT1,24,$Td1[1]
||	EXTU	$s[0],EXT3,24,$Td3[0]
	LDBU	*${TEB}[$Td1[1]],$Td1[1]	; Td1[s1>>8],	t2
||	LDBU	*${TEA}[$Td3[0]],$Td3[0]	; Td3[s0>>24],	t3
||	EXTU	$s[0],EXT2,24,$Td2[0]
||	EXTU	$s[1],EXT2,24,$Td2[1]
	LDBU	*${TEA}[$Td2[0]],$Td2[0]	; Td2[s0>>16],	t2
||	LDBU	*${TEB}[$Td2[1]],$Td2[1]	; Td2[s1>>16],	t3
||	EXTU	$s[3],EXT3,24,$Td3[3]
||	EXTU	$s[2],EXT1,24,$Td1[2]
	LDBU	*${TEB}[$Td3[3]],$Td3[3]	; Td3[s3>>24],	t2
||	LDBU	*${TEA}[$Td1[2]],$Td1[2]	; Td1[s2>>8],	t3
||	EXTU	$s[2],EXT0,24,$Td0[2]
||	EXTU	$s[3],EXT0,24,$Td0[3]
	LDBU	*${TEA}[$Td0[2]],$Td0[2]	; Td0[s2],	t2
||	LDBU	*${TEB}[$Td0[3]],$Td0[3]	; Td0[s3],	t3

	.if	.BIG_ENDIAN
	PACK2	$Td0[0],$Td1[3],$Td0[0]
||	PACK2	$Td0[1],$Td1[0],$Td0[1]
	PACK2	$Td2[2],$Td3[1],$Td2[2]
||	PACK2	$Td2[3],$Td3[2],$Td2[3]
	PACKL4	$Td0[0],$Td2[2],$Td0[0]
||	PACKL4	$Td0[1],$Td2[3],$Td0[1]
	XOR	$K[0],$Td0[0],$Td0[0]		; s[0]
||	XOR	$K[1],$Td0[1],$Td0[1]		; s[1]

	PACK2	$Td0[2],$Td1[1],$Td0[2]
||	PACK2	$Td0[3],$Td1[2],$Td0[3]
	PACK2	$Td2[0],$Td3[3],$Td2[0]
||	PACK2	$Td2[1],$Td3[0],$Td2[1]
||	BNOP	RA
	PACKL4	$Td0[2],$Td2[0],$Td0[2]
||	PACKL4	$Td0[3],$Td2[1],$Td0[3]
	XOR	$K[2],$Td0[2],$Td0[2]		; s[2]
||	XOR	$K[3],$Td0[3],$Td0[3]		; s[3]

	MV	$Td0[0],A9
||	MV	$Td0[1],A8
	MV	$Td0[2],B9
||	MV	$Td0[3],B8
|| [B2]	STNDW	A9:A8,*OUT++
   [B2]	STNDW	B9:B8,*OUT++
	.else
	PACK2	$Td1[3],$Td0[0],$Td1[3]
||	PACK2	$Td1[0],$Td0[1],$Td1[0]
	PACK2	$Td3[1],$Td2[2],$Td3[1]
||	PACK2	$Td3[2],$Td2[3],$Td3[2]
	PACKL4	$Td3[1],$Td1[3],$Td1[3]
||	PACKL4	$Td3[2],$Td1[0],$Td1[0]
	XOR	$K[0],$Td1[3],$Td1[3]		; s[0]
||	XOR	$K[1],$Td1[0],$Td1[0]		; s[1]

	PACK2	$Td1[1],$Td0[2],$Td1[1]
||	PACK2	$Td1[2],$Td0[3],$Td1[2]
	PACK2	$Td3[3],$Td2[0],$Td3[3]
||	PACK2	$Td3[0],$Td2[1],$Td3[0]
||	BNOP	RA
	PACKL4	$Td3[3],$Td1[1],$Td1[1]
||	PACKL4	$Td3[0],$Td1[2],$Td1[2]
	XOR	$K[2],$Td1[1],$Td1[1]		; s[2]
||	XOR	$K[3],$Td1[2],$Td1[2]		; s[3]

	MV	$Td1[3],A8
||	MV	$Td1[0],A9
	MV	$Td1[1],B8
||	MV	$Td1[2],B9
|| [B2]	STNDW	A9:A8,*OUT++
   [B2]	STNDW	B9:B8,*OUT++
	.endif
	.endasmfunc
___
# Assembly template, part 2: AES_set_encrypt_key and AES_set_decrypt_key.
#
# The bare block scopes the lexical register maps declared below so they
# do not leak into the other templates.  As visible in the template:
#   - AES_set_encrypt_key returns -1 in RET when INP or KEY is zero, -2 for
#     a bit length other than 128/192/256, and 0 on success; on the local
#     return path B0 holds the round count or zero.
#   - AES_set_decrypt_key branches to __set_encrypt_key, swaps the round
#     keys end-for-end ("flip round keys"), then runs the GMPY4-based
#     InvMixColumns loop over the inner round keys, saving and restoring
#     GFPGFR around it.
{
# Eight key-word registers: the global @K followed by the state registers
# @s (A6,B6,A7,B7,A8,B8,A9,B9).  The right-hand @K is still the global,
# because the new lexical only becomes visible after this statement.
my @K=(@K,@s);			# extended key
# S-box lookup scratch registers B16..B19.
my @Te4=map("B$_",(16..19));

# Products of a key word with 0x09/0x0B/0x0D/0x0E; these reuse the
# Te0..Te3 look-up registers, which are free during key setup.
my @Kx9=@Te0;			# used in AES_set_decrypt_key
my @KxB=@Te1;
my @KxD=@Te2;
my @KxE=@Te3;

$code.=<<___;
	.asg	OUT,BITS

	.global	_AES_set_encrypt_key
_AES_set_encrypt_key:
__set_encrypt_key:
	.asmfunc
	MV	INP,A0
||	SHRU	BITS,5,BITS			; 128-192-256 -> 4-6-8
||	MV	KEY,A1
  [!A0]	B	RA
||[!A0]	MVK	-1,RET
||[!A0]	MVK	1,A1				; only one B RA
  [!A1]	B	RA
||[!A1]	MVK	-1,RET
||[!A1]	MVK	0,A0
||	MVK	0,B0
||	MVK	0,A1
   [A0]	LDNDW	*INP++,A9:A8
|| [A0]	CMPEQ	4,BITS,B0
|| [A0]	CMPLT	3,BITS,A1
   [B0]	B	key128?
|| [A1]	LDNDW	*INP++,B9:B8
|| [A0]	CMPEQ	6,BITS,B0
|| [A0]	CMPLT	5,BITS,A1
   [B0]	B	key192?
|| [A1]	LDNDW	*INP++,B17:B16
|| [A0]	CMPEQ	8,BITS,B0
|| [A0]	CMPLT	7,BITS,A1
   [B0]	B	key256?
|| [A1]	LDNDW	*INP++,B19:B18

	.if	__TI_EABI__
   [A0]	ADD	0,KEY,$KPA
|| [A0]	ADD	4,KEY,$KPB
|| [A0]	MVKL	\$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
|| [A0]	ADDKPC	__set_encrypt_key,B6
   [A0]	MVKH	\$PCR_OFFSET(AES_Te4,__set_encrypt_key),$TEA
   [A0]	ADD	B6,$TEA,$TEA			; AES_Te4
	.else
   [A0]	ADD	0,KEY,$KPA
|| [A0]	ADD	4,KEY,$KPB
|| [A0]	MVKL	(AES_Te4-__set_encrypt_key),$TEA
|| [A0]	ADDKPC	__set_encrypt_key,B6
   [A0]	MVKH	(AES_Te4-__set_encrypt_key),$TEA
   [A0]	ADD	B6,$TEA,$TEA			; AES_Te4
	.endif
	NOP
	NOP

	BNOP	RA,5
||	MVK	-2,RET				; unknown bit length
||	MVK	0,B0				; redundant
;;====================================================================
;;====================================================================
key128?:
	.if	.BIG_ENDIAN
	MV	A9,$K[0]
||	MV	A8,$K[1]
||	MV	B9,$Te4[2]
||	MV	B8,$K[3]
	.else
	MV	A8,$K[0]
||	MV	A9,$K[1]
||	MV	B8,$Te4[2]
||	MV	B9,$K[3]
	.endif

	MVK	256,A0
||	MVK	9,B0

	SPLOOPD	14
||	MVC	B0,ILC
||	MV	$TEA,$TEB
||	ADD	$TEA,A0,A30			; rcon
;;====================================================================
	LDW	*A30++[1],A31			; rcon[i]
||	MV	$Te4[2],$K[2]
||	EXTU	$K[3],EXT1,24,$Te4[0]
	LDBU	*${TEB}[$Te4[0]],$Te4[0]
||	MV	$K[3],A0
||	EXTU	$K[3],EXT2,24,$Te4[1]
	LDBU	*${TEB}[$Te4[1]],$Te4[1]
||	EXTU	A0,EXT3,24,A0
||	EXTU	$K[3],EXT0,24,$Te4[3]
	.if	.BIG_ENDIAN
	LDBU	*${TEA}[A0],$Te4[3]
||	LDBU	*${TEB}[$Te4[3]],A0
	.else
	LDBU	*${TEA}[A0],A0
||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
	.endif

	STW	$K[0],*$KPA++[2]
||	STW	$K[1],*$KPB++[2]
	STW	$K[2],*$KPA++[2]
||	STW	$K[3],*$KPB++[2]

	XOR	A31,$K[0],$K[0]			; ^=rcon[i]
	.if	.BIG_ENDIAN
	PACK2	$Te4[0],$Te4[1],$Te4[1]
	PACK2	$Te4[3],A0,$Te4[3]
	PACKL4	$Te4[1],$Te4[3],$Te4[3]
	.else
	PACK2	$Te4[1],$Te4[0],$Te4[1]
	PACK2	$Te4[3],A0,$Te4[3]
	PACKL4	$Te4[3],$Te4[1],$Te4[3]
	.endif
	XOR	$Te4[3],$K[0],$Te4[0]		; K[0]
	XOR	$Te4[0],$K[1],$K[1]		; K[1]
	MV	$Te4[0],$K[0]
||	XOR	$K[1],$K[2],$Te4[2]		; K[2]
	XOR	$Te4[2],$K[3],$K[3]		; K[3]
	SPKERNEL
;;====================================================================
	BNOP	RA
	MV	$Te4[2],$K[2]
||	STW	$K[0],*$KPA++[2]
||	STW	$K[1],*$KPB++[2]
	STW	$K[2],*$KPA++[2]
||	STW	$K[3],*$KPB++[2]
	MVK	10,B0				; rounds
	STW	B0,*++${KPB}[15]
	MVK	0,RET
;;====================================================================
;;====================================================================
key192?:
	.if	.BIG_ENDIAN
	MV	A9,$K[0]
||	MV	A8,$K[1]
||	MV	B9,$K[2]
||	MV	B8,$K[3]
	MV	B17,$Te4[2]
||	MV	B16,$K[5]
	.else
	MV	A8,$K[0]
||	MV	A9,$K[1]
||	MV	B8,$K[2]
||	MV	B9,$K[3]
	MV	B16,$Te4[2]
||	MV	B17,$K[5]
	.endif

	MVK	256,A0
||	MVK	6,B0
	MV	$TEA,$TEB
||	ADD	$TEA,A0,A30			; rcon
;;====================================================================
loop192?:
	LDW	*A30++[1],A31			; rcon[i]
||	MV	$Te4[2],$K[4]
||	EXTU	$K[5],EXT1,24,$Te4[0]
	LDBU	*${TEB}[$Te4[0]],$Te4[0]
||	MV	$K[5],A0
||	EXTU	$K[5],EXT2,24,$Te4[1]
	LDBU	*${TEB}[$Te4[1]],$Te4[1]
||	EXTU	A0,EXT3,24,A0
||	EXTU	$K[5],EXT0,24,$Te4[3]
	.if	.BIG_ENDIAN
	LDBU	*${TEA}[A0],$Te4[3]
||	LDBU	*${TEB}[$Te4[3]],A0
	.else
	LDBU	*${TEA}[A0],A0
||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
	.endif

	STW	$K[0],*$KPA++[2]
||	STW	$K[1],*$KPB++[2]
	STW	$K[2],*$KPA++[2]
||	STW	$K[3],*$KPB++[2]
	STW	$K[4],*$KPA++[2]
||	STW	$K[5],*$KPB++[2]

	XOR	A31,$K[0],$K[0]			; ^=rcon[i]
	.if	.BIG_ENDIAN
	PACK2	$Te4[0],$Te4[1],$Te4[1]
||	PACK2	$Te4[3],A0,$Te4[3]
	PACKL4	$Te4[1],$Te4[3],$Te4[3]
	.else
	PACK2	$Te4[1],$Te4[0],$Te4[1]
||	PACK2	$Te4[3],A0,$Te4[3]
	PACKL4	$Te4[3],$Te4[1],$Te4[3]
	.endif
	BDEC	loop192?,B0
||	XOR	$Te4[3],$K[0],$Te4[0]		; K[0]
	XOR	$Te4[0],$K[1],$K[1]		; K[1]
	MV	$Te4[0],$K[0]
||	XOR	$K[1],$K[2],$Te4[2]		; K[2]
	XOR	$Te4[2],$K[3],$K[3]		; K[3]
	MV	$Te4[2],$K[2]
||	XOR	$K[3],$K[4],$Te4[2]		; K[4]
	XOR	$Te4[2],$K[5],$K[5]		; K[5]
;;====================================================================
	BNOP	RA
	STW	$K[0],*$KPA++[2]
||	STW	$K[1],*$KPB++[2]
	STW	$K[2],*$KPA++[2]
||	STW	$K[3],*$KPB++[2]
	MVK	12,B0				; rounds
	STW	B0,*++${KPB}[7]
	MVK	0,RET
;;====================================================================
;;====================================================================
key256?:
	.if	.BIG_ENDIAN
	MV	A9,$K[0]
||	MV	A8,$K[1]
||	MV	B9,$K[2]
||	MV	B8,$K[3]
	MV	B17,$K[4]
||	MV	B16,$K[5]
||	MV	B19,$Te4[2]
||	MV	B18,$K[7]
	.else
	MV	A8,$K[0]
||	MV	A9,$K[1]
||	MV	B8,$K[2]
||	MV	B9,$K[3]
	MV	B16,$K[4]
||	MV	B17,$K[5]
||	MV	B18,$Te4[2]
||	MV	B19,$K[7]
	.endif

	MVK	256,A0
||	MVK	6,B0
	MV	$TEA,$TEB
||	ADD	$TEA,A0,A30			; rcon
;;====================================================================
loop256?:
	LDW	*A30++[1],A31			; rcon[i]
||	MV	$Te4[2],$K[6]
||	EXTU	$K[7],EXT1,24,$Te4[0]
	LDBU	*${TEB}[$Te4[0]],$Te4[0]
||	MV	$K[7],A0
||	EXTU	$K[7],EXT2,24,$Te4[1]
	LDBU	*${TEB}[$Te4[1]],$Te4[1]
||	EXTU	A0,EXT3,24,A0
||	EXTU	$K[7],EXT0,24,$Te4[3]
	.if	.BIG_ENDIAN
	LDBU	*${TEA}[A0],$Te4[3]
||	LDBU	*${TEB}[$Te4[3]],A0
	.else
	LDBU	*${TEA}[A0],A0
||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
	.endif

	STW	$K[0],*$KPA++[2]
||	STW	$K[1],*$KPB++[2]
	STW	$K[2],*$KPA++[2]
||	STW	$K[3],*$KPB++[2]
	STW	$K[4],*$KPA++[2]
||	STW	$K[5],*$KPB++[2]
	STW	$K[6],*$KPA++[2]
||	STW	$K[7],*$KPB++[2]
||	XOR	A31,$K[0],$K[0]			; ^=rcon[i]
	.if	.BIG_ENDIAN
	PACK2	$Te4[0],$Te4[1],$Te4[1]
||	PACK2	$Te4[3],A0,$Te4[3]
	PACKL4	$Te4[1],$Te4[3],$Te4[3]
||[!B0]	B	done256?
	.else
	PACK2	$Te4[1],$Te4[0],$Te4[1]
||	PACK2	$Te4[3],A0,$Te4[3]
	PACKL4	$Te4[3],$Te4[1],$Te4[3]
||[!B0]	B	done256?
	.endif
	XOR	$Te4[3],$K[0],$Te4[0]		; K[0]
	XOR	$Te4[0],$K[1],$K[1]		; K[1]
	MV	$Te4[0],$K[0]
||	XOR	$K[1],$K[2],$Te4[2]		; K[2]
	XOR	$Te4[2],$K[3],$K[3]		; K[3]

	MV	$Te4[2],$K[2]
|| [B0]	EXTU	$K[3],EXT0,24,$Te4[0]
|| [B0]	SUB	B0,1,B0
	LDBU	*${TEB}[$Te4[0]],$Te4[0]
||	MV	$K[3],A0
||	EXTU	$K[3],EXT1,24,$Te4[1]
	LDBU	*${TEB}[$Te4[1]],$Te4[1]
||	EXTU	A0,EXT2,24,A0
||	EXTU	$K[3],EXT3,24,$Te4[3]

	.if	.BIG_ENDIAN
	LDBU	*${TEA}[A0],$Te4[3]
||	LDBU	*${TEB}[$Te4[3]],A0
	NOP	3
	PACK2	$Te4[0],$Te4[1],$Te4[1]
	PACK2	$Te4[3],A0,$Te4[3]
||	B	loop256?
	PACKL4	$Te4[1],$Te4[3],$Te4[3]
	.else
	LDBU	*${TEA}[A0],A0
||	LDBU	*${TEB}[$Te4[3]],$Te4[3]
	NOP	3
	PACK2	$Te4[1],$Te4[0],$Te4[1]
	PACK2	$Te4[3],A0,$Te4[3]
||	B	loop256?
	PACKL4	$Te4[3],$Te4[1],$Te4[3]
	.endif

	XOR	$Te4[3],$K[4],$Te4[0]		; K[4]
	XOR	$Te4[0],$K[5],$K[5]		; K[5]
	MV	$Te4[0],$K[4]
||	XOR	$K[5],$K[6],$Te4[2]		; K[6]
	XOR	$Te4[2],$K[7],$K[7]		; K[7]
;;====================================================================
done256?:
	BNOP	RA
	STW	$K[0],*$KPA++[2]
||	STW	$K[1],*$KPB++[2]
	STW	$K[2],*$KPA++[2]
||	STW	$K[3],*$KPB++[2]
	MVK	14,B0				; rounds
	STW	B0,*--${KPB}[1]
	MVK	0,RET
	.endasmfunc

	.global	_AES_set_decrypt_key
_AES_set_decrypt_key:
	.asmfunc
	B	__set_encrypt_key		; guarantee local call
	MV	KEY,B30				; B30 is not modified
	MV	RA, B31				; B31 is not modified
	ADDKPC	ret?,RA,2
ret?:						; B0 holds rounds or zero
  [!B0]	BNOP	B31				; return if zero
   [B0]	SHL	B0,4,A0				; offset to last round key
   [B0]	SHRU	B0,1,B1
   [B0]	SUB	B1,1,B1
   [B0]	MVK	0x0000001B,B3			; AES polynomial
   [B0]	MVKH	0x07000000,B3

	SPLOOPD	9				; flip round keys
||	MVC	B1,ILC
||	MV	B30,$KPA
||	ADD	B30,A0,$KPB
||	MVK	16,A0				; sizeof(round key)
;;====================================================================
	LDW	*${KPA}[0],A16
||	LDW	*${KPB}[0],B16
	LDW	*${KPA}[1],A17
||	LDW	*${KPB}[1],B17
	LDW	*${KPA}[2],A18
||	LDW	*${KPB}[2],B18
	LDW	*${KPA}[3],A19
||	ADD	$KPA,A0,$KPA
||	LDW	*${KPB}[3],B19
||	SUB	$KPB,A0,$KPB
	NOP
	STW	B16,*${KPA}[-4]
||	STW	A16,*${KPB}[4]
	STW	B17,*${KPA}[-3]
||	STW	A17,*${KPB}[5]
	STW	B18,*${KPA}[-2]
||	STW	A18,*${KPB}[6]
	STW	B19,*${KPA}[-1]
||	STW	A19,*${KPB}[7]
	SPKERNEL
;;====================================================================
	SUB	B0,1,B0				; skip last round
||	ADD	B30,A0,$KPA			; skip first round
||	ADD	B30,A0,$KPB
||	MVC	GFPGFR,B30			; save GFPGFR
	LDW	*${KPA}[0],$K[0]
||	LDW	*${KPB}[1],$K[1]
||	MVC	B3,GFPGFR
	LDW	*${KPA}[2],$K[2]
||	LDW	*${KPB}[3],$K[3]
	MVK	0x00000909,A24
||	MVK	0x00000B0B,B24
	MVKH	0x09090000,A24
||	MVKH	0x0B0B0000,B24
	MVC	B0,ILC
||	SUB	B0,1,B0

	GMPY4	$K[0],A24,$Kx9[0]		; ·0x09
||	GMPY4	$K[1],A24,$Kx9[1]
||	MVK	0x00000D0D,A25
||	MVK	0x00000E0E,B25
	GMPY4	$K[2],A24,$Kx9[2]
||	GMPY4	$K[3],A24,$Kx9[3]
||	MVKH	0x0D0D0000,A25
||	MVKH	0x0E0E0000,B25

	GMPY4	$K[0],B24,$KxB[0]		; ·0x0B
||	GMPY4	$K[1],B24,$KxB[1]
	GMPY4	$K[2],B24,$KxB[2]
||	GMPY4	$K[3],B24,$KxB[3]

	SPLOOP	11				; InvMixColumns
;;====================================================================
	GMPY4	$K[0],A25,$KxD[0]		; ·0x0D
||	GMPY4	$K[1],A25,$KxD[1]
||	SWAP2	$Kx9[0],$Kx9[0]			; rotate by 16
||	SWAP2	$Kx9[1],$Kx9[1]
||	MV	$K[0],$s[0]			; this or DINT
||	MV	$K[1],$s[1]
|| [B0]	LDW	*${KPA}[4],$K[0]
|| [B0]	LDW	*${KPB}[5],$K[1]
	GMPY4	$K[2],A25,$KxD[2]
||	GMPY4	$K[3],A25,$KxD[3]
||	SWAP2	$Kx9[2],$Kx9[2]
||	SWAP2	$Kx9[3],$Kx9[3]
||	MV	$K[2],$s[2]
||	MV	$K[3],$s[3]
|| [B0]	LDW	*${KPA}[6],$K[2]
|| [B0]	LDW	*${KPB}[7],$K[3]

	GMPY4	$s[0],B25,$KxE[0]		; ·0x0E
||	GMPY4	$s[1],B25,$KxE[1]
||	XOR	$Kx9[0],$KxB[0],$KxB[0]
||	XOR	$Kx9[1],$KxB[1],$KxB[1]
	GMPY4	$s[2],B25,$KxE[2]
||	GMPY4	$s[3],B25,$KxE[3]
||	XOR	$Kx9[2],$KxB[2],$KxB[2]
||	XOR	$Kx9[3],$KxB[3],$KxB[3]

	ROTL	$KxB[0],TBL3,$KxB[0]
||	ROTL	$KxB[1],TBL3,$KxB[1]
||	SWAP2	$KxD[0],$KxD[0]			; rotate by 16
||	SWAP2	$KxD[1],$KxD[1]
	ROTL	$KxB[2],TBL3,$KxB[2]
||	ROTL	$KxB[3],TBL3,$KxB[3]
||	SWAP2	$KxD[2],$KxD[2]
||	SWAP2	$KxD[3],$KxD[3]

	XOR	$KxE[0],$KxD[0],$KxE[0]
||	XOR	$KxE[1],$KxD[1],$KxE[1]
|| [B0]	GMPY4	$K[0],A24,$Kx9[0]		; ·0x09
|| [B0]	GMPY4	$K[1],A24,$Kx9[1]
||	ADDAW	$KPA,4,$KPA
	XOR	$KxE[2],$KxD[2],$KxE[2]
||	XOR	$KxE[3],$KxD[3],$KxE[3]
|| [B0]	GMPY4	$K[2],A24,$Kx9[2]
|| [B0]	GMPY4	$K[3],A24,$Kx9[3]
||	ADDAW	$KPB,4,$KPB

	XOR	$KxB[0],$KxE[0],$KxE[0]
||	XOR	$KxB[1],$KxE[1],$KxE[1]
|| [B0]	GMPY4	$K[0],B24,$KxB[0]		; ·0x0B
|| [B0]	GMPY4	$K[1],B24,$KxB[1]
	XOR	$KxB[2],$KxE[2],$KxE[2]
||	XOR	$KxB[3],$KxE[3],$KxE[3]
|| [B0]	GMPY4	$K[2],B24,$KxB[2]
|| [B0]	GMPY4	$K[3],B24,$KxB[3]
||	STW	$KxE[0],*${KPA}[-4]
||	STW	$KxE[1],*${KPB}[-3]
	STW	$KxE[2],*${KPA}[-2]
||	STW	$KxE[3],*${KPB}[-1]
|| [B0]	SUB	B0,1,B0
	SPKERNEL
;;====================================================================
	BNOP	B31,3
	MVC	B30,GFPGFR			; restore GFPGFR(*)
	MVK	0,RET
	.endasmfunc
___
# (*)	Even though ABI doesn't specify GFPGFR as non-volatile, there
#	are code samples out there that *assume* its default value.
}
# Assembly template, part 3: AES_ctr32_encrypt.
#
# The routine loads the 16-byte counter block from *$ivp, then loops:
# each pass hands the current counter value to __encrypt in A9:A8/B9:B8
# with B2=0 (so __encrypt neither loads input nor stores output), and
# increments the 32-bit counter word.  The XOR of the previous result
# with the previously loaded input, and the store of that output, happen
# at the top of the following pass, gated by A1 ("postpone writing
# output").  A2 counts the remaining blocks.
{
# Incoming argument registers.  Only $blocks, $key and $ivp are
# interpolated below; the template addresses input and output through
# the INP/OUT aliases, so $inp and $out are unused here.
my ($inp,$out,$blocks,$key,$ivp)=("A4","B4","A6","B6","A8");
$code.=<<___;
	.global	_AES_ctr32_encrypt
_AES_ctr32_encrypt:
	.asmfunc
	LDNDW	*${ivp}[0],A31:A30	; load counter value
||	MV	$blocks,A2		; reassign $blocks
||	DMV	RA,$key,B27:B26		; reassign RA and $key
	LDNDW	*${ivp}[1],B31:B30
||	MVK	0,B2			; don't let __encrypt load input
||	MVK	0,A1			; and postpone writing output
	.if	.BIG_ENDIAN
	NOP
	.else
	NOP	4
	SWAP2	B31,B31			; keep least significant 32 bits
	SWAP4	B31,B31			; in host byte order
	.endif
ctr32_loop?:
   [A2]	BNOP	__encrypt
|| [A1]	XOR	A29,A9,A9		; input^Ek(counter)
|| [A1]	XOR	A28,A8,A8
|| [A2]	LDNDW	*INP++,A29:A28		; load input
  [!A2]	BNOP	B27			; return
|| [A1]	XOR	B29,B9,B9
|| [A1]	XOR	B28,B8,B8
|| [A2]	LDNDW	*INP++,B29:B28
	.if	.BIG_ENDIAN
   [A1]	STNDW	A9:A8,*OUT++		; save output
|| [A2]	DMV	A31,A30,A9:A8		; pass counter value to __encrypt
   [A1]	STNDW	B9:B8,*OUT++
|| [A2]	DMV	B31,B30,B9:B8
|| [A2]	ADD	B30,1,B30		; counter++
	.else
   [A1]	STNDW	A9:A8,*OUT++		; save output
|| [A2]	DMV	A31,A30,A9:A8
|| [A2]	SWAP2	B31,B0
|| [A2]	ADD	B31,1,B31		; counter++
   [A1]	STNDW	B9:B8,*OUT++
|| [A2]	MV	B30,B8
|| [A2]	SWAP4	B0,B9
	.endif
   [A2]	ADDKPC	ctr32_loop?,RA		; return to ctr32_loop?
|| [A2]	MV	B26,KEY			; pass $key
|| [A2]	SUB	A2,1,A2			; $blocks--
||[!A1]	MVK	1,A1
	NOP
	NOP
	.endasmfunc
___
}
1038# Tables are kept in endian-neutral manner
#
# This heredoc appends the constant-data part of the module to $code.
# Every table is written with .byte directives, one byte at a time, in
# the order the bytes are listed here.
#
# Section selection: ".text:aes_asm.const" when __TI_EABI__ is set,
# ".const:aes_asm" otherwise.
#
# Labels emitted, in this order:
#   AES_Te   1024 bytes = 256 entries of 4 bytes, preceded by .align 128.
#            For an S-box value S the entry reads (2*S, S, S, 3*S), the
#            products being taken in GF(2^8); e.g. the first entry is
#            0xc6,0x63,0x63,0xa5 for S = 0x63.
#   AES_Te4  256 bytes, the AES S-box itself.
#   rcon     40 bytes = ten 4-byte entries; the round constant
#            (0x01 ... 0x36) sits in the first byte, the rest are zero.
#   AES_Td   1024 bytes = 256 entries of 4 bytes, preceded by .align 128.
#            For an inverse S-box value S the entry reads
#            (14*S, 9*S, 13*S, 11*S) in GF(2^8); e.g. the first entry is
#            0x51,0xf4,0xa7,0x50 for S = 0x52.
#   AES_Td4  256 bytes, the AES inverse S-box.
#
# AES_Te4 and rcon follow AES_Te directly, and AES_Td4 follows AES_Td
# directly, with no alignment directive in between.
#
# The closing .cstring is an identification string.  The "\@" in it is
# required because this heredoc interpolates: a bare "@openssl" would be
# taken for a Perl array.
1039$code.=<<___;
1040	.if	__TI_EABI__
1041	.sect	".text:aes_asm.const"
1042	.else
1043	.sect	".const:aes_asm"
1044	.endif
1045	.align	128
1046AES_Te:
1047	.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84
1048	.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
1049	.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
1050	.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
1051	.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
1052	.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
1053	.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
1054	.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
1055	.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
1056	.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
1057	.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
1058	.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
1059	.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
1060	.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
1061	.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
1062	.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
1063	.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
1064	.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
1065	.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
1066	.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
1067	.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
1068	.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
1069	.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
1070	.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
1071	.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
1072	.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
1073	.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
1074	.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
1075	.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
1076	.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
1077	.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
1078	.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
1079	.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
1080	.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
1081	.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
1082	.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
1083	.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
1084	.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
1085	.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
1086	.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
1087	.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
1088	.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
1089	.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
1090	.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
1091	.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
1092	.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
1093	.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
1094	.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
1095	.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
1096	.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
1097	.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
1098	.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
1099	.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
1100	.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
1101	.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
1102	.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
1103	.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
1104	.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
1105	.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
1106	.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
1107	.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
1108	.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
1109	.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
1110	.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
1111	.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
1112	.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
1113	.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
1114	.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
1115	.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
1116	.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
1117	.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
1118	.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
1119	.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
1120	.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
1121	.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
1122	.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
1123	.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
1124	.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
1125	.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
1126	.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
1127	.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
1128	.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
1129	.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
1130	.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
1131	.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
1132	.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
1133	.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
1134	.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
1135	.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
1136	.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
1137	.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
1138	.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
1139	.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
1140	.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
1141	.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
1142	.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
1143	.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
1144	.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
1145	.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
1146	.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
1147	.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
1148	.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
1149	.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
1150	.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
1151	.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
1152	.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
1153	.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
1154	.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
1155	.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
1156	.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
1157	.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
1158	.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
1159	.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
1160	.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
1161	.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
1162	.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
1163	.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
1164	.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
1165	.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
1166	.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
1167	.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
1168	.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
1169	.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
1170	.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
1171	.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
1172	.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
1173	.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
1174	.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
1175AES_Te4:
1176	.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
1177	.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
1178	.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
1179	.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
1180	.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
1181	.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
1182	.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
1183	.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
1184	.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
1185	.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
1186	.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
1187	.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
1188	.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
1189	.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
1190	.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
1191	.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
1192	.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
1193	.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
1194	.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
1195	.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
1196	.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
1197	.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
1198	.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
1199	.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
1200	.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
1201	.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
1202	.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
1203	.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
1204	.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
1205	.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
1206	.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
1207	.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1208rcon:
1209	.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00
1210	.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
1211	.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
1212	.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
1213	.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
1214	.align	128
1215AES_Td:
1216	.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53
1217	.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
1218	.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
1219	.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
1220	.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
1221	.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
1222	.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
1223	.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
1224	.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
1225	.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
1226	.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
1227	.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
1228	.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
1229	.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
1230	.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
1231	.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
1232	.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
1233	.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
1234	.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
1235	.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
1236	.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
1237	.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
1238	.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
1239	.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
1240	.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
1241	.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
1242	.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
1243	.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
1244	.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
1245	.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
1246	.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
1247	.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
1248	.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
1249	.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
1250	.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
1251	.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
1252	.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
1253	.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
1254	.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
1255	.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
1256	.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
1257	.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
1258	.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
1259	.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
1260	.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
1261	.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
1262	.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
1263	.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
1264	.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
1265	.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
1266	.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
1267	.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
1268	.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
1269	.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
1270	.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
1271	.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
1272	.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
1273	.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
1274	.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
1275	.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
1276	.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
1277	.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
1278	.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
1279	.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
1280	.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
1281	.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
1282	.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
1283	.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
1284	.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
1285	.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
1286	.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
1287	.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
1288	.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
1289	.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
1290	.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
1291	.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
1292	.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
1293	.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
1294	.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
1295	.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
1296	.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
1297	.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
1298	.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
1299	.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
1300	.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
1301	.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
1302	.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
1303	.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
1304	.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
1305	.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
1306	.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
1307	.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
1308	.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
1309	.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
1310	.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
1311	.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
1312	.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
1313	.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
1314	.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
1315	.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
1316	.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
1317	.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
1318	.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
1319	.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
1320	.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
1321	.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
1322	.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
1323	.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
1324	.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
1325	.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
1326	.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
1327	.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
1328	.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
1329	.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
1330	.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
1331	.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
1332	.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
1333	.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
1334	.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
1335	.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
1336	.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
1337	.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
1338	.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
1339	.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
1340	.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
1341	.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
1342	.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
1343	.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
1344AES_Td4:
1345	.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1346	.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1347	.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1348	.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1349	.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1350	.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1351	.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1352	.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1353	.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1354	.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1355	.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1356	.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1357	.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1358	.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1359	.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1360	.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1361	.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1362	.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1363	.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1364	.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1365	.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1366	.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1367	.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1368	.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1369	.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1370	.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1371	.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1372	.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1373	.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1374	.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1375	.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1376	.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1377	.cstring "AES for C64x+, CRYPTOGAMS by <appro\@openssl.org>"
1378	.align	4
1379___
1380
# Hand the accumulated assembly text to the currently selected output
# handle, then close STDOUT explicitly: buffered write failures only
# surface at close time, and this turns one into a fatal diagnostic.
print($code);
close(STDOUT) || die("error closing STDOUT: $!");
1383