1#! /usr/bin/env perl
2# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# AES for ARMv4
18
19# January 2007.
20#
21# Code uses single 1K S-box and is >2 times faster than code generated
22# by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
23# allows to merge logical or arithmetic operation with shift or rotate
24# in one instruction and emit combined result every cycle. The module
25# is endian-neutral. The performance is ~42 cycles/byte for 128-bit
26# key [on single-issue Xscale PXA250 core].
27
28# May 2007.
29#
30# AES_set_[en|de]crypt_key is added.
31
32# July 2010.
33#
34# Rescheduling for dual-issue pipeline resulted in 12% improvement on
35# Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
36
37# February 2011.
38#
39# Profiler-assisted and platform-specific optimization resulted in 16%
40# improvement on Cortex A8 core and ~21.5 cycles per byte.
41
42# $output is the last argument if it looks like a file (it has an extension)
43# $flavour is the first argument if it doesn't look like a file
44$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
45$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
46
47if ($flavour && $flavour ne "void") {
48    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
49    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
50    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
51    die "can't locate arm-xlate.pl";
52
53    open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
54        or die "can't call $xlate: $!";
55} else {
56    $output and open STDOUT,">$output";
57}
58
59$s0="r0";
60$s1="r1";
61$s2="r2";
62$s3="r3";
63$t1="r4";
64$t2="r5";
65$t3="r6";
66$i1="r7";
67$i2="r8";
68$i3="r9";
69
70$tbl="r10";
71$key="r11";
72$rounds="r12";
73
74$code=<<___;
75#ifndef __KERNEL__
76# include "arm_arch.h"
77#else
78# define __ARM_ARCH__ __LINUX_ARM_ARCH__
79#endif
80
81#if defined(__thumb2__) && !defined(__APPLE__)
82.syntax	unified
83.thumb
84#else
85.code	32
86#undef __thumb2__
87#endif
88
89.text
90
91.type	AES_Te,%object
92.align	5
93AES_Te:
94.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
95.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
96.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
97.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
98.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
99.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
100.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
101.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
102.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
103.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
104.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
105.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
106.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
107.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
108.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
109.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
110.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
111.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
112.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
113.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
114.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
115.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
116.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
117.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
118.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
119.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
120.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
121.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
122.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
123.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
124.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
125.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
126.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
127.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
128.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
129.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
130.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
131.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
132.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
133.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
134.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
135.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
136.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
137.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
138.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
139.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
140.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
141.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
142.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
143.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
144.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
145.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
146.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
147.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
148.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
149.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
150.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
151.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
152.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
153.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
154.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
155.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
156.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
157.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
158@ Te4[256]
159.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
160.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
161.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
162.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
163.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
164.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
165.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
166.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
167.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
168.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
169.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
170.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
171.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
172.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
173.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
174.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
175.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
176.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
177.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
178.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
179.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
180.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
181.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
182.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
183.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
184.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
185.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
186.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
187.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
188.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
189.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
190.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
191@ rcon[]
192.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
193.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
194.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
195.size	AES_Te,.-AES_Te
196
197@ void AES_encrypt(const unsigned char *in, unsigned char *out,
198@ 		 const AES_KEY *key) {
199.global AES_encrypt
200.type   AES_encrypt,%function
201.align	5
202AES_encrypt:
203#ifndef	__thumb2__
204	sub	r3,pc,#8		@ AES_encrypt
205#else
206	adr	r3,.
207#endif
208	stmdb   sp!,{r1,r4-r12,lr}
209#if defined(__thumb2__) || defined(__APPLE__)
210	adr	$tbl,AES_Te
211#else
212	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
213#endif
214	mov	$rounds,r0		@ inp
215	mov	$key,r2
216#if __ARM_ARCH__<7
217	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
218	ldrb	$t1,[$rounds,#2]	@ manner...
219	ldrb	$t2,[$rounds,#1]
220	ldrb	$t3,[$rounds,#0]
221	orr	$s0,$s0,$t1,lsl#8
222	ldrb	$s1,[$rounds,#7]
223	orr	$s0,$s0,$t2,lsl#16
224	ldrb	$t1,[$rounds,#6]
225	orr	$s0,$s0,$t3,lsl#24
226	ldrb	$t2,[$rounds,#5]
227	ldrb	$t3,[$rounds,#4]
228	orr	$s1,$s1,$t1,lsl#8
229	ldrb	$s2,[$rounds,#11]
230	orr	$s1,$s1,$t2,lsl#16
231	ldrb	$t1,[$rounds,#10]
232	orr	$s1,$s1,$t3,lsl#24
233	ldrb	$t2,[$rounds,#9]
234	ldrb	$t3,[$rounds,#8]
235	orr	$s2,$s2,$t1,lsl#8
236	ldrb	$s3,[$rounds,#15]
237	orr	$s2,$s2,$t2,lsl#16
238	ldrb	$t1,[$rounds,#14]
239	orr	$s2,$s2,$t3,lsl#24
240	ldrb	$t2,[$rounds,#13]
241	ldrb	$t3,[$rounds,#12]
242	orr	$s3,$s3,$t1,lsl#8
243	orr	$s3,$s3,$t2,lsl#16
244	orr	$s3,$s3,$t3,lsl#24
245#else
246	ldr	$s0,[$rounds,#0]
247	ldr	$s1,[$rounds,#4]
248	ldr	$s2,[$rounds,#8]
249	ldr	$s3,[$rounds,#12]
250#ifdef __ARMEL__
251	rev	$s0,$s0
252	rev	$s1,$s1
253	rev	$s2,$s2
254	rev	$s3,$s3
255#endif
256#endif
257	bl	_armv4_AES_encrypt
258
259	ldr	$rounds,[sp],#4		@ pop out
260#if __ARM_ARCH__>=7
261#ifdef __ARMEL__
262	rev	$s0,$s0
263	rev	$s1,$s1
264	rev	$s2,$s2
265	rev	$s3,$s3
266#endif
267	str	$s0,[$rounds,#0]
268	str	$s1,[$rounds,#4]
269	str	$s2,[$rounds,#8]
270	str	$s3,[$rounds,#12]
271#else
272	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
273	mov	$t2,$s0,lsr#16		@ manner...
274	mov	$t3,$s0,lsr#8
275	strb	$t1,[$rounds,#0]
276	strb	$t2,[$rounds,#1]
277	mov	$t1,$s1,lsr#24
278	strb	$t3,[$rounds,#2]
279	mov	$t2,$s1,lsr#16
280	strb	$s0,[$rounds,#3]
281	mov	$t3,$s1,lsr#8
282	strb	$t1,[$rounds,#4]
283	strb	$t2,[$rounds,#5]
284	mov	$t1,$s2,lsr#24
285	strb	$t3,[$rounds,#6]
286	mov	$t2,$s2,lsr#16
287	strb	$s1,[$rounds,#7]
288	mov	$t3,$s2,lsr#8
289	strb	$t1,[$rounds,#8]
290	strb	$t2,[$rounds,#9]
291	mov	$t1,$s3,lsr#24
292	strb	$t3,[$rounds,#10]
293	mov	$t2,$s3,lsr#16
294	strb	$s2,[$rounds,#11]
295	mov	$t3,$s3,lsr#8
296	strb	$t1,[$rounds,#12]
297	strb	$t2,[$rounds,#13]
298	strb	$t3,[$rounds,#14]
299	strb	$s3,[$rounds,#15]
300#endif
301#if __ARM_ARCH__>=5
302	ldmia	sp!,{r4-r12,pc}
303#else
304	ldmia   sp!,{r4-r12,lr}
305	tst	lr,#1
306	moveq	pc,lr			@ be binary compatible with V4, yet
307	bx	lr			@ interoperable with Thumb ISA:-)
308#endif
309.size	AES_encrypt,.-AES_encrypt
310
311.type   _armv4_AES_encrypt,%function
312.align	2
313_armv4_AES_encrypt:
314	str	lr,[sp,#-4]!		@ push lr
315	ldmia	$key!,{$t1-$i1}
316	eor	$s0,$s0,$t1
317	ldr	$rounds,[$key,#240-16]
318	eor	$s1,$s1,$t2
319	eor	$s2,$s2,$t3
320	eor	$s3,$s3,$i1
321	sub	$rounds,$rounds,#1
322	mov	lr,#255
323
324	and	$i1,lr,$s0
325	and	$i2,lr,$s0,lsr#8
326	and	$i3,lr,$s0,lsr#16
327	mov	$s0,$s0,lsr#24
328.Lenc_loop:
329	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
330	and	$i1,lr,$s1,lsr#16	@ i0
331	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
332	and	$i2,lr,$s1
333	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
334	and	$i3,lr,$s1,lsr#8
335	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
336	mov	$s1,$s1,lsr#24
337
338	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
339	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
340	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
341	eor	$s0,$s0,$i1,ror#8
342	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
343	and	$i1,lr,$s2,lsr#8	@ i0
344	eor	$t2,$t2,$i2,ror#8
345	and	$i2,lr,$s2,lsr#16	@ i1
346	eor	$t3,$t3,$i3,ror#8
347	and	$i3,lr,$s2
348	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
349	eor	$s1,$s1,$t1,ror#24
350	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
351	mov	$s2,$s2,lsr#24
352
353	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
354	eor	$s0,$s0,$i1,ror#16
355	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
356	and	$i1,lr,$s3		@ i0
357	eor	$s1,$s1,$i2,ror#8
358	and	$i2,lr,$s3,lsr#8	@ i1
359	eor	$t3,$t3,$i3,ror#16
360	and	$i3,lr,$s3,lsr#16	@ i2
361	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
362	eor	$s2,$s2,$t2,ror#16
363	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
364	mov	$s3,$s3,lsr#24
365
366	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
367	eor	$s0,$s0,$i1,ror#24
368	ldr	$i1,[$key],#16
369	eor	$s1,$s1,$i2,ror#16
370	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
371	eor	$s2,$s2,$i3,ror#8
372	ldr	$t1,[$key,#-12]
373	eor	$s3,$s3,$t3,ror#8
374
375	ldr	$t2,[$key,#-8]
376	eor	$s0,$s0,$i1
377	ldr	$t3,[$key,#-4]
378	and	$i1,lr,$s0
379	eor	$s1,$s1,$t1
380	and	$i2,lr,$s0,lsr#8
381	eor	$s2,$s2,$t2
382	and	$i3,lr,$s0,lsr#16
383	eor	$s3,$s3,$t3
384	mov	$s0,$s0,lsr#24
385
386	subs	$rounds,$rounds,#1
387	bne	.Lenc_loop
388
389	add	$tbl,$tbl,#2
390
391	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
392	and	$i1,lr,$s1,lsr#16	@ i0
393	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
394	and	$i2,lr,$s1
395	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
396	and	$i3,lr,$s1,lsr#8
397	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
398	mov	$s1,$s1,lsr#24
399
400	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
401	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
402	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
403	eor	$s0,$i1,$s0,lsl#8
404	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
405	and	$i1,lr,$s2,lsr#8	@ i0
406	eor	$t2,$i2,$t2,lsl#8
407	and	$i2,lr,$s2,lsr#16	@ i1
408	eor	$t3,$i3,$t3,lsl#8
409	and	$i3,lr,$s2
410	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
411	eor	$s1,$t1,$s1,lsl#24
412	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
413	mov	$s2,$s2,lsr#24
414
415	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
416	eor	$s0,$i1,$s0,lsl#8
417	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
418	and	$i1,lr,$s3		@ i0
419	eor	$s1,$s1,$i2,lsl#16
420	and	$i2,lr,$s3,lsr#8	@ i1
421	eor	$t3,$i3,$t3,lsl#8
422	and	$i3,lr,$s3,lsr#16	@ i2
423	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
424	eor	$s2,$t2,$s2,lsl#24
425	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
426	mov	$s3,$s3,lsr#24
427
428	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
429	eor	$s0,$i1,$s0,lsl#8
430	ldr	$i1,[$key,#0]
431	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
432	eor	$s1,$s1,$i2,lsl#8
433	ldr	$t1,[$key,#4]
434	eor	$s2,$s2,$i3,lsl#16
435	ldr	$t2,[$key,#8]
436	eor	$s3,$t3,$s3,lsl#24
437	ldr	$t3,[$key,#12]
438
439	eor	$s0,$s0,$i1
440	eor	$s1,$s1,$t1
441	eor	$s2,$s2,$t2
442	eor	$s3,$s3,$t3
443
444	sub	$tbl,$tbl,#2
445	ldr	pc,[sp],#4		@ pop and return
446.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
447
448.global AES_set_encrypt_key
449.type   AES_set_encrypt_key,%function
450.align	5
451AES_set_encrypt_key:
452_armv4_AES_set_encrypt_key:
453#ifndef	__thumb2__
454	sub	r3,pc,#8		@ AES_set_encrypt_key
455#else
456	adr	r3,.
457#endif
458	teq	r0,#0
459#ifdef	__thumb2__
460	itt	eq			@ Thumb2 thing, sanity check in ARM
461#endif
462	moveq	r0,#-1
463	beq	.Labrt
464	teq	r2,#0
465#ifdef	__thumb2__
466	itt	eq			@ Thumb2 thing, sanity check in ARM
467#endif
468	moveq	r0,#-1
469	beq	.Labrt
470
471	teq	r1,#128
472	beq	.Lok
473	teq	r1,#192
474	beq	.Lok
475	teq	r1,#256
476#ifdef	__thumb2__
477	itt	ne			@ Thumb2 thing, sanity check in ARM
478#endif
479	movne	r0,#-1
480	bne	.Labrt
481
482.Lok:	stmdb   sp!,{r4-r12,lr}
483	mov	$rounds,r0		@ inp
484	mov	lr,r1			@ bits
485	mov	$key,r2			@ key
486
487#if defined(__thumb2__) || defined(__APPLE__)
488	adr	$tbl,AES_Te+1024				@ Te4
489#else
490	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
491#endif
492
493#if __ARM_ARCH__<7
494	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
495	ldrb	$t1,[$rounds,#2]	@ manner...
496	ldrb	$t2,[$rounds,#1]
497	ldrb	$t3,[$rounds,#0]
498	orr	$s0,$s0,$t1,lsl#8
499	ldrb	$s1,[$rounds,#7]
500	orr	$s0,$s0,$t2,lsl#16
501	ldrb	$t1,[$rounds,#6]
502	orr	$s0,$s0,$t3,lsl#24
503	ldrb	$t2,[$rounds,#5]
504	ldrb	$t3,[$rounds,#4]
505	orr	$s1,$s1,$t1,lsl#8
506	ldrb	$s2,[$rounds,#11]
507	orr	$s1,$s1,$t2,lsl#16
508	ldrb	$t1,[$rounds,#10]
509	orr	$s1,$s1,$t3,lsl#24
510	ldrb	$t2,[$rounds,#9]
511	ldrb	$t3,[$rounds,#8]
512	orr	$s2,$s2,$t1,lsl#8
513	ldrb	$s3,[$rounds,#15]
514	orr	$s2,$s2,$t2,lsl#16
515	ldrb	$t1,[$rounds,#14]
516	orr	$s2,$s2,$t3,lsl#24
517	ldrb	$t2,[$rounds,#13]
518	ldrb	$t3,[$rounds,#12]
519	orr	$s3,$s3,$t1,lsl#8
520	str	$s0,[$key],#16
521	orr	$s3,$s3,$t2,lsl#16
522	str	$s1,[$key,#-12]
523	orr	$s3,$s3,$t3,lsl#24
524	str	$s2,[$key,#-8]
525	str	$s3,[$key,#-4]
526#else
527	ldr	$s0,[$rounds,#0]
528	ldr	$s1,[$rounds,#4]
529	ldr	$s2,[$rounds,#8]
530	ldr	$s3,[$rounds,#12]
531#ifdef __ARMEL__
532	rev	$s0,$s0
533	rev	$s1,$s1
534	rev	$s2,$s2
535	rev	$s3,$s3
536#endif
537	str	$s0,[$key],#16
538	str	$s1,[$key,#-12]
539	str	$s2,[$key,#-8]
540	str	$s3,[$key,#-4]
541#endif
542
543	teq	lr,#128
544	bne	.Lnot128
545	mov	$rounds,#10
546	str	$rounds,[$key,#240-16]
547	add	$t3,$tbl,#256			@ rcon
548	mov	lr,#255
549
550.L128_loop:
551	and	$t2,lr,$s3,lsr#24
552	and	$i1,lr,$s3,lsr#16
553	ldrb	$t2,[$tbl,$t2]
554	and	$i2,lr,$s3,lsr#8
555	ldrb	$i1,[$tbl,$i1]
556	and	$i3,lr,$s3
557	ldrb	$i2,[$tbl,$i2]
558	orr	$t2,$t2,$i1,lsl#24
559	ldrb	$i3,[$tbl,$i3]
560	orr	$t2,$t2,$i2,lsl#16
561	ldr	$t1,[$t3],#4			@ rcon[i++]
562	orr	$t2,$t2,$i3,lsl#8
563	eor	$t2,$t2,$t1
564	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
565	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
566	str	$s0,[$key],#16
567	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
568	str	$s1,[$key,#-12]
569	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
570	str	$s2,[$key,#-8]
571	subs	$rounds,$rounds,#1
572	str	$s3,[$key,#-4]
573	bne	.L128_loop
574	sub	r2,$key,#176
575	b	.Ldone
576
577.Lnot128:
578#if __ARM_ARCH__<7
579	ldrb	$i2,[$rounds,#19]
580	ldrb	$t1,[$rounds,#18]
581	ldrb	$t2,[$rounds,#17]
582	ldrb	$t3,[$rounds,#16]
583	orr	$i2,$i2,$t1,lsl#8
584	ldrb	$i3,[$rounds,#23]
585	orr	$i2,$i2,$t2,lsl#16
586	ldrb	$t1,[$rounds,#22]
587	orr	$i2,$i2,$t3,lsl#24
588	ldrb	$t2,[$rounds,#21]
589	ldrb	$t3,[$rounds,#20]
590	orr	$i3,$i3,$t1,lsl#8
591	orr	$i3,$i3,$t2,lsl#16
592	str	$i2,[$key],#8
593	orr	$i3,$i3,$t3,lsl#24
594	str	$i3,[$key,#-4]
595#else
596	ldr	$i2,[$rounds,#16]
597	ldr	$i3,[$rounds,#20]
598#ifdef __ARMEL__
599	rev	$i2,$i2
600	rev	$i3,$i3
601#endif
602	str	$i2,[$key],#8
603	str	$i3,[$key,#-4]
604#endif
605
606	teq	lr,#192
607	bne	.Lnot192
608	mov	$rounds,#12
609	str	$rounds,[$key,#240-24]
610	add	$t3,$tbl,#256			@ rcon
611	mov	lr,#255
612	mov	$rounds,#8
613
614.L192_loop:
615	and	$t2,lr,$i3,lsr#24
616	and	$i1,lr,$i3,lsr#16
617	ldrb	$t2,[$tbl,$t2]
618	and	$i2,lr,$i3,lsr#8
619	ldrb	$i1,[$tbl,$i1]
620	and	$i3,lr,$i3
621	ldrb	$i2,[$tbl,$i2]
622	orr	$t2,$t2,$i1,lsl#24
623	ldrb	$i3,[$tbl,$i3]
624	orr	$t2,$t2,$i2,lsl#16
625	ldr	$t1,[$t3],#4			@ rcon[i++]
626	orr	$t2,$t2,$i3,lsl#8
627	eor	$i3,$t2,$t1
628	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
629	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
630	str	$s0,[$key],#24
631	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
632	str	$s1,[$key,#-20]
633	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
634	str	$s2,[$key,#-16]
635	subs	$rounds,$rounds,#1
636	str	$s3,[$key,#-12]
637#ifdef	__thumb2__
638	itt	eq				@ Thumb2 thing, sanity check in ARM
639#endif
640	subeq	r2,$key,#216
641	beq	.Ldone
642
643	ldr	$i1,[$key,#-32]
644	ldr	$i2,[$key,#-28]
645	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
646	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
647	str	$i1,[$key,#-8]
648	str	$i3,[$key,#-4]
649	b	.L192_loop
650
651.Lnot192:
652#if __ARM_ARCH__<7
653	ldrb	$i2,[$rounds,#27]
654	ldrb	$t1,[$rounds,#26]
655	ldrb	$t2,[$rounds,#25]
656	ldrb	$t3,[$rounds,#24]
657	orr	$i2,$i2,$t1,lsl#8
658	ldrb	$i3,[$rounds,#31]
659	orr	$i2,$i2,$t2,lsl#16
660	ldrb	$t1,[$rounds,#30]
661	orr	$i2,$i2,$t3,lsl#24
662	ldrb	$t2,[$rounds,#29]
663	ldrb	$t3,[$rounds,#28]
664	orr	$i3,$i3,$t1,lsl#8
665	orr	$i3,$i3,$t2,lsl#16
666	str	$i2,[$key],#8
667	orr	$i3,$i3,$t3,lsl#24
668	str	$i3,[$key,#-4]
669#else
670	ldr	$i2,[$rounds,#24]
671	ldr	$i3,[$rounds,#28]
672#ifdef __ARMEL__
673	rev	$i2,$i2
674	rev	$i3,$i3
675#endif
676	str	$i2,[$key],#8
677	str	$i3,[$key,#-4]
678#endif
679
680	mov	$rounds,#14
681	str	$rounds,[$key,#240-32]
682	add	$t3,$tbl,#256			@ rcon
683	mov	lr,#255
684	mov	$rounds,#7
685
686.L256_loop:
687	and	$t2,lr,$i3,lsr#24
688	and	$i1,lr,$i3,lsr#16
689	ldrb	$t2,[$tbl,$t2]
690	and	$i2,lr,$i3,lsr#8
691	ldrb	$i1,[$tbl,$i1]
692	and	$i3,lr,$i3
693	ldrb	$i2,[$tbl,$i2]
694	orr	$t2,$t2,$i1,lsl#24
695	ldrb	$i3,[$tbl,$i3]
696	orr	$t2,$t2,$i2,lsl#16
697	ldr	$t1,[$t3],#4			@ rcon[i++]
698	orr	$t2,$t2,$i3,lsl#8
699	eor	$i3,$t2,$t1
700	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
701	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
702	str	$s0,[$key],#32
703	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
704	str	$s1,[$key,#-28]
705	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
706	str	$s2,[$key,#-24]
707	subs	$rounds,$rounds,#1
708	str	$s3,[$key,#-20]
709#ifdef	__thumb2__
710	itt	eq				@ Thumb2 thing, sanity check in ARM
711#endif
712	subeq	r2,$key,#256
713	beq	.Ldone
714
715	and	$t2,lr,$s3
716	and	$i1,lr,$s3,lsr#8
717	ldrb	$t2,[$tbl,$t2]
718	and	$i2,lr,$s3,lsr#16
719	ldrb	$i1,[$tbl,$i1]
720	and	$i3,lr,$s3,lsr#24
721	ldrb	$i2,[$tbl,$i2]
722	orr	$t2,$t2,$i1,lsl#8
723	ldrb	$i3,[$tbl,$i3]
724	orr	$t2,$t2,$i2,lsl#16
725	ldr	$t1,[$key,#-48]
726	orr	$t2,$t2,$i3,lsl#24
727
728	ldr	$i1,[$key,#-44]
729	ldr	$i2,[$key,#-40]
730	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
731	ldr	$i3,[$key,#-36]
732	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
733	str	$t1,[$key,#-16]
734	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
735	str	$i1,[$key,#-12]
736	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
737	str	$i2,[$key,#-8]
738	str	$i3,[$key,#-4]
739	b	.L256_loop
740
741.align	2
742.Ldone:	mov	r0,#0
743	ldmia   sp!,{r4-r12,lr}
744.Labrt:
745#if __ARM_ARCH__>=5
746	ret				@ bx lr
747#else
748	tst	lr,#1
749	moveq	pc,lr			@ be binary compatible with V4, yet
750	bx	lr			@ interoperable with Thumb ISA:-)
751#endif
752.size	AES_set_encrypt_key,.-AES_set_encrypt_key
753
754.global AES_set_decrypt_key
755.type   AES_set_decrypt_key,%function
756.align	5
757AES_set_decrypt_key:
758	str	lr,[sp,#-4]!            @ push lr
759	bl	_armv4_AES_set_encrypt_key
760	teq	r0,#0
761	ldr	lr,[sp],#4              @ pop lr
762	bne	.Labrt
763
764	mov	r0,r2			@ AES_set_encrypt_key preserves r2,
765	mov	r1,r2			@ which is AES_KEY *key
766	b	_armv4_AES_set_enc2dec_key
767.size	AES_set_decrypt_key,.-AES_set_decrypt_key
768
769@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
770.global	AES_set_enc2dec_key
771.type	AES_set_enc2dec_key,%function
772.align	5
773AES_set_enc2dec_key:
774_armv4_AES_set_enc2dec_key:
775	stmdb   sp!,{r4-r12,lr}
776
777	ldr	$rounds,[r0,#240]
778	mov	$i1,r0			@ input
779	add	$i2,r0,$rounds,lsl#4
780	mov	$key,r1			@ output
781	add	$tbl,r1,$rounds,lsl#4
782	str	$rounds,[r1,#240]
783
784.Linv:	ldr	$s0,[$i1],#16
785	ldr	$s1,[$i1,#-12]
786	ldr	$s2,[$i1,#-8]
787	ldr	$s3,[$i1,#-4]
788	ldr	$t1,[$i2],#-16
789	ldr	$t2,[$i2,#16+4]
790	ldr	$t3,[$i2,#16+8]
791	ldr	$i3,[$i2,#16+12]
792	str	$s0,[$tbl],#-16
793	str	$s1,[$tbl,#16+4]
794	str	$s2,[$tbl,#16+8]
795	str	$s3,[$tbl,#16+12]
796	str	$t1,[$key],#16
797	str	$t2,[$key,#-12]
798	str	$t3,[$key,#-8]
799	str	$i3,[$key,#-4]
800	teq	$i1,$i2
801	bne	.Linv
802
803	ldr	$s0,[$i1]
804	ldr	$s1,[$i1,#4]
805	ldr	$s2,[$i1,#8]
806	ldr	$s3,[$i1,#12]
807	str	$s0,[$key]
808	str	$s1,[$key,#4]
809	str	$s2,[$key,#8]
810	str	$s3,[$key,#12]
811	sub	$key,$key,$rounds,lsl#3
812___
813$mask80=$i1;
814$mask1b=$i2;
815$mask7f=$i3;
816$code.=<<___;
817	ldr	$s0,[$key,#16]!		@ prefetch tp1
818	mov	$mask80,#0x80
819	mov	$mask1b,#0x1b
820	orr	$mask80,$mask80,#0x8000
821	orr	$mask1b,$mask1b,#0x1b00
822	orr	$mask80,$mask80,$mask80,lsl#16
823	orr	$mask1b,$mask1b,$mask1b,lsl#16
824	sub	$rounds,$rounds,#1
825	mvn	$mask7f,$mask80
826	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
827
828.Lmix:	and	$t1,$s0,$mask80
829	and	$s1,$s0,$mask7f
830	sub	$t1,$t1,$t1,lsr#7
831	and	$t1,$t1,$mask1b
832	eor	$s1,$t1,$s1,lsl#1	@ tp2
833
834	and	$t1,$s1,$mask80
835	and	$s2,$s1,$mask7f
836	sub	$t1,$t1,$t1,lsr#7
837	and	$t1,$t1,$mask1b
838	eor	$s2,$t1,$s2,lsl#1	@ tp4
839
840	and	$t1,$s2,$mask80
841	and	$s3,$s2,$mask7f
842	sub	$t1,$t1,$t1,lsr#7
843	and	$t1,$t1,$mask1b
844	eor	$s3,$t1,$s3,lsl#1	@ tp8
845
846	eor	$t1,$s1,$s2
847	eor	$t2,$s0,$s3		@ tp9
848	eor	$t1,$t1,$s3		@ tpe
849	eor	$t1,$t1,$s1,ror#24
850	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
851	eor	$t1,$t1,$s2,ror#16
852	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
853	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
854
855	ldr	$s0,[$key,#4]		@ prefetch tp1
856	str	$t1,[$key],#4
857	subs	$rounds,$rounds,#1
858	bne	.Lmix
859
860	mov	r0,#0
861#if __ARM_ARCH__>=5
862	ldmia	sp!,{r4-r12,pc}
863#else
864	ldmia   sp!,{r4-r12,lr}
865	tst	lr,#1
866	moveq	pc,lr			@ be binary compatible with V4, yet
867	bx	lr			@ interoperable with Thumb ISA:-)
868#endif
869.size	AES_set_enc2dec_key,.-AES_set_enc2dec_key
870
871.type	AES_Td,%object
872.align	5
873AES_Td:
874.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
875.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
876.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
877.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
878.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
879.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
880.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
881.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
882.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
883.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
884.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
885.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
886.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
887.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
888.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
889.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
890.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
891.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
892.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
893.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
894.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
895.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
896.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
897.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
898.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
899.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
900.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
901.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
902.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
903.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
904.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
905.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
906.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
907.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
908.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
909.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
910.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
911.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
912.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
913.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
914.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
915.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
916.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
917.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
918.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
919.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
920.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
921.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
922.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
923.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
924.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
925.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
926.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
927.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
928.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
929.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
930.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
931.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
932.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
933.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
934.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
935.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
936.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
937.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
938@ Td4[256]
939.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
940.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
941.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
942.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
943.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
944.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
945.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
946.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
947.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
948.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
949.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
950.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
951.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
952.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
953.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
954.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
955.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
956.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
957.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
958.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
959.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
960.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
961.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
962.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
963.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
964.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
965.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
966.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
967.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
968.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
969.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
970.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
971.size	AES_Td,.-AES_Td
972
973@ void AES_decrypt(const unsigned char *in, unsigned char *out,
974@ 		 const AES_KEY *key) {
975.global AES_decrypt
976.type   AES_decrypt,%function
977.align	5
978AES_decrypt:
979#ifndef	__thumb2__
980	sub	r3,pc,#8		@ AES_decrypt
981#else
982	adr	r3,.
983#endif
984	stmdb   sp!,{r1,r4-r12,lr}
985#if defined(__thumb2__) || defined(__APPLE__)
986	adr	$tbl,AES_Td
987#else
988	sub	$tbl,r3,#AES_decrypt-AES_Td	@ Td
989#endif
990	mov	$rounds,r0		@ inp
991	mov	$key,r2
992#if __ARM_ARCH__<7
993	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
994	ldrb	$t1,[$rounds,#2]	@ manner...
995	ldrb	$t2,[$rounds,#1]
996	ldrb	$t3,[$rounds,#0]
997	orr	$s0,$s0,$t1,lsl#8
998	ldrb	$s1,[$rounds,#7]
999	orr	$s0,$s0,$t2,lsl#16
1000	ldrb	$t1,[$rounds,#6]
1001	orr	$s0,$s0,$t3,lsl#24
1002	ldrb	$t2,[$rounds,#5]
1003	ldrb	$t3,[$rounds,#4]
1004	orr	$s1,$s1,$t1,lsl#8
1005	ldrb	$s2,[$rounds,#11]
1006	orr	$s1,$s1,$t2,lsl#16
1007	ldrb	$t1,[$rounds,#10]
1008	orr	$s1,$s1,$t3,lsl#24
1009	ldrb	$t2,[$rounds,#9]
1010	ldrb	$t3,[$rounds,#8]
1011	orr	$s2,$s2,$t1,lsl#8
1012	ldrb	$s3,[$rounds,#15]
1013	orr	$s2,$s2,$t2,lsl#16
1014	ldrb	$t1,[$rounds,#14]
1015	orr	$s2,$s2,$t3,lsl#24
1016	ldrb	$t2,[$rounds,#13]
1017	ldrb	$t3,[$rounds,#12]
1018	orr	$s3,$s3,$t1,lsl#8
1019	orr	$s3,$s3,$t2,lsl#16
1020	orr	$s3,$s3,$t3,lsl#24
1021#else
1022	ldr	$s0,[$rounds,#0]
1023	ldr	$s1,[$rounds,#4]
1024	ldr	$s2,[$rounds,#8]
1025	ldr	$s3,[$rounds,#12]
1026#ifdef __ARMEL__
1027	rev	$s0,$s0
1028	rev	$s1,$s1
1029	rev	$s2,$s2
1030	rev	$s3,$s3
1031#endif
1032#endif
1033	bl	_armv4_AES_decrypt
1034
1035	ldr	$rounds,[sp],#4		@ pop out
1036#if __ARM_ARCH__>=7
1037#ifdef __ARMEL__
1038	rev	$s0,$s0
1039	rev	$s1,$s1
1040	rev	$s2,$s2
1041	rev	$s3,$s3
1042#endif
1043	str	$s0,[$rounds,#0]
1044	str	$s1,[$rounds,#4]
1045	str	$s2,[$rounds,#8]
1046	str	$s3,[$rounds,#12]
1047#else
1048	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
1049	mov	$t2,$s0,lsr#16		@ manner...
1050	mov	$t3,$s0,lsr#8
1051	strb	$t1,[$rounds,#0]
1052	strb	$t2,[$rounds,#1]
1053	mov	$t1,$s1,lsr#24
1054	strb	$t3,[$rounds,#2]
1055	mov	$t2,$s1,lsr#16
1056	strb	$s0,[$rounds,#3]
1057	mov	$t3,$s1,lsr#8
1058	strb	$t1,[$rounds,#4]
1059	strb	$t2,[$rounds,#5]
1060	mov	$t1,$s2,lsr#24
1061	strb	$t3,[$rounds,#6]
1062	mov	$t2,$s2,lsr#16
1063	strb	$s1,[$rounds,#7]
1064	mov	$t3,$s2,lsr#8
1065	strb	$t1,[$rounds,#8]
1066	strb	$t2,[$rounds,#9]
1067	mov	$t1,$s3,lsr#24
1068	strb	$t3,[$rounds,#10]
1069	mov	$t2,$s3,lsr#16
1070	strb	$s2,[$rounds,#11]
1071	mov	$t3,$s3,lsr#8
1072	strb	$t1,[$rounds,#12]
1073	strb	$t2,[$rounds,#13]
1074	strb	$t3,[$rounds,#14]
1075	strb	$s3,[$rounds,#15]
1076#endif
1077#if __ARM_ARCH__>=5
1078	ldmia	sp!,{r4-r12,pc}
1079#else
1080	ldmia   sp!,{r4-r12,lr}
1081	tst	lr,#1
1082	moveq	pc,lr			@ be binary compatible with V4, yet
1083	bx	lr			@ interoperable with Thumb ISA:-)
1084#endif
1085.size	AES_decrypt,.-AES_decrypt
1086
1087.type   _armv4_AES_decrypt,%function
1088.align	2
1089_armv4_AES_decrypt:
1090	str	lr,[sp,#-4]!		@ push lr
1091	ldmia	$key!,{$t1-$i1}
1092	eor	$s0,$s0,$t1
1093	ldr	$rounds,[$key,#240-16]
1094	eor	$s1,$s1,$t2
1095	eor	$s2,$s2,$t3
1096	eor	$s3,$s3,$i1
1097	sub	$rounds,$rounds,#1
1098	mov	lr,#255
1099
1100	and	$i1,lr,$s0,lsr#16
1101	and	$i2,lr,$s0,lsr#8
1102	and	$i3,lr,$s0
1103	mov	$s0,$s0,lsr#24
1104.Ldec_loop:
1105	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
1106	and	$i1,lr,$s1		@ i0
1107	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
1108	and	$i2,lr,$s1,lsr#16
1109	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
1110	and	$i3,lr,$s1,lsr#8
1111	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
1112	mov	$s1,$s1,lsr#24
1113
1114	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
1115	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
1116	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
1117	eor	$s0,$s0,$i1,ror#24
1118	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
1119	and	$i1,lr,$s2,lsr#8	@ i0
1120	eor	$t2,$i2,$t2,ror#8
1121	and	$i2,lr,$s2		@ i1
1122	eor	$t3,$i3,$t3,ror#8
1123	and	$i3,lr,$s2,lsr#16
1124	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
1125	eor	$s1,$s1,$t1,ror#8
1126	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
1127	mov	$s2,$s2,lsr#24
1128
1129	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
1130	eor	$s0,$s0,$i1,ror#16
1131	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
1132	and	$i1,lr,$s3,lsr#16	@ i0
1133	eor	$s1,$s1,$i2,ror#24
1134	and	$i2,lr,$s3,lsr#8	@ i1
1135	eor	$t3,$i3,$t3,ror#8
1136	and	$i3,lr,$s3		@ i2
1137	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
1138	eor	$s2,$s2,$t2,ror#8
1139	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
1140	mov	$s3,$s3,lsr#24
1141
1142	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
1143	eor	$s0,$s0,$i1,ror#8
1144	ldr	$i1,[$key],#16
1145	eor	$s1,$s1,$i2,ror#16
1146	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
1147	eor	$s2,$s2,$i3,ror#24
1148
1149	ldr	$t1,[$key,#-12]
1150	eor	$s0,$s0,$i1
1151	ldr	$t2,[$key,#-8]
1152	eor	$s3,$s3,$t3,ror#8
1153	ldr	$t3,[$key,#-4]
1154	and	$i1,lr,$s0,lsr#16
1155	eor	$s1,$s1,$t1
1156	and	$i2,lr,$s0,lsr#8
1157	eor	$s2,$s2,$t2
1158	and	$i3,lr,$s0
1159	eor	$s3,$s3,$t3
1160	mov	$s0,$s0,lsr#24
1161
1162	subs	$rounds,$rounds,#1
1163	bne	.Ldec_loop
1164
1165	add	$tbl,$tbl,#1024
1166
1167	ldr	$t2,[$tbl,#0]		@ prefetch Td4
1168	ldr	$t3,[$tbl,#32]
1169	ldr	$t1,[$tbl,#64]
1170	ldr	$t2,[$tbl,#96]
1171	ldr	$t3,[$tbl,#128]
1172	ldr	$t1,[$tbl,#160]
1173	ldr	$t2,[$tbl,#192]
1174	ldr	$t3,[$tbl,#224]
1175
1176	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
1177	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
1178	and	$i1,lr,$s1		@ i0
1179	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
1180	and	$i2,lr,$s1,lsr#16
1181	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
1182	and	$i3,lr,$s1,lsr#8
1183
1184	add	$s1,$tbl,$s1,lsr#24
1185	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
1186	ldrb	$s1,[$s1]		@ Td4[s1>>24]
1187	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
1188	eor	$s0,$i1,$s0,lsl#24
1189	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
1190	eor	$s1,$t1,$s1,lsl#8
1191	and	$i1,lr,$s2,lsr#8	@ i0
1192	eor	$t2,$t2,$i2,lsl#8
1193	and	$i2,lr,$s2		@ i1
1194	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
1195	eor	$t3,$t3,$i3,lsl#8
1196	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
1197	and	$i3,lr,$s2,lsr#16
1198
1199	add	$s2,$tbl,$s2,lsr#24
1200	ldrb	$s2,[$s2]		@ Td4[s2>>24]
1201	eor	$s0,$s0,$i1,lsl#8
1202	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
1203	eor	$s1,$i2,$s1,lsl#16
1204	and	$i1,lr,$s3,lsr#16	@ i0
1205	eor	$s2,$t2,$s2,lsl#16
1206	and	$i2,lr,$s3,lsr#8	@ i1
1207	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
1208	eor	$t3,$t3,$i3,lsl#16
1209	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
1210	and	$i3,lr,$s3		@ i2
1211
1212	add	$s3,$tbl,$s3,lsr#24
1213	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
1214	ldrb	$s3,[$s3]		@ Td4[s3>>24]
1215	eor	$s0,$s0,$i1,lsl#16
1216	ldr	$i1,[$key,#0]
1217	eor	$s1,$s1,$i2,lsl#8
1218	ldr	$t1,[$key,#4]
1219	eor	$s2,$i3,$s2,lsl#8
1220	ldr	$t2,[$key,#8]
1221	eor	$s3,$t3,$s3,lsl#24
1222	ldr	$t3,[$key,#12]
1223
1224	eor	$s0,$s0,$i1
1225	eor	$s1,$s1,$t1
1226	eor	$s2,$s2,$t2
1227	eor	$s3,$s3,$t3
1228
1229	sub	$tbl,$tbl,#1024
1230	ldr	pc,[sp],#4		@ pop and return
1231.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
1232.asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1233.align	2
1234___
1235
1236$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
1237$code =~ s/\bret\b/bx\tlr/gm;
1238
1239open SELF,$0;
1240while(<SELF>) {
1241	next if (/^#!/);
1242	last if (!s/^#/@/ and !/^$/);
1243	print;
1244}
1245close SELF;
1246
1247print $code;
1248close STDOUT or die "error closing STDOUT: $!";	# enforce flush
1249