xref: /freebsd/crypto/openssl/crypto/aes/asm/aes-ppc.pl (revision a3557ef0)
1#! /usr/bin/env perl
2# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# Needs more work: key setup, CBC routine...
18#
19# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
20# 128-bit key, which is ~40% better than 64-bit code generated by gcc
21# 4.0. But these are not the ones currently used! Their "compact"
22# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
23# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
24# at 1/3 of ppc_AES_decrypt.
25
26# February 2010
27#
28# Rescheduling instructions to favour Power6 pipeline gave 10%
29# performance improvement on the platform in question (and marginal
30# improvement even on others). It should be noted that Power6 fails
31# to process byte in 18 cycles, only in 23, because it fails to issue
32# 4 load instructions in two cycles, only in 3. As a result, non-compact
33# block subroutines are 25% slower than one would expect. Compact
34# functions scale better, because they have pure computational part,
35# which scales perfectly with clock frequency. To be specific
36# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
37# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
38
# The first command-line argument names the target flavour; it selects the
# word size, the matching load/store mnemonics and the byte order.
$flavour = shift;

# Reject anything that mentions neither word size.
die "nonsense $flavour" unless ($flavour =~ /64/ || $flavour =~ /32/);

# "64" is tested first, so it wins if both substrings occur in the flavour.
($SIZE_T,$LRSAVE,$STU,$POP,$PUSH) = ($flavour =~ /64/)
	? (8, 2*8, "stdu", "ld",  "std")
	: (4, 4,   "stwu", "lwz", "stw");

# Non-zero (equal to the word size) only when the flavour ends in "le".
$LITTLE_ENDIAN = 0;
$LITTLE_ENDIAN = $SIZE_T if ($flavour =~ /le$/);
56
# Directory part of this script's own path (including the trailing
# separator), or the empty string when $0 carries no directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Locate the perlasm translator: first next to this script, then in the
# ../../perlasm directory relative to it.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator;
# the next command-line argument is handed to it after the flavour.
# Low-precedence "or" is required here: with "||" the die() would attach to
# the (always true) command string and a failed open() would go unnoticed.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
63
$FRAME = $SIZE_T*32;	# stack frame size: 32 machine words of $SIZE_T bytes each
65
# Append table data to the global $code buffer.
#
# Each argument is a 32-bit value; it is emitted as one ".long" directive
# that lists the value twice, i.e. 8 bytes per table entry.  Processing
# stops at the first undefined argument.
#
# No prototype is declared: the sub takes an argument list, and the former
# empty prototype "()" only worked because call sites use the "&" sigil.
sub _data_word
{
    foreach my $word (@_) {
	last unless defined($word);
	$code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word;
    }
}
70
# Symbolic register names used throughout the generated assembly.

# Stack pointer, TOC pointer and the three incoming arguments.
($sp,$toc,$inp,$out,$key)=map("r$_",1..5);

# Table pointers.  $Tbl0 shares r3 with $inp, and $Tbl3 reuses $out's
# register.
($Tbl0,$Tbl1,$Tbl2)=("r3","r6","r7");
$Tbl3=$out;	# stay away from "r2"; $out is offloaded to stack

# Four-word cipher state.
($s0,$s1,$s2,$s3)=map("r$_",8..11);

# Temporaries.
($t0,$t1,$t2,$t3)=("r12","r0","r14","r15");	# stay away from "r13";

# Sixteen accumulators, r16 through r31.
($acc00,$acc01,$acc02,$acc03,
 $acc04,$acc05,$acc06,$acc07,
 $acc08,$acc09,$acc10,$acc11,
 $acc12,$acc13,$acc14,$acc15)=map("r$_",16..31);

# The mask registers alias two of the table-pointer registers.
($mask80,$mask1b)=($Tbl2,$Tbl3);
114
# Assembly preamble plus two position-independent address helpers.
# LAES_Te and LAES_Td each save LR in r0, execute "bcl 20,31,$+4" so that LR
# holds the address of the following instruction, copy it to $Tbl0, add a
# fixed displacement, restore LR and return.  Each stub is padded with
# .space (to 64 bytes for LAES_Te, and up to offset 128 for LAES_Td), so the
# displacement reaches the data emitted right after this block; LAES_Td
# additionally skips 2048+256 bytes, the size of the first word table plus
# the first byte table.
115$code.=<<___;
116.machine	"any"
117.text
118
119.align	7
120LAES_Te:
121	mflr	r0
122	bcl	20,31,\$+4
123	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
124	addi	$Tbl0,$Tbl0,`128-8`
125	mtlr	r0
126	blr
127	.long	0
128	.byte	0,12,0x14,0,0,0,0,0
129	.space	`64-9*4`
130LAES_Td:
131	mflr	r0
132	bcl	20,31,\$+4
133	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
134	addi	$Tbl0,$Tbl0,`128-64-8+2048+256`
135	mtlr	r0
136	blr
137	.long	0
138	.byte	0,12,0x14,0,0,0,0,0
139	.space	`128-64-9*4`
140___
# First word table (the one LAES_Te points at): 256 32-bit entries.
# _data_word emits every value twice, so each entry occupies 8 bytes and the
# whole table 2048 bytes.
141&_data_word(
142	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
143	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
144	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
145	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
146	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
147	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
148	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
149	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
150	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
151	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
152	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
153	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
154	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
155	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
156	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
157	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
158	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
159	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
160	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
161	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
162	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
163	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
164	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
165	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
166	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
167	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
168	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
169	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
170	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
171	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
172	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
173	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
174	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
175	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
176	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
177	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
178	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
179	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
180	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
181	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
182	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
183	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
184	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
185	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
186	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
187	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
188	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
189	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
190	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
191	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
192	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
193	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
194	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
195	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
196	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
197	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
198	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
199	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
200	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
201	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
202	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
203	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
204	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
205	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# 256-entry byte table placed directly after the 2048-byte word table above.
# The encryption routines address it at offset 2048 from the table base and
# read it with byte loads; an in-line remark there calls it "Te4".
206$code.=<<___;
207.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
208.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
209.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
210.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
211.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
212.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
213.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
214.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
215.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
216.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
217.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
218.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
219.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
220.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
221.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
222.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
223.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
224.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
225.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
226.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
227.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
228.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
229.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
230.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
231.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
232.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
233.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
234.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
235.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
236.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
237.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
238.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
239___
# Second word table (the one LAES_Td points at, 2048+256 bytes after the
# first): 256 32-bit entries, each emitted twice by _data_word, i.e. 8 bytes
# per entry and 2048 bytes in total.
240&_data_word(
241	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
242	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
243	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
244	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
245	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
246	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
247	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
248	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
249	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
250	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
251	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
252	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
253	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
254	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
255	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
256	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
257	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
258	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
259	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
260	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
261	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
262	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
263	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
264	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
265	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
266	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
267	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
268	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
269	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
270	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
271	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
272	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
273	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
274	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
275	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
276	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
277	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
278	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
279	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
280	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
281	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
282	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
283	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
284	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
285	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
286	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
287	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
288	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
289	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
290	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
291	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
292	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
293	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
294	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
295	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
296	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
297	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
298	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
299	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
300	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
301	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
302	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
303	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
304	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Two things are emitted here:
#  1. The 256-entry byte table that follows the second word table (the
#     decryption code refers to it as "Td4").
#  2. The entry of the global .AES_encrypt routine: it allocates a stack
#     frame of $FRAME bytes, saves $out, r14-r31 and the link register, then
#     tests the two low bits of $inp and $out and branches to Lenc_unaligned
#     if any of them is set.  The fragment ends on the Lenc_unaligned_ok
#     label; the code for that path is appended by the statements below.
305$code.=<<___;
306.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
307.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
308.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
309.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
310.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
311.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
312.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
313.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
314.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
315.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
316.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
317.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
318.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
319.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
320.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
321.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
322.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
323.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
324.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
325.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
326.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
327.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
328.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
329.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
330.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
331.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
332.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
333.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
334.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
335.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
336.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
337.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
338
339
340.globl	.AES_encrypt
341.align	7
342.AES_encrypt:
343	$STU	$sp,-$FRAME($sp)
344	mflr	r0
345
346	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
347	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
348	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
349	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
350	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
351	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
352	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
353	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
354	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
355	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
356	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
357	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
358	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
359	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
360	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
361	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
362	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
363	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
364	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
365	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
366
367	andi.	$t0,$inp,3
368	andi.	$t1,$out,3
369	or.	$t0,$t0,$t1
370	bne	Lenc_unaligned
371
372Lenc_unaligned_ok:
373___
# Word-at-a-time path of .AES_encrypt (label Lenc_unaligned_ok).  Exactly one
# load variant and one store variant is emitted, chosen at generation time by
# $LITTLE_ENDIAN.

# Big-endian build: load the four input words straight into the state.
374$code.=<<___ if (!$LITTLE_ENDIAN);
375	lwz	$s0,0($inp)
376	lwz	$s1,4($inp)
377	lwz	$s2,8($inp)
378	lwz	$s3,12($inp)
379___
# Little-endian build: load into temporaries, then byte-reverse each word
# into the state with one rotlwi followed by two rlwimi inserts.
380$code.=<<___ if ($LITTLE_ENDIAN);
381	lwz	$t0,0($inp)
382	lwz	$t1,4($inp)
383	lwz	$t2,8($inp)
384	lwz	$t3,12($inp)
385	rotlwi	$s0,$t0,8
386	rotlwi	$s1,$t1,8
387	rotlwi	$s2,$t2,8
388	rotlwi	$s3,$t3,8
389	rlwimi	$s0,$t0,24,0,7
390	rlwimi	$s1,$t1,24,0,7
391	rlwimi	$s2,$t2,24,0,7
392	rlwimi	$s3,$t3,24,0,7
393	rlwimi	$s0,$t0,24,16,23
394	rlwimi	$s1,$t1,24,16,23
395	rlwimi	$s2,$t2,24,16,23
396	rlwimi	$s3,$t3,24,16,23
397___
# Fetch the table address into $Tbl0 (this overwrites $inp, which shares r3),
# run the compact cipher routine, then reload $out from its stack slot
# because its register doubles as $Tbl3/$mask1b.
398$code.=<<___;
399	bl	LAES_Te
400	bl	Lppc_AES_encrypt_compact
401	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
402___
# Little-endian build: byte-reverse the state words again before storing.
403$code.=<<___ if ($LITTLE_ENDIAN);
404	rotlwi	$t0,$s0,8
405	rotlwi	$t1,$s1,8
406	rotlwi	$t2,$s2,8
407	rotlwi	$t3,$s3,8
408	rlwimi	$t0,$s0,24,0,7
409	rlwimi	$t1,$s1,24,0,7
410	rlwimi	$t2,$s2,24,0,7
411	rlwimi	$t3,$s3,24,0,7
412	rlwimi	$t0,$s0,24,16,23
413	rlwimi	$t1,$s1,24,16,23
414	rlwimi	$t2,$s2,24,16,23
415	rlwimi	$t3,$s3,24,16,23
416	stw	$t0,0($out)
417	stw	$t1,4($out)
418	stw	$t2,8($out)
419	stw	$t3,12($out)
420___
# Big-endian build: store the state words as they are.
421$code.=<<___ if (!$LITTLE_ENDIAN);
422	stw	$s0,0($out)
423	stw	$s1,4($out)
424	stw	$s2,8($out)
425	stw	$s3,12($out)
426___
# One long fragment; in order of appearance it emits:
#
#  * "b Lenc_done": end of the word-at-a-time path of .AES_encrypt.
#
#  * Lenc_unaligned: reached when $inp or $out is not 4-byte aligned.  For
#    each pointer it computes (4096 - pointer) & (4096-16).  If that is zero
#    for $inp, or zero for $out, control goes to Lenc_xpage; otherwise it
#    returns to Lenc_unaligned_ok and uses word accesses anyway.
#    NOTE(review): presumably this keeps misaligned word accesses from
#    straddling a 4096-byte page - confirm.
#
#  * Lenc_xpage: byte-wise path.  Sixteen lbz loads are packed into the four
#    state words with insrwi (first byte in the most significant position),
#    LAES_Te and Lppc_AES_encrypt_compact are called, $out is reloaded from
#    the stack, and the result is written back with sixteen stb stores.
#
#  * Lenc_done: common epilogue - restores the link register and r14-r31,
#    releases the frame and returns.
#
#  * Lppc_AES_encrypt: table-driven variant.  Not called from .AES_encrypt
#    in the code visible here; the note at the top of the file says only the
#    compact routines are in use.  It reads the word at offset 240 from $key
#    and loops that many times minus one, XORs the first 16 key bytes into
#    the state, and in each iteration performs 16 word lookups through
#    $Tbl0 and the $Tbl0+3/+2/+1 pointers before XORing in the next 16 key
#    bytes.  The last step uses byte lookups at $Tbl0+2048 instead.
#
#  * Lppc_AES_encrypt_compact: uses only the byte table at $Tbl0+2048.  The
#    word at offset 240 from $key is the loop count.  Each iteration XORs in
#    16 key bytes, does 16 byte lookups and repacks them into the state;
#    "bdz" leaves the loop on the final iteration before the mixing step,
#    and Lenc_compact_done applies the last 16 key bytes.  The mixing step is
#    done arithmetically with the 0x80808080 and 0x1b1b1b1b masks and
#    rotations, as annotated in-line (presumably AES MixColumns).
#
#  * The .size directive for .AES_encrypt, then the entry of the global
#    .AES_decrypt routine.  It has the same frame layout and alignment test
#    as .AES_encrypt, branches to Ldec_unaligned, and ends on the
#    Ldec_unaligned_ok label.
427$code.=<<___;
428	b	Lenc_done
429
430Lenc_unaligned:
431	subfic	$t0,$inp,4096
432	subfic	$t1,$out,4096
433	andi.	$t0,$t0,4096-16
434	beq	Lenc_xpage
435	andi.	$t1,$t1,4096-16
436	bne	Lenc_unaligned_ok
437
438Lenc_xpage:
439	lbz	$acc00,0($inp)
440	lbz	$acc01,1($inp)
441	lbz	$acc02,2($inp)
442	lbz	$s0,3($inp)
443	lbz	$acc04,4($inp)
444	lbz	$acc05,5($inp)
445	lbz	$acc06,6($inp)
446	lbz	$s1,7($inp)
447	lbz	$acc08,8($inp)
448	lbz	$acc09,9($inp)
449	lbz	$acc10,10($inp)
450	insrwi	$s0,$acc00,8,0
451	lbz	$s2,11($inp)
452	insrwi	$s1,$acc04,8,0
453	lbz	$acc12,12($inp)
454	insrwi	$s0,$acc01,8,8
455	lbz	$acc13,13($inp)
456	insrwi	$s1,$acc05,8,8
457	lbz	$acc14,14($inp)
458	insrwi	$s0,$acc02,8,16
459	lbz	$s3,15($inp)
460	insrwi	$s1,$acc06,8,16
461	insrwi	$s2,$acc08,8,0
462	insrwi	$s3,$acc12,8,0
463	insrwi	$s2,$acc09,8,8
464	insrwi	$s3,$acc13,8,8
465	insrwi	$s2,$acc10,8,16
466	insrwi	$s3,$acc14,8,16
467
468	bl	LAES_Te
469	bl	Lppc_AES_encrypt_compact
470	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
471
472	extrwi	$acc00,$s0,8,0
473	extrwi	$acc01,$s0,8,8
474	stb	$acc00,0($out)
475	extrwi	$acc02,$s0,8,16
476	stb	$acc01,1($out)
477	stb	$acc02,2($out)
478	extrwi	$acc04,$s1,8,0
479	stb	$s0,3($out)
480	extrwi	$acc05,$s1,8,8
481	stb	$acc04,4($out)
482	extrwi	$acc06,$s1,8,16
483	stb	$acc05,5($out)
484	stb	$acc06,6($out)
485	extrwi	$acc08,$s2,8,0
486	stb	$s1,7($out)
487	extrwi	$acc09,$s2,8,8
488	stb	$acc08,8($out)
489	extrwi	$acc10,$s2,8,16
490	stb	$acc09,9($out)
491	stb	$acc10,10($out)
492	extrwi	$acc12,$s3,8,0
493	stb	$s2,11($out)
494	extrwi	$acc13,$s3,8,8
495	stb	$acc12,12($out)
496	extrwi	$acc14,$s3,8,16
497	stb	$acc13,13($out)
498	stb	$acc14,14($out)
499	stb	$s3,15($out)
500
501Lenc_done:
502	$POP	r0,`$FRAME+$LRSAVE`($sp)
503	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
504	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
505	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
506	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
507	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
508	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
509	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
510	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
511	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
512	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
513	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
514	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
515	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
516	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
517	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
518	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
519	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
520	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
521	mtlr	r0
522	addi	$sp,$sp,$FRAME
523	blr
524	.long	0
525	.byte	0,12,4,1,0x80,18,3,0
526	.long	0
527
528.align	5
529Lppc_AES_encrypt:
530	lwz	$acc00,240($key)
531	addi	$Tbl1,$Tbl0,3
532	lwz	$t0,0($key)
533	addi	$Tbl2,$Tbl0,2
534	lwz	$t1,4($key)
535	addi	$Tbl3,$Tbl0,1
536	lwz	$t2,8($key)
537	addi	$acc00,$acc00,-1
538	lwz	$t3,12($key)
539	addi	$key,$key,16
540	xor	$s0,$s0,$t0
541	xor	$s1,$s1,$t1
542	xor	$s2,$s2,$t2
543	xor	$s3,$s3,$t3
544	mtctr	$acc00
545.align	4
546Lenc_loop:
547	rlwinm	$acc00,$s0,`32-24+3`,21,28
548	rlwinm	$acc01,$s1,`32-24+3`,21,28
549	rlwinm	$acc02,$s2,`32-24+3`,21,28
550	rlwinm	$acc03,$s3,`32-24+3`,21,28
551	lwz	$t0,0($key)
552	rlwinm	$acc04,$s1,`32-16+3`,21,28
553	lwz	$t1,4($key)
554	rlwinm	$acc05,$s2,`32-16+3`,21,28
555	lwz	$t2,8($key)
556	rlwinm	$acc06,$s3,`32-16+3`,21,28
557	lwz	$t3,12($key)
558	rlwinm	$acc07,$s0,`32-16+3`,21,28
559	lwzx	$acc00,$Tbl0,$acc00
560	rlwinm	$acc08,$s2,`32-8+3`,21,28
561	lwzx	$acc01,$Tbl0,$acc01
562	rlwinm	$acc09,$s3,`32-8+3`,21,28
563	lwzx	$acc02,$Tbl0,$acc02
564	rlwinm	$acc10,$s0,`32-8+3`,21,28
565	lwzx	$acc03,$Tbl0,$acc03
566	rlwinm	$acc11,$s1,`32-8+3`,21,28
567	lwzx	$acc04,$Tbl1,$acc04
568	rlwinm	$acc12,$s3,`0+3`,21,28
569	lwzx	$acc05,$Tbl1,$acc05
570	rlwinm	$acc13,$s0,`0+3`,21,28
571	lwzx	$acc06,$Tbl1,$acc06
572	rlwinm	$acc14,$s1,`0+3`,21,28
573	lwzx	$acc07,$Tbl1,$acc07
574	rlwinm	$acc15,$s2,`0+3`,21,28
575	lwzx	$acc08,$Tbl2,$acc08
576	xor	$t0,$t0,$acc00
577	lwzx	$acc09,$Tbl2,$acc09
578	xor	$t1,$t1,$acc01
579	lwzx	$acc10,$Tbl2,$acc10
580	xor	$t2,$t2,$acc02
581	lwzx	$acc11,$Tbl2,$acc11
582	xor	$t3,$t3,$acc03
583	lwzx	$acc12,$Tbl3,$acc12
584	xor	$t0,$t0,$acc04
585	lwzx	$acc13,$Tbl3,$acc13
586	xor	$t1,$t1,$acc05
587	lwzx	$acc14,$Tbl3,$acc14
588	xor	$t2,$t2,$acc06
589	lwzx	$acc15,$Tbl3,$acc15
590	xor	$t3,$t3,$acc07
591	xor	$t0,$t0,$acc08
592	xor	$t1,$t1,$acc09
593	xor	$t2,$t2,$acc10
594	xor	$t3,$t3,$acc11
595	xor	$s0,$t0,$acc12
596	xor	$s1,$t1,$acc13
597	xor	$s2,$t2,$acc14
598	xor	$s3,$t3,$acc15
599	addi	$key,$key,16
600	bdnz	Lenc_loop
601
602	addi	$Tbl2,$Tbl0,2048
603	nop
604	lwz	$t0,0($key)
605	rlwinm	$acc00,$s0,`32-24`,24,31
606	lwz	$t1,4($key)
607	rlwinm	$acc01,$s1,`32-24`,24,31
608	lwz	$t2,8($key)
609	rlwinm	$acc02,$s2,`32-24`,24,31
610	lwz	$t3,12($key)
611	rlwinm	$acc03,$s3,`32-24`,24,31
612	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
613	rlwinm	$acc04,$s1,`32-16`,24,31
614	lwz	$acc09,`2048+32`($Tbl0)
615	rlwinm	$acc05,$s2,`32-16`,24,31
616	lwz	$acc10,`2048+64`($Tbl0)
617	rlwinm	$acc06,$s3,`32-16`,24,31
618	lwz	$acc11,`2048+96`($Tbl0)
619	rlwinm	$acc07,$s0,`32-16`,24,31
620	lwz	$acc12,`2048+128`($Tbl0)
621	rlwinm	$acc08,$s2,`32-8`,24,31
622	lwz	$acc13,`2048+160`($Tbl0)
623	rlwinm	$acc09,$s3,`32-8`,24,31
624	lwz	$acc14,`2048+192`($Tbl0)
625	rlwinm	$acc10,$s0,`32-8`,24,31
626	lwz	$acc15,`2048+224`($Tbl0)
627	rlwinm	$acc11,$s1,`32-8`,24,31
628	lbzx	$acc00,$Tbl2,$acc00
629	rlwinm	$acc12,$s3,`0`,24,31
630	lbzx	$acc01,$Tbl2,$acc01
631	rlwinm	$acc13,$s0,`0`,24,31
632	lbzx	$acc02,$Tbl2,$acc02
633	rlwinm	$acc14,$s1,`0`,24,31
634	lbzx	$acc03,$Tbl2,$acc03
635	rlwinm	$acc15,$s2,`0`,24,31
636	lbzx	$acc04,$Tbl2,$acc04
637	rlwinm	$s0,$acc00,24,0,7
638	lbzx	$acc05,$Tbl2,$acc05
639	rlwinm	$s1,$acc01,24,0,7
640	lbzx	$acc06,$Tbl2,$acc06
641	rlwinm	$s2,$acc02,24,0,7
642	lbzx	$acc07,$Tbl2,$acc07
643	rlwinm	$s3,$acc03,24,0,7
644	lbzx	$acc08,$Tbl2,$acc08
645	rlwimi	$s0,$acc04,16,8,15
646	lbzx	$acc09,$Tbl2,$acc09
647	rlwimi	$s1,$acc05,16,8,15
648	lbzx	$acc10,$Tbl2,$acc10
649	rlwimi	$s2,$acc06,16,8,15
650	lbzx	$acc11,$Tbl2,$acc11
651	rlwimi	$s3,$acc07,16,8,15
652	lbzx	$acc12,$Tbl2,$acc12
653	rlwimi	$s0,$acc08,8,16,23
654	lbzx	$acc13,$Tbl2,$acc13
655	rlwimi	$s1,$acc09,8,16,23
656	lbzx	$acc14,$Tbl2,$acc14
657	rlwimi	$s2,$acc10,8,16,23
658	lbzx	$acc15,$Tbl2,$acc15
659	rlwimi	$s3,$acc11,8,16,23
660	or	$s0,$s0,$acc12
661	or	$s1,$s1,$acc13
662	or	$s2,$s2,$acc14
663	or	$s3,$s3,$acc15
664	xor	$s0,$s0,$t0
665	xor	$s1,$s1,$t1
666	xor	$s2,$s2,$t2
667	xor	$s3,$s3,$t3
668	blr
669	.long	0
670	.byte	0,12,0x14,0,0,0,0,0
671
672.align	4
673Lppc_AES_encrypt_compact:
674	lwz	$acc00,240($key)
675	addi	$Tbl1,$Tbl0,2048
676	lwz	$t0,0($key)
677	lis	$mask80,0x8080
678	lwz	$t1,4($key)
679	lis	$mask1b,0x1b1b
680	lwz	$t2,8($key)
681	ori	$mask80,$mask80,0x8080
682	lwz	$t3,12($key)
683	ori	$mask1b,$mask1b,0x1b1b
684	addi	$key,$key,16
685	mtctr	$acc00
686.align	4
687Lenc_compact_loop:
688	xor	$s0,$s0,$t0
689	xor	$s1,$s1,$t1
690	rlwinm	$acc00,$s0,`32-24`,24,31
691	xor	$s2,$s2,$t2
692	rlwinm	$acc01,$s1,`32-24`,24,31
693	xor	$s3,$s3,$t3
694	rlwinm	$acc02,$s2,`32-24`,24,31
695	rlwinm	$acc03,$s3,`32-24`,24,31
696	rlwinm	$acc04,$s1,`32-16`,24,31
697	rlwinm	$acc05,$s2,`32-16`,24,31
698	rlwinm	$acc06,$s3,`32-16`,24,31
699	rlwinm	$acc07,$s0,`32-16`,24,31
700	lbzx	$acc00,$Tbl1,$acc00
701	rlwinm	$acc08,$s2,`32-8`,24,31
702	lbzx	$acc01,$Tbl1,$acc01
703	rlwinm	$acc09,$s3,`32-8`,24,31
704	lbzx	$acc02,$Tbl1,$acc02
705	rlwinm	$acc10,$s0,`32-8`,24,31
706	lbzx	$acc03,$Tbl1,$acc03
707	rlwinm	$acc11,$s1,`32-8`,24,31
708	lbzx	$acc04,$Tbl1,$acc04
709	rlwinm	$acc12,$s3,`0`,24,31
710	lbzx	$acc05,$Tbl1,$acc05
711	rlwinm	$acc13,$s0,`0`,24,31
712	lbzx	$acc06,$Tbl1,$acc06
713	rlwinm	$acc14,$s1,`0`,24,31
714	lbzx	$acc07,$Tbl1,$acc07
715	rlwinm	$acc15,$s2,`0`,24,31
716	lbzx	$acc08,$Tbl1,$acc08
717	rlwinm	$s0,$acc00,24,0,7
718	lbzx	$acc09,$Tbl1,$acc09
719	rlwinm	$s1,$acc01,24,0,7
720	lbzx	$acc10,$Tbl1,$acc10
721	rlwinm	$s2,$acc02,24,0,7
722	lbzx	$acc11,$Tbl1,$acc11
723	rlwinm	$s3,$acc03,24,0,7
724	lbzx	$acc12,$Tbl1,$acc12
725	rlwimi	$s0,$acc04,16,8,15
726	lbzx	$acc13,$Tbl1,$acc13
727	rlwimi	$s1,$acc05,16,8,15
728	lbzx	$acc14,$Tbl1,$acc14
729	rlwimi	$s2,$acc06,16,8,15
730	lbzx	$acc15,$Tbl1,$acc15
731	rlwimi	$s3,$acc07,16,8,15
732	rlwimi	$s0,$acc08,8,16,23
733	rlwimi	$s1,$acc09,8,16,23
734	rlwimi	$s2,$acc10,8,16,23
735	rlwimi	$s3,$acc11,8,16,23
736	lwz	$t0,0($key)
737	or	$s0,$s0,$acc12
738	lwz	$t1,4($key)
739	or	$s1,$s1,$acc13
740	lwz	$t2,8($key)
741	or	$s2,$s2,$acc14
742	lwz	$t3,12($key)
743	or	$s3,$s3,$acc15
744
745	addi	$key,$key,16
746	bdz	Lenc_compact_done
747
748	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
749	and	$acc01,$s1,$mask80
750	and	$acc02,$s2,$mask80
751	and	$acc03,$s3,$mask80
752	srwi	$acc04,$acc00,7		# r1>>7
753	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
754	srwi	$acc05,$acc01,7
755	andc	$acc09,$s1,$mask80
756	srwi	$acc06,$acc02,7
757	andc	$acc10,$s2,$mask80
758	srwi	$acc07,$acc03,7
759	andc	$acc11,$s3,$mask80
760	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
761	sub	$acc01,$acc01,$acc05
762	sub	$acc02,$acc02,$acc06
763	sub	$acc03,$acc03,$acc07
764	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
765	add	$acc09,$acc09,$acc09
766	add	$acc10,$acc10,$acc10
767	add	$acc11,$acc11,$acc11
768	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
769	and	$acc01,$acc01,$mask1b
770	and	$acc02,$acc02,$mask1b
771	and	$acc03,$acc03,$mask1b
772	xor	$acc00,$acc00,$acc08	# r2
773	xor	$acc01,$acc01,$acc09
774	 rotlwi	$acc12,$s0,16		# ROTATE(r0,16)
775	xor	$acc02,$acc02,$acc10
776	 rotlwi	$acc13,$s1,16
777	xor	$acc03,$acc03,$acc11
778	 rotlwi	$acc14,$s2,16
779
780	xor	$s0,$s0,$acc00		# r0^r2
781	rotlwi	$acc15,$s3,16
782	xor	$s1,$s1,$acc01
783	rotrwi	$s0,$s0,24		# ROTATE(r2^r0,24)
784	xor	$s2,$s2,$acc02
785	rotrwi	$s1,$s1,24
786	xor	$s3,$s3,$acc03
787	rotrwi	$s2,$s2,24
788	xor	$s0,$s0,$acc00		# ROTATE(r2^r0,24)^r2
789	rotrwi	$s3,$s3,24
790	xor	$s1,$s1,$acc01
791	xor	$s2,$s2,$acc02
792	xor	$s3,$s3,$acc03
793	rotlwi	$acc08,$acc12,8		# ROTATE(r0,24)
794	xor	$s0,$s0,$acc12		#
795	rotlwi	$acc09,$acc13,8
796	xor	$s1,$s1,$acc13
797	rotlwi	$acc10,$acc14,8
798	xor	$s2,$s2,$acc14
799	rotlwi	$acc11,$acc15,8
800	xor	$s3,$s3,$acc15
801	xor	$s0,$s0,$acc08		#
802	xor	$s1,$s1,$acc09
803	xor	$s2,$s2,$acc10
804	xor	$s3,$s3,$acc11
805
806	b	Lenc_compact_loop
807.align	4
808Lenc_compact_done:
809	xor	$s0,$s0,$t0
810	xor	$s1,$s1,$t1
811	xor	$s2,$s2,$t2
812	xor	$s3,$s3,$t3
813	blr
814	.long	0
815	.byte	0,12,0x14,0,0,0,0,0
816.size	.AES_encrypt,.-.AES_encrypt
817
818.globl	.AES_decrypt
819.align	7
820.AES_decrypt:
821	$STU	$sp,-$FRAME($sp)
822	mflr	r0
823
824	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
825	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
826	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
827	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
828	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
829	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
830	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
831	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
832	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
833	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
834	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
835	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
836	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
837	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
838	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
839	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
840	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
841	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
842	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
843	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
844
845	andi.	$t0,$inp,3
846	andi.	$t1,$out,3
847	or.	$t0,$t0,$t1
848	bne	Ldec_unaligned
849
850Ldec_unaligned_ok:
851___
# Word-at-a-time path of .AES_decrypt (label Ldec_unaligned_ok).  Exactly one
# load variant and one store variant is emitted, chosen at generation time by
# $LITTLE_ENDIAN.

# Big-endian build: load the four input words straight into the state.
852$code.=<<___ if (!$LITTLE_ENDIAN);
853	lwz	$s0,0($inp)
854	lwz	$s1,4($inp)
855	lwz	$s2,8($inp)
856	lwz	$s3,12($inp)
857___
# Little-endian build: load into temporaries, then byte-reverse each word
# into the state with one rotlwi followed by two rlwimi inserts.
858$code.=<<___ if ($LITTLE_ENDIAN);
859	lwz	$t0,0($inp)
860	lwz	$t1,4($inp)
861	lwz	$t2,8($inp)
862	lwz	$t3,12($inp)
863	rotlwi	$s0,$t0,8
864	rotlwi	$s1,$t1,8
865	rotlwi	$s2,$t2,8
866	rotlwi	$s3,$t3,8
867	rlwimi	$s0,$t0,24,0,7
868	rlwimi	$s1,$t1,24,0,7
869	rlwimi	$s2,$t2,24,0,7
870	rlwimi	$s3,$t3,24,0,7
871	rlwimi	$s0,$t0,24,16,23
872	rlwimi	$s1,$t1,24,16,23
873	rlwimi	$s2,$t2,24,16,23
874	rlwimi	$s3,$t3,24,16,23
875___
# Fetch the decryption table address into $Tbl0 (this overwrites $inp, which
# shares r3), run the compact cipher routine, then reload $out from its
# stack slot.
876$code.=<<___;
877	bl	LAES_Td
878	bl	Lppc_AES_decrypt_compact
879	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
880___
# Little-endian build: byte-reverse the state words again before storing.
881$code.=<<___ if ($LITTLE_ENDIAN);
882	rotlwi	$t0,$s0,8
883	rotlwi	$t1,$s1,8
884	rotlwi	$t2,$s2,8
885	rotlwi	$t3,$s3,8
886	rlwimi	$t0,$s0,24,0,7
887	rlwimi	$t1,$s1,24,0,7
888	rlwimi	$t2,$s2,24,0,7
889	rlwimi	$t3,$s3,24,0,7
890	rlwimi	$t0,$s0,24,16,23
891	rlwimi	$t1,$s1,24,16,23
892	rlwimi	$t2,$s2,24,16,23
893	rlwimi	$t3,$s3,24,16,23
894	stw	$t0,0($out)
895	stw	$t1,4($out)
896	stw	$t2,8($out)
897	stw	$t3,12($out)
898___
# Big-endian build: store the state words as they are.
899$code.=<<___ if (!$LITTLE_ENDIAN);
900	stw	$s0,0($out)
901	stw	$s1,4($out)
902	stw	$s2,8($out)
903	stw	$s3,12($out)
904___
905$code.=<<___;
906	b	Ldec_done
907
908Ldec_unaligned:
909	subfic	$t0,$inp,4096
910	subfic	$t1,$out,4096
911	andi.	$t0,$t0,4096-16
912	beq	Ldec_xpage
913	andi.	$t1,$t1,4096-16
914	bne	Ldec_unaligned_ok
915
916Ldec_xpage:
917	lbz	$acc00,0($inp)
918	lbz	$acc01,1($inp)
919	lbz	$acc02,2($inp)
920	lbz	$s0,3($inp)
921	lbz	$acc04,4($inp)
922	lbz	$acc05,5($inp)
923	lbz	$acc06,6($inp)
924	lbz	$s1,7($inp)
925	lbz	$acc08,8($inp)
926	lbz	$acc09,9($inp)
927	lbz	$acc10,10($inp)
928	insrwi	$s0,$acc00,8,0
929	lbz	$s2,11($inp)
930	insrwi	$s1,$acc04,8,0
931	lbz	$acc12,12($inp)
932	insrwi	$s0,$acc01,8,8
933	lbz	$acc13,13($inp)
934	insrwi	$s1,$acc05,8,8
935	lbz	$acc14,14($inp)
936	insrwi	$s0,$acc02,8,16
937	lbz	$s3,15($inp)
938	insrwi	$s1,$acc06,8,16
939	insrwi	$s2,$acc08,8,0
940	insrwi	$s3,$acc12,8,0
941	insrwi	$s2,$acc09,8,8
942	insrwi	$s3,$acc13,8,8
943	insrwi	$s2,$acc10,8,16
944	insrwi	$s3,$acc14,8,16
945
946	bl	LAES_Td
947	bl	Lppc_AES_decrypt_compact
948	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
949
950	extrwi	$acc00,$s0,8,0
951	extrwi	$acc01,$s0,8,8
952	stb	$acc00,0($out)
953	extrwi	$acc02,$s0,8,16
954	stb	$acc01,1($out)
955	stb	$acc02,2($out)
956	extrwi	$acc04,$s1,8,0
957	stb	$s0,3($out)
958	extrwi	$acc05,$s1,8,8
959	stb	$acc04,4($out)
960	extrwi	$acc06,$s1,8,16
961	stb	$acc05,5($out)
962	stb	$acc06,6($out)
963	extrwi	$acc08,$s2,8,0
964	stb	$s1,7($out)
965	extrwi	$acc09,$s2,8,8
966	stb	$acc08,8($out)
967	extrwi	$acc10,$s2,8,16
968	stb	$acc09,9($out)
969	stb	$acc10,10($out)
970	extrwi	$acc12,$s3,8,0
971	stb	$s2,11($out)
972	extrwi	$acc13,$s3,8,8
973	stb	$acc12,12($out)
974	extrwi	$acc14,$s3,8,16
975	stb	$acc13,13($out)
976	stb	$acc14,14($out)
977	stb	$s3,15($out)
978
979Ldec_done:
980	$POP	r0,`$FRAME+$LRSAVE`($sp)
981	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
982	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
983	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
984	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
985	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
986	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
987	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
988	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
989	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
990	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
991	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
992	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
993	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
994	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
995	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
996	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
997	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
998	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
999	mtlr	r0
1000	addi	$sp,$sp,$FRAME
1001	blr
1002	.long	0
1003	.byte	0,12,4,1,0x80,18,3,0
1004	.long	0
1005
# Lppc_AES_decrypt -- table-lookup decryption core for one block.
# On entry the four 32-bit state words are in the s0-s3 registers, the key
# register points at the round-key material and Tbl0 at the lookup table.
# The result is left in s0-s3.  The key pointer, CTR, t0-t3, acc00-acc15
# and Tbl1-Tbl3 are overwritten.
1006.align	5
1007Lppc_AES_decrypt:
# Load the word at offset 240 from the key pointer; minus one it becomes the
# main-loop count (presumably the round count kept in the key structure --
# TODO confirm against the key layout).
# Tbl1-Tbl3 alias Tbl0 at byte offsets 3, 2 and 1.  Together with the 8-byte
# index scaling in the loop, every lookup reads a 32-bit word at a different
# byte offset inside the same 8-byte table entry.
# NOTE(review): presumably each entry stores its word twice so that these
# offset reads return byte-rotated copies -- confirm against the table.
1008	lwz	$acc00,240($key)
1009	addi	$Tbl1,$Tbl0,3
1010	lwz	$t0,0($key)
1011	addi	$Tbl2,$Tbl0,2
1012	lwz	$t1,4($key)
1013	addi	$Tbl3,$Tbl0,1
1014	lwz	$t2,8($key)
1015	addi	$acc00,$acc00,-1	# main loop runs one iteration less
1016	lwz	$t3,12($key)
1017	addi	$key,$key,16		# advance to the next 16 bytes of key
# Mix the first 16 bytes of key material into the state.
1018	xor	$s0,$s0,$t0
1019	xor	$s1,$s1,$t1
1020	xor	$s2,$s2,$t2
1021	xor	$s3,$s3,$t3
1022	mtctr	$acc00
1023.align	4
1024Ldec_loop:
# One full round per iteration.  Each rlwinm extracts one state byte and
# scales it by 8 (rotate by the byte position plus 3, mask bits 21..28),
# giving a byte offset into the table.  Loads of the next round key and the
# table lookups are interleaved with the index computations.
# Byte selection: output word i takes the top byte of word i, the second
# byte of word i-1, the third byte of word i-2 and the low byte of
# word i-3 (indices modulo 4).
1025	rlwinm	$acc00,$s0,`32-24+3`,21,28
1026	rlwinm	$acc01,$s1,`32-24+3`,21,28
1027	rlwinm	$acc02,$s2,`32-24+3`,21,28
1028	rlwinm	$acc03,$s3,`32-24+3`,21,28
1029	lwz	$t0,0($key)
1030	rlwinm	$acc04,$s3,`32-16+3`,21,28
1031	lwz	$t1,4($key)
1032	rlwinm	$acc05,$s0,`32-16+3`,21,28
1033	lwz	$t2,8($key)
1034	rlwinm	$acc06,$s1,`32-16+3`,21,28
1035	lwz	$t3,12($key)
1036	rlwinm	$acc07,$s2,`32-16+3`,21,28
1037	lwzx	$acc00,$Tbl0,$acc00
1038	rlwinm	$acc08,$s2,`32-8+3`,21,28
1039	lwzx	$acc01,$Tbl0,$acc01
1040	rlwinm	$acc09,$s3,`32-8+3`,21,28
1041	lwzx	$acc02,$Tbl0,$acc02
1042	rlwinm	$acc10,$s0,`32-8+3`,21,28
1043	lwzx	$acc03,$Tbl0,$acc03
1044	rlwinm	$acc11,$s1,`32-8+3`,21,28
1045	lwzx	$acc04,$Tbl1,$acc04
1046	rlwinm	$acc12,$s1,`0+3`,21,28
1047	lwzx	$acc05,$Tbl1,$acc05
1048	rlwinm	$acc13,$s2,`0+3`,21,28
1049	lwzx	$acc06,$Tbl1,$acc06
1050	rlwinm	$acc14,$s3,`0+3`,21,28
1051	lwzx	$acc07,$Tbl1,$acc07
1052	rlwinm	$acc15,$s0,`0+3`,21,28
1053	lwzx	$acc08,$Tbl2,$acc08
# Fold the four lookups of each column into the round-key word.
1054	xor	$t0,$t0,$acc00
1055	lwzx	$acc09,$Tbl2,$acc09
1056	xor	$t1,$t1,$acc01
1057	lwzx	$acc10,$Tbl2,$acc10
1058	xor	$t2,$t2,$acc02
1059	lwzx	$acc11,$Tbl2,$acc11
1060	xor	$t3,$t3,$acc03
1061	lwzx	$acc12,$Tbl3,$acc12
1062	xor	$t0,$t0,$acc04
1063	lwzx	$acc13,$Tbl3,$acc13
1064	xor	$t1,$t1,$acc05
1065	lwzx	$acc14,$Tbl3,$acc14
1066	xor	$t2,$t2,$acc06
1067	lwzx	$acc15,$Tbl3,$acc15
1068	xor	$t3,$t3,$acc07
1069	xor	$t0,$t0,$acc08
1070	xor	$t1,$t1,$acc09
1071	xor	$t2,$t2,$acc10
1072	xor	$t3,$t3,$acc11
# The last xor of each column writes the new state word.
1073	xor	$s0,$t0,$acc12
1074	xor	$s1,$t1,$acc13
1075	xor	$s2,$t2,$acc14
1076	xor	$s3,$t3,$acc15
1077	addi	$key,$key,16
1078	bdnz	Ldec_loop
1079
# Final round: byte-wide lookups (lbzx) in the table that starts 2048 bytes
# past Tbl0, using unscaled byte indices.  Tbl2 is repointed at that table.
1080	addi	$Tbl2,$Tbl0,2048
1081	nop
1082	lwz	$t0,0($key)
1083	rlwinm	$acc00,$s0,`32-24`,24,31
1084	lwz	$t1,4($key)
1085	rlwinm	$acc01,$s1,`32-24`,24,31
1086	lwz	$t2,8($key)
1087	rlwinm	$acc02,$s2,`32-24`,24,31
1088	lwz	$t3,12($key)
1089	rlwinm	$acc03,$s3,`32-24`,24,31
# The eight word loads at 2048+0 .. 2048+224 (32 bytes apart) only touch the
# byte table; their destinations acc08-acc15 are overwritten by the rlwinm
# index computations below before being used.
1090	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
1091	rlwinm	$acc04,$s3,`32-16`,24,31
1092	lwz	$acc09,`2048+32`($Tbl0)
1093	rlwinm	$acc05,$s0,`32-16`,24,31
1094	lwz	$acc10,`2048+64`($Tbl0)
1095	lbzx	$acc00,$Tbl2,$acc00
1096	lwz	$acc11,`2048+96`($Tbl0)
1097	lbzx	$acc01,$Tbl2,$acc01
1098	lwz	$acc12,`2048+128`($Tbl0)
1099	rlwinm	$acc06,$s1,`32-16`,24,31
1100	lwz	$acc13,`2048+160`($Tbl0)
1101	rlwinm	$acc07,$s2,`32-16`,24,31
1102	lwz	$acc14,`2048+192`($Tbl0)
1103	rlwinm	$acc08,$s2,`32-8`,24,31
1104	lwz	$acc15,`2048+224`($Tbl0)
1105	rlwinm	$acc09,$s3,`32-8`,24,31
1106	lbzx	$acc02,$Tbl2,$acc02
1107	rlwinm	$acc10,$s0,`32-8`,24,31
1108	lbzx	$acc03,$Tbl2,$acc03
1109	rlwinm	$acc11,$s1,`32-8`,24,31
1110	lbzx	$acc04,$Tbl2,$acc04
1111	rlwinm	$acc12,$s1,`0`,24,31
1112	lbzx	$acc05,$Tbl2,$acc05
1113	rlwinm	$acc13,$s2,`0`,24,31
1114	lbzx	$acc06,$Tbl2,$acc06
1115	rlwinm	$acc14,$s3,`0`,24,31
1116	lbzx	$acc07,$Tbl2,$acc07
1117	rlwinm	$acc15,$s0,`0`,24,31
1118	lbzx	$acc08,$Tbl2,$acc08
# Reassemble each state word from its four substituted bytes:
# top byte first (rlwinm), then the two middle bytes (rlwimi inserts),
# and finally the low byte (or).
1119	rlwinm	$s0,$acc00,24,0,7
1120	lbzx	$acc09,$Tbl2,$acc09
1121	rlwinm	$s1,$acc01,24,0,7
1122	lbzx	$acc10,$Tbl2,$acc10
1123	rlwinm	$s2,$acc02,24,0,7
1124	lbzx	$acc11,$Tbl2,$acc11
1125	rlwinm	$s3,$acc03,24,0,7
1126	lbzx	$acc12,$Tbl2,$acc12
1127	rlwimi	$s0,$acc04,16,8,15
1128	lbzx	$acc13,$Tbl2,$acc13
1129	rlwimi	$s1,$acc05,16,8,15
1130	lbzx	$acc14,$Tbl2,$acc14
1131	rlwimi	$s2,$acc06,16,8,15
1132	lbzx	$acc15,$Tbl2,$acc15
1133	rlwimi	$s3,$acc07,16,8,15
1134	rlwimi	$s0,$acc08,8,16,23
1135	rlwimi	$s1,$acc09,8,16,23
1136	rlwimi	$s2,$acc10,8,16,23
1137	rlwimi	$s3,$acc11,8,16,23
1138	or	$s0,$s0,$acc12
1139	or	$s1,$s1,$acc13
1140	or	$s2,$s2,$acc14
1141	or	$s3,$s3,$acc15
# Mix in the last 16 bytes of key material and return.
1142	xor	$s0,$s0,$t0
1143	xor	$s1,$s1,$t1
1144	xor	$s2,$s2,$t2
1145	xor	$s3,$s3,$t3
1146	blr
# NOTE(review): the two data directives below look like a traceback-table
# record that follows the routine -- confirm against the ABI documentation.
1147	.long	0
1148	.byte	0,12,0x14,0,0,0,0,0
1149
# Lppc_AES_decrypt_compact -- decryption core that uses only the byte-wide
# table located 2048 bytes past Tbl0; the column mixing is computed with
# arithmetic instead of table lookups.
# On entry the four 32-bit state words are in the s0-s3 registers, the key
# register points at the round-key material and Tbl0 at the lookup table.
# The result is left in s0-s3.  The key pointer, CTR, t0-t3, acc00-acc15,
# Tbl1 and the two mask registers are overwritten.
1150.align	4
1151Lppc_AES_decrypt_compact:
# The word at offset 240 from the key pointer is used unmodified as the loop
# count (presumably the round count -- TODO confirm against the key layout).
# The mask registers are built as 0x80808080 and 0x1b1b1b1b (lis + ori).
1152	lwz	$acc00,240($key)
1153	addi	$Tbl1,$Tbl0,2048
1154	lwz	$t0,0($key)
1155	lis	$mask80,0x8080
1156	lwz	$t1,4($key)
1157	lis	$mask1b,0x1b1b
1158	lwz	$t2,8($key)
1159	ori	$mask80,$mask80,0x8080
1160	lwz	$t3,12($key)
1161	ori	$mask1b,$mask1b,0x1b1b
1162	addi	$key,$key,16
1163___
# 64-bit flavour only: copy the low 32 bits of each mask into its high half
# so that the masks cover a full 64-bit register.
1164$code.=<<___ if ($SIZE_T==8);
1165	insrdi	$mask80,$mask80,32,0
1166	insrdi	$mask1b,$mask1b,32,0
1167___
1168$code.=<<___;
1169	mtctr	$acc00
1170.align	4
1171Ldec_compact_loop:
# Each iteration: mix in the current round key, substitute all 16 state
# bytes through the byte table, and rebuild the four state words.
# Byte selection: output word i takes the top byte of word i, the second
# byte of word i-1, the third byte of word i-2 and the low byte of
# word i-3 (indices modulo 4).
1172	xor	$s0,$s0,$t0
1173	xor	$s1,$s1,$t1
1174	rlwinm	$acc00,$s0,`32-24`,24,31
1175	xor	$s2,$s2,$t2
1176	rlwinm	$acc01,$s1,`32-24`,24,31
1177	xor	$s3,$s3,$t3
1178	rlwinm	$acc02,$s2,`32-24`,24,31
1179	rlwinm	$acc03,$s3,`32-24`,24,31
1180	rlwinm	$acc04,$s3,`32-16`,24,31
1181	rlwinm	$acc05,$s0,`32-16`,24,31
1182	rlwinm	$acc06,$s1,`32-16`,24,31
1183	rlwinm	$acc07,$s2,`32-16`,24,31
1184	lbzx	$acc00,$Tbl1,$acc00
1185	rlwinm	$acc08,$s2,`32-8`,24,31
1186	lbzx	$acc01,$Tbl1,$acc01
1187	rlwinm	$acc09,$s3,`32-8`,24,31
1188	lbzx	$acc02,$Tbl1,$acc02
1189	rlwinm	$acc10,$s0,`32-8`,24,31
1190	lbzx	$acc03,$Tbl1,$acc03
1191	rlwinm	$acc11,$s1,`32-8`,24,31
1192	lbzx	$acc04,$Tbl1,$acc04
1193	rlwinm	$acc12,$s1,`0`,24,31
1194	lbzx	$acc05,$Tbl1,$acc05
1195	rlwinm	$acc13,$s2,`0`,24,31
1196	lbzx	$acc06,$Tbl1,$acc06
1197	rlwinm	$acc14,$s3,`0`,24,31
1198	lbzx	$acc07,$Tbl1,$acc07
1199	rlwinm	$acc15,$s0,`0`,24,31
1200	lbzx	$acc08,$Tbl1,$acc08
# Reassemble the state words: top byte (rlwinm), two middle bytes (rlwimi),
# low byte (or).  The next round key is loaded in between.
1201	rlwinm	$s0,$acc00,24,0,7
1202	lbzx	$acc09,$Tbl1,$acc09
1203	rlwinm	$s1,$acc01,24,0,7
1204	lbzx	$acc10,$Tbl1,$acc10
1205	rlwinm	$s2,$acc02,24,0,7
1206	lbzx	$acc11,$Tbl1,$acc11
1207	rlwinm	$s3,$acc03,24,0,7
1208	lbzx	$acc12,$Tbl1,$acc12
1209	rlwimi	$s0,$acc04,16,8,15
1210	lbzx	$acc13,$Tbl1,$acc13
1211	rlwimi	$s1,$acc05,16,8,15
1212	lbzx	$acc14,$Tbl1,$acc14
1213	rlwimi	$s2,$acc06,16,8,15
1214	lbzx	$acc15,$Tbl1,$acc15
1215	rlwimi	$s3,$acc07,16,8,15
1216	rlwimi	$s0,$acc08,8,16,23
1217	rlwimi	$s1,$acc09,8,16,23
1218	rlwimi	$s2,$acc10,8,16,23
1219	rlwimi	$s3,$acc11,8,16,23
1220	lwz	$t0,0($key)
1221	or	$s0,$s0,$acc12
1222	lwz	$t1,4($key)
1223	or	$s1,$s1,$acc13
1224	lwz	$t2,8($key)
1225	or	$s2,$s2,$acc14
1226	lwz	$t3,12($key)
1227	or	$s3,$s3,$acc15
1228
# bdz decrements CTR and leaves the loop when it reaches zero, so the last
# iteration skips the column mixing below and goes straight to the final
# key addition.
1229	addi	$key,$key,16
1230	bdz	Ldec_compact_done
1231___
# 64-bit flavour: two state words are packed into one 64-bit register so the
# byte-wise doubling chain below is done on two registers instead of four.
1232$code.=<<___ if ($SIZE_T==8);
1233	# vectorized permutation improves decrypt performance by 10%
	# pack word 1 above word 0, and word 3 above word 2
1234	insrdi	$s0,$s1,32,0
1235	insrdi	$s2,$s3,32,0
1236
	# Doubling step applied to every byte at once: clear bit 7 of each
	# byte and shift left by one (done as an add), then xor 0x1b into the
	# bytes whose bit 7 was set.  r1-(r1>>7) turns each 0x80 into 0x7f,
	# which the 0x1b mask reduces to 0x1b.
	# NOTE(review): this appears to be multiplication by 2 in the AES
	# field -- confirm against the AES specification.
1237	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1238	and	$acc02,$s2,$mask80
1239	srdi	$acc04,$acc00,7		# r1>>7
1240	srdi	$acc06,$acc02,7
1241	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1242	andc	$acc10,$s2,$mask80
1243	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1244	sub	$acc02,$acc02,$acc06
1245	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1246	add	$acc10,$acc10,$acc10
1247	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1248	and	$acc02,$acc02,$mask1b
1249	xor	$acc00,$acc00,$acc08	# r2
1250	xor	$acc02,$acc02,$acc10
1251
	# same doubling step applied to r2, giving r4
1252	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1253	and	$acc06,$acc02,$mask80
1254	srdi	$acc08,$acc04,7		# r1>>7
1255	srdi	$acc10,$acc06,7
1256	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1257	andc	$acc14,$acc02,$mask80
1258	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1259	sub	$acc06,$acc06,$acc10
1260	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1261	add	$acc14,$acc14,$acc14
1262	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1263	and	$acc06,$acc06,$mask1b
1264	xor	$acc04,$acc04,$acc12	# r4
1265	xor	$acc06,$acc06,$acc14
1266
	# same doubling step applied to r4, giving r8
1267	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1268	and	$acc10,$acc06,$mask80
1269	srdi	$acc12,$acc08,7		# r1>>7
1270	srdi	$acc14,$acc10,7
1271	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1272	sub	$acc10,$acc10,$acc14
1273	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1274	andc	$acc14,$acc06,$mask80
1275	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1276	add	$acc14,$acc14,$acc14
1277	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1278	and	$acc10,$acc10,$mask1b
1279	xor	$acc08,$acc08,$acc12	# r8
1280	xor	$acc10,$acc10,$acc14
1281
1282	xor	$acc00,$acc00,$s0	# r2^r0
1283	xor	$acc02,$acc02,$s2
1284	xor	$acc04,$acc04,$s0	# r4^r0
1285	xor	$acc06,$acc06,$s2
1286
	# Unpack: copy the high 32 bits of each packed result into the
	# odd-numbered accumulator, so the shared code below finds the same
	# per-word register layout that the 32-bit flavour produces.
1287	extrdi	$acc01,$acc00,32,0
1288	extrdi	$acc03,$acc02,32,0
1289	extrdi	$acc05,$acc04,32,0
1290	extrdi	$acc07,$acc06,32,0
1291	extrdi	$acc09,$acc08,32,0
1292	extrdi	$acc11,$acc10,32,0
1293___
# 32-bit flavour: the same three doubling steps, one register per state word.
1294$code.=<<___ if ($SIZE_T==4);
	# Doubling step applied to every byte at once: clear bit 7 of each
	# byte and shift left by one (done as an add), then xor 0x1b into the
	# bytes whose bit 7 was set.
1295	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1296	and	$acc01,$s1,$mask80
1297	and	$acc02,$s2,$mask80
1298	and	$acc03,$s3,$mask80
1299	srwi	$acc04,$acc00,7		# r1>>7
1300	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1301	srwi	$acc05,$acc01,7
1302	andc	$acc09,$s1,$mask80
1303	srwi	$acc06,$acc02,7
1304	andc	$acc10,$s2,$mask80
1305	srwi	$acc07,$acc03,7
1306	andc	$acc11,$s3,$mask80
1307	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1308	sub	$acc01,$acc01,$acc05
1309	sub	$acc02,$acc02,$acc06
1310	sub	$acc03,$acc03,$acc07
1311	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1312	add	$acc09,$acc09,$acc09
1313	add	$acc10,$acc10,$acc10
1314	add	$acc11,$acc11,$acc11
1315	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1316	and	$acc01,$acc01,$mask1b
1317	and	$acc02,$acc02,$mask1b
1318	and	$acc03,$acc03,$mask1b
1319	xor	$acc00,$acc00,$acc08	# r2
1320	xor	$acc01,$acc01,$acc09
1321	xor	$acc02,$acc02,$acc10
1322	xor	$acc03,$acc03,$acc11
1323
	# same doubling step applied to r2, giving r4
1324	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1325	and	$acc05,$acc01,$mask80
1326	and	$acc06,$acc02,$mask80
1327	and	$acc07,$acc03,$mask80
1328	srwi	$acc08,$acc04,7		# r1>>7
1329	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1330	srwi	$acc09,$acc05,7
1331	andc	$acc13,$acc01,$mask80
1332	srwi	$acc10,$acc06,7
1333	andc	$acc14,$acc02,$mask80
1334	srwi	$acc11,$acc07,7
1335	andc	$acc15,$acc03,$mask80
1336	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1337	sub	$acc05,$acc05,$acc09
1338	sub	$acc06,$acc06,$acc10
1339	sub	$acc07,$acc07,$acc11
1340	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1341	add	$acc13,$acc13,$acc13
1342	add	$acc14,$acc14,$acc14
1343	add	$acc15,$acc15,$acc15
1344	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1345	and	$acc05,$acc05,$mask1b
1346	and	$acc06,$acc06,$mask1b
1347	and	$acc07,$acc07,$mask1b
1348	xor	$acc04,$acc04,$acc12	# r4
1349	xor	$acc05,$acc05,$acc13
1350	xor	$acc06,$acc06,$acc14
1351	xor	$acc07,$acc07,$acc15
1352
	# same doubling step applied to r4, giving r8
1353	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1354	and	$acc09,$acc05,$mask80
1355	srwi	$acc12,$acc08,7		# r1>>7
1356	and	$acc10,$acc06,$mask80
1357	srwi	$acc13,$acc09,7
1358	and	$acc11,$acc07,$mask80
1359	srwi	$acc14,$acc10,7
1360	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1361	srwi	$acc15,$acc11,7
1362	sub	$acc09,$acc09,$acc13
1363	sub	$acc10,$acc10,$acc14
1364	sub	$acc11,$acc11,$acc15
1365	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1366	andc	$acc13,$acc05,$mask80
1367	andc	$acc14,$acc06,$mask80
1368	andc	$acc15,$acc07,$mask80
1369	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1370	add	$acc13,$acc13,$acc13
1371	add	$acc14,$acc14,$acc14
1372	add	$acc15,$acc15,$acc15
1373	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1374	and	$acc09,$acc09,$mask1b
1375	and	$acc10,$acc10,$mask1b
1376	and	$acc11,$acc11,$mask1b
1377	xor	$acc08,$acc08,$acc12	# r8
1378	xor	$acc09,$acc09,$acc13
1379	xor	$acc10,$acc10,$acc14
1380	xor	$acc11,$acc11,$acc15
1381
1382	xor	$acc00,$acc00,$s0	# r2^r0
1383	xor	$acc01,$acc01,$s1
1384	xor	$acc02,$acc02,$s2
1385	xor	$acc03,$acc03,$s3
1386	xor	$acc04,$acc04,$s0	# r4^r0
1387	xor	$acc05,$acc05,$s1
1388	xor	$acc06,$acc06,$s2
1389	xor	$acc07,$acc07,$s3
1390___
# Shared by both flavours.  At this point acc00-03 hold r2^r0, acc04-07 hold
# r4^r0 and acc08-11 hold r8 for the four state words.
1391$code.=<<___;
	# Combine, per state word:
	#   ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
	#     ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
	# The accumulators are updated in place between uses, so the order of
	# the instructions below matters.
	# NOTE(review): this appears to be the AES inverse column mixing --
	# confirm against the AES specification.
1392	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
1393	rotrwi	$s1,$s1,8
1394	xor	$s0,$s0,$acc00		# ^= r2^r0
1395	rotrwi	$s2,$s2,8
1396	xor	$s1,$s1,$acc01
1397	rotrwi	$s3,$s3,8
1398	xor	$s2,$s2,$acc02
1399	xor	$s3,$s3,$acc03
	# acc00-03 become r8^r2^r0
1400	xor	$acc00,$acc00,$acc08
1401	xor	$acc01,$acc01,$acc09
1402	xor	$acc02,$acc02,$acc10
1403	xor	$acc03,$acc03,$acc11
1404	xor	$s0,$s0,$acc04		# ^= r4^r0
1405	rotrwi	$acc00,$acc00,24
1406	xor	$s1,$s1,$acc05
1407	rotrwi	$acc01,$acc01,24
1408	xor	$s2,$s2,$acc06
1409	rotrwi	$acc02,$acc02,24
1410	xor	$s3,$s3,$acc07
1411	rotrwi	$acc03,$acc03,24
	# acc04-07 become r8^r4^r0
1412	xor	$acc04,$acc04,$acc08
1413	xor	$acc05,$acc05,$acc09
1414	xor	$acc06,$acc06,$acc10
1415	xor	$acc07,$acc07,$acc11
1416	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1417	rotrwi	$acc04,$acc04,16
1418	xor	$s1,$s1,$acc09
1419	rotrwi	$acc05,$acc05,16
1420	xor	$s2,$s2,$acc10
1421	rotrwi	$acc06,$acc06,16
1422	xor	$s3,$s3,$acc11
1423	rotrwi	$acc07,$acc07,16
1424	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
1425	rotrwi	$acc08,$acc08,8
1426	xor	$s1,$s1,$acc01
1427	rotrwi	$acc09,$acc09,8
1428	xor	$s2,$s2,$acc02
1429	rotrwi	$acc10,$acc10,8
1430	xor	$s3,$s3,$acc03
1431	rotrwi	$acc11,$acc11,8
1432	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
1433	xor	$s1,$s1,$acc05
1434	xor	$s2,$s2,$acc06
1435	xor	$s3,$s3,$acc07
1436	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
1437	xor	$s1,$s1,$acc09
1438	xor	$s2,$s2,$acc10
1439	xor	$s3,$s3,$acc11
1440
1441	b	Ldec_compact_loop
1442.align	4
1443Ldec_compact_done:
	# Final key addition with the round key loaded in the last iteration.
1444	xor	$s0,$s0,$t0
1445	xor	$s1,$s1,$t1
1446	xor	$s2,$s2,$t2
1447	xor	$s3,$s3,$t3
1448	blr
	# NOTE(review): the two data directives below look like a
	# traceback-table record that follows the routine -- confirm against
	# the ABI documentation.
1449	.long	0
1450	.byte	0,12,0x14,0,0,0,0,0
1451.size	.AES_decrypt,.-.AES_decrypt
1452
1453.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1454.align	7
1455___
1456
# Post-process and emit the generated assembly text:
#  1. replace every backtick-quoted span in the accumulated text with the
#     result of evaluating it as a Perl expression (this is how constant
#     arithmetic such as frame offsets written between backticks is folded
#     into plain numbers);
#  2. print the resulting text to STDOUT;
#  3. close STDOUT explicitly and die if that fails, so that a buffered
#     write error is not silently lost.
1457$code =~ s/\`([^\`]*)\`/eval $1/gem;
1458print $code;
1459close STDOUT or die "error closing STDOUT: $!";
1460