xref: /freebsd/crypto/openssl/crypto/aes/asm/aes-ppc.pl (revision d6b92ffa)
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# Needs more work: key setup, CBC routine...
11#
12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13# 128-bit key, which is ~40% better than 64-bit code generated by gcc
14# 4.0. But these are not the ones currently used! Their "compact"
15# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17# at 1/3 of ppc_AES_decrypt.
18
19# February 2010
20#
21# Rescheduling instructions to favour Power6 pipeline gave 10%
22# performance improvement on the platform in question (and marginal
23# improvement even on others). It should be noted that Power6 fails
24# to process byte in 18 cycles, only in 23, because it fails to issue
25# 4 load instructions in two cycles, only in 3. As a result, non-compact
26# block subroutines are 25% slower than one would expect. Compact
27# functions scale better, because they have pure computational part,
28# which scales perfectly with clock frequency. To be specific
29# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# The first command-line argument names the target flavour.  It must
# contain "64" or "32" (tested in that order); the match selects the
# pointer size, the link-register save offset and the store-with-update /
# load / store mnemonics used by the code generated below.
$flavour = shift;

die "nonsense $flavour" unless ($flavour =~ /64/ or $flavour =~ /32/);

($SIZE_T, $STU, $POP, $PUSH) = ($flavour =~ /64/)
	? (8, "stdu", "ld",  "std")
	: (4, "stwu", "lwz", "stw");

# LR is saved at 2*$SIZE_T for 64-bit flavours and at $SIZE_T for 32-bit ones.
$LRSAVE = ($SIZE_T == 8 ? 2 : 1) * $SIZE_T;
47
# Non-zero (equal to $SIZE_T) when the flavour name ends in "le", in which
# case the byte-swapping load/store sequences are emitted; 0 otherwise.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate ppc-xlate.pl: first next to this script, then in ../../perlasm
# relative to it.  $dir is the script's directory including the trailing
# separator, or the empty string when $0 has no directory part.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# From here on everything printed to STDOUT is piped through the
# translator, which receives the flavour and the output file name (the
# second command-line argument).  The argument is checked separately and
# "or" is used so that the die applies to open() itself; with "||" it
# bound to the shifted argument and a failed open went unnoticed.
my $output = shift;
(defined($output) and $output ne "") or
die "can't call $xlate: no output file given";
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Stack frame size in bytes: 32 pointer-sized slots.
$FRAME=32*$SIZE_T;
58
# Append table words to $code as assembler data.
#
# Each argument is formatted as an unsigned 32-bit hex value and emitted
# on its own ".long" line with the value written twice, so every table
# entry occupies two identical words.  Processing stops at the first
# undefined argument.  Returns nothing.
#
# No prototype is declared: the routine takes a variable-length list, and
# an empty "()" prototype would reject every argument for calls that are
# not written with the "&" sigil.
sub _data_word
{
    foreach my $word (@_) {
	last unless defined($word);
	$code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word;
    }
    return;
}
63
# Symbolic names for the registers used by the generated code.

# Stack pointer, TOC pointer and the three incoming arguments.
($sp,$toc,$inp,$out,$key)=map("r$_",(1..5));

# Table pointers.  $Tbl0 shares r3 with $inp.
($Tbl0,$Tbl1,$Tbl2,$Tbl3)=("r3","r6","r7",$out);	# stay away from "r2"; $out is offloaded to stack

# The four 32-bit words of the cipher state.
($s0,$s1,$s2,$s3)=map("r$_",(8..11));

# Temporaries.
($t0,$t1,$t2,$t3)=map("r$_",(12,0,14,15));	# stay away from "r13";

# Sixteen accumulators in r16..r31.
($acc00,$acc01,$acc02,$acc03,
 $acc04,$acc05,$acc06,$acc07,
 $acc08,$acc09,$acc10,$acc11,
 $acc12,$acc13,$acc14,$acc15)=map("r$_",(16..31));

# The compact routines reuse two table registers for their bit masks.
($mask80,$mask1b)=($Tbl2,$Tbl3);
107
# LAES_Te / LAES_Td: position-independent helpers that return a table
# address in $Tbl0.  Each one saves LR in r0, executes "bcl 20,31,$+4" so
# that LR holds the address of the following instruction (label + 8),
# copies that into $Tbl0, adds the distance from there to the table and
# restores LR before returning.
#
# Each stub is 9*4 bytes (six instructions, one .long, eight .byte) and is
# padded with .space so that LAES_Td starts 64 bytes after LAES_Te and the
# data emitted right after this heredoc starts 128 bytes after LAES_Te.
# Hence the Te offset is 128-8, and the Td offset is 128-64-8 plus the
# 2048+256 bytes of table data that precede the decryption table.
108$code.=<<___;
109.machine	"any"
110.text
111
112.align	7
113LAES_Te:
114	mflr	r0
115	bcl	20,31,\$+4
116	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
117	addi	$Tbl0,$Tbl0,`128-8`
118	mtlr	r0
119	blr
120	.long	0
121	.byte	0,12,0x14,0,0,0,0,0
122	.space	`64-9*4`
123LAES_Td:
124	mflr	r0
125	bcl	20,31,\$+4
126	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
127	addi	$Tbl0,$Tbl0,`128-64-8+2048+256`
128	mtlr	r0
129	blr
130	.long	0
131	.byte	0,12,0x14,0,0,0,0,0
132	.space	`128-64-9*4`
133___
134&_data_word(
135	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
136	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
137	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
138	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
139	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
140	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
141	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
142	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
143	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
144	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
145	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
146	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
147	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
148	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
149	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
150	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
151	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
152	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
153	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
154	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
155	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
156	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
157	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
158	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
159	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
160	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
161	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
162	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
163	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
164	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
165	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
166	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
167	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
168	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
169	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
170	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
171	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
172	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
173	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
174	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
175	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
176	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
177	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
178	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
179	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
180	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
181	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
182	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
183	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
184	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
185	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
186	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
187	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
188	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
189	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
190	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
191	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
192	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
193	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
194	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
195	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
196	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
197	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
198	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
199$code.=<<___;
200.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
201.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
202.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
203.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
204.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
205.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
206.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
207.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
208.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
209.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
210.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
211.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
212.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
213.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
214.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
215.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
216.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
217.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
218.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
219.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
220.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
221.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
222.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
223.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
224.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
225.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
226.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
227.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
228.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
229.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
230.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
231.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
232___
233&_data_word(
234	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
235	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
236	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
237	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
238	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
239	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
240	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
241	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
242	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
243	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
244	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
245	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
246	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
247	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
248	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
249	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
250	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
251	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
252	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
253	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
254	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
255	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
256	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
257	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
258	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
259	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
260	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
261	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
262	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
263	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
264	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
265	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
266	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
267	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
268	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
269	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
270	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
271	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
272	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
273	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
274	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
275	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
276	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
277	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
278	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
279	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
280	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
281	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
282	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
283	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
284	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
285	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
286	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
287	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
288	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
289	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
290	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
291	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
292	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
293	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
294	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
295	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
296	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
297	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
298$code.=<<___;
299.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
300.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
301.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
302.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
303.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
304.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
305.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
306.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
307.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
308.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
309.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
310.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
311.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
312.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
313.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
314.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
315.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
316.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
317.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
318.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
319.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
320.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
321.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
322.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
323.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
324.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
325.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
326.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
327.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
328.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
329.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
330.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
331
332
333.globl	.AES_encrypt
334.align	7
335.AES_encrypt:
336	$STU	$sp,-$FRAME($sp)
337	mflr	r0
338
339	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
340	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
341	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
342	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
343	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
344	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
345	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
346	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
347	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
348	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
349	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
350	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
351	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
352	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
353	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
354	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
355	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
356	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
357	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
358	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
359
360	andi.	$t0,$inp,3
361	andi.	$t1,$out,3
362	or.	$t0,$t0,$t1
363	bne	Lenc_unaligned
364
365Lenc_unaligned_ok:
366___
# Word-access path of .AES_encrypt (the code following Lenc_unaligned_ok).
# Big-endian flavours load the 16-byte input block straight into
# $s0..$s3 with four word loads.
367$code.=<<___ if (!$LITTLE_ENDIAN);
368	lwz	$s0,0($inp)
369	lwz	$s1,4($inp)
370	lwz	$s2,8($inp)
371	lwz	$s3,12($inp)
372___
# Little-endian flavours load the words into $t0..$t3 and byte-reverse
# each one into $s0..$s3: the rotate by 8 leaves two of the four bytes in
# their final position, and the two rlwimi (rotate by 24, insert into
# bits 0-7 and 16-23) place the other two.
373$code.=<<___ if ($LITTLE_ENDIAN);
374	lwz	$t0,0($inp)
375	lwz	$t1,4($inp)
376	lwz	$t2,8($inp)
377	lwz	$t3,12($inp)
378	rotlwi	$s0,$t0,8
379	rotlwi	$s1,$t1,8
380	rotlwi	$s2,$t2,8
381	rotlwi	$s3,$t3,8
382	rlwimi	$s0,$t0,24,0,7
383	rlwimi	$s1,$t1,24,0,7
384	rlwimi	$s2,$t2,24,0,7
385	rlwimi	$s3,$t3,24,0,7
386	rlwimi	$s0,$t0,24,16,23
387	rlwimi	$s1,$t1,24,16,23
388	rlwimi	$s2,$t2,24,16,23
389	rlwimi	$s3,$t3,24,16,23
390___
# LAES_Te leaves the encryption table address in $Tbl0, which is the same
# register as $inp (the input has already been loaded).
# Lppc_AES_encrypt_compact then processes $s0..$s3 with the key schedule
# at $key.  $out is reloaded from the stack frame afterwards because its
# register is also used as $Tbl3/$mask1b.
391$code.=<<___;
392	bl	LAES_Te
393	bl	Lppc_AES_encrypt_compact
394	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
395___
# Little-endian: byte-reverse the result words into $t0..$t3 (same
# rotlwi/rlwimi sequence as on input) and store them to the output block.
396$code.=<<___ if ($LITTLE_ENDIAN);
397	rotlwi	$t0,$s0,8
398	rotlwi	$t1,$s1,8
399	rotlwi	$t2,$s2,8
400	rotlwi	$t3,$s3,8
401	rlwimi	$t0,$s0,24,0,7
402	rlwimi	$t1,$s1,24,0,7
403	rlwimi	$t2,$s2,24,0,7
404	rlwimi	$t3,$s3,24,0,7
405	rlwimi	$t0,$s0,24,16,23
406	rlwimi	$t1,$s1,24,16,23
407	rlwimi	$t2,$s2,24,16,23
408	rlwimi	$t3,$s3,24,16,23
409	stw	$t0,0($out)
410	stw	$t1,4($out)
411	stw	$t2,8($out)
412	stw	$t3,12($out)
413___
# Big-endian: store the result words as they are.
414$code.=<<___ if (!$LITTLE_ENDIAN);
415	stw	$s0,0($out)
416	stw	$s1,4($out)
417	stw	$s2,8($out)
418	stw	$s3,12($out)
419___
420$code.=<<___;
421	b	Lenc_done
422
423Lenc_unaligned:
424	subfic	$t0,$inp,4096
425	subfic	$t1,$out,4096
426	andi.	$t0,$t0,4096-16
427	beq	Lenc_xpage
428	andi.	$t1,$t1,4096-16
429	bne	Lenc_unaligned_ok
430
431Lenc_xpage:
432	lbz	$acc00,0($inp)
433	lbz	$acc01,1($inp)
434	lbz	$acc02,2($inp)
435	lbz	$s0,3($inp)
436	lbz	$acc04,4($inp)
437	lbz	$acc05,5($inp)
438	lbz	$acc06,6($inp)
439	lbz	$s1,7($inp)
440	lbz	$acc08,8($inp)
441	lbz	$acc09,9($inp)
442	lbz	$acc10,10($inp)
443	insrwi	$s0,$acc00,8,0
444	lbz	$s2,11($inp)
445	insrwi	$s1,$acc04,8,0
446	lbz	$acc12,12($inp)
447	insrwi	$s0,$acc01,8,8
448	lbz	$acc13,13($inp)
449	insrwi	$s1,$acc05,8,8
450	lbz	$acc14,14($inp)
451	insrwi	$s0,$acc02,8,16
452	lbz	$s3,15($inp)
453	insrwi	$s1,$acc06,8,16
454	insrwi	$s2,$acc08,8,0
455	insrwi	$s3,$acc12,8,0
456	insrwi	$s2,$acc09,8,8
457	insrwi	$s3,$acc13,8,8
458	insrwi	$s2,$acc10,8,16
459	insrwi	$s3,$acc14,8,16
460
461	bl	LAES_Te
462	bl	Lppc_AES_encrypt_compact
463	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
464
465	extrwi	$acc00,$s0,8,0
466	extrwi	$acc01,$s0,8,8
467	stb	$acc00,0($out)
468	extrwi	$acc02,$s0,8,16
469	stb	$acc01,1($out)
470	stb	$acc02,2($out)
471	extrwi	$acc04,$s1,8,0
472	stb	$s0,3($out)
473	extrwi	$acc05,$s1,8,8
474	stb	$acc04,4($out)
475	extrwi	$acc06,$s1,8,16
476	stb	$acc05,5($out)
477	stb	$acc06,6($out)
478	extrwi	$acc08,$s2,8,0
479	stb	$s1,7($out)
480	extrwi	$acc09,$s2,8,8
481	stb	$acc08,8($out)
482	extrwi	$acc10,$s2,8,16
483	stb	$acc09,9($out)
484	stb	$acc10,10($out)
485	extrwi	$acc12,$s3,8,0
486	stb	$s2,11($out)
487	extrwi	$acc13,$s3,8,8
488	stb	$acc12,12($out)
489	extrwi	$acc14,$s3,8,16
490	stb	$acc13,13($out)
491	stb	$acc14,14($out)
492	stb	$s3,15($out)
493
494Lenc_done:
495	$POP	r0,`$FRAME+$LRSAVE`($sp)
496	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
497	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
498	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
499	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
500	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
501	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
502	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
503	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
504	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
505	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
506	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
507	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
508	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
509	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
510	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
511	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
512	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
513	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
514	mtlr	r0
515	addi	$sp,$sp,$FRAME
516	blr
517	.long	0
518	.byte	0,12,4,1,0x80,18,3,0
519	.long	0
520
521.align	5
522Lppc_AES_encrypt:
523	lwz	$acc00,240($key)
524	addi	$Tbl1,$Tbl0,3
525	lwz	$t0,0($key)
526	addi	$Tbl2,$Tbl0,2
527	lwz	$t1,4($key)
528	addi	$Tbl3,$Tbl0,1
529	lwz	$t2,8($key)
530	addi	$acc00,$acc00,-1
531	lwz	$t3,12($key)
532	addi	$key,$key,16
533	xor	$s0,$s0,$t0
534	xor	$s1,$s1,$t1
535	xor	$s2,$s2,$t2
536	xor	$s3,$s3,$t3
537	mtctr	$acc00
538.align	4
539Lenc_loop:
540	rlwinm	$acc00,$s0,`32-24+3`,21,28
541	rlwinm	$acc01,$s1,`32-24+3`,21,28
542	rlwinm	$acc02,$s2,`32-24+3`,21,28
543	rlwinm	$acc03,$s3,`32-24+3`,21,28
544	lwz	$t0,0($key)
545	rlwinm	$acc04,$s1,`32-16+3`,21,28
546	lwz	$t1,4($key)
547	rlwinm	$acc05,$s2,`32-16+3`,21,28
548	lwz	$t2,8($key)
549	rlwinm	$acc06,$s3,`32-16+3`,21,28
550	lwz	$t3,12($key)
551	rlwinm	$acc07,$s0,`32-16+3`,21,28
552	lwzx	$acc00,$Tbl0,$acc00
553	rlwinm	$acc08,$s2,`32-8+3`,21,28
554	lwzx	$acc01,$Tbl0,$acc01
555	rlwinm	$acc09,$s3,`32-8+3`,21,28
556	lwzx	$acc02,$Tbl0,$acc02
557	rlwinm	$acc10,$s0,`32-8+3`,21,28
558	lwzx	$acc03,$Tbl0,$acc03
559	rlwinm	$acc11,$s1,`32-8+3`,21,28
560	lwzx	$acc04,$Tbl1,$acc04
561	rlwinm	$acc12,$s3,`0+3`,21,28
562	lwzx	$acc05,$Tbl1,$acc05
563	rlwinm	$acc13,$s0,`0+3`,21,28
564	lwzx	$acc06,$Tbl1,$acc06
565	rlwinm	$acc14,$s1,`0+3`,21,28
566	lwzx	$acc07,$Tbl1,$acc07
567	rlwinm	$acc15,$s2,`0+3`,21,28
568	lwzx	$acc08,$Tbl2,$acc08
569	xor	$t0,$t0,$acc00
570	lwzx	$acc09,$Tbl2,$acc09
571	xor	$t1,$t1,$acc01
572	lwzx	$acc10,$Tbl2,$acc10
573	xor	$t2,$t2,$acc02
574	lwzx	$acc11,$Tbl2,$acc11
575	xor	$t3,$t3,$acc03
576	lwzx	$acc12,$Tbl3,$acc12
577	xor	$t0,$t0,$acc04
578	lwzx	$acc13,$Tbl3,$acc13
579	xor	$t1,$t1,$acc05
580	lwzx	$acc14,$Tbl3,$acc14
581	xor	$t2,$t2,$acc06
582	lwzx	$acc15,$Tbl3,$acc15
583	xor	$t3,$t3,$acc07
584	xor	$t0,$t0,$acc08
585	xor	$t1,$t1,$acc09
586	xor	$t2,$t2,$acc10
587	xor	$t3,$t3,$acc11
588	xor	$s0,$t0,$acc12
589	xor	$s1,$t1,$acc13
590	xor	$s2,$t2,$acc14
591	xor	$s3,$t3,$acc15
592	addi	$key,$key,16
593	bdnz	Lenc_loop
594
595	addi	$Tbl2,$Tbl0,2048
596	nop
597	lwz	$t0,0($key)
598	rlwinm	$acc00,$s0,`32-24`,24,31
599	lwz	$t1,4($key)
600	rlwinm	$acc01,$s1,`32-24`,24,31
601	lwz	$t2,8($key)
602	rlwinm	$acc02,$s2,`32-24`,24,31
603	lwz	$t3,12($key)
604	rlwinm	$acc03,$s3,`32-24`,24,31
605	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
606	rlwinm	$acc04,$s1,`32-16`,24,31
607	lwz	$acc09,`2048+32`($Tbl0)
608	rlwinm	$acc05,$s2,`32-16`,24,31
609	lwz	$acc10,`2048+64`($Tbl0)
610	rlwinm	$acc06,$s3,`32-16`,24,31
611	lwz	$acc11,`2048+96`($Tbl0)
612	rlwinm	$acc07,$s0,`32-16`,24,31
613	lwz	$acc12,`2048+128`($Tbl0)
614	rlwinm	$acc08,$s2,`32-8`,24,31
615	lwz	$acc13,`2048+160`($Tbl0)
616	rlwinm	$acc09,$s3,`32-8`,24,31
617	lwz	$acc14,`2048+192`($Tbl0)
618	rlwinm	$acc10,$s0,`32-8`,24,31
619	lwz	$acc15,`2048+224`($Tbl0)
620	rlwinm	$acc11,$s1,`32-8`,24,31
621	lbzx	$acc00,$Tbl2,$acc00
622	rlwinm	$acc12,$s3,`0`,24,31
623	lbzx	$acc01,$Tbl2,$acc01
624	rlwinm	$acc13,$s0,`0`,24,31
625	lbzx	$acc02,$Tbl2,$acc02
626	rlwinm	$acc14,$s1,`0`,24,31
627	lbzx	$acc03,$Tbl2,$acc03
628	rlwinm	$acc15,$s2,`0`,24,31
629	lbzx	$acc04,$Tbl2,$acc04
630	rlwinm	$s0,$acc00,24,0,7
631	lbzx	$acc05,$Tbl2,$acc05
632	rlwinm	$s1,$acc01,24,0,7
633	lbzx	$acc06,$Tbl2,$acc06
634	rlwinm	$s2,$acc02,24,0,7
635	lbzx	$acc07,$Tbl2,$acc07
636	rlwinm	$s3,$acc03,24,0,7
637	lbzx	$acc08,$Tbl2,$acc08
638	rlwimi	$s0,$acc04,16,8,15
639	lbzx	$acc09,$Tbl2,$acc09
640	rlwimi	$s1,$acc05,16,8,15
641	lbzx	$acc10,$Tbl2,$acc10
642	rlwimi	$s2,$acc06,16,8,15
643	lbzx	$acc11,$Tbl2,$acc11
644	rlwimi	$s3,$acc07,16,8,15
645	lbzx	$acc12,$Tbl2,$acc12
646	rlwimi	$s0,$acc08,8,16,23
647	lbzx	$acc13,$Tbl2,$acc13
648	rlwimi	$s1,$acc09,8,16,23
649	lbzx	$acc14,$Tbl2,$acc14
650	rlwimi	$s2,$acc10,8,16,23
651	lbzx	$acc15,$Tbl2,$acc15
652	rlwimi	$s3,$acc11,8,16,23
653	or	$s0,$s0,$acc12
654	or	$s1,$s1,$acc13
655	or	$s2,$s2,$acc14
656	or	$s3,$s3,$acc15
657	xor	$s0,$s0,$t0
658	xor	$s1,$s1,$t1
659	xor	$s2,$s2,$t2
660	xor	$s3,$s3,$t3
661	blr
662	.long	0
663	.byte	0,12,0x14,0,0,0,0,0
664
665.align	4
666Lppc_AES_encrypt_compact:
667	lwz	$acc00,240($key)
668	addi	$Tbl1,$Tbl0,2048
669	lwz	$t0,0($key)
670	lis	$mask80,0x8080
671	lwz	$t1,4($key)
672	lis	$mask1b,0x1b1b
673	lwz	$t2,8($key)
674	ori	$mask80,$mask80,0x8080
675	lwz	$t3,12($key)
676	ori	$mask1b,$mask1b,0x1b1b
677	addi	$key,$key,16
678	mtctr	$acc00
679.align	4
680Lenc_compact_loop:
681	xor	$s0,$s0,$t0
682	xor	$s1,$s1,$t1
683	rlwinm	$acc00,$s0,`32-24`,24,31
684	xor	$s2,$s2,$t2
685	rlwinm	$acc01,$s1,`32-24`,24,31
686	xor	$s3,$s3,$t3
687	rlwinm	$acc02,$s2,`32-24`,24,31
688	rlwinm	$acc03,$s3,`32-24`,24,31
689	rlwinm	$acc04,$s1,`32-16`,24,31
690	rlwinm	$acc05,$s2,`32-16`,24,31
691	rlwinm	$acc06,$s3,`32-16`,24,31
692	rlwinm	$acc07,$s0,`32-16`,24,31
693	lbzx	$acc00,$Tbl1,$acc00
694	rlwinm	$acc08,$s2,`32-8`,24,31
695	lbzx	$acc01,$Tbl1,$acc01
696	rlwinm	$acc09,$s3,`32-8`,24,31
697	lbzx	$acc02,$Tbl1,$acc02
698	rlwinm	$acc10,$s0,`32-8`,24,31
699	lbzx	$acc03,$Tbl1,$acc03
700	rlwinm	$acc11,$s1,`32-8`,24,31
701	lbzx	$acc04,$Tbl1,$acc04
702	rlwinm	$acc12,$s3,`0`,24,31
703	lbzx	$acc05,$Tbl1,$acc05
704	rlwinm	$acc13,$s0,`0`,24,31
705	lbzx	$acc06,$Tbl1,$acc06
706	rlwinm	$acc14,$s1,`0`,24,31
707	lbzx	$acc07,$Tbl1,$acc07
708	rlwinm	$acc15,$s2,`0`,24,31
709	lbzx	$acc08,$Tbl1,$acc08
710	rlwinm	$s0,$acc00,24,0,7
711	lbzx	$acc09,$Tbl1,$acc09
712	rlwinm	$s1,$acc01,24,0,7
713	lbzx	$acc10,$Tbl1,$acc10
714	rlwinm	$s2,$acc02,24,0,7
715	lbzx	$acc11,$Tbl1,$acc11
716	rlwinm	$s3,$acc03,24,0,7
717	lbzx	$acc12,$Tbl1,$acc12
718	rlwimi	$s0,$acc04,16,8,15
719	lbzx	$acc13,$Tbl1,$acc13
720	rlwimi	$s1,$acc05,16,8,15
721	lbzx	$acc14,$Tbl1,$acc14
722	rlwimi	$s2,$acc06,16,8,15
723	lbzx	$acc15,$Tbl1,$acc15
724	rlwimi	$s3,$acc07,16,8,15
725	rlwimi	$s0,$acc08,8,16,23
726	rlwimi	$s1,$acc09,8,16,23
727	rlwimi	$s2,$acc10,8,16,23
728	rlwimi	$s3,$acc11,8,16,23
729	lwz	$t0,0($key)
730	or	$s0,$s0,$acc12
731	lwz	$t1,4($key)
732	or	$s1,$s1,$acc13
733	lwz	$t2,8($key)
734	or	$s2,$s2,$acc14
735	lwz	$t3,12($key)
736	or	$s3,$s3,$acc15
737
738	addi	$key,$key,16
739	bdz	Lenc_compact_done
740
741	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
742	and	$acc01,$s1,$mask80
743	and	$acc02,$s2,$mask80
744	and	$acc03,$s3,$mask80
745	srwi	$acc04,$acc00,7		# r1>>7
746	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
747	srwi	$acc05,$acc01,7
748	andc	$acc09,$s1,$mask80
749	srwi	$acc06,$acc02,7
750	andc	$acc10,$s2,$mask80
751	srwi	$acc07,$acc03,7
752	andc	$acc11,$s3,$mask80
753	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
754	sub	$acc01,$acc01,$acc05
755	sub	$acc02,$acc02,$acc06
756	sub	$acc03,$acc03,$acc07
757	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
758	add	$acc09,$acc09,$acc09
759	add	$acc10,$acc10,$acc10
760	add	$acc11,$acc11,$acc11
761	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
762	and	$acc01,$acc01,$mask1b
763	and	$acc02,$acc02,$mask1b
764	and	$acc03,$acc03,$mask1b
765	xor	$acc00,$acc00,$acc08	# r2
766	xor	$acc01,$acc01,$acc09
767	 rotlwi	$acc12,$s0,16		# ROTATE(r0,16)
768	xor	$acc02,$acc02,$acc10
769	 rotlwi	$acc13,$s1,16
770	xor	$acc03,$acc03,$acc11
771	 rotlwi	$acc14,$s2,16
772
773	xor	$s0,$s0,$acc00		# r0^r2
774	rotlwi	$acc15,$s3,16
775	xor	$s1,$s1,$acc01
776	rotrwi	$s0,$s0,24		# ROTATE(r2^r0,24)
777	xor	$s2,$s2,$acc02
778	rotrwi	$s1,$s1,24
779	xor	$s3,$s3,$acc03
780	rotrwi	$s2,$s2,24
781	xor	$s0,$s0,$acc00		# ROTATE(r2^r0,24)^r2
782	rotrwi	$s3,$s3,24
783	xor	$s1,$s1,$acc01
784	xor	$s2,$s2,$acc02
785	xor	$s3,$s3,$acc03
786	rotlwi	$acc08,$acc12,8		# ROTATE(r0,24)
787	xor	$s0,$s0,$acc12		#
788	rotlwi	$acc09,$acc13,8
789	xor	$s1,$s1,$acc13
790	rotlwi	$acc10,$acc14,8
791	xor	$s2,$s2,$acc14
792	rotlwi	$acc11,$acc15,8
793	xor	$s3,$s3,$acc15
794	xor	$s0,$s0,$acc08		#
795	xor	$s1,$s1,$acc09
796	xor	$s2,$s2,$acc10
797	xor	$s3,$s3,$acc11
798
799	b	Lenc_compact_loop
800.align	4
801Lenc_compact_done:
802	xor	$s0,$s0,$t0
803	xor	$s1,$s1,$t1
804	xor	$s2,$s2,$t2
805	xor	$s3,$s3,$t3
806	blr
807	.long	0
808	.byte	0,12,0x14,0,0,0,0,0
809.size	.AES_encrypt,.-.AES_encrypt
810
811.globl	.AES_decrypt
812.align	7
813.AES_decrypt:
814	$STU	$sp,-$FRAME($sp)
815	mflr	r0
816
817	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
818	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
819	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
820	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
821	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
822	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
823	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
824	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
825	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
826	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
827	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
828	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
829	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
830	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
831	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
832	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
833	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
834	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
835	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
836	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
837
838	andi.	$t0,$inp,3
839	andi.	$t1,$out,3
840	or.	$t0,$t0,$t1
841	bne	Ldec_unaligned
842
843Ldec_unaligned_ok:
844___
# Word-access path of .AES_decrypt (the code following Ldec_unaligned_ok).
# Big-endian flavours load the 16-byte input block straight into
# $s0..$s3 with four word loads.
845$code.=<<___ if (!$LITTLE_ENDIAN);
846	lwz	$s0,0($inp)
847	lwz	$s1,4($inp)
848	lwz	$s2,8($inp)
849	lwz	$s3,12($inp)
850___
# Little-endian flavours load the words into $t0..$t3 and byte-reverse
# each one into $s0..$s3: the rotate by 8 leaves two of the four bytes in
# their final position, and the two rlwimi (rotate by 24, insert into
# bits 0-7 and 16-23) place the other two.
851$code.=<<___ if ($LITTLE_ENDIAN);
852	lwz	$t0,0($inp)
853	lwz	$t1,4($inp)
854	lwz	$t2,8($inp)
855	lwz	$t3,12($inp)
856	rotlwi	$s0,$t0,8
857	rotlwi	$s1,$t1,8
858	rotlwi	$s2,$t2,8
859	rotlwi	$s3,$t3,8
860	rlwimi	$s0,$t0,24,0,7
861	rlwimi	$s1,$t1,24,0,7
862	rlwimi	$s2,$t2,24,0,7
863	rlwimi	$s3,$t3,24,0,7
864	rlwimi	$s0,$t0,24,16,23
865	rlwimi	$s1,$t1,24,16,23
866	rlwimi	$s2,$t2,24,16,23
867	rlwimi	$s3,$t3,24,16,23
868___
# LAES_Td leaves the decryption table address in $Tbl0, which is the same
# register as $inp (the input has already been loaded).  The block is then
# handed to Lppc_AES_decrypt_compact in $s0..$s3.  $out is reloaded from
# the stack frame afterwards; its register is also assigned to
# $Tbl3/$mask1b.
869$code.=<<___;
870	bl	LAES_Td
871	bl	Lppc_AES_decrypt_compact
872	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
873___
# Little-endian: byte-reverse the result words into $t0..$t3 (same
# rotlwi/rlwimi sequence as on input) and store them to the output block.
874$code.=<<___ if ($LITTLE_ENDIAN);
875	rotlwi	$t0,$s0,8
876	rotlwi	$t1,$s1,8
877	rotlwi	$t2,$s2,8
878	rotlwi	$t3,$s3,8
879	rlwimi	$t0,$s0,24,0,7
880	rlwimi	$t1,$s1,24,0,7
881	rlwimi	$t2,$s2,24,0,7
882	rlwimi	$t3,$s3,24,0,7
883	rlwimi	$t0,$s0,24,16,23
884	rlwimi	$t1,$s1,24,16,23
885	rlwimi	$t2,$s2,24,16,23
886	rlwimi	$t3,$s3,24,16,23
887	stw	$t0,0($out)
888	stw	$t1,4($out)
889	stw	$t2,8($out)
890	stw	$t3,12($out)
891___
# Big-endian: store the result words as they are.
892$code.=<<___ if (!$LITTLE_ENDIAN);
893	stw	$s0,0($out)
894	stw	$s1,4($out)
895	stw	$s2,8($out)
896	stw	$s3,12($out)
897___
898$code.=<<___;
899	b	Ldec_done
900
901Ldec_unaligned:
902	subfic	$t0,$inp,4096
903	subfic	$t1,$out,4096
904	andi.	$t0,$t0,4096-16
905	beq	Ldec_xpage
906	andi.	$t1,$t1,4096-16
907	bne	Ldec_unaligned_ok
908
909Ldec_xpage:
910	lbz	$acc00,0($inp)
911	lbz	$acc01,1($inp)
912	lbz	$acc02,2($inp)
913	lbz	$s0,3($inp)
914	lbz	$acc04,4($inp)
915	lbz	$acc05,5($inp)
916	lbz	$acc06,6($inp)
917	lbz	$s1,7($inp)
918	lbz	$acc08,8($inp)
919	lbz	$acc09,9($inp)
920	lbz	$acc10,10($inp)
921	insrwi	$s0,$acc00,8,0
922	lbz	$s2,11($inp)
923	insrwi	$s1,$acc04,8,0
924	lbz	$acc12,12($inp)
925	insrwi	$s0,$acc01,8,8
926	lbz	$acc13,13($inp)
927	insrwi	$s1,$acc05,8,8
928	lbz	$acc14,14($inp)
929	insrwi	$s0,$acc02,8,16
930	lbz	$s3,15($inp)
931	insrwi	$s1,$acc06,8,16
932	insrwi	$s2,$acc08,8,0
933	insrwi	$s3,$acc12,8,0
934	insrwi	$s2,$acc09,8,8
935	insrwi	$s3,$acc13,8,8
936	insrwi	$s2,$acc10,8,16
937	insrwi	$s3,$acc14,8,16
938
939	bl	LAES_Td
940	bl	Lppc_AES_decrypt_compact
941	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
942
943	extrwi	$acc00,$s0,8,0
944	extrwi	$acc01,$s0,8,8
945	stb	$acc00,0($out)
946	extrwi	$acc02,$s0,8,16
947	stb	$acc01,1($out)
948	stb	$acc02,2($out)
949	extrwi	$acc04,$s1,8,0
950	stb	$s0,3($out)
951	extrwi	$acc05,$s1,8,8
952	stb	$acc04,4($out)
953	extrwi	$acc06,$s1,8,16
954	stb	$acc05,5($out)
955	stb	$acc06,6($out)
956	extrwi	$acc08,$s2,8,0
957	stb	$s1,7($out)
958	extrwi	$acc09,$s2,8,8
959	stb	$acc08,8($out)
960	extrwi	$acc10,$s2,8,16
961	stb	$acc09,9($out)
962	stb	$acc10,10($out)
963	extrwi	$acc12,$s3,8,0
964	stb	$s2,11($out)
965	extrwi	$acc13,$s3,8,8
966	stb	$acc12,12($out)
967	extrwi	$acc14,$s3,8,16
968	stb	$acc13,13($out)
969	stb	$acc14,14($out)
970	stb	$s3,15($out)
971
972Ldec_done:
973	$POP	r0,`$FRAME+$LRSAVE`($sp)
974	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
975	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
976	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
977	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
978	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
979	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
980	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
981	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
982	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
983	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
984	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
985	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
986	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
987	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
988	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
989	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
990	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
991	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
992	mtlr	r0
993	addi	$sp,$sp,$FRAME
994	blr
995	.long	0
996	.byte	0,12,4,1,0x80,18,3,0
997	.long	0
998
# Lppc_AES_decrypt - table-driven decryption core for one 16-byte block.
#
# In:   s0-s3  state words
#       key    points at the first round key; the word at key+240 is used as
#              the loop count (presumably the AES round count - confirm
#              against the key schedule layout)
#       Tbl0   base of the lookup tables (set up by the caller, not shown here)
# Out:  s0-s3  result words
# Uses: t0-t3, acc00-acc15, Tbl1-Tbl3 and CTR; key is advanced as keys are read.
#
# Tbl1/Tbl2/Tbl3 are Tbl0 plus 3/2/1 bytes and every index is a state byte
# scaled by 8, so each lookup is a word load at byte offset 0..3 inside an
# 8-byte table entry.
# NOTE(review): this presumably relies on each entry holding the same word
# twice, so that the offset loads return byte-rotated values - confirm
# against the table definition.
999.align	5
1000Lppc_AES_decrypt:
1001	lwz	$acc00,240($key)	# loop count, see header
1002	addi	$Tbl1,$Tbl0,3
1003	lwz	$t0,0($key)
1004	addi	$Tbl2,$Tbl0,2
1005	lwz	$t1,4($key)
1006	addi	$Tbl3,$Tbl0,1
1007	lwz	$t2,8($key)
1008	addi	$acc00,$acc00,-1	# the last round is done after the loop
1009	lwz	$t3,12($key)
1010	addi	$key,$key,16
# XOR the first round key into the state.
1011	xor	$s0,$s0,$t0
1012	xor	$s1,$s1,$t1
1013	xor	$s2,$s2,$t2
1014	xor	$s3,$s3,$t3
1015	mtctr	$acc00
1016.align	4
# One full round per iteration: 16 scaled byte indexes, 16 word lookups, all
# XORed into the next round key (t0-t3), which becomes the new state.
# rlwinm x,y,32-N+3,21,28 yields ((y >> N) & 0xff) * 8.
# The source word rotates with the byte position (top byte: s0,s1,s2,s3;
# next: s3,s0,s1,s2; next: s2,s3,s0,s1; low byte: s1,s2,s3,s0), which is the
# inverse ShiftRows selection. Loads and rotates are interleaved on purpose;
# do not regroup them.
1017Ldec_loop:
1018	rlwinm	$acc00,$s0,`32-24+3`,21,28
1019	rlwinm	$acc01,$s1,`32-24+3`,21,28
1020	rlwinm	$acc02,$s2,`32-24+3`,21,28
1021	rlwinm	$acc03,$s3,`32-24+3`,21,28
1022	lwz	$t0,0($key)
1023	rlwinm	$acc04,$s3,`32-16+3`,21,28
1024	lwz	$t1,4($key)
1025	rlwinm	$acc05,$s0,`32-16+3`,21,28
1026	lwz	$t2,8($key)
1027	rlwinm	$acc06,$s1,`32-16+3`,21,28
1028	lwz	$t3,12($key)
1029	rlwinm	$acc07,$s2,`32-16+3`,21,28
1030	lwzx	$acc00,$Tbl0,$acc00
1031	rlwinm	$acc08,$s2,`32-8+3`,21,28
1032	lwzx	$acc01,$Tbl0,$acc01
1033	rlwinm	$acc09,$s3,`32-8+3`,21,28
1034	lwzx	$acc02,$Tbl0,$acc02
1035	rlwinm	$acc10,$s0,`32-8+3`,21,28
1036	lwzx	$acc03,$Tbl0,$acc03
1037	rlwinm	$acc11,$s1,`32-8+3`,21,28
1038	lwzx	$acc04,$Tbl1,$acc04
1039	rlwinm	$acc12,$s1,`0+3`,21,28
1040	lwzx	$acc05,$Tbl1,$acc05
1041	rlwinm	$acc13,$s2,`0+3`,21,28
1042	lwzx	$acc06,$Tbl1,$acc06
1043	rlwinm	$acc14,$s3,`0+3`,21,28
1044	lwzx	$acc07,$Tbl1,$acc07
1045	rlwinm	$acc15,$s0,`0+3`,21,28
1046	lwzx	$acc08,$Tbl2,$acc08
1047	xor	$t0,$t0,$acc00
1048	lwzx	$acc09,$Tbl2,$acc09
1049	xor	$t1,$t1,$acc01
1050	lwzx	$acc10,$Tbl2,$acc10
1051	xor	$t2,$t2,$acc02
1052	lwzx	$acc11,$Tbl2,$acc11
1053	xor	$t3,$t3,$acc03
1054	lwzx	$acc12,$Tbl3,$acc12
1055	xor	$t0,$t0,$acc04
1056	lwzx	$acc13,$Tbl3,$acc13
1057	xor	$t1,$t1,$acc05
1058	lwzx	$acc14,$Tbl3,$acc14
1059	xor	$t2,$t2,$acc06
1060	lwzx	$acc15,$Tbl3,$acc15
1061	xor	$t3,$t3,$acc07
1062	xor	$t0,$t0,$acc08
1063	xor	$t1,$t1,$acc09
1064	xor	$t2,$t2,$acc10
1065	xor	$t3,$t3,$acc11
1066	xor	$s0,$t0,$acc12
1067	xor	$s1,$t1,$acc13
1068	xor	$s2,$t2,$acc14
1069	xor	$s3,$t3,$acc15
1070	addi	$key,$key,16
1071	bdnz	Ldec_loop
1072
# Last round: plain byte substitution through the 256-byte table at
# Tbl0+2048 (Td4), with the same byte selection as above and no word tables.
# The lwz loads from 2048+0 .. 2048+224 (one word every 32 bytes) only touch
# the table: each destination register is overwritten by a later rlwinm
# before it is read.
1073	addi	$Tbl2,$Tbl0,2048
1074	nop
1075	lwz	$t0,0($key)
1076	rlwinm	$acc00,$s0,`32-24`,24,31
1077	lwz	$t1,4($key)
1078	rlwinm	$acc01,$s1,`32-24`,24,31
1079	lwz	$t2,8($key)
1080	rlwinm	$acc02,$s2,`32-24`,24,31
1081	lwz	$t3,12($key)
1082	rlwinm	$acc03,$s3,`32-24`,24,31
1083	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
1084	rlwinm	$acc04,$s3,`32-16`,24,31
1085	lwz	$acc09,`2048+32`($Tbl0)
1086	rlwinm	$acc05,$s0,`32-16`,24,31
1087	lwz	$acc10,`2048+64`($Tbl0)
1088	lbzx	$acc00,$Tbl2,$acc00
1089	lwz	$acc11,`2048+96`($Tbl0)
1090	lbzx	$acc01,$Tbl2,$acc01
1091	lwz	$acc12,`2048+128`($Tbl0)
1092	rlwinm	$acc06,$s1,`32-16`,24,31
1093	lwz	$acc13,`2048+160`($Tbl0)
1094	rlwinm	$acc07,$s2,`32-16`,24,31
1095	lwz	$acc14,`2048+192`($Tbl0)
1096	rlwinm	$acc08,$s2,`32-8`,24,31
1097	lwz	$acc15,`2048+224`($Tbl0)
1098	rlwinm	$acc09,$s3,`32-8`,24,31
1099	lbzx	$acc02,$Tbl2,$acc02
1100	rlwinm	$acc10,$s0,`32-8`,24,31
1101	lbzx	$acc03,$Tbl2,$acc03
1102	rlwinm	$acc11,$s1,`32-8`,24,31
1103	lbzx	$acc04,$Tbl2,$acc04
1104	rlwinm	$acc12,$s1,`0`,24,31
1105	lbzx	$acc05,$Tbl2,$acc05
1106	rlwinm	$acc13,$s2,`0`,24,31
1107	lbzx	$acc06,$Tbl2,$acc06
1108	rlwinm	$acc14,$s3,`0`,24,31
1109	lbzx	$acc07,$Tbl2,$acc07
1110	rlwinm	$acc15,$s0,`0`,24,31
1111	lbzx	$acc08,$Tbl2,$acc08
# Reassemble the substituted bytes into words: acc00-03 go to the top byte,
# acc04-07 and acc08-11 to the two middle bytes, acc12-15 to the low byte.
1112	rlwinm	$s0,$acc00,24,0,7
1113	lbzx	$acc09,$Tbl2,$acc09
1114	rlwinm	$s1,$acc01,24,0,7
1115	lbzx	$acc10,$Tbl2,$acc10
1116	rlwinm	$s2,$acc02,24,0,7
1117	lbzx	$acc11,$Tbl2,$acc11
1118	rlwinm	$s3,$acc03,24,0,7
1119	lbzx	$acc12,$Tbl2,$acc12
1120	rlwimi	$s0,$acc04,16,8,15
1121	lbzx	$acc13,$Tbl2,$acc13
1122	rlwimi	$s1,$acc05,16,8,15
1123	lbzx	$acc14,$Tbl2,$acc14
1124	rlwimi	$s2,$acc06,16,8,15
1125	lbzx	$acc15,$Tbl2,$acc15
1126	rlwimi	$s3,$acc07,16,8,15
1127	rlwimi	$s0,$acc08,8,16,23
1128	rlwimi	$s1,$acc09,8,16,23
1129	rlwimi	$s2,$acc10,8,16,23
1130	rlwimi	$s3,$acc11,8,16,23
1131	or	$s0,$s0,$acc12
1132	or	$s1,$s1,$acc13
1133	or	$s2,$s2,$acc14
1134	or	$s3,$s3,$acc15
# XOR in the last round key and return the result in s0-s3.
1135	xor	$s0,$s0,$t0
1136	xor	$s1,$s1,$t1
1137	xor	$s2,$s2,$t2
1138	xor	$s3,$s3,$t3
1139	blr
# NOTE(review): the data words below are presumably a traceback table for
# this leaf routine - confirm against the ABI before changing them.
1140	.long	0
1141	.byte	0,12,0x14,0,0,0,0,0
1142
# Lppc_AES_decrypt_compact - decryption core whose only table lookups are
# byte loads from the 256-byte table at Tbl0+2048; the column mixing is
# computed arithmetically inside the loop that follows this prologue.
#
# In:   s0-s3  state words
#       key    points at the first round key; the word at key+240 is used as
#              the loop count (presumably the AES round count - confirm
#              against the key schedule layout)
#       Tbl0   table base (set up by the caller, not shown here)
#
# The prologue loads the count and the first round key (t0-t3), points Tbl1
# at the byte table, and builds the two byte-replicated constants
# mask80 = 0x80808080 and mask1b = 0x1b1b1b1b with lis/ori pairs.
1143.align	4
1144Lppc_AES_decrypt_compact:
1145	lwz	$acc00,240($key)
1146	addi	$Tbl1,$Tbl0,2048
1147	lwz	$t0,0($key)
1148	lis	$mask80,0x8080
1149	lwz	$t1,4($key)
1150	lis	$mask1b,0x1b1b
1151	lwz	$t2,8($key)
1152	ori	$mask80,$mask80,0x8080
1153	lwz	$t3,12($key)
1154	ori	$mask1b,$mask1b,0x1b1b
1155	addi	$key,$key,16
1156___
# 64-bit flavours only: copy the low 32 bits of each mask into its upper
# 32 bits, so the masks cover two state words packed into one register.
1157$code.=<<___ if ($SIZE_T==8);
1158	insrdi	$mask80,$mask80,32,0
1159	insrdi	$mask1b,$mask1b,32,0
1160___
# Head of the compact round loop; emitted for every flavour.
1161$code.=<<___;
1162	mtctr	$acc00
1163.align	4
# Each iteration first XORs the pending round key (t0-t3) into the state,
# then substitutes all 16 state bytes through the byte table at Tbl1.
# rlwinm x,y,32-N,24,31 yields (y >> N) & 0xff, i.e. an unscaled byte index.
# The source word rotates with the byte position (top byte: s0,s1,s2,s3;
# next: s3,s0,s1,s2; next: s2,s3,s0,s1; low byte: s1,s2,s3,s0), which is the
# inverse ShiftRows selection.
1164Ldec_compact_loop:
1165	xor	$s0,$s0,$t0
1166	xor	$s1,$s1,$t1
1167	rlwinm	$acc00,$s0,`32-24`,24,31
1168	xor	$s2,$s2,$t2
1169	rlwinm	$acc01,$s1,`32-24`,24,31
1170	xor	$s3,$s3,$t3
1171	rlwinm	$acc02,$s2,`32-24`,24,31
1172	rlwinm	$acc03,$s3,`32-24`,24,31
1173	rlwinm	$acc04,$s3,`32-16`,24,31
1174	rlwinm	$acc05,$s0,`32-16`,24,31
1175	rlwinm	$acc06,$s1,`32-16`,24,31
1176	rlwinm	$acc07,$s2,`32-16`,24,31
1177	lbzx	$acc00,$Tbl1,$acc00
1178	rlwinm	$acc08,$s2,`32-8`,24,31
1179	lbzx	$acc01,$Tbl1,$acc01
1180	rlwinm	$acc09,$s3,`32-8`,24,31
1181	lbzx	$acc02,$Tbl1,$acc02
1182	rlwinm	$acc10,$s0,`32-8`,24,31
1183	lbzx	$acc03,$Tbl1,$acc03
1184	rlwinm	$acc11,$s1,`32-8`,24,31
1185	lbzx	$acc04,$Tbl1,$acc04
1186	rlwinm	$acc12,$s1,`0`,24,31
1187	lbzx	$acc05,$Tbl1,$acc05
1188	rlwinm	$acc13,$s2,`0`,24,31
1189	lbzx	$acc06,$Tbl1,$acc06
1190	rlwinm	$acc14,$s3,`0`,24,31
1191	lbzx	$acc07,$Tbl1,$acc07
1192	rlwinm	$acc15,$s0,`0`,24,31
1193	lbzx	$acc08,$Tbl1,$acc08
# Reassemble the substituted bytes into words: acc00-03 go to the top byte,
# acc04-07 and acc08-11 to the two middle bytes, acc12-15 to the low byte.
1194	rlwinm	$s0,$acc00,24,0,7
1195	lbzx	$acc09,$Tbl1,$acc09
1196	rlwinm	$s1,$acc01,24,0,7
1197	lbzx	$acc10,$Tbl1,$acc10
1198	rlwinm	$s2,$acc02,24,0,7
1199	lbzx	$acc11,$Tbl1,$acc11
1200	rlwinm	$s3,$acc03,24,0,7
1201	lbzx	$acc12,$Tbl1,$acc12
1202	rlwimi	$s0,$acc04,16,8,15
1203	lbzx	$acc13,$Tbl1,$acc13
1204	rlwimi	$s1,$acc05,16,8,15
1205	lbzx	$acc14,$Tbl1,$acc14
1206	rlwimi	$s2,$acc06,16,8,15
1207	lbzx	$acc15,$Tbl1,$acc15
1208	rlwimi	$s3,$acc07,16,8,15
1209	rlwimi	$s0,$acc08,8,16,23
1210	rlwimi	$s1,$acc09,8,16,23
1211	rlwimi	$s2,$acc10,8,16,23
1212	rlwimi	$s3,$acc11,8,16,23
# Fetch the next round key while the last bytes are merged in; it is applied
# at the top of the next iteration or at Ldec_compact_done.
1213	lwz	$t0,0($key)
1214	or	$s0,$s0,$acc12
1215	lwz	$t1,4($key)
1216	or	$s1,$s1,$acc13
1217	lwz	$t2,8($key)
1218	or	$s2,$s2,$acc14
1219	lwz	$t3,12($key)
1220	or	$s3,$s3,$acc15
1221
1222	addi	$key,$key,16
# bdz decrements CTR and leaves the loop once it reaches zero, so the final
# pass through the loop skips the column mixing that follows.
1223	bdz	Ldec_compact_done
1224___
# 64-bit flavours: first half of the column mixing, done on two state words
# per register.
1225$code.=<<___ if ($SIZE_T==8);
1226	# vectorized permutation improves decrypt performance by 10%
# Pack s1 into the upper half of s0 and s3 into the upper half of s2.
1227	insrdi	$s0,$s1,32,0
1228	insrdi	$s2,$s3,32,0
1229
# Byte-parallel doubling in GF(2^8), applied three times to obtain
# r2 = 2*r0, r4 = 2*r2 and r8 = 2*r4 for every state byte:
#   r1 = x & 0x80..80            top bit of each byte
#   r1 - (r1 >> 7)               0x7f in each byte whose top bit was set
#   ... & 0x1b..1b               reduction constant 0x1b for those bytes
#   ^ ((x & 0x7f..7f) << 1)      the doubled low seven bits (add x,x)
1230	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1231	and	$acc02,$s2,$mask80
1232	srdi	$acc04,$acc00,7		# r1>>7
1233	srdi	$acc06,$acc02,7
1234	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1235	andc	$acc10,$s2,$mask80
1236	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1237	sub	$acc02,$acc02,$acc06
1238	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1239	add	$acc10,$acc10,$acc10
1240	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1241	and	$acc02,$acc02,$mask1b
1242	xor	$acc00,$acc00,$acc08	# r2
1243	xor	$acc02,$acc02,$acc10
1244
1245	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1246	and	$acc06,$acc02,$mask80
1247	srdi	$acc08,$acc04,7		# r1>>7
1248	srdi	$acc10,$acc06,7
1249	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1250	andc	$acc14,$acc02,$mask80
1251	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1252	sub	$acc06,$acc06,$acc10
1253	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1254	add	$acc14,$acc14,$acc14
1255	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1256	and	$acc06,$acc06,$mask1b
1257	xor	$acc04,$acc04,$acc12	# r4
1258	xor	$acc06,$acc06,$acc14
1259
1260	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1261	and	$acc10,$acc06,$mask80
1262	srdi	$acc12,$acc08,7		# r1>>7
1263	srdi	$acc14,$acc10,7
1264	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1265	sub	$acc10,$acc10,$acc14
1266	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1267	andc	$acc14,$acc06,$mask80
1268	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1269	add	$acc14,$acc14,$acc14
1270	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1271	and	$acc10,$acc10,$mask1b
1272	xor	$acc08,$acc08,$acc12	# r8
1273	xor	$acc10,$acc10,$acc14
1274
1275	xor	$acc00,$acc00,$s0	# r2^r0
1276	xor	$acc02,$acc02,$s2
1277	xor	$acc04,$acc04,$s0	# r4^r0
1278	xor	$acc06,$acc06,$s2
1279
# Unpack: copy the upper half of each even register into the following odd
# register, so that the values for the four state words sit in the low 32
# bits of acc00-03 (r2^r0), acc04-07 (r4^r0) and acc08-11 (r8).
1280	extrdi	$acc01,$acc00,32,0
1281	extrdi	$acc03,$acc02,32,0
1282	extrdi	$acc05,$acc04,32,0
1283	extrdi	$acc07,$acc06,32,0
1284	extrdi	$acc09,$acc08,32,0
1285	extrdi	$acc11,$acc10,32,0
1286___
# 32-bit flavours: first half of the column mixing, one state word per
# register.
1287$code.=<<___ if ($SIZE_T==4);
# Byte-parallel doubling in GF(2^8), applied three times to obtain
# r2 = 2*r0, r4 = 2*r2 and r8 = 2*r4 for every state byte:
#   r1 = x & 0x80808080          top bit of each byte
#   r1 - (r1 >> 7)               0x7f in each byte whose top bit was set
#   ... & 0x1b1b1b1b             reduction constant 0x1b for those bytes
#   ^ ((x & 0x7f7f7f7f) << 1)    the doubled low seven bits (add x,x)
# Results: acc00-03 = r2^r0, acc04-07 = r4^r0, acc08-11 = r8.
1288	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
1289	and	$acc01,$s1,$mask80
1290	and	$acc02,$s2,$mask80
1291	and	$acc03,$s3,$mask80
1292	srwi	$acc04,$acc00,7		# r1>>7
1293	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
1294	srwi	$acc05,$acc01,7
1295	andc	$acc09,$s1,$mask80
1296	srwi	$acc06,$acc02,7
1297	andc	$acc10,$s2,$mask80
1298	srwi	$acc07,$acc03,7
1299	andc	$acc11,$s3,$mask80
1300	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
1301	sub	$acc01,$acc01,$acc05
1302	sub	$acc02,$acc02,$acc06
1303	sub	$acc03,$acc03,$acc07
1304	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
1305	add	$acc09,$acc09,$acc09
1306	add	$acc10,$acc10,$acc10
1307	add	$acc11,$acc11,$acc11
1308	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1309	and	$acc01,$acc01,$mask1b
1310	and	$acc02,$acc02,$mask1b
1311	and	$acc03,$acc03,$mask1b
1312	xor	$acc00,$acc00,$acc08	# r2
1313	xor	$acc01,$acc01,$acc09
1314	xor	$acc02,$acc02,$acc10
1315	xor	$acc03,$acc03,$acc11
1316
1317	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
1318	and	$acc05,$acc01,$mask80
1319	and	$acc06,$acc02,$mask80
1320	and	$acc07,$acc03,$mask80
1321	srwi	$acc08,$acc04,7		# r1>>7
1322	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
1323	srwi	$acc09,$acc05,7
1324	andc	$acc13,$acc01,$mask80
1325	srwi	$acc10,$acc06,7
1326	andc	$acc14,$acc02,$mask80
1327	srwi	$acc11,$acc07,7
1328	andc	$acc15,$acc03,$mask80
1329	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
1330	sub	$acc05,$acc05,$acc09
1331	sub	$acc06,$acc06,$acc10
1332	sub	$acc07,$acc07,$acc11
1333	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
1334	add	$acc13,$acc13,$acc13
1335	add	$acc14,$acc14,$acc14
1336	add	$acc15,$acc15,$acc15
1337	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1338	and	$acc05,$acc05,$mask1b
1339	and	$acc06,$acc06,$mask1b
1340	and	$acc07,$acc07,$mask1b
1341	xor	$acc04,$acc04,$acc12	# r4
1342	xor	$acc05,$acc05,$acc13
1343	xor	$acc06,$acc06,$acc14
1344	xor	$acc07,$acc07,$acc15
1345
1346	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
1347	and	$acc09,$acc05,$mask80
1348	srwi	$acc12,$acc08,7		# r1>>7
1349	and	$acc10,$acc06,$mask80
1350	srwi	$acc13,$acc09,7
1351	and	$acc11,$acc07,$mask80
1352	srwi	$acc14,$acc10,7
1353	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
1354	srwi	$acc15,$acc11,7
1355	sub	$acc09,$acc09,$acc13
1356	sub	$acc10,$acc10,$acc14
1357	sub	$acc11,$acc11,$acc15
1358	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
1359	andc	$acc13,$acc05,$mask80
1360	andc	$acc14,$acc06,$mask80
1361	andc	$acc15,$acc07,$mask80
1362	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
1363	add	$acc13,$acc13,$acc13
1364	add	$acc14,$acc14,$acc14
1365	add	$acc15,$acc15,$acc15
1366	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
1367	and	$acc09,$acc09,$mask1b
1368	and	$acc10,$acc10,$mask1b
1369	and	$acc11,$acc11,$mask1b
1370	xor	$acc08,$acc08,$acc12	# r8
1371	xor	$acc09,$acc09,$acc13
1372	xor	$acc10,$acc10,$acc14
1373	xor	$acc11,$acc11,$acc15
1374
1375	xor	$acc00,$acc00,$s0	# r2^r0
1376	xor	$acc01,$acc01,$s1
1377	xor	$acc02,$acc02,$s2
1378	xor	$acc03,$acc03,$s3
1379	xor	$acc04,$acc04,$s0	# r4^r0
1380	xor	$acc05,$acc05,$s1
1381	xor	$acc06,$acc06,$s2
1382	xor	$acc07,$acc07,$s3
1383___
# Common second half of the column mixing, the loop exit, and the object
# trailer. On entry the width-specific code has left, in the low 32 bits,
# r2^r0 in acc00-03, r4^r0 in acc04-07 and r8 in acc08-11 (one register per
# state word).
1384$code.=<<___;
# Per state word this computes
#   s = ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
#       ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8)
# where ROTATE(x,n) is rotrwi by n bits. The unrotated terms add up to
# r8^r4^r2, so the four rotations carry the multipliers 0x0e, 0x09, 0x0d and
# 0x0b of the AES InvMixColumns step.
1385	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
1386	rotrwi	$s1,$s1,8
1387	xor	$s0,$s0,$acc00		# ^= r2^r0
1388	rotrwi	$s2,$s2,8
1389	xor	$s1,$s1,$acc01
1390	rotrwi	$s3,$s3,8
1391	xor	$s2,$s2,$acc02
1392	xor	$s3,$s3,$acc03
1393	xor	$acc00,$acc00,$acc08
1394	xor	$acc01,$acc01,$acc09
1395	xor	$acc02,$acc02,$acc10
1396	xor	$acc03,$acc03,$acc11
1397	xor	$s0,$s0,$acc04		# ^= r4^r0
1398	rotrwi	$acc00,$acc00,24
1399	xor	$s1,$s1,$acc05
1400	rotrwi	$acc01,$acc01,24
1401	xor	$s2,$s2,$acc06
1402	rotrwi	$acc02,$acc02,24
1403	xor	$s3,$s3,$acc07
1404	rotrwi	$acc03,$acc03,24
1405	xor	$acc04,$acc04,$acc08
1406	xor	$acc05,$acc05,$acc09
1407	xor	$acc06,$acc06,$acc10
1408	xor	$acc07,$acc07,$acc11
1409	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1410	rotrwi	$acc04,$acc04,16
1411	xor	$s1,$s1,$acc09
1412	rotrwi	$acc05,$acc05,16
1413	xor	$s2,$s2,$acc10
1414	rotrwi	$acc06,$acc06,16
1415	xor	$s3,$s3,$acc11
1416	rotrwi	$acc07,$acc07,16
1417	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
1418	rotrwi	$acc08,$acc08,8
1419	xor	$s1,$s1,$acc01
1420	rotrwi	$acc09,$acc09,8
1421	xor	$s2,$s2,$acc02
1422	rotrwi	$acc10,$acc10,8
1423	xor	$s3,$s3,$acc03
1424	rotrwi	$acc11,$acc11,8
1425	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
1426	xor	$s1,$s1,$acc05
1427	xor	$s2,$s2,$acc06
1428	xor	$s3,$s3,$acc07
1429	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
1430	xor	$s1,$s1,$acc09
1431	xor	$s2,$s2,$acc10
1432	xor	$s3,$s3,$acc11
1433
1434	b	Ldec_compact_loop
1435.align	4
# Reached from the bdz in the loop after the last substitution pass: XOR in
# the last round key (already loaded into t0-t3) and return s0-s3.
1436Ldec_compact_done:
1437	xor	$s0,$s0,$t0
1438	xor	$s1,$s1,$t1
1439	xor	$s2,$s2,$t2
1440	xor	$s3,$s3,$t3
1441	blr
# NOTE(review): the data words below are presumably a traceback table for
# this leaf routine - confirm against the ABI before changing them.
1442	.long	0
1443	.byte	0,12,0x14,0,0,0,0,0
1444.size	.AES_decrypt,.-.AES_decrypt
1445
# Identification string placed in the generated object.
1446.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1447.align	7
1448___
1449
# Fold every `expr` span in the accumulated text into the value of evaluating
# expr as Perl (frame offsets, shift counts and similar constant arithmetic),
# then write the result to STDOUT.
1450$code =~ s/\`([^\`]*)\`/eval $1/gem;
1451print $code;
# Buffered write errors only surface when the handle is closed, and if STDOUT
# is a pipe to a filter process (NOTE(review): the open is not visible in this
# part of the file - confirm) a failing child is reported here as well.
# Treat a failed close as fatal so the build does not continue with a
# truncated or missing assembly file.
1452close STDOUT or die "error closing STDOUT: $!";
1453