1#! /usr/bin/env perl
2# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# AES for MIPS
18
19# October 2010
20#
21# Code uses 1K[+256B] S-box and on single-issue core [such as R5000]
22# spends ~68 cycles per byte processed with 128-bit key. This is ~16%
23# faster than gcc-generated code, which is not very impressive. But
24# recall that compressed S-box requires extra processing, namely
25# additional rotations. Rotations are implemented with lwl/lwr pairs,
26# which are normally used for loading unaligned data. Another cool
27# thing about this module is its endian neutrality, which means that
28# it processes data without ever changing byte order...
29
30# September 2012
31#
32# Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (a further
33# ~25% fewer instructions) code. Note that there is no run-time switch;
34# instead, the code path is chosen at preprocessing time: pass -mips32r2
35# and/or -msmartmips.
36
37######################################################################
38# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
39# widely used. Then there is a new contender: NUBI. It appears that if
40# one picks the latter, it's possible to arrange code in an ABI-neutral
41# manner. Therefore let's stick to the NUBI register layout:
42#
# Symbolic register names under the NUBI layout. Each variable holds the
# assembler spelling of one numbered register ("$0" .. "$31").
($zero,$at,$t0,$t1,$t2)
	= map { sprintf('$%d', $_) } (0..2, 24, 25);
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)
	= map { sprintf('$%d', $_) } (4..11);
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)
	= map { sprintf('$%d', $_) } (12..23);
($gp,$tp,$sp,$fp,$ra)
	= map { sprintf('$%d', $_) } (3, 28..31);
47#
48# The return value is placed in $a0. Following coding rules facilitate
49# interoperability:
50#
51# - never ever touch $tp, "thread pointer", former $gp;
52# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
53#   old code];
54# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
55#
56# For reference here is register layout for N32/64 MIPS ABIs:
57#
58# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
59# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
60# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
61# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
62# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
63#
# Select the target ABI flavour from the command line.
#
# The first argument is taken as the flavour only when it does not look
# like a file name (i.e. contains no '.'). Previously a lone output file
# argument was consumed as the flavour: the mnemonic set was then chosen by
# matching on a file name, and the name was no longer in @ARGV for the
# output-file scan further down. An absent or empty flavour still
# defaults to "o32".
$flavour = (@ARGV && $ARGV[0] !~ /\./) ? shift : undef;
$flavour ||= "o32"; # supported flavours are o32,n32,64,nubi32,nubi64

# Pointer/register-width dependent mnemonics interpolated into the
# assembly templates, plus the size in bytes of one saved-register slot.
if ($flavour =~ /64|n32/i) {
	$PTR_LA="dla";
	$PTR_ADD="daddu";	# incidentally works even on n32
	$PTR_SUB="dsubu";	# incidentally works even on n32
	$PTR_INS="dins";
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_LA="la";
	$PTR_ADD="addu";
	$PTR_SUB="subu";
	$PTR_INS="ins";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
# Register handed to the .cpload/.cpsetup PIC directives in the function
# prologues: $t0 for NUBI flavours, $t2 otherwise.
$pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
86#
87# <appro@openssl.org>
88#
89######################################################################
90
# Determine target endianness. When a compiler is named in $ENV{CC}, the
# token MIPSEB is piped through its preprocessor: if the token comes back
# unchanged the result is 0, otherwise 1 (presumably the compiler
# predefines MIPSEB as a macro on big-endian targets -- confirm).
# NOTE(review): $ENV{CC} is interpolated into a shell command; it is
# expected to come from the build system, not from untrusted input.
$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});

# Without a compiler to ask, fall back to the byte order of the host
# running this script.
if (!defined($big_endian))
{    $big_endian=(unpack('L',pack('N',1))==1);   }

# The first remaining argument that looks like "name.ext" is the output
# file. STDOUT is redirected exactly once, and only when such an argument
# exists; a failed open is now fatal instead of silently leaving the
# generated assembly on the original STDOUT.
($output) = grep { /\w[\w\-]*\.\w+$/ } @ARGV;
if (defined($output)) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}

my ($MSB,$LSB)=(0,3);	# automatically converted to little-endian
103
# Common preamble of the generated assembly: pull in mips_arch.h, switch
# to the text section, request PIC output unless building for EABI or
# non-PIC VxWorks, and tell the assembler that this code uses the
# assembler-temporary register itself (.set noat).
$code .= join("\n",
	'#include "mips_arch.h"',
	'',
	'.text',
	'#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))',
	".option\tpic2",
	'#endif',
	".set\tnoat",
	'');
113
114{{{
# Bare block that scopes the lexical register aliases used by the AES
# block encrypt/decrypt templates below; it is closed by the matching "}}}".
#
# $FRAMESIZE reserves 16 register-sized stack slots. The prologues below
# store $ra, $fp and $s4-$s11 (10 slots) and, for NUBI flavours, five more
# ($12-$15 and $gp).
# $SAVED_REGS_MASK is the value given to the .mask directive: 0xc0ff0000
# has bits 31, 30 and 23..16 set; the NUBI variant additionally sets bits
# 15..12 and bit 3.
115my $FRAMESIZE=16*$SZREG;
116my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
117
# Arguments and cipher state live in the argument registers $a0-$a7.
# Note that $s0-$s3 declared here shadow the file-level $s0-$s3 for the
# rest of this block; the prologues therefore spell the original
# $12-$15 numerically.
118my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
# Table-index scratch registers. This line must precede the next one: the
# right-hand side still refers to the file-level $t0-$t2 here, before the
# lexical $t0-$t11 below take over those names.
119my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
# Twelve temporaries mapped onto registers $12-$23 (shadowing the
# file-level $t0-$t2 from here on).
120my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$t9,$t10,$t11) = map("\$$_",(12..23));
# Running round-key pointer and round counter.
121my ($key0,$cnt)=($gp,$fp);
122
123# instruction ordering is "stolen" from output from MIPSpro assembler
124# invoked with -mips3 -O3 arguments...
#
# Template for the internal routine _mips_AES_encrypt, followed by the
# entry directives of the exported AES_encrypt.
#
# _mips_AES_encrypt XORs the four state words ($s0-$s3) with the first
# four key words, reads a count from offset 240 of the key structure,
# decrements it, and runs the main loop until it reaches zero. The loop
# body exists in three C-preprocessor-selected variants: SmartMIPS
# (ext/lwxs/rotr), MIPS32R2/MIPS64R2 (lw plus rotr, with separate
# _MIPSEL and non-_MIPSEL instruction orders) and a generic variant that
# obtains the rotated table words with lwl/lwr pairs. The last round
# after the loop uses byte loads (lbu at offset 2 of each table entry,
# labelled Te4 in the comments) and assembles the result with
# _ins/_ins2 before the final round-key XOR.
#
# NOTE(review): _xtr, _ins and _ins2 are not real mnemonics and are not
# defined in this part of the file; presumably they are rewritten by
# post-processing of $code further down -- confirm.
125$code.=<<___;
126.align	5
127.ent	_mips_AES_encrypt
128_mips_AES_encrypt:
129	.frame	$sp,0,$ra
130	.set	reorder
131	lw	$t0,0($key)
132	lw	$t1,4($key)
133	lw	$t2,8($key)
134	lw	$t3,12($key)
135	lw	$cnt,240($key)
136	$PTR_ADD $key0,$key,16
137
138	xor	$s0,$t0
139	xor	$s1,$t1
140	xor	$s2,$t2
141	xor	$s3,$t3
142
143	subu	$cnt,1
144#if defined(__mips_smartmips)
145	ext	$i0,$s1,16,8
146.Loop_enc:
147	ext	$i1,$s2,16,8
148	ext	$i2,$s3,16,8
149	ext	$i3,$s0,16,8
150	lwxs	$t0,$i0($Tbl)		# Te1[s1>>16]
151	ext	$i0,$s2,8,8
152	lwxs	$t1,$i1($Tbl)		# Te1[s2>>16]
153	ext	$i1,$s3,8,8
154	lwxs	$t2,$i2($Tbl)		# Te1[s3>>16]
155	ext	$i2,$s0,8,8
156	lwxs	$t3,$i3($Tbl)		# Te1[s0>>16]
157	ext	$i3,$s1,8,8
158
159	lwxs	$t4,$i0($Tbl)		# Te2[s2>>8]
160	ext	$i0,$s3,0,8
161	lwxs	$t5,$i1($Tbl)		# Te2[s3>>8]
162	ext	$i1,$s0,0,8
163	lwxs	$t6,$i2($Tbl)		# Te2[s0>>8]
164	ext	$i2,$s1,0,8
165	lwxs	$t7,$i3($Tbl)		# Te2[s1>>8]
166	ext	$i3,$s2,0,8
167
168	lwxs	$t8,$i0($Tbl)		# Te3[s3]
169	ext	$i0,$s0,24,8
170	lwxs	$t9,$i1($Tbl)		# Te3[s0]
171	ext	$i1,$s1,24,8
172	lwxs	$t10,$i2($Tbl)		# Te3[s1]
173	ext	$i2,$s2,24,8
174	lwxs	$t11,$i3($Tbl)		# Te3[s2]
175	ext	$i3,$s3,24,8
176
177	rotr	$t0,$t0,8
178	rotr	$t1,$t1,8
179	rotr	$t2,$t2,8
180	rotr	$t3,$t3,8
181
182	rotr	$t4,$t4,16
183	rotr	$t5,$t5,16
184	rotr	$t6,$t6,16
185	rotr	$t7,$t7,16
186
187	xor	$t0,$t4
188	lwxs	$t4,$i0($Tbl)		# Te0[s0>>24]
189	xor	$t1,$t5
190	lwxs	$t5,$i1($Tbl)		# Te0[s1>>24]
191	xor	$t2,$t6
192	lwxs	$t6,$i2($Tbl)		# Te0[s2>>24]
193	xor	$t3,$t7
194	lwxs	$t7,$i3($Tbl)		# Te0[s3>>24]
195
196	rotr	$t8,$t8,24
197	lw	$s0,0($key0)
198	rotr	$t9,$t9,24
199	lw	$s1,4($key0)
200	rotr	$t10,$t10,24
201	lw	$s2,8($key0)
202	rotr	$t11,$t11,24
203	lw	$s3,12($key0)
204
205	xor	$t0,$t8
206	xor	$t1,$t9
207	xor	$t2,$t10
208	xor	$t3,$t11
209
210	xor	$t0,$t4
211	xor	$t1,$t5
212	xor	$t2,$t6
213	xor	$t3,$t7
214
215	subu	$cnt,1
216	$PTR_ADD $key0,16
217	xor	$s0,$t0
218	xor	$s1,$t1
219	xor	$s2,$t2
220	xor	$s3,$t3
221	.set	noreorder
222	bnez	$cnt,.Loop_enc
223	ext	$i0,$s1,16,8
224
225	_xtr	$i0,$s1,16-2
226#else
227	_xtr	$i0,$s1,16-2
228.Loop_enc:
229	_xtr	$i1,$s2,16-2
230	_xtr	$i2,$s3,16-2
231	_xtr	$i3,$s0,16-2
232	and	$i0,0x3fc
233	and	$i1,0x3fc
234	and	$i2,0x3fc
235	and	$i3,0x3fc
236	$PTR_ADD $i0,$Tbl
237	$PTR_ADD $i1,$Tbl
238	$PTR_ADD $i2,$Tbl
239	$PTR_ADD $i3,$Tbl
240#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
241	lw	$t0,0($i0)		# Te1[s1>>16]
242	_xtr	$i0,$s2,8-2
243	lw	$t1,0($i1)		# Te1[s2>>16]
244	_xtr	$i1,$s3,8-2
245	lw	$t2,0($i2)		# Te1[s3>>16]
246	_xtr	$i2,$s0,8-2
247	lw	$t3,0($i3)		# Te1[s0>>16]
248	_xtr	$i3,$s1,8-2
249#else
250	lwl	$t0,3($i0)		# Te1[s1>>16]
251	lwl	$t1,3($i1)		# Te1[s2>>16]
252	lwl	$t2,3($i2)		# Te1[s3>>16]
253	lwl	$t3,3($i3)		# Te1[s0>>16]
254	lwr	$t0,2($i0)		# Te1[s1>>16]
255	_xtr	$i0,$s2,8-2
256	lwr	$t1,2($i1)		# Te1[s2>>16]
257	_xtr	$i1,$s3,8-2
258	lwr	$t2,2($i2)		# Te1[s3>>16]
259	_xtr	$i2,$s0,8-2
260	lwr	$t3,2($i3)		# Te1[s0>>16]
261	_xtr	$i3,$s1,8-2
262#endif
263	and	$i0,0x3fc
264	and	$i1,0x3fc
265	and	$i2,0x3fc
266	and	$i3,0x3fc
267	$PTR_ADD $i0,$Tbl
268	$PTR_ADD $i1,$Tbl
269	$PTR_ADD $i2,$Tbl
270	$PTR_ADD $i3,$Tbl
271#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
272	rotr	$t0,$t0,8
273	rotr	$t1,$t1,8
274	rotr	$t2,$t2,8
275	rotr	$t3,$t3,8
276# if defined(_MIPSEL)
277	lw	$t4,0($i0)		# Te2[s2>>8]
278	_xtr	$i0,$s3,0-2
279	lw	$t5,0($i1)		# Te2[s3>>8]
280	_xtr	$i1,$s0,0-2
281	lw	$t6,0($i2)		# Te2[s0>>8]
282	_xtr	$i2,$s1,0-2
283	lw	$t7,0($i3)		# Te2[s1>>8]
284	_xtr	$i3,$s2,0-2
285
286	and	$i0,0x3fc
287	and	$i1,0x3fc
288	and	$i2,0x3fc
289	and	$i3,0x3fc
290	$PTR_ADD $i0,$Tbl
291	$PTR_ADD $i1,$Tbl
292	$PTR_ADD $i2,$Tbl
293	$PTR_ADD $i3,$Tbl
294	lw	$t8,0($i0)		# Te3[s3]
295	$PTR_INS $i0,$s0,2,8
296	lw	$t9,0($i1)		# Te3[s0]
297	$PTR_INS $i1,$s1,2,8
298	lw	$t10,0($i2)		# Te3[s1]
299	$PTR_INS $i2,$s2,2,8
300	lw	$t11,0($i3)		# Te3[s2]
301	$PTR_INS $i3,$s3,2,8
302# else
303	lw	$t4,0($i0)		# Te2[s2>>8]
304	$PTR_INS $i0,$s3,2,8
305	lw	$t5,0($i1)		# Te2[s3>>8]
306	$PTR_INS $i1,$s0,2,8
307	lw	$t6,0($i2)		# Te2[s0>>8]
308	$PTR_INS $i2,$s1,2,8
309	lw	$t7,0($i3)		# Te2[s1>>8]
310	$PTR_INS $i3,$s2,2,8
311
312	lw	$t8,0($i0)		# Te3[s3]
313	_xtr	$i0,$s0,24-2
314	lw	$t9,0($i1)		# Te3[s0]
315	_xtr	$i1,$s1,24-2
316	lw	$t10,0($i2)		# Te3[s1]
317	_xtr	$i2,$s2,24-2
318	lw	$t11,0($i3)		# Te3[s2]
319	_xtr	$i3,$s3,24-2
320
321	and	$i0,0x3fc
322	and	$i1,0x3fc
323	and	$i2,0x3fc
324	and	$i3,0x3fc
325	$PTR_ADD $i0,$Tbl
326	$PTR_ADD $i1,$Tbl
327	$PTR_ADD $i2,$Tbl
328	$PTR_ADD $i3,$Tbl
329# endif
330	rotr	$t4,$t4,16
331	rotr	$t5,$t5,16
332	rotr	$t6,$t6,16
333	rotr	$t7,$t7,16
334
335	rotr	$t8,$t8,24
336	rotr	$t9,$t9,24
337	rotr	$t10,$t10,24
338	rotr	$t11,$t11,24
339#else
340	lwl	$t4,2($i0)		# Te2[s2>>8]
341	lwl	$t5,2($i1)		# Te2[s3>>8]
342	lwl	$t6,2($i2)		# Te2[s0>>8]
343	lwl	$t7,2($i3)		# Te2[s1>>8]
344	lwr	$t4,1($i0)		# Te2[s2>>8]
345	_xtr	$i0,$s3,0-2
346	lwr	$t5,1($i1)		# Te2[s3>>8]
347	_xtr	$i1,$s0,0-2
348	lwr	$t6,1($i2)		# Te2[s0>>8]
349	_xtr	$i2,$s1,0-2
350	lwr	$t7,1($i3)		# Te2[s1>>8]
351	_xtr	$i3,$s2,0-2
352
353	and	$i0,0x3fc
354	and	$i1,0x3fc
355	and	$i2,0x3fc
356	and	$i3,0x3fc
357	$PTR_ADD $i0,$Tbl
358	$PTR_ADD $i1,$Tbl
359	$PTR_ADD $i2,$Tbl
360	$PTR_ADD $i3,$Tbl
361	lwl	$t8,1($i0)		# Te3[s3]
362	lwl	$t9,1($i1)		# Te3[s0]
363	lwl	$t10,1($i2)		# Te3[s1]
364	lwl	$t11,1($i3)		# Te3[s2]
365	lwr	$t8,0($i0)		# Te3[s3]
366	_xtr	$i0,$s0,24-2
367	lwr	$t9,0($i1)		# Te3[s0]
368	_xtr	$i1,$s1,24-2
369	lwr	$t10,0($i2)		# Te3[s1]
370	_xtr	$i2,$s2,24-2
371	lwr	$t11,0($i3)		# Te3[s2]
372	_xtr	$i3,$s3,24-2
373
374	and	$i0,0x3fc
375	and	$i1,0x3fc
376	and	$i2,0x3fc
377	and	$i3,0x3fc
378	$PTR_ADD $i0,$Tbl
379	$PTR_ADD $i1,$Tbl
380	$PTR_ADD $i2,$Tbl
381	$PTR_ADD $i3,$Tbl
382#endif
383	xor	$t0,$t4
384	lw	$t4,0($i0)		# Te0[s0>>24]
385	xor	$t1,$t5
386	lw	$t5,0($i1)		# Te0[s1>>24]
387	xor	$t2,$t6
388	lw	$t6,0($i2)		# Te0[s2>>24]
389	xor	$t3,$t7
390	lw	$t7,0($i3)		# Te0[s3>>24]
391
392	xor	$t0,$t8
393	lw	$s0,0($key0)
394	xor	$t1,$t9
395	lw	$s1,4($key0)
396	xor	$t2,$t10
397	lw	$s2,8($key0)
398	xor	$t3,$t11
399	lw	$s3,12($key0)
400
401	xor	$t0,$t4
402	xor	$t1,$t5
403	xor	$t2,$t6
404	xor	$t3,$t7
405
406	subu	$cnt,1
407	$PTR_ADD $key0,16
408	xor	$s0,$t0
409	xor	$s1,$t1
410	xor	$s2,$t2
411	xor	$s3,$t3
412	.set	noreorder
413	bnez	$cnt,.Loop_enc
414	_xtr	$i0,$s1,16-2
415#endif
416
417	.set	reorder
418	_xtr	$i1,$s2,16-2
419	_xtr	$i2,$s3,16-2
420	_xtr	$i3,$s0,16-2
421	and	$i0,0x3fc
422	and	$i1,0x3fc
423	and	$i2,0x3fc
424	and	$i3,0x3fc
425	$PTR_ADD $i0,$Tbl
426	$PTR_ADD $i1,$Tbl
427	$PTR_ADD $i2,$Tbl
428	$PTR_ADD $i3,$Tbl
429	lbu	$t0,2($i0)		# Te4[s1>>16]
430	_xtr	$i0,$s2,8-2
431	lbu	$t1,2($i1)		# Te4[s2>>16]
432	_xtr	$i1,$s3,8-2
433	lbu	$t2,2($i2)		# Te4[s3>>16]
434	_xtr	$i2,$s0,8-2
435	lbu	$t3,2($i3)		# Te4[s0>>16]
436	_xtr	$i3,$s1,8-2
437
438	and	$i0,0x3fc
439	and	$i1,0x3fc
440	and	$i2,0x3fc
441	and	$i3,0x3fc
442	$PTR_ADD $i0,$Tbl
443	$PTR_ADD $i1,$Tbl
444	$PTR_ADD $i2,$Tbl
445	$PTR_ADD $i3,$Tbl
446#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
447# if defined(_MIPSEL)
448	lbu	$t4,2($i0)		# Te4[s2>>8]
449	$PTR_INS $i0,$s0,2,8
450	lbu	$t5,2($i1)		# Te4[s3>>8]
451	$PTR_INS $i1,$s1,2,8
452	lbu	$t6,2($i2)		# Te4[s0>>8]
453	$PTR_INS $i2,$s2,2,8
454	lbu	$t7,2($i3)		# Te4[s1>>8]
455	$PTR_INS $i3,$s3,2,8
456
457	lbu	$t8,2($i0)		# Te4[s0>>24]
458	_xtr	$i0,$s3,0-2
459	lbu	$t9,2($i1)		# Te4[s1>>24]
460	_xtr	$i1,$s0,0-2
461	lbu	$t10,2($i2)		# Te4[s2>>24]
462	_xtr	$i2,$s1,0-2
463	lbu	$t11,2($i3)		# Te4[s3>>24]
464	_xtr	$i3,$s2,0-2
465
466	and	$i0,0x3fc
467	and	$i1,0x3fc
468	and	$i2,0x3fc
469	and	$i3,0x3fc
470	$PTR_ADD $i0,$Tbl
471	$PTR_ADD $i1,$Tbl
472	$PTR_ADD $i2,$Tbl
473	$PTR_ADD $i3,$Tbl
474# else
475	lbu	$t4,2($i0)		# Te4[s2>>8]
476	_xtr	$i0,$s0,24-2
477	lbu	$t5,2($i1)		# Te4[s3>>8]
478	_xtr	$i1,$s1,24-2
479	lbu	$t6,2($i2)		# Te4[s0>>8]
480	_xtr	$i2,$s2,24-2
481	lbu	$t7,2($i3)		# Te4[s1>>8]
482	_xtr	$i3,$s3,24-2
483
484	and	$i0,0x3fc
485	and	$i1,0x3fc
486	and	$i2,0x3fc
487	and	$i3,0x3fc
488	$PTR_ADD $i0,$Tbl
489	$PTR_ADD $i1,$Tbl
490	$PTR_ADD $i2,$Tbl
491	$PTR_ADD $i3,$Tbl
492	lbu	$t8,2($i0)		# Te4[s0>>24]
493	$PTR_INS $i0,$s3,2,8
494	lbu	$t9,2($i1)		# Te4[s1>>24]
495	$PTR_INS $i1,$s0,2,8
496	lbu	$t10,2($i2)		# Te4[s2>>24]
497	$PTR_INS $i2,$s1,2,8
498	lbu	$t11,2($i3)		# Te4[s3>>24]
499	$PTR_INS $i3,$s2,2,8
500# endif
501	_ins	$t0,16
502	_ins	$t1,16
503	_ins	$t2,16
504	_ins	$t3,16
505
506	_ins2	$t0,$t4,8
507	lbu	$t4,2($i0)		# Te4[s3]
508	_ins2	$t1,$t5,8
509	lbu	$t5,2($i1)		# Te4[s0]
510	_ins2	$t2,$t6,8
511	lbu	$t6,2($i2)		# Te4[s1]
512	_ins2	$t3,$t7,8
513	lbu	$t7,2($i3)		# Te4[s2]
514
515	_ins2	$t0,$t8,24
516	lw	$s0,0($key0)
517	_ins2	$t1,$t9,24
518	lw	$s1,4($key0)
519	_ins2	$t2,$t10,24
520	lw	$s2,8($key0)
521	_ins2	$t3,$t11,24
522	lw	$s3,12($key0)
523
524	_ins2	$t0,$t4,0
525	_ins2	$t1,$t5,0
526	_ins2	$t2,$t6,0
527	_ins2	$t3,$t7,0
528#else
529	lbu	$t4,2($i0)		# Te4[s2>>8]
530	_xtr	$i0,$s0,24-2
531	lbu	$t5,2($i1)		# Te4[s3>>8]
532	_xtr	$i1,$s1,24-2
533	lbu	$t6,2($i2)		# Te4[s0>>8]
534	_xtr	$i2,$s2,24-2
535	lbu	$t7,2($i3)		# Te4[s1>>8]
536	_xtr	$i3,$s3,24-2
537
538	and	$i0,0x3fc
539	and	$i1,0x3fc
540	and	$i2,0x3fc
541	and	$i3,0x3fc
542	$PTR_ADD $i0,$Tbl
543	$PTR_ADD $i1,$Tbl
544	$PTR_ADD $i2,$Tbl
545	$PTR_ADD $i3,$Tbl
546	lbu	$t8,2($i0)		# Te4[s0>>24]
547	_xtr	$i0,$s3,0-2
548	lbu	$t9,2($i1)		# Te4[s1>>24]
549	_xtr	$i1,$s0,0-2
550	lbu	$t10,2($i2)		# Te4[s2>>24]
551	_xtr	$i2,$s1,0-2
552	lbu	$t11,2($i3)		# Te4[s3>>24]
553	_xtr	$i3,$s2,0-2
554
555	and	$i0,0x3fc
556	and	$i1,0x3fc
557	and	$i2,0x3fc
558	and	$i3,0x3fc
559	$PTR_ADD $i0,$Tbl
560	$PTR_ADD $i1,$Tbl
561	$PTR_ADD $i2,$Tbl
562	$PTR_ADD $i3,$Tbl
563
564	_ins	$t0,16
565	_ins	$t1,16
566	_ins	$t2,16
567	_ins	$t3,16
568
569	_ins	$t4,8
570	_ins	$t5,8
571	_ins	$t6,8
572	_ins	$t7,8
573
574	xor	$t0,$t4
575	lbu	$t4,2($i0)		# Te4[s3]
576	xor	$t1,$t5
577	lbu	$t5,2($i1)		# Te4[s0]
578	xor	$t2,$t6
579	lbu	$t6,2($i2)		# Te4[s1]
580	xor	$t3,$t7
581	lbu	$t7,2($i3)		# Te4[s2]
582
583	_ins	$t8,24
584	lw	$s0,0($key0)
585	_ins	$t9,24
586	lw	$s1,4($key0)
587	_ins	$t10,24
588	lw	$s2,8($key0)
589	_ins	$t11,24
590	lw	$s3,12($key0)
591
592	xor	$t0,$t8
593	xor	$t1,$t9
594	xor	$t2,$t10
595	xor	$t3,$t11
596
597	_ins	$t4,0
598	_ins	$t5,0
599	_ins	$t6,0
600	_ins	$t7,0
601
602	xor	$t0,$t4
603	xor	$t1,$t5
604	xor	$t2,$t6
605	xor	$t3,$t7
606#endif
607	xor	$s0,$t0
608	xor	$s1,$t1
609	xor	$s2,$t2
610	xor	$s3,$t3
611
612	jr	$ra
613.end	_mips_AES_encrypt
614
615.align	5
616.globl	AES_encrypt
617.ent	AES_encrypt
618AES_encrypt:
619	.frame	$sp,$FRAMESIZE,$ra
620	.mask	$SAVED_REGS_MASK,-$SZREG
621	.set	noreorder
622___
# Remainder of the exported AES_encrypt, assembled from pieces that depend
# on the flavour. For o32 the .cpload directive is emitted first, using
# the register held in $pf.
623$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
624	.cpload	$pf
625___
# Allocate the frame and save $ra, $fp and $s11..$s4 in descending slots
# from the top of the frame.
626$code.=<<___;
627	$PTR_SUB $sp,$FRAMESIZE
628	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
629	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
630	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
631	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
632	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
633	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
634	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
635	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
636	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
637	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
638___
# NUBI flavours also save registers 15..12 and $gp. The numbers are
# written literally because $s0-$s3 are lexically rebound to $a4-$a7 in
# this scope.
639$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
640	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
641	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
642	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
643	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
644	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
645___
# All flavours other than o32 use .cplocal/.cpsetup instead of .cpload.
646$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
647	.cplocal	$Tbl
648	.cpsetup	$pf,$zero,AES_encrypt
649___
# Load the address of AES_Te into $Tbl, read the 16 input bytes into
# $s0-$s3, call _mips_AES_encrypt, write $s0-$s3 to the output buffer and
# start restoring the saved registers. MIPS32R6/MIPS64R6 builds use plain
# lw/sw; all others use lwl/lwr and swr/swl pairs with the $MSB/$LSB byte
# offsets (presumably because input/output may be unaligned and R6 lacks
# those instructions -- confirm).
650$code.=<<___;
651	.set	reorder
652	$PTR_LA	$Tbl,AES_Te		# PIC-ified 'load address'
653
654#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
655	lw	$s0,0($inp)
656	lw	$s1,4($inp)
657	lw	$s2,8($inp)
658	lw	$s3,12($inp)
659#else
660	lwl	$s0,0+$MSB($inp)
661	lwl	$s1,4+$MSB($inp)
662	lwl	$s2,8+$MSB($inp)
663	lwl	$s3,12+$MSB($inp)
664	lwr	$s0,0+$LSB($inp)
665	lwr	$s1,4+$LSB($inp)
666	lwr	$s2,8+$LSB($inp)
667	lwr	$s3,12+$LSB($inp)
668#endif
669
670	bal	_mips_AES_encrypt
671
672#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
673	sw	$s0,0($out)
674	sw	$s1,4($out)
675	sw	$s2,8($out)
676	sw	$s3,12($out)
677#else
678	swr	$s0,0+$LSB($out)
679	swr	$s1,4+$LSB($out)
680	swr	$s2,8+$LSB($out)
681	swr	$s3,12+$LSB($out)
682	swl	$s0,0+$MSB($out)
683	swl	$s1,4+$MSB($out)
684	swl	$s2,8+$MSB($out)
685	swl	$s3,12+$MSB($out)
686#endif
687
688	.set	noreorder
689	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
690	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
691	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
692	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
693	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
694	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
695	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
696	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
697	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
698	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
699___
# NUBI flavours restore the extra registers saved in the prologue.
700$code.=<<___ if ($flavour =~ /nubi/i);
701	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
702	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
703	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
704	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
705	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
706___
# Return. .set noreorder is still in effect, so the stack-pointer
# adjustment is emitted after the jr exactly as written (i.e. in its
# delay slot).
707$code.=<<___;
708	jr	$ra
709	$PTR_ADD $sp,$FRAMESIZE
710.end	AES_encrypt
711___
712
# Template for the internal routine _mips_AES_decrypt, followed by the
# entry directives of the exported AES_decrypt.
#
# The structure mirrors the encrypt template: initial XOR of $s0-$s3 with
# the first four key words, a count read from offset 240 of the key
# structure, and a main loop available in SmartMIPS, MIPS32R2/MIPS64R2
# and generic lwl/lwr variants, with table lookups labelled Td0-Td3.
# After the loop, eight words spaced 32 bytes apart starting at offset
# 1024 of the table are loaded (marked "prefetch Td4"; the loaded values
# are overwritten before use), and the last round uses byte lookups at
# offset 1024 from the table base.
#
# NOTE(review): _xtr, _ins and _ins2 are not real mnemonics and are not
# defined in this part of the file; presumably they are rewritten by
# post-processing of $code further down -- confirm.
713$code.=<<___;
714.align	5
715.ent	_mips_AES_decrypt
716_mips_AES_decrypt:
717	.frame	$sp,0,$ra
718	.set	reorder
719	lw	$t0,0($key)
720	lw	$t1,4($key)
721	lw	$t2,8($key)
722	lw	$t3,12($key)
723	lw	$cnt,240($key)
724	$PTR_ADD $key0,$key,16
725
726	xor	$s0,$t0
727	xor	$s1,$t1
728	xor	$s2,$t2
729	xor	$s3,$t3
730
731	subu	$cnt,1
732#if defined(__mips_smartmips)
733	ext	$i0,$s3,16,8
734.Loop_dec:
735	ext	$i1,$s0,16,8
736	ext	$i2,$s1,16,8
737	ext	$i3,$s2,16,8
738	lwxs	$t0,$i0($Tbl)		# Td1[s3>>16]
739	ext	$i0,$s2,8,8
740	lwxs	$t1,$i1($Tbl)		# Td1[s0>>16]
741	ext	$i1,$s3,8,8
742	lwxs	$t2,$i2($Tbl)		# Td1[s1>>16]
743	ext	$i2,$s0,8,8
744	lwxs	$t3,$i3($Tbl)		# Td1[s2>>16]
745	ext	$i3,$s1,8,8
746
747	lwxs	$t4,$i0($Tbl)		# Td2[s2>>8]
748	ext	$i0,$s1,0,8
749	lwxs	$t5,$i1($Tbl)		# Td2[s3>>8]
750	ext	$i1,$s2,0,8
751	lwxs	$t6,$i2($Tbl)		# Td2[s0>>8]
752	ext	$i2,$s3,0,8
753	lwxs	$t7,$i3($Tbl)		# Td2[s1>>8]
754	ext	$i3,$s0,0,8
755
756	lwxs	$t8,$i0($Tbl)		# Td3[s1]
757	ext	$i0,$s0,24,8
758	lwxs	$t9,$i1($Tbl)		# Td3[s2]
759	ext	$i1,$s1,24,8
760	lwxs	$t10,$i2($Tbl)		# Td3[s3]
761	ext	$i2,$s2,24,8
762	lwxs	$t11,$i3($Tbl)		# Td3[s0]
763	ext	$i3,$s3,24,8
764
765	rotr	$t0,$t0,8
766	rotr	$t1,$t1,8
767	rotr	$t2,$t2,8
768	rotr	$t3,$t3,8
769
770	rotr	$t4,$t4,16
771	rotr	$t5,$t5,16
772	rotr	$t6,$t6,16
773	rotr	$t7,$t7,16
774
775	xor	$t0,$t4
776	lwxs	$t4,$i0($Tbl)		# Td0[s0>>24]
777	xor	$t1,$t5
778	lwxs	$t5,$i1($Tbl)		# Td0[s1>>24]
779	xor	$t2,$t6
780	lwxs	$t6,$i2($Tbl)		# Td0[s2>>24]
781	xor	$t3,$t7
782	lwxs	$t7,$i3($Tbl)		# Td0[s3>>24]
783
784	rotr	$t8,$t8,24
785	lw	$s0,0($key0)
786	rotr	$t9,$t9,24
787	lw	$s1,4($key0)
788	rotr	$t10,$t10,24
789	lw	$s2,8($key0)
790	rotr	$t11,$t11,24
791	lw	$s3,12($key0)
792
793	xor	$t0,$t8
794	xor	$t1,$t9
795	xor	$t2,$t10
796	xor	$t3,$t11
797
798	xor	$t0,$t4
799	xor	$t1,$t5
800	xor	$t2,$t6
801	xor	$t3,$t7
802
803	subu	$cnt,1
804	$PTR_ADD $key0,16
805	xor	$s0,$t0
806	xor	$s1,$t1
807	xor	$s2,$t2
808	xor	$s3,$t3
809	.set	noreorder
810	bnez	$cnt,.Loop_dec
811	ext	$i0,$s3,16,8
812
813	_xtr	$i0,$s3,16-2
814#else
815	_xtr	$i0,$s3,16-2
816.Loop_dec:
817	_xtr	$i1,$s0,16-2
818	_xtr	$i2,$s1,16-2
819	_xtr	$i3,$s2,16-2
820	and	$i0,0x3fc
821	and	$i1,0x3fc
822	and	$i2,0x3fc
823	and	$i3,0x3fc
824	$PTR_ADD $i0,$Tbl
825	$PTR_ADD $i1,$Tbl
826	$PTR_ADD $i2,$Tbl
827	$PTR_ADD $i3,$Tbl
828#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
829	lw	$t0,0($i0)		# Td1[s3>>16]
830	_xtr	$i0,$s2,8-2
831	lw	$t1,0($i1)		# Td1[s0>>16]
832	_xtr	$i1,$s3,8-2
833	lw	$t2,0($i2)		# Td1[s1>>16]
834	_xtr	$i2,$s0,8-2
835	lw	$t3,0($i3)		# Td1[s2>>16]
836	_xtr	$i3,$s1,8-2
837#else
838	lwl	$t0,3($i0)		# Td1[s3>>16]
839	lwl	$t1,3($i1)		# Td1[s0>>16]
840	lwl	$t2,3($i2)		# Td1[s1>>16]
841	lwl	$t3,3($i3)		# Td1[s2>>16]
842	lwr	$t0,2($i0)		# Td1[s3>>16]
843	_xtr	$i0,$s2,8-2
844	lwr	$t1,2($i1)		# Td1[s0>>16]
845	_xtr	$i1,$s3,8-2
846	lwr	$t2,2($i2)		# Td1[s1>>16]
847	_xtr	$i2,$s0,8-2
848	lwr	$t3,2($i3)		# Td1[s2>>16]
849	_xtr	$i3,$s1,8-2
850#endif
851
852	and	$i0,0x3fc
853	and	$i1,0x3fc
854	and	$i2,0x3fc
855	and	$i3,0x3fc
856	$PTR_ADD $i0,$Tbl
857	$PTR_ADD $i1,$Tbl
858	$PTR_ADD $i2,$Tbl
859	$PTR_ADD $i3,$Tbl
860#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
861	rotr	$t0,$t0,8
862	rotr	$t1,$t1,8
863	rotr	$t2,$t2,8
864	rotr	$t3,$t3,8
865# if defined(_MIPSEL)
866	lw	$t4,0($i0)		# Td2[s2>>8]
867	_xtr	$i0,$s1,0-2
868	lw	$t5,0($i1)		# Td2[s3>>8]
869	_xtr	$i1,$s2,0-2
870	lw	$t6,0($i2)		# Td2[s0>>8]
871	_xtr	$i2,$s3,0-2
872	lw	$t7,0($i3)		# Td2[s1>>8]
873	_xtr	$i3,$s0,0-2
874
875	and	$i0,0x3fc
876	and	$i1,0x3fc
877	and	$i2,0x3fc
878	and	$i3,0x3fc
879	$PTR_ADD $i0,$Tbl
880	$PTR_ADD $i1,$Tbl
881	$PTR_ADD $i2,$Tbl
882	$PTR_ADD $i3,$Tbl
883	lw	$t8,0($i0)		# Td3[s1]
884	$PTR_INS $i0,$s0,2,8
885	lw	$t9,0($i1)		# Td3[s2]
886	$PTR_INS $i1,$s1,2,8
887	lw	$t10,0($i2)		# Td3[s3]
888	$PTR_INS $i2,$s2,2,8
889	lw	$t11,0($i3)		# Td3[s0]
890	$PTR_INS $i3,$s3,2,8
891#else
892	lw	$t4,0($i0)		# Td2[s2>>8]
893	$PTR_INS $i0,$s1,2,8
894	lw	$t5,0($i1)		# Td2[s3>>8]
895	$PTR_INS $i1,$s2,2,8
896	lw	$t6,0($i2)		# Td2[s0>>8]
897	$PTR_INS $i2,$s3,2,8
898	lw	$t7,0($i3)		# Td2[s1>>8]
899	$PTR_INS $i3,$s0,2,8
900
901	lw	$t8,0($i0)		# Td3[s1]
902	_xtr	$i0,$s0,24-2
903	lw	$t9,0($i1)		# Td3[s2]
904	_xtr	$i1,$s1,24-2
905	lw	$t10,0($i2)		# Td3[s3]
906	_xtr	$i2,$s2,24-2
907	lw	$t11,0($i3)		# Td3[s0]
908	_xtr	$i3,$s3,24-2
909
910	and	$i0,0x3fc
911	and	$i1,0x3fc
912	and	$i2,0x3fc
913	and	$i3,0x3fc
914	$PTR_ADD $i0,$Tbl
915	$PTR_ADD $i1,$Tbl
916	$PTR_ADD $i2,$Tbl
917	$PTR_ADD $i3,$Tbl
918#endif
919	rotr	$t4,$t4,16
920	rotr	$t5,$t5,16
921	rotr	$t6,$t6,16
922	rotr	$t7,$t7,16
923
924	rotr	$t8,$t8,24
925	rotr	$t9,$t9,24
926	rotr	$t10,$t10,24
927	rotr	$t11,$t11,24
928#else
929	lwl	$t4,2($i0)		# Td2[s2>>8]
930	lwl	$t5,2($i1)		# Td2[s3>>8]
931	lwl	$t6,2($i2)		# Td2[s0>>8]
932	lwl	$t7,2($i3)		# Td2[s1>>8]
933	lwr	$t4,1($i0)		# Td2[s2>>8]
934	_xtr	$i0,$s1,0-2
935	lwr	$t5,1($i1)		# Td2[s3>>8]
936	_xtr	$i1,$s2,0-2
937	lwr	$t6,1($i2)		# Td2[s0>>8]
938	_xtr	$i2,$s3,0-2
939	lwr	$t7,1($i3)		# Td2[s1>>8]
940	_xtr	$i3,$s0,0-2
941
942	and	$i0,0x3fc
943	and	$i1,0x3fc
944	and	$i2,0x3fc
945	and	$i3,0x3fc
946	$PTR_ADD $i0,$Tbl
947	$PTR_ADD $i1,$Tbl
948	$PTR_ADD $i2,$Tbl
949	$PTR_ADD $i3,$Tbl
950	lwl	$t8,1($i0)		# Td3[s1]
951	lwl	$t9,1($i1)		# Td3[s2]
952	lwl	$t10,1($i2)		# Td3[s3]
953	lwl	$t11,1($i3)		# Td3[s0]
954	lwr	$t8,0($i0)		# Td3[s1]
955	_xtr	$i0,$s0,24-2
956	lwr	$t9,0($i1)		# Td3[s2]
957	_xtr	$i1,$s1,24-2
958	lwr	$t10,0($i2)		# Td3[s3]
959	_xtr	$i2,$s2,24-2
960	lwr	$t11,0($i3)		# Td3[s0]
961	_xtr	$i3,$s3,24-2
962
963	and	$i0,0x3fc
964	and	$i1,0x3fc
965	and	$i2,0x3fc
966	and	$i3,0x3fc
967	$PTR_ADD $i0,$Tbl
968	$PTR_ADD $i1,$Tbl
969	$PTR_ADD $i2,$Tbl
970	$PTR_ADD $i3,$Tbl
971#endif
972
973	xor	$t0,$t4
974	lw	$t4,0($i0)		# Td0[s0>>24]
975	xor	$t1,$t5
976	lw	$t5,0($i1)		# Td0[s1>>24]
977	xor	$t2,$t6
978	lw	$t6,0($i2)		# Td0[s2>>24]
979	xor	$t3,$t7
980	lw	$t7,0($i3)		# Td0[s3>>24]
981
982	xor	$t0,$t8
983	lw	$s0,0($key0)
984	xor	$t1,$t9
985	lw	$s1,4($key0)
986	xor	$t2,$t10
987	lw	$s2,8($key0)
988	xor	$t3,$t11
989	lw	$s3,12($key0)
990
991	xor	$t0,$t4
992	xor	$t1,$t5
993	xor	$t2,$t6
994	xor	$t3,$t7
995
996	subu	$cnt,1
997	$PTR_ADD $key0,16
998	xor	$s0,$t0
999	xor	$s1,$t1
1000	xor	$s2,$t2
1001	xor	$s3,$t3
1002	.set	noreorder
1003	bnez	$cnt,.Loop_dec
1004	_xtr	$i0,$s3,16-2
1005#endif
1006
1007	.set	reorder
1008	lw	$t4,1024($Tbl)		# prefetch Td4
1009	_xtr	$i0,$s3,16
1010	lw	$t5,1024+32($Tbl)
1011	_xtr	$i1,$s0,16
1012	lw	$t6,1024+64($Tbl)
1013	_xtr	$i2,$s1,16
1014	lw	$t7,1024+96($Tbl)
1015	_xtr	$i3,$s2,16
1016	lw	$t8,1024+128($Tbl)
1017	and	$i0,0xff
1018	lw	$t9,1024+160($Tbl)
1019	and	$i1,0xff
1020	lw	$t10,1024+192($Tbl)
1021	and	$i2,0xff
1022	lw	$t11,1024+224($Tbl)
1023	and	$i3,0xff
1024
1025	$PTR_ADD $i0,$Tbl
1026	$PTR_ADD $i1,$Tbl
1027	$PTR_ADD $i2,$Tbl
1028	$PTR_ADD $i3,$Tbl
1029	lbu	$t0,1024($i0)		# Td4[s3>>16]
1030	_xtr	$i0,$s2,8
1031	lbu	$t1,1024($i1)		# Td4[s0>>16]
1032	_xtr	$i1,$s3,8
1033	lbu	$t2,1024($i2)		# Td4[s1>>16]
1034	_xtr	$i2,$s0,8
1035	lbu	$t3,1024($i3)		# Td4[s2>>16]
1036	_xtr	$i3,$s1,8
1037
1038	and	$i0,0xff
1039	and	$i1,0xff
1040	and	$i2,0xff
1041	and	$i3,0xff
1042	$PTR_ADD $i0,$Tbl
1043	$PTR_ADD $i1,$Tbl
1044	$PTR_ADD $i2,$Tbl
1045	$PTR_ADD $i3,$Tbl
1046#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
1047# if defined(_MIPSEL)
1048	lbu	$t4,1024($i0)		# Td4[s2>>8]
1049	$PTR_INS $i0,$s0,0,8
1050	lbu	$t5,1024($i1)		# Td4[s3>>8]
1051	$PTR_INS $i1,$s1,0,8
1052	lbu	$t6,1024($i2)		# Td4[s0>>8]
1053	$PTR_INS $i2,$s2,0,8
1054	lbu	$t7,1024($i3)		# Td4[s1>>8]
1055	$PTR_INS $i3,$s3,0,8
1056
1057	lbu	$t8,1024($i0)		# Td4[s0>>24]
1058	_xtr	$i0,$s1,0
1059	lbu	$t9,1024($i1)		# Td4[s1>>24]
1060	_xtr	$i1,$s2,0
1061	lbu	$t10,1024($i2)		# Td4[s2>>24]
1062	_xtr	$i2,$s3,0
1063	lbu	$t11,1024($i3)		# Td4[s3>>24]
1064	_xtr	$i3,$s0,0
1065
1066	$PTR_ADD $i0,$Tbl
1067	$PTR_ADD $i1,$Tbl
1068	$PTR_ADD $i2,$Tbl
1069	$PTR_ADD $i3,$Tbl
1070# else
1071	lbu	$t4,1024($i0)		# Td4[s2>>8]
1072	_xtr	$i0,$s0,24
1073	lbu	$t5,1024($i1)		# Td4[s3>>8]
1074	_xtr	$i1,$s1,24
1075	lbu	$t6,1024($i2)		# Td4[s0>>8]
1076	_xtr	$i2,$s2,24
1077	lbu	$t7,1024($i3)		# Td4[s1>>8]
1078	_xtr	$i3,$s3,24
1079
1080	$PTR_ADD $i0,$Tbl
1081	$PTR_ADD $i1,$Tbl
1082	$PTR_ADD $i2,$Tbl
1083	$PTR_ADD $i3,$Tbl
1084	lbu	$t8,1024($i0)		# Td4[s0>>24]
1085	$PTR_INS $i0,$s1,0,8
1086	lbu	$t9,1024($i1)		# Td4[s1>>24]
1087	$PTR_INS $i1,$s2,0,8
1088	lbu	$t10,1024($i2)		# Td4[s2>>24]
1089	$PTR_INS $i2,$s3,0,8
1090	lbu	$t11,1024($i3)		# Td4[s3>>24]
1091	$PTR_INS $i3,$s0,0,8
1092# endif
1093	_ins	$t0,16
1094	_ins	$t1,16
1095	_ins	$t2,16
1096	_ins	$t3,16
1097
1098	_ins2	$t0,$t4,8
1099	lbu	$t4,1024($i0)		# Td4[s1]
1100	_ins2	$t1,$t5,8
1101	lbu	$t5,1024($i1)		# Td4[s2]
1102	_ins2	$t2,$t6,8
1103	lbu	$t6,1024($i2)		# Td4[s3]
1104	_ins2	$t3,$t7,8
1105	lbu	$t7,1024($i3)		# Td4[s0]
1106
1107	_ins2	$t0,$t8,24
1108	lw	$s0,0($key0)
1109	_ins2	$t1,$t9,24
1110	lw	$s1,4($key0)
1111	_ins2	$t2,$t10,24
1112	lw	$s2,8($key0)
1113	_ins2	$t3,$t11,24
1114	lw	$s3,12($key0)
1115
1116	_ins2	$t0,$t4,0
1117	_ins2	$t1,$t5,0
1118	_ins2	$t2,$t6,0
1119	_ins2	$t3,$t7,0
1120#else
1121	lbu	$t4,1024($i0)		# Td4[s2>>8]
1122	_xtr	$i0,$s0,24
1123	lbu	$t5,1024($i1)		# Td4[s3>>8]
1124	_xtr	$i1,$s1,24
1125	lbu	$t6,1024($i2)		# Td4[s0>>8]
1126	_xtr	$i2,$s2,24
1127	lbu	$t7,1024($i3)		# Td4[s1>>8]
1128	_xtr	$i3,$s3,24
1129
1130	$PTR_ADD $i0,$Tbl
1131	$PTR_ADD $i1,$Tbl
1132	$PTR_ADD $i2,$Tbl
1133	$PTR_ADD $i3,$Tbl
1134	lbu	$t8,1024($i0)		# Td4[s0>>24]
1135	_xtr	$i0,$s1,0
1136	lbu	$t9,1024($i1)		# Td4[s1>>24]
1137	_xtr	$i1,$s2,0
1138	lbu	$t10,1024($i2)		# Td4[s2>>24]
1139	_xtr	$i2,$s3,0
1140	lbu	$t11,1024($i3)		# Td4[s3>>24]
1141	_xtr	$i3,$s0,0
1142
1143	$PTR_ADD $i0,$Tbl
1144	$PTR_ADD $i1,$Tbl
1145	$PTR_ADD $i2,$Tbl
1146	$PTR_ADD $i3,$Tbl
1147
1148	_ins	$t0,16
1149	_ins	$t1,16
1150	_ins	$t2,16
1151	_ins	$t3,16
1152
1153	_ins	$t4,8
1154	_ins	$t5,8
1155	_ins	$t6,8
1156	_ins	$t7,8
1157
1158	xor	$t0,$t4
1159	lbu	$t4,1024($i0)		# Td4[s1]
1160	xor	$t1,$t5
1161	lbu	$t5,1024($i1)		# Td4[s2]
1162	xor	$t2,$t6
1163	lbu	$t6,1024($i2)		# Td4[s3]
1164	xor	$t3,$t7
1165	lbu	$t7,1024($i3)		# Td4[s0]
1166
1167	_ins	$t8,24
1168	lw	$s0,0($key0)
1169	_ins	$t9,24
1170	lw	$s1,4($key0)
1171	_ins	$t10,24
1172	lw	$s2,8($key0)
1173	_ins	$t11,24
1174	lw	$s3,12($key0)
1175
1176	xor	$t0,$t8
1177	xor	$t1,$t9
1178	xor	$t2,$t10
1179	xor	$t3,$t11
1180
1181	_ins	$t4,0
1182	_ins	$t5,0
1183	_ins	$t6,0
1184	_ins	$t7,0
1185
1186	xor	$t0,$t4
1187	xor	$t1,$t5
1188	xor	$t2,$t6
1189	xor	$t3,$t7
1190#endif
1191
1192	xor	$s0,$t0
1193	xor	$s1,$t1
1194	xor	$s2,$t2
1195	xor	$s3,$t3
1196
1197	jr	$ra
1198.end	_mips_AES_decrypt
1199
1200.align	5
1201.globl	AES_decrypt
1202.ent	AES_decrypt
1203AES_decrypt:
1204	.frame	$sp,$FRAMESIZE,$ra
1205	.mask	$SAVED_REGS_MASK,-$SZREG
1206	.set	noreorder
1207___
# Remainder of the exported AES_decrypt; same layout as the AES_encrypt
# wrapper. For o32 the .cpload directive is emitted first, using the
# register held in $pf.
1208$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
1209	.cpload	$pf
1210___
# Allocate the frame and save $ra, $fp and $s11..$s4 in descending slots
# from the top of the frame.
1211$code.=<<___;
1212	$PTR_SUB $sp,$FRAMESIZE
1213	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
1214	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
1215	$REG_S	$s11,$FRAMESIZE-3*$SZREG($sp)
1216	$REG_S	$s10,$FRAMESIZE-4*$SZREG($sp)
1217	$REG_S	$s9,$FRAMESIZE-5*$SZREG($sp)
1218	$REG_S	$s8,$FRAMESIZE-6*$SZREG($sp)
1219	$REG_S	$s7,$FRAMESIZE-7*$SZREG($sp)
1220	$REG_S	$s6,$FRAMESIZE-8*$SZREG($sp)
1221	$REG_S	$s5,$FRAMESIZE-9*$SZREG($sp)
1222	$REG_S	$s4,$FRAMESIZE-10*$SZREG($sp)
1223___
# NUBI flavours also save registers 15..12 and $gp. The numbers are
# written literally because $s0-$s3 are lexically rebound to $a4-$a7 in
# this scope.
1224$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
1225	$REG_S	\$15,$FRAMESIZE-11*$SZREG($sp)
1226	$REG_S	\$14,$FRAMESIZE-12*$SZREG($sp)
1227	$REG_S	\$13,$FRAMESIZE-13*$SZREG($sp)
1228	$REG_S	\$12,$FRAMESIZE-14*$SZREG($sp)
1229	$REG_S	$gp,$FRAMESIZE-15*$SZREG($sp)
1230___
# All flavours other than o32 use .cplocal/.cpsetup instead of .cpload.
1231$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
1232	.cplocal	$Tbl
1233	.cpsetup	$pf,$zero,AES_decrypt
1234___
# Load the address of AES_Td into $Tbl, read the 16 input bytes into
# $s0-$s3, call _mips_AES_decrypt, write $s0-$s3 to the output buffer and
# start restoring the saved registers. MIPS32R6/MIPS64R6 builds use plain
# lw/sw; all others use lwl/lwr and swr/swl pairs with the $MSB/$LSB byte
# offsets.
1235$code.=<<___;
1236	.set	reorder
1237	$PTR_LA	$Tbl,AES_Td		# PIC-ified 'load address'
1238
1239#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1240	lw	$s0,0($inp)
1241	lw	$s1,4($inp)
1242	lw	$s2,8($inp)
1243	lw	$s3,12($inp)
1244#else
1245	lwl	$s0,0+$MSB($inp)
1246	lwl	$s1,4+$MSB($inp)
1247	lwl	$s2,8+$MSB($inp)
1248	lwl	$s3,12+$MSB($inp)
1249	lwr	$s0,0+$LSB($inp)
1250	lwr	$s1,4+$LSB($inp)
1251	lwr	$s2,8+$LSB($inp)
1252	lwr	$s3,12+$LSB($inp)
1253#endif
1254
1255	bal	_mips_AES_decrypt
1256
1257#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
1258	sw	$s0,0($out)
1259	sw	$s1,4($out)
1260	sw	$s2,8($out)
1261	sw	$s3,12($out)
1262#else
1263	swr	$s0,0+$LSB($out)
1264	swr	$s1,4+$LSB($out)
1265	swr	$s2,8+$LSB($out)
1266	swr	$s3,12+$LSB($out)
1267	swl	$s0,0+$MSB($out)
1268	swl	$s1,4+$MSB($out)
1269	swl	$s2,8+$MSB($out)
1270	swl	$s3,12+$MSB($out)
1271#endif
1272
1273	.set	noreorder
1274	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
1275	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
1276	$REG_L	$s11,$FRAMESIZE-3*$SZREG($sp)
1277	$REG_L	$s10,$FRAMESIZE-4*$SZREG($sp)
1278	$REG_L	$s9,$FRAMESIZE-5*$SZREG($sp)
1279	$REG_L	$s8,$FRAMESIZE-6*$SZREG($sp)
1280	$REG_L	$s7,$FRAMESIZE-7*$SZREG($sp)
1281	$REG_L	$s6,$FRAMESIZE-8*$SZREG($sp)
1282	$REG_L	$s5,$FRAMESIZE-9*$SZREG($sp)
1283	$REG_L	$s4,$FRAMESIZE-10*$SZREG($sp)
1284___
# NUBI flavours restore the extra registers saved in the prologue.
1285$code.=<<___ if ($flavour =~ /nubi/i);
1286	$REG_L	\$15,$FRAMESIZE-11*$SZREG($sp)
1287	$REG_L	\$14,$FRAMESIZE-12*$SZREG($sp)
1288	$REG_L	\$13,$FRAMESIZE-13*$SZREG($sp)
1289	$REG_L	\$12,$FRAMESIZE-14*$SZREG($sp)
1290	$REG_L	$gp,$FRAMESIZE-15*$SZREG($sp)
1291___
# Return. .set noreorder is still in effect, so the stack-pointer
# adjustment is emitted after the jr exactly as written (i.e. in its
# delay slot).
1292$code.=<<___;
1293	jr	$ra
1294	$PTR_ADD $sp,$FRAMESIZE
1295.end	AES_decrypt
1296___
# End of the lexical scope holding the block encrypt/decrypt register map.
1297}}}
1298
1299{{{
1300my $FRAMESIZE=8*$SZREG;
1301my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";
1302
1303my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
1304my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1305my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
1306my ($rcon,$cnt)=($gp,$fp);
1307
# _mips_AES_set_encrypt_key -- internal key-expansion routine, followed
# by the header of its public wrapper AES_set_encrypt_key.
#
#   in:   $inp   user key (read with lwl/lwr pairs, or plain lw when
#                assembled for MIPS32R6/MIPS64R6)
#         $bits  key length: 128, 192 or 256
#         $key   key schedule to fill in
#         $Tbl   address of AES_Te4; the rcon words are read from
#                $Tbl+256
#   out:  $t0    0 on success, -1 if $inp or $key is zero, -2 if $bits
#                is none of the three supported lengths
#         On success the round count (10, 12 or 14) is stored at byte
#         offset 240 of the schedule, $key is rewound to the start of
#         the schedule and $cnt holds the round count.
#
# The routine has no stack frame of its own and overwrites $at,
# $t0-$t2, $a4-$a7, $s0-$s3, $gp and $fp; the wrappers are responsible
# for preserving whatever their ABI requires.
$code.=<<___;
.align	5
.ent	_mips_AES_set_encrypt_key
_mips_AES_set_encrypt_key:
	.frame	$sp,0,$ra
	.set	noreorder
	beqz	$inp,.Lekey_done
	li	$t0,-1
	beqz	$key,.Lekey_done
	$PTR_ADD $rcon,$Tbl,256

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk0,0($inp)		# load 128 bits
	lw	$rk1,4($inp)
	lw	$rk2,8($inp)
	lw	$rk3,12($inp)
#else
	lwl	$rk0,0+$MSB($inp)	# load 128 bits
	lwl	$rk1,4+$MSB($inp)
	lwl	$rk2,8+$MSB($inp)
	lwl	$rk3,12+$MSB($inp)
	lwr	$rk0,0+$LSB($inp)
	lwr	$rk1,4+$LSB($inp)
	lwr	$rk2,8+$LSB($inp)
	lwr	$rk3,12+$LSB($inp)
#endif
	li	$at,128
	.set	noreorder
	beq	$bits,$at,.L128bits
	li	$cnt,10

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk4,16($inp)		# load 192 bits
	lw	$rk5,20($inp)
#else
	lwl	$rk4,16+$MSB($inp)	# load 192 bits
	lwl	$rk5,20+$MSB($inp)
	lwr	$rk4,16+$LSB($inp)
	lwr	$rk5,20+$LSB($inp)
#endif
	li	$at,192
	.set	noreorder
	beq	$bits,$at,.L192bits
	li	$cnt,8

	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	$rk6,24($inp)		# load 256 bits
	lw	$rk7,28($inp)
#else
	lwl	$rk6,24+$MSB($inp)	# load 256 bits
	lwl	$rk7,28+$MSB($inp)
	lwr	$rk6,24+$LSB($inp)
	lwr	$rk7,28+$LSB($inp)
#endif
	li	$at,256
	.set	noreorder
	beq	$bits,$at,.L256bits
	li	$cnt,7

	b	.Lekey_done
	li	$t0,-2

.align	4
.L128bits:
	.set	reorder
	srl	$i0,$rk3,16
	srl	$i1,$rk3,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk3,0xff
	srl	$i3,$rk3,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	subu	$cnt,1
	$PTR_ADD $key,16

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2

	.set	noreorder
	bnez	$cnt,.L128bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,10
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,80($key)
	b	.Lekey_done
	$PTR_SUB $key,10*16

.align	4
.L192bits:
	.set	reorder
	srl	$i0,$rk5,16
	srl	$i1,$rk5,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk5,0xff
	srl	$i3,$rk5,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	subu	$cnt,1
	$PTR_ADD $key,24

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	xor	$rk4,$rk3
	xor	$rk5,$rk4

	.set	noreorder
	bnez	$cnt,.L192bits
	$PTR_ADD $rcon,4

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	li	$cnt,12
	sw	$rk3,12($key)
	li	$t0,0
	sw	$cnt,48($key)
	b	.Lekey_done
	$PTR_SUB $key,12*16

.align	4
.L256bits:
	.set	reorder
	srl	$i0,$rk7,16
	srl	$i1,$rk7,8
	and	$i0,0xff
	and	$i1,0xff
	and	$i2,$rk7,0xff
	srl	$i3,$rk7,24
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)

	sw	$rk0,0($key)
	sw	$rk1,4($key)
	sw	$rk2,8($key)
	sw	$rk3,12($key)
	sw	$rk4,16($key)
	sw	$rk5,20($key)
	sw	$rk6,24($key)
	sw	$rk7,28($key)
	subu	$cnt,1

	_bias	$i0,24
	_bias	$i1,16
	_bias	$i2,8
	_bias	$i3,0

	xor	$rk0,$i0
	lw	$i0,0($rcon)
	xor	$rk0,$i1
	xor	$rk0,$i2
	xor	$rk0,$i3
	xor	$rk0,$i0

	xor	$rk1,$rk0
	xor	$rk2,$rk1
	xor	$rk3,$rk2
	beqz	$cnt,.L256bits_done

	srl	$i0,$rk3,24
	srl	$i1,$rk3,16
	srl	$i2,$rk3,8
	and	$i3,$rk3,0xff
	and	$i1,0xff
	and	$i2,0xff
	$PTR_ADD $i0,$Tbl
	$PTR_ADD $i1,$Tbl
	$PTR_ADD $i2,$Tbl
	$PTR_ADD $i3,$Tbl
	lbu	$i0,0($i0)
	lbu	$i1,0($i1)
	lbu	$i2,0($i2)
	lbu	$i3,0($i3)
	sll	$i0,24
	sll	$i1,16
	sll	$i2,8

	xor	$rk4,$i0
	xor	$rk4,$i1
	xor	$rk4,$i2
	xor	$rk4,$i3

	xor	$rk5,$rk4
	xor	$rk6,$rk5
	xor	$rk7,$rk6

	$PTR_ADD $key,32
	.set	noreorder
	b	.L256bits
	$PTR_ADD $rcon,4

.L256bits_done:
	sw	$rk0,32($key)
	sw	$rk1,36($key)
	sw	$rk2,40($key)
	li	$cnt,14
	sw	$rk3,44($key)
	li	$t0,0
	sw	$cnt,48($key)
	$PTR_SUB $key,12*16

.Lekey_done:
	jr	$ra
	nop
.end	_mips_AES_set_encrypt_key

.globl	AES_set_encrypt_key
.ent	AES_set_encrypt_key
AES_set_encrypt_key:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
# AES_set_encrypt_key wrapper: allocates a $FRAMESIZE-byte frame, saves
# $ra and $fp (plus $s0-$s3 and $gp for the nubi flavour), points $Tbl
# at AES_Te4, calls the routine above and copies its status from $t0
# into $a0 before returning.
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,AES_set_encrypt_key
___
$code.=<<___;
	.set	reorder
	$PTR_LA	$Tbl,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	.set	noreorder
	move	$a0,$t0
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# The nubi restore must read the very slots the nubi prologue above
# wrote (3..7 registers below the top of the frame).  The frame here is
# only 8*$SZREG bytes, so slot numbers 11..15, as used by the epilogue
# of the 16-slot AES_decrypt frame, would resolve to negative offsets
# from $sp and reload the registers from memory that was never saved.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Return; the frame is released in the branch delay slot.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	AES_set_encrypt_key
___
1628
# AES_set_decrypt_key generator.
#
# The emitted routine first builds the encryption schedule by calling
# _mips_AES_set_encrypt_key and gives up (returning that routine's
# negative status) if it failed.  Otherwise it
#   1. reverses the order of the 16-byte round keys in place (.Lswap),
#      walking $head up from the first and $tail down from the last
#      round key until the two pointers meet, and
#   2. runs every 32-bit word of the inner round keys, i.e. the
#      4*(rounds-1) words starting at byte offset 16, through the
#      InvMixColumns transform (.Lmix): tp2/tp4/tp8 are the word
#      multiplied by 2, 4 and 8 in GF(2^8), computed on all four bytes
#      at once with the 0x80808080/0x7f7f7f7f/0x1b1b1b1b masks, and
#      tp9/tpb/tpd/tpe are the usual 9/11/13/14 multiples combined with
#      byte rotations.
# The status left in $t0 is copied to $a0 before returning.
#
# $head/$tail reuse the registers of $inp/$bits, and the $tp* names
# reuse the registers that held $rk0..$rk7 during key expansion.
my ($head,$tail)=($inp,$bits);
my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
$code.=<<___;
.align	5
.globl	AES_set_decrypt_key
.ent	AES_set_decrypt_key
AES_set_decrypt_key:
	.frame	$sp,$FRAMESIZE,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
___
$code.=<<___ if ($flavour =~ /o32/i);	# o32 PIC-ification
	.cpload	$pf
___
$code.=<<___;
	$PTR_SUB $sp,$FRAMESIZE
	$REG_S	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_S	$fp,$FRAMESIZE-2*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_S	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_S	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_S	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_S	$gp,$FRAMESIZE-7*$SZREG($sp)
___
$code.=<<___ if ($flavour !~ /o32/i);	# non-o32 PIC-ification
	.cplocal	$Tbl
	.cpsetup	$pf,$zero,AES_set_decrypt_key
___
$code.=<<___;
	.set	reorder
	$PTR_LA	$Tbl,AES_Te4		# PIC-ified 'load address'

	bal	_mips_AES_set_encrypt_key

	bltz	$t0,.Ldkey_done

	sll	$at,$cnt,4
	$PTR_ADD $head,$key,0
	$PTR_ADD $tail,$key,$at
.align	4
.Lswap:
	lw	$rk0,0($head)
	lw	$rk1,4($head)
	lw	$rk2,8($head)
	lw	$rk3,12($head)
	lw	$rk4,0($tail)
	lw	$rk5,4($tail)
	lw	$rk6,8($tail)
	lw	$rk7,12($tail)
	sw	$rk0,0($tail)
	sw	$rk1,4($tail)
	sw	$rk2,8($tail)
	sw	$rk3,12($tail)
	$PTR_ADD $head,16
	$PTR_SUB $tail,16
	sw	$rk4,-16($head)
	sw	$rk5,-12($head)
	sw	$rk6,-8($head)
	sw	$rk7,-4($head)
	bne	$head,$tail,.Lswap

	lw	$tp1,16($key)		# modulo-scheduled
	lui	$x80808080,0x8080
	subu	$cnt,1
	or	$x80808080,0x8080
	sll	$cnt,2
	$PTR_ADD $key,16
	lui	$x1b1b1b1b,0x1b1b
	nor	$x7f7f7f7f,$zero,$x80808080
	or	$x1b1b1b1b,0x1b1b
.align	4
.Lmix:
	and	$m,$tp1,$x80808080
	and	$tp2,$tp1,$x7f7f7f7f
	srl	$tp4,$m,7
	addu	$tp2,$tp2		# tp2<<1
	subu	$m,$tp4
	and	$m,$x1b1b1b1b
	xor	$tp2,$m

	and	$m,$tp2,$x80808080
	and	$tp4,$tp2,$x7f7f7f7f
	srl	$tp8,$m,7
	addu	$tp4,$tp4		# tp4<<1
	subu	$m,$tp8
	and	$m,$x1b1b1b1b
	xor	$tp4,$m

	and	$m,$tp4,$x80808080
	and	$tp8,$tp4,$x7f7f7f7f
	srl	$tp9,$m,7
	addu	$tp8,$tp8		# tp8<<1
	subu	$m,$tp9
	and	$m,$x1b1b1b1b
	xor	$tp8,$m

	xor	$tp9,$tp8,$tp1
	xor	$tpe,$tp8,$tp4
	xor	$tpb,$tp9,$tp2
	xor	$tpd,$tp9,$tp4

#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	rotr	$tp1,$tpd,16
	 xor	$tpe,$tp2
	rotr	$tp2,$tp9,8
	xor	$tpe,$tp1
	rotr	$tp4,$tpb,24
	xor	$tpe,$tp2
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp4
#else
	_ror	$tp1,$tpd,16
	 xor	$tpe,$tp2
	_ror	$tp2,$tpd,-16
	xor	$tpe,$tp1
	_ror	$tp1,$tp9,8
	xor	$tpe,$tp2
	_ror	$tp2,$tp9,-24
	xor	$tpe,$tp1
	_ror	$tp1,$tpb,24
	xor	$tpe,$tp2
	_ror	$tp2,$tpb,-8
	xor	$tpe,$tp1
	lw	$tp1,4($key)		# modulo-scheduled
	xor	$tpe,$tp2
#endif
	subu	$cnt,1
	sw	$tpe,0($key)
	$PTR_ADD $key,4
	bnez	$cnt,.Lmix

	li	$t0,0
.Ldkey_done:
	.set	noreorder
	move	$a0,$t0
	$REG_L	$ra,$FRAMESIZE-1*$SZREG($sp)
	$REG_L	$fp,$FRAMESIZE-2*$SZREG($sp)
___
# The nubi restore must read the very slots the nubi prologue above
# wrote (3..7 registers below the top of the frame).  The frame here is
# only 8*$SZREG bytes, so slot numbers 11..15, as used by the epilogue
# of the 16-slot AES_decrypt frame, would resolve to negative offsets
# from $sp and reload the registers from memory that was never saved.
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,$FRAMESIZE-3*$SZREG($sp)
	$REG_L	$s2,$FRAMESIZE-4*$SZREG($sp)
	$REG_L	$s1,$FRAMESIZE-5*$SZREG($sp)
	$REG_L	$s0,$FRAMESIZE-6*$SZREG($sp)
	$REG_L	$gp,$FRAMESIZE-7*$SZREG($sp)
___
# Return; the frame is released in the branch delay slot.
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE
.end	AES_set_decrypt_key
___
1782}}}
1783
1784######################################################################
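# Tables are kept in endian-neutral manner: every entry is spelled out
# with .byte directives, so the in-memory layout is the same on big- and
# little-endian targets.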
1786$code.=<<___;
1787.rdata
1788.align	10
1789AES_Te:
1790.byte	0xc6,0x63,0x63,0xa5,	0xf8,0x7c,0x7c,0x84	# Te0
1791.byte	0xee,0x77,0x77,0x99,	0xf6,0x7b,0x7b,0x8d
1792.byte	0xff,0xf2,0xf2,0x0d,	0xd6,0x6b,0x6b,0xbd
1793.byte	0xde,0x6f,0x6f,0xb1,	0x91,0xc5,0xc5,0x54
1794.byte	0x60,0x30,0x30,0x50,	0x02,0x01,0x01,0x03
1795.byte	0xce,0x67,0x67,0xa9,	0x56,0x2b,0x2b,0x7d
1796.byte	0xe7,0xfe,0xfe,0x19,	0xb5,0xd7,0xd7,0x62
1797.byte	0x4d,0xab,0xab,0xe6,	0xec,0x76,0x76,0x9a
1798.byte	0x8f,0xca,0xca,0x45,	0x1f,0x82,0x82,0x9d
1799.byte	0x89,0xc9,0xc9,0x40,	0xfa,0x7d,0x7d,0x87
1800.byte	0xef,0xfa,0xfa,0x15,	0xb2,0x59,0x59,0xeb
1801.byte	0x8e,0x47,0x47,0xc9,	0xfb,0xf0,0xf0,0x0b
1802.byte	0x41,0xad,0xad,0xec,	0xb3,0xd4,0xd4,0x67
1803.byte	0x5f,0xa2,0xa2,0xfd,	0x45,0xaf,0xaf,0xea
1804.byte	0x23,0x9c,0x9c,0xbf,	0x53,0xa4,0xa4,0xf7
1805.byte	0xe4,0x72,0x72,0x96,	0x9b,0xc0,0xc0,0x5b
1806.byte	0x75,0xb7,0xb7,0xc2,	0xe1,0xfd,0xfd,0x1c
1807.byte	0x3d,0x93,0x93,0xae,	0x4c,0x26,0x26,0x6a
1808.byte	0x6c,0x36,0x36,0x5a,	0x7e,0x3f,0x3f,0x41
1809.byte	0xf5,0xf7,0xf7,0x02,	0x83,0xcc,0xcc,0x4f
1810.byte	0x68,0x34,0x34,0x5c,	0x51,0xa5,0xa5,0xf4
1811.byte	0xd1,0xe5,0xe5,0x34,	0xf9,0xf1,0xf1,0x08
1812.byte	0xe2,0x71,0x71,0x93,	0xab,0xd8,0xd8,0x73
1813.byte	0x62,0x31,0x31,0x53,	0x2a,0x15,0x15,0x3f
1814.byte	0x08,0x04,0x04,0x0c,	0x95,0xc7,0xc7,0x52
1815.byte	0x46,0x23,0x23,0x65,	0x9d,0xc3,0xc3,0x5e
1816.byte	0x30,0x18,0x18,0x28,	0x37,0x96,0x96,0xa1
1817.byte	0x0a,0x05,0x05,0x0f,	0x2f,0x9a,0x9a,0xb5
1818.byte	0x0e,0x07,0x07,0x09,	0x24,0x12,0x12,0x36
1819.byte	0x1b,0x80,0x80,0x9b,	0xdf,0xe2,0xe2,0x3d
1820.byte	0xcd,0xeb,0xeb,0x26,	0x4e,0x27,0x27,0x69
1821.byte	0x7f,0xb2,0xb2,0xcd,	0xea,0x75,0x75,0x9f
1822.byte	0x12,0x09,0x09,0x1b,	0x1d,0x83,0x83,0x9e
1823.byte	0x58,0x2c,0x2c,0x74,	0x34,0x1a,0x1a,0x2e
1824.byte	0x36,0x1b,0x1b,0x2d,	0xdc,0x6e,0x6e,0xb2
1825.byte	0xb4,0x5a,0x5a,0xee,	0x5b,0xa0,0xa0,0xfb
1826.byte	0xa4,0x52,0x52,0xf6,	0x76,0x3b,0x3b,0x4d
1827.byte	0xb7,0xd6,0xd6,0x61,	0x7d,0xb3,0xb3,0xce
1828.byte	0x52,0x29,0x29,0x7b,	0xdd,0xe3,0xe3,0x3e
1829.byte	0x5e,0x2f,0x2f,0x71,	0x13,0x84,0x84,0x97
1830.byte	0xa6,0x53,0x53,0xf5,	0xb9,0xd1,0xd1,0x68
1831.byte	0x00,0x00,0x00,0x00,	0xc1,0xed,0xed,0x2c
1832.byte	0x40,0x20,0x20,0x60,	0xe3,0xfc,0xfc,0x1f
1833.byte	0x79,0xb1,0xb1,0xc8,	0xb6,0x5b,0x5b,0xed
1834.byte	0xd4,0x6a,0x6a,0xbe,	0x8d,0xcb,0xcb,0x46
1835.byte	0x67,0xbe,0xbe,0xd9,	0x72,0x39,0x39,0x4b
1836.byte	0x94,0x4a,0x4a,0xde,	0x98,0x4c,0x4c,0xd4
1837.byte	0xb0,0x58,0x58,0xe8,	0x85,0xcf,0xcf,0x4a
1838.byte	0xbb,0xd0,0xd0,0x6b,	0xc5,0xef,0xef,0x2a
1839.byte	0x4f,0xaa,0xaa,0xe5,	0xed,0xfb,0xfb,0x16
1840.byte	0x86,0x43,0x43,0xc5,	0x9a,0x4d,0x4d,0xd7
1841.byte	0x66,0x33,0x33,0x55,	0x11,0x85,0x85,0x94
1842.byte	0x8a,0x45,0x45,0xcf,	0xe9,0xf9,0xf9,0x10
1843.byte	0x04,0x02,0x02,0x06,	0xfe,0x7f,0x7f,0x81
1844.byte	0xa0,0x50,0x50,0xf0,	0x78,0x3c,0x3c,0x44
1845.byte	0x25,0x9f,0x9f,0xba,	0x4b,0xa8,0xa8,0xe3
1846.byte	0xa2,0x51,0x51,0xf3,	0x5d,0xa3,0xa3,0xfe
1847.byte	0x80,0x40,0x40,0xc0,	0x05,0x8f,0x8f,0x8a
1848.byte	0x3f,0x92,0x92,0xad,	0x21,0x9d,0x9d,0xbc
1849.byte	0x70,0x38,0x38,0x48,	0xf1,0xf5,0xf5,0x04
1850.byte	0x63,0xbc,0xbc,0xdf,	0x77,0xb6,0xb6,0xc1
1851.byte	0xaf,0xda,0xda,0x75,	0x42,0x21,0x21,0x63
1852.byte	0x20,0x10,0x10,0x30,	0xe5,0xff,0xff,0x1a
1853.byte	0xfd,0xf3,0xf3,0x0e,	0xbf,0xd2,0xd2,0x6d
1854.byte	0x81,0xcd,0xcd,0x4c,	0x18,0x0c,0x0c,0x14
1855.byte	0x26,0x13,0x13,0x35,	0xc3,0xec,0xec,0x2f
1856.byte	0xbe,0x5f,0x5f,0xe1,	0x35,0x97,0x97,0xa2
1857.byte	0x88,0x44,0x44,0xcc,	0x2e,0x17,0x17,0x39
1858.byte	0x93,0xc4,0xc4,0x57,	0x55,0xa7,0xa7,0xf2
1859.byte	0xfc,0x7e,0x7e,0x82,	0x7a,0x3d,0x3d,0x47
1860.byte	0xc8,0x64,0x64,0xac,	0xba,0x5d,0x5d,0xe7
1861.byte	0x32,0x19,0x19,0x2b,	0xe6,0x73,0x73,0x95
1862.byte	0xc0,0x60,0x60,0xa0,	0x19,0x81,0x81,0x98
1863.byte	0x9e,0x4f,0x4f,0xd1,	0xa3,0xdc,0xdc,0x7f
1864.byte	0x44,0x22,0x22,0x66,	0x54,0x2a,0x2a,0x7e
1865.byte	0x3b,0x90,0x90,0xab,	0x0b,0x88,0x88,0x83
1866.byte	0x8c,0x46,0x46,0xca,	0xc7,0xee,0xee,0x29
1867.byte	0x6b,0xb8,0xb8,0xd3,	0x28,0x14,0x14,0x3c
1868.byte	0xa7,0xde,0xde,0x79,	0xbc,0x5e,0x5e,0xe2
1869.byte	0x16,0x0b,0x0b,0x1d,	0xad,0xdb,0xdb,0x76
1870.byte	0xdb,0xe0,0xe0,0x3b,	0x64,0x32,0x32,0x56
1871.byte	0x74,0x3a,0x3a,0x4e,	0x14,0x0a,0x0a,0x1e
1872.byte	0x92,0x49,0x49,0xdb,	0x0c,0x06,0x06,0x0a
1873.byte	0x48,0x24,0x24,0x6c,	0xb8,0x5c,0x5c,0xe4
1874.byte	0x9f,0xc2,0xc2,0x5d,	0xbd,0xd3,0xd3,0x6e
1875.byte	0x43,0xac,0xac,0xef,	0xc4,0x62,0x62,0xa6
1876.byte	0x39,0x91,0x91,0xa8,	0x31,0x95,0x95,0xa4
1877.byte	0xd3,0xe4,0xe4,0x37,	0xf2,0x79,0x79,0x8b
1878.byte	0xd5,0xe7,0xe7,0x32,	0x8b,0xc8,0xc8,0x43
1879.byte	0x6e,0x37,0x37,0x59,	0xda,0x6d,0x6d,0xb7
1880.byte	0x01,0x8d,0x8d,0x8c,	0xb1,0xd5,0xd5,0x64
1881.byte	0x9c,0x4e,0x4e,0xd2,	0x49,0xa9,0xa9,0xe0
1882.byte	0xd8,0x6c,0x6c,0xb4,	0xac,0x56,0x56,0xfa
1883.byte	0xf3,0xf4,0xf4,0x07,	0xcf,0xea,0xea,0x25
1884.byte	0xca,0x65,0x65,0xaf,	0xf4,0x7a,0x7a,0x8e
1885.byte	0x47,0xae,0xae,0xe9,	0x10,0x08,0x08,0x18
1886.byte	0x6f,0xba,0xba,0xd5,	0xf0,0x78,0x78,0x88
1887.byte	0x4a,0x25,0x25,0x6f,	0x5c,0x2e,0x2e,0x72
1888.byte	0x38,0x1c,0x1c,0x24,	0x57,0xa6,0xa6,0xf1
1889.byte	0x73,0xb4,0xb4,0xc7,	0x97,0xc6,0xc6,0x51
1890.byte	0xcb,0xe8,0xe8,0x23,	0xa1,0xdd,0xdd,0x7c
1891.byte	0xe8,0x74,0x74,0x9c,	0x3e,0x1f,0x1f,0x21
1892.byte	0x96,0x4b,0x4b,0xdd,	0x61,0xbd,0xbd,0xdc
1893.byte	0x0d,0x8b,0x8b,0x86,	0x0f,0x8a,0x8a,0x85
1894.byte	0xe0,0x70,0x70,0x90,	0x7c,0x3e,0x3e,0x42
1895.byte	0x71,0xb5,0xb5,0xc4,	0xcc,0x66,0x66,0xaa
1896.byte	0x90,0x48,0x48,0xd8,	0x06,0x03,0x03,0x05
1897.byte	0xf7,0xf6,0xf6,0x01,	0x1c,0x0e,0x0e,0x12
1898.byte	0xc2,0x61,0x61,0xa3,	0x6a,0x35,0x35,0x5f
1899.byte	0xae,0x57,0x57,0xf9,	0x69,0xb9,0xb9,0xd0
1900.byte	0x17,0x86,0x86,0x91,	0x99,0xc1,0xc1,0x58
1901.byte	0x3a,0x1d,0x1d,0x27,	0x27,0x9e,0x9e,0xb9
1902.byte	0xd9,0xe1,0xe1,0x38,	0xeb,0xf8,0xf8,0x13
1903.byte	0x2b,0x98,0x98,0xb3,	0x22,0x11,0x11,0x33
1904.byte	0xd2,0x69,0x69,0xbb,	0xa9,0xd9,0xd9,0x70
1905.byte	0x07,0x8e,0x8e,0x89,	0x33,0x94,0x94,0xa7
1906.byte	0x2d,0x9b,0x9b,0xb6,	0x3c,0x1e,0x1e,0x22
1907.byte	0x15,0x87,0x87,0x92,	0xc9,0xe9,0xe9,0x20
1908.byte	0x87,0xce,0xce,0x49,	0xaa,0x55,0x55,0xff
1909.byte	0x50,0x28,0x28,0x78,	0xa5,0xdf,0xdf,0x7a
1910.byte	0x03,0x8c,0x8c,0x8f,	0x59,0xa1,0xa1,0xf8
1911.byte	0x09,0x89,0x89,0x80,	0x1a,0x0d,0x0d,0x17
1912.byte	0x65,0xbf,0xbf,0xda,	0xd7,0xe6,0xe6,0x31
1913.byte	0x84,0x42,0x42,0xc6,	0xd0,0x68,0x68,0xb8
1914.byte	0x82,0x41,0x41,0xc3,	0x29,0x99,0x99,0xb0
1915.byte	0x5a,0x2d,0x2d,0x77,	0x1e,0x0f,0x0f,0x11
1916.byte	0x7b,0xb0,0xb0,0xcb,	0xa8,0x54,0x54,0xfc
1917.byte	0x6d,0xbb,0xbb,0xd6,	0x2c,0x16,0x16,0x3a
1918
1919AES_Td:
1920.byte	0x51,0xf4,0xa7,0x50,	0x7e,0x41,0x65,0x53	# Td0
1921.byte	0x1a,0x17,0xa4,0xc3,	0x3a,0x27,0x5e,0x96
1922.byte	0x3b,0xab,0x6b,0xcb,	0x1f,0x9d,0x45,0xf1
1923.byte	0xac,0xfa,0x58,0xab,	0x4b,0xe3,0x03,0x93
1924.byte	0x20,0x30,0xfa,0x55,	0xad,0x76,0x6d,0xf6
1925.byte	0x88,0xcc,0x76,0x91,	0xf5,0x02,0x4c,0x25
1926.byte	0x4f,0xe5,0xd7,0xfc,	0xc5,0x2a,0xcb,0xd7
1927.byte	0x26,0x35,0x44,0x80,	0xb5,0x62,0xa3,0x8f
1928.byte	0xde,0xb1,0x5a,0x49,	0x25,0xba,0x1b,0x67
1929.byte	0x45,0xea,0x0e,0x98,	0x5d,0xfe,0xc0,0xe1
1930.byte	0xc3,0x2f,0x75,0x02,	0x81,0x4c,0xf0,0x12
1931.byte	0x8d,0x46,0x97,0xa3,	0x6b,0xd3,0xf9,0xc6
1932.byte	0x03,0x8f,0x5f,0xe7,	0x15,0x92,0x9c,0x95
1933.byte	0xbf,0x6d,0x7a,0xeb,	0x95,0x52,0x59,0xda
1934.byte	0xd4,0xbe,0x83,0x2d,	0x58,0x74,0x21,0xd3
1935.byte	0x49,0xe0,0x69,0x29,	0x8e,0xc9,0xc8,0x44
1936.byte	0x75,0xc2,0x89,0x6a,	0xf4,0x8e,0x79,0x78
1937.byte	0x99,0x58,0x3e,0x6b,	0x27,0xb9,0x71,0xdd
1938.byte	0xbe,0xe1,0x4f,0xb6,	0xf0,0x88,0xad,0x17
1939.byte	0xc9,0x20,0xac,0x66,	0x7d,0xce,0x3a,0xb4
1940.byte	0x63,0xdf,0x4a,0x18,	0xe5,0x1a,0x31,0x82
1941.byte	0x97,0x51,0x33,0x60,	0x62,0x53,0x7f,0x45
1942.byte	0xb1,0x64,0x77,0xe0,	0xbb,0x6b,0xae,0x84
1943.byte	0xfe,0x81,0xa0,0x1c,	0xf9,0x08,0x2b,0x94
1944.byte	0x70,0x48,0x68,0x58,	0x8f,0x45,0xfd,0x19
1945.byte	0x94,0xde,0x6c,0x87,	0x52,0x7b,0xf8,0xb7
1946.byte	0xab,0x73,0xd3,0x23,	0x72,0x4b,0x02,0xe2
1947.byte	0xe3,0x1f,0x8f,0x57,	0x66,0x55,0xab,0x2a
1948.byte	0xb2,0xeb,0x28,0x07,	0x2f,0xb5,0xc2,0x03
1949.byte	0x86,0xc5,0x7b,0x9a,	0xd3,0x37,0x08,0xa5
1950.byte	0x30,0x28,0x87,0xf2,	0x23,0xbf,0xa5,0xb2
1951.byte	0x02,0x03,0x6a,0xba,	0xed,0x16,0x82,0x5c
1952.byte	0x8a,0xcf,0x1c,0x2b,	0xa7,0x79,0xb4,0x92
1953.byte	0xf3,0x07,0xf2,0xf0,	0x4e,0x69,0xe2,0xa1
1954.byte	0x65,0xda,0xf4,0xcd,	0x06,0x05,0xbe,0xd5
1955.byte	0xd1,0x34,0x62,0x1f,	0xc4,0xa6,0xfe,0x8a
1956.byte	0x34,0x2e,0x53,0x9d,	0xa2,0xf3,0x55,0xa0
1957.byte	0x05,0x8a,0xe1,0x32,	0xa4,0xf6,0xeb,0x75
1958.byte	0x0b,0x83,0xec,0x39,	0x40,0x60,0xef,0xaa
1959.byte	0x5e,0x71,0x9f,0x06,	0xbd,0x6e,0x10,0x51
1960.byte	0x3e,0x21,0x8a,0xf9,	0x96,0xdd,0x06,0x3d
1961.byte	0xdd,0x3e,0x05,0xae,	0x4d,0xe6,0xbd,0x46
1962.byte	0x91,0x54,0x8d,0xb5,	0x71,0xc4,0x5d,0x05
1963.byte	0x04,0x06,0xd4,0x6f,	0x60,0x50,0x15,0xff
1964.byte	0x19,0x98,0xfb,0x24,	0xd6,0xbd,0xe9,0x97
1965.byte	0x89,0x40,0x43,0xcc,	0x67,0xd9,0x9e,0x77
1966.byte	0xb0,0xe8,0x42,0xbd,	0x07,0x89,0x8b,0x88
1967.byte	0xe7,0x19,0x5b,0x38,	0x79,0xc8,0xee,0xdb
1968.byte	0xa1,0x7c,0x0a,0x47,	0x7c,0x42,0x0f,0xe9
1969.byte	0xf8,0x84,0x1e,0xc9,	0x00,0x00,0x00,0x00
1970.byte	0x09,0x80,0x86,0x83,	0x32,0x2b,0xed,0x48
1971.byte	0x1e,0x11,0x70,0xac,	0x6c,0x5a,0x72,0x4e
1972.byte	0xfd,0x0e,0xff,0xfb,	0x0f,0x85,0x38,0x56
1973.byte	0x3d,0xae,0xd5,0x1e,	0x36,0x2d,0x39,0x27
1974.byte	0x0a,0x0f,0xd9,0x64,	0x68,0x5c,0xa6,0x21
1975.byte	0x9b,0x5b,0x54,0xd1,	0x24,0x36,0x2e,0x3a
1976.byte	0x0c,0x0a,0x67,0xb1,	0x93,0x57,0xe7,0x0f
1977.byte	0xb4,0xee,0x96,0xd2,	0x1b,0x9b,0x91,0x9e
1978.byte	0x80,0xc0,0xc5,0x4f,	0x61,0xdc,0x20,0xa2
1979.byte	0x5a,0x77,0x4b,0x69,	0x1c,0x12,0x1a,0x16
1980.byte	0xe2,0x93,0xba,0x0a,	0xc0,0xa0,0x2a,0xe5
1981.byte	0x3c,0x22,0xe0,0x43,	0x12,0x1b,0x17,0x1d
1982.byte	0x0e,0x09,0x0d,0x0b,	0xf2,0x8b,0xc7,0xad
1983.byte	0x2d,0xb6,0xa8,0xb9,	0x14,0x1e,0xa9,0xc8
1984.byte	0x57,0xf1,0x19,0x85,	0xaf,0x75,0x07,0x4c
1985.byte	0xee,0x99,0xdd,0xbb,	0xa3,0x7f,0x60,0xfd
1986.byte	0xf7,0x01,0x26,0x9f,	0x5c,0x72,0xf5,0xbc
1987.byte	0x44,0x66,0x3b,0xc5,	0x5b,0xfb,0x7e,0x34
1988.byte	0x8b,0x43,0x29,0x76,	0xcb,0x23,0xc6,0xdc
1989.byte	0xb6,0xed,0xfc,0x68,	0xb8,0xe4,0xf1,0x63
1990.byte	0xd7,0x31,0xdc,0xca,	0x42,0x63,0x85,0x10
1991.byte	0x13,0x97,0x22,0x40,	0x84,0xc6,0x11,0x20
1992.byte	0x85,0x4a,0x24,0x7d,	0xd2,0xbb,0x3d,0xf8
1993.byte	0xae,0xf9,0x32,0x11,	0xc7,0x29,0xa1,0x6d
1994.byte	0x1d,0x9e,0x2f,0x4b,	0xdc,0xb2,0x30,0xf3
1995.byte	0x0d,0x86,0x52,0xec,	0x77,0xc1,0xe3,0xd0
1996.byte	0x2b,0xb3,0x16,0x6c,	0xa9,0x70,0xb9,0x99
1997.byte	0x11,0x94,0x48,0xfa,	0x47,0xe9,0x64,0x22
1998.byte	0xa8,0xfc,0x8c,0xc4,	0xa0,0xf0,0x3f,0x1a
1999.byte	0x56,0x7d,0x2c,0xd8,	0x22,0x33,0x90,0xef
2000.byte	0x87,0x49,0x4e,0xc7,	0xd9,0x38,0xd1,0xc1
2001.byte	0x8c,0xca,0xa2,0xfe,	0x98,0xd4,0x0b,0x36
2002.byte	0xa6,0xf5,0x81,0xcf,	0xa5,0x7a,0xde,0x28
2003.byte	0xda,0xb7,0x8e,0x26,	0x3f,0xad,0xbf,0xa4
2004.byte	0x2c,0x3a,0x9d,0xe4,	0x50,0x78,0x92,0x0d
2005.byte	0x6a,0x5f,0xcc,0x9b,	0x54,0x7e,0x46,0x62
2006.byte	0xf6,0x8d,0x13,0xc2,	0x90,0xd8,0xb8,0xe8
2007.byte	0x2e,0x39,0xf7,0x5e,	0x82,0xc3,0xaf,0xf5
2008.byte	0x9f,0x5d,0x80,0xbe,	0x69,0xd0,0x93,0x7c
2009.byte	0x6f,0xd5,0x2d,0xa9,	0xcf,0x25,0x12,0xb3
2010.byte	0xc8,0xac,0x99,0x3b,	0x10,0x18,0x7d,0xa7
2011.byte	0xe8,0x9c,0x63,0x6e,	0xdb,0x3b,0xbb,0x7b
2012.byte	0xcd,0x26,0x78,0x09,	0x6e,0x59,0x18,0xf4
2013.byte	0xec,0x9a,0xb7,0x01,	0x83,0x4f,0x9a,0xa8
2014.byte	0xe6,0x95,0x6e,0x65,	0xaa,0xff,0xe6,0x7e
2015.byte	0x21,0xbc,0xcf,0x08,	0xef,0x15,0xe8,0xe6
2016.byte	0xba,0xe7,0x9b,0xd9,	0x4a,0x6f,0x36,0xce
2017.byte	0xea,0x9f,0x09,0xd4,	0x29,0xb0,0x7c,0xd6
2018.byte	0x31,0xa4,0xb2,0xaf,	0x2a,0x3f,0x23,0x31
2019.byte	0xc6,0xa5,0x94,0x30,	0x35,0xa2,0x66,0xc0
2020.byte	0x74,0x4e,0xbc,0x37,	0xfc,0x82,0xca,0xa6
2021.byte	0xe0,0x90,0xd0,0xb0,	0x33,0xa7,0xd8,0x15
2022.byte	0xf1,0x04,0x98,0x4a,	0x41,0xec,0xda,0xf7
2023.byte	0x7f,0xcd,0x50,0x0e,	0x17,0x91,0xf6,0x2f
2024.byte	0x76,0x4d,0xd6,0x8d,	0x43,0xef,0xb0,0x4d
2025.byte	0xcc,0xaa,0x4d,0x54,	0xe4,0x96,0x04,0xdf
2026.byte	0x9e,0xd1,0xb5,0xe3,	0x4c,0x6a,0x88,0x1b
2027.byte	0xc1,0x2c,0x1f,0xb8,	0x46,0x65,0x51,0x7f
2028.byte	0x9d,0x5e,0xea,0x04,	0x01,0x8c,0x35,0x5d
2029.byte	0xfa,0x87,0x74,0x73,	0xfb,0x0b,0x41,0x2e
2030.byte	0xb3,0x67,0x1d,0x5a,	0x92,0xdb,0xd2,0x52
2031.byte	0xe9,0x10,0x56,0x33,	0x6d,0xd6,0x47,0x13
2032.byte	0x9a,0xd7,0x61,0x8c,	0x37,0xa1,0x0c,0x7a
2033.byte	0x59,0xf8,0x14,0x8e,	0xeb,0x13,0x3c,0x89
2034.byte	0xce,0xa9,0x27,0xee,	0xb7,0x61,0xc9,0x35
2035.byte	0xe1,0x1c,0xe5,0xed,	0x7a,0x47,0xb1,0x3c
2036.byte	0x9c,0xd2,0xdf,0x59,	0x55,0xf2,0x73,0x3f
2037.byte	0x18,0x14,0xce,0x79,	0x73,0xc7,0x37,0xbf
2038.byte	0x53,0xf7,0xcd,0xea,	0x5f,0xfd,0xaa,0x5b
2039.byte	0xdf,0x3d,0x6f,0x14,	0x78,0x44,0xdb,0x86
2040.byte	0xca,0xaf,0xf3,0x81,	0xb9,0x68,0xc4,0x3e
2041.byte	0x38,0x24,0x34,0x2c,	0xc2,0xa3,0x40,0x5f
2042.byte	0x16,0x1d,0xc3,0x72,	0xbc,0xe2,0x25,0x0c
2043.byte	0x28,0x3c,0x49,0x8b,	0xff,0x0d,0x95,0x41
2044.byte	0x39,0xa8,0x01,0x71,	0x08,0x0c,0xb3,0xde
2045.byte	0xd8,0xb4,0xe4,0x9c,	0x64,0x56,0xc1,0x90
2046.byte	0x7b,0xcb,0x84,0x61,	0xd5,0x32,0xb6,0x70
2047.byte	0x48,0x6c,0x5c,0x74,	0xd0,0xb8,0x57,0x42
2048
2049.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38	# Td4
2050.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
2051.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
2052.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
2053.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
2054.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
2055.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
2056.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
2057.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
2058.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
2059.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
2060.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
2061.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
2062.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
2063.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
2064.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
2065.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
2066.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
2067.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
2068.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
2069.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
2070.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
2071.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
2072.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
2073.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
2074.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
2075.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
2076.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
2077.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
2078.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
2079.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
2080.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
2081
2082AES_Te4:
2083.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5	# Te4
2084.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
2085.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
2086.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
2087.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
2088.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
2089.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
2090.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
2091.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
2092.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
2093.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
2094.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
2095.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
2096.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
2097.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
2098.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
2099.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
2100.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
2101.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
2102.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
2103.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
2104.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
2105.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
2106.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
2107.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
2108.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
2109.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
2110.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
2111.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
2112.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
2113.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
2114.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
2115
2116.byte	0x01,0x00,0x00,0x00,	0x02,0x00,0x00,0x00	# rcon
2117.byte	0x04,0x00,0x00,0x00,	0x08,0x00,0x00,0x00
2118.byte	0x10,0x00,0x00,0x00,	0x20,0x00,0x00,0x00
2119.byte	0x40,0x00,0x00,0x00,	0x80,0x00,0x00,0x00
2120.byte	0x1B,0x00,0x00,0x00,	0x36,0x00,0x00,0x00
2121___
2122
# Post-process the accumulated assembly one line at a time and print it.
# Each line is rewritten in place in $_; the order of the substitutions
# below matters because later ones match the output of earlier ones.
foreach (split("\n",$code)) {
	# Replace every `...` span with the result of evaluating its
	# contents as a Perl expression.
	s/\`([^\`]*)\`/eval $1/ge;

	# made-up _instructions, _xtr, _ins, _ror and _bias, cope
	# with byte order dependencies...
	# Only lines whose first non-blank character is '_' are treated as
	# such pseudo-instructions.
	if (/^\s+_/) {
	    # Two-operand form "_op $r,arg" is widened to "_op $r,$r,arg"
	    # so that the patterns below only deal with three operands.
	    s/(_[a-z]+\s+)(\$[0-9]+),([^,]+)(#.*)*$/$1$2,$2,$3/;

	    # Map the pseudo-instruction to a real shift/insert; at most
	    # one alternative fires thanks to the 'or' chain.  The shift
	    # amount is used as written when $big_endian is set and is
	    # transformed otherwise:
	    #   _xtr, _ins, _ins2	n -> 24-n
	    #   _ror			n -> -n
	    #   _bias			n -> (n-16)&31
	    s/_xtr\s+(\$[0-9]+),(\$[0-9]+),([0-9]+(\-2)*)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    s/_ins\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    s/_ins2\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("ins\t$1,$2,%d,8",$big_endian ?	eval($3)
					:		eval("24-$3"))/e or
	    s/_ror\s+(\$[0-9]+),(\$[0-9]+),(\-?[0-9]+)/
		sprintf("srl\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("$3*-1"))/e or
	    s/_bias\s+(\$[0-9]+),(\$[0-9]+),([0-9]+)/
		sprintf("sll\t$1,$2,%d",$big_endian ?	eval($3)
					:		eval("($3-16)&31"))/e;

	    # Clean up degenerate results of the step above: a right shift
	    # by a negative amount becomes a left shift by that amount, a
	    # right shift by 0 becomes "and ...,0xff", and a left shift by
	    # 0 is commented out in the output.
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),\-([0-9]+)/
		sprintf("sll\t$1,$2,$3")/e				or
	    s/srl\s+(\$[0-9]+),(\$[0-9]+),0/
		sprintf("and\t$1,$2,0xff")/e				or
	    s/(sll\s+\$[0-9]+,\$[0-9]+,0)/#$1/;
	}

	# convert lwl/lwr and swr/swl to little-endian order
	# Because the leading .* is greedy, $2 is the last digit of the
	# offset in front of "($reg)".  '&' binds more loosely than '+' and
	# '-', so the evaluated expression reduces to ($2-1)&3 for lwl/swl
	# and ($2+1)&3 for lwr/swr.
	if (!$big_endian && /^\s+[sl]w[lr]\s+/) {
	    s/([sl]wl.*)([0-9]+)\((\$[0-9]+)\)/
		sprintf("$1%d($3)",eval("$2-$2%4+($2%4-1)&3"))/e	or
	    s/([sl]wr.*)([0-9]+)\((\$[0-9]+)\)/
		sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
	}

	# On little-endian targets rotr amounts are replaced by 32-n and
	# the position operand of 8-bit-wide ext by 24-n.
	if (!$big_endian) {
	    s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
	    s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
	}

	print $_,"\n";
}

# Closing STDOUT explicitly makes a failed write of the generated
# assembly fatal instead of letting it pass unnoticed.
close STDOUT or die "error closing STDOUT: $!";
2171