1/*********************************************************************/
2/*                                                                   */
3/*             Optimized BLAS libraries                              */
4/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
5/*                                                                   */
6/* Copyright (c) The University of Texas, 2009. All rights reserved. */
7/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
8/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
9/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
10/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
11/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
12/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
13/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
14/* Under no circumstances shall University be liable for incidental, */
15/* special, indirect, direct or consequential damages or loss of     */
16/* profits, interruption of business, or related expenses which may  */
17/* arise from use of Software or Documentation, including but not    */
18/* limited to those resulting from defects in Software and/or        */
19/* Documentation, or loss or inaccuracy of data of any kind.         */
20/*********************************************************************/
21
22#define ASSEMBLER
23#include "common.h"
24
25#define M	$4
26#define	N	$5
27#define	K	$6
28#define A	$8
29#define B	$9
30#define C	$10
31#define LDC	$11
32
33#define AO	$12
34#define BO	$13
35
36#define I	$2
37#define J	$3
38#define L	$7
39
40#define CO1	$14
41#define CO2	$15
42#define CO3	$16
43#define CO4	$17
44#define CO5	$18
45#define CO6	$19
46#define CO7	$20
47#define CO8	$21
48
49#define OFFSET	$22
50#define KK	$23
51#define TEMP	$24
52#define AORIG	$25
53
54#define a1	$f0
55#define a2	$f1
56#define a3	$f27
57#define a4	$f28
58
59#define b1	$f2
60#define b2	$f3
61#define b3	$f4
62#define b4	$f5
63#define b5	$f6
64#define b6	$f7
65#define b7	$f8
66#define b8	$f9
67
68#define a5	b8
69
70#define c11	$f10
71#define c12	$f11
72#define c21	$f12
73#define c22	$f13
74#define c31	$f14
75#define c32	$f16
76#define c41	$f17
77#define c42	$f18
78#define c51	$f19
79#define c52	$f20
80#define c61	$f21
81#define c62	$f22
82#define c71	$f23
83#define c72	$f24
84#define c81	$f25
85#define c82	$f26
86
87#define ALPHA	$f15
88
89	PROLOGUE
90
91	daddiu	$sp, $sp, -144
92
93	SDARG	$16,   0($sp)
94	SDARG	$17,   8($sp)
95	SDARG	$18,  16($sp)
96	SDARG	$19,  24($sp)
97	SDARG	$20,  32($sp)
98	SDARG	$21,  40($sp)
99	sdc1	$f24, 48($sp)
100	sdc1	$f25, 56($sp)
101	sdc1	$f26, 64($sp)
102	sdc1	$f27, 72($sp)
103	sdc1	$f28, 80($sp)
104
105	SDARG	$22,  88($sp)
106	SDARG	$23,  96($sp)
107	SDARG	$24, 104($sp)
108	SDARG	$25, 112($sp)
109
110#ifndef __64BIT__
111	sdc1	$f20,112($sp)
112	sdc1	$f21,120($sp)
113	sdc1	$f22,128($sp)
114	sdc1	$f23,136($sp)
115#endif
116
117	LDARG	OFFSET, 144($sp)
118
119	dsll	LDC, LDC, BASE_SHIFT
120
121#ifdef LN
122	mult	M, K
123	mflo	TEMP
124
125	dsll	TEMP, TEMP, BASE_SHIFT
126	daddu	A, A, TEMP
127
128	dsll	TEMP, M, BASE_SHIFT
129	daddu	C, C, TEMP
130#endif
131
132#ifdef RN
133	neg	KK, OFFSET
134#endif
135
136#ifdef RT
137	mult	N, K
138	mflo	TEMP
139
140	dsll	TEMP, TEMP, BASE_SHIFT
141	daddu	B, B, TEMP
142
143	mult	N, LDC
144	mflo	TEMP
145	daddu	C, C, TEMP
146
147	dsubu	KK, N, OFFSET
148#endif
149
150	andi	J,  N, 1
151	blez	J, .L30
152	NOP
153
154#ifdef RT
155	dsll	TEMP, K, BASE_SHIFT
156	dsubu	B, B, TEMP
157
158	dsubu	C, C, LDC
159#endif
160
161	move	AO, A
162	move	CO1, C
163
164#ifdef LN
165	daddu	KK, M, OFFSET
166#endif
167
168#ifdef LT
169	move	KK, OFFSET
170#endif
171
172#if defined(LN) || defined(RT)
173	move	AORIG, A
174#else
175	move	AO, A
176#endif
177#ifndef RT
178	daddu	C,  CO1, LDC
179#endif
180
181	dsra	I,  M, 1
182	blez	I, .L80
183	NOP
184
185.L71:
186#if defined(LT) || defined(RN)
187	LD	a1,  0 * SIZE(AO)
188	MTC	$0,  c11
189	LD	a2,  1 * SIZE(AO)
190	MOV	c21, c11
191	LD	a5,  4 * SIZE(AO)
192
193	LD	b1,  0 * SIZE(B)
194	MOV	c12, c11
195	LD	b2,  1 * SIZE(B)
196	MOV	c22, c11
197	LD	b3,  2 * SIZE(B)
198	LD	b5,  4 * SIZE(B)
199	dsra	L,  KK, 2
200	LD	b6,  8 * SIZE(B)
201	LD	b7, 12 * SIZE(B)
202
203	blez	L, .L75
204	move	BO,  B
205#else
206#ifdef LN
207	dsll	TEMP,   K,  1 + BASE_SHIFT
208	dsubu	AORIG, AORIG, TEMP
209#endif
210
211	dsll	L,    KK, 1 + BASE_SHIFT
212	dsll	TEMP, KK, 0 + BASE_SHIFT
213
214	daddu	AO, AORIG, L
215	daddu	BO, B,     TEMP
216
217	dsubu	TEMP, K, KK
218
219	LD	a1,  0 * SIZE(AO)
220	MTC	$0,  c11
221	LD	a2,  1 * SIZE(AO)
222	MOV	c21, c11
223	LD	a5,  4 * SIZE(AO)
224
225	LD	b1,  0 * SIZE(BO)
226	MOV	c12, c11
227	LD	b2,  1 * SIZE(BO)
228	MOV	c22, c11
229	LD	b3,  2 * SIZE(BO)
230	LD	b5,  4 * SIZE(BO)
231	dsra	L,  TEMP, 2
232	LD	b6,  8 * SIZE(BO)
233	LD	b7, 12 * SIZE(BO)
234
235	blez	L, .L75
236	NOP
237#endif
238	.align	3
239
240.L72:
241	LD	a1,  0 * SIZE(AO)
242	LD	a2,  1 * SIZE(AO)
243	LD	b1,  0 * SIZE(BO)
244
245	MADD	c11, c11, a1, b1
246	MADD	c12, c12, a2, b1
247
248	LD	a1,  2 * SIZE(AO)
249	LD	a2,  3 * SIZE(AO)
250	LD	b1,  1 * SIZE(BO)
251
252	MADD	c11, c11, a1, b1
253	MADD	c12, c12, a2, b1
254
255	LD	a1,  4 * SIZE(AO)
256	LD	a2,  5 * SIZE(AO)
257	LD	b1,  2 * SIZE(BO)
258
259	MADD	c11, c11, a1, b1
260	MADD	c12, c12, a2, b1
261
262	LD	a1,  6 * SIZE(AO)
263	LD	a2,  7 * SIZE(AO)
264	LD	b1,  3 * SIZE(BO)
265
266	MADD	c11, c11, a1, b1
267	MADD	c12, c12, a2, b1
268
269	daddiu	L, L, -1
270	daddiu	AO, AO,  8 * SIZE
271	bgtz	L, .L72
272	daddiu	BO, BO,  4 * SIZE
273	.align 3
274
275.L75:
276#if defined(LT) || defined(RN)
277	andi	L, KK,  3
278#else
279	andi	L, TEMP, 3
280#endif
281	NOP
282	blez	L, .L78
283	NOP
284	.align	3
285
286.L76:
287	LD	a1,  0 * SIZE(AO)
288	LD	a2,  1 * SIZE(AO)
289	LD	b1,  0 * SIZE(BO)
290
291	MADD	c11, c11, a1, b1
292	MADD	c12, c12, a2, b1
293
294	daddiu	L, L, -1
295	daddiu	AO, AO,  2 * SIZE
296	bgtz	L, .L76
297	daddiu	BO, BO,  1 * SIZE
298
299.L78:
300	ADD	c11, c11, c21
301	ADD	c12, c12, c22
302
303#if defined(LN) || defined(RT)
304#ifdef LN
305	daddiu	TEMP, KK, -2
306#else
307	daddiu	TEMP, KK, -1
308#endif
309
310	dsll	L,    TEMP, 1 + BASE_SHIFT
311	dsll	TEMP, TEMP, 0 + BASE_SHIFT
312	daddu	AO, AORIG, L
313	daddu	BO, B,     TEMP
314#endif
315
316
317#if defined(LN) || defined(LT)
318	LD	b1,  0 * SIZE(BO)
319	LD	b2,  1 * SIZE(BO)
320
321	SUB	c11, b1, c11
322	SUB	c12, b2, c12
323#else
324	LD	b1,  0 * SIZE(AO)
325	LD	b2,  1 * SIZE(AO)
326
327	SUB	c11, b1, c11
328	SUB	c12, b2, c12
329#endif
330
331#ifdef LN
332	LD	b1,  3 * SIZE(AO)
333	LD	b2,  2 * SIZE(AO)
334	LD	b3,  0 * SIZE(AO)
335
336	MUL	c12, b1, c12
337	NMSUB	c11, c11, b2, c12
338	MUL	c11, b3, c11
339#endif
340
341#ifdef LT
342	LD	b1,  0 * SIZE(AO)
343	LD	b2,  1 * SIZE(AO)
344	LD	b3,  3 * SIZE(AO)
345
346	MUL	c11, b1, c11
347	NMSUB	c12, c12, b2, c11
348	MUL	c12, b3, c12
349#endif
350
351#if defined(RN) || defined(RT)
352	LD	b1,  0 * SIZE(BO)
353
354	MUL	c11, b1, c11
355	MUL	c12, b1, c12
356#endif
357
358#ifdef LN
359	daddiu	CO1, CO1, -2 * SIZE
360#endif
361
362#if defined(LN) || defined(LT)
363	ST	c11,  0 * SIZE(BO)
364	ST	c12,  1 * SIZE(BO)
365#else
366	ST	c11,  0 * SIZE(AO)
367	ST	c12,  1 * SIZE(AO)
368#endif
369
370	ST	c11,  0 * SIZE(CO1)
371	ST	c12,  1 * SIZE(CO1)
372
373#ifndef LN
374	daddiu	CO1, CO1, 2 * SIZE
375#endif
376
377#ifdef RT
378	dsll	TEMP, K, 1 + BASE_SHIFT
379	daddu	AORIG, AORIG, TEMP
380#endif
381
382#if defined(LT) || defined(RN)
383	dsubu	TEMP, K, KK
384	dsll	L,    TEMP, 1 + BASE_SHIFT
385	dsll	TEMP, TEMP, 0 + BASE_SHIFT
386	daddu	AO, AO, L
387	daddu	BO, BO, TEMP
388#endif
389
390#ifdef LT
391	daddiu	KK, KK, 2
392#endif
393
394#ifdef LN
395	daddiu	KK, KK, -2
396#endif
397
398	daddiu	I, I, -1
399
400	bgtz	I, .L71
401	NOP
402	.align 3
403
404.L80:
405	andi	I,  M, 1
406	blez	I, .L89
407	NOP
408
409#if defined(LT) || defined(RN)
410	LD	a1,  0 * SIZE(AO)
411	MTC	$0,  c11
412	LD	a2,  1 * SIZE(AO)
413	LD	a3,  2 * SIZE(AO)
414	LD	a4,  3 * SIZE(AO)
415
416	LD	b1,  0 * SIZE(B)
417	LD	b2,  1 * SIZE(B)
418	MOV	c21, c11
419	LD	b3,  2 * SIZE(B)
420	LD	b4,  3 * SIZE(B)
421	LD	b5,  4 * SIZE(B)
422	LD	b6,  8 * SIZE(B)
423	LD	b7, 12 * SIZE(B)
424
425	dsra	L,  KK, 2
426	blez	L, .L85
427	move	BO,  B
428#else
429#ifdef LN
430	dsll	TEMP,   K,  BASE_SHIFT
431	dsubu	AORIG, AORIG, TEMP
432#endif
433
434	dsll	TEMP, KK, BASE_SHIFT
435
436	daddu	AO, AORIG, TEMP
437	daddu	BO, B,     TEMP
438
439	dsubu	TEMP, K, KK
440
441	LD	a1,  0 * SIZE(AO)
442	MTC	$0,  c11
443	LD	a2,  1 * SIZE(AO)
444	LD	a3,  2 * SIZE(AO)
445	LD	a4,  3 * SIZE(AO)
446
447	LD	b1,  0 * SIZE(BO)
448	LD	b2,  1 * SIZE(BO)
449	LD	b3,  2 * SIZE(BO)
450	LD	b4,  3 * SIZE(BO)
451	MOV	c21, c11
452	LD	b5,  4 * SIZE(BO)
453	LD	b6,  8 * SIZE(BO)
454	LD	b7, 12 * SIZE(BO)
455
456	dsra	L,  TEMP, 2
457	blez	L, .L85
458	NOP
459#endif
460	.align	3
461
462.L82:
463	LD	a1,  0 * SIZE(AO)
464	LD	b1,  0 * SIZE(BO)
465
466	MADD	c11, c11, a1, b1
467
468	LD	a1,  1 * SIZE(AO)
469	LD	b1,  1 * SIZE(BO)
470
471	MADD	c21, c21, a1, b1
472
473	LD	a1,  2 * SIZE(AO)
474	LD	b1,  2 * SIZE(BO)
475
476	MADD	c11, c11, a1, b1
477
478	LD	a1,  3 * SIZE(AO)
479	LD	b1,  3 * SIZE(BO)
480
481	MADD	c21, c21, a1, b1
482
483	daddiu	L, L, -1
484	daddiu	AO, AO,  4 * SIZE
485	bgtz	L, .L82
486	daddiu	BO, BO,  4 * SIZE
487	.align 3
488
489.L85:
490#if defined(LT) || defined(RN)
491	andi	L, KK,  3
492#else
493	andi	L, TEMP, 3
494#endif
495	NOP
496	blez	L, .L88
497	NOP
498	.align	3
499
500.L86:
501	LD	a1,  0 * SIZE(AO)
502	LD	b1,  0 * SIZE(BO)
503
504	MADD	c11, c11, a1, b1
505
506	daddiu	L, L, -1
507	daddiu	AO, AO,  1 * SIZE
508	bgtz	L, .L86
509	daddiu	BO, BO,  1 * SIZE
510
511
512.L88:
513	ADD	c11, c11, c21
514
515#if defined(LN) || defined(RT)
516#ifdef LN
517	daddiu	TEMP, KK, -1
518#else
519	daddiu	TEMP, KK, -1
520#endif
521
522	dsll	TEMP, TEMP, 0 + BASE_SHIFT
523	daddu	AO, AORIG, TEMP
524	daddu	BO, B,     TEMP
525#endif
526
527
528#if defined(LN) || defined(LT)
529	LD	b1,  0 * SIZE(BO)
530
531	SUB	c11, b1, c11
532#else
533	LD	b1,  0 * SIZE(AO)
534
535	SUB	c11, b1, c11
536#endif
537
538#if defined(LN) || defined(LT)
539	LD	b1,  0 * SIZE(AO)
540
541	MUL	c11, b1, c11
542#endif
543
544#if defined(RN) || defined(RT)
545	LD	b1,  0 * SIZE(BO)
546
547	MUL	c11, b1, c11
548#endif
549
550#ifdef LN
551	daddiu	CO1, CO1, -1 * SIZE
552#endif
553
554#if defined(LN) || defined(LT)
555	ST	c11,  0 * SIZE(BO)
556#else
557	ST	c11,  0 * SIZE(AO)
558#endif
559
560	ST	c11,  0 * SIZE(CO1)
561
562#ifndef LN
563	daddiu	CO1, CO1, 1 * SIZE
564#endif
565
566#ifdef RT
567	dsll	TEMP, K, BASE_SHIFT
568	daddu	AORIG, AORIG, TEMP
569#endif
570
571#if defined(LT) || defined(RN)
572	dsubu	TEMP, K, KK
573	dsll	TEMP, TEMP, 0 + BASE_SHIFT
574	daddu	AO, AO, TEMP
575	daddu	BO, BO, TEMP
576#endif
577
578#ifdef LT
579	daddiu	KK, KK, 1
580#endif
581
582#ifdef LN
583	daddiu	KK, KK, -1
584#endif
585	.align 3
586
587.L89:
588#ifdef LN
589	dsll	TEMP, K, BASE_SHIFT
590	daddu	B, B, TEMP
591#endif
592
593#if defined(LT) || defined(RN)
594	move	B,  BO
595#endif
596
597#ifdef RN
598	daddiu	KK, KK,  1
599#endif
600
601#ifdef RT
602	daddiu	KK, KK, -1
603#endif
604	.align 3
605
606.L30:
607	andi	J,  N, 2
608	blez	J, .L50
609	NOP
610
611#ifdef RT
612	dsll	TEMP, K, 1 + BASE_SHIFT
613	dsubu	B, B, TEMP
614
615	dsll	TEMP, LDC, 1
616	dsubu	C, C, TEMP
617#endif
618
619	move	AO, A
620	move	CO1, C
621	daddu	CO2, C,   LDC
622
623#ifdef LN
624	daddu	KK, M, OFFSET
625#endif
626
627#ifdef LT
628	move	KK, OFFSET
629#endif
630
631#if defined(LN) || defined(RT)
632	move	AORIG, A
633#else
634	move	AO, A
635#endif
636#ifndef RT
637	daddu	C,  CO2, LDC
638#endif
639
640	dsra	I,  M, 1
641	blez	I, .L60
642	NOP
643
644.L51:
645#if defined(LT) || defined(RN)
646	LD	a1,  0 * SIZE(AO)
647	MTC	$0,  c11
648	LD	a2,  1 * SIZE(AO)
649	MOV	c21, c11
650	LD	a5,  4 * SIZE(AO)
651
652	LD	b1,  0 * SIZE(B)
653	MOV	c12, c11
654	LD	b2,  1 * SIZE(B)
655	MOV	c22, c11
656	LD	b3,  2 * SIZE(B)
657	LD	b5,  4 * SIZE(B)
658	dsra	L,  KK, 2
659	LD	b6,  8 * SIZE(B)
660	LD	b7, 12 * SIZE(B)
661
662	blez	L, .L55
663	move	BO,  B
664
665#else
666#ifdef LN
667	dsll	TEMP,   K,  1 + BASE_SHIFT
668	dsubu	AORIG, AORIG, TEMP
669#endif
670
671	dsll	L,    KK, 1 + BASE_SHIFT
672	dsll	TEMP, KK, 1 + BASE_SHIFT
673
674	daddu	AO, AORIG, L
675	daddu	BO, B,     TEMP
676
677	dsubu	TEMP, K, KK
678
679	LD	a1,  0 * SIZE(AO)
680	MTC	$0,  c11
681	LD	a2,  1 * SIZE(AO)
682	MOV	c21, c11
683	LD	a5,  4 * SIZE(AO)
684
685	LD	b1,  0 * SIZE(BO)
686	MOV	c12, c11
687	LD	b2,  1 * SIZE(BO)
688	MOV	c22, c11
689	LD	b3,  2 * SIZE(BO)
690	LD	b5,  4 * SIZE(BO)
691	dsra	L,  TEMP, 2
692	LD	b6,  8 * SIZE(BO)
693	LD	b7, 12 * SIZE(BO)
694
695	blez	L, .L55
696	NOP
697#endif
698	.align	3
699
700.L52:
701	MADD	c11, c11, a1, b1
702	LD	a3,  2 * SIZE(AO)
703	MADD	c21, c21, a1, b2
704	LD	b4,  3 * SIZE(BO)
705	MADD	c12, c12, a2, b1
706	LD	a4,  3 * SIZE(AO)
707	MADD	c22, c22, a2, b2
708	LD	b1,  8 * SIZE(BO)
709
710	MADD	c11, c11, a3, b3
711	LD	a1,  8 * SIZE(AO)
712	MADD	c21, c21, a3, b4
713	LD	b2,  5 * SIZE(BO)
714	MADD	c12, c12, a4, b3
715	LD	a2,  5 * SIZE(AO)
716	MADD	c22, c22, a4, b4
717	LD	b3,  6 * SIZE(BO)
718
719	MADD	c11, c11, a5, b5
720	LD	a3,  6 * SIZE(AO)
721	MADD	c21, c21, a5, b2
722	LD	b4,  7 * SIZE(BO)
723	MADD	c12, c12, a2, b5
724	LD	a4,  7 * SIZE(AO)
725	MADD	c22, c22, a2, b2
726	LD	b5, 12 * SIZE(BO)
727
728	MADD	c11, c11, a3, b3
729	LD	a5, 12 * SIZE(AO)
730	MADD	c21, c21, a3, b4
731	LD	b2,  9 * SIZE(BO)
732	MADD	c12, c12, a4, b3
733	LD	a2,  9 * SIZE(AO)
734	MADD	c22, c22, a4, b4
735	LD	b3, 10 * SIZE(BO)
736
737	daddiu	AO, AO,  8 * SIZE
738	daddiu	L, L, -1
739	bgtz	L, .L52
740	daddiu	BO, BO,  8 * SIZE
741	.align 3
742
743.L55:
744#if defined(LT) || defined(RN)
745	andi	L, KK,  3
746#else
747	andi	L, TEMP, 3
748#endif
749	NOP
750	blez	L, .L58
751	NOP
752	.align	3
753
754.L56:
755	MADD	c11, c11, a1, b1
756	LD	a2,  1 * SIZE(AO)
757	MADD	c21, c21, a1, b2
758	LD	a1,  2 * SIZE(AO)
759
760	MADD	c12, c12, a2, b1
761	LD	b1,  2 * SIZE(BO)
762	MADD	c22, c22, a2, b2
763	LD	b2,  3 * SIZE(BO)
764
765	daddiu	L, L, -1
766	daddiu	AO, AO,  2 * SIZE
767	bgtz	L, .L56
768	daddiu	BO, BO,  2 * SIZE
769
770.L58:
771#if defined(LN) || defined(RT)
772#ifdef LN
773	daddiu	TEMP, KK, -2
774#else
775	daddiu	TEMP, KK, -2
776#endif
777
778	dsll	L,    TEMP, 1 + BASE_SHIFT
779	dsll	TEMP, TEMP, 1 + BASE_SHIFT
780	daddu	AO, AORIG, L
781	daddu	BO, B,     TEMP
782#endif
783
784
785#if defined(LN) || defined(LT)
786	LD	b1,  0 * SIZE(BO)
787	LD	b2,  1 * SIZE(BO)
788	LD	b3,  2 * SIZE(BO)
789	LD	b4,  3 * SIZE(BO)
790
791	SUB	c11, b1, c11
792	SUB	c21, b2, c21
793	SUB	c12, b3, c12
794	SUB	c22, b4, c22
795#else
796	LD	b1,  0 * SIZE(AO)
797	LD	b2,  1 * SIZE(AO)
798	LD	b3,  2 * SIZE(AO)
799	LD	b4,  3 * SIZE(AO)
800
801	SUB	c11, b1, c11
802	SUB	c12, b2, c12
803	SUB	c21, b3, c21
804	SUB	c22, b4, c22
805#endif
806
807#ifdef LN
808	LD	b1,  3 * SIZE(AO)
809	LD	b2,  2 * SIZE(AO)
810	LD	b3,  0 * SIZE(AO)
811
812	MUL	c12, b1, c12
813	MUL	c22, b1, c22
814
815	NMSUB	c11, c11, b2, c12
816	NMSUB	c21, c21, b2, c22
817
818	MUL	c11, b3, c11
819	MUL	c21, b3, c21
820#endif
821
822#ifdef LT
823	LD	b1,  0 * SIZE(AO)
824	LD	b2,  1 * SIZE(AO)
825	LD	b3,  3 * SIZE(AO)
826
827	MUL	c11, b1, c11
828	MUL	c21, b1, c21
829
830	NMSUB	c12, c12, b2, c11
831	NMSUB	c22, c22, b2, c21
832
833	MUL	c12, b3, c12
834	MUL	c22, b3, c22
835#endif
836
837#ifdef RN
838	LD	b1,  0 * SIZE(BO)
839	LD	b2,  1 * SIZE(BO)
840	LD	b3,  3 * SIZE(BO)
841
842	MUL	c11, b1, c11
843	MUL	c12, b1, c12
844
845	NMSUB	c21, c21, b2, c11
846	NMSUB	c22, c22, b2, c12
847
848	MUL	c21, b3, c21
849	MUL	c22, b3, c22
850#endif
851
852#ifdef RT
853	LD	b1,  3 * SIZE(BO)
854	LD	b2,  2 * SIZE(BO)
855	LD	b3,  0 * SIZE(BO)
856
857	MUL	c21, b1, c21
858	MUL	c22, b1, c22
859
860	NMSUB	c11, c11, b2, c21
861	NMSUB	c12, c12, b2, c22
862
863	MUL	c11, b3, c11
864	MUL	c12, b3, c12
865#endif
866
867#ifdef LN
868	daddiu	CO1, CO1, -2 * SIZE
869	daddiu	CO2, CO2, -2 * SIZE
870#endif
871
872#if defined(LN) || defined(LT)
873	ST	c11,  0 * SIZE(BO)
874	ST	c21,  1 * SIZE(BO)
875	ST	c12,  2 * SIZE(BO)
876	ST	c22,  3 * SIZE(BO)
877#else
878	ST	c11,  0 * SIZE(AO)
879	ST	c12,  1 * SIZE(AO)
880	ST	c21,  2 * SIZE(AO)
881	ST	c22,  3 * SIZE(AO)
882#endif
883
884	ST	c11,  0 * SIZE(CO1)
885	ST	c12,  1 * SIZE(CO1)
886	ST	c21,  0 * SIZE(CO2)
887	ST	c22,  1 * SIZE(CO2)
888
889#ifndef LN
890	daddiu	CO1, CO1, 2 * SIZE
891	daddiu	CO2, CO2, 2 * SIZE
892#endif
893
894#ifdef RT
895	dsll	TEMP, K, 1 + BASE_SHIFT
896	daddu	AORIG, AORIG, TEMP
897#endif
898
899#if defined(LT) || defined(RN)
900	dsubu	TEMP, K, KK
901	dsll	TEMP, TEMP, 1 + BASE_SHIFT
902	daddu	AO, AO, TEMP
903	daddu	BO, BO, TEMP
904#endif
905
906#ifdef LT
907	daddiu	KK, KK, 2
908#endif
909
910#ifdef LN
911	daddiu	KK, KK, -2
912#endif
913
914	MTC	$0,  a1
915
916	MOV	c11, a1
917	MOV	c21, a1
918	MOV	c31, a1
919
920	daddiu	I, I, -1
921
922	bgtz	I, .L51
923	MOV	c41, c11
924	.align 3
925
926.L60:
927	andi	I,  M, 1
928	blez	I, .L69
929	NOP
930
931#if defined(LT) || defined(RN)
932	dsra	L,  KK, 2
933	LD	a1,  0 * SIZE(AO)
934	MTC	$0,  c11
935	LD	a2,  1 * SIZE(AO)
936	MOV	c21, c11
937	LD	a3,  2 * SIZE(AO)
938	MOV	c31, c11
939	LD	a4,  3 * SIZE(AO)
940	MOV	c41, c11
941
942	LD	b1,  0 * SIZE(B)
943	LD	b2,  1 * SIZE(B)
944	LD	b3,  2 * SIZE(B)
945	LD	b4,  3 * SIZE(B)
946	LD	b5,  4 * SIZE(B)
947	LD	b6,  8 * SIZE(B)
948	LD	b7, 12 * SIZE(B)
949
950	blez	L, .L65
951	move	BO,  B
952#else
953#ifdef LN
954	dsll	TEMP,   K,  BASE_SHIFT
955	dsubu	AORIG, AORIG, TEMP
956#endif
957
958	dsll	L,    KK, 0 + BASE_SHIFT
959	dsll	TEMP, KK, 1 + BASE_SHIFT
960
961	daddu	AO, AORIG, L
962	daddu	BO, B,     TEMP
963
964	dsubu	TEMP, K, KK
965
966	dsra	L,  TEMP, 2
967	LD	a1,  0 * SIZE(AO)
968	MTC	$0,  c11
969	LD	a2,  1 * SIZE(AO)
970	MOV	c21, c11
971	LD	a3,  2 * SIZE(AO)
972	MOV	c31, c11
973	LD	a4,  3 * SIZE(AO)
974	MOV	c41, c11
975
976	LD	b1,  0 * SIZE(BO)
977	LD	b2,  1 * SIZE(BO)
978	LD	b3,  2 * SIZE(BO)
979	LD	b4,  3 * SIZE(BO)
980	LD	b5,  4 * SIZE(BO)
981	LD	b6,  8 * SIZE(BO)
982	LD	b7, 12 * SIZE(BO)
983
984	blez	L, .L65
985	NOP
986#endif
987	.align	3
988
989.L62:
990	MADD	c11, c11, a1, b1
991	LD	b1,  4 * SIZE(BO)
992	MADD	c21, c21, a1, b2
993	LD	b2,  5 * SIZE(BO)
994	MADD	c31, c31, a2, b3
995	LD	b3,  6 * SIZE(BO)
996	MADD	c41, c41, a2, b4
997	LD	b4,  7 * SIZE(BO)
998
999	LD	a1,  4 * SIZE(AO)
1000	LD	a2,  5 * SIZE(AO)
1001
1002	MADD	c11, c11, a3, b1
1003	LD	b1,  8 * SIZE(BO)
1004	MADD	c21, c21, a3, b2
1005	LD	b2,  9 * SIZE(BO)
1006	MADD	c31, c31, a4, b3
1007	LD	b3, 10 * SIZE(BO)
1008	MADD	c41, c41, a4, b4
1009	LD	b4, 11 * SIZE(BO)
1010
1011	LD	a3,  6 * SIZE(AO)
1012	LD	a4,  7 * SIZE(AO)
1013
1014	daddiu	L, L, -1
1015	daddiu	AO, AO,  4 * SIZE
1016
1017	bgtz	L, .L62
1018	daddiu	BO, BO,  8 * SIZE
1019	.align 3
1020
1021.L65:
1022#if defined(LT) || defined(RN)
1023	andi	L, KK,  3
1024#else
1025	andi	L, TEMP, 3
1026#endif
1027	NOP
1028	blez	L, .L68
1029	NOP
1030	.align	3
1031
1032.L66:
1033	MADD	c11, c11, a1, b1
1034	LD	b1,  2 * SIZE(BO)
1035	MADD	c21, c21, a1, b2
1036	LD	b2,  3 * SIZE(BO)
1037
1038	LD	a1,  1 * SIZE(AO)
1039	daddiu	L, L, -1
1040
1041	daddiu	AO, AO,  1 * SIZE
1042	bgtz	L, .L66
1043	daddiu	BO, BO,  2 * SIZE
1044
1045
1046.L68:
1047	ADD	c11, c11, c31
1048	ADD	c21, c21, c41
1049
1050#if defined(LN) || defined(RT)
1051#ifdef LN
1052	daddiu	TEMP, KK, -1
1053#else
1054	daddiu	TEMP, KK, -2
1055#endif
1056
1057	dsll	L,    TEMP, 0 + BASE_SHIFT
1058	dsll	TEMP, TEMP, 1 + BASE_SHIFT
1059	daddu	AO, AORIG, L
1060	daddu	BO, B,     TEMP
1061#endif
1062
1063
1064#if defined(LN) || defined(LT)
1065	LD	b1,  0 * SIZE(BO)
1066	LD	b2,  1 * SIZE(BO)
1067
1068	SUB	c11, b1, c11
1069	SUB	c21, b2, c21
1070#else
1071	LD	b1,  0 * SIZE(AO)
1072	LD	b2,  1 * SIZE(AO)
1073
1074	SUB	c11, b1, c11
1075	SUB	c21, b2, c21
1076#endif
1077
1078#if defined(LN) || defined(LT)
1079	LD	b3,  0 * SIZE(AO)
1080
1081	MUL	c11, b3, c11
1082	MUL	c21, b3, c21
1083#endif
1084
1085#ifdef RN
1086	LD	b1,  0 * SIZE(BO)
1087	LD	b2,  1 * SIZE(BO)
1088	LD	b3,  3 * SIZE(BO)
1089
1090	MUL	c11, b1, c11
1091
1092	NMSUB	c21, c21, b2, c11
1093
1094	MUL	c21, b3, c21
1095#endif
1096
1097#ifdef RT
1098	LD	b1,  3 * SIZE(BO)
1099	LD	b2,  2 * SIZE(BO)
1100	LD	b3,  0 * SIZE(BO)
1101
1102	MUL	c21, b1, c21
1103
1104	NMSUB	c11, c11, b2, c21
1105
1106	MUL	c11, b3, c11
1107#endif
1108
1109#ifdef LN
1110	daddiu	CO1, CO1, -1 * SIZE
1111	daddiu	CO2, CO2, -1 * SIZE
1112#endif
1113
1114#if defined(LN) || defined(LT)
1115	ST	c11,  0 * SIZE(BO)
1116	ST	c21,  1 * SIZE(BO)
1117#else
1118	ST	c11,  0 * SIZE(AO)
1119	ST	c21,  1 * SIZE(AO)
1120#endif
1121
1122	ST	c11,  0 * SIZE(CO1)
1123	ST	c21,  0 * SIZE(CO2)
1124
1125#ifndef LN
1126	daddiu	CO1, CO1, 1 * SIZE
1127	daddiu	CO2, CO2, 1 * SIZE
1128#endif
1129
1130#ifdef RT
1131	dsll	TEMP, K, 0 + BASE_SHIFT
1132	daddu	AORIG, AORIG, TEMP
1133#endif
1134
1135#if defined(LT) || defined(RN)
1136	dsubu	TEMP, K, KK
1137	dsll	L,    TEMP, 0 + BASE_SHIFT
1138	dsll	TEMP, TEMP, 1 + BASE_SHIFT
1139	daddu	AO, AO, L
1140	daddu	BO, BO, TEMP
1141#endif
1142
1143#ifdef LT
1144	daddiu	KK, KK, 1
1145#endif
1146
1147#ifdef LN
1148	daddiu	KK, KK, -1
1149#endif
1150	.align 3
1151
1152.L69:
1153#ifdef LN
1154	dsll	TEMP, K, 1 + BASE_SHIFT
1155	daddu	B, B, TEMP
1156#endif
1157
1158#if defined(LT) || defined(RN)
1159	move	B,  BO
1160#endif
1161
1162#ifdef RN
1163	daddiu	KK, KK,  2
1164#endif
1165
1166#ifdef RT
1167	daddiu	KK, KK, -2
1168#endif
1169	.align 3
1170
1171.L50:
1172	andi	J,  N, 4
1173	blez	J, .L70
1174	move	AO, A
1175
1176#ifdef RT
1177	dsll	TEMP, K, 2 + BASE_SHIFT
1178	dsubu	B, B, TEMP
1179
1180	dsll	TEMP, LDC, 2
1181	dsubu	C, C, TEMP
1182#endif
1183
1184	move	CO1, C
1185	MTC	$0,  c11
1186	daddu	CO2, C,   LDC
1187	daddu	CO3, CO2, LDC
1188	daddu	CO4, CO3, LDC
1189	MOV	c21, c11
1190	dsra	I,  M, 1
1191	MOV	c31, c11
1192
1193#ifdef LN
1194	daddu	KK, M, OFFSET
1195#endif
1196
1197#ifdef LT
1198	move	KK, OFFSET
1199#endif
1200
1201#if defined(LN) || defined(RT)
1202	move	AORIG, A
1203#else
1204	move	AO, A
1205#endif
1206#ifndef RT
1207	daddu	C,  CO4, LDC
1208#endif
1209
1210	blez	I, .L40
1211	MOV	c41, c11
1212
1213.L31:
1214#if defined(LT) || defined(RN)
1215	LD	a1,  0 * SIZE(AO)
1216	LD	a3,  4 * SIZE(AO)
1217
1218	LD	b1,  0 * SIZE(B)
1219	MOV	c12, c11
1220	LD	b2,  1 * SIZE(B)
1221	MOV	c22, c11
1222	LD	b3,  2 * SIZE(B)
1223	MOV	c32, c11
1224	LD	b4,  3 * SIZE(B)
1225	MOV	c42, c11
1226
1227	LD	b5,  4 * SIZE(B)
1228	dsra	L,  KK, 2
1229	LD	b6,  8 * SIZE(B)
1230	LD	b7, 12 * SIZE(B)
1231
1232	blez	L, .L35
1233	move	BO,  B
1234#else
1235#ifdef LN
1236	dsll	TEMP,   K,  1 + BASE_SHIFT
1237	dsubu	AORIG, AORIG, TEMP
1238#endif
1239
1240	dsll	L,    KK, 1 + BASE_SHIFT
1241	dsll	TEMP, KK, 2 + BASE_SHIFT
1242
1243	daddu	AO, AORIG, L
1244	daddu	BO, B,     TEMP
1245
1246	dsubu	TEMP, K, KK
1247
1248	LD	a1,  0 * SIZE(AO)
1249	LD	a3,  4 * SIZE(AO)
1250
1251	LD	b1,  0 * SIZE(BO)
1252	MOV	c12, c11
1253	LD	b2,  1 * SIZE(BO)
1254	MOV	c22, c11
1255	LD	b3,  2 * SIZE(BO)
1256	MOV	c32, c11
1257	LD	b4,  3 * SIZE(BO)
1258	MOV	c42, c11
1259
1260	LD	b5,  4 * SIZE(BO)
1261	dsra	L,  TEMP, 2
1262	LD	b6,  8 * SIZE(BO)
1263	LD	b7, 12 * SIZE(BO)
1264
1265	blez	L, .L35
1266	NOP
1267#endif
1268	.align	3
1269
1270.L32:
1271	MADD	c11, c11, a1, b1
1272	LD	a2,  1 * SIZE(AO)
1273	MADD	c21, c21, a1, b2
1274	daddiu	L, L, -1
1275	MADD	c31, c31, a1, b3
1276	NOP
1277	MADD	c41, c41, a1, b4
1278	LD	a1,  2 * SIZE(AO)
1279
1280	MADD	c12, c12, a2, b1
1281	LD	b1, 16 * SIZE(BO)
1282	MADD	c22, c22, a2, b2
1283	LD	b2,  5 * SIZE(BO)
1284	MADD	c32, c32, a2, b3
1285	LD	b3,  6 * SIZE(BO)
1286	MADD	c42, c42, a2, b4
1287	LD	b4,  7 * SIZE(BO)
1288
1289	MADD	c11, c11, a1, b5
1290	LD	a2,  3 * SIZE(AO)
1291	MADD	c21, c21, a1, b2
1292	NOP
1293	MADD	c31, c31, a1, b3
1294	NOP
1295	MADD	c41, c41, a1, b4
1296	LD	a1,  8 * SIZE(AO)
1297
1298	MADD	c12, c12, a2, b5
1299	LD	b5, 20 * SIZE(BO)
1300	MADD	c22, c22, a2, b2
1301	LD	b2,  9 * SIZE(BO)
1302	MADD	c32, c32, a2, b3
1303	LD	b3, 10 * SIZE(BO)
1304	MADD	c42, c42, a2, b4
1305	LD	b4, 11 * SIZE(BO)
1306
1307	MADD	c11, c11, a3, b6
1308	LD	a2,  5 * SIZE(AO)
1309	MADD	c21, c21, a3, b2
1310	NOP
1311	MADD	c31, c31, a3, b3
1312	NOP
1313	MADD	c41, c41, a3, b4
1314	LD	a3,  6 * SIZE(AO)
1315
1316	MADD	c12, c12, a2, b6
1317	LD	b6, 24 * SIZE(BO)
1318	MADD	c22, c22, a2, b2
1319	LD	b2, 13 * SIZE(BO)
1320	MADD	c32, c32, a2, b3
1321	LD	b3, 14 * SIZE(BO)
1322	MADD	c42, c42, a2, b4
1323	LD	b4, 15 * SIZE(BO)
1324
1325	MADD	c11, c11, a3, b7
1326	LD	a2,  7 * SIZE(AO)
1327	MADD	c21, c21, a3, b2
1328	daddiu	AO, AO,  8 * SIZE
1329	MADD	c31, c31, a3, b3
1330	daddiu	BO, BO, 16 * SIZE
1331	MADD	c41, c41, a3, b4
1332	LD	a3,  4 * SIZE(AO)
1333
1334	MADD	c12, c12, a2, b7
1335	LD	b7, 12 * SIZE(BO)
1336	MADD	c22, c22, a2, b2
1337	LD	b2,  1 * SIZE(BO)
1338	MADD	c32, c32, a2, b3
1339	LD	b3,  2 * SIZE(BO)
1340	MADD	c42, c42, a2, b4
1341	NOP
1342
1343	bgtz	L, .L32
1344	LD	b4,  3 * SIZE(BO)
1345	.align 3
1346
1347.L35:
1348#if defined(LT) || defined(RN)
1349	andi	L, KK,  3
1350#else
1351	andi	L, TEMP, 3
1352#endif
1353	NOP
1354	blez	L, .L38
1355	NOP
1356	.align	3
1357
1358.L36:
1359	MADD	c11, c11, a1, b1
1360	LD	a2,  1 * SIZE(AO)
1361	MADD	c21, c21, a1, b2
1362	daddiu	L, L, -1
1363	MADD	c31, c31, a1, b3
1364	daddiu	AO, AO,  2 * SIZE
1365	MADD	c41, c41, a1, b4
1366	LD	a1,  0 * SIZE(AO)
1367
1368	MADD	c12, c12, a2, b1
1369	LD	b1,  4 * SIZE(BO)
1370	MADD	c22, c22, a2, b2
1371	LD	b2,  5 * SIZE(BO)
1372	MADD	c32, c32, a2, b3
1373	LD	b3,  6 * SIZE(BO)
1374	MADD	c42, c42, a2, b4
1375	LD	b4,  7 * SIZE(BO)
1376
1377	bgtz	L, .L36
1378	daddiu	BO, BO,  4 * SIZE
1379
1380.L38:
1381#if defined(LN) || defined(RT)
1382#ifdef LN
1383	daddiu	TEMP, KK, -2
1384#else
1385	daddiu	TEMP, KK, -4
1386#endif
1387
1388	dsll	L,    TEMP, 1 + BASE_SHIFT
1389	dsll	TEMP, TEMP, 2 + BASE_SHIFT
1390	daddu	AO, AORIG, L
1391	daddu	BO, B,     TEMP
1392#endif
1393
1394
1395#if defined(LN) || defined(LT)
1396	LD	b1,  0 * SIZE(BO)
1397	LD	b2,  1 * SIZE(BO)
1398	LD	b3,  2 * SIZE(BO)
1399	LD	b4,  3 * SIZE(BO)
1400	LD	b5,  4 * SIZE(BO)
1401	LD	b6,  5 * SIZE(BO)
1402	LD	b7,  6 * SIZE(BO)
1403	LD	b8,  7 * SIZE(BO)
1404
1405	SUB	c11, b1, c11
1406	SUB	c21, b2, c21
1407	SUB	c31, b3, c31
1408	SUB	c41, b4, c41
1409	SUB	c12, b5, c12
1410	SUB	c22, b6, c22
1411	SUB	c32, b7, c32
1412	SUB	c42, b8, c42
1413#else
1414	LD	b1,  0 * SIZE(AO)
1415	LD	b2,  1 * SIZE(AO)
1416	LD	b3,  2 * SIZE(AO)
1417	LD	b4,  3 * SIZE(AO)
1418	LD	b5,  4 * SIZE(AO)
1419	LD	b6,  5 * SIZE(AO)
1420	LD	b7,  6 * SIZE(AO)
1421	LD	b8,  7 * SIZE(AO)
1422
1423	SUB	c11, b1, c11
1424	SUB	c12, b2, c12
1425	SUB	c21, b3, c21
1426	SUB	c22, b4, c22
1427	SUB	c31, b5, c31
1428	SUB	c32, b6, c32
1429	SUB	c41, b7, c41
1430	SUB	c42, b8, c42
1431#endif
1432
1433#ifdef LN
1434	LD	b1,  3 * SIZE(AO)
1435	LD	b2,  2 * SIZE(AO)
1436	LD	b3,  0 * SIZE(AO)
1437
1438	MUL	c12, b1, c12
1439	MUL	c22, b1, c22
1440	MUL	c32, b1, c32
1441	MUL	c42, b1, c42
1442
1443	NMSUB	c11, c11, b2, c12
1444	NMSUB	c21, c21, b2, c22
1445	NMSUB	c31, c31, b2, c32
1446	NMSUB	c41, c41, b2, c42
1447
1448	MUL	c11, b3, c11
1449	MUL	c21, b3, c21
1450	MUL	c31, b3, c31
1451	MUL	c41, b3, c41
1452#endif
1453
1454#ifdef LT
1455	LD	b1,  0 * SIZE(AO)
1456	LD	b2,  1 * SIZE(AO)
1457	LD	b3,  3 * SIZE(AO)
1458
1459	MUL	c11, b1, c11
1460	MUL	c21, b1, c21
1461	MUL	c31, b1, c31
1462	MUL	c41, b1, c41
1463
1464	NMSUB	c12, c12, b2, c11
1465	NMSUB	c22, c22, b2, c21
1466	NMSUB	c32, c32, b2, c31
1467	NMSUB	c42, c42, b2, c41
1468
1469	MUL	c12, b3, c12
1470	MUL	c22, b3, c22
1471	MUL	c32, b3, c32
1472	MUL	c42, b3, c42
1473#endif
1474
1475#ifdef RN
1476	LD	b1,  0 * SIZE(BO)
1477	LD	b2,  1 * SIZE(BO)
1478	LD	b3,  2 * SIZE(BO)
1479	LD	b4,  3 * SIZE(BO)
1480
1481	MUL	c11, b1, c11
1482	MUL	c12, b1, c12
1483
1484	NMSUB	c21, c21, b2, c11
1485	NMSUB	c22, c22, b2, c12
1486	NMSUB	c31, c31, b3, c11
1487	NMSUB	c32, c32, b3, c12
1488	NMSUB	c41, c41, b4, c11
1489	NMSUB	c42, c42, b4, c12
1490
1491	LD	b2,  5 * SIZE(BO)
1492	LD	b3,  6 * SIZE(BO)
1493	LD	b4,  7 * SIZE(BO)
1494
1495	MUL	c21, b2, c21
1496	MUL	c22, b2, c22
1497
1498	NMSUB	c31, c31, b3, c21
1499	NMSUB	c32, c32, b3, c22
1500	NMSUB	c41, c41, b4, c21
1501	NMSUB	c42, c42, b4, c22
1502
1503	LD	b3, 10 * SIZE(BO)
1504	LD	b4, 11 * SIZE(BO)
1505
1506	MUL	c31, b3, c31
1507	MUL	c32, b3, c32
1508
1509	NMSUB	c41, c41, b4, c31
1510	NMSUB	c42, c42, b4, c32
1511
1512	LD	b4, 15 * SIZE(BO)
1513
1514	MUL	c41, b4, c41
1515	MUL	c42, b4, c42
1516#endif
1517
1518#ifdef RT
1519	LD	b5, 15 * SIZE(BO)
1520	LD	b6, 14 * SIZE(BO)
1521	LD	b7, 13 * SIZE(BO)
1522	LD	b8, 12 * SIZE(BO)
1523
1524	MUL	c41, b5, c41
1525	MUL	c42, b5, c42
1526
1527	NMSUB	c31, c31, b6, c41
1528	NMSUB	c32, c32, b6, c42
1529	NMSUB	c21, c21, b7, c41
1530	NMSUB	c22, c22, b7, c42
1531	NMSUB	c11, c11, b8, c41
1532	NMSUB	c12, c12, b8, c42
1533
1534	LD	b6, 10 * SIZE(BO)
1535	LD	b7,  9 * SIZE(BO)
1536	LD	b8,  8 * SIZE(BO)
1537
1538	MUL	c31, b6, c31
1539	MUL	c32, b6, c32
1540
1541	NMSUB	c21, c21, b7, c31
1542	NMSUB	c22, c22, b7, c32
1543	NMSUB	c11, c11, b8, c31
1544	NMSUB	c12, c12, b8, c32
1545
1546	LD	b7,  5 * SIZE(BO)
1547	LD	b8,  4 * SIZE(BO)
1548
1549	MUL	c21, b7, c21
1550	MUL	c22, b7, c22
1551
1552	NMSUB	c11, c11, b8, c21
1553	NMSUB	c12, c12, b8, c22
1554
1555	LD	b8,  0 * SIZE(BO)
1556
1557	MUL	c11, b8, c11
1558	MUL	c12, b8, c12
1559#endif
1560
1561#ifdef LN
1562	daddiu	CO1, CO1, -2 * SIZE
1563	daddiu	CO2, CO2, -2 * SIZE
1564	daddiu	CO3, CO3, -2 * SIZE
1565	daddiu	CO4, CO4, -2 * SIZE
1566#endif
1567
1568#if defined(LN) || defined(LT)
1569	ST	c11,  0 * SIZE(BO)
1570	ST	c21,  1 * SIZE(BO)
1571	ST	c31,  2 * SIZE(BO)
1572	ST	c41,  3 * SIZE(BO)
1573	ST	c12,  4 * SIZE(BO)
1574	ST	c22,  5 * SIZE(BO)
1575	ST	c32,  6 * SIZE(BO)
1576	ST	c42,  7 * SIZE(BO)
1577#else
1578	ST	c11,  0 * SIZE(AO)
1579	ST	c12,  1 * SIZE(AO)
1580	ST	c21,  2 * SIZE(AO)
1581	ST	c22,  3 * SIZE(AO)
1582	ST	c31,  4 * SIZE(AO)
1583	ST	c32,  5 * SIZE(AO)
1584	ST	c41,  6 * SIZE(AO)
1585	ST	c42,  7 * SIZE(AO)
1586#endif
1587
1588	ST	c11,  0 * SIZE(CO1)
1589	ST	c12,  1 * SIZE(CO1)
1590	ST	c21,  0 * SIZE(CO2)
1591	ST	c22,  1 * SIZE(CO2)
1592	ST	c31,  0 * SIZE(CO3)
1593	ST	c32,  1 * SIZE(CO3)
1594	ST	c41,  0 * SIZE(CO4)
1595	ST	c42,  1 * SIZE(CO4)
1596
1597#ifndef LN
1598	daddiu	CO1, CO1, 2 * SIZE
1599	daddiu	CO2, CO2, 2 * SIZE
1600	daddiu	CO3, CO3, 2 * SIZE
1601	daddiu	CO4, CO4, 2 * SIZE
1602#endif
1603
1604#ifdef RT
1605	dsll	TEMP, K, 1 + BASE_SHIFT
1606	daddu	AORIG, AORIG, TEMP
1607#endif
1608
1609#if defined(LT) || defined(RN)
1610	dsubu	TEMP, K, KK
1611	dsll	L,    TEMP, 1 + BASE_SHIFT
1612	dsll	TEMP, TEMP, 2 + BASE_SHIFT
1613	daddu	AO, AO, L
1614	daddu	BO, BO, TEMP
1615#endif
1616
1617#ifdef LT
1618	daddiu	KK, KK, 2
1619#endif
1620
1621#ifdef LN
1622	daddiu	KK, KK, -2
1623#endif
1624
1625	MTC	$0,  a1
1626
1627	MOV	c11, a1
1628	MOV	c21, a1
1629	MOV	c31, a1
1630
1631	daddiu	I, I, -1
1632
1633	bgtz	I, .L31
1634	MOV	c41, c11
1635	.align 3
1636
1637.L40:
1638	andi	I,  M, 1
1639	blez	I, .L49
1640	MOV	c61, c11
1641
1642#if defined(LT) || defined(RN)
1643	LD	a1,  0 * SIZE(AO)
1644	MOV	c71, c11
1645	LD	a2,  1 * SIZE(AO)
1646	MOV	c81, c11
1647
1648	LD	b1,  0 * SIZE(B)
1649	LD	b2,  1 * SIZE(B)
1650	LD	b3,  2 * SIZE(B)
1651	LD	b4,  3 * SIZE(B)
1652	LD	b5,  4 * SIZE(B)
1653	LD	b6,  8 * SIZE(B)
1654	LD	b7, 12 * SIZE(B)
1655
1656	dsra	L,  KK, 2
1657
1658	blez	L, .L45
1659	move	BO,  B
1660#else
1661#ifdef LN
1662	dsll	TEMP,   K,  BASE_SHIFT
1663	dsubu	AORIG, AORIG, TEMP
1664#endif
1665
1666	dsll	L,    KK, 0 + BASE_SHIFT
1667	dsll	TEMP, KK, 2 + BASE_SHIFT
1668
1669	daddu	AO, AORIG, L
1670	daddu	BO, B,     TEMP
1671
1672	dsubu	TEMP, K, KK
1673
1674	LD	a1,  0 * SIZE(AO)
1675	MOV	c71, c11
1676	LD	a2,  1 * SIZE(AO)
1677	MOV	c81, c11
1678
1679	LD	b1,  0 * SIZE(BO)
1680	LD	b2,  1 * SIZE(BO)
1681	LD	b3,  2 * SIZE(BO)
1682	LD	b4,  3 * SIZE(BO)
1683	LD	b5,  4 * SIZE(BO)
1684	LD	b6,  8 * SIZE(BO)
1685	LD	b7, 12 * SIZE(BO)
1686
1687	dsra	L,  TEMP, 2
1688
1689	blez	L, .L45
1690	NOP
1691#endif
1692	.align	3
1693
1694.L42:
1695	MADD	c11, c11, a1, b1
1696	LD	b1, 16 * SIZE(BO)
1697	MADD	c21, c21, a1, b2
1698	LD	b2,  5 * SIZE(BO)
1699	MADD	c31, c31, a1, b3
1700	LD	b3,  6 * SIZE(BO)
1701	MADD	c41, c41, a1, b4
1702	LD	b4,  7 * SIZE(BO)
1703
1704	LD	a1,  4 * SIZE(AO)
1705	daddiu	L, L, -1
1706
1707	MADD	c11, c11, a2, b5
1708	LD	b5, 20 * SIZE(BO)
1709	MADD	c21, c21, a2, b2
1710	LD	b2,  9 * SIZE(BO)
1711	MADD	c31, c31, a2, b3
1712	LD	b3, 10 * SIZE(BO)
1713	MADD	c41, c41, a2, b4
1714	LD	b4, 11 * SIZE(BO)
1715
1716	LD	a2,  2 * SIZE(AO)
1717	daddiu	AO, AO,  4 * SIZE
1718
1719	MADD	c11, c11, a2, b6
1720	LD	b6, 24 * SIZE(BO)
1721	MADD	c21, c21, a2, b2
1722	LD	b2, 13 * SIZE(BO)
1723	MADD	c31, c31, a2, b3
1724	LD	b3, 14 * SIZE(BO)
1725	MADD	c41, c41, a2, b4
1726	LD	b4, 15 * SIZE(BO)
1727
1728	LD	a2, -1 * SIZE(AO)
1729	daddiu	BO, BO, 16 * SIZE
1730
1731	MADD	c11, c11, a2, b7
1732	LD	b7, 12 * SIZE(BO)
1733	MADD	c21, c21, a2, b2
1734	LD	b2,  1 * SIZE(BO)
1735	MADD	c31, c31, a2, b3
1736	LD	b3,  2 * SIZE(BO)
1737	MADD	c41, c41, a2, b4
1738	LD	b4,  3 * SIZE(BO)
1739
1740	bgtz	L, .L42
1741	LD	a2,  1 * SIZE(AO)
1742	.align 3
1743
1744.L45:
1745#if defined(LT) || defined(RN)
1746	andi	L, KK,  3
1747#else
1748	andi	L, TEMP, 3
1749#endif
1750	NOP
1751	blez	L, .L48
1752	NOP
1753	.align	3
1754
1755.L46:
1756	MADD	c11, c11, a1, b1
1757	LD	b1,  4 * SIZE(BO)
1758	MADD	c21, c21, a1, b2
1759	LD	b2,  5 * SIZE(BO)
1760	MADD	c31, c31, a1, b3
1761	LD	b3,  6 * SIZE(BO)
1762	MADD	c41, c41, a1, b4
1763	LD	a1,  1 * SIZE(AO)
1764
1765	LD	b4,  7 * SIZE(BO)
1766	daddiu	L, L, -1
1767
1768	daddiu	AO, AO,  1 * SIZE
1769	MOV	a2, a2
1770	bgtz	L, .L46
1771	daddiu	BO, BO,  4 * SIZE
1772
1773
1774.L48:
1775#if defined(LN) || defined(RT)
1776#ifdef LN
1777	daddiu	TEMP, KK, -1
1778#else
1779	daddiu	TEMP, KK, -4
1780#endif
1781
1782	dsll	L,    TEMP, 0 + BASE_SHIFT
1783	dsll	TEMP, TEMP, 2 + BASE_SHIFT
1784	daddu	AO, AORIG, L
1785	daddu	BO, B,     TEMP
1786#endif
1787
1788
1789#if defined(LN) || defined(LT)
1790	LD	b1,  0 * SIZE(BO)
1791	LD	b2,  1 * SIZE(BO)
1792	LD	b3,  2 * SIZE(BO)
1793	LD	b4,  3 * SIZE(BO)
1794
1795	SUB	c11, b1, c11
1796	SUB	c21, b2, c21
1797	SUB	c31, b3, c31
1798	SUB	c41, b4, c41
1799#else
1800	LD	b1,  0 * SIZE(AO)
1801	LD	b2,  1 * SIZE(AO)
1802	LD	b3,  2 * SIZE(AO)
1803	LD	b4,  3 * SIZE(AO)
1804
1805	SUB	c11, b1, c11
1806	SUB	c21, b2, c21
1807	SUB	c31, b3, c31
1808	SUB	c41, b4, c41
1809#endif
1810
1811#if defined(LN) || defined(LT)
1812	LD	b1,  0 * SIZE(AO)
1813
1814	MUL	c11, b1, c11
1815	MUL	c21, b1, c21
1816	MUL	c31, b1, c31
1817	MUL	c41, b1, c41
1818#endif
1819
1820#ifdef RN
1821	LD	b1,  0 * SIZE(BO)
1822	LD	b2,  1 * SIZE(BO)
1823	LD	b3,  2 * SIZE(BO)
1824	LD	b4,  3 * SIZE(BO)
1825
1826	MUL	c11, b1, c11
1827
1828	NMSUB	c21, c21, b2, c11
1829	NMSUB	c31, c31, b3, c11
1830	NMSUB	c41, c41, b4, c11
1831
1832	LD	b2,  5 * SIZE(BO)
1833	LD	b3,  6 * SIZE(BO)
1834	LD	b4,  7 * SIZE(BO)
1835
1836	MUL	c21, b2, c21
1837
1838	NMSUB	c31, c31, b3, c21
1839	NMSUB	c41, c41, b4, c21
1840
1841	LD	b3, 10 * SIZE(BO)
1842	LD	b4, 11 * SIZE(BO)
1843
1844	MUL	c31, b3, c31
1845
1846	NMSUB	c41, c41, b4, c31
1847
1848	LD	b4, 15 * SIZE(BO)
1849
1850	MUL	c41, b4, c41
1851#endif
1852
1853#ifdef RT
1854	LD	b5, 15 * SIZE(BO)
1855	LD	b6, 14 * SIZE(BO)
1856	LD	b7, 13 * SIZE(BO)
1857	LD	b8, 12 * SIZE(BO)
1858
1859	MUL	c41, b5, c41
1860
1861	NMSUB	c31, c31, b6, c41
1862	NMSUB	c21, c21, b7, c41
1863	NMSUB	c11, c11, b8, c41
1864
1865	LD	b6, 10 * SIZE(BO)
1866	LD	b7,  9 * SIZE(BO)
1867	LD	b8,  8 * SIZE(BO)
1868
1869	MUL	c31, b6, c31
1870
1871	NMSUB	c21, c21, b7, c31
1872	NMSUB	c11, c11, b8, c31
1873
1874	LD	b7,  5 * SIZE(BO)
1875	LD	b8,  4 * SIZE(BO)
1876
1877	MUL	c21, b7, c21
1878
1879	NMSUB	c11, c11, b8, c21
1880
1881	LD	b8,  0 * SIZE(BO)
1882
1883	MUL	c11, b8, c11
1884#endif
1885
1886#ifdef LN
1887	daddiu	CO1, CO1, -1 * SIZE
1888	daddiu	CO2, CO2, -1 * SIZE
1889	daddiu	CO3, CO3, -1 * SIZE
1890	daddiu	CO4, CO4, -1 * SIZE
1891#endif
1892
1893#if defined(LN) || defined(LT)
1894	ST	c11,  0 * SIZE(BO)
1895	ST	c21,  1 * SIZE(BO)
1896	ST	c31,  2 * SIZE(BO)
1897	ST	c41,  3 * SIZE(BO)
1898#else
1899	ST	c11,  0 * SIZE(AO)
1900	ST	c21,  1 * SIZE(AO)
1901	ST	c31,  2 * SIZE(AO)
1902	ST	c41,  3 * SIZE(AO)
1903#endif
1904
1905	ST	c11,  0 * SIZE(CO1)
1906	ST	c21,  0 * SIZE(CO2)
1907	ST	c31,  0 * SIZE(CO3)
1908	ST	c41,  0 * SIZE(CO4)
1909
1910#ifndef LN
1911	daddiu	CO1, CO1, 1 * SIZE
1912	daddiu	CO2, CO2, 1 * SIZE
1913	daddiu	CO3, CO3, 1 * SIZE
1914	daddiu	CO4, CO4, 1 * SIZE
1915#endif
1916
1917#ifdef RT
1918	dsll	TEMP, K, BASE_SHIFT
1919	daddu	AORIG, AORIG, TEMP
1920#endif
1921
1922#if defined(LT) || defined(RN)
1923	dsubu	TEMP, K, KK
1924	dsll	L,    TEMP, 0 + BASE_SHIFT
1925	dsll	TEMP, TEMP, 2 + BASE_SHIFT
1926	daddu	AO, AO, L
1927	daddu	BO, BO, TEMP
1928#endif
1929
1930#ifdef LT
1931	daddiu	KK, KK, 1
1932#endif
1933
1934#ifdef LN
1935	daddiu	KK, KK, -1
1936#endif
1937	.align 3
1938
1939.L49:
1940#ifdef LN
1941	dsll	TEMP, K, 2 + BASE_SHIFT
1942	daddu	B, B, TEMP
1943#endif
1944
1945#if defined(LT) || defined(RN)
1946	move	B,  BO
1947#endif
1948
1949#ifdef RN
1950	daddiu	KK, KK,  4
1951#endif
1952
1953#ifdef RT
1954	daddiu	KK, KK, -4
1955#endif
1956	.align 3
1957
1958.L70:
1959	dsra	J,  N, 3
1960	blez	J, .L999
1961	nop
1962
1963.L10:
1964#ifdef RT
1965	dsll	TEMP, K, 3 + BASE_SHIFT
1966	dsubu	B, B, TEMP
1967
1968	dsll	TEMP, LDC, 3
1969	dsubu	C, C, TEMP
1970#endif
1971
1972	move	CO1, C
1973	MTC	$0,  c11
1974	daddu	CO2, C,   LDC
1975	daddu	CO3, CO2, LDC
1976	daddiu	J, J, -1
1977	daddu	CO4, CO3, LDC
1978	MOV	c21, c11
1979	daddu	CO5, CO4, LDC
1980	MOV	c31, c11
1981	daddu	CO6, CO5, LDC
1982	MOV	c41, c11
1983	daddu	CO7, CO6, LDC
1984	MOV	c51, c11
1985	daddu	CO8, CO7, LDC
1986	dsra	I,  M, 1
1987
1988#ifdef LN
1989	daddu	KK, M, OFFSET
1990#endif
1991
1992#ifdef LT
1993	move	KK, OFFSET
1994#endif
1995
1996#if defined(LN) || defined(RT)
1997	move	AORIG, A
1998#else
1999	move	AO, A
2000#endif
2001#ifndef RT
2002	daddu	C,  CO8, LDC
2003#endif
2004
2005	blez	I, .L20
2006	MOV	c61, c11
2007
2008.L11:
2009#if defined(LT) || defined(RN)
2010	LD	a1,  0 * SIZE(AO)
2011	MOV	c71, c11
2012	LD	b1,  0 * SIZE(B)
2013	MOV	c81, c11
2014
2015	LD	a3,  4 * SIZE(AO)
2016	MOV	c12, c11
2017	LD	b2,  1 * SIZE(B)
2018	MOV	c22, c11
2019
2020	dsra	L,  KK, 2
2021	MOV	c32, c11
2022	LD	b3,  2 * SIZE(B)
2023	MOV	c42, c11
2024
2025	LD	b4,  3 * SIZE(B)
2026	MOV	c52, c11
2027	LD	b5,  4 * SIZE(B)
2028	MOV	c62, c11
2029
2030	LD	b6,  8 * SIZE(B)
2031	MOV	c72, c11
2032	LD	b7, 12 * SIZE(B)
2033	MOV	c82, c11
2034
2035	blez	L, .L15
2036	move	BO,  B
2037#else
2038
2039#ifdef LN
2040	dsll	TEMP,   K,  1 + BASE_SHIFT
2041	dsubu	AORIG, AORIG, TEMP
2042#endif
2043
2044	dsll	L,    KK, 1 + BASE_SHIFT
2045	dsll	TEMP, KK, 3 + BASE_SHIFT
2046
2047	daddu	AO, AORIG, L
2048	daddu	BO, B,     TEMP
2049
2050	dsubu	TEMP, K, KK
2051
2052	LD	a1,  0 * SIZE(AO)
2053	MOV	c71, c11
2054	LD	b1,  0 * SIZE(BO)
2055	MOV	c81, c11
2056
2057	LD	a3,  4 * SIZE(AO)
2058	MOV	c12, c11
2059	LD	b2,  1 * SIZE(BO)
2060	MOV	c22, c11
2061
2062	MOV	c32, c11
2063	LD	b3,  2 * SIZE(BO)
2064	MOV	c42, c11
2065
2066	LD	b4,  3 * SIZE(BO)
2067	MOV	c52, c11
2068	LD	b5,  4 * SIZE(BO)
2069	MOV	c62, c11
2070
2071	LD	b6,  8 * SIZE(BO)
2072	MOV	c72, c11
2073	LD	b7, 12 * SIZE(BO)
2074	MOV	c82, c11
2075
2076	dsra	L,  TEMP, 2
2077	blez	L, .L15
2078	NOP
2079#endif
2080
2081	MADD	c11, c11, a1, b1
2082	LD	a2,  1 * SIZE(AO)
2083	MADD	c21, c21, a1, b2
2084	daddiu	L, L, -1
2085	MADD	c31, c31, a1, b3
2086	blez	L, .L13
2087	MADD	c41, c41, a1, b4
2088	NOP
2089	.align	3
2090
2091.L12:
2092	MADD	c12, c12, a2, b1
2093	LD	b1, 16 * SIZE(BO)
2094	MADD	c22, c22, a2, b2
2095	LD	b2,  5 * SIZE(BO)
2096	MADD	c32, c32, a2, b3
2097	LD	b3,  6 * SIZE(BO)
2098	MADD	c42, c42, a2, b4
2099	LD	b4,  7 * SIZE(BO)
2100
2101	MADD	c51, c51, a1, b5
2102	NOP
2103	MADD	c61, c61, a1, b2
2104	LD	a4,  2 * SIZE(AO)
2105	MADD	c71, c71, a1, b3
2106	NOP
2107	MADD	c81, c81, a1, b4
2108	LD	a1,  8 * SIZE(AO)
2109
2110	MADD	c52, c52, a2, b5
2111	LD	b5, 20 * SIZE(BO)
2112	MADD	c62, c62, a2, b2
2113	LD	b2,  9 * SIZE(BO)
2114	MADD	c72, c72, a2, b3
2115	LD	b3, 10 * SIZE(BO)
2116	MADD	c82, c82, a2, b4
2117	LD	b4, 11 * SIZE(BO)
2118
2119	MADD	c11, c11, a4, b6
2120	LD	a2,  3 * SIZE(AO)
2121	MADD	c21, c21, a4, b2
2122	NOP
2123	MADD	c31, c31, a4, b3
2124	NOP
2125	MADD	c41, c41, a4, b4
2126	NOP
2127
2128	MADD	c12, c12, a2, b6
2129	LD	b6, 24 * SIZE(BO)
2130	MADD	c22, c22, a2, b2
2131	LD	b2, 13 * SIZE(BO)
2132	MADD	c32, c32, a2, b3
2133	LD	b3, 14 * SIZE(BO)
2134	MADD	c42, c42, a2, b4
2135	LD	b4, 15 * SIZE(BO)
2136
2137	MADD	c51, c51, a4, b7
2138	NOP
2139	MADD	c61, c61, a4, b2
2140	NOP
2141	MADD	c71, c71, a4, b3
2142	NOP
2143	MADD	c81, c81, a4, b4
2144	NOP
2145
2146	MADD	c52, c52, a2, b7
2147	LD	b7, 28 * SIZE(BO)
2148	MADD	c62, c62, a2, b2
2149	LD	b2, 17 * SIZE(BO)
2150	MADD	c72, c72, a2, b3
2151	LD	b3, 18 * SIZE(BO)
2152	MADD	c82, c82, a2, b4
2153	LD	b4, 19 * SIZE(BO)
2154
2155	MADD	c11, c11, a3, b1
2156	LD	a2,  5 * SIZE(AO)
2157	MADD	c21, c21, a3, b2
2158	NOP
2159	MADD	c31, c31, a3, b3
2160	NOP
2161	MADD	c41, c41, a3, b4
2162	NOP
2163
2164	MADD	c12, c12, a2, b1
2165	LD	b1, 32 * SIZE(BO)
2166	MADD	c22, c22, a2, b2
2167	LD	b2, 21 * SIZE(BO)
2168	MADD	c32, c32, a2, b3
2169	LD	b3, 22 * SIZE(BO)
2170	MADD	c42, c42, a2, b4
2171	LD	b4, 23 * SIZE(BO)
2172
2173	MADD	c51, c51, a3, b5
2174	NOP
2175	MADD	c61, c61, a3, b2
2176	LD	a4,  6 * SIZE(AO)
2177	MADD	c71, c71, a3, b3
2178	NOP
2179	MADD	c81, c81, a3, b4
2180	LD	a3, 12 * SIZE(AO)
2181
2182	MADD	c52, c52, a2, b5
2183	LD	b5, 36 * SIZE(BO)
2184	MADD	c62, c62, a2, b2
2185	LD	b2, 25 * SIZE(BO)
2186	MADD	c72, c72, a2, b3
2187	LD	b3, 26 * SIZE(BO)
2188	MADD	c82, c82, a2, b4
2189	LD	b4, 27 * SIZE(BO)
2190
2191	MADD	c11, c11, a4, b6
2192	LD	a2,  7 * SIZE(AO)
2193	MADD	c21, c21, a4, b2
2194	NOP
2195	MADD	c31, c31, a4, b3
2196	NOP
2197	MADD	c41, c41, a4, b4
2198	daddiu	L, L, -1
2199
2200	MADD	c12, c12, a2, b6
2201	LD	b6, 40 * SIZE(BO)
2202	MADD	c22, c22, a2, b2
2203	LD	b2, 29 * SIZE(BO)
2204	MADD	c32, c32, a2, b3
2205	LD	b3, 30 * SIZE(BO)
2206	MADD	c42, c42, a2, b4
2207	LD	b4, 31 * SIZE(BO)
2208
2209	MADD	c51, c51, a4, b7
2210	daddiu	BO, BO, 32 * SIZE
2211	MADD	c61, c61, a4, b2
2212	daddiu	AO, AO,  8 * SIZE
2213	MADD	c71, c71, a4, b3
2214	NOP
2215	MADD	c81, c81, a4, b4
2216	NOP
2217
2218	MADD	c52, c52, a2, b7
2219	LD	b7, 12 * SIZE(BO)
2220	MADD	c62, c62, a2, b2
2221	LD	b2,  1 * SIZE(BO)
2222	MADD	c72, c72, a2, b3
2223	LD	b3,  2 * SIZE(BO)
2224	MADD	c82, c82, a2, b4
2225	LD	b4,  3 * SIZE(BO)
2226
2227	MADD	c11, c11, a1, b1
2228	LD	a2,  1 * SIZE(AO)
2229	MADD	c21, c21, a1, b2
2230	NOP
2231	MADD	c31, c31, a1, b3
2232	bgtz	L, .L12
2233	MADD	c41, c41, a1, b4
2234	NOP
2235	.align 3
2236
2237.L13:
2238	MADD	c12, c12, a2, b1
2239	LD	b1, 16 * SIZE(BO)
2240	MADD	c22, c22, a2, b2
2241	LD	b2,  5 * SIZE(BO)
2242	MADD	c32, c32, a2, b3
2243	LD	b3,  6 * SIZE(BO)
2244	MADD	c42, c42, a2, b4
2245	LD	b4,  7 * SIZE(BO)
2246
2247	MADD	c51, c51, a1, b5
2248	NOP
2249	MADD	c61, c61, a1, b2
2250	LD	a4,  2 * SIZE(AO)
2251	MADD	c71, c71, a1, b3
2252	NOP
2253	MADD	c81, c81, a1, b4
2254	LD	a1,  8 * SIZE(AO)
2255
2256	MADD	c52, c52, a2, b5
2257	LD	b5, 20 * SIZE(BO)
2258	MADD	c62, c62, a2, b2
2259	LD	b2,  9 * SIZE(BO)
2260	MADD	c72, c72, a2, b3
2261	LD	b3, 10 * SIZE(BO)
2262	MADD	c82, c82, a2, b4
2263	LD	b4, 11 * SIZE(BO)
2264
2265	MADD	c11, c11, a4, b6
2266	LD	a2,  3 * SIZE(AO)
2267	MADD	c21, c21, a4, b2
2268	NOP
2269	MADD	c31, c31, a4, b3
2270	NOP
2271	MADD	c41, c41, a4, b4
2272	NOP
2273
2274	MADD	c12, c12, a2, b6
2275	LD	b6, 24 * SIZE(BO)
2276	MADD	c22, c22, a2, b2
2277	LD	b2, 13 * SIZE(BO)
2278	MADD	c32, c32, a2, b3
2279	LD	b3, 14 * SIZE(BO)
2280	MADD	c42, c42, a2, b4
2281	LD	b4, 15 * SIZE(BO)
2282
2283	MADD	c51, c51, a4, b7
2284	NOP
2285	MADD	c61, c61, a4, b2
2286	NOP
2287	MADD	c71, c71, a4, b3
2288	NOP
2289	MADD	c81, c81, a4, b4
2290	NOP
2291
2292	MADD	c52, c52, a2, b7
2293	LD	b7, 28 * SIZE(BO)
2294	MADD	c62, c62, a2, b2
2295	LD	b2, 17 * SIZE(BO)
2296	MADD	c72, c72, a2, b3
2297	LD	b3, 18 * SIZE(BO)
2298	MADD	c82, c82, a2, b4
2299	LD	b4, 19 * SIZE(BO)
2300
2301	MADD	c11, c11, a3, b1
2302	LD	a2,  5 * SIZE(AO)
2303	MADD	c21, c21, a3, b2
2304	NOP
2305	MADD	c31, c31, a3, b3
2306	NOP
2307	MADD	c41, c41, a3, b4
2308	NOP
2309
2310	MADD	c12, c12, a2, b1
2311	LD	b1, 32 * SIZE(BO)
2312	MADD	c22, c22, a2, b2
2313	LD	b2, 21 * SIZE(BO)
2314	MADD	c32, c32, a2, b3
2315	LD	b3, 22 * SIZE(BO)
2316	MADD	c42, c42, a2, b4
2317	LD	b4, 23 * SIZE(BO)
2318
2319	MADD	c51, c51, a3, b5
2320	NOP
2321	MADD	c61, c61, a3, b2
2322	LD	a4,  6 * SIZE(AO)
2323	MADD	c71, c71, a3, b3
2324	NOP
2325	MADD	c81, c81, a3, b4
2326	LD	a3, 12 * SIZE(AO)
2327
2328	MADD	c52, c52, a2, b5
2329	LD	b5, 36 * SIZE(BO)
2330	MADD	c62, c62, a2, b2
2331	LD	b2, 25 * SIZE(BO)
2332	MADD	c72, c72, a2, b3
2333	LD	b3, 26 * SIZE(BO)
2334	MADD	c82, c82, a2, b4
2335	LD	b4, 27 * SIZE(BO)
2336
2337	MADD	c11, c11, a4, b6
2338	LD	a2,  7 * SIZE(AO)
2339	MADD	c21, c21, a4, b2
2340	NOP
2341	MADD	c31, c31, a4, b3
2342	NOP
2343	MADD	c41, c41, a4, b4
2344	NOP
2345
2346	MADD	c12, c12, a2, b6
2347	LD	b6, 40 * SIZE(BO)
2348	MADD	c22, c22, a2, b2
2349	LD	b2, 29 * SIZE(BO)
2350	MADD	c32, c32, a2, b3
2351	LD	b3, 30 * SIZE(BO)
2352	MADD	c42, c42, a2, b4
2353	LD	b4, 31 * SIZE(BO)
2354
2355	MADD	c51, c51, a4, b7
2356	daddiu	BO, BO, 32 * SIZE
2357	MADD	c61, c61, a4, b2
2358	daddiu	AO, AO,  8 * SIZE
2359	MADD	c71, c71, a4, b3
2360	NOP
2361	MADD	c81, c81, a4, b4
2362	NOP
2363
2364	MADD	c52, c52, a2, b7
2365	LD	b7, 12 * SIZE(BO)
2366	MADD	c62, c62, a2, b2
2367	LD	b2,  1 * SIZE(BO)
2368	MADD	c72, c72, a2, b3
2369	LD	b3,  2 * SIZE(BO)
2370	MADD	c82, c82, a2, b4
2371	LD	b4,  3 * SIZE(BO)
2372	.align 3
2373
2374.L15:
2375#if defined(LT) || defined(RN)
2376	andi	L, KK,  3
2377#else
2378	andi	L, TEMP, 3
2379#endif
2380	blez	L, .L18
2381	NOP
2382	.align	3
2383
2384.L16:
2385	MADD	c11, c11, a1, b1
2386	LD	a2,  1 * SIZE(AO)
2387	MADD	c21, c21, a1, b2
2388	NOP
2389	MADD	c31, c31, a1, b3
2390	NOP
2391	MADD	c41, c41, a1, b4
2392	NOP
2393
2394	MADD	c12, c12, a2, b1
2395	LD	b1,  8 * SIZE(BO)
2396	MADD	c22, c22, a2, b2
2397	LD	b2,  5 * SIZE(BO)
2398	MADD	c32, c32, a2, b3
2399	LD	b3,  6 * SIZE(BO)
2400	MADD	c42, c42, a2, b4
2401	LD	b4,  7 * SIZE(BO)
2402
2403	MADD	c51, c51, a1, b5
2404	daddiu	L, L, -1
2405	MADD	c61, c61, a1, b2
2406	daddiu	AO, AO,  2 * SIZE
2407	MADD	c71, c71, a1, b3
2408	daddiu	BO, BO,  8 * SIZE
2409	MADD	c81, c81, a1, b4
2410	LD	a1,  0 * SIZE(AO)
2411
2412	MADD	c52, c52, a2, b5
2413	LD	b5,  4 * SIZE(BO)
2414	MADD	c62, c62, a2, b2
2415	LD	b2,  1 * SIZE(BO)
2416	MADD	c72, c72, a2, b3
2417	LD	b3,  2 * SIZE(BO)
2418	MADD	c82, c82, a2, b4
2419	bgtz	L, .L16
2420	LD	b4,  3 * SIZE(BO)
2421
2422.L18:
2423#if defined(LN) || defined(RT)
2424#ifdef LN
2425	daddiu	TEMP, KK, -2
2426#else
2427	daddiu	TEMP, KK, -8
2428#endif
2429
2430	dsll	L,    TEMP, 1 + BASE_SHIFT
2431	dsll	TEMP, TEMP, 3 + BASE_SHIFT
2432	daddu	AO, AORIG, L
2433	daddu	BO, B,     TEMP
2434#endif
2435
2436#if defined(LN) || defined(LT)
2437	LD	b1,  0 * SIZE(BO)
2438	LD	b2,  1 * SIZE(BO)
2439	LD	b3,  2 * SIZE(BO)
2440	LD	b4,  3 * SIZE(BO)
2441
2442	SUB	c11, b1, c11
2443	LD	b5,  4 * SIZE(BO)
2444	SUB	c21, b2, c21
2445	LD	b6,  5 * SIZE(BO)
2446	SUB	c31, b3, c31
2447	LD	b7,  6 * SIZE(BO)
2448	SUB	c41, b4, c41
2449	LD	b8,  7 * SIZE(BO)
2450
2451	SUB	c51, b5, c51
2452	LD	b1,  8 * SIZE(BO)
2453	SUB	c61, b6, c61
2454	LD	b2,  9 * SIZE(BO)
2455	SUB	c71, b7, c71
2456	LD	b3, 10 * SIZE(BO)
2457	SUB	c81, b8, c81
2458	LD	b4, 11 * SIZE(BO)
2459
2460	SUB	c12, b1, c12
2461	LD	b5, 12 * SIZE(BO)
2462	SUB	c22, b2, c22
2463	LD	b6, 13 * SIZE(BO)
2464	SUB	c32, b3, c32
2465	LD	b7, 14 * SIZE(BO)
2466	SUB	c42, b4, c42
2467	LD	b8, 15 * SIZE(BO)
2468
2469	SUB	c52, b5, c52
2470#ifdef LN
2471	LD	b1,  3 * SIZE(AO)
2472#else
2473	LD	b1,  0 * SIZE(AO)
2474#endif
2475	SUB	c62, b6, c62
2476	SUB	c72, b7, c72
2477	SUB	c82, b8, c82
2478#else
2479	LD	b1,  0 * SIZE(AO)
2480	LD	b2,  1 * SIZE(AO)
2481	LD	b3,  2 * SIZE(AO)
2482	LD	b4,  3 * SIZE(AO)
2483
2484	SUB	c11, b1, c11
2485	LD	b5,  4 * SIZE(AO)
2486	SUB	c12, b2, c12
2487	LD	b6,  5 * SIZE(AO)
2488	SUB	c21, b3, c21
2489	LD	b7,  6 * SIZE(AO)
2490	SUB	c22, b4, c22
2491	LD	b8,  7 * SIZE(AO)
2492
2493	SUB	c31, b5, c31
2494	LD	b1,  8 * SIZE(AO)
2495	SUB	c32, b6, c32
2496	LD	b2,  9 * SIZE(AO)
2497	SUB	c41, b7, c41
2498	LD	b3, 10 * SIZE(AO)
2499	SUB	c42, b8, c42
2500	LD	b4, 11 * SIZE(AO)
2501
2502	LD	b5, 12 * SIZE(AO)
2503	SUB	c51, b1, c51
2504	LD	b6, 13 * SIZE(AO)
2505	SUB	c52, b2, c52
2506	LD	b7, 14 * SIZE(AO)
2507	SUB	c61, b3, c61
2508	LD	b8, 15 * SIZE(AO)
2509	SUB	c62, b4, c62
2510
2511	SUB	c71, b5, c71
2512	SUB	c72, b6, c72
2513	SUB	c81, b7, c81
2514	SUB	c82, b8, c82
2515#endif
2516
2517#ifdef LN
2518	MUL	c12, b1, c12
2519	LD	b2,  2 * SIZE(AO)
2520	MUL	c22, b1, c22
2521	MUL	c32, b1, c32
2522	MUL	c42, b1, c42
2523	MUL	c52, b1, c52
2524	MUL	c62, b1, c62
2525	MUL	c72, b1, c72
2526	MUL	c82, b1, c82
2527
2528	NMSUB	c11, c11, b2, c12
2529	LD	b3,  0 * SIZE(AO)
2530	NMSUB	c21, c21, b2, c22
2531	NMSUB	c31, c31, b2, c32
2532	NMSUB	c41, c41, b2, c42
2533	NMSUB	c51, c51, b2, c52
2534	NMSUB	c61, c61, b2, c62
2535	NMSUB	c71, c71, b2, c72
2536	NMSUB	c81, c81, b2, c82
2537
2538	MUL	c11, b3, c11
2539	daddiu	CO1, CO1, -2 * SIZE
2540	MUL	c21, b3, c21
2541	daddiu	CO2, CO2, -2 * SIZE
2542	MUL	c31, b3, c31
2543	daddiu	CO3, CO3, -2 * SIZE
2544	MUL	c41, b3, c41
2545	daddiu	CO4, CO4, -2 * SIZE
2546	MUL	c51, b3, c51
2547	daddiu	CO5, CO5, -2 * SIZE
2548	MUL	c61, b3, c61
2549	daddiu	CO6, CO6, -2 * SIZE
2550	MUL	c71, b3, c71
2551	daddiu	CO7, CO7, -2 * SIZE
2552	MUL	c81, b3, c81
2553	daddiu	CO8, CO8, -2 * SIZE
2554#endif
2555
2556#ifdef LT
2557	MUL	c11, b1, c11
2558	LD	b2,  1 * SIZE(AO)
2559	MUL	c21, b1, c21
2560	MUL	c31, b1, c31
2561	MUL	c41, b1, c41
2562	MUL	c51, b1, c51
2563	MUL	c61, b1, c61
2564	MUL	c71, b1, c71
2565	MUL	c81, b1, c81
2566
2567	NMSUB	c12, c12, b2, c11
2568	LD	b3,  3 * SIZE(AO)
2569	NMSUB	c22, c22, b2, c21
2570	NMSUB	c32, c32, b2, c31
2571	NMSUB	c42, c42, b2, c41
2572	NMSUB	c52, c52, b2, c51
2573	NMSUB	c62, c62, b2, c61
2574	NMSUB	c72, c72, b2, c71
2575	NMSUB	c82, c82, b2, c81
2576
2577	MUL	c12, b3, c12
2578	MUL	c22, b3, c22
2579	MUL	c32, b3, c32
2580	MUL	c42, b3, c42
2581	MUL	c52, b3, c52
2582	MUL	c62, b3, c62
2583	MUL	c72, b3, c72
2584	MUL	c82, b3, c82
2585#endif
2586
2587#ifdef RN
2588	LD	b1,  0 * SIZE(BO)
2589	LD	b2,  1 * SIZE(BO)
2590	LD	b3,  2 * SIZE(BO)
2591	LD	b4,  3 * SIZE(BO)
2592
2593	MUL	c11, b1, c11
2594	MUL	c12, b1, c12
2595	LD	b5,  4 * SIZE(BO)
2596
2597	NMSUB	c21, c21, b2, c11
2598	NMSUB	c22, c22, b2, c12
2599	LD	b6,  5 * SIZE(BO)
2600	NMSUB	c31, c31, b3, c11
2601	NMSUB	c32, c32, b3, c12
2602	LD	b7,  6 * SIZE(BO)
2603	NMSUB	c41, c41, b4, c11
2604	NMSUB	c42, c42, b4, c12
2605	LD	b8,  7 * SIZE(BO)
2606
2607	NMSUB	c51, c51, b5, c11
2608	NMSUB	c52, c52, b5, c12
2609	LD	b2,  9 * SIZE(BO)
2610	NMSUB	c61, c61, b6, c11
2611	NMSUB	c62, c62, b6, c12
2612	LD	b3, 10 * SIZE(BO)
2613	NMSUB	c71, c71, b7, c11
2614	NMSUB	c72, c72, b7, c12
2615	LD	b4, 11 * SIZE(BO)
2616	NMSUB	c81, c81, b8, c11
2617	NMSUB	c82, c82, b8, c12
2618	LD	b5, 12 * SIZE(BO)
2619
2620	MUL	c21, b2, c21
2621	MUL	c22, b2, c22
2622	LD	b6, 13 * SIZE(BO)
2623
2624	NMSUB	c31, c31, b3, c21
2625	NMSUB	c32, c32, b3, c22
2626	LD	b7, 14 * SIZE(BO)
2627	NMSUB	c41, c41, b4, c21
2628	NMSUB	c42, c42, b4, c22
2629	LD	b8, 15 * SIZE(BO)
2630	NMSUB	c51, c51, b5, c21
2631	NMSUB	c52, c52, b5, c22
2632	LD	b3, 18 * SIZE(BO)
2633	NMSUB	c61, c61, b6, c21
2634	NMSUB	c62, c62, b6, c22
2635	LD	b4, 19 * SIZE(BO)
2636	NMSUB	c71, c71, b7, c21
2637	NMSUB	c72, c72, b7, c22
2638	LD	b5, 20 * SIZE(BO)
2639	NMSUB	c81, c81, b8, c21
2640	NMSUB	c82, c82, b8, c22
2641	LD	b6, 21 * SIZE(BO)
2642
2643	MUL	c31, b3, c31
2644	MUL	c32, b3, c32
2645	LD	b7, 22 * SIZE(BO)
2646
2647	NMSUB	c41, c41, b4, c31
2648	NMSUB	c42, c42, b4, c32
2649	LD	b8, 23 * SIZE(BO)
2650	NMSUB	c51, c51, b5, c31
2651	NMSUB	c52, c52, b5, c32
2652	LD	b4, 27 * SIZE(BO)
2653	NMSUB	c61, c61, b6, c31
2654	NMSUB	c62, c62, b6, c32
2655	LD	b5, 28 * SIZE(BO)
2656	NMSUB	c71, c71, b7, c31
2657	NMSUB	c72, c72, b7, c32
2658	LD	b6, 29 * SIZE(BO)
2659	NMSUB	c81, c81, b8, c31
2660	NMSUB	c82, c82, b8, c32
2661	LD	b7, 30 * SIZE(BO)
2662
2663	MUL	c41, b4, c41
2664	MUL	c42, b4, c42
2665	LD	b8, 31 * SIZE(BO)
2666
2667	NMSUB	c51, c51, b5, c41
2668	NMSUB	c52, c52, b5, c42
2669	LD	b5, 36 * SIZE(BO)
2670	NMSUB	c61, c61, b6, c41
2671	NMSUB	c62, c62, b6, c42
2672	LD	b6, 37 * SIZE(BO)
2673	NMSUB	c71, c71, b7, c41
2674	NMSUB	c72, c72, b7, c42
2675	LD	b7, 38 * SIZE(BO)
2676	NMSUB	c81, c81, b8, c41
2677	NMSUB	c82, c82, b8, c42
2678	LD	b8, 39 * SIZE(BO)
2679
2680	MUL	c51, b5, c51
2681	MUL	c52, b5, c52
2682
2683	NMSUB	c61, c61, b6, c51
2684	NMSUB	c62, c62, b6, c52
2685	LD	b6, 45 * SIZE(BO)
2686	NMSUB	c71, c71, b7, c51
2687	NMSUB	c72, c72, b7, c52
2688	LD	b7, 46 * SIZE(BO)
2689	NMSUB	c81, c81, b8, c51
2690	NMSUB	c82, c82, b8, c52
2691	LD	b8, 47 * SIZE(BO)
2692
2693	MUL	c61, b6, c61
2694	MUL	c62, b6, c62
2695
2696	NMSUB	c71, c71, b7, c61
2697	NMSUB	c72, c72, b7, c62
2698	LD	b7, 54 * SIZE(BO)
2699	NMSUB	c81, c81, b8, c61
2700	NMSUB	c82, c82, b8, c62
2701	LD	b8, 55 * SIZE(BO)
2702
2703	MUL	c71, b7, c71
2704	MUL	c72, b7, c72
2705
2706	NMSUB	c81, c81, b8, c71
2707	NMSUB	c82, c82, b8, c72
2708	LD	b8, 63 * SIZE(BO)
2709
2710	MUL	c81, b8, c81
2711	MUL	c82, b8, c82
2712#endif
2713
2714#ifdef RT
2715	LD	b1, 63 * SIZE(BO)
2716	LD	b2, 62 * SIZE(BO)
2717	LD	b3, 61 * SIZE(BO)
2718	LD	b4, 60 * SIZE(BO)
2719
2720	MUL	c81, b1, c81
2721	MUL	c82, b1, c82
2722	LD	b5, 59 * SIZE(BO)
2723
2724	NMSUB	c71, c71, b2, c81
2725	NMSUB	c72, c72, b2, c82
2726	LD	b6, 58 * SIZE(BO)
2727	NMSUB	c61, c61, b3, c81
2728	NMSUB	c62, c62, b3, c82
2729	LD	b7, 57 * SIZE(BO)
2730	NMSUB	c51, c51, b4, c81
2731	NMSUB	c52, c52, b4, c82
2732	LD	b8, 56 * SIZE(BO)
2733
2734	NMSUB	c41, c41, b5, c81
2735	NMSUB	c42, c42, b5, c82
2736	LD	b2, 54 * SIZE(BO)
2737	NMSUB	c31, c31, b6, c81
2738	NMSUB	c32, c32, b6, c82
2739	LD	b3, 53 * SIZE(BO)
2740	NMSUB	c21, c21, b7, c81
2741	NMSUB	c22, c22, b7, c82
2742	LD	b4, 52 * SIZE(BO)
2743	NMSUB	c11, c11, b8, c81
2744	NMSUB	c12, c12, b8, c82
2745	LD	b5, 51 * SIZE(BO)
2746
2747	MUL	c71, b2, c71
2748	MUL	c72, b2, c72
2749	LD	b6, 50 * SIZE(BO)
2750
2751	NMSUB	c61, c61, b3, c71
2752	NMSUB	c62, c62, b3, c72
2753	LD	b7, 49 * SIZE(BO)
2754	NMSUB	c51, c51, b4, c71
2755	NMSUB	c52, c52, b4, c72
2756	LD	b8, 48 * SIZE(BO)
2757	NMSUB	c41, c41, b5, c71
2758	NMSUB	c42, c42, b5, c72
2759	LD	b3, 45 * SIZE(BO)
2760	NMSUB	c31, c31, b6, c71
2761	NMSUB	c32, c32, b6, c72
2762	LD	b4, 44 * SIZE(BO)
2763	NMSUB	c21, c21, b7, c71
2764	NMSUB	c22, c22, b7, c72
2765	LD	b5, 43 * SIZE(BO)
2766	NMSUB	c11, c11, b8, c71
2767	NMSUB	c12, c12, b8, c72
2768	LD	b6, 42 * SIZE(BO)
2769
2770	MUL	c61, b3, c61
2771	MUL	c62, b3, c62
2772	LD	b7, 41 * SIZE(BO)
2773
2774	NMSUB	c51, c51, b4, c61
2775	NMSUB	c52, c52, b4, c62
2776	LD	b8, 40 * SIZE(BO)
2777	NMSUB	c41, c41, b5, c61
2778	NMSUB	c42, c42, b5, c62
2779	LD	b4, 36 * SIZE(BO)
2780	NMSUB	c31, c31, b6, c61
2781	NMSUB	c32, c32, b6, c62
2782	LD	b5, 35 * SIZE(BO)
2783	NMSUB	c21, c21, b7, c61
2784	NMSUB	c22, c22, b7, c62
2785	LD	b6, 34 * SIZE(BO)
2786	NMSUB	c11, c11, b8, c61
2787	NMSUB	c12, c12, b8, c62
2788	LD	b7, 33 * SIZE(BO)
2789
2790	MUL	c51, b4, c51
2791	MUL	c52, b4, c52
2792	LD	b8, 32 * SIZE(BO)
2793
2794	NMSUB	c41, c41, b5, c51
2795	NMSUB	c42, c42, b5, c52
2796	LD	b5, 27 * SIZE(BO)
2797	NMSUB	c31, c31, b6, c51
2798	NMSUB	c32, c32, b6, c52
2799	LD	b6, 26 * SIZE(BO)
2800	NMSUB	c21, c21, b7, c51
2801	NMSUB	c22, c22, b7, c52
2802	LD	b7, 25 * SIZE(BO)
2803	NMSUB	c11, c11, b8, c51
2804	NMSUB	c12, c12, b8, c52
2805	LD	b8, 24 * SIZE(BO)
2806
2807	MUL	c41, b5, c41
2808	MUL	c42, b5, c42
2809
2810	NMSUB	c31, c31, b6, c41
2811	NMSUB	c32, c32, b6, c42
2812	LD	b6, 18 * SIZE(BO)
2813	NMSUB	c21, c21, b7, c41
2814	NMSUB	c22, c22, b7, c42
2815	LD	b7, 17 * SIZE(BO)
2816	NMSUB	c11, c11, b8, c41
2817	NMSUB	c12, c12, b8, c42
2818	LD	b8, 16 * SIZE(BO)
2819
2820	MUL	c31, b6, c31
2821	MUL	c32, b6, c32
2822
2823	NMSUB	c21, c21, b7, c31
2824	NMSUB	c22, c22, b7, c32
2825	LD	b7,  9 * SIZE(BO)
2826	NMSUB	c11, c11, b8, c31
2827	NMSUB	c12, c12, b8, c32
2828	LD	b8,  8 * SIZE(BO)
2829
2830	MUL	c21, b7, c21
2831	MUL	c22, b7, c22
2832
2833	NMSUB	c11, c11, b8, c21
2834	NMSUB	c12, c12, b8, c22
2835	LD	b8,  0 * SIZE(BO)
2836
2837	MUL	c11, b8, c11
2838	MUL	c12, b8, c12
2839#endif
2840
2841#if defined(LN) || defined(LT)
2842	ST	c11,  0 * SIZE(BO)
2843	ST	c21,  1 * SIZE(BO)
2844	ST	c31,  2 * SIZE(BO)
2845	ST	c41,  3 * SIZE(BO)
2846	ST	c51,  4 * SIZE(BO)
2847	ST	c61,  5 * SIZE(BO)
2848	ST	c71,  6 * SIZE(BO)
2849	ST	c81,  7 * SIZE(BO)
2850
2851	ST	c12,  8 * SIZE(BO)
2852	ST	c22,  9 * SIZE(BO)
2853	ST	c32, 10 * SIZE(BO)
2854	ST	c42, 11 * SIZE(BO)
2855	ST	c52, 12 * SIZE(BO)
2856	ST	c62, 13 * SIZE(BO)
2857	ST	c72, 14 * SIZE(BO)
2858	ST	c82, 15 * SIZE(BO)
2859#else
2860	ST	c11,  0 * SIZE(AO)
2861	ST	c12,  1 * SIZE(AO)
2862	ST	c21,  2 * SIZE(AO)
2863	ST	c22,  3 * SIZE(AO)
2864	ST	c31,  4 * SIZE(AO)
2865	ST	c32,  5 * SIZE(AO)
2866	ST	c41,  6 * SIZE(AO)
2867	ST	c42,  7 * SIZE(AO)
2868
2869	ST	c51,  8 * SIZE(AO)
2870	ST	c52,  9 * SIZE(AO)
2871	ST	c61, 10 * SIZE(AO)
2872	ST	c62, 11 * SIZE(AO)
2873	ST	c71, 12 * SIZE(AO)
2874	ST	c72, 13 * SIZE(AO)
2875	ST	c81, 14 * SIZE(AO)
2876	ST	c82, 15 * SIZE(AO)
2877#endif
2878
2879	ST	c11,  0 * SIZE(CO1)
2880	ST	c12,  1 * SIZE(CO1)
2881	ST	c21,  0 * SIZE(CO2)
2882	ST	c22,  1 * SIZE(CO2)
2883	ST	c31,  0 * SIZE(CO3)
2884	ST	c32,  1 * SIZE(CO3)
2885	ST	c41,  0 * SIZE(CO4)
2886	ST	c42,  1 * SIZE(CO4)
2887	ST	c51,  0 * SIZE(CO5)
2888	ST	c52,  1 * SIZE(CO5)
2889	ST	c61,  0 * SIZE(CO6)
2890	ST	c62,  1 * SIZE(CO6)
2891	ST	c71,  0 * SIZE(CO7)
2892	ST	c72,  1 * SIZE(CO7)
2893	ST	c81,  0 * SIZE(CO8)
2894	ST	c82,  1 * SIZE(CO8)
2895
2896	MTC	$0,  a1
2897
2898#ifndef LN
2899	daddiu	CO1, CO1, 2 * SIZE
2900	daddiu	CO2, CO2, 2 * SIZE
2901	daddiu	CO3, CO3, 2 * SIZE
2902	daddiu	CO4, CO4, 2 * SIZE
2903	daddiu	CO5, CO5, 2 * SIZE
2904	daddiu	CO6, CO6, 2 * SIZE
2905	daddiu	CO7, CO7, 2 * SIZE
2906	daddiu	CO8, CO8, 2 * SIZE
2907#endif
2908
2909	MOV	c11, a1
2910	MOV	c21, a1
2911
2912#ifdef RT
2913	dsll	TEMP, K, 1 + BASE_SHIFT
2914	daddu	AORIG, AORIG, TEMP
2915#endif
2916
2917	MOV	c31, a1
2918	MOV	c41, a1
2919
2920#if defined(LT) || defined(RN)
2921	dsubu	TEMP, K, KK
2922	dsll	L,    TEMP, 1 + BASE_SHIFT
2923	dsll	TEMP, TEMP, 3 + BASE_SHIFT
2924	daddu	AO, AO, L
2925	daddu	BO, BO, TEMP
2926#endif
2927
2928#ifdef LT
2929	daddiu	KK, KK, 2
2930#endif
2931
2932#ifdef LN
2933	daddiu	KK, KK, -2
2934#endif
2935
2936	daddiu	I, I, -1
2937	MOV	c51, a1
2938
2939	bgtz	I, .L11
2940	MOV	c61, a1
2941	.align 3
2942
2943.L20:
2944	andi	I,  M, 1
2945	MOV	c61, c11
2946	blez	I, .L29
2947	MOV	c71, c11
2948
2949#if defined(LT) || defined(RN)
2950	LD	a1,  0 * SIZE(AO)
2951	LD	a2,  1 * SIZE(AO)
2952	LD	a3,  2 * SIZE(AO)
2953	LD	a4,  3 * SIZE(AO)
2954
2955	LD	b1,  0 * SIZE(B)
2956	LD	b2,  1 * SIZE(B)
2957	LD	b3,  2 * SIZE(B)
2958	LD	b4,  3 * SIZE(B)
2959	LD	b5,  4 * SIZE(B)
2960	LD	b6,  8 * SIZE(B)
2961	LD	b7, 12 * SIZE(B)
2962
2963	dsra	L,  KK, 2
2964	MOV	c81, c11
2965
2966	blez	L, .L25
2967	move	BO,  B
2968#else
2969
2970#ifdef LN
2971	dsll	TEMP,   K,  0 + BASE_SHIFT
2972	dsubu	AORIG, AORIG, TEMP
2973#endif
2974
2975	dsll	L,    KK, 0 + BASE_SHIFT
2976	dsll	TEMP, KK, 3 + BASE_SHIFT
2977
2978	daddu	AO, AORIG, L
2979	daddu	BO, B,     TEMP
2980
2981	dsubu	TEMP, K, KK
2982
2983	LD	a1,  0 * SIZE(AO)
2984	LD	a2,  1 * SIZE(AO)
2985	LD	a3,  2 * SIZE(AO)
2986	LD	a4,  3 * SIZE(AO)
2987
2988	LD	b1,  0 * SIZE(BO)
2989	LD	b2,  1 * SIZE(BO)
2990	LD	b3,  2 * SIZE(BO)
2991	LD	b4,  3 * SIZE(BO)
2992	LD	b5,  4 * SIZE(BO)
2993	LD	b6,  8 * SIZE(BO)
2994	LD	b7, 12 * SIZE(BO)
2995
2996	dsra	L,  TEMP, 2
2997	MOV	c81, c11
2998
2999	blez	L, .L25
3000	NOP
3001#endif
3002	.align	3
3003
3004.L22:
3005	MADD	c11, c11, a1, b1
3006	LD	b1, 16 * SIZE(BO)
3007	MADD	c21, c21, a1, b2
3008	LD	b2,  5 * SIZE(BO)
3009	MADD	c31, c31, a1, b3
3010	LD	b3,  6 * SIZE(BO)
3011	MADD	c41, c41, a1, b4
3012	LD	b4,  7 * SIZE(BO)
3013
3014	MADD	c51, c51, a1, b5
3015	LD	b5, 20 * SIZE(BO)
3016	MADD	c61, c61, a1, b2
3017	LD	b2,  9 * SIZE(BO)
3018	MADD	c71, c71, a1, b3
3019	LD	b3, 10 * SIZE(BO)
3020	MADD	c81, c81, a1, b4
3021	LD	b4, 11 * SIZE(BO)
3022
3023	LD	a1,  4 * SIZE(AO)
3024	daddiu	L, L, -1
3025
3026	MADD	c11, c11, a2, b6
3027	LD	b6, 24 * SIZE(BO)
3028	MADD	c21, c21, a2, b2
3029	LD	b2, 13 * SIZE(BO)
3030	MADD	c31, c31, a2, b3
3031	LD	b3, 14 * SIZE(BO)
3032	MADD	c41, c41, a2, b4
3033	LD	b4, 15 * SIZE(BO)
3034
3035	MADD	c51, c51, a2, b7
3036	LD	b7, 28 * SIZE(BO)
3037	MADD	c61, c61, a2, b2
3038	LD	b2, 17 * SIZE(BO)
3039	MADD	c71, c71, a2, b3
3040	LD	b3, 18 * SIZE(BO)
3041	MADD	c81, c81, a2, b4
3042	LD	b4, 19 * SIZE(BO)
3043
3044	LD	a2,  5 * SIZE(AO)
3045	daddiu	AO, AO,  4 * SIZE
3046
3047	MADD	c11, c11, a3, b1
3048	LD	b1, 32 * SIZE(BO)
3049	MADD	c21, c21, a3, b2
3050	LD	b2, 21 * SIZE(BO)
3051	MADD	c31, c31, a3, b3
3052	LD	b3, 22 * SIZE(BO)
3053	MADD	c41, c41, a3, b4
3054	LD	b4, 23 * SIZE(BO)
3055
3056	MADD	c51, c51, a3, b5
3057	LD	b5, 36 * SIZE(BO)
3058	MADD	c61, c61, a3, b2
3059	LD	b2, 25 * SIZE(BO)
3060	MADD	c71, c71, a3, b3
3061	LD	b3, 26 * SIZE(BO)
3062	MADD	c81, c81, a3, b4
3063	LD	b4, 27 * SIZE(BO)
3064
3065	LD	a3,  2 * SIZE(AO)
3066	daddiu	BO, BO, 32 * SIZE
3067
3068	MADD	c11, c11, a4, b6
3069	LD	b6,  8 * SIZE(BO)
3070	MADD	c21, c21, a4, b2
3071	LD	b2, -3 * SIZE(BO)
3072	MADD	c31, c31, a4, b3
3073	LD	b3, -2 * SIZE(BO)
3074	MADD	c41, c41, a4, b4
3075	LD	b4, -1 * SIZE(BO)
3076
3077	MADD	c51, c51, a4, b7
3078	LD	b7, 12 * SIZE(BO)
3079	MADD	c61, c61, a4, b2
3080	LD	b2,  1 * SIZE(BO)
3081	MADD	c71, c71, a4, b3
3082	LD	b3,  2 * SIZE(BO)
3083	MADD	c81, c81, a4, b4
3084	LD	b4,  3 * SIZE(BO)
3085	bgtz	L, .L22
3086	LD	a4,  3 * SIZE(AO)
3087	.align 3
3088
3089.L25:
3090#if defined(LT) || defined(RN)
3091	andi	L, KK,  3
3092#else
3093	andi	L, TEMP, 3
3094#endif
3095	NOP
3096	blez	L, .L28
3097	NOP
3098	.align	3
3099
3100.L26:
3101	MADD	c11, c11, a1, b1
3102	LD	b1,  8 * SIZE(BO)
3103	MADD	c21, c21, a1, b2
3104	LD	b2,  5 * SIZE(BO)
3105	MADD	c31, c31, a1, b3
3106	LD	b3,  6 * SIZE(BO)
3107	MADD	c41, c41, a1, b4
3108	LD	b4,  7 * SIZE(BO)
3109
3110	daddiu	L, L, -1
3111	MOV	a2, a2
3112	daddiu	AO, AO,  1 * SIZE
3113	daddiu	BO, BO,  8 * SIZE
3114
3115	MADD	c51, c51, a1, b5
3116	LD	b5,  4 * SIZE(BO)
3117	MADD	c61, c61, a1, b2
3118	LD	b2,  1 * SIZE(BO)
3119	MADD	c71, c71, a1, b3
3120	LD	b3,  2 * SIZE(BO)
3121	MADD	c81, c81, a1, b4
3122	LD	a1,  0 * SIZE(AO)
3123
3124	bgtz	L, .L26
3125	LD	b4,  3 * SIZE(BO)
3126
3127.L28:
3128#if defined(LN) || defined(RT)
3129#ifdef LN
3130	daddiu	TEMP, KK, -1
3131#else
3132	daddiu	TEMP, KK, -8
3133#endif
3134
3135	dsll	L,    TEMP, 0 + BASE_SHIFT
3136	dsll	TEMP, TEMP, 3 + BASE_SHIFT
3137	daddu	AO, AORIG, L
3138	daddu	BO, B,     TEMP
3139#endif
3140
3141
3142#if defined(LN) || defined(LT)
3143	LD	b1,  0 * SIZE(BO)
3144	LD	b2,  1 * SIZE(BO)
3145	LD	b3,  2 * SIZE(BO)
3146	LD	b4,  3 * SIZE(BO)
3147	LD	b5,  4 * SIZE(BO)
3148	LD	b6,  5 * SIZE(BO)
3149	LD	b7,  6 * SIZE(BO)
3150	LD	b8,  7 * SIZE(BO)
3151
3152	SUB	c11, b1, c11
3153	SUB	c21, b2, c21
3154	SUB	c31, b3, c31
3155	SUB	c41, b4, c41
3156	SUB	c51, b5, c51
3157	SUB	c61, b6, c61
3158	SUB	c71, b7, c71
3159	SUB	c81, b8, c81
3160#else
3161	LD	b1,  0 * SIZE(AO)
3162	LD	b2,  1 * SIZE(AO)
3163	LD	b3,  2 * SIZE(AO)
3164	LD	b4,  3 * SIZE(AO)
3165	LD	b5,  4 * SIZE(AO)
3166	LD	b6,  5 * SIZE(AO)
3167	LD	b7,  6 * SIZE(AO)
3168	LD	b8,  7 * SIZE(AO)
3169
3170	SUB	c11, b1, c11
3171	SUB	c21, b2, c21
3172	SUB	c31, b3, c31
3173	SUB	c41, b4, c41
3174	SUB	c51, b5, c51
3175	SUB	c61, b6, c61
3176	SUB	c71, b7, c71
3177	SUB	c81, b8, c81
3178#endif
3179
3180#if defined(LN) || defined(LT)
3181	LD	b1,  0 * SIZE(AO)
3182
3183	MUL	c11, b1, c11
3184	MUL	c21, b1, c21
3185	MUL	c31, b1, c31
3186	MUL	c41, b1, c41
3187	MUL	c51, b1, c51
3188	MUL	c61, b1, c61
3189	MUL	c71, b1, c71
3190	MUL	c81, b1, c81
3191#endif
3192
3193#ifdef RN
3194	LD	b1,  0 * SIZE(BO)
3195	LD	b2,  1 * SIZE(BO)
3196	LD	b3,  2 * SIZE(BO)
3197	LD	b4,  3 * SIZE(BO)
3198	LD	b5,  4 * SIZE(BO)
3199	LD	b6,  5 * SIZE(BO)
3200	LD	b7,  6 * SIZE(BO)
3201	LD	b8,  7 * SIZE(BO)
3202
3203	MUL	c11, b1, c11
3204
3205	NMSUB	c21, c21, b2, c11
3206	NMSUB	c31, c31, b3, c11
3207	NMSUB	c41, c41, b4, c11
3208	NMSUB	c51, c51, b5, c11
3209	NMSUB	c61, c61, b6, c11
3210	NMSUB	c71, c71, b7, c11
3211	NMSUB	c81, c81, b8, c11
3212
3213	LD	b2,  9 * SIZE(BO)
3214	LD	b3, 10 * SIZE(BO)
3215	LD	b4, 11 * SIZE(BO)
3216	LD	b5, 12 * SIZE(BO)
3217	LD	b6, 13 * SIZE(BO)
3218	LD	b7, 14 * SIZE(BO)
3219	LD	b8, 15 * SIZE(BO)
3220
3221	MUL	c21, b2, c21
3222
3223	NMSUB	c31, c31, b3, c21
3224	NMSUB	c41, c41, b4, c21
3225	NMSUB	c51, c51, b5, c21
3226	NMSUB	c61, c61, b6, c21
3227	NMSUB	c71, c71, b7, c21
3228	NMSUB	c81, c81, b8, c21
3229
3230	LD	b3, 18 * SIZE(BO)
3231	LD	b4, 19 * SIZE(BO)
3232	LD	b5, 20 * SIZE(BO)
3233	LD	b6, 21 * SIZE(BO)
3234	LD	b7, 22 * SIZE(BO)
3235	LD	b8, 23 * SIZE(BO)
3236
3237	MUL	c31, b3, c31
3238
3239	NMSUB	c41, c41, b4, c31
3240	NMSUB	c51, c51, b5, c31
3241	NMSUB	c61, c61, b6, c31
3242	NMSUB	c71, c71, b7, c31
3243	NMSUB	c81, c81, b8, c31
3244
3245	LD	b4, 27 * SIZE(BO)
3246	LD	b5, 28 * SIZE(BO)
3247	LD	b6, 29 * SIZE(BO)
3248	LD	b7, 30 * SIZE(BO)
3249	LD	b8, 31 * SIZE(BO)
3250
3251	MUL	c41, b4, c41
3252
3253	NMSUB	c51, c51, b5, c41
3254	NMSUB	c61, c61, b6, c41
3255	NMSUB	c71, c71, b7, c41
3256	NMSUB	c81, c81, b8, c41
3257
3258	LD	b5, 36 * SIZE(BO)
3259	LD	b6, 37 * SIZE(BO)
3260	LD	b7, 38 * SIZE(BO)
3261	LD	b8, 39 * SIZE(BO)
3262
3263	MUL	c51, b5, c51
3264
3265	NMSUB	c61, c61, b6, c51
3266	NMSUB	c71, c71, b7, c51
3267	NMSUB	c81, c81, b8, c51
3268
3269	LD	b6, 45 * SIZE(BO)
3270	LD	b7, 46 * SIZE(BO)
3271	LD	b8, 47 * SIZE(BO)
3272
3273	MUL	c61, b6, c61
3274
3275	NMSUB	c71, c71, b7, c61
3276	NMSUB	c81, c81, b8, c61
3277
3278	LD	b7, 54 * SIZE(BO)
3279	LD	b8, 55 * SIZE(BO)
3280
3281	MUL	c71, b7, c71
3282
3283	NMSUB	c81, c81, b8, c71
3284
3285	LD	b8, 63 * SIZE(BO)
3286
3287	MUL	c81, b8, c81
3288#endif
3289
3290#ifdef RT
3291	LD	b1, 63 * SIZE(BO)
3292	LD	b2, 62 * SIZE(BO)
3293	LD	b3, 61 * SIZE(BO)
3294	LD	b4, 60 * SIZE(BO)
3295	LD	b5, 59 * SIZE(BO)
3296	LD	b6, 58 * SIZE(BO)
3297	LD	b7, 57 * SIZE(BO)
3298	LD	b8, 56 * SIZE(BO)
3299
3300	MUL	c81, b1, c81
3301
3302	NMSUB	c71, c71, b2, c81
3303	NMSUB	c61, c61, b3, c81
3304	NMSUB	c51, c51, b4, c81
3305	NMSUB	c41, c41, b5, c81
3306	NMSUB	c31, c31, b6, c81
3307	NMSUB	c21, c21, b7, c81
3308	NMSUB	c11, c11, b8, c81
3309
3310	LD	b2, 54 * SIZE(BO)
3311	LD	b3, 53 * SIZE(BO)
3312	LD	b4, 52 * SIZE(BO)
3313	LD	b5, 51 * SIZE(BO)
3314	LD	b6, 50 * SIZE(BO)
3315	LD	b7, 49 * SIZE(BO)
3316	LD	b8, 48 * SIZE(BO)
3317
3318	MUL	c71, b2, c71
3319
3320	NMSUB	c61, c61, b3, c71
3321	NMSUB	c51, c51, b4, c71
3322	NMSUB	c41, c41, b5, c71
3323	NMSUB	c31, c31, b6, c71
3324	NMSUB	c21, c21, b7, c71
3325	NMSUB	c11, c11, b8, c71
3326
3327	LD	b3, 45 * SIZE(BO)
3328	LD	b4, 44 * SIZE(BO)
3329	LD	b5, 43 * SIZE(BO)
3330	LD	b6, 42 * SIZE(BO)
3331	LD	b7, 41 * SIZE(BO)
3332	LD	b8, 40 * SIZE(BO)
3333
3334	MUL	c61, b3, c61
3335
3336	NMSUB	c51, c51, b4, c61
3337	NMSUB	c41, c41, b5, c61
3338	NMSUB	c31, c31, b6, c61
3339	NMSUB	c21, c21, b7, c61
3340	NMSUB	c11, c11, b8, c61
3341
3342	LD	b4, 36 * SIZE(BO)
3343	LD	b5, 35 * SIZE(BO)
3344	LD	b6, 34 * SIZE(BO)
3345	LD	b7, 33 * SIZE(BO)
3346	LD	b8, 32 * SIZE(BO)
3347
3348	MUL	c51, b4, c51
3349
3350	NMSUB	c41, c41, b5, c51
3351	NMSUB	c31, c31, b6, c51
3352	NMSUB	c21, c21, b7, c51
3353	NMSUB	c11, c11, b8, c51
3354
3355	LD	b5, 27 * SIZE(BO)
3356	LD	b6, 26 * SIZE(BO)
3357	LD	b7, 25 * SIZE(BO)
3358	LD	b8, 24 * SIZE(BO)
3359
3360	MUL	c41, b5, c41
3361
3362	NMSUB	c31, c31, b6, c41
3363	NMSUB	c21, c21, b7, c41
3364	NMSUB	c11, c11, b8, c41
3365
3366	LD	b6, 18 * SIZE(BO)
3367	LD	b7, 17 * SIZE(BO)
3368	LD	b8, 16 * SIZE(BO)
3369
3370	MUL	c31, b6, c31
3371
3372	NMSUB	c21, c21, b7, c31
3373	NMSUB	c11, c11, b8, c31
3374
3375	LD	b7,  9 * SIZE(BO)
3376	LD	b8,  8 * SIZE(BO)
3377
3378	MUL	c21, b7, c21
3379
3380	NMSUB	c11, c11, b8, c21
3381
3382	LD	b8,  0 * SIZE(BO)
3383
3384	MUL	c11, b8, c11
3385#endif
3386
3387#ifdef LN
3388	daddiu	CO1, CO1, -1 * SIZE
3389	daddiu	CO2, CO2, -1 * SIZE
3390	daddiu	CO3, CO3, -1 * SIZE
3391	daddiu	CO4, CO4, -1 * SIZE
3392	daddiu	CO5, CO5, -1 * SIZE
3393	daddiu	CO6, CO6, -1 * SIZE
3394	daddiu	CO7, CO7, -1 * SIZE
3395	daddiu	CO8, CO8, -1 * SIZE
3396#endif
3397
3398#if defined(LN) || defined(LT)
3399	ST	c11,  0 * SIZE(BO)
3400	ST	c21,  1 * SIZE(BO)
3401	ST	c31,  2 * SIZE(BO)
3402	ST	c41,  3 * SIZE(BO)
3403	ST	c51,  4 * SIZE(BO)
3404	ST	c61,  5 * SIZE(BO)
3405	ST	c71,  6 * SIZE(BO)
3406	ST	c81,  7 * SIZE(BO)
3407#else
3408	ST	c11,  0 * SIZE(AO)
3409	ST	c21,  1 * SIZE(AO)
3410	ST	c31,  2 * SIZE(AO)
3411	ST	c41,  3 * SIZE(AO)
3412	ST	c51,  4 * SIZE(AO)
3413	ST	c61,  5 * SIZE(AO)
3414	ST	c71,  6 * SIZE(AO)
3415	ST	c81,  7 * SIZE(AO)
3416#endif
3417
3418	ST	c11,  0 * SIZE(CO1)
3419	ST	c21,  0 * SIZE(CO2)
3420	ST	c31,  0 * SIZE(CO3)
3421	ST	c41,  0 * SIZE(CO4)
3422	ST	c51,  0 * SIZE(CO5)
3423	ST	c61,  0 * SIZE(CO6)
3424	ST	c71,  0 * SIZE(CO7)
3425	ST	c81,  0 * SIZE(CO8)
3426
3427#ifndef LN
3428	daddiu	CO1, CO1, 1 * SIZE
3429	daddiu	CO2, CO2, 1 * SIZE
3430	daddiu	CO3, CO3, 1 * SIZE
3431	daddiu	CO4, CO4, 1 * SIZE
3432	daddiu	CO5, CO5, 1 * SIZE
3433	daddiu	CO6, CO6, 1 * SIZE
3434	daddiu	CO7, CO7, 1 * SIZE
3435	daddiu	CO8, CO8, 1 * SIZE
3436#endif
3437
3438#ifdef RT
3439	dsll	TEMP, K, BASE_SHIFT
3440	daddu	AORIG, AORIG, TEMP
3441#endif
3442
3443#if defined(LT) || defined(RN)
3444	dsubu	TEMP, K, KK
3445	dsll	L,    TEMP, 0 + BASE_SHIFT
3446	dsll	TEMP, TEMP, 3 + BASE_SHIFT
3447	daddu	AO, AO, L
3448	daddu	BO, BO, TEMP
3449#endif
3450
3451#ifdef LT
3452	daddiu	KK, KK, 1
3453#endif
3454
3455#ifdef LN
3456	daddiu	KK, KK, -1
3457#endif
3458	.align 3
3459
3460.L29:
3461#ifdef LN
3462	dsll	TEMP, K, 3 + BASE_SHIFT
3463	daddu	B, B, TEMP
3464#endif
3465
3466#if defined(LT) || defined(RN)
3467	move	B,  BO
3468#endif
3469
3470#ifdef RN
3471	daddiu	KK, KK,  8
3472#endif
3473
3474#ifdef RT
3475	daddiu	KK, KK, -8
3476#endif
3477
3478	bgtz	J, .L10
3479	NOP
3480	.align 3
3481
3482
3483
3484.L999:
3485	LDARG	$16,   0($sp)
3486	LDARG	$17,   8($sp)
3487	LDARG	$18,  16($sp)
3488	LDARG	$19,  24($sp)
3489	LDARG	$20,  32($sp)
3490	LDARG	$21,  40($sp)
3491	ldc1	$f24, 48($sp)
3492	ldc1	$f25, 56($sp)
3493	ldc1	$f26, 64($sp)
3494	ldc1	$f27, 72($sp)
3495	ldc1	$f28, 80($sp)
3496
3497	LDARG	$22,  88($sp)
3498	LDARG	$23,  96($sp)
3499	LDARG	$24, 104($sp)
3500	LDARG	$25, 112($sp)
3501
3502#ifndef __64BIT__
3503	ldc1	$f20,112($sp)
3504	ldc1	$f21,120($sp)
3505	ldc1	$f22,128($sp)
3506	ldc1	$f23,136($sp)
3507#endif
3508
3509	j	$31
3510	daddiu	$sp, $sp, 144
3511
3512	EPILOGUE
3513