1##define REALNAME gemm
2#define ASSEMBLER
3#include "common.h"
4
/*
 * Aliases used by the kernel body below: argument registers, panel and
 * output pointers, FP operand/accumulator registers and raw register numbers.
 *
 * FETCH is only ever used with $0 as its destination in this file, so the
 * loaded value is discarded; presumably it serves as a prefetch -- TODO confirm.
 * STACKSIZE is the number of bytes the prologue subtracts from $sp.
 */
5#define FETCH	ld
6#define	STACKSIZE	192
/*
 * gsLQC1 / gsSQC1 emit one hand-encoded 32-bit instruction word:
 *   bits 31..26 = 0x32 (gsLQC1) or 0x3A (gsSQC1)
 *   bits 25..21 = base   (general register NUMBER, e.g. R12)
 *   bits 20..16 = ft     (FP register NUMBER, e.g. F0)
 *   bit  15     = 1
 *   bits 14..6  = offset
 *   bit  5      = 1
 *   bits  4..0  = fq     (FP register NUMBER)
 * The arguments are pasted into an integer expression, so they must be the
 * plain numbers defined further down (F0..F31, R12..R17), not $-names.
 * Judging by the pointer advances in the loops, one offset unit corresponds
 * to 4 * SIZE bytes.  Presumably these are the Loongson-3A 128-bit load/store
 * of the FP register pair fq/ft -- TODO confirm against the CPU manual.
 */
7#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
8#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq)
9
10
11#####	Parameter registers	####
/* M, N, K are used as the loop extents; A, B, C are the panel/output base
   pointers.  LDC is not taken from a register: the prologue loads it from
   0($sp) of the caller. */
12#define M	$4
13#define	N	$5
14#define	K	$6
15#define A	$9
16#define B	$10
17#define C	$11
18#define LDC	$8
19
20####	Pointer A, B, C	####
/* AO / BO walk the A and B panels; they are $12 / $13, which is why the
   gsLQC1 calls pass R12 / R13 as base. */
21#define AO	$12
22#define BO	$13
23
/* CO1 / CO2 point at the two output columns handled per N-iteration. */
24#define CO1	$14
25#define CO2	$15
26
/* PREA / PREB are addresses used only by FETCH in the main K loop. */
27#define PREA	$18
28#define PREB	$19
29
30####	Used registers	####
/* A1..A8: operands loaded from the A panel (also reused as scratch and as
   alpha_r / alpha_i in the write-back code). */
31#define A1	$f0
32#define A2	$f1
33#define A3	$f2
34#define A4	$f3
35#define A5	$f4
36#define	A6	$f5
37#define	A7	$f6
38#define	A8	$f7
39
/* B1,B2,B5,B6: operands loaded from the B panel; B3,B4,B7,B8 are produced
   from them with PLU.  All eight are reused as scratch in the write-back. */
40#define B1	$f8
41#define B2	$f9
42#define B3	$f10
43#define B4	$f11
44#define B5	$f12
45#define	B6	$f13
46#define	B7	$f14
47#define	B8	$f15
48
/* Sixteen accumulators, $f16..$f31. */
49#define C11	$f16
50#define C12	$f17
51#define C21	$f18
52#define C22	$f19
53#define C31	$f20
54#define C32	$f21
55#define C41	$f22
56#define C42	$f23
57#define C13	$f24
58#define C14	$f25
59#define C23	$f26
60#define C24	$f27
61#define C33	$f28
62#define C34	$f29
63#define C43	$f30
64#define C44	$f31
65
/* Loop counters: I counts M tiles, J counts N>>1 column pairs, L counts
   K-loop iterations. */
66#define I	$2
67#define J	$3
68#define L	$7
69
70####	Alpha register	####
/* ALPHA is the same register as B8 ($f15).  The visible code never uses the
   ALPHA name: it stores $f15 and $f16 to 152($sp) and 160($sp) before the
   loops and reloads them as alpha_r / alpha_i in the write-back. */
71#define ALPHA	$f15
72
/* Plain FP register numbers for the gsLQC1/gsSQC1 encoders. */
73#define F31 31
74#define F30 30
75#define F29 29
76#define F28 28
77#define F27 27
78#define F26 26
79#define F25 25
80#define F24 24
81#define F23 23
82#define F22 22
83#define F21 21
84#define F20 20
85#define F19 19
86#define F18 18
87#define F17 17
88#define F16 16
89#define F15 15
90#define F14 14
91#define F13 13
92#define F12 12
93#define F11 11
94#define F10 10
95#define F9 9
96#define F8 8
97#define F7 7
98#define F6 6
99#define F5 5
100#define F4 4
101#define F3 3
102#define F2 2
103#define F1 1
104#define F0 0
105
/* Plain general register numbers for the encoders: R12 = AO, R13 = BO,
   R14 = CO1, R15 = CO2 (compare the $-names above). */
106#define	R12	12
107#define	R13	13
108
109#define R14	14
110#define R15	15
111#define	R16	16
112#define	R17	17
113
/* Extra integer registers that exist only in the TRMM build. */
114#if defined(TRMMKERNEL)
115#define	OFFSET	$23
116#define	KK		$24
117#define TEMP	$25
118#endif
119
120
121	PROLOGUE
	/*
	 * Function entry (PROLOGUE itself is a macro from common.h, not
	 * visible here).  Loads LDC from the caller stack, allocates STACKSIZE
	 * bytes and saves registers into the new frame:
	 *     0.. 48  $16-$22
	 *    56.. 88  $f24-$f28
	 *    96..112  $23-$25            (TRMM build only)
	 *   120..144  $f20-$f23          (only when __64BIT__ is not defined)
	 *   152, 160  written later at .L2 with $f15 / $f16 (alpha)
	 *
	 * NOTE(review): the accumulators C34/C43/C44 are $f29-$f31 and are
	 * overwritten by the kernel, but they are not saved here.  If the
	 * target ABI treats $f24-$f31 as callee-saved (MIPS n64 does, as far
	 * as I know), this needs a matching save/restore -- confirm together
	 * with the epilogue, which is outside this view.
	 */
122
	/* LDC is read relative to the incoming $sp, before the frame exists. */
123	LDARG	LDC,   0($sp)
124	daddiu	$sp,$sp,-STACKSIZE
125
126	sd	$16,   0($sp)
127	sd	$17,   8($sp)
128	sd	$18,  16($sp)
129	sd	$19,  24($sp)
130	sd	$20,  32($sp)
131	sd	$21,  40($sp)
132	sd	$22,  48($sp)
133
134	ST	$f24, 56($sp)
135	ST	$f25, 64($sp)
136	ST	$f26, 72($sp)
137	ST	$f27, 80($sp)
138	ST	$f28, 88($sp)
139
140#if defined(TRMMKERNEL)
141	sd	$23,  96($sp)
142	sd	$24, 104($sp)
143	sd	$25, 112($sp)
144
	/* STACKSIZE+8($sp) is offset 8 from the incoming $sp, i.e. the
	   caller stack slot right after the one LDC was read from. */
145	LDARG	OFFSET, STACKSIZE+8($sp)
146#endif
147
148#ifndef __64BIT__
149	ST	$f20,120($sp)
150	ST	$f21,128($sp)
151	ST	$f22,136($sp)
152	ST	$f23,144($sp)
153#endif
154
155	.align	4
156.L2:
	/*
	 * One-time setup before the N loop: J = N >> 1 (pairs of columns),
	 * LDC is shifted left by ZBASE_SHIFT, and $f15 / $f16 are spilled to
	 * 152($sp) / 160($sp).  The write-back code reloads those two slots
	 * as alpha_r and alpha_i.  If J <= 0 control goes to .L1 (outside
	 * this view).
	 */
157	dsra	J, N, 1				#	NR=2
158	ST		$f15, 152($sp)
159
160#if defined(TRMMKERNEL) && !defined(LEFT)
161	neg	KK, OFFSET
162#endif
163
164	dsll	LDC, LDC, ZBASE_SHIFT#	LDC*SIZE
165	blez	J, .L1
	/* Presumably executed in the branch delay slot (noreorder mode would
	   be set outside this view -- TODO confirm), so the store happens on
	   both paths. */
166	ST		$f16, 160($sp)
167
168.L24:
	/*
	 * Start of one N-iteration (two output columns): resets AO to A, sets
	 * CO1 = C and CO2 = C + LDC, advances C by 2 * LDC, and computes
	 * PREA = AO + (K << (1 + ZBASE_SHIFT)).  I = M >> 2 is the number of
	 * 4-row tiles; with none, control skips to .L22.
	 */
169#if defined(TRMMKERNEL) &&  defined(LEFT)
170	move	KK, OFFSET
171#endif
172
173	dsra	I, M, 2				#	MR=4: one tile covers 4 rows
174	move	AO, A				#	Reset A
175
176	dsll	PREA, K, 1 + ZBASE_SHIFT
177	move	CO1, C
178
179	daddu	CO2, C,   LDC
180	daddu	PREA, AO, PREA
181
182	blez	I, .L22
	/* Presumably the delay slot of the branch above (TODO confirm
	   noreorder): C is advanced on both paths. */
183	daddu	C,   CO2, LDC
184
185	.align	4
186.L241:
	/*
	 * Setup for one 4-row by 2-column tile.  Both build variants do the
	 * same work in a different instruction order:
	 *   - point BO at the B panel (TRMM may also offset AO / BO by KK),
	 *   - zero all sixteen accumulators by copying C11, which is loaded
	 *     from $0 with MTC,
	 *   - preload B1/B2 from BO and A1..A4 from AO, and derive B3/B4 from
	 *     B1/B2 with PLU,
	 *   - PREB = BO + (K << ZBASE_SHIFT), and FETCH the CO1 / CO2 lines,
	 *   - L = number of 4-step K iterations; zero goes straight to .L242.
	 * In the TRMM build the effective K length is kept in TEMP, which the
	 * tail blocks .L242 / .L247 test later.
	 */
187#if defined(TRMMKERNEL)
188#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
189	move	BO,  B
190#else
	/* Skip KK k-steps in both panels: 4 elements per step in A, 2 in B. */
191	dsll	L, KK, 2 + ZBASE_SHIFT
192	dsll	TEMP, KK, 1 + ZBASE_SHIFT
193
194	daddu	AO, AO, L
195	daddu	BO, B,  TEMP
196#endif
197	MTC		$0, C11				#	clear result registers
198	MOV		C12, C11
199
200	dsll	PREB, K, ZBASE_SHIFT
201	MOV		C21, C11
202	MOV		C22, C11
203
204	gsLQC1(R13, F9, F8, 0)		#	B1 B2
205	MOV		C31, C11
206	MOV		C32, C11
207
208	gsLQC1(R12, F1, F0, 0)		#	A1 A2
209	MOV		C41, C11
210	MOV		C42, C11
211
212	gsLQC1(R12, F3, F2, 1)		#	A3 A4
213	MOV		C13, C11
214	MOV		C14, C11
215
216	MOV		C23, C11
217	MOV		C24, C11
218
219	MOV		C33, C11
220	MOV		C34, C11
221
222	MOV		C43, C11
223	MOV		C44, C11
224
225	PLU		B3,	B1, B1
226	PLU		B4, B2, B2
227	daddu	PREB, BO, PREB
228
229	FETCH	$0, 0 * SIZE(CO1)
230	FETCH	$0, 8 * SIZE(CO1)
231	FETCH	$0, 0 * SIZE(CO2)
232	FETCH	$0, 8 * SIZE(CO2)
	/* TEMP = number of k-steps this tile actually runs: K - KK, or
	   KK + 4 (LEFT) / KK + 2 (otherwise). */
233#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
234	dsubu	TEMP, K, KK
235#elif defined(LEFT)
236	daddiu	TEMP, KK, 4
237#else
238	daddiu	TEMP, KK, 2
239#endif
240	dsra	L,  TEMP, 2
241	blez	L, .L242
242	NOP
243
244#else
245
246	move	BO, B				#	Reset	B
247	dsra	L, K, 2				#	K loop is unrolled by 4
248	MTC		$0, C11				#	clear result registers
249	MOV		C12, C11
250
251	dsll	PREB, K, ZBASE_SHIFT
252	MOV		C21, C11
253	MOV		C22, C11
254
255	gsLQC1(R13, F9, F8, 0)		#	B1 B2
256	MOV		C31, C11
257	MOV		C32, C11
258
259	gsLQC1(R12, F1, F0, 0)		#	A1 A2
260	MOV		C41, C11
261	MOV		C42, C11
262
263	gsLQC1(R12, F3, F2, 1)		#	A3 A4
264	MOV		C13, C11
265	MOV		C14, C11
266
267	FETCH	$0, 0 * SIZE(CO1)
268	MOV		C23, C11
269	MOV		C24, C11
270
271	FETCH	$0, 0 * SIZE(CO2)
272	MOV		C33, C11
273	MOV		C34, C11
274
275	MOV		C43, C11
276	MOV		C44, C11
277	daddu	PREB, BO, PREB
278
279	PLU		B3,	B1, B1
280	PLU		B4, B2, B2
281
282	FETCH	$0, 8 * SIZE(CO1)
283	blez	L, .L242
	/* Presumably the delay slot of the branch above (TODO confirm). */
284	FETCH	$0, 8 * SIZE(CO2)
285#endif
286
287.L2410:
	/*
	 * Main K loop of the 4x2 tile: four k-steps per iteration, sixteen
	 * MADPS per step (every accumulator is updated once per step).
	 * Steps 1 and 3 consume A1..A4 with B1..B4, steps 2 and 4 consume
	 * A5..A8 with B5..B8; the loads and PLU shuffles for the following
	 * step are interleaved with the arithmetic of the current one.
	 *
	 * BO and AO are advanced in the middle of the iteration (third
	 * step), so the gsLQC1 loads issued after that point use offsets 0/1
	 * relative to the already advanced pointers.  PREB / PREA move by
	 * 8 * SIZE / 32 * SIZE per iteration and are used only by FETCH.
	 *
	 * On exit A1..A4 and B1..B4 already hold the operands of the next
	 * k-step, which the tail blocks .L242 / .L247 rely on.
	 */
288	daddiu	L, L, -1
289	gsLQC1(R13, F13, F12, 1)	#	B3 B4
290	MADPS	C11, C11, A1, B1
291	MADPS	C21, C21, A2, B1
292
293	gsLQC1(R12, F5, F4, 2)		#	A5 A6
294	MADPS	C12, C12, A1, B2
295	MADPS	C22, C22, A2, B2
296
297	gsLQC1(R12, F7, F6, 3)		#	A7 A8
298	MADPS	C31, C31, A3, B1
299	MADPS	C41, C41, A4, B1
300
301	FETCH	$0, 0 * SIZE(PREB)
302	MADPS	C32, C32, A3, B2
303	MADPS	C42, C42, A4, B2
304
305	FETCH	$0, 0 * SIZE(PREA)
306	MADPS	C13, C13, A1, B3
307	MADPS	C23, C23, A2, B3
308
309	MADPS	C33, C33, A3, B3
310	MADPS	C43, C43, A4, B3
311
312	MADPS	C14, C14, A1, B4
313	MADPS	C24, C24, A2, B4
314
315	PLU		B7,	B5, B5
316	PLU		B8, B6, B6
317	daddu	PREB, PREB, 8 * SIZE
318
319	MADPS	C34, C34, A3, B4
320	MADPS	C44, C44, A4, B4
321
	/* ---- second k-step ---- */
322	gsLQC1(R13, F9, F8, 2)		#	B1 B2
323	MADPS	C11, C11, A5, B5
324	MADPS	C21, C21, A6, B5
325
326	gsLQC1(R12, F1, F0, 4)		#	A1 A2
327	MADPS	C12, C12, A5, B6
328	MADPS	C22, C22, A6, B6
329
330	gsLQC1(R12, F3, F2, 5)		#	A3 A4
331	MADPS	C31, C31, A7, B5
332	MADPS	C41, C41, A8, B5
333
334	FETCH	$0, 8 * SIZE(PREA)
335	MADPS	C32, C32, A7, B6
336	MADPS	C42, C42, A8, B6
337
338	MADPS	C13, C13, A5, B7
339	MADPS	C23, C23, A6, B7
340
341	MADPS	C33, C33, A7, B7
342	MADPS	C43, C43, A8, B7
343
344	MADPS	C14, C14, A5, B8
345	MADPS	C24, C24, A6, B8
346
347	PLU		B3,	B1, B1
348	PLU		B4, B2, B2
349
350	MADPS	C34, C34, A7, B8
351	MADPS	C44, C44, A8, B8
352
	/* ---- third k-step; BO and AO are advanced during this step ---- */
353	gsLQC1(R13, F13, F12, 3)	#	B3 B4
354	MADPS	C11, C11, A1, B1
355	MADPS	C21, C21, A2, B1
356
357	gsLQC1(R12, F5, F4, 6)		#	A5 A6
358	MADPS	C12, C12, A1, B2
359	MADPS	C22, C22, A2, B2
360
361	gsLQC1(R12, F7, F6, 7)		#	A7 A8
362	MADPS	C31, C31, A3, B1
363	MADPS	C41, C41, A4, B1
364	daddiu	BO, BO, 4 * 4 * SIZE	#	4KR*4NR
365
366	FETCH	$0, 16 * SIZE(PREA)
367	MADPS	C32, C32, A3, B2
368	MADPS	C42, C42, A4, B2
369	daddiu	AO, AO, 8 * 4 * SIZE 	#	4KR*8MR
370
371	MADPS	C13, C13, A1, B3
372	MADPS	C23, C23, A2, B3
373
374	MADPS	C33, C33, A3, B3
375	MADPS	C43, C43, A4, B3
376
377	MADPS	C14, C14, A1, B4
378	MADPS	C24, C24, A2, B4
379
380	PLU		B7,	B5, B5
381	PLU		B8, B6, B6
382
383	MADPS	C34, C34, A3, B4
384	MADPS	C44, C44, A4, B4
385
	/* ---- fourth k-step; loads below fetch the next iteration ---- */
386	gsLQC1(R13, F9, F8, 0)		#	B1 B2
387	MADPS	C11, C11, A5, B5
388	MADPS	C21, C21, A6, B5
389
390	gsLQC1(R12, F1, F0, 0)		#	A1 A2
391	MADPS	C12, C12, A5, B6
392	MADPS	C22, C22, A6, B6
393
394	gsLQC1(R12, F3, F2, 1)		#	A3 A4
395	MADPS	C31, C31, A7, B5
396	MADPS	C41, C41, A8, B5
397
398	FETCH	$0, 24 * SIZE(PREA)
399	MADPS	C32, C32, A7, B6
400	MADPS	C42, C42, A8, B6
401
402	MADPS	C13, C13, A5, B7
403	MADPS	C23, C23, A6, B7
404	daddu	PREA, PREA, 32 * SIZE
405
406	MADPS	C33, C33, A7, B7
407	MADPS	C43, C43, A8, B7
408
409	MADPS	C14, C14, A5, B8
410	MADPS	C24, C24, A6, B8
411
412	PLU		B3,	B1, B1
413	PLU		B4, B2, B2
414
415	MADPS	C34, C34, A7, B8
416	bgtz	L, .L2410
	/* Last MADPS of the step; presumably sits in the branch delay slot
	   (TODO confirm noreorder). */
417	MADPS	C44, C44, A8, B8
418
419
420	.align	4
421.L242:
	/*
	 * K tail, two k-steps: runs when bit 1 of the K length is set (K in
	 * the plain build, TEMP in the TRMM build).  Same step structure as
	 * the main loop; BO advances by 2 * 4 * SIZE and AO by 4 * 4 * SIZE,
	 * and A1..A4 / B1..B4 are reloaded for a possible final single step.
	 */
422#ifndef	TRMMKERNEL
423	andi	L, K, 2
424#else
425	andi	L, TEMP, 2
426#endif
427	blez	L, .L247
428	NOP
429
430	gsLQC1(R13, F13, F12, 1)	#	B3 B4
431	MADPS	C11, C11, A1, B1
432	MADPS	C21, C21, A2, B1
433
434	gsLQC1(R12, F5, F4, 2)		#	A5 A6
435	MADPS	C12, C12, A1, B2
436	MADPS	C22, C22, A2, B2
437
438	gsLQC1(R12, F7, F6, 3)		#	A7 A8
439	MADPS	C31, C31, A3, B1
440	MADPS	C41, C41, A4, B1
441	daddiu	BO, BO, 2 * 4 * SIZE	#	2KR*4NR
442
443	MADPS	C32, C32, A3, B2
444	MADPS	C42, C42, A4, B2
445	daddiu	AO, AO, 4 * 4 * SIZE
446
447	MADPS	C13, C13, A1, B3
448	MADPS	C23, C23, A2, B3
449
450	MADPS	C33, C33, A3, B3
451	MADPS	C43, C43, A4, B3
452
453	MADPS	C14, C14, A1, B4
454	MADPS	C24, C24, A2, B4
455
456	PLU		B7,	B5, B5
457	PLU		B8, B6, B6
458
459	MADPS	C34, C34, A3, B4
460	MADPS	C44, C44, A4, B4
461
	/* Second step; the loads use the already advanced BO / AO. */
462	gsLQC1(R13, F9, F8, 0)		#	B1 B2
463	MADPS	C11, C11, A5, B5
464	MADPS	C21, C21, A6, B5
465
466	gsLQC1(R12, F1, F0, 0)		#	A1 A2
467	MADPS	C12, C12, A5, B6
468	MADPS	C22, C22, A6, B6
469
470	gsLQC1(R12, F3, F2, 1)		#	A3 A4
471	MADPS	C31, C31, A7, B5
472	MADPS	C41, C41, A8, B5
473
474	MADPS	C32, C32, A7, B6
475	MADPS	C42, C42, A8, B6
476
477	MADPS	C13, C13, A5, B7
478	MADPS	C23, C23, A6, B7
479
480	MADPS	C33, C33, A7, B7
481	MADPS	C43, C43, A8, B7
482
483	MADPS	C14, C14, A5, B8
484	MADPS	C24, C24, A6, B8
485
486	PLU		B3,	B1, B1
487	PLU		B4, B2, B2
488
489	MADPS	C34, C34, A7, B8
490	MADPS	C44, C44, A8, B8
491
492	.align	4
493.L247:
	/*
	 * K tail, one k-step: runs when bit 0 of the K length is set (K in
	 * the plain build, TEMP in the TRMM build).  Uses the A1..A4 and
	 * B1..B4 values left behind by the preceding block; nothing is
	 * loaded here.  BO / AO advance by one step (4 and 8 elements).
	 */
494#ifndef	TRMMKERNEL
495	andi	L, K, 1
496#else
497	andi	L, TEMP, 1
498#endif
499	blez	L, .L240
500	NOP
501
502	MADPS	C11, C11, A1, B1
503	MADPS	C21, C21, A2, B1
504
505	MADPS	C12, C12, A1, B2
506	MADPS	C22, C22, A2, B2
507
508	MADPS	C31, C31, A3, B1
509	MADPS	C41, C41, A4, B1
510	daddiu	BO, BO, 1 * 4 * SIZE	#	1KR*4NR
511
512	MADPS	C32, C32, A3, B2
513	MADPS	C42, C42, A4, B2
514	daddiu	AO, AO, 2 * 4 * SIZE
515
516	MADPS	C13, C13, A1, B3
517	MADPS	C23, C23, A2, B3
518
519	MADPS	C33, C33, A3, B3
520	MADPS	C43, C43, A4, B3
521
522	MADPS	C14, C14, A1, B4
523	MADPS	C24, C24, A2, B4
524
525	MADPS	C34, C34, A3, B4
526	MADPS	C44, C44, A4, B4
527
528
529	.align	4
530.L240:							#	Write Back
	/*
	 * Write-back of the 4x2 tile, plain (non-TRMM) build.  First stage:
	 * decrement the tile counter I and apply CVTU to each of the sixteen
	 * accumulators, placing the results in A1..A8 and B1..B8.  CVTU is a
	 * macro from common.h; presumably it extracts the upper single of a
	 * paired-single register -- TODO confirm.  Exactly one of the four
	 * conjugation variants that follow is compiled in and combines these
	 * values with the accumulators.
	 */
531#ifndef TRMMKERNEL
532	daddiu	I, I, -1
533	CVTU	A1, C11
534	CVTU	A2, C21
535
536	CVTU	A3, C31
537	CVTU	A4, C41
538
539	CVTU	A5, C13
540	CVTU	A6, C23
541
542	CVTU	A7, C33
543	CVTU	A8, C43
544
545	CVTU	B1, C12
546	CVTU	B2, C22
547
548	CVTU	B3, C32
549	CVTU	B4, C42
550
551	CVTU	B5, C14
552	CVTU	B6, C24
553
554	CVTU	B7, C34
555	CVTU	B8, C44
/*
 * Plain build, variant NN/NT/TN/TT.
 * Reduction:  Cx1 = Cx1 - cvtu(Cx1),  Cx3 = cvtu(Cx3) + Cx3   (CO1 column)
 *             Cx2 = Cx2 - cvtu(Cx2),  Cx4 = cvtu(Cx4) + Cx4   (CO2 column)
 * A1 / A2 are then reloaded from 152($sp) / 160($sp) as alpha_r / alpha_i.
 * Update, assuming MADD d,a,b,c = a + b*c and NMSUB d,a,b,c = a - b*c
 * (both are macros from common.h -- TODO confirm):
 *   even offsets of CO1:  c += Cx1 * alpha_r - Cx3 * alpha_i
 *   odd  offsets of CO1:  c += Cx3 * alpha_r + Cx1 * alpha_i
 * and likewise for CO2 with Cx2 / Cx4.  For the CO2 column the registers
 * C13..C43 and C11..C41 are reused as temporaries for the loaded values.
 */
557#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
558	/*	(a + bi) * (c + di) */
559	SUB		C11, C11, A1		#	ac'+'bd
560	SUB		C21, C21, A2
561#	LD		A1, 0 * SIZE(A)		#	load alpha_r
562	SUB		C31, C31, A3
563	LD		A1, 152($sp)		#	load alpha_r
564	SUB		C41, C41, A4
565	LD		A2, 160($sp)		#	load alpha_i
566#	LD		A2, 0 * SIZE(A)		#	load alpha_i
567	ADD		C13, A5, C13		#	ad'+'cb
568	ADD		C23, A6, C23
569	ADD		C33, A7, C33
570	ADD		C43, A8, C43
571	SUB		C12, C12, B1
572	SUB		C22, C22, B2
573	SUB		C32, C32, B3
574	SUB		C42, C42, B4
575	ADD		C14, B5, C14
576	ADD		C24, B6, C24
577	ADD		C34, B7, C34
578	ADD		C44, B8, C44
579
580	LD		B1, 0 * SIZE(CO1)
581	LD		B3, 2 * SIZE(CO1)
582	LD		B5, 4 * SIZE(CO1)
583	LD		B7, 6 * SIZE(CO1)
584	LD		B2, 1 * SIZE(CO1)
585	LD		B4, 3 * SIZE(CO1)
586	LD		B6, 5 * SIZE(CO1)
587	LD		B8, 7 * SIZE(CO1)
588
589	MADD	B1, B1, C11, A1		#	A1 = alpha_r
590	MADD	B3, B3, C21, A1
591	MADD	B5, B5, C31, A1
592	MADD	B7, B7, C41, A1
593	MADD	B2, B2, C13, A1
594	MADD	B4, B4, C23, A1
595	MADD	B6, B6, C33, A1
596	MADD	B8, B8, C43, A1
597	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
598	NMSUB	B3, B3, C23, A2
599	NMSUB	B5, B5, C33, A2
600	NMSUB	B7, B7, C43, A2
601	MADD	B2, B2, C11, A2
602	MADD	B4, B4, C21, A2
603	MADD	B6, B6, C31, A2
604	MADD	B8, B8, C41, A2
605
606	LD		C13, 0 * SIZE(CO2)
607	LD		C23, 2 * SIZE(CO2)
608	LD		C33, 4 * SIZE(CO2)
609	LD		C43, 6 * SIZE(CO2)
610	LD		C11, 1 * SIZE(CO2)
611	LD		C21, 3 * SIZE(CO2)
612	LD		C31, 5 * SIZE(CO2)
613	LD		C41, 7 * SIZE(CO2)
614
615	MADD	C13, C13, C12, A1
616	MADD	C23, C23, C22, A1
617
618	MADD	C33, C33, C32, A1
619	ST		B1, 0 * SIZE(CO1)
620
621	MADD	C43, C43, C42, A1
622	ST		B3, 2 * SIZE(CO1)
623
624	MADD	C11, C11, C14, A1
625	ST		B5, 4 * SIZE(CO1)
626
627	MADD	C21, C21, C24, A1
628	ST		B7, 6 * SIZE(CO1)
629
630	MADD	C31, C31, C34, A1
631	ST		B2, 1 * SIZE(CO1)
632
633	MADD	C41, C41, C44, A1
634	ST		B4, 3 * SIZE(CO1)
635
636	NMSUB	C13, C13, C14, A2
637	ST		B6, 5 * SIZE(CO1)
638
639	NMSUB	C23, C23, C24, A2
640	ST		B8, 7 * SIZE(CO1)
641
642	NMSUB	C33, C33, C34, A2
643	NMSUB	C43, C43, C44, A2
644
645	MADD	C11, C11, C12, A2
646	MADD	C21, C21, C22, A2
647
648	MADD	C31, C31, C32, A2
649	MADD	C41, C41, C42, A2
650
651	ST		C13, 0 * SIZE(CO2)
652	ST		C23, 2 * SIZE(CO2)
653	ST		C33, 4 * SIZE(CO2)
654	ST		C43, 6 * SIZE(CO2)
655	ST		C11, 1 * SIZE(CO2)
656	ST		C21, 3 * SIZE(CO2)
657	ST		C31, 5 * SIZE(CO2)
658	ST		C41, 7 * SIZE(CO2)
659#endif
660
/*
 * Plain build, variant NR/NC/TR/TC.
 * Reduction:  Cx1 = cvtu(Cx1) + Cx1,  Cx3 = cvtu(Cx3) - Cx3   (CO1 column)
 *             Cx2 = cvtu(Cx2) + Cx2,  Cx4 = cvtu(Cx4) - Cx4   (CO2 column)
 * The alpha reload and the load / MADD / NMSUB / store sequence that
 * follows use the same operands as in the other three variants; only the
 * reduction signs above differ.
 */
661#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
662	/*	(a + bi) * (c - di) */
663	ADD		C11, A1, C11		#	ac'+'bd
664	ADD		C21, A2, C21
665#	LD		A1, 0 * SIZE(A)		#	load alpha_r
666	ADD		C31, A3, C31
667	LD		A1, 152($sp)		#	load alpha_r
668	ADD		C41, A4, C41
669	LD		A2, 160($sp)		#	load alpha_i
670#	LD		A2, 0 * SIZE(A)		#	load alpha_r
671	SUB		C13, A5, C13		#	ad'+'cb
672	SUB		C23, A6, C23
673	SUB		C33, A7, C33
674	SUB		C43, A8, C43
675	ADD		C12, B1, C12
676	ADD		C22, B2, C22
677	ADD		C32, B3, C32
678	ADD		C42, B4, C42
679	SUB		C14, B5, C14
680	SUB		C24, B6, C24
681	SUB		C34, B7, C34
682	SUB		C44, B8, C44
683
684	LD		B1, 0 * SIZE(CO1)
685	LD		B3, 2 * SIZE(CO1)
686	LD		B5, 4 * SIZE(CO1)
687	LD		B7, 6 * SIZE(CO1)
688	LD		B2, 1 * SIZE(CO1)
689	LD		B4, 3 * SIZE(CO1)
690	LD		B6, 5 * SIZE(CO1)
691	LD		B8, 7 * SIZE(CO1)
692
693	MADD	B1, B1, C11, A1		#	A1 = alpha_r
694	MADD	B3, B3, C21, A1
695	MADD	B5, B5, C31, A1
696	MADD	B7, B7, C41, A1
697	MADD	B2, B2, C13, A1
698	MADD	B4, B4, C23, A1
699	MADD	B6, B6, C33, A1
700	MADD	B8, B8, C43, A1
701	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
702	NMSUB	B3, B3, C23, A2
703	NMSUB	B5, B5, C33, A2
704	NMSUB	B7, B7, C43, A2
705	MADD	B2, B2, C11, A2
706	MADD	B4, B4, C21, A2
707	MADD	B6, B6, C31, A2
708	MADD	B8, B8, C41, A2
709
710	LD		C13, 0 * SIZE(CO2)
711	LD		C23, 2 * SIZE(CO2)
712	LD		C33, 4 * SIZE(CO2)
713	LD		C43, 6 * SIZE(CO2)
714	LD		C11, 1 * SIZE(CO2)
715	LD		C21, 3 * SIZE(CO2)
716	LD		C31, 5 * SIZE(CO2)
717	LD		C41, 7 * SIZE(CO2)
718
719	MADD	C13, C13, C12, A1
720	MADD	C23, C23, C22, A1
721
722	MADD	C33, C33, C32, A1
723	ST		B1, 0 * SIZE(CO1)
724
725	MADD	C43, C43, C42, A1
726	ST		B3, 2 * SIZE(CO1)
727
728	MADD	C11, C11, C14, A1
729	ST		B5, 4 * SIZE(CO1)
730
731	MADD	C21, C21, C24, A1
732	ST		B7, 6 * SIZE(CO1)
733
734	MADD	C31, C31, C34, A1
735	ST		B2, 1 * SIZE(CO1)
736
737	MADD	C41, C41, C44, A1
738	ST		B4, 3 * SIZE(CO1)
739
740	NMSUB	C13, C13, C14, A2
741	ST		B6, 5 * SIZE(CO1)
742
743	NMSUB	C23, C23, C24, A2
744	ST		B8, 7 * SIZE(CO1)
745
746	NMSUB	C33, C33, C34, A2
747	NMSUB	C43, C43, C44, A2
748
749	MADD	C11, C11, C12, A2
750	MADD	C21, C21, C22, A2
751
752	MADD	C31, C31, C32, A2
753	MADD	C41, C41, C42, A2
754
755	ST		C13, 0 * SIZE(CO2)
756	ST		C23, 2 * SIZE(CO2)
757	ST		C33, 4 * SIZE(CO2)
758	ST		C43, 6 * SIZE(CO2)
759	ST		C11, 1 * SIZE(CO2)
760	ST		C21, 3 * SIZE(CO2)
761	ST		C31, 5 * SIZE(CO2)
762	ST		C41, 7 * SIZE(CO2)
763
764#endif
765
/*
 * Plain build, variant RN/RT/CN/CT.
 * Reduction:  Cx1 = cvtu(Cx1) + Cx1,  Cx3 = Cx3 - cvtu(Cx3)   (CO1 column)
 *             Cx2 = cvtu(Cx2) + Cx2,  Cx4 = Cx4 - cvtu(Cx4)   (CO2 column)
 * The alpha reload and the load / MADD / NMSUB / store sequence that
 * follows use the same operands as in the other three variants; only the
 * reduction signs above differ.
 */
766#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
767	/*	(a - bi) * (c + di) */
768	ADD		C11, A1, C11		#	ac'+'bd
769	ADD		C21, A2, C21
770#	LD		A1, 0 * SIZE(A)		#	load alpha_r
771	ADD		C31, A3, C31
772	LD		A1, 152($sp)		#	load alpha_r
773#	LD		A2, 0 * SIZE(A)		#	load alpha_r
774	ADD		C41, A4, C41
775	LD		A2, 160($sp)		#	load alpha_i
776	SUB		C13, C13, A5		#	ad'+'cb
777	SUB		C23, C23, A6
778	SUB		C33, C33, A7
779	SUB		C43, C43, A8
780	ADD		C12, B1, C12
781	ADD		C22, B2, C22
782	ADD		C32, B3, C32
783	ADD		C42, B4, C42
784	SUB		C14, C14, B5
785	SUB		C24, C24, B6
786	SUB		C34, C34, B7
787	SUB		C44, C44, B8
788
789	LD		B1, 0 * SIZE(CO1)
790	LD		B3, 2 * SIZE(CO1)
791	LD		B5, 4 * SIZE(CO1)
792	LD		B7, 6 * SIZE(CO1)
793	LD		B2, 1 * SIZE(CO1)
794	LD		B4, 3 * SIZE(CO1)
795	LD		B6, 5 * SIZE(CO1)
796	LD		B8, 7 * SIZE(CO1)
797
798	MADD	B1, B1, C11, A1		#	A1 = alpha_r
799	MADD	B3, B3, C21, A1
800	MADD	B5, B5, C31, A1
801	MADD	B7, B7, C41, A1
802	MADD	B2, B2, C13, A1
803	MADD	B4, B4, C23, A1
804	MADD	B6, B6, C33, A1
805	MADD	B8, B8, C43, A1
806	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
807	NMSUB	B3, B3, C23, A2
808	NMSUB	B5, B5, C33, A2
809	NMSUB	B7, B7, C43, A2
810	MADD	B2, B2, C11, A2
811	MADD	B4, B4, C21, A2
812	MADD	B6, B6, C31, A2
813	MADD	B8, B8, C41, A2
814
815	LD		C13, 0 * SIZE(CO2)
816	LD		C23, 2 * SIZE(CO2)
817	LD		C33, 4 * SIZE(CO2)
818	LD		C43, 6 * SIZE(CO2)
819	LD		C11, 1 * SIZE(CO2)
820	LD		C21, 3 * SIZE(CO2)
821	LD		C31, 5 * SIZE(CO2)
822	LD		C41, 7 * SIZE(CO2)
823
824	MADD	C13, C13, C12, A1
825	MADD	C23, C23, C22, A1
826
827	MADD	C33, C33, C32, A1
828	ST		B1, 0 * SIZE(CO1)
829
830	MADD	C43, C43, C42, A1
831	ST		B3, 2 * SIZE(CO1)
832
833	MADD	C11, C11, C14, A1
834	ST		B5, 4 * SIZE(CO1)
835
836	MADD	C21, C21, C24, A1
837	ST		B7, 6 * SIZE(CO1)
838
839	MADD	C31, C31, C34, A1
840	ST		B2, 1 * SIZE(CO1)
841
842	MADD	C41, C41, C44, A1
843	ST		B4, 3 * SIZE(CO1)
844
845	NMSUB	C13, C13, C14, A2
846	ST		B6, 5 * SIZE(CO1)
847
848	NMSUB	C23, C23, C24, A2
849	ST		B8, 7 * SIZE(CO1)
850
851	NMSUB	C33, C33, C34, A2
852	NMSUB	C43, C43, C44, A2
853
854	MADD	C11, C11, C12, A2
855	MADD	C21, C21, C22, A2
856
857	MADD	C31, C31, C32, A2
858	MADD	C41, C41, C42, A2
859
860	ST		C13, 0 * SIZE(CO2)
861	ST		C23, 2 * SIZE(CO2)
862	ST		C33, 4 * SIZE(CO2)
863	ST		C43, 6 * SIZE(CO2)
864	ST		C11, 1 * SIZE(CO2)
865	ST		C21, 3 * SIZE(CO2)
866	ST		C31, 5 * SIZE(CO2)
867	ST		C41, 7 * SIZE(CO2)
868
869#endif
870
/*
 * Plain build, variant RR/RC/CR/CC.
 * Reduction:  Cx1 = Cx1 - cvtu(Cx1),  Cx3 = -(cvtu(Cx3) + Cx3)   (CO1 column)
 *             Cx2 = Cx2 - cvtu(Cx2),  Cx4 = -(cvtu(Cx4) + Cx4)   (CO2 column)
 * i.e. the same as the NN variant followed by NEG on Cx3 / Cx4.  The alpha
 * reload and the load / MADD / NMSUB / store sequence use the same operands
 * as in the other three variants.
 */
871#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
872	/*	(a - bi) * (c - di) */
873	SUB		C11, C11, A1		#	ac'+'bd
874	SUB		C21, C21, A2
875	SUB		C31, C31, A3
876	LD		A1, 152($sp)		#	load alpha_r
877#	LD		A1, 0 * SIZE(A)		#	load alpha_r
878	SUB		C41, C41, A4
879	LD		A2, 160($sp)
880#	LD		A2, 0 * SIZE(A)		#	load alpha_i
881
882	ADD		C13, A5, C13		#	ad'+'cb
883	ADD		C23, A6, C23
884	ADD		C33, A7, C33
885	ADD		C43, A8, C43
886	SUB		C12, C12, B1
887	SUB		C22, C22, B2
888	SUB		C32, C32, B3
889	SUB		C42, C42, B4
890	ADD		C14, B5, C14
891	ADD		C24, B6, C24
892	ADD		C34, B7, C34
893	ADD		C44, B8, C44
894	NEG		C13, C13
895	NEG		C23, C23
896	NEG		C33, C33
897	NEG		C43, C43
898	NEG		C14, C14
899	NEG		C24, C24
900	NEG		C34, C34
901	NEG		C44, C44
902
903	LD		B1, 0 * SIZE(CO1)
904	LD		B3, 2 * SIZE(CO1)
905	LD		B5, 4 * SIZE(CO1)
906	LD		B7, 6 * SIZE(CO1)
907	LD		B2, 1 * SIZE(CO1)
908	LD		B4, 3 * SIZE(CO1)
909	LD		B6, 5 * SIZE(CO1)
910	LD		B8, 7 * SIZE(CO1)
911
912	MADD	B1, B1, C11, A1		#	A1 = alpha_r
913	MADD	B3, B3, C21, A1
914	MADD	B5, B5, C31, A1
915	MADD	B7, B7, C41, A1
916	MADD	B2, B2, C13, A1
917	MADD	B4, B4, C23, A1
918	MADD	B6, B6, C33, A1
919	MADD	B8, B8, C43, A1
920	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
921	NMSUB	B3, B3, C23, A2
922	NMSUB	B5, B5, C33, A2
923	NMSUB	B7, B7, C43, A2
924	MADD	B2, B2, C11, A2
925	MADD	B4, B4, C21, A2
926	MADD	B6, B6, C31, A2
927	MADD	B8, B8, C41, A2
928
929	LD		C13, 0 * SIZE(CO2)
930	LD		C43, 6 * SIZE(CO2)
931	LD		C23, 2 * SIZE(CO2)
932	LD		C33, 4 * SIZE(CO2)
933	LD		C11, 1 * SIZE(CO2)
934	LD		C21, 3 * SIZE(CO2)
935	LD		C31, 5 * SIZE(CO2)
936	LD		C41, 7 * SIZE(CO2)
937
938	MADD	C13, C13, C12, A1
939	ST		B1, 0 * SIZE(CO1)
940
941	MADD	C23, C23, C22, A1
942	ST		B3, 2 * SIZE(CO1)
943
944	MADD	C33, C33, C32, A1
945	ST		B5, 4 * SIZE(CO1)
946
947	MADD	C43, C43, C42, A1
948	ST		B7, 6 * SIZE(CO1)
949
950	MADD	C11, C11, C14, A1
951	ST		B2, 1 * SIZE(CO1)
952
953	MADD	C21, C21, C24, A1
954	ST		B4, 3 * SIZE(CO1)
955
956	MADD	C31, C31, C34, A1
957	ST		B6, 5 * SIZE(CO1)
958
959	MADD	C41, C41, C44, A1
960	ST		B8, 7 * SIZE(CO1)
961
962	NMSUB	C13, C13, C14, A2
963	NMSUB	C23, C23, C24, A2
964	NMSUB	C33, C33, C34, A2
965	NMSUB	C43, C43, C44, A2
966
967	MADD	C11, C11, C12, A2
968	MADD	C21, C21, C22, A2
969	MADD	C31, C31, C32, A2
970	MADD	C41, C41, C42, A2
971
972	ST		C13, 0 * SIZE(CO2)
973	ST		C23, 2 * SIZE(CO2)
974	ST		C33, 4 * SIZE(CO2)
975	ST		C43, 6 * SIZE(CO2)
976	ST		C11, 1 * SIZE(CO2)
977	ST		C21, 3 * SIZE(CO2)
978	ST		C31, 5 * SIZE(CO2)
979	ST		C41, 7 * SIZE(CO2)
980
981#endif
982
/*
 * TRMM build of the 4x2 write-back.  First stage is identical to the plain
 * build: decrement the tile counter I and apply CVTU to all sixteen
 * accumulators into A1..A8 / B1..B8.  The variants that follow differ from
 * the plain build in that they never load from CO1 / CO2: the products
 * with alpha are formed with MUL and stored directly.
 */
983#else
984	daddiu	I, I, -1
985	CVTU	A1, C11
986	CVTU	A2, C21
987
988	CVTU	A3, C31
989	CVTU	A4, C41
990
991	CVTU	A5, C13
992	CVTU	A6, C23
993
994	CVTU	A7, C33
995	CVTU	A8, C43
996
997	CVTU	B1, C12
998	CVTU	B2, C22
999
1000	CVTU	B3, C32
1001	CVTU	B4, C42
1002
1003	CVTU	B5, C14
1004	CVTU	B6, C24
1005
1006	CVTU	B7, C34
1007	CVTU	B8, C44
1008
/*
 * TRMM build, variant NN/NT/TN/TT.
 * Reduction:  Cx1 = Cx1 - cvtu(Cx1),  Cx3 = cvtu(Cx3) + Cx3   (CO1 column)
 *             Cx2 = Cx2 - cvtu(Cx2),  Cx4 = cvtu(Cx4) + Cx4   (CO2 column)
 * A1 / A2 are reloaded from 152($sp) / 160($sp) as alpha_r / alpha_i.
 * Result, assuming MADD d,a,b,c = a + b*c and NMSUB d,a,b,c = a - b*c
 * (macros from common.h -- TODO confirm):
 *   even offsets of CO1 = Cx1 * alpha_r - Cx3 * alpha_i
 *   odd  offsets of CO1 = Cx3 * alpha_r + Cx1 * alpha_i
 * and likewise for CO2 with Cx2 / Cx4.  The previous contents of CO1 / CO2
 * are not read.
 */
1009#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
1010	/*	(a + bi) * (c + di) */
1011	SUB		C11, C11, A1		#	ac'+'bd
1012	SUB		C21, C21, A2
1013	SUB		C31, C31, A3
1014	LD		A1, 152($sp)		#	load alpha_r
1015	SUB		C41, C41, A4
1016#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1017	LD		A2, 160($sp)		#	load alpha_i
1018	ADD		C13, A5, C13		#	ad'+'cb
1019	ADD		C23, A6, C23
1020#	LD		A2, 0 * SIZE(A)		#	load alpha_i
1021	ADD		C33, A7, C33
1022	ADD		C43, A8, C43
1023	SUB		C12, C12, B1
1024	SUB		C22, C22, B2
1025	SUB		C32, C32, B3
1026	SUB		C42, C42, B4
1027	ADD		C14, B5, C14
1028	ADD		C24, B6, C24
1029	ADD		C34, B7, C34
1030	ADD		C44, B8, C44
1031
1032	MUL		B1, C11, A1		#	A1 = alpha_r
1033	MUL		B3, C21, A1
1034	MUL		B5, C31, A1
1035	MUL		B7, C41, A1
1036	MUL		B2, C13, A1
1037	MUL		B4, C23, A1
1038	MUL		B6, C33, A1
1039	MUL		B8, C43, A1
1040	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1041	NMSUB	B3, B3, C23, A2
1042	NMSUB	B5, B5, C33, A2
1043	NMSUB	B7, B7, C43, A2
1044	MADD	B2, B2, C11, A2
1045	MADD	B4, B4, C21, A2
1046	MADD	B6, B6, C31, A2
1047	MADD	B8, B8, C41, A2
1048
1049	ST		B1, 0 * SIZE(CO1)
1050	MUL		C13, C12, A1
1051	MUL		C23, C22, A1
1052
1053	ST		B3, 2 * SIZE(CO1)
1054	MUL		C33, C32, A1
1055	MUL		C43, C42, A1
1056
1057	ST		B5, 4 * SIZE(CO1)
1058	MUL		C11, C14, A1
1059	MUL		C21, C24, A1
1060
1061	ST		B7, 6 * SIZE(CO1)
1062	MUL		C31, C34, A1
1063	MUL		C41, C44, A1
1064
1065	ST		B2, 1 * SIZE(CO1)
1066	NMSUB	C13, C13, C14, A2
1067	NMSUB	C23, C23, C24, A2
1068
1069	ST		B4, 3 * SIZE(CO1)
1070	NMSUB	C33, C33, C34, A2
1071	NMSUB	C43, C43, C44, A2
1072
1073	ST		B6, 5 * SIZE(CO1)
1074	MADD	C11, C11, C12, A2
1075	MADD	C21, C21, C22, A2
1076
1077	ST		B8, 7 * SIZE(CO1)
1078	MADD	C31, C31, C32, A2
1079	MADD	C41, C41, C42, A2
1080
1081	ST		C13, 0 * SIZE(CO2)
1082	ST		C23, 2 * SIZE(CO2)
1083	ST		C33, 4 * SIZE(CO2)
1084	ST		C43, 6 * SIZE(CO2)
1085	ST		C11, 1 * SIZE(CO2)
1086	ST		C21, 3 * SIZE(CO2)
1087	ST		C31, 5 * SIZE(CO2)
1088	ST		C41, 7 * SIZE(CO2)
1089#endif
1090
/*
 * TRMM build, variant NR/NC/TR/TC.
 * Reduction:  Cx1 = cvtu(Cx1) + Cx1,  Cx3 = cvtu(Cx3) - Cx3   (CO1 column)
 *             Cx2 = cvtu(Cx2) + Cx2,  Cx4 = cvtu(Cx4) - Cx4   (CO2 column)
 * The alpha reload and the MUL / NMSUB / MADD / store sequence use the same
 * operands as in the other TRMM variants; CO1 / CO2 are written, not read.
 */
1091#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
1092	/*	(a + bi) * (c - di) */
1093	ADD		C11, A1, C11		#	ac'+'bd
1094	ADD		C21, A2, C21
1095#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1096	ADD		C31, A3, C31
1097	LD		A1, 152($sp)		#	load alpha_r
1098	ADD		C41, A4, C41
1099	LD		A2, 160($sp)		#	load alpha_i
1100#	LD		A2, 0 * SIZE(A)		#	load alpha_r
1101	SUB		C13, A5, C13		#	ad'+'cb
1102	SUB		C23, A6, C23
1103	SUB		C33, A7, C33
1104	SUB		C43, A8, C43
1105	ADD		C12, B1, C12
1106	ADD		C22, B2, C22
1107	ADD		C32, B3, C32
1108	ADD		C42, B4, C42
1109	SUB		C14, B5, C14
1110	SUB		C24, B6, C24
1111	SUB		C34, B7, C34
1112	SUB		C44, B8, C44
1113
1114	MUL		B1, C11, A1		#	A1 = alpha_r
1115	MUL		B3, C21, A1
1116	MUL		B5, C31, A1
1117	MUL	 	B7, C41, A1
1118	MUL		B2, C13, A1
1119	MUL		B4, C23, A1
1120	MUL		B6, C33, A1
1121	MUL		B8, C43, A1
1122	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1123	NMSUB	B3, B3, C23, A2
1124	NMSUB	B5, B5, C33, A2
1125	NMSUB	B7, B7, C43, A2
1126	MADD	B2, B2, C11, A2
1127	MADD	B4, B4, C21, A2
1128	MADD	B6, B6, C31, A2
1129	MADD	B8, B8, C41, A2
1130
1131	MUL		C13, C12, A1
1132	MUL		C23, C22, A1
1133
1134	ST		B1, 0 * SIZE(CO1)
1135	MUL		C33, C32, A1
1136	MUL		C43, C42, A1
1137
1138	ST		B3, 2 * SIZE(CO1)
1139	MUL		C11, C14, A1
1140	MUL		C21, C24, A1
1141
1142	ST		B5, 4 * SIZE(CO1)
1143	MUL 	C31, C34, A1
1144	MUL 	C41, C44, A1
1145
1146	ST		B7, 6 * SIZE(CO1)
1147	NMSUB	C13, C13, C14, A2
1148	NMSUB	C23, C23, C24, A2
1149
1150	ST		B2, 1 * SIZE(CO1)
1151	NMSUB	C33, C33, C34, A2
1152	NMSUB	C43, C43, C44, A2
1153
1154	ST		B4, 3 * SIZE(CO1)
1155	MADD	C11, C11, C12, A2
1156	MADD	C21, C21, C22, A2
1157
1158	ST		B6, 5 * SIZE(CO1)
1159	MADD	C31, C31, C32, A2
1160	MADD	C41, C41, C42, A2
1161
1162	ST		B8, 7 * SIZE(CO1)
1163	ST		C13, 0 * SIZE(CO2)
1164	ST		C23, 2 * SIZE(CO2)
1165	ST		C33, 4 * SIZE(CO2)
1166	ST		C43, 6 * SIZE(CO2)
1167	ST		C11, 1 * SIZE(CO2)
1168	ST		C21, 3 * SIZE(CO2)
1169	ST		C31, 5 * SIZE(CO2)
1170	ST		C41, 7 * SIZE(CO2)
1171
1172#endif
1173
/*
 * TRMM build, variant RN/RT/CN/CT.
 * Reduction:  Cx1 = cvtu(Cx1) + Cx1,  Cx3 = Cx3 - cvtu(Cx3)   (CO1 column)
 *             Cx2 = cvtu(Cx2) + Cx2,  Cx4 = Cx4 - cvtu(Cx4)   (CO2 column)
 * The alpha reload and the MUL / NMSUB / MADD / store sequence use the same
 * operands as in the other TRMM variants; CO1 / CO2 are written, not read.
 */
1174#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
1175	/*	(a - bi) * (c + di) */
1176	ADD		C11, A1, C11		#	ac'+'bd
1177	ADD		C21, A2, C21
1178#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1179	ADD		C31, A3, C31
1180	LD		A1, 152($sp)		#	load alpha_r
1181#	LD		A2, 0 * SIZE(A)		#	load alpha_r
1182	ADD		C41, A4, C41
1183	LD		A2, 160($sp)		#	load alpha_i
1184	SUB		C13, C13, A5		#	ad'+'cb
1185	SUB		C23, C23, A6
1186	SUB		C33, C33, A7
1187	SUB		C43, C43, A8
1188	ADD		C12, B1, C12
1189	ADD		C22, B2, C22
1190	ADD		C32, B3, C32
1191	ADD		C42, B4, C42
1192	SUB		C14, C14, B5
1193	SUB		C24, C24, B6
1194
1195	SUB		C34, C34, B7
1196	SUB		C44, C44, B8
1197
1198	MUL	B1, C11, A1		#	A1 = alpha_r
1199	MUL	B3, C21, A1
1200	MUL	B5, C31, A1
1201	MUL	B7, C41, A1
1202	MUL	B2, C13, A1
1203	MUL	B4, C23, A1
1204	MUL	B6, C33, A1
1205	MUL	B8, C43, A1
1206	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1207	NMSUB	B3, B3, C23, A2
1208	NMSUB	B5, B5, C33, A2
1209	NMSUB	B7, B7, C43, A2
1210	MADD	B2, B2, C11, A2
1211	MADD	B4, B4, C21, A2
1212	MADD	B6, B6, C31, A2
1213	MADD	B8, B8, C41, A2
1214
1215	MUL	C13, C12, A1
1216	MUL	C23, C22, A1
1217
1218	ST		B1, 0 * SIZE(CO1)
1219	MUL	C33, C32, A1
1220	MUL	C43, C42, A1
1221
1222	ST		B3, 2 * SIZE(CO1)
1223	MUL	C11, C14, A1
1224	MUL	C21, C24, A1
1225
1226	ST		B5, 4 * SIZE(CO1)
1227	MUL	C31, C34, A1
1228	MUL	C41, C44, A1
1229
1230	ST		B7, 6 * SIZE(CO1)
1231	NMSUB	C13, C13, C14, A2
1232	NMSUB	C23, C23, C24, A2
1233
1234	ST		B2, 1 * SIZE(CO1)
1235	NMSUB	C33, C33, C34, A2
1236	NMSUB	C43, C43, C44, A2
1237
1238	ST		B4, 3 * SIZE(CO1)
1239	MADD	C11, C11, C12, A2
1240	MADD	C21, C21, C22, A2
1241
1242	ST		B6, 5 * SIZE(CO1)
1243	MADD	C31, C31, C32, A2
1244	MADD	C41, C41, C42, A2
1245
1246	ST		B8, 7 * SIZE(CO1)
1247	ST		C13, 0 * SIZE(CO2)
1248	ST		C23, 2 * SIZE(CO2)
1249	ST		C33, 4 * SIZE(CO2)
1250	ST		C43, 6 * SIZE(CO2)
1251	ST		C11, 1 * SIZE(CO2)
1252	ST		C21, 3 * SIZE(CO2)
1253	ST		C31, 5 * SIZE(CO2)
1254	ST		C41, 7 * SIZE(CO2)
1255
1256#endif
1257
/*
 * TRMM build, variant RR/RC/CR/CC.
 * Reduction:  Cx1 = Cx1 - cvtu(Cx1),  Cx3 = -(cvtu(Cx3) + Cx3)   (CO1 column)
 *             Cx2 = Cx2 - cvtu(Cx2),  Cx4 = -(cvtu(Cx4) + Cx4)   (CO2 column)
 * i.e. the NN reduction followed by NEG on Cx3 / Cx4.  The alpha reload and
 * the MUL / NMSUB / MADD / store sequence use the same operands as in the
 * other TRMM variants; CO1 / CO2 are written, not read.
 */
1258#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
1259	/*	(a - bi) * (c - di) */
1260	SUB		C11, C11, A1		#	ac'+'bd
1261	SUB		C21, C21, A2
1262	SUB		C31, C31, A3
1263	LD		A1, 152($sp)		#	load alpha_r
1264#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1265	SUB		C41, C41, A4
1266	LD		A2, 160($sp)
1267#	LD		A2, 0 * SIZE(A)		#	load alpha_i
1268
1269	ADD		C13, A5, C13		#	ad'+'cb
1270	ADD		C23, A6, C23
1271	ADD		C33, A7, C33
1272	ADD		C43, A8, C43
1273	SUB		C12, C12, B1
1274	SUB		C22, C22, B2
1275	SUB		C32, C32, B3
1276	SUB		C42, C42, B4
1277	ADD		C14, B5, C14
1278	ADD		C24, B6, C24
1279	ADD		C34, B7, C34
1280	ADD		C44, B8, C44
1281
1282	NEG		C13, C13
1283	NEG		C23, C23
1284	NEG		C33, C33
1285	NEG		C43, C43
1286	NEG		C14, C14
1287	NEG		C24, C24
1288	NEG		C34, C34
1289	NEG		C44, C44
1290
1291	MUL		B1, C11, A1		#	A1 = alpha_r
1292	MUL		B3, C21, A1
1293	MUL		B5, C31, A1
1294	MUL		B7, C41, A1
1295	MUL		B2, C13, A1
1296	MUL		B4, C23, A1
1297	MUL		B6, C33, A1
1298	MUL		B8, C43, A1
1299	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1300	NMSUB	B3, B3, C23, A2
1301	NMSUB	B5, B5, C33, A2
1302	NMSUB	B7, B7, C43, A2
1303	MADD	B2, B2, C11, A2
1304	MADD	B4, B4, C21, A2
1305	MADD	B6, B6, C31, A2
1306	MADD	B8, B8, C41, A2
1307
1308	ST		B1, 0 * SIZE(CO1)
1309	MUL		C13, C12, A1
1310	MUL		C23, C22, A1
1311
1312	ST		B3, 2 * SIZE(CO1)
1313	MUL		C33, C32, A1
1314	MUL		C43, C42, A1
1315
1316	ST		B5, 4 * SIZE(CO1)
1317	MUL		C11, C14, A1
1318	MUL		C21, C24, A1
1319
1320	ST		B7, 6 * SIZE(CO1)
1321	MUL		C31, C34, A1
1322	MUL		C41, C44, A1
1323
1324	ST		B2, 1 * SIZE(CO1)
1325	NMSUB	C13, C13, C14, A2
1326	NMSUB	C23, C23, C24, A2
1327
1328	ST		B4, 3 * SIZE(CO1)
1329	NMSUB	C33, C33, C34, A2
1330	NMSUB	C43, C43, C44, A2
1331
1332	ST		B6, 5 * SIZE(CO1)
1333	MADD	C11, C11, C12, A2
1334	MADD	C21, C21, C22, A2
1335
1336	ST		B8, 7 * SIZE(CO1)
1337	MADD	C31, C31, C32, A2
1338	MADD	C41, C41, C42, A2
1339
1340	ST		C13, 0 * SIZE(CO2)
1341	ST		C23, 2 * SIZE(CO2)
1342	ST		C33, 4 * SIZE(CO2)
1343	ST		C43, 6 * SIZE(CO2)
1344	ST		C11, 1 * SIZE(CO2)
1345	ST		C21, 3 * SIZE(CO2)
1346	ST		C31, 5 * SIZE(CO2)
1347	ST		C41, 7 * SIZE(CO2)
1348#endif
1349
1350
/*
 * End of the 4x2 tile.  The conditional part below still belongs to the
 * TRMM branch of the write-back (it is closed by the last #endif):
 *   - for LEFT+TRANSA or neither, AO / BO are moved past the k-steps this
 *     tile did not consume: TEMP = K - KK - 4 (LEFT) or K - KK - 2, scaled
 *     by 4 elements per step for AO and 2 for BO;
 *   - for LEFT, KK grows by the tile height 4.
 * Both builds then advance CO1 / CO2 by 8 * SIZE and loop to .L241 while
 * I > 0.
 */
1351#if ( defined(LEFT) &&  defined(TRANSA)) || \
1352    (!defined(LEFT) && !defined(TRANSA))
1353	dsubu	TEMP, K, KK
1354#ifdef LEFT
1355	daddiu	TEMP, TEMP, -4
1356#else
1357	daddiu	TEMP, TEMP, -2
1358#endif
1359
1360	dsll	L, TEMP, 2 + ZBASE_SHIFT
1361	dsll	TEMP, TEMP, 1 + ZBASE_SHIFT
1362
1363	daddu	AO, AO, L
1364	daddu	BO, BO, TEMP
1365#endif
1366
1367#ifdef LEFT
1368	daddiu	KK, KK, 4
1369#endif
1370
1371#endif
1372	daddiu	CO1, CO1, 8 * SIZE
1373	bgtz	I, .L241
	/* Presumably the delay slot of the branch above (TODO confirm
	   noreorder): CO2 is advanced on both paths. */
1374	daddiu	CO2, CO2, 8 * SIZE
1375
1376	.align	4
1377.L22:
	/* Remainder in M: bit 1 of M selects one extra tile of 2 rows, handled
	   by .L221; otherwise continue at .L21 (outside this view). */
1378	andi	I, M, 2				#	MR=2
1379	blez	I, .L21
1380	NOP
1381
1382	.align	4
1383.L221:
	/*
	 * Setup for the 2-row by 2-column tile.  Only eight accumulators are
	 * used (C11..C14, C21..C24); they are zeroed by copying C11, which is
	 * loaded from $0 with MTC.  B1/B2 are preloaded from BO and A1/A2
	 * from AO, B3/B4 are derived with PLU, and the CO1 / CO2 lines are
	 * touched with FETCH.  L = number of 4-step K iterations; zero goes
	 * straight to .L222.  In the TRMM build the effective K length is
	 * kept in TEMP for the tail blocks.
	 */
1384#if defined(TRMMKERNEL)
1385#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
1386	move	BO,  B
1387#else
	/* Skip KK k-steps; both panels hold 2 elements per step here, so the
	   same byte offset applies to AO and BO. */
1388	dsll	TEMP, KK, 1 + ZBASE_SHIFT		#	NR=2
1389
1390	daddu	AO, AO, TEMP
1391	daddu	BO, B,  TEMP
1392#endif
1393	MTC		$0, C11				#	clear result registers
1394	MOV		C12, C11
1395
1396	MOV		C21, C11
1397	MOV		C22, C11
1398	gsLQC1(R13, F9, F8, 0)		#	B1 B2
1399
1400	gsLQC1(R12, F1, F0, 0)		#	A1 A2
1401	MOV		C13, C11
1402	MOV		C14, C11
1403
1404	MOV		C23, C11
1405	FETCH	$0, 0 * SIZE(CO1)
1406
1407	FETCH	$0, 8 * SIZE(CO1)
1408	MOV		C24, C11
1409
1410	FETCH	$0, 0 * SIZE(CO2)
1411	FETCH	$0, 8 * SIZE(CO2)
1412
1413	PLU		B3,	B1, B1
1414	PLU		B4, B2, B2
1415#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
1416	dsubu	TEMP, K, KK
1417#elif defined(LEFT)
1418	daddiu	TEMP, KK, 2							#	MR=2
1419#else
1420	daddiu	TEMP, KK, 2							#	NR=2
1421#endif
1422	dsra	L,  TEMP, 2
1423	blez	L, .L222
1424	NOP
1425
1426#else
1427	move	BO, B				#	Reset	B
1428	dsra	L, K, 2				#	K loop is unrolled by 4
1429
1430	MTC		$0, C11				#	clear result registers
1431	MOV		C12, C11
1432
1433	MOV		C21, C11
1434	MOV		C22, C11
1435	gsLQC1(R13, F9, F8, 0)		#	B1 B2
1436
1437	gsLQC1(R12, F1, F0, 0)		#	A1 A2
1438	MOV		C13, C11
1439	MOV		C14, C11
1440
1441	MOV		C23, C11
1442	FETCH	$0, 0 * SIZE(CO1)
1443
1444	FETCH	$0, 8 * SIZE(CO1)
1445	MOV		C24, C11
1446
1447	FETCH	$0, 0 * SIZE(CO2)
1448	FETCH	$0, 8 * SIZE(CO2)
1449
1450	PLU		B3,	B1, B1
1451	blez	L, .L222
	/* Presumably the delay slot of the branch above (TODO confirm). */
1452	PLU		B4, B2, B2
1453#endif
1454
1455.L2210:
	/*
	 * Main K loop of the 2x2 tile: four k-steps per iteration, eight
	 * MADPS per step.  The A operands rotate through the register pairs
	 * A1/A2, A3/A4, A5/A6, A7/A8 (one pair per step), the B operands
	 * alternate between B1..B4 and B5..B8.  BO and AO each advance by
	 * 4 * 4 * SIZE during the third step, so the loads issued afterwards
	 * use offset 0 relative to the advanced pointers.  On exit A1/A2 and
	 * B1..B4 hold the operands of the next k-step.
	 */
1456	daddiu	L, L, -1
1457	gsLQC1(R13, F13, F12, 1)	#	B3 B4
1458	MADPS	C11, C11, A1, B1
1459	MADPS	C21, C21, A2, B1
1460
1461	gsLQC1(R12, F3, F2, 1)		#	A3 A4
1462	MADPS	C12, C12, A1, B2
1463	MADPS	C22, C22, A2, B2
1464
1465	MADPS	C13, C13, A1, B3
1466	MADPS	C23, C23, A2, B3
1467
1468	MADPS	C14, C14, A1, B4
1469	MADPS	C24, C24, A2, B4
1470
1471	gsLQC1(R12, F5, F4, 2)		#	A5 A6
1472	PLU		B7,	B5, B5
1473	PLU		B8, B6, B6
1474
1475	gsLQC1(R13, F9, F8, 2)		#	B1 B2
1476	MADPS	C11, C11, A3, B5
1477	MADPS	C21, C21, A4, B5
1478
1479	MADPS	C12, C12, A3, B6
1480	MADPS	C22, C22, A4, B6
1481
1482	MADPS	C13, C13, A3, B7
1483	MADPS	C23, C23, A4, B7
1484
1485	MADPS	C14, C14, A3, B8
1486	MADPS	C24, C24, A4, B8
1487
1488	gsLQC1(R12, F7, F6, 3)		#	A7 A8
1489	PLU		B3,	B1, B1
1490	PLU		B4, B2, B2
1491
1492	gsLQC1(R13, F13, F12, 3)	#	B3 B4
1493	MADPS	C11, C11, A5, B1
1494	MADPS	C21, C21, A6, B1
1495
1496	MADPS	C12, C12, A5, B2
1497	MADPS	C22, C22, A6, B2
1498	daddiu	BO, BO, 4 * 4 * SIZE	#	4KR*4NR
1499
1500	daddiu	AO, AO, 4 * 4 * SIZE 	#	4KR*4MR
1501	MADPS	C13, C13, A5, B3
1502	MADPS	C23, C23, A6, B3
1503
1504	MADPS	C14, C14, A5, B4
1505	MADPS	C24, C24, A6, B4
1506
1507	gsLQC1(R12, F1, F0, 0)		#	A1 A2
1508	PLU		B7,	B5, B5
1509	PLU		B8, B6, B6
1510
1511	gsLQC1(R13, F9, F8, 0)		#	B1 B2
1512	MADPS	C11, C11, A7, B5
1513	MADPS	C21, C21, A8, B5
1514
1515	MADPS	C12, C12, A7, B6
1516	MADPS	C22, C22, A8, B6
1517
1518	MADPS	C13, C13, A7, B7
1519	MADPS	C23, C23, A8, B7
1520
1521	MADPS	C14, C14, A7, B8
1522	MADPS	C24, C24, A8, B8
1523
1524	PLU		B3,	B1, B1
1525	bgtz	L, .L2210
	/* Presumably the delay slot of the branch above (TODO confirm). */
1526	PLU		B4, B2, B2
1527
1528
1529	.align	4
1530.L222:
#	K remainder, two steps: runs when bit 1 of K (TEMP in the TRMM build)
#	is set, otherwise skips to .L227.  Uses the A1/A2, B1..B4 values that
#	are already loaded, fetches one more A pair and B pair, advances AO and
#	BO by 2 * 4 * SIZE, then reloads A1/A2 and B1/B2 from slot 0 and
#	rebuilds B3/B4 for the single-step tail.
1531#ifndef TRMMKERNEL
1532	andi	L, K, 2
1533#else
1534	andi	L, TEMP, 2
1535#endif
1536	blez	L, .L227
1537	NOP
1538
1539	gsLQC1(R13, F13, F12, 1)	#	B5 B6 (targets are F12 / F13)
1540	MADPS	C11, C11, A1, B1
1541	MADPS	C21, C21, A2, B1
1542
1543	gsLQC1(R12, F3, F2, 1)		#	A3 A4
1544	MADPS	C12, C12, A1, B2
1545	MADPS	C22, C22, A2, B2
1546
1547	MADPS	C13, C13, A1, B3
1548	MADPS	C23, C23, A2, B3
1549
1550	MADPS	C14, C14, A1, B4
1551	MADPS	C24, C24, A2, B4
1552
1553	PLU		B7,	B5, B5
1554	PLU		B8, B6, B6
1555	daddiu	BO, BO, 2 * 4 * SIZE
1556
1557	daddiu	AO, AO, 2 * 4 * SIZE
1558	MADPS	C11, C11, A3, B5
1559	MADPS	C21, C21, A4, B5
1560	gsLQC1(R13, F9, F8, 0)		#	B1 B2 (base is BO, already advanced)
1561
1562	MADPS	C12, C12, A3, B6
1563	MADPS	C22, C22, A4, B6
1564	gsLQC1(R12, F1, F0, 0)		#	A1 A2
1565
1566	MADPS	C13, C13, A3, B7
1567	MADPS	C23, C23, A4, B7
1568
1569	MADPS	C14, C14, A3, B8
1570	MADPS	C24, C24, A4, B8
1571
1572	PLU		B3,	B1, B1
1573	PLU		B4, B2, B2
1574
1575
1576	.align	4
1577.L227:
#	K remainder, one step: runs when bit 0 of K (TEMP in the TRMM build)
#	is set, otherwise skips to the write-back at .L220.  Consumes the
#	A1/A2 and B1..B4 values already in registers and advances AO and BO
#	by 4 * SIZE; nothing further is loaded.
1578#ifndef TRMMKERNEL
1579	andi	L, K, 1
1580#else
1581	andi	L, TEMP, 1
1582#endif
1583	blez	L, .L220
1584	NOP
1585
1586	MADPS	C11, C11, A1, B1
1587	MADPS	C21, C21, A2, B1
1588	daddiu	BO, BO, 4 * SIZE
1589	daddiu	AO, AO, 4 * SIZE
1590
1591	MADPS	C12, C12, A1, B2
1592	MADPS	C22, C22, A2, B2
1593
1594	MADPS	C13, C13, A1, B3
1595	MADPS	C23, C23, A2, B3
1596
1597	MADPS	C14, C14, A1, B4
1598	MADPS	C24, C24, A2, B4
1599
1600	.align	4
1601.L220:							#	Write Back
#	Write-back of the 2x2 tile.  Two builds:
#	  - without TRMMKERNEL: C is loaded, alpha * result is accumulated into
#	    it with MADD / NMSUB, and it is stored back;
#	  - with TRMMKERNEL: C is not loaded, the first product uses MUL, and
#	    AO / BO / KK are adjusted afterwards.
#	In both builds exactly one of the four conjugation variants
#	(NN.., NR.., RN.., RR..) is expected to be selected by the build.
#	CVTU copies one half of each accumulator into A1..A8; ADD / SUB then
#	combine the two halves into the real part (C11, C21, C12, C22) and the
#	imaginary part (C13, C23, C14, C24) of each product.
#	NOTE(review): CVTU is a macro from common.h; presumably it extracts the
#	upper half of a paired-single register -- confirm there.
#	alpha_r and alpha_i are reloaded from 152($sp) and 160($sp) because A1
#	and A2 were used as scratch by CVTU.
#	Store layout: CO1[0..1] and CO1[2..3] are the two rows of the first
#	column, CO2[0..3] the same rows of the second column.
1602#ifndef TRMMKERNEL
1603	daddiu	I, I, -1
1604	CVTU	A1, C11
1605	CVTU	A2, C21
1606
1607	CVTU	A3, C13
1608	CVTU	A4, C23
1609
1610	CVTU	A5, C12
1611	CVTU	A6, C22
1612
1613	CVTU	A7, C14
1614	CVTU	A8, C24
1615
1616
1617#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
1618	/*	(a + bi) * (c + di) */
1619	SUB		C11, C11, A1		#	ac'+'bd
1620	SUB		C21, C21, A2
1621	ADD		C13, A3, C13		#	ad'+'cb
1622	ADD		C23, A4, C23
1623#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1624	LD		A1, 152($sp)		#	load alpha_r
1625	LD		A2, 160($sp)		#	load alpha_i
1626#	LD		A2, 0 * SIZE(A)		#	load alpha_i
1627	SUB		C12, C12, A5
1628	SUB		C22, C22, A6
1629	ADD		C14, A7, C14
1630	ADD		C24, A8, C24
1631
1632	LD		B1, 0 * SIZE(CO1)
1633	LD		B3, 2 * SIZE(CO1)
1634	LD		B2, 1 * SIZE(CO1)
1635	LD		B4, 3 * SIZE(CO1)
1636
1637	MADD	B1, B1, C11, A1		#	A1 = alpha_r
1638	MADD	B3, B3, C21, A1
1639	MADD	B2, B2, C13, A1
1640	MADD	B4, B4, C23, A1
1641	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1642	NMSUB	B3, B3, C23, A2
1643	MADD	B2, B2, C11, A2
1644	MADD	B4, B4, C21, A2
1645
1646	LD		B5, 0 * SIZE(CO2)
1647	LD		B7, 2 * SIZE(CO2)
1648	LD		B6, 1 * SIZE(CO2)
1649	LD		B8, 3 * SIZE(CO2)
1650
1651	MADD	B5, B5, C12, A1
1652	MADD	B7, B7, C22, A1
1653
1654	ST		B1, 0 * SIZE(CO1)
1655	ST		B3, 2 * SIZE(CO1)
1656
1657	MADD	B6, B6, C14, A1
1658	MADD	B8, B8, C24, A1
1659
1660	ST		B2, 1 * SIZE(CO1)
1661	ST		B4, 3 * SIZE(CO1)
1662
1663	NMSUB	B5, B5, C14, A2
1664	NMSUB	B7, B7, C24, A2
1665
1666	MADD	B6, B6, C12, A2
1667	MADD	B8, B8, C22, A2
1668
1669	ST		B5, 0 * SIZE(CO2)
1670	ST		B7, 2 * SIZE(CO2)
1671	ST		B6, 1 * SIZE(CO2)
1672	ST		B8, 3 * SIZE(CO2)
1673#endif
1674
1675#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
1676	/*	(a + bi) * (c - di) */
1677	ADD		C11, A1, C11		#	ac'+'bd
1678	ADD		C21, A2, C21
1679	SUB		C13, A3, C13		#	ad'+'cb
1680	SUB		C23, A4, C23
1681#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1682	LD		A1, 152($sp)		#	load alpha_r
1683	LD		A2, 160($sp)		#	load alpha_i
1684#	LD		A2, 0 * SIZE(A)		#	load alpha_r
1685	ADD		C12, A5, C12
1686	ADD		C22, A6, C22
1687	SUB		C14, A7, C14
1688	SUB		C24, A8, C24
1689
1690	LD		B1, 0 * SIZE(CO1)
1691	LD		B3, 2 * SIZE(CO1)
1692	LD		B2, 1 * SIZE(CO1)
1693	LD		B4, 3 * SIZE(CO1)
1694
1695	MADD	B1, B1, C11, A1		#	A1 = alpha_r
1696	MADD	B3, B3, C21, A1
1697	MADD	B2, B2, C13, A1
1698	MADD	B4, B4, C23, A1
1699	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1700	NMSUB	B3, B3, C23, A2
1701	MADD	B2, B2, C11, A2
1702	MADD	B4, B4, C21, A2
1703
1704	LD		B5, 0 * SIZE(CO2)
1705	LD		B7, 2 * SIZE(CO2)
1706	LD		B6, 1 * SIZE(CO2)
1707	LD		B8, 3 * SIZE(CO2)
1708
1709	MADD	B5, B5, C12, A1
1710	MADD	B7, B7, C22, A1
1711
1712	ST		B1, 0 * SIZE(CO1)
1713	ST		B3, 2 * SIZE(CO1)
1714
1715	MADD	B6, B6, C14, A1
1716	MADD	B8, B8, C24, A1
1717
1718	ST		B2, 1 * SIZE(CO1)
1719	ST		B4, 3 * SIZE(CO1)
1720
1721	NMSUB	B5, B5, C14, A2
1722	NMSUB	B7, B7, C24, A2
1723
1724	MADD	B6, B6, C12, A2
1725	MADD	B8, B8, C22, A2
1726
1727	ST		B5, 0 * SIZE(CO2)
1728	ST		B7, 2 * SIZE(CO2)
1729	ST		B6, 1 * SIZE(CO2)
1730	ST		B8, 3 * SIZE(CO2)
1731
1732#endif
1733
1734#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
1735	/*	(a - bi) * (c + di) */
1736	ADD		C11, A1, C11		#	ac'+'bd
1737	ADD		C21, A2, C21
1738	SUB		C13, C13, A3		#	ad'+'cb
1739	SUB		C23, C23, A4
1740#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1741	LD		A1, 152($sp)		#	load alpha_r
1742#	LD		A2, 0 * SIZE(A)		#	load alpha_r
1743	LD		A2, 160($sp)		#	load alpha_i
1744	ADD		C12, A5, C12
1745	ADD		C22, A6, C22
1746	SUB		C14, C14, A7
1747	SUB		C24, C24, A8
1748
1749	LD		B1, 0 * SIZE(CO1)
1750	LD		B3, 2 * SIZE(CO1)
1751	LD		B2, 1 * SIZE(CO1)
1752	LD		B4, 3 * SIZE(CO1)
1753
1754	MADD	B1, B1, C11, A1		#	A1 = alpha_r
1755	MADD	B3, B3, C21, A1
1756	MADD	B2, B2, C13, A1
1757	MADD	B4, B4, C23, A1
1758	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1759	NMSUB	B3, B3, C23, A2
1760	MADD	B2, B2, C11, A2
1761	MADD	B4, B4, C21, A2
1762
1763	LD		B5, 0 * SIZE(CO2)
1764	LD		B7, 2 * SIZE(CO2)
1765	LD		B6, 1 * SIZE(CO2)
1766	LD		B8, 3 * SIZE(CO2)
1767
1768	MADD	B5, B5, C12, A1
1769	MADD	B7, B7, C22, A1
1770
1771	ST		B1, 0 * SIZE(CO1)
1772	ST		B3, 2 * SIZE(CO1)
1773
1774	MADD	B6, B6, C14, A1
1775	MADD	B8, B8, C24, A1
1776
1777	ST		B2, 1 * SIZE(CO1)
1778	ST		B4, 3 * SIZE(CO1)
1779
1780	NMSUB	B5, B5, C14, A2
1781	NMSUB	B7, B7, C24, A2
1782
1783	MADD	B6, B6, C12, A2
1784	MADD	B8, B8, C22, A2
1785
1786	ST		B5, 0 * SIZE(CO2)
1787	ST		B7, 2 * SIZE(CO2)
1788	ST		B6, 1 * SIZE(CO2)
1789	ST		B8, 3 * SIZE(CO2)
1790
1791#endif
1792
1793#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
1794	/*	(a - bi) * (c - di) */
1795	SUB		C11, C11, A1		#	ac'+'bd
1796	SUB		C21, C21, A2
1797	ADD		C13, A3, C13		#	ad'+'cb
1798	ADD		C23, A4, C23
1799	LD		A1, 152($sp)		#	load alpha_r
1800#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1801	LD		A2, 160($sp)		#	load alpha_i
1802#	LD		A2, 0 * SIZE(A)		#	load alpha_i
1803	SUB		C12, C12, A5
1804	SUB		C22, C22, A6
1805	ADD		C14, A7, C14
1806	ADD		C24, A8, C24
1807	NEG		C13, C13		#	both operands conjugated: imaginary part changes sign
1808	NEG		C23, C23
1809	NEG		C14, C14
1810	NEG		C24, C24
1811
1812
1813	LD		B1, 0 * SIZE(CO1)
1814	LD		B3, 2 * SIZE(CO1)
1815	LD		B2, 1 * SIZE(CO1)
1816	LD		B4, 3 * SIZE(CO1)
1817
1818	MADD	B1, B1, C11, A1		#	A1 = alpha_r
1819	MADD	B3, B3, C21, A1
1820	MADD	B2, B2, C13, A1
1821	MADD	B4, B4, C23, A1
1822	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1823	NMSUB	B3, B3, C23, A2
1824	MADD	B2, B2, C11, A2
1825	MADD	B4, B4, C21, A2
1826
1827	LD		B5, 0 * SIZE(CO2)
1828	LD		B7, 2 * SIZE(CO2)
1829	LD		B6, 1 * SIZE(CO2)
1830	LD		B8, 3 * SIZE(CO2)
1831
1832	MADD	B5, B5, C12, A1
1833	MADD	B7, B7, C22, A1
1834
1835	ST		B1, 0 * SIZE(CO1)
1836	ST		B3, 2 * SIZE(CO1)
1837
1838	MADD	B6, B6, C14, A1
1839	MADD	B8, B8, C24, A1
1840
1841	ST		B2, 1 * SIZE(CO1)
1842	ST		B4, 3 * SIZE(CO1)
1843
1844	NMSUB	B5, B5, C14, A2
1845	NMSUB	B7, B7, C24, A2
1846
1847	MADD	B6, B6, C12, A2
1848	MADD	B8, B8, C22, A2
1849
1850	ST		B5, 0 * SIZE(CO2)
1851	ST		B7, 2 * SIZE(CO2)
1852	ST		B6, 1 * SIZE(CO2)
1853	ST		B8, 3 * SIZE(CO2)
1854#endif
1855
1856#else
#	TRMM build: same combination of the accumulator halves, but the result
#	is alpha * product only (MUL starts each output, C is never loaded).
1857	daddiu	I, I, -1
1858	CVTU	A1, C11
1859	CVTU	A2, C21
1860
1861	CVTU	A3, C13
1862	CVTU	A4, C23
1863
1864	CVTU	A5, C12
1865	CVTU	A6, C22
1866
1867	CVTU	A7, C14
1868	CVTU	A8, C24
1869
1870
1871#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
1872	/*	(a + bi) * (c + di) */
1873	SUB		C11, C11, A1		#	ac'+'bd
1874	SUB		C21, C21, A2
1875	ADD		C13, A3, C13		#	ad'+'cb
1876	ADD		C23, A4, C23
1877#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1878	LD		A1, 152($sp)		#	load alpha_r
1879	LD		A2, 160($sp)		#	load alpha_i
1880#	LD		A2, 0 * SIZE(A)		#	load alpha_i
1881	SUB		C12, C12, A5
1882	SUB		C22, C22, A6
1883	ADD		C14, A7, C14
1884	ADD		C24, A8, C24
1885
1886	MUL	B1, C11, A1		#	A1 = alpha_r
1887	MUL	B3, C21, A1
1888	MUL	B2, C13, A1
1889	MUL	B4, C23, A1
1890	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1891	NMSUB	B3, B3, C23, A2
1892	MADD	B2, B2, C11, A2
1893	MADD	B4, B4, C21, A2
1894
1895
1896	MUL	B5, C12, A1
1897	MUL	B7, C22, A1
1898
1899	ST		B1, 0 * SIZE(CO1)
1900	ST		B3, 2 * SIZE(CO1)
1901
1902	MUL	B6, C14, A1
1903	MUL	B8, C24, A1
1904
1905	ST		B2, 1 * SIZE(CO1)
1906	ST		B4, 3 * SIZE(CO1)
1907
1908	NMSUB	B5, B5, C14, A2
1909	NMSUB	B7, B7, C24, A2
1910
1911	MADD	B6, B6, C12, A2
1912	MADD	B8, B8, C22, A2
1913
1914	ST		B5, 0 * SIZE(CO2)
1915	ST		B7, 2 * SIZE(CO2)
1916	ST		B6, 1 * SIZE(CO2)
1917	ST		B8, 3 * SIZE(CO2)
1918#endif
1919
1920#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
1921	/*	(a + bi) * (c - di) */
1922	ADD		C11, A1, C11		#	ac'+'bd
1923	ADD		C21, A2, C21
1924	SUB		C13, A3, C13		#	ad'+'cb
1925	SUB		C23, A4, C23
1926#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1927	LD		A1, 152($sp)		#	load alpha_r
1928	LD		A2, 160($sp)		#	load alpha_i
1929#	LD		A2, 0 * SIZE(A)		#	load alpha_r
1930	ADD		C12, A5, C12
1931	ADD		C22, A6, C22
1932	SUB		C14, A7, C14
1933	SUB		C24, A8, C24
1934
1935	MUL	B1, C11, A1		#	A1 = alpha_r
1936	MUL	B3, C21, A1
1937	MUL	B2, C13, A1
1938	MUL	B4, C23, A1
1939	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1940	NMSUB	B3, B3, C23, A2
1941	MADD	B2, B2, C11, A2
1942	MADD	B4, B4, C21, A2
1943
1944	MUL	B5, C12, A1
1945	MUL	B7, C22, A1
1946
1947	ST		B1, 0 * SIZE(CO1)
1948	ST		B3, 2 * SIZE(CO1)
1949
1950	MUL	B6, C14, A1
1951	MUL	B8, C24, A1
1952
1953	ST		B2, 1 * SIZE(CO1)
1954	ST		B4, 3 * SIZE(CO1)
1955
1956	NMSUB	B5, B5, C14, A2
1957	NMSUB	B7, B7, C24, A2
1958
1959	MADD	B6, B6, C12, A2
1960	MADD	B8, B8, C22, A2
1961
1962	ST		B5, 0 * SIZE(CO2)
1963	ST		B7, 2 * SIZE(CO2)
1964	ST		B6, 1 * SIZE(CO2)
1965	ST		B8, 3 * SIZE(CO2)
1966
1967#endif
1968
1969#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
1970	/*	(a - bi) * (c + di) */
1971	ADD		C11, A1, C11		#	ac'+'bd
1972	ADD		C21, A2, C21
1973	SUB		C13, C13, A3		#	ad'+'cb
1974	SUB		C23, C23, A4
1975#	LD		A1, 0 * SIZE(A)		#	load alpha_r
1976	LD		A1, 152($sp)		#	load alpha_r
1977#	LD		A2, 0 * SIZE(A)		#	load alpha_r
1978	LD		A2, 160($sp)		#	load alpha_i
1979	ADD		C12, A5, C12
1980	ADD		C22, A6, C22
1981	SUB		C14, C14, A7
1982	SUB		C24, C24, A8
1983
1984	MUL	B1, C11, A1		#	A1 = alpha_r
1985	MUL	B3, C21, A1
1986	MUL	B2, C13, A1
1987	MUL	B4, C23, A1
1988	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
1989	NMSUB	B3, B3, C23, A2
1990	MADD	B2, B2, C11, A2
1991	MADD	B4, B4, C21, A2
1992
1993	MUL	B5, C12, A1
1994	MUL	B7, C22, A1
1995
1996	ST		B1, 0 * SIZE(CO1)
1997	ST		B3, 2 * SIZE(CO1)
1998
1999	MUL	B6, C14, A1
2000	MUL	B8, C24, A1
2001
2002	ST		B2, 1 * SIZE(CO1)
2003	ST		B4, 3 * SIZE(CO1)
2004
2005	NMSUB	B5, B5, C14, A2
2006	NMSUB	B7, B7, C24, A2
2007
2008	MADD	B6, B6, C12, A2
2009	MADD	B8, B8, C22, A2
2010
2011	ST		B5, 0 * SIZE(CO2)
2012	ST		B7, 2 * SIZE(CO2)
2013	ST		B6, 1 * SIZE(CO2)
2014	ST		B8, 3 * SIZE(CO2)
2015
2016#endif
2017
2018#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
2019	/*	(a - bi) * (c - di) */
2020	SUB		C11, C11, A1		#	ac'+'bd
2021	SUB		C21, C21, A2
2022	ADD		C13, A3, C13		#	ad'+'cb
2023	ADD		C23, A4, C23
2024	LD		A1, 152($sp)		#	load alpha_r
2025#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2026	LD		A2, 160($sp)		#	load alpha_i
2027#	LD		A2, 0 * SIZE(A)		#	load alpha_i
2028	SUB		C12, C12, A5
2029	SUB		C22, C22, A6
2030	ADD		C14, A7, C14
2031	ADD		C24, A8, C24
2032	NEG		C13, C13		#	both operands conjugated: imaginary part changes sign
2033	NEG		C23, C23
2034	NEG		C14, C14
2035	NEG		C24, C24
2036
2037	MUL	B1, C11, A1		#	A1 = alpha_r
2038	MUL	B3, C21, A1
2039	MUL	B2, C13, A1
2040	MUL	B4, C23, A1
2041	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2042	NMSUB	B3, B3, C23, A2
2043	MADD	B2, B2, C11, A2
2044	MADD	B4, B4, C21, A2
2045
2046	MUL	B5, C12, A1
2047	MUL	B7, C22, A1
2048
2049	ST		B1, 0 * SIZE(CO1)
2050	ST		B3, 2 * SIZE(CO1)
2051
2052	MUL	B6, C14, A1
2053	MUL	B8, C24, A1
2054
2055	ST		B2, 1 * SIZE(CO1)
2056	ST		B4, 3 * SIZE(CO1)
2057
2058	NMSUB	B5, B5, C14, A2
2059	NMSUB	B7, B7, C24, A2
2060
2061	MADD	B6, B6, C12, A2
2062	MADD	B8, B8, C22, A2
2063
2064	ST		B5, 0 * SIZE(CO2)
2065	ST		B7, 2 * SIZE(CO2)
2066	ST		B6, 1 * SIZE(CO2)
2067	ST		B8, 3 * SIZE(CO2)
2068#endif
2069
#	TRMM pointer fix-up: for these LEFT / TRANSA combinations AO and BO are
#	advanced by (K - KK - 2) << (1 + ZBASE_SHIFT).  Both arms of the LEFT
#	test subtract 2 here; the same template in the 1x2 tile subtracts
#	1 or 2.
2070#if ( defined(LEFT) &&  defined(TRANSA)) || \
2071    (!defined(LEFT) && !defined(TRANSA))
2072	dsubu	TEMP, K, KK
2073#ifdef LEFT
2074	daddiu	TEMP, TEMP, -2
2075#else
2076	daddiu	TEMP, TEMP, -2
2077#endif
2078	dsll	TEMP, TEMP, 1 + ZBASE_SHIFT
2079
2080	daddu	AO, AO, TEMP
2081	daddu	BO, BO, TEMP
2082#endif
2083
2084#ifdef LEFT
2085	daddiu	KK, KK, 2
2086#endif
2087
2088#endif
2089	daddiu	CO1, CO1, 4 * SIZE		#	step both C columns past the four values just stored
2090	daddiu	CO2, CO2, 4 * SIZE
2091
2092
2093	.align	4
2094.L21:
#	Odd-row test for the two-column pass: I = M & 1.  When it is zero the
#	1x2 tile is skipped and control goes to .L20.
2095	andi	I, M, 1
2096	blez	I, .L20
2097	NOP
2098
2099	.align	4
2100.L211:
#	Set-up for the 1x2 tile (MR=1, NR=2).  Positions BO (and, in some TRMM
#	variants, AO) for this tile, zeroes the accumulators C11..C14, loads
#	the first A and B register pairs, touches the two C columns, builds
#	B3/B4 with PLU, and computes L = number of 4-step trips.  With L <= 0
#	the main loop is skipped and control goes to .L212.
#	FETCH is ld (see top of file) with $0 as target, so the loaded value
#	is discarded; presumably this serves as a prefetch of C.
2101#if defined(TRMMKERNEL)
2102#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
2103	move	BO,  B
2104#else
2105	dsll	L,    KK, ZBASE_SHIFT			#	MR=1
2106	dsll	TEMP, KK, 1 + ZBASE_SHIFT		#	NR=2
2107
2108	daddu	AO, AO, L
2109	daddu	BO, B,  TEMP
2110#endif
2111	MTC		$0, C11				#	CLEAR RESULTS REGISTERS
2112	MOV		C12, C11
2113	gsLQC1(R13, F9, F8, 0)		#	B1 B2
2114
2115	gsLQC1(R12, F1, F0, 0)		#	A1 A2
2116	MOV		C13, C11
2117	MOV		C14, C11
2118
2119	FETCH	$0, 0 * SIZE(CO1)
2120	FETCH	$0, 0 * SIZE(CO2)
2121
2122	PLU		B3,	B1, B1
2123	PLU		B4, B2, B2
2124#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2125	dsubu	TEMP, K, KK
2126#elif defined(LEFT)
2127	daddiu	TEMP, KK, 1							#	MR=1
2128#else
2129	daddiu	TEMP, KK, 2							#	NR=2
2130#endif
2131	dsra	L,  TEMP, 2
2132	blez	L, .L212
2133	NOP
2134
2135#else
2136	move	BO, B				#	Reset	B
2137	dsra	L, K, 2				#	L = K >> 2 (four K steps per trip)
2138
2139	MTC		$0, C11				#	CLEAR RESULTS REGISTERS
2140	MOV		C12, C11
2141	gsLQC1(R13, F9, F8, 0)		#	B1 B2
2142
2143	gsLQC1(R12, F1, F0, 0)		#	A1 A2
2144	MOV		C13, C11
2145	MOV		C14, C11
2146
2147	FETCH	$0, 0 * SIZE(CO1)
2148	FETCH	$0, 0 * SIZE(CO2)
2149
2150	PLU		B3,	B1, B1
2151	blez	L, .L212
2152	PLU		B4, B2, B2			#	follows the branch; B4 is needed on both paths (delay slot, presumably)
2153#endif
2154
2155.L2110:
#	Main K loop of the 1x2 tile, four K steps per trip.  A1..A4 each feed
#	one step; the B pair alternates between B1/B2 (with B3/B4 from PLU)
#	and B5/B6 (with B7/B8 from PLU).  BO advances by 4 * 4 * SIZE and AO by
#	2 * 4 * SIZE per trip.  Loads are interleaved with the multiply-adds,
#	so the instruction order is part of the schedule.
2156	daddiu	L, L, -1
2157	gsLQC1(R13, F13, F12, 1)	#	B5 B6 (targets are F12 / F13)
2158	MADPS	C11, C11, A1, B1
2159	MADPS	C12, C12, A1, B2
2160
2161	MADPS	C13, C13, A1, B3
2162	MADPS	C14, C14, A1, B4
2163
2164	PLU		B7,	B5, B5
2165	PLU		B8, B6, B6
2166
2167	gsLQC1(R13, F9, F8, 2)		#	B1 B2
2168	MADPS	C11, C11, A2, B5
2169	MADPS	C12, C12, A2, B6
2170
2171	gsLQC1(R12, F3, F2, 1)		#	A3 A4
2172	MADPS	C13, C13, A2, B7
2173	MADPS	C14, C14, A2, B8
2174
2175	PLU		B3,	B1, B1
2176	PLU		B4, B2, B2
2177
2178	gsLQC1(R13, F13, F12, 3)	#	B5 B6 (targets are F12 / F13)
2179	MADPS	C11, C11, A3, B1
2180	MADPS	C12, C12, A3, B2
2181	daddiu	BO, BO, 4 * 4 * SIZE	#	4KR*4NR
2182
2183	daddiu	AO, AO, 2 * 4 * SIZE 	#	4KR*2MR (MR=1 in this tile)
2184	MADPS	C13, C13, A3, B3
2185	MADPS	C14, C14, A3, B4
2186
2187	PLU		B7,	B5, B5
2188	PLU		B8, B6, B6
2189
2190	gsLQC1(R13, F9, F8, 0)		#	B1 B2 for the next trip (BO already advanced)
2191	MADPS	C11, C11, A4, B5
2192	MADPS	C12, C12, A4, B6
2193
2194	gsLQC1(R12, F1, F0, 0)		#	A1 A2 for the next trip (AO already advanced)
2195	MADPS	C13, C13, A4, B7
2196	MADPS	C14, C14, A4, B8
2197
2198	PLU		B3,	B1, B1
2199	bgtz	L, .L2110
2200	PLU		B4, B2, B2			#	follows the branch; B4 is needed on both paths (delay slot, presumably)
2201
2202
2203	.align	4
2204.L212:
#	K remainder, two steps, for the 1x2 tile: runs when bit 1 of K (TEMP
#	in the TRMM build) is set, otherwise skips to .L217.  A1 and A2 feed
#	the two steps; BO advances by 2 * 4 * SIZE and AO by 4 * SIZE, then the
#	A and B pairs are reloaded from slot 0 for the single-step tail.
2205#ifndef TRMMKERNEL
2206	andi	L, K, 2
2207#else
2208	andi	L, TEMP, 2
2209#endif
2210	blez	L, .L217
2211	NOP
2212
2213	gsLQC1(R13, F13, F12, 1)	#	B5 B6 (targets are F12 / F13)
2214	MADPS	C11, C11, A1, B1
2215	MADPS	C12, C12, A1, B2
2216
2217	MADPS	C13, C13, A1, B3
2218	MADPS	C14, C14, A1, B4
2219
2220	PLU		B7,	B5, B5
2221	PLU		B8, B6, B6
2222	daddiu	BO, BO, 2 * 4 * SIZE
2223
2224	MADPS	C11, C11, A2, B5
2225	MADPS	C12, C12, A2, B6
2226	daddiu	AO, AO, 4 * SIZE
2227
2228	MADPS	C13, C13, A2, B7
2229	MADPS	C14, C14, A2, B8
2230
2231	gsLQC1(R12, F1, F0, 0)		#	A1 A2 (targets are F0 / F1)
2232	gsLQC1(R13, F9, F8, 0)		#	B1 B2
2233	PLU		B3,	B1, B1
2234	PLU		B4, B2, B2
2235
2236
2237	.align	4
2238.L217:
#	K remainder, one step, for the 1x2 tile: runs when bit 0 of K (TEMP in
#	the TRMM build) is set, otherwise skips to the write-back at .L210.
#	Uses A1 and B1..B4 already in registers; BO advances by 4 * SIZE and AO
#	by 2 * SIZE.
2239#ifndef TRMMKERNEL
2240	andi	L, K, 1
2241#else
2242	andi	L, TEMP, 1
2243#endif
2244	blez	L, .L210
2245	NOP
2246
2247	MADPS	C11, C11, A1, B1
2248	daddiu	BO, BO, 4 * SIZE
2249	MADPS	C12, C12, A1, B2
2250	daddiu	AO, AO, 2 * SIZE
2251
2252	MADPS	C13, C13, A1, B3
2253	MADPS	C14, C14, A1, B4
2254
2255	.align	4
2256.L210:							#	Write Back
#	Write-back of the 1x2 tile: one complex value to CO1 and one to CO2.
#	Same scheme as the 2x2 write-back: CVTU copies one half of each
#	accumulator into A1/A3/A5/A7, ADD / SUB form the real part (C11, C12)
#	and imaginary part (C13, C14) for the selected conjugation variant.
#	In this tile alpha_r is loaded into A4 (not A1) and alpha_i into A2.
#	Without TRMMKERNEL the result is accumulated into the loaded C values;
#	with TRMMKERNEL the first product uses MUL and C is not loaded, then
#	AO / BO / KK are adjusted.
2257#ifndef TRMMKERNEL
2258	daddiu	I, I, -1
2259	CVTU	A1, C11
2260	CVTU	A3, C13
2261	CVTU	A5, C12
2262	CVTU	A7, C14
2263
2264#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
2265	/*	(a + bi) * (c + di) */
2266	SUB		C11, C11, A1		#	ac'+'bd
2267	ADD		C13, A3, C13		#	ad'+'cb
2268#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2269	LD		A4, 152($sp)		#	load alpha_r
2270	LD		A2, 160($sp)		#	load alpha_i
2271#	LD		A2, 0 * SIZE(A)		#	load alpha_i
2272	SUB		C12, C12, A5
2273	ADD		C14, A7, C14
2274
2275	LD		B1, 0 * SIZE(CO1)
2276	LD		B2, 1 * SIZE(CO1)
2277
2278	MADD	B1, B1, C11, A4		#	A4 = alpha_r
2279	MADD	B2, B2, C13, A4
2280	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2281	MADD	B2, B2, C11, A2
2282
2283	LD		B5, 0 * SIZE(CO2)
2284	LD		B6, 1 * SIZE(CO2)
2285
2286	MADD	B5, B5, C12, A4
2287	ST		B1, 0 * SIZE(CO1)
2288	MADD	B6, B6, C14, A4
2289	ST		B2, 1 * SIZE(CO1)
2290
2291	NMSUB	B5, B5, C14, A2
2292	MADD	B6, B6, C12, A2
2293
2294	ST		B5, 0 * SIZE(CO2)
2295	ST		B6, 1 * SIZE(CO2)
2296#endif
2297
2298#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
2299	/*	(a + bi) * (c - di) */
2300	ADD		C11, A1, C11		#	ac'+'bd
2301	SUB		C13, A3, C13		#	ad'+'cb
2302#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2303	LD		A4, 152($sp)		#	load alpha_r
2304	LD		A2, 160($sp)		#	load alpha_i
2305#	LD		A2, 0 * SIZE(A)		#	load alpha_r
2306	ADD		C12, A5, C12
2307	SUB		C14, A7, C14
2308
2309	LD		B1, 0 * SIZE(CO1)
2310	LD		B2, 1 * SIZE(CO1)
2311
2312	MADD	B1, B1, C11, A4		#	A4 = alpha_r
2313	MADD	B2, B2, C13, A4
2314	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2315	MADD	B2, B2, C11, A2
2316
2317	LD		B5, 0 * SIZE(CO2)
2318	LD		B6, 1 * SIZE(CO2)
2319
2320	MADD	B5, B5, C12, A4
2321	ST		B1, 0 * SIZE(CO1)
2322	MADD	B6, B6, C14, A4
2323	ST		B2, 1 * SIZE(CO1)
2324
2325	NMSUB	B5, B5, C14, A2
2326	MADD	B6, B6, C12, A2
2327
2328	ST		B5, 0 * SIZE(CO2)
2329	ST		B6, 1 * SIZE(CO2)
2330
2331#endif
2332
2333#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
2334	/*	(a - bi) * (c + di) */
2335	ADD		C11, A1, C11		#	ac'+'bd
2336	SUB		C13, C13, A3		#	ad'+'cb
2337#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2338	LD		A4, 152($sp)		#	load alpha_r
2339#	LD		A2, 0 * SIZE(A)		#	load alpha_r
2340	LD		A2, 160($sp)		#	load alpha_i
2341	ADD		C12, A5, C12
2342	SUB		C14, C14, A7
2343
2344	LD		B1, 0 * SIZE(CO1)
2345	LD		B2, 1 * SIZE(CO1)
2346
2347	MADD	B1, B1, C11, A4		#	A4 = alpha_r
2348	MADD	B2, B2, C13, A4
2349	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2350	MADD	B2, B2, C11, A2
2351
2352	LD		B5, 0 * SIZE(CO2)
2353	LD		B6, 1 * SIZE(CO2)
2354
2355	MADD	B5, B5, C12, A4
2356	ST		B1, 0 * SIZE(CO1)
2357	MADD	B6, B6, C14, A4
2358	ST		B2, 1 * SIZE(CO1)
2359
2360	NMSUB	B5, B5, C14, A2
2361	MADD	B6, B6, C12, A2
2362
2363	ST		B5, 0 * SIZE(CO2)
2364	ST		B6, 1 * SIZE(CO2)
2365#endif
2366
2367#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
2368	/*	(a - bi) * (c - di) */
2369	SUB		C11, C11, A1		#	ac'+'bd
2370	ADD		C13, A3, C13		#	ad'+'cb
2371	LD		A4, 152($sp)		#	load alpha_r
2372#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2373	LD		A2, 160($sp)		#	load alpha_i
2374#	LD		A2, 0 * SIZE(A)		#	load alpha_i
2375	SUB		C12, C12, A5
2376	ADD		C14, A7, C14
2377	NEG		C13, C13		#	both operands conjugated: imaginary part changes sign
2378	LD		B1, 0 * SIZE(CO1)
2379	LD		B2, 1 * SIZE(CO1)
2380	NEG		C14, C14
2381
2382	MADD	B1, B1, C11, A4		#	A4 = alpha_r
2383	MADD	B2, B2, C13, A4
2384	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2385	MADD	B2, B2, C11, A2
2386
2387	LD		B5, 0 * SIZE(CO2)
2388	LD		B6, 1 * SIZE(CO2)
2389
2390	MADD	B5, B5, C12, A4
2391	ST		B1, 0 * SIZE(CO1)
2392	MADD	B6, B6, C14, A4
2393	ST		B2, 1 * SIZE(CO1)
2394
2395	NMSUB	B5, B5, C14, A2
2396	MADD	B6, B6, C12, A2
2397
2398	ST		B5, 0 * SIZE(CO2)
2399	ST		B6, 1 * SIZE(CO2)
2400#endif
2401
2402#else
#	TRMM build: alpha * product only; C is never loaded.
2403	daddiu	I, I, -1
2404	CVTU	A1, C11
2405	CVTU	A3, C13
2406	CVTU	A5, C12
2407	CVTU	A7, C14
2408
2409#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
2410	/*	(a + bi) * (c + di) */
2411	SUB		C11, C11, A1		#	ac'+'bd
2412	ADD		C13, A3, C13		#	ad'+'cb
2413#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2414	LD		A4, 152($sp)		#	load alpha_r
2415	LD		A2, 160($sp)		#	load alpha_i
2416#	LD		A2, 0 * SIZE(A)		#	load alpha_i
2417	SUB		C12, C12, A5
2418	ADD		C14, A7, C14
2419
2420	MUL B1, C11, A4		#	A4 = alpha_r
2421	MUL B2, C13, A4
2422	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2423	MADD	B2, B2, C11, A2
2424
2425	MUL B5, C12, A4
2426	ST		B1, 0 * SIZE(CO1)
2427	MUL B6, C14, A4
2428	ST		B2, 1 * SIZE(CO1)
2429
2430	NMSUB	B5, B5, C14, A2
2431	MADD	B6, B6, C12, A2
2432
2433	ST		B5, 0 * SIZE(CO2)
2434	ST		B6, 1 * SIZE(CO2)
2435#endif
2436
2437#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
2438	/*	(a + bi) * (c - di) */
2439	ADD		C11, A1, C11		#	ac'+'bd
2440	SUB		C13, A3, C13		#	ad'+'cb
2441#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2442	LD		A4, 152($sp)		#	load alpha_r
2443	LD		A2, 160($sp)		#	load alpha_i
2444#	LD		A2, 0 * SIZE(A)		#	load alpha_r
2445	ADD		C12, A5, C12
2446	SUB		C14, A7, C14
2447
2448	MUL B1, C11, A4		#	A4 = alpha_r
2449	MUL B2, C13, A4
2450	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2451	MADD	B2, B2, C11, A2
2452
2453	MUL B5, C12, A4
2454	ST		B1, 0 * SIZE(CO1)
2455	MUL B6, C14, A4
2456	ST		B2, 1 * SIZE(CO1)
2457
2458	NMSUB	B5, B5, C14, A2
2459	MADD	B6, B6, C12, A2
2460
2461	ST		B5, 0 * SIZE(CO2)
2462	ST		B6, 1 * SIZE(CO2)
2463
2464#endif
2465
2466#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
2467	/*	(a - bi) * (c + di) */
2468	ADD		C11, A1, C11		#	ac'+'bd
2469	SUB		C13, C13, A3		#	ad'+'cb
2470#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2471	LD		A4, 152($sp)		#	load alpha_r
2472#	LD		A2, 0 * SIZE(A)		#	load alpha_r
2473	LD		A2, 160($sp)		#	load alpha_i
2474	ADD		C12, A5, C12
2475	SUB		C14, C14, A7
2476
2477	MUL B1, C11, A4		#	A4 = alpha_r
2478	MUL B2, C13, A4
2479	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2480	MADD	B2, B2, C11, A2
2481
2482	MUL B5, C12, A4
2483	ST		B1, 0 * SIZE(CO1)
2484	MUL B6, C14, A4
2485	ST		B2, 1 * SIZE(CO1)
2486
2487	NMSUB	B5, B5, C14, A2
2488	MADD	B6, B6, C12, A2
2489
2490	ST		B5, 0 * SIZE(CO2)
2491	ST		B6, 1 * SIZE(CO2)
2492#endif
2493
2494#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
2495	/*	(a - bi) * (c - di) */
2496	SUB		C11, C11, A1		#	ac'+'bd
2497	ADD		C13, A3, C13		#	ad'+'cb
2498	LD		A4, 152($sp)		#	load alpha_r
2499#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2500	LD		A2, 160($sp)		#	load alpha_i
2501#	LD		A2, 0 * SIZE(A)		#	load alpha_i
2502	SUB		C12, C12, A5
2503	ADD		C14, A7, C14
2504	NEG		C13, C13		#	both operands conjugated: imaginary part changes sign
2505	NEG		C14, C14
2506
2507	MUL B1, C11, A4		#	A4 = alpha_r
2508	MUL B2, C13, A4
2509	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2510	MADD	B2, B2, C11, A2
2511
2512	MUL B5, C12, A4
2513	ST		B1, 0 * SIZE(CO1)
2514	MUL B6, C14, A4
2515	ST		B2, 1 * SIZE(CO1)
2516
2517	NMSUB	B5, B5, C14, A2
2518	MADD	B6, B6, C12, A2
2519
2520	ST		B5, 0 * SIZE(CO2)
2521	ST		B6, 1 * SIZE(CO2)
2522#endif
2523
2524
#	TRMM pointer fix-up: TEMP = K - KK - 1 (LEFT) or K - KK - 2 (otherwise);
#	AO advances by TEMP << ZBASE_SHIFT and BO by TEMP << (1 + ZBASE_SHIFT).
2525#if ( defined(LEFT) &&  defined(TRANSA)) || \
2526    (!defined(LEFT) && !defined(TRANSA))
2527	dsubu	TEMP, K, KK
2528#ifdef LEFT
2529	daddiu	TEMP, TEMP, -1
2530#else
2531	daddiu	TEMP, TEMP, -2
2532#endif
2533	dsll	L,    TEMP, ZBASE_SHIFT
2534	dsll	TEMP, TEMP, 1 + ZBASE_SHIFT
2535
2536	daddu	AO, AO, L
2537	daddu	BO, BO, TEMP
2538#endif
2539
2540#ifdef LEFT
2541	daddiu	KK, KK, 1
2542#endif
2543
2544#endif
2545	daddiu	CO1, CO1, 2 * SIZE		#	step both C columns past the two values just stored
2546	daddiu	CO2, CO2, 2 * SIZE
2547
2548
2549	.align	4
2550.L20:
#	Bottom of the two-column J loop: decrement J, make B point at the
#	position BO reached (the start of the next column pair), bump KK by 2
#	in the non-LEFT TRMM build, and loop back to .L24 while J > 0.
2551	daddiu	J, J, -1
2552	move	B, BO
2553
2554#if defined(TRMMKERNEL) && !defined(LEFT)
2555	daddiu	KK, KK, 2
2556#endif
2557
2558	bgtz	J, .L24
2559	NOP
2560
2561
2562	.align	4
2563.L1:
#	Single remaining column: J = N & 1.  When it is zero there is nothing
#	left to compute and control goes to .L999.
2564	andi	J, N, 1
2565	blez	J, .L999
2566	NOP
2567
2568.L14:
#	Start of the one-column pass: I = M >> 2 counts four-row tiles, AO is
#	reset to the start of A, CO1 takes the current C column and C is
#	advanced by LDC.  With I <= 0 the four-row tiles are skipped (.L12).
2569	dsra	I, M, 2				#	MR=4 (I = M >> 2)
2570	move	AO, A				#	Reset A
2571
2572#if defined(TRMMKERNEL) &&  defined(LEFT)
2573	move	KK, OFFSET
2574#endif
2575
2576	move	CO1, C
2577	blez	I, .L12
2578	daddu	C,   CO1, LDC			#	follows the branch (delay slot, presumably): runs on both paths
2579
2580	.align	4
2581.L141:
#	Set-up for the 4x1 tile (MR=4, NR=1).  Positions BO (and, in some TRMM
#	variants, AO) for this tile, zeroes the accumulators C11/C21/C31/C41
#	and C13/C23/C33/C43, loads the first B pair and the first two A pairs,
#	touches the C column, builds B3/B4 with PLU, and computes L = number of
#	4-step trips.  With L <= 0 the main loop is skipped (.L142).
#	FETCH is ld (see top of file) with $0 as target, so the loaded value
#	is discarded; presumably this serves as a prefetch of C.
2582#if defined(TRMMKERNEL)
2583#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
2584	move	BO,  B
2585#else
2586	dsll	L,    KK, 2 + ZBASE_SHIFT
2587	dsll	TEMP, KK, ZBASE_SHIFT
2588
2589	daddu	AO, AO, L
2590	daddu	BO, B,  TEMP
2591#endif
2592	MTC		$0, C11				#	CLEAR RESULTS REGISTERS
2593	MOV		C21, C11
2594	gsLQC1(R13, F9, F8, 0)		#	B1 B2
2595
2596	gsLQC1(R12, F1, F0, 0)		#	A1 A2
2597	MOV		C31, C11
2598	MOV		C41, C11
2599
2600	gsLQC1(R12, F3, F2, 1)		#	A3 A4
2601	MOV		C13, C11
2602	MOV		C23, C11
2603
2604	FETCH	$0, 0 * SIZE(CO1)
2605	MOV		C33, C11
2606	MOV		C43, C11
2607
2608	FETCH	$0, 8 * SIZE(CO1)
2609	PLU		B3,	B1, B1
2610	PLU		B4, B2, B2
2611#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2612	dsubu	TEMP, K, KK
2613#elif defined(LEFT)
2614	daddiu	TEMP, KK, 4						#	define Mr=4
2615#else
2616	daddiu	TEMP, KK, 1						#	define	NR=1
2617#endif
2618	dsra	L,  TEMP, 2
2619	blez	L, .L142
2620	NOP
2621
2622#else
2623	move	BO, B				#	Reset	B
2624	dsra	L, K, 2				#	L = K >> 2 (four K steps per trip)
2625
2626	MTC		$0, C11				#	CLEAR RESULTS REGISTERS
2627	MOV		C21, C11
2628	gsLQC1(R13, F9, F8, 0)		#	B1 B2
2629
2630	gsLQC1(R12, F1, F0, 0)		#	A1 A2
2631	MOV		C31, C11
2632	MOV		C41, C11
2633
2634	gsLQC1(R12, F3, F2, 1)		#	A3 A4
2635	MOV		C13, C11
2636	MOV		C23, C11
2637
2638	FETCH	$0, 0 * SIZE(CO1)
2639	MOV		C33, C11
2640	MOV		C43, C11
2641
2642	FETCH	$0, 8 * SIZE(CO1)
2643	PLU		B3,	B1, B1
2644	blez	L, .L142
2645	PLU		B4, B2, B2			#	follows the branch; B4 is needed on both paths (delay slot, presumably)
2646#endif
2647
2648.L1410:
#	Main K loop of the 4x1 tile, four K steps per trip.  Each step uses
#	four A registers (A1..A4 or A5..A8, alternating) against one B value:
#	B1, B2, B5, B6 in turn, with B3, B4, B7, B8 as their PLU counterparts.
#	AO advances by 8 * 4 * SIZE and BO by 2 * 4 * SIZE per trip.  Loads are
#	interleaved with the multiply-adds, so the instruction order is part
#	of the schedule.
2649	daddiu	L, L, -1
2650	MADPS	C11, C11, A1, B1
2651	MADPS	C21, C21, A2, B1
2652	gsLQC1(R12, F5, F4, 2)		#	A5 A6
2653
2654	gsLQC1(R12, F7, F6, 3)		#	A7 A8
2655	MADPS	C31, C31, A3, B1
2656	MADPS	C41, C41, A4, B1
2657
2658	MADPS	C13, C13, A1, B3
2659	MADPS	C23, C23, A2, B3
2660
2661	MADPS	C33, C33, A3, B3
2662	MADPS	C43, C43, A4, B3
2663	gsLQC1(R13, F13, F12, 1)	#	B5 B6 (targets are F12 / F13)
2664
2665	gsLQC1(R12, F1, F0, 4)		#	A1 A2
2666	MADPS	C11, C11, A5, B2
2667	MADPS	C21, C21, A6, B2
2668
2669	gsLQC1(R12, F3, F2, 5)		#	A3 A4
2670	MADPS	C31, C31, A7, B2
2671	MADPS	C41, C41, A8, B2
2672	daddiu	BO, BO, 2 * 4 * SIZE	#	4KR*2 values (NR=1 in this tile)
2673
2674	MADPS	C13, C13, A5, B4
2675	MADPS	C23, C23, A6, B4
2676
2677	MADPS	C33, C33, A7, B4
2678	MADPS	C43, C43, A8, B4
2679
2680	PLU		B7,	B5, B5
2681	PLU		B8, B6, B6
2682
2683	MADPS	C11, C11, A1, B5
2684	MADPS	C21, C21, A2, B5
2685	gsLQC1(R12, F5, F4, 6)		#	A5 A6
2686
2687	gsLQC1(R12, F7, F6, 7)		#	A7 A8
2688	MADPS	C31, C31, A3, B5
2689	MADPS	C41, C41, A4, B5
2690
2691	daddiu	AO, AO, 8 * 4 * SIZE 	#	4KR*8MR
2692	MADPS	C13, C13, A1, B7
2693	MADPS	C23, C23, A2, B7
2694
2695	MADPS	C33, C33, A3, B7
2696	MADPS	C43, C43, A4, B7
2697	gsLQC1(R13, F9, F8, 0)		#	B1 B2 for the next trip (BO already advanced)
2698
2699	gsLQC1(R12, F1, F0, 0)		#	A1 A2 for the next trip (AO already advanced)
2700	MADPS	C11, C11, A5, B6
2701	MADPS	C21, C21, A6, B6
2702
2703	gsLQC1(R12, F3, F2, 1)		#	A3 A4
2704	MADPS	C31, C31, A7, B6
2705	MADPS	C41, C41, A8, B6
2706
2707	MADPS	C13, C13, A5, B8
2708	MADPS	C23, C23, A6, B8
2709
2710	MADPS	C33, C33, A7, B8
2711	MADPS	C43, C43, A8, B8
2712
2713	PLU		B3,	B1, B1
2714	bgtz	L, .L1410
2715	PLU		B4, B2, B2			#	follows the branch; B4 is needed on both paths (delay slot, presumably)
2716
2717
2718	.align	4
2719.L142:
#	K remainder, two steps, for the 4x1 tile: runs when bit 1 of K (TEMP
#	in the TRMM build) is set, otherwise skips to .L147.  The first step
#	uses A1..A4 with B1 / B3, the second A5..A8 with B2 / B4.  AO advances
#	by 4 * 4 * SIZE and BO by 4 * SIZE; A1..A4, B1 and B3 are refreshed for
#	the single-step tail.
2720#ifndef TRMMKERNEL
2721	andi	L, K, 2
2722#else
2723	andi	L, TEMP, 2
2724#endif
2725	blez	L, .L147
2726	NOP
2727
2728	MADPS	C11, C11, A1, B1
2729	MADPS	C21, C21, A2, B1
2730	gsLQC1(R12, F5, F4, 2)		#	A5 A6
2731
2732	gsLQC1(R12, F7, F6, 3)		#	A7 A8
2733	MADPS	C31, C31, A3, B1
2734	MADPS	C41, C41, A4, B1
2735	daddiu	AO, AO, 4 * 4 * SIZE 	#	2KR*8MR
2736
2737	MADPS	C13, C13, A1, B3
2738	MADPS	C23, C23, A2, B3
2739
2740	MADPS	C33, C33, A3, B3
2741	MADPS	C43, C43, A4, B3
2742	gsLQC1(R13, F13, F8, 1)	#	targets are F8 (B1) and F13 (B6), unlike the main loop; B1 feeds the tail below
2743
2744	gsLQC1(R12, F1, F0, 0)		#	A1 A2
2745	MADPS	C11, C11, A5, B2
2746	MADPS	C21, C21, A6, B2
2747
2748	gsLQC1(R12, F3, F2, 1)		#	A3 A4
2749	MADPS	C31, C31, A7, B2
2750	MADPS	C41, C41, A8, B2
2751	daddiu	BO, BO, 4 * SIZE	#	2KR*2 values (NR=1 in this tile)
2752
2753	MADPS	C13, C13, A5, B4
2754	MADPS	C23, C23, A6, B4
2755
2756	MADPS	C33, C33, A7, B4
2757	MADPS	C43, C43, A8, B4
2758	PLU		B3,	B1, B1
2759
2760
2761	.align	4
2762.L147:
#	K remainder, one step, for the 4x1 tile: runs when bit 0 of K (TEMP in
#	the TRMM build) is set, otherwise skips to the write-back at .L140.
#	Uses A1..A4, B1 and B3 already in registers; BO advances by 2 * SIZE
#	and AO by 2 * 4 * SIZE.
2763#ifndef TRMMKERNEL
2764	andi	L, K, 1
2765#else
2766	andi	L, TEMP, 1
2767#endif
2768	blez	L, .L140
2769	NOP
2770
2771	MADPS	C11, C11, A1, B1
2772	MADPS	C21, C21, A2, B1
2773	daddiu	BO, BO, 2 * SIZE
2774
2775	MADPS	C31, C31, A3, B1
2776	MADPS	C41, C41, A4, B1
2777	daddiu	AO, AO, 2 * 4 * SIZE
2778
2779	MADPS	C13, C13, A1, B3
2780	MADPS	C23, C23, A2, B3
2781
2782	MADPS	C33, C33, A3, B3
2783	MADPS	C43, C43, A4, B3
2784
2785
2786	.align	4
2787.L140:							#	Write Back
2788#ifndef TRMMKERNEL
2789	daddiu	I, I, -1
2790	CVTU	A1, C11
2791	CVTU	A2, C21
2792
2793	CVTU	A3, C31
2794	CVTU	A4, C41
2795
2796	CVTU	A5, C13
2797	CVTU	A6, C23
2798
2799	CVTU	A7, C33
2800	CVTU	A8, C43
2801
2802	CVTU	B1, C12
2803	CVTU	B2, C22
2804
2805	CVTU	B3, C32
2806	CVTU	B4, C42
2807
2808	CVTU	B5, C14
2809	CVTU	B6, C24
2810
2811	CVTU	B7, C34
2812	CVTU	B8, C44
2813
2814#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
2815	/*	(a + bi) * (c + di) */
2816	SUB		C11, C11, A1		#	ac'+'bd
2817	SUB		C21, C21, A2
2818#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2819	SUB		C31, C31, A3
2820	LD		A1, 152($sp)		#	load alpha_r
2821	SUB		C41, C41, A4
2822	LD		A2, 160($sp)		#	load alpha_i
2823#	LD		A2, 0 * SIZE(A)		#	load alpha_i
2824	ADD		C13, A5, C13		#	ad'+'cb
2825	ADD		C23, A6, C23
2826	ADD		C33, A7, C33
2827	ADD		C43, A8, C43
2828
2829	LD		B1, 0 * SIZE(CO1)
2830	LD		B3, 2 * SIZE(CO1)
2831	LD		B5, 4 * SIZE(CO1)
2832	LD		B7, 6 * SIZE(CO1)
2833	LD		B2, 1 * SIZE(CO1)
2834	LD		B4, 3 * SIZE(CO1)
2835	LD		B6, 5 * SIZE(CO1)
2836	LD		B8, 7 * SIZE(CO1)
2837
2838	MADD	B1, B1, C11, A1		#	A1 = alpha_r
2839	MADD	B3, B3, C21, A1
2840	MADD	B5, B5, C31, A1
2841	MADD	B7, B7, C41, A1
2842	MADD	B2, B2, C13, A1
2843	MADD	B4, B4, C23, A1
2844	MADD	B6, B6, C33, A1
2845	MADD	B8, B8, C43, A1
2846	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2847	NMSUB	B3, B3, C23, A2
2848	NMSUB	B5, B5, C33, A2
2849	NMSUB	B7, B7, C43, A2
2850	MADD	B2, B2, C11, A2
2851	MADD	B4, B4, C21, A2
2852	MADD	B6, B6, C31, A2
2853	MADD	B8, B8, C41, A2
2854
2855	ST		B1, 0 * SIZE(CO1)
2856	ST		B3, 2 * SIZE(CO1)
2857	ST		B5, 4 * SIZE(CO1)
2858	ST		B7, 6 * SIZE(CO1)
2859	ST		B2, 1 * SIZE(CO1)
2860	ST		B4, 3 * SIZE(CO1)
2861	ST		B6, 5 * SIZE(CO1)
2862	ST		B8, 7 * SIZE(CO1)
2863#endif
2864
2865#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
2866	/*	(a + bi) * (c - di) */
2867	ADD		C11, A1, C11		#	ac'+'bd
2868	ADD		C21, A2, C21
2869#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2870	ADD		C31, A3, C31
2871	LD		A1, 152($sp)		#	load alpha_r
2872	ADD		C41, A4, C41
2873	LD		A2, 160($sp)		#	load alpha_i
2874#	LD		A2, 0 * SIZE(A)		#	load alpha_r
2875	SUB		C13, A5, C13		#	ad'+'cb
2876	SUB		C23, A6, C23
2877	SUB		C33, A7, C33
2878	SUB		C43, A8, C43
2879
2880	LD		B1, 0 * SIZE(CO1)
2881	LD		B3, 2 * SIZE(CO1)
2882	LD		B5, 4 * SIZE(CO1)
2883	LD		B7, 6 * SIZE(CO1)
2884	LD		B2, 1 * SIZE(CO1)
2885	LD		B4, 3 * SIZE(CO1)
2886	LD		B6, 5 * SIZE(CO1)
2887	LD		B8, 7 * SIZE(CO1)
2888
2889	MADD	B1, B1, C11, A1		#	A1 = alpha_r
2890	MADD	B3, B3, C21, A1
2891	MADD	B5, B5, C31, A1
2892	MADD	B7, B7, C41, A1
2893	MADD	B2, B2, C13, A1
2894	MADD	B4, B4, C23, A1
2895	MADD	B6, B6, C33, A1
2896	MADD	B8, B8, C43, A1
2897	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2898	NMSUB	B3, B3, C23, A2
2899	NMSUB	B5, B5, C33, A2
2900	NMSUB	B7, B7, C43, A2
2901	MADD	B2, B2, C11, A2
2902	MADD	B4, B4, C21, A2
2903	MADD	B6, B6, C31, A2
2904	MADD	B8, B8, C41, A2
2905
2906	ST		B1, 0 * SIZE(CO1)
2907	ST		B3, 2 * SIZE(CO1)
2908	ST		B5, 4 * SIZE(CO1)
2909	ST		B7, 6 * SIZE(CO1)
2910	ST		B2, 1 * SIZE(CO1)
2911	ST		B4, 3 * SIZE(CO1)
2912	ST		B6, 5 * SIZE(CO1)
2913	ST		B8, 7 * SIZE(CO1)
2914#endif
2915
2916#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
2917	/*	(a - bi) * (c + di) */
2918	ADD		C11, A1, C11		#	ac'+'bd
2919	ADD		C21, A2, C21
2920#	LD		A1, 0 * SIZE(A)		#	load alpha_r
2921	ADD		C31, A3, C31
2922	LD		A1, 152($sp)		#	load alpha_r
2923#	LD		A2, 0 * SIZE(A)		#	load alpha_r
2924	ADD		C41, A4, C41
2925	LD		A2, 160($sp)		#	load alpha_i
2926	SUB		C13, C13, A5		#	ad'+'cb
2927	SUB		C23, C23, A6
2928	SUB		C33, C33, A7
2929	SUB		C43, C43, A8
2930
2931	LD		B1, 0 * SIZE(CO1)
2932	LD		B3, 2 * SIZE(CO1)
2933	LD		B5, 4 * SIZE(CO1)
2934	LD		B7, 6 * SIZE(CO1)
2935	LD		B2, 1 * SIZE(CO1)
2936	LD		B4, 3 * SIZE(CO1)
2937	LD		B6, 5 * SIZE(CO1)
2938	LD		B8, 7 * SIZE(CO1)
2939
2940	MADD	B1, B1, C11, A1		#	A1 = alpha_r
2941	MADD	B3, B3, C21, A1
2942	MADD	B5, B5, C31, A1
2943	MADD	B7, B7, C41, A1
2944	MADD	B2, B2, C13, A1
2945	MADD	B4, B4, C23, A1
2946	MADD	B6, B6, C33, A1
2947	MADD	B8, B8, C43, A1
2948	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
2949	NMSUB	B3, B3, C23, A2
2950	NMSUB	B5, B5, C33, A2
2951	NMSUB	B7, B7, C43, A2
2952	MADD	B2, B2, C11, A2
2953	MADD	B4, B4, C21, A2
2954	MADD	B6, B6, C31, A2
2955	MADD	B8, B8, C41, A2
2956
2957	ST		B1, 0 * SIZE(CO1)
2958	ST		B3, 2 * SIZE(CO1)
2959	ST		B5, 4 * SIZE(CO1)
2960	ST		B7, 6 * SIZE(CO1)
2961	ST		B2, 1 * SIZE(CO1)
2962	ST		B4, 3 * SIZE(CO1)
2963	ST		B6, 5 * SIZE(CO1)
2964	ST		B8, 7 * SIZE(CO1)
2965#endif
2966
2967#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
2968	/*	(a - bi) * (c - di) */
2969	SUB		C11, C11, A1		#	AC'+'BD
2970	SUB		C21, C21, A2
2971	SUB		C31, C31, A3
2972	LD		A1, 152($sp)		#	LOAD ALPHA_R
2973#	LD		A1, 0 * SIZE(A)		#	LOAD ALPHA_R
2974	SUB		C41, C41, A4
2975	LD		A2, 160($sp)
2976#	LD		A2, 0 * SIZE(A)		#	LOAD ALPHA_I
2977
2978	ADD		C13, A5, C13		#	AD'+'CB
2979	ADD		C23, A6, C23
2980	ADD		C33, A7, C33
2981	ADD		C43, A8, C43
2982	NEG		C13, C13		#	AD'+'CB
2983	NEG		C23, C23
2984	NEG		C33, C33
2985	NEG		C43, C43
2986
2987
2988	LD		B1, 0 * SIZE(CO1)
2989	LD		B3, 2 * SIZE(CO1)
2990	LD		B5, 4 * SIZE(CO1)
2991	LD		B7, 6 * SIZE(CO1)
2992	LD		B2, 1 * SIZE(CO1)
2993	LD		B4, 3 * SIZE(CO1)
2994	LD		B6, 5 * SIZE(CO1)
2995	LD		B8, 7 * SIZE(CO1)
2996
2997	MADD	B1, B1, C11, A1		#	A1 = ALPHA_R
2998	MADD	B3, B3, C21, A1
2999	MADD	B5, B5, C31, A1
3000	MADD	B7, B7, C41, A1
3001	MADD	B2, B2, C13, A1
3002	MADD	B4, B4, C23, A1
3003	MADD	B6, B6, C33, A1
3004	MADD	B8, B8, C43, A1
3005	NMSUB	B1, B1, C13, A2		#	A2 = ALPHA_I
3006	NMSUB	B3, B3, C23, A2
3007	NMSUB	B5, B5, C33, A2
3008	NMSUB	B7, B7, C43, A2
3009	MADD	B2, B2, C11, A2
3010	MADD	B4, B4, C21, A2
3011	MADD	B6, B6, C31, A2
3012	MADD	B8, B8, C41, A2
3013
3014	ST		B1, 0 * SIZE(CO1)
3015	ST		B3, 2 * SIZE(CO1)
3016	ST		B5, 4 * SIZE(CO1)
3017	ST		B7, 6 * SIZE(CO1)
3018	ST		B2, 1 * SIZE(CO1)
3019	ST		B4, 3 * SIZE(CO1)
3020	ST		B6, 5 * SIZE(CO1)
3021	ST		B8, 7 * SIZE(CO1)
3022#endif
3023
3024#else
3025	daddiu	I, I, -1
3026	CVTU	A1, C11
3027	CVTU	A2, C21
3028
3029	CVTU	A3, C31
3030	CVTU	A4, C41
3031
3032	CVTU	A5, C13
3033	CVTU	A6, C23
3034
3035	CVTU	A7, C33
3036	CVTU	A8, C43
3037
3038	CVTU	B1, C12
3039	CVTU	B2, C22
3040
3041	CVTU	B3, C32
3042	CVTU	B4, C42
3043
3044	CVTU	B5, C14
3045	CVTU	B6, C24
3046
3047	CVTU	B7, C34
3048	CVTU	B8, C44
3049
3050#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
3051	/*	(a + bi) * (c + di) */
3052	SUB		C11, C11, A1		#	ac'+'bd
3053	SUB		C21, C21, A2
3054#	LD		A1, 0 * SIZE(A)		#	load alpha_r
3055	SUB		C31, C31, A3
3056	LD		A1, 152($sp)		#	load alpha_r
3057	SUB		C41, C41, A4
3058	LD		A2, 160($sp)		#	load alpha_i
3059#	LD		A2, 0 * SIZE(A)		#	load alpha_i
3060	ADD		C13, A5, C13		#	ad'+'cb
3061	ADD		C23, A6, C23
3062	ADD		C33, A7, C33
3063	ADD		C43, A8, C43
3064
3065	MUL B1, C11, A1		#	A1 = alpha_r
3066	MUL B3, C21, A1
3067	MUL B5, C31, A1
3068	MUL B7, C41, A1
3069	MUL B2, C13, A1
3070	MUL B4, C23, A1
3071	MUL B6, C33, A1
3072	MUL B8, C43, A1
3073	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
3074	NMSUB	B3, B3, C23, A2
3075	NMSUB	B5, B5, C33, A2
3076	NMSUB	B7, B7, C43, A2
3077	MADD	B2, B2, C11, A2
3078	MADD	B4, B4, C21, A2
3079	MADD	B6, B6, C31, A2
3080	MADD	B8, B8, C41, A2
3081
3082	ST		B1, 0 * SIZE(CO1)
3083	ST		B3, 2 * SIZE(CO1)
3084	ST		B5, 4 * SIZE(CO1)
3085	ST		B7, 6 * SIZE(CO1)
3086	ST		B2, 1 * SIZE(CO1)
3087	ST		B4, 3 * SIZE(CO1)
3088	ST		B6, 5 * SIZE(CO1)
3089	ST		B8, 7 * SIZE(CO1)
3090#endif
3091
3092#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
3093	/*	(a + bi) * (c - di) */
3094	ADD		C11, A1, C11		#	ac'+'bd
3095	ADD		C21, A2, C21
3096#	LD		A1, 0 * SIZE(A)		#	load alpha_r
3097	ADD		C31, A3, C31
3098	LD		A1, 152($sp)		#	load alpha_r
3099	ADD		C41, A4, C41
3100	LD		A2, 160($sp)		#	load alpha_i
3101#	LD		A2, 0 * SIZE(A)		#	load alpha_r
3102	SUB		C13, A5, C13		#	ad'+'cb
3103	SUB		C23, A6, C23
3104	SUB		C33, A7, C33
3105	SUB		C43, A8, C43
3106
3107	MUL B1, C11, A1		#	A1 = alpha_r
3108	MUL B3, C21, A1
3109	MUL B5, C31, A1
3110	MUL B7, C41, A1
3111	MUL B2, C13, A1
3112	MUL B4, C23, A1
3113	MUL B6, C33, A1
3114	MUL B8, C43, A1
3115	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
3116	NMSUB	B3, B3, C23, A2
3117	NMSUB	B5, B5, C33, A2
3118	NMSUB	B7, B7, C43, A2
3119	MADD	B2, B2, C11, A2
3120	MADD	B4, B4, C21, A2
3121	MADD	B6, B6, C31, A2
3122	MADD	B8, B8, C41, A2
3123
3124	ST		B1, 0 * SIZE(CO1)
3125	ST		B3, 2 * SIZE(CO1)
3126	ST		B5, 4 * SIZE(CO1)
3127	ST		B7, 6 * SIZE(CO1)
3128	ST		B2, 1 * SIZE(CO1)
3129	ST		B4, 3 * SIZE(CO1)
3130	ST		B6, 5 * SIZE(CO1)
3131	ST		B8, 7 * SIZE(CO1)
3132#endif
3133
3134#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
3135	/*	(a - bi) * (c + di) */
3136	ADD		C11, A1, C11		#	ac'+'bd
3137	ADD		C21, A2, C21
3138#	LD		A1, 0 * SIZE(A)		#	load alpha_r
3139	ADD		C31, A3, C31
3140	LD		A1, 152($sp)		#	load alpha_r
3141#	LD		A2, 0 * SIZE(A)		#	load alpha_r
3142	ADD		C41, A4, C41
3143	LD		A2, 160($sp)		#	load alpha_i
3144	SUB		C13, C13, A5		#	ad'+'cb
3145	SUB		C23, C23, A6
3146	SUB		C33, C33, A7
3147	SUB		C43, C43, A8
3148
3149	MUL B1, C11, A1		#	A1 = alpha_r
3150	MUL B3, C21, A1
3151	MUL B5, C31, A1
3152	MUL B7, C41, A1
3153	MUL B2, C13, A1
3154	MUL B4, C23, A1
3155	MUL B6, C33, A1
3156	MUL B8, C43, A1
3157	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
3158	NMSUB	B3, B3, C23, A2
3159	NMSUB	B5, B5, C33, A2
3160	NMSUB	B7, B7, C43, A2
3161	MADD	B2, B2, C11, A2
3162	MADD	B4, B4, C21, A2
3163	MADD	B6, B6, C31, A2
3164	MADD	B8, B8, C41, A2
3165
3166	ST		B1, 0 * SIZE(CO1)
3167	ST		B3, 2 * SIZE(CO1)
3168	ST		B5, 4 * SIZE(CO1)
3169	ST		B7, 6 * SIZE(CO1)
3170	ST		B2, 1 * SIZE(CO1)
3171	ST		B4, 3 * SIZE(CO1)
3172	ST		B6, 5 * SIZE(CO1)
3173	ST		B8, 7 * SIZE(CO1)
3174#endif
3175
3176#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
3177	/*	(a - bi) * (c - di) */
3178	SUB		C11, C11, A1		#	AC'+'BD
3179	SUB		C21, C21, A2
3180	SUB		C31, C31, A3
3181	LD		A1, 152($sp)		#	LOAD ALPHA_R
3182#	LD		A1, 0 * SIZE(A)		#	LOAD ALPHA_R
3183	SUB		C41, C41, A4
3184	LD		A2, 160($sp)
3185#	LD		A2, 0 * SIZE(A)		#	LOAD ALPHA_I
3186
3187	ADD		C13, A5, C13		#	AD'+'CB
3188	ADD		C23, A6, C23
3189	ADD		C33, A7, C33
3190	ADD		C43, A8, C43
3191	NEG		C13, C13		#	AD'+'CB
3192	NEG		C23, C23
3193	NEG		C33, C33
3194	NEG		C43, C43
3195
3196	MUL B1, C11, A1		#	A1 = ALPHA_R
3197	MUL B3, C21, A1
3198	MUL B5, C31, A1
3199	MUL B7, C41, A1
3200	MUL B2, C13, A1
3201	MUL B4, C23, A1
3202	MUL B6, C33, A1
3203	MUL B8, C43, A1
3204	NMSUB	B1, B1, C13, A2		#	A2 = ALPHA_I
3205	NMSUB	B3, B3, C23, A2
3206	NMSUB	B5, B5, C33, A2
3207	NMSUB	B7, B7, C43, A2
3208	MADD	B2, B2, C11, A2
3209	MADD	B4, B4, C21, A2
3210	MADD	B6, B6, C31, A2
3211	MADD	B8, B8, C41, A2
3212
3213	ST		B1, 0 * SIZE(CO1)
3214	ST		B3, 2 * SIZE(CO1)
3215	ST		B5, 4 * SIZE(CO1)
3216	ST		B7, 6 * SIZE(CO1)
3217	ST		B2, 1 * SIZE(CO1)
3218	ST		B4, 3 * SIZE(CO1)
3219	ST		B6, 5 * SIZE(CO1)
3220	ST		B8, 7 * SIZE(CO1)
3221#endif
3222
3223
3224#if ( defined(LEFT) &&  defined(TRANSA)) || \
3225    (!defined(LEFT) && !defined(TRANSA))
3226	dsubu	TEMP, K, KK
3227#ifdef LEFT
3228	daddiu	TEMP, TEMP, -4
3229#else
3230	daddiu	TEMP, TEMP, -1
3231#endif
3232
3233	dsll	L,    TEMP, 2 + ZBASE_SHIFT
3234	dsll	TEMP, TEMP, ZBASE_SHIFT
3235
3236	daddu	AO, AO, L
3237	daddu	BO, BO, TEMP
3238#endif
3239
3240#ifdef LEFT
3241	daddiu	KK, KK, 4
3242#endif
3243
3244#endif
3245	bgtz	I, .L141
3246	daddiu	CO1, CO1, 8 * SIZE
3247
3248	.align	4
.L12:
	/* Entry test for the strip of two along M in the single-column pass:
	   falls through into .L121 when bit 1 of M is set, otherwise skips
	   ahead to the strip-of-one test at .L11. */
	andi	I, M, 2				#	MR=2
	blez	I, .L11
	NOP					/* branch delay slot */
3253
3254	.align	4
.L121:
	/* Set-up for the strip of two: position BO (and, for TRMM, AO), zero
	   the four accumulators C11/C21/C13/C23, preload the first operand
	   pairs through BO ($13, R13) and AO ($12, R12), and compute the
	   4x-unrolled trip count L.  FETCH is ld (see the head of the file)
	   and targets $0, so the two FETCH lines only touch memory at CO1 and
	   discard the value.  PLU, MTC and MOV come from common.h; PLU with
	   both sources equal presumably yields the half-swapped copy of the
	   paired value -- TODO confirm against common.h. */
#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	BO,  B
#else
	/* Skip the first KK steps: AO moves by KK * 2 elements, BO by KK * 1
	   (ZBASE_SHIFT is presumably log2 of one complex element in bytes --
	   confirm in common.h). */
	dsll	L, KK, 1 + ZBASE_SHIFT
	dsll	TEMP, KK,  ZBASE_SHIFT

	daddu	AO, AO, L
	daddu	BO, B,  TEMP
#endif

	MTC		$0, C11				#	clear result registers
	MOV		C21, C11
	gsLQC1(R13, F9, F8, 0)		#	B1 B2

	gsLQC1(R12, F1, F0, 0)		#	A1 A2
	MOV		C13, C11
	MOV		C23, C11

	FETCH	$0, 0 * SIZE(CO1)
	FETCH	$0, 8 * SIZE(CO1)

	PLU		B3,	B1, B1
	PLU		B4, B2, B2
	/* TEMP = number of k-steps this tile really has to run. */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 2
#else
	daddiu	TEMP, KK, 1
#endif
	dsra	L,  TEMP, 2
	blez	L, .L122
	NOP

#else
	move	BO, B				#	Reset	B
	dsra	L, K, 2				#	L = K / 4 (loop below is unrolled 4x in K)

	MTC		$0, C11				#	clear result registers
	MOV		C21, C11
	gsLQC1(R13, F9, F8, 0)		#	B1 B2

	gsLQC1(R12, F1, F0, 0)		#	A1 A2
	MOV		C13, C11
	MOV		C23, C11

	FETCH	$0, 0 * SIZE(CO1)
	FETCH	$0, 8 * SIZE(CO1)

	PLU		B3,	B1, B1
	blez	L, .L122
	PLU		B4, B2, B2		/* delay slot: executes whether or not the branch is taken */
#endif
3310
.L1210:
	/* Main K loop for the strip of two, four k-steps per iteration.
	   Each trip issues sixteen MADPS into C11/C21/C13/C23, consumes
	   16 * SIZE from AO and 8 * SIZE from BO, and reloads A1/A2, B1/B2
	   (plus the PLU copies B3/B4) for the next trip.  Loads are
	   interleaved with the multiply-adds, so statement order matters. */
	daddiu	L, L, -1
	gsLQC1(R13, F13, F12, 1)	#	B5 B6
	MADPS	C11, C11, A1, B1
	MADPS	C21, C21, A2, B1

	gsLQC1(R12, F3, F2, 1)		#	A3 A4
	MADPS	C13, C13, A1, B3
	MADPS	C23, C23, A2, B3

	gsLQC1(R12, F5, F4, 2)		#	A5 A6
	PLU		B7,	B5, B5
	PLU		B8, B6, B6
	daddiu	BO, BO, 2 * 4 * SIZE	#	4 k-steps * 1 column * 2 (re, im)

	MADPS	C11, C11, A3, B2
	MADPS	C21, C21, A4, B2

	gsLQC1(R12, F7, F6, 3)		#	A7 A8
	MADPS	C13, C13, A3, B4
	MADPS	C23, C23, A4, B4

	MADPS	C11, C11, A5, B5
	MADPS	C21, C21, A6, B5
	daddiu	AO, AO, 4 * 4 * SIZE 	#	4 k-steps * 2 wide * 2 (re, im)

	gsLQC1(R13, F9, F8, 0)		#	B1 B2
	MADPS	C13, C13, A5, B7
	MADPS	C23, C23, A6, B7

	gsLQC1(R12, F1, F0, 0)		#	A1 A2
	MADPS	C11, C11, A7, B6
	MADPS	C21, C21, A8, B6

	MADPS	C13, C13, A7, B8
	MADPS	C23, C23, A8, B8

	PLU		B3,	B1, B1
	bgtz	L, .L1210
	PLU		B4, B2, B2		/* delay slot */
3351
3352
3353	.align	4
.L122:
	/* K remainder of two steps for the strip of two.  Runs when bit 1 of
	   the trip count is set (K for GEMM, TEMP for TRMM); otherwise control
	   goes straight to the one-step remainder at .L127.
	   In:  A1/A2 and B1/B2 preloaded, B3/B4 = PLU copies of B1/B2.
	   Out: C11/C21/C13/C23 updated by two k-steps, AO advanced by
	        8 * SIZE, BO by 4 * SIZE, and A1/A2, B1/B2, B3 reloaded for
	        .L127. */
#ifndef	TRMMKERNEL
	andi	L, K, 2
#else
	andi	L, TEMP, 2
#endif
	blez	L, .L127
	NOP					/* branch delay slot */

	MADPS	C11, C11, A1, B1
	MADPS	C21, C21, A2, B1

	gsLQC1(R12, F3, F2, 1)		#	A3 A4
	MADPS	C13, C13, A1, B3
	MADPS	C23, C23, A2, B3

	/* B5/B7 are deliberately left alone: this path loads no second B
	   pair, and neither .L127 nor the write-back at .L120 reads B7. */
	daddiu	BO, BO, 1 * 4 * SIZE	#	2 k-steps * 1 column * 2 (re, im)

	daddiu	AO, AO, 2 * 4 * SIZE	#	2 k-steps * 2 wide * 2 (re, im)
	MADPS	C11, C11, A3, B2
	MADPS	C21, C21, A4, B2

	MADPS	C13, C13, A3, B4
	MADPS	C23, C23, A4, B4

	/* Reload the operands for a possible final single step; B4 is not
	   rebuilt because .L127 only uses B1 and B3. */
	gsLQC1(R13, F9, F8, 0)
	gsLQC1(R12, F1, F0, 0)
	PLU		B3, B1, B1
3383
3384	.align	4
.L127:
	/* K remainder of one step for the strip of two.  Runs when bit 0 of
	   the trip count is set (K for GEMM, TEMP for TRMM); otherwise goes
	   directly to the write-back at .L120.  Uses the already loaded
	   A1/A2, B1 and B3, and advances AO by 4 * SIZE and BO by 2 * SIZE. */
#ifndef	TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L120
	NOP					/* branch delay slot */

	MADPS	C11, C11, A1, B1
	MADPS	C21, C21, A2, B1
	daddiu	BO, BO, 2 * SIZE
	daddiu	AO, AO, 4 * SIZE

	MADPS	C13, C13, A1, B3
	MADPS	C23, C23, A2, B3
3401
3402	.align	4
.L120:							#	Write Back
	/* Write-back for the strip of two (four scalars at CO1).
	   CVTU copies into A1..A4 a value derived from each accumulator
	   (CVTU is a common.h macro; presumably it extracts the upper half of
	   a paired-single -- TODO confirm).  The SUB/ADD/NEG that follow
	   combine that value with the accumulator; which sign is used depends
	   on the conjugation group selected at build time (NN.., NR.., RN..,
	   RR..).  After that step C11/C21 feed the scalars stored at offsets
	   0 and 2, and C13/C23 those stored at offsets 1 and 3.
	   alpha_r and alpha_i are reloaded from 152($sp) and 160($sp).
	   Assuming MADD d,c,a,b is c + a*b and NMSUB d,c,a,b is c - a*b
	   (both macros live in common.h):
	     GEMM build: C is loaded, updated with alpha * result, stored.
	     TRMM build: C is not read; alpha * result is stored directly, and
	                 AO/BO/KK are then stepped for the next tile.
	   Exactly one of the four conjugation groups is expected to be
	   defined.  I is decremented here but is reassigned at .L11 before it
	   is read again. */
#ifndef	TRMMKERNEL
	daddiu	I, I, -1
	CVTU	A1, C11
	CVTU	A2, C21

	CVTU	A3, C13
	CVTU	A4, C23


#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
	/*	(a + bi) * (c + di) */
	SUB		C11, C11, A1		#	ac'+'bd
	SUB		C21, C21, A2
	ADD		C13, A3, C13		#	ad'+'cb
	ADD		C23, A4, C23
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A1, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i
#	LD		A2, 0 * SIZE(A)		#	load alpha_i

	LD		B1, 0 * SIZE(CO1)
	LD		B3, 2 * SIZE(CO1)
	LD		B2, 1 * SIZE(CO1)
	LD		B4, 3 * SIZE(CO1)

	MADD	B1, B1, C11, A1		#	A1 = alpha_r
	MADD	B3, B3, C21, A1
	MADD	B2, B2, C13, A1
	MADD	B4, B4, C23, A1
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B3, 2 * SIZE(CO1)

	ST		B2, 1 * SIZE(CO1)
	ST		B4, 3 * SIZE(CO1)
#endif

#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
	/*	(a + bi) * (c - di) */
	ADD		C11, A1, C11		#	ac'+'bd
	ADD		C21, A2, C21
	SUB		C13, A3, C13		#	ad'+'cb
	SUB		C23, A4, C23
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A1, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i
#	LD		A2, 0 * SIZE(A)		#	load alpha_r

	LD		B1, 0 * SIZE(CO1)
	LD		B3, 2 * SIZE(CO1)
	LD		B2, 1 * SIZE(CO1)
	LD		B4, 3 * SIZE(CO1)

	MADD	B1, B1, C11, A1		#	A1 = alpha_r
	MADD	B3, B3, C21, A1
	MADD	B2, B2, C13, A1
	MADD	B4, B4, C23, A1
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B3, 2 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
	ST		B4, 3 * SIZE(CO1)
#endif

#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
	/*	(a - bi) * (c + di) */
	ADD		C11, A1, C11		#	ac'+'bd
	ADD		C21, A2, C21
	SUB		C13, C13, A3		#	ad'+'cb
	SUB		C23, C23, A4
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A1, 152($sp)		#	load alpha_r
#	LD		A2, 0 * SIZE(A)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i

	LD		B1, 0 * SIZE(CO1)
	LD		B3, 2 * SIZE(CO1)
	LD		B2, 1 * SIZE(CO1)
	LD		B4, 3 * SIZE(CO1)

	MADD	B1, B1, C11, A1		#	A1 = alpha_r
	MADD	B3, B3, C21, A1
	MADD	B2, B2, C13, A1
	MADD	B4, B4, C23, A1
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B3, 2 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
	ST		B4, 3 * SIZE(CO1)
#endif

#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
	/*	(a - bi) * (c - di) */
	SUB		C11, C11, A1		#	ac'+'bd
	SUB		C21, C21, A2
	ADD		C13, A3, C13		#	ad'+'cb
	ADD		C23, A4, C23
	LD		A1, 152($sp)		#	load alpha_r
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A2, 160($sp)
#	LD		A2, 0 * SIZE(A)		#	load alpha_i
	NEG		C13, C13		#	ad'+'cb
	NEG		C23, C23

	LD		B1, 0 * SIZE(CO1)
	LD		B3, 2 * SIZE(CO1)
	LD		B2, 1 * SIZE(CO1)
	LD		B4, 3 * SIZE(CO1)

	MADD	B1, B1, C11, A1		#	A1 = alpha_r
	MADD	B3, B3, C21, A1
	MADD	B2, B2, C13, A1
	MADD	B4, B4, C23, A1
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B3, 2 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
	ST		B4, 3 * SIZE(CO1)
#endif

#else
	daddiu	I, I, -1
	CVTU	A1, C11
	CVTU	A2, C21

	CVTU	A3, C13
	CVTU	A4, C23


#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
	/*	(a + bi) * (c + di) */
	SUB		C11, C11, A1		#	ac'+'bd
	SUB		C21, C21, A2
	ADD		C13, A3, C13		#	ad'+'cb
	ADD		C23, A4, C23
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A1, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i
#	LD		A2, 0 * SIZE(A)		#	load alpha_i

	MUL B1, C11, A1		#	A1 = alpha_r
	MUL B3, C21, A1
	MUL B2, C13, A1
	MUL B4, C23, A1
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B3, 2 * SIZE(CO1)

	ST		B2, 1 * SIZE(CO1)
	ST		B4, 3 * SIZE(CO1)
#endif

#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
	/*	(a + bi) * (c - di) */
	ADD		C11, A1, C11		#	ac'+'bd
	ADD		C21, A2, C21
	SUB		C13, A3, C13		#	ad'+'cb
	SUB		C23, A4, C23
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A1, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i
#	LD		A2, 0 * SIZE(A)		#	load alpha_r

	MUL B1, C11, A1		#	A1 = alpha_r
	MUL B3, C21, A1
	MUL B2, C13, A1
	MUL B4, C23, A1
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B3, 2 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
	ST		B4, 3 * SIZE(CO1)
#endif

#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
	/*	(a - bi) * (c + di) */
	ADD		C11, A1, C11		#	ac'+'bd
	ADD		C21, A2, C21
	SUB		C13, C13, A3		#	ad'+'cb
	SUB		C23, C23, A4
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A1, 152($sp)		#	load alpha_r
#	LD		A2, 0 * SIZE(A)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i

	MUL B1, C11, A1		#	A1 = alpha_r
	MUL B3, C21, A1
	MUL B2, C13, A1
	MUL B4, C23, A1
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B3, 2 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
	ST		B4, 3 * SIZE(CO1)
#endif

#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
	/*	(a - bi) * (c - di) */
	SUB		C11, C11, A1		#	ac'+'bd
	SUB		C21, C21, A2
	ADD		C13, A3, C13		#	ad'+'cb
	ADD		C23, A4, C23
	LD		A1, 152($sp)		#	load alpha_r
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A2, 160($sp)
#	LD		A2, 0 * SIZE(A)		#	load alpha_i
	NEG		C13, C13		#	ad'+'cb
	NEG		C23, C23

	MUL B1, C11, A1		#	A1 = alpha_r
	MUL B3, C21, A1
	MUL B2, C13, A1
	MUL B4, C23, A1
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	NMSUB	B3, B3, C23, A2
	MADD	B2, B2, C11, A2
	MADD	B4, B4, C21, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B3, 2 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
	ST		B4, 3 * SIZE(CO1)
#endif
	/* TRMM only: step AO and BO past the k-steps this tile did not
	   consume (K - KK minus the tile extent), scaled by 2 elements per
	   step for AO and 1 for BO. */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -2
#else
	daddiu	TEMP, TEMP, -1
#endif
	dsll	L, TEMP, 1 + ZBASE_SHIFT
	dsll	TEMP, TEMP, ZBASE_SHIFT

	daddu	AO, AO, L
	daddu	BO, BO, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 2
#endif

#endif
	/* Both builds: move the output pointers past the four scalars just
	   written.  CO2 is advanced as well although this block never
	   dereferences it. */
	daddiu	CO1, CO1, 4 * SIZE
	daddiu	CO2, CO2, 4 * SIZE
3677
3678
3679	.align	4
.L11:
	/* Entry test for the final strip of one along M: falls through into
	   .L111 when bit 0 of M is set, otherwise jumps to .L10. */
	andi	I, M, 1
	blez	I, .L10
	NOP					/* branch delay slot */
3684
3685	.align	4
.L111:
	/* Set-up for the strip of one: position BO (and, for TRMM, AO), zero
	   the two accumulators C11/C13, preload A1/A2 through AO ($12, R12)
	   and B1/B2 through BO ($13, R13), build the PLU copies B3/B4, and
	   compute the 4x-unrolled trip count L.  FETCH is ld into $0, so it
	   only touches memory at CO1. */
#if defined(TRMMKERNEL)
#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
	move	BO,  B
#else
	/* Tile is one wide in both directions, so AO and BO skip the same
	   byte count for the first KK steps. */
	dsll	TEMP, KK,  ZBASE_SHIFT

	daddu	AO, AO, TEMP
	daddu	BO, B,  TEMP
#endif
	MTC		$0, C11				#	clear result registers
	gsLQC1(R13, F9, F8, 0)		#	B1 B2

	gsLQC1(R12, F1, F0, 0)		#	A1 A2
	MOV		C13, C11

	FETCH	$0, 0 * SIZE(CO1)

	PLU		B3,	B1, B1
	PLU		B4, B2, B2
	/* TEMP = number of k-steps to run.  The LEFT and non-LEFT arms below
	   are identical because the tile extent is 1 either way. */
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
	dsubu	TEMP, K, KK
#elif defined(LEFT)
	daddiu	TEMP, KK, 1
#else
	daddiu	TEMP, KK, 1
#endif
	dsra	L,  TEMP, 2
	blez	L, .L112
	NOP

#else
	move	BO, B				#	Reset	B
	dsra	L, K, 2				#	L = K / 4 (loop below is unrolled 4x in K)

	MTC		$0, C11				#	clear result registers
	gsLQC1(R13, F9, F8, 0)		#	B1 B2

	gsLQC1(R12, F1, F0, 0)		#	A1 A2
	MOV		C13, C11

	FETCH	$0, 0 * SIZE(CO1)

	PLU		B3,	B1, B1
	blez	L, .L112
	PLU		B4, B2, B2		/* delay slot: executes whether or not the branch is taken */
#endif
3733
.L1110:
	/* Main K loop for the strip of one, four k-steps per iteration.
	   Each trip issues eight MADPS into C11/C13, consumes 8 * SIZE from
	   both AO and BO, and reloads A1/A2, B1/B2 and the PLU copies B3/B4
	   for the next trip.  The offset-1 loads run before the pointer
	   bumps, the offset-0 reloads after them. */
	daddiu	L, L, -1
	gsLQC1(R13, F13, F12, 1)	#	B5 B6
	MADPS	C11, C11, A1, B1

	gsLQC1(R12, F3, F2, 1)		#	A3 A4
	MADPS	C13, C13, A1, B3
	daddiu	BO, BO, 2 * 4 * SIZE	#	4 k-steps * 1 column * 2 (re, im)

	PLU		B7,	B5, B5
	PLU		B8, B6, B6
	daddiu	AO, AO, 2 * 4 * SIZE 	#	4 k-steps * 1 wide * 2 (re, im)

	MADPS	C11, C11, A2, B2
	MADPS	C13, C13, A2, B4

	MADPS	C11, C11, A3, B5
	MADPS	C13, C13, A3, B7

	gsLQC1(R13, F9, F8, 0)		#	B1 B2
	MADPS	C11, C11, A4, B6

	gsLQC1(R12, F1, F0, 0)		#	A1 A2
	MADPS	C13, C13, A4, B8

	PLU		B3,	B1, B1
	bgtz	L, .L1110
	PLU		B4, B2, B2		/* delay slot */
3762
3763
3764	.align	4
.L112:
	/* K remainder of two steps for the strip of one.  Runs when bit 1 of
	   the trip count is set (K for GEMM, TEMP for TRMM); otherwise goes
	   to the one-step remainder at .L117.  Uses the preloaded A1/A2 and
	   B1..B4, advances AO and BO by 4 * SIZE each, then reloads A1/A2,
	   B1/B2 and rebuilds B3 for .L117. */
#ifndef	TRMMKERNEL
	andi	L, K, 2
#else
	andi	L, TEMP, 2
#endif
	blez	L, .L117
	NOP					/* branch delay slot */

	MADPS	C11, C11, A1, B1
	MADPS	C13, C13, A1, B3
	daddiu	BO, BO, 4 * SIZE
	daddiu	AO, AO, 4 * SIZE

	MADPS	C11, C11, A2, B2
	MADPS	C13, C13, A2, B4

	gsLQC1(R13, F9, F8, 0)
	gsLQC1(R12, F1, F0, 0)
	PLU		B3,	B1, B1
3785
3786
3787	.align	4
.L117:
	/* K remainder of one step for the strip of one.  Runs when bit 0 of
	   the trip count is set (K for GEMM, TEMP for TRMM); otherwise goes
	   directly to the write-back at .L110.  Advances AO and BO by
	   2 * SIZE each and accumulates the last product from A1, B1, B3. */
#ifndef	TRMMKERNEL
	andi	L, K, 1
#else
	andi	L, TEMP, 1
#endif
	blez	L, .L110
	NOP					/* branch delay slot */

	daddiu	BO, BO, 2 * SIZE
	daddiu	AO, AO, 2 * SIZE

	MADPS	C11, C11, A1, B1
	MADPS	C13, C13, A1, B3
3802
3803
3804	.align	4
.L110:							#	Write Back
	/* Write-back for the strip of one (two scalars at CO1).
	   CVTU copies into A1 and A3 a value derived from C11 and C13 (CVTU
	   is a common.h macro; presumably it extracts the upper half of a
	   paired-single -- TODO confirm).  The SUB/ADD/NEG that follow combine
	   it with the accumulator, with signs chosen by the conjugation group
	   selected at build time.  Afterwards C11 feeds the scalar stored at
	   offset 0 and C13 the one stored at offset 1.
	   Unlike the wider write-backs, alpha_r is held in A4 here, not A1;
	   alpha_i is in A2.  Both come from 152($sp) and 160($sp).
	   Assuming MADD d,c,a,b is c + a*b and NMSUB d,c,a,b is c - a*b
	   (both macros live in common.h):
	     GEMM build: C is loaded, updated with alpha * result, stored.
	     TRMM build: C is not read; alpha * result is stored directly, and
	                 AO/BO/KK are then stepped.
	   NOTE(review): I is $2, and the decrement below brings it from 1 to
	   0 just before control reaches the return at .L999 -- confirm
	   whether callers rely on that value before treating it as dead. */
#ifndef	TRMMKERNEL
	daddiu	I, I, -1
	CVTU	A1, C11
	CVTU	A3, C13

#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
	/*	(a + bi) * (c + di) */
	SUB		C11, C11, A1		#	ac'+'bd
	ADD		C13, A3, C13		#	ad'+'cb
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A4, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i
#	LD		A2, 0 * SIZE(A)		#	load alpha_i

	LD		B1, 0 * SIZE(CO1)
	LD		B2, 1 * SIZE(CO1)

	MADD	B1, B1, C11, A4		#	A4 = alpha_r
	MADD	B2, B2, C13, A4
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
#endif

#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
	/*	(a + bi) * (c - di) */
	ADD		C11, A1, C11		#	ac'+'bd
	SUB		C13, A3, C13		#	ad'+'cb
	LD		A4, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i

	LD		B1, 0 * SIZE(CO1)
	LD		B2, 1 * SIZE(CO1)

	MADD	B1, B1, C11, A4		#	A4 = alpha_r
	MADD	B2, B2, C13, A4
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
#endif

#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
	/*	(a - bi) * (c + di) */
	ADD		C11, A1, C11		#	ac'+'bd
	SUB		C13, C13, A3		#	ad'+'cb
	LD		A4, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i

	LD		B1, 0 * SIZE(CO1)
	LD		B2, 1 * SIZE(CO1)

	MADD	B1, B1, C11, A4		#	A4 = alpha_r
	MADD	B2, B2, C13, A4
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
#endif

#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
	/*	(a - bi) * (c - di) */
	SUB		C11, C11, A1		#	ac'+'bd
	ADD		C13, A3, C13		#	ad'+'cb
	NEG		C13, C13
	LD		A4, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)

	LD		B1, 0 * SIZE(CO1)
	LD		B2, 1 * SIZE(CO1)

	MADD	B1, B1, C11, A4		#	A4 = alpha_r
	MADD	B2, B2, C13, A4
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
#endif

#else
	daddiu	I, I, -1
	CVTU	A1, C11
	CVTU	A3, C13

#if   defined(NN) || defined(NT) || defined(TN) || defined(TT)
	/*	(a + bi) * (c + di) */
	SUB		C11, C11, A1		#	ac'+'bd
	ADD		C13, A3, C13		#	ad'+'cb
#	LD		A1, 0 * SIZE(A)		#	load alpha_r
	LD		A4, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i
#	LD		A2, 0 * SIZE(A)		#	load alpha_i

	MUL B1, C11, A4		#	A4 = alpha_r
	MUL B2, C13, A4
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
#endif

#if   defined(NR) || defined(NC) || defined(TR) || defined(TC)
	/*	(a + bi) * (c - di) */
	ADD		C11, A1, C11		#	ac'+'bd
	SUB		C13, A3, C13		#	ad'+'cb
	LD		A4, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i

	MUL B1, C11, A4		#	A4 = alpha_r
	MUL B2, C13, A4
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
#endif

#if	  defined(RN) || defined(RT) || defined(CN) || defined(CT)
	/*	(a - bi) * (c + di) */
	ADD		C11, A1, C11		#	ac'+'bd
	SUB		C13, C13, A3		#	ad'+'cb
	LD		A4, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)		#	load alpha_i

	MUL B1, C11, A4		#	A4 = alpha_r
	MUL B2, C13, A4
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
#endif

#if   defined(RR) || defined(RC) || defined(CR) || defined(CC)
	/*	(a - bi) * (c - di) */
	SUB		C11, C11, A1		#	ac'+'bd
	ADD		C13, A3, C13		#	ad'+'cb
	NEG		C13, C13
	LD		A4, 152($sp)		#	load alpha_r
	LD		A2, 160($sp)

	MUL B1, C11, A4		#	A4 = alpha_r
	MUL B2, C13, A4
	NMSUB	B1, B1, C13, A2		#	A2 = alpha_i
	MADD	B2, B2, C11, A2

	ST		B1, 0 * SIZE(CO1)
	ST		B2, 1 * SIZE(CO1)
#endif


	/* TRMM only: step AO and BO past the k-steps this tile did not
	   consume.  The LEFT and non-LEFT arms are identical because the
	   tile extent is 1 in both directions. */
#if ( defined(LEFT) &&  defined(TRANSA)) || \
    (!defined(LEFT) && !defined(TRANSA))
	dsubu	TEMP, K, KK
#ifdef LEFT
	daddiu	TEMP, TEMP, -1
#else
	daddiu	TEMP, TEMP, -1
#endif

	dsll	TEMP, TEMP, ZBASE_SHIFT

	daddu	AO, AO, TEMP
	daddu	BO, BO, TEMP
#endif

#ifdef LEFT
	daddiu	KK, KK, 1
#endif

#endif
	/* Both builds: move the output pointers past the two scalars just
	   written.  CO2 is advanced as well although this block never
	   dereferences it. */
	daddiu	CO1, CO1, 2 * SIZE
	daddiu	CO2, CO2, 2 * SIZE
3985
3986
3987	.align	4
.L10:
	/* End of the single-column pass: B takes over the position BO has
	   reached, and a TRMM build without LEFT advances KK by the one
	   column just processed.  Falls through into the epilogue. */
	move	B, BO
#if defined(TRMMKERNEL) && !defined(LEFT)
	daddiu	KK, KK, 1
#endif
3993
.L999:
	/* Function exit: reload the saved registers from the stack frame,
	   release the STACKSIZE-byte frame and return through $31.
	   Frame slots read here: 0..48 hold $16..$22, 56..88 hold $f24..$f28,
	   96..112 hold $23..$25 (TRMM build only), 120..144 hold $f20..$f23
	   (non-__64BIT__ build only).  Slots 152 and 160 are the alpha_r and
	   alpha_i copies read by the write-back code and are not reloaded.
	   NOTE(review): only $f24..$f28 are reloaded, yet code in this file
	   writes C43 ($f30).  The matching prologue is not visible from this
	   part of the file -- confirm that the saved set covers every
	   callee-saved FP register the kernel modifies under the target ABI. */
	ld	$16,   0($sp)
	ld	$17,   8($sp)
	ld	$18,  16($sp)
	ld	$19,  24($sp)
	ld	$20,  32($sp)
	ld	$21,  40($sp)
	ld	$22,  48($sp)

	LD	$f24, 56($sp)
	LD	$f25, 64($sp)
	LD	$f26, 72($sp)
	LD	$f27, 80($sp)
	LD	$f28, 88($sp)

#if defined(TRMMKERNEL)
	ld	$23,  96($sp)
	ld	$24, 104($sp)
	ld	$25, 112($sp)
#endif

#ifndef __64BIT__
	LD	$f20,120($sp)
	LD	$f21,128($sp)
	LD	$f22,136($sp)
	LD	$f23,144($sp)
#endif

	daddiu	$sp,$sp,STACKSIZE
	j	$31
	nop				/* jump delay slot */

	EPILOGUE
4027