1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
/* Stack-slot offsets and integer register assignments used by this kernel. */
42#define ALPHA    0	/* SP offset of the double the prologue spills from f1; reloaded into AP before C is updated */
43#define FZERO	 8	/* SP offset of the two zero words the prologue stores; loaded with lfpsx to clear f0 */
44
45#define	M	r3	/* extent tiled by 8/4/2/1 in the visible code (I = M >> 3, then M & 4, M & 2, M & 1) */
46#define	N	r4	/* extent tiled by 4 in the visible code (J = N >> 2), one tile per CO1..CO4 group */
47#define	K	r5	/* inner-loop length; unrolled by 4 (by 8 in the M & 1 tail) with a remainder loop */
48
49#if defined(linux) || defined(__FreeBSD__)
50#define A	r6	/* base of the A operand; AO is derived from it biased by -4 * SIZE */
51#define	B	r7	/* base of the B operand; BO/BO2 are derived from it for every tile */
52#define	C	r8	/* base of the output; biased by -2 * SIZE on the aligned path and stepped by 4 * LDC per tile */
53#define	LDC	r9	/* distance between CO1..CO4; shifted left by BASE_SHIFT in the prologue */
54#define OFFSET	r10	/* only read under TRMMKERNEL, where it seeds KK (negated when LEFT is not defined) */
55#endif
56
57#define TEMP	r11	/* scratch for trip counts and pointer adjustments */
58#define KK	r14	/* TRMMKERNEL running offset; advanced by the tile height when LEFT is defined */
59#define INCM1	r15	/* not assigned in the visible portion of the file */
60#define INCM3	r16	/* set to -2 * SIZE on the aligned path (despite the name) */
61#define INCM5	r17	/* set to -5 * SIZE on the aligned path; no use in the visible portion */
62#define INCM7	r18	/* set to -6 * SIZE on the aligned path (despite the name) */
63#define INC2	r19	/* holds 2 * SIZE: stride of one element pair */
64#define INC	r20	/* holds 1 * SIZE */
65#define INC4	r21	/* holds 4 * SIZE: stride of two element pairs */
66
67#define	I	r22	/* counter/flag for the tiles taken from M */
68#define J	r23	/* counter for the 4-wide tiles taken from N; its decrement is outside the visible portion */
69#define AO	r24	/* streaming pointer into A, kept 4 * SIZE behind the next pair to load */
70#define BO	r25	/* streaming pointer into B, kept 4 * SIZE behind the next pair to load */
71#define AO2	r26	/* second A stream, initialised to AO + 2 * SIZE */
72#define	BO2	r27	/* second B stream, 2 * SIZE ahead of BO */
73
74#define	CO1	r28	/* first of four output pointers; starts at C */
75#define CO2	r29	/* CO1 + LDC */
76#define	CO3	r30	/* CO2 + LDC */
77#define	CO4	r31	/* CO3 + LDC */
78
79#ifndef NEEDPARAM
80
/* Floating-point register assignments.  f0-f15 are used directly as the
   accumulators; f16-f31 get the names below.  The names are nominal: once
   the unrolled loop is done with a register, the tail code reuses it for
   other data (previous C values, or B data in the remainder loops). */
81#define A1	f16	/* operand pair fed from AO/AO2; later also receives a C pair */
82#define A2	f17
83#define A3	f18
84#define A4	f19
85#define A5	f20
86#define A6	f21
87#define A7	f22
88#define A8	f23
89#define A9	f24
90#define A10	f25	/* loaded from BO (not AO) in the 8x4 remainder loop and the 4x4 unrolled loop */
91
92#define B1	f26	/* operand pair fed from BO/BO2; later also receives a C pair */
93#define B2	f27
94#define B3	f28
95#define B4	f29
96#define B5	f30
97#define B6	f31
98
99#define AP	B6	/* scaling factor loaded from ALPHA(SP); shares f31 with B6, so it is loaded only after the unrolled loop */
100
101
102	PROLOGUE
103	PROFCODE
104
105	li	r0, -16
106
107	stfpdux	f14, SP, r0
108	stfpdux	f15, SP, r0
109	stfpdux	f16, SP, r0
110	stfpdux	f17, SP, r0
111	stfpdux	f18, SP, r0
112	stfpdux	f19, SP, r0
113	stfpdux	f20, SP, r0
114	stfpdux	f21, SP, r0
115	stfpdux	f22, SP, r0
116	stfpdux	f23, SP, r0
117	stfpdux	f24, SP, r0
118	stfpdux	f25, SP, r0
119	stfpdux	f26, SP, r0
120	stfpdux	f27, SP, r0
121	stfpdux	f28, SP, r0
122	stfpdux	f29, SP, r0
123	stfpdux	f30, SP, r0
124	stfpdux	f31, SP, r0
125
126	stwu	r31,  -4(SP)
127	stwu	r30,  -4(SP)
128	stwu	r29,  -4(SP)
129	stwu	r28,  -4(SP)
130
131	stwu	r27,  -4(SP)
132	stwu	r26,  -4(SP)
133	stwu	r25,  -4(SP)
134	stwu	r24,  -4(SP)
135
136	stwu	r23,  -4(SP)
137	stwu	r22,  -4(SP)
138	stwu	r21,  -4(SP)
139	stwu	r20,  -4(SP)
140
141	stwu	r19,  -4(SP)
142	stwu	r18,  -4(SP)
143	stwu	r17,  -4(SP)
144	stwu	r16,  -4(SP)
145
146	stwu	r15,  -4(SP)
147	stwu	r14,  -4(SP)	# dummy
148
149	li	r0,   0
150
151	stwu	r0,   -4(SP)
152	stwu	r0,   -4(SP)
153	stfdu	f1,   -8(SP)
154
155	slwi	LDC, LDC, BASE_SHIFT
156
157	cmpwi	cr0, M, 0
158	ble	.L999
159	cmpwi	cr0, N, 0
160	ble	.L999
161	cmpwi	cr0, K, 0
162	ble	.L999
163
164	li	INC,    1 * SIZE
165	li	INC2,   2 * SIZE
166	li	INC4,   4 * SIZE
167
168#if defined(TRMMKERNEL) && !defined(LEFT)
169	neg	KK, OFFSET
170#endif
171
172	andi.	r0, C,   2 * SIZE - 1
173	bne	.L1000
174	andi.	r0, LDC, 2 * SIZE - 1
175	bne	.L1000
176
177/* High performance version */
178
179	li	INCM3, -2 * SIZE
180	li	INCM5, -5 * SIZE
181	li	INCM7, -6 * SIZE
182
183	addi	C, C, - 2 * SIZE
184	srawi.	J, N,  2
185	ble	.L50
186	.align 4
187
188.L10:
189	mr	CO1, C
190	add	CO2, C,   LDC
191	add	CO3, CO2, LDC
192	add	CO4, CO3, LDC
193	add	C,   CO4, LDC
194
195#if defined(TRMMKERNEL) &&  defined(LEFT)
196	mr	KK, OFFSET
197#endif
198
199	addi	AO, A, -4 * SIZE
200
201	li	r0, FZERO
202	lfpsx	f0, SP, r0
203
204	srawi.	I, M,  3
205	ble	.L20
206	.align 4
207
208.L11:
209#if defined(TRMMKERNEL)
210#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
211	addi	AO2, AO,   2 * SIZE
212	fpmr	f4,  f0
213	addi	BO,  B,  - 4 * SIZE
214	fpmr	f8,  f0
215	addi	BO2, B,  - 2 * SIZE
216	fpmr	f12, f0
217#else
218	slwi	TEMP, KK, 3 + BASE_SHIFT
219	slwi	r0,   KK, 2 + BASE_SHIFT
220	add	AO, AO, TEMP
221	add	BO, B,  r0
222
223	addi	AO2, AO,   2 * SIZE
224	fpmr	f4,  f0
225	addi	BO,  BO, - 4 * SIZE
226	fpmr	f8,  f0
227	addi	BO2, BO,   2 * SIZE
228	fpmr	f12, f0
229#endif
230
231
232#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
233	sub	TEMP, K, KK
234#elif defined(LEFT)
235	addi	TEMP, KK, 8
236#else
237	addi	TEMP, KK, 4
238#endif
239	srawi.	TEMP,  TEMP,  2
240 	fpmr	f1,  f0
241	mtspr	CTR, TEMP
242	ble	.L14
243
244#else
245	addi	AO2, AO,   2 * SIZE
246	fpmr	f4,  f0
247	addi	BO,  B,  - 4 * SIZE
248	fpmr	f8,  f0
249	addi	BO2, B,  - 2 * SIZE
250	fpmr	f12, f0
251
252	srawi.	r0,  K,  2
253 	fpmr	f1,  f0
254	mtspr	CTR, r0
255	ble	.L14
256#endif
257
258	LFPDUX	A1,  AO, INC4
259	fpmr	f5,  f0
260	LFPDUX	A3,  AO, INC4
261	fpmr	f9,  f0
262	LFPDUX	B1,  BO, INC4
263	fpmr	f13, f0
264
265	LFPDUX	A5,  AO, INC4
266	fpmr	f2,  f0
267	LFPDUX	A6,  AO, INC4
268	fpmr	f6,  f0
269	LFPDUX	B3,  BO, INC4
270	fpmr	f10, f0
271	LFPDUX	A7,  AO, INC4
272	fpmr	f14, f0
273
274	LFPDUX	A8,  AO, INC4
275	fpmr	f3,  f0
276	LFPDUX	B5,  BO, INC4
277	fpmr	f7,  f0
278	LFPDUX	A9,  AO, INC4
279	fpmr	f11, f0
280	LFPDUX	A2, AO2, INC4
281	fpmr	f15, f0
282	LFPDUX	B2, BO2, INC4
283	bdz-	.L13
284	.align 4
285
286.L12:
287
288## 1 ##
289	fxcpmadd	f0,  B1, A1, f0
290	nop
291	fxcsmadd	f4,  B1, A1, f4
292	nop
293	fxcpmadd	f8,  B2, A1, f8
294	LFPDUX	B4, BO2, INC4
295	fxcsmadd	f12, B2, A1, f12
296	LFPDUX	B6,  BO, INC4
297
298	fxcpmadd	f1,  B1, A2, f1
299	nop
300	fxcsmadd	f5,  B1, A2, f5
301	LFPDUX	A4, AO2, INC4
302	fxcpmadd	f9,  B2, A2, f9
303	LFPDUX	A10, AO, INC4
304	fxcsmadd	f13, B2, A2, f13
305	nop
306
307	fxcpmadd	f2,  B1, A3, f2
308	nop
309	fxcsmadd	f6,  B1, A3, f6
310	nop
311	fxcpmadd	f10, B2, A3, f10
312	nop
313	fxcsmadd	f14, B2, A3, f14
314	nop
315
316	fxcpmadd	f3,  B1, A4, f3
317	nop
318	fxcsmadd	f7,  B1, A4, f7
319	LFPDUX	A2, AO2, INC4
320	fxcpmadd	f11, B2, A4, f11
321	LFPDUX	A1,  AO, INC4
322	fxcsmadd	f15, B2, A4, f15
323	nop
324
325## 2 ##
326
327	fxcpmadd	f0,  B3, A5, f0
328	nop
329	fxcsmadd	f4,  B3, A5, f4
330	nop
331	fxcpmadd	f8,  B4, A5, f8
332	LFPDUX	B2, BO2, INC4
333	fxcsmadd	f12, B4, A5, f12
334	LFPDUX	B1,  BO, INC4
335
336	fxcpmadd	f1,  B3, A2, f1
337	nop
338	fxcsmadd	f5,  B3, A2, f5
339	LFPDUX	A4, AO2, INC4
340	fxcpmadd	f9,  B4, A2, f9
341	LFPDUX	A3,  AO, INC4
342	fxcsmadd	f13, B4, A2, f13
343	nop
344
345	fxcpmadd	f2,  B3, A6, f2
346	nop
347	fxcsmadd	f6,  B3, A6, f6
348	nop
349	fxcpmadd	f10, B4, A6, f10
350	nop
351	fxcsmadd	f14, B4, A6, f14
352	nop
353
354	fxcpmadd	f3,  B3, A4, f3
355	nop
356	fxcsmadd	f7,  B3, A4, f7
357	LFPDUX	A2, AO2, INC4
358	fxcpmadd	f11, B4, A4, f11
359	LFPDUX	A5,  AO, INC4
360	fxcsmadd	f15, B4, A4, f15
361	nop
362
363## 3 ##
364
365	fxcpmadd	f0,  B5, A7, f0
366	nop
367	fxcsmadd	f4,  B5, A7, f4
368	nop
369	fxcpmadd	f8,  B2, A7, f8
370	LFPDUX	B4, BO2, INC4
371	fxcsmadd	f12, B2, A7, f12
372	LFPDUX	B3,  BO, INC4
373
374	fxcpmadd	f1,  B5, A2, f1
375	nop
376	fxcsmadd	f5,  B5, A2, f5
377	LFPDUX	A4, AO2, INC4
378	fxcpmadd	f9,  B2, A2, f9
379	LFPDUX	A6,  AO, INC4
380	fxcsmadd	f13, B2, A2, f13
381	nop
382
383	fxcpmadd	f2,  B5, A8, f2
384	nop
385	fxcsmadd	f6,  B5, A8, f6
386	nop
387	fxcpmadd	f10, B2, A8, f10
388	nop
389	fxcsmadd	f14, B2, A8, f14
390	nop
391
392	fxcpmadd	f3,  B5, A4, f3
393	nop
394	fxcsmadd	f7,  B5, A4, f7
395	LFPDUX	A2, AO2, INC4
396	fxcpmadd	f11, B2, A4, f11
397	LFPDUX	A7,  AO, INC4
398	fxcsmadd	f15, B2, A4, f15
399	nop
400
401## 4 ##
402	fxcpmadd	f0,  B6, A9, f0
403	nop
404	fxcsmadd	f4,  B6, A9, f4
405	nop
406	fxcpmadd	f8,  B4, A9, f8
407	LFPDUX	B2, BO2, INC4
408	fxcsmadd	f12, B4, A9, f12
409	LFPDUX	B5,  BO, INC4
410
411	fxcpmadd	f1,  B6, A2, f1
412	nop
413	fxcsmadd	f5,  B6, A2, f5
414	LFPDUX	A4, AO2, INC4
415	fxcpmadd	f9,  B4, A2, f9
416	LFPDUX	A8,  AO, INC4
417	fxcsmadd	f13, B4, A2, f13
418	nop
419
420	fxcpmadd	f2,  B6, A10, f2
421	nop
422	fxcsmadd	f6,  B6, A10, f6
423	nop
424	fxcpmadd	f10, B4, A10, f10
425	nop
426	fxcsmadd	f14, B4, A10, f14
427	nop
428
429	fxcpmadd	f3,  B6, A4, f3
430	LFPDUX	A2, AO2, INC4
431	fxcsmadd	f7,  B6, A4, f7
432	LFPDUX	A9,  AO, INC4
433	fxcpmadd	f11, B4, A4, f11
434	nop
435	fxcsmadd	f15, B4, A4, f15
436	bdnz+	.L12
437	.align 4
438
439.L13:
440## 1 ##
441
442	fxcpmadd	f0,  B1, A1, f0
443	nop
444	fxcsmadd	f4,  B1, A1, f4
445	nop
446	fxcpmadd	f8,  B2, A1, f8
447	LFPDUX	B4, BO2, INC4
448	fxcsmadd	f12, B2, A1, f12
449	LFPDUX	B6,  BO, INC4
450
451	fxcpmadd	f1,  B1, A2, f1
452	nop
453	fxcsmadd	f5,  B1, A2, f5
454	LFPDUX	A4, AO2, INC4
455	fxcpmadd	f9,  B2, A2, f9
456	LFPDUX	A10, AO, INC4
457	fxcsmadd	f13, B2, A2, f13
458	nop
459
460	fxcpmadd	f2,  B1, A3, f2
461	nop
462	fxcsmadd	f6,  B1, A3, f6
463	nop
464	fxcpmadd	f10, B2, A3, f10
465	nop
466	fxcsmadd	f14, B2, A3, f14
467	nop
468
469	fxcpmadd	f3,  B1, A4, f3
470	nop
471	fxcsmadd	f7,  B1, A4, f7
472	LFPDUX	A2, AO2, INC4
473	fxcpmadd	f11, B2, A4, f11
474#ifndef TRMMKERNEL
475	LFPDUX	A1, CO1, INC2
476#else
477	nop
478#endif
479	fxcsmadd	f15, B2, A4, f15
480	nop
481
482## 2 ##
483
484	fxcpmadd	f0,  B3, A5, f0
485	nop
486	fxcsmadd	f4,  B3, A5, f4
487	nop
488	fxcpmadd	f8,  B4, A5, f8
489	LFPDUX	B2, BO2, INC4
490	fxcsmadd	f12, B4, A5, f12
491#ifndef TRMMKERNEL
492	LFPDUX	B1, CO1, INC4
493#else
494	nop
495#endif
496
497	fxcpmadd	f1,  B3, A2, f1
498	nop
499	fxcsmadd	f5,  B3, A2, f5
500	LFPDUX	A4, AO2, INC4
501	fxcpmadd	f9,  B4, A2, f9
502#ifndef TRMMKERNEL
503 	LFPDUX	A3, CO2, INC2
504#else
505	nop
506#endif
507	fxcsmadd	f13, B4, A2, f13
508	nop
509
510	fxcpmadd	f2,  B3, A6, f2
511	nop
512	fxcsmadd	f6,  B3, A6, f6
513	nop
514	fxcpmadd	f10, B4, A6, f10
515	nop
516	fxcsmadd	f14, B4, A6, f14
517	nop
518
519	fxcpmadd	f3,  B3, A4, f3
520	nop
521	fxcsmadd	f7,  B3, A4, f7
522	LFPDUX	A2, AO2, INC4
523	fxcpmadd	f11, B4, A4, f11
524#ifndef TRMMKERNEL
525	LFPDUX	A5, CO2, INC4
526#else
527	nop
528#endif
529	fxcsmadd	f15, B4, A4, f15
530	nop
531
532## 3 ##
533
534	fxcpmadd	f0,  B5, A7, f0
535	nop
536	fxcsmadd	f4,  B5, A7, f4
537	nop
538	fxcpmadd	f8,  B2, A7, f8
539	LFPDUX	B4, BO2, INC4
540	fxcsmadd	f12, B2, A7, f12
541#ifndef TRMMKERNEL
542	LFPDUX	B3, CO3, INC2
543#else
544	nop
545#endif
546
547	fxcpmadd	f1,  B5, A2, f1
548	nop
549	fxcsmadd	f5,  B5, A2, f5
550	LFPDUX	A4, AO2, INC4
551	fxcpmadd	f9,  B2, A2, f9
552#ifndef TRMMKERNEL
553	LFPDUX	A6, CO3, INC4
554#else
555	nop
556#endif
557	fxcsmadd	f13, B2, A2, f13
558	nop
559
560	fxcpmadd	f2,  B5, A8, f2
561	nop
562	fxcsmadd	f6,  B5, A8, f6
563	nop
564	fxcpmadd	f10, B2, A8, f10
565	nop
566	fxcsmadd	f14, B2, A8, f14
567	nop
568
569	fxcpmadd	f3,  B5, A4, f3
570	nop
571	fxcsmadd	f7,  B5, A4, f7
572	LFPDUX	A2, AO2, INC4
573	fxcpmadd	f11, B2, A4, f11
574#ifndef TRMMKERNEL
575	LFPDUX	A7, CO4, INC2
576#else
577	nop
578#endif
579	fxcsmadd	f15, B2, A4, f15
580	nop
581
582## 4 ##
583
584	fxcpmadd	f0,  B6, A9, f0
585	nop
586	fxcsmadd	f4,  B6, A9, f4
587	nop
588	fxcpmadd	f8,  B4, A9, f8
589	nop
590	fxcsmadd	f12, B4, A9, f12
591#ifndef TRMMKERNEL
592	LFPDUX	B2, CO4, INC4
593#else
594	nop
595#endif
596
597	fxcpmadd	f1,  B6, A2, f1
598	nop
599	fxcsmadd	f5,  B6, A2, f5
600	LFPDUX	A4, AO2, INC4
601	fxcpmadd	f9,  B4, A2, f9
602#ifndef TRMMKERNEL
603	LFPDUX	B5, CO1, INCM3
604#else
605	nop
606#endif
607	fxcsmadd	f13, B4, A2, f13
608	nop
609
610	fxcpmadd	f2,  B6, A10, f2
611	nop
612	fxcsmadd	f6,  B6, A10, f6
613	nop
614	fxcpmadd	f10, B4, A10, f10
615	nop
616	fxcsmadd	f14, B4, A10, f14
617#ifndef TRMMKERNEL
618    	LFPDUX	A8, CO1, INC4
619#else
620	nop
621#endif
622
623	fxcpmadd	f3,  B6, A4, f3
624	nop
625	fxcsmadd	f7,  B6, A4, f7
626	nop
627	fxcpmadd	f11, B4, A4, f11
628	nop
629	fxcsmadd	f15, B4, A4, f15
630#ifndef TRMMKERNEL
631	LFPDUX	A9, CO2, INCM3
632#else
633	nop
634#endif
635	.align 4
636
637.L14:
638	lfd	AP,  ALPHA(SP)
639#ifdef TRMMKERNEL
640       fsmfp	AP, AP
641#endif
642
643#if defined(TRMMKERNEL)
644#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
645	sub	TEMP, K, KK
646#elif defined(LEFT)
647	addi	TEMP, KK, 8
648#else
649	addi	TEMP, KK, 4
650#endif
651	andi.	r0,  TEMP,  3
652	mtspr	CTR, r0
653	ble+	.L18
654
655	cmpwi	cr0, TEMP, 3
656	bgt+	.L15
657#else
658	andi.	r0,  K,  3
659	mtspr	CTR, r0
660	ble+	.L18
661
662	cmpwi	cr0, K, 3
663	bgt+	.L15
664#endif
665
666#ifndef TRMMKERNEL
667	LFPDUX	A1, CO1, INC2
668	fpmr	f5,  f0
669	LFPDUX	B1, CO1, INC4
670	fpmr	f9,  f0
671 	LFPDUX	A3, CO2, INC2
672	fpmr	f13, f0
673	LFPDUX	A5, CO2, INC4
674	fpmr	f2,  f0
675
676	LFPDUX	B3, CO3, INC2
677	fpmr	f6,  f0
678	LFPDUX	A6, CO3, INC4
679	fpmr	f10, f0
680	LFPDUX	A7,  CO4, INC2
681	fpmr	f14, f0
682	LFPDUX	B2,  CO4, INC4
683	fpmr	f3,  f0
684
685	LFPDUX	B5, CO1, INCM3
686	fpmr	f7,  f0
687    	LFPDUX	A8, CO1, INC4
688	fpmr	f11, f0
689	LFPDUX	A9, CO2, INCM3
690	fpmr	f15, f0
691#else
692	fpmr	f5,  f0
693	fpmr	f9,  f0
694	fpmr	f13, f0
695	fpmr	f2,  f0
696
697	fpmr	f6,  f0
698	fpmr	f10, f0
699	fpmr	f14, f0
700	fpmr	f3,  f0
701
702	fpmr	f7,  f0
703	fpmr	f11, f0
704	fpmr	f15, f0
705	nop
706#endif
707	.align 4
708
709.L15:
710	LFPDUX	A2,  AO,  INC4
711	LFPDUX	A4,  AO2, INC4
712	LFPDUX	A10, BO,  INC4
713	LFPDUX	B4,  BO2, INC4
714	bdz-	.L17
715	.align 4
716
717.L16:
718	fxcpmadd	f0,  A10, A2, f0
719	fxcsmadd	f4,  A10, A2, f4
720	fxcpmadd	f8,  B4, A2, f8
721	fxcsmadd	f12, B4, A2, f12
722	LFPDUX	A2, AO,  INC4
723
724	fxcpmadd	f1,  A10, A4, f1
725	fxcsmadd	f5,  A10, A4, f5
726	fxcpmadd	f9,  B4, A4, f9
727	fxcsmadd	f13, B4, A4, f13
728	LFPDUX	A4, AO2, INC4
729
730	fxcpmadd	f2,  A10, A2, f2
731	fxcsmadd	f6,  A10, A2, f6
732	fxcpmadd	f10, B4, A2, f10
733	fxcsmadd	f14, B4, A2, f14
734	LFPDUX	A2, AO,  INC4
735
736	fxcpmadd	f3,  A10, A4, f3
737	fxcsmadd	f7,  A10, A4, f7
738	LFPDUX	A10, BO,  INC4
739	fxcpmadd	f11, B4, A4, f11
740	fxcsmadd	f15, B4, A4, f15
741	LFPDUX	A4, AO2, INC4
742	LFPDUX	B4, BO2, INC4
743	bdnz+	.L16
744	.align 4
745
746.L17:
747	fxcpmadd	f0,  A10, A2, f0
748	fxcsmadd	f4,  A10, A2, f4
749	fxcpmadd	f8,  B4, A2, f8
750	fxcsmadd	f12, B4, A2, f12
751	LFPDUX	A2, AO,  INC4
752
753	fxcpmadd	f1,  A10, A4, f1
754	fxcsmadd	f5,  A10, A4, f5
755	fxcpmadd	f9,  B4, A4, f9
756	fxcsmadd	f13, B4, A4, f13
757	LFPDUX	A4, AO2, INC4
758
759	fxcpmadd	f2,  A10, A2, f2
760	fxcsmadd	f6,  A10, A2, f6
761	fxcpmadd	f10, B4, A2, f10
762	fxcsmadd	f14, B4, A2, f14
763
764	fxcpmadd	f3,  A10, A4, f3
765	fxcsmadd	f7,  A10, A4, f7
766	fxcpmadd	f11, B4, A4, f11
767	fxcsmadd	f15, B4, A4, f15
768	.align 4
769
770.L18:
771#ifndef TRMMKERNEL
772	fxcpmadd	f0,  AP, f0,  A1
773	LFPDUX	B4, CO2, INC4
774	fxcpmadd	f1,  AP, f1,  B5
775	LFPDUX	A2, CO3, INCM3
776
777	fxcpmadd	f2,  AP, f2,  B1
778 	LFPDUX	A4, CO3, INC4
779	fxcpmadd	f3,  AP, f3,  A8
780	LFPDUX	A10,  CO4, INCM3
781
782	fxcpmadd	f4,  AP, f4,  A3
783	LFPDUX	A1,  CO4, INC4
784	fxcpmadd	f5,  AP, f5,  A9
785	STFPDUX	f0,  CO1, INCM7
786
787	fxcpmadd	f6,  AP, f6,  A5
788 	STFPDUX	f1,  CO1, INC2
789	fxcpmadd	f7,  AP, f7,  B4
790	STFPDUX	f2,  CO1, INC2
791
792	fxcpmadd	f8,  AP, f8,  B3
793	STFPDUX	f3,  CO1, INC2
794	fxcpmadd	f9,  AP, f9,  A2
795	STFPDUX	f4,  CO2, INCM7
796
797	fxcpmadd	f10, AP, f10, A6
798	STFPDUX	f5,  CO2, INC2
799	fxcpmadd	f11, AP, f11, A4
800	STFPDUX	f6,  CO2, INC2
801
802	fxcpmadd	f12, AP, f12, A7
803	STFPDUX	f7,  CO2, INC2
804	fxcpmadd	f13, AP, f13, A10
805	STFPDUX	f8,  CO3, INCM7
806
807	fxcpmadd	f14, AP, f14, B2
808	STFPDUX	f9,  CO3, INC2
809	fxcpmadd	f15, AP, f15, A1
810	STFPDUX	f10, CO3, INC2
811
812	STFPDUX	f11, CO3, INC2
813	STFPDUX	f12, CO4, INCM7
814	STFPDUX	f13, CO4, INC2
815	STFPDUX	f14, CO4, INC2
816	STFPDUX	f15, CO4, INC2
817#else
818	fpmul	f0,  AP, f0
819	fpmul	f1,  AP, f1
820	fpmul	f2,  AP, f2
821	fpmul	f3,  AP, f3
822
823	fpmul	f4,  AP, f4
824	fpmul	f5,  AP, f5
825	STFPDUX	f0,  CO1, INC2
826
827	fpmul	f6,  AP, f6
828 	STFPDUX	f1,  CO1, INC2
829	fpmul	f7,  AP, f7
830	STFPDUX	f2,  CO1, INC2
831
832	fpmul	f8,  AP, f8
833	STFPDUX	f3,  CO1, INC2
834	fpmul	f9,  AP, f9
835	STFPDUX	f4,  CO2, INC2
836
837	fpmul	f10, AP, f10
838	STFPDUX	f5,  CO2, INC2
839	fpmul	f11, AP, f11
840	STFPDUX	f6,  CO2, INC2
841
842	fpmul	f12, AP, f12
843	STFPDUX	f7,  CO2, INC2
844	fpmul	f13, AP, f13
845	STFPDUX	f8,  CO3, INC2
846
847	fpmul	f14, AP, f14
848	STFPDUX	f9,  CO3, INC2
849	fpmul	f15, AP, f15
850	STFPDUX	f10, CO3, INC2
851
852	STFPDUX	f11, CO3, INC2
853	STFPDUX	f12, CO4, INC2
854	STFPDUX	f13, CO4, INC2
855	STFPDUX	f14, CO4, INC2
856	STFPDUX	f15, CO4, INC2
857#endif
858
859#ifdef TRMMKERNEL
860#if ( defined(LEFT) &&  defined(TRANSA)) || \
861    (!defined(LEFT) && !defined(TRANSA))
862	sub	TEMP, K, KK
863#ifdef LEFT
864	addi	TEMP, TEMP, -8
865#else
866	addi	TEMP, TEMP, -4
867#endif
868	slwi	r0,   TEMP, 3 + BASE_SHIFT
869	slwi	TEMP, TEMP, 2 + BASE_SHIFT
870	add	AO, AO, r0
871	add	BO, BO, TEMP
872#endif
873
874#ifdef LEFT
875	addi	KK, KK, 8
876#endif
877#endif
878
879	addic.	I, I, -1
880	li	r0, FZERO
881
882	lfpsx	f0, SP, r0
883	bgt+	.L11
884	.align 4
885
886.L20:
887	andi.	I, M,  4
888	beq	.L30
889
890#if defined(TRMMKERNEL)
891#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
892	addi	AO2, AO,   2 * SIZE
893	fpmr	f4,  f0
894	addi	BO,  B,  - 4 * SIZE
895	fpmr	f8,  f0
896	addi	BO2, B,  - 2 * SIZE
897	fpmr	f12, f0
898#else
899	slwi	TEMP, KK, 2 + BASE_SHIFT
900	slwi	r0,   KK, 2 + BASE_SHIFT
901	add	AO, AO, TEMP
902	add	BO, B,  r0
903
904	addi	AO2, AO,   2 * SIZE
905	fpmr	f4,  f0
906	addi	BO,  BO,  - 4 * SIZE
907	fpmr	f8,  f0
908	addi	BO2, BO,    2 * SIZE
909	fpmr	f12, f0
910#endif
911
912#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
913	sub	TEMP, K, KK
914#elif defined(LEFT)
915	addi	TEMP, KK, 4
916#else
917	addi	TEMP, KK, 4
918#endif
919
920	srawi.	TEMP,  TEMP,  2
921 	fpmr	f1,  f0
922	fpmr	f5,  f0
923	fpmr	f9,  f0
924	mtspr	CTR, TEMP
925	fpmr	f13, f0
926	ble	.L24
927#else
928	addi	AO2, AO,   2 * SIZE
929	fpmr	f4,  f0
930	addi	BO,  B,  - 4 * SIZE
931	fpmr	f8,  f0
932	addi	BO2, B,  - 2 * SIZE
933	fpmr	f12, f0
934
935	srawi.	r0,  K,  2
936 	fpmr	f1,  f0
937	fpmr	f5,  f0
938	fpmr	f9,  f0
939	mtspr	CTR, r0
940	fpmr	f13, f0
941	ble	.L24
942#endif
943
944	LFPDUX	A1,   AO, INC4
945	LFPDUX	B1,   BO, INC4
946	LFPDUX	A2,  AO2, INC4
947	LFPDUX	B2,  BO2, INC4
948	LFPDUX	A3,   AO, INC4
949	LFPDUX	B3,   BO, INC4
950	LFPDUX	A4,  AO2, INC4
951	LFPDUX	B4,  BO2, INC4
952
953	LFPDUX	A5,   AO, INC4
954	LFPDUX	B5,   BO, INC4
955	LFPDUX	A6,  AO2, INC4
956	LFPDUX	B6,  BO2, INC4
957	LFPDUX	A7,   AO, INC4
958	LFPDUX	A9,   BO, INC4
959	LFPDUX	A10, BO2, INC4
960	bdz-	.L23
961	.align 4
962
963.L22:
964	fxcpmadd	f0,  B1, A1, f0
965	nop
966	fxcsmadd	f4,  B1, A1, f4
967	LFPDUX	A8,  AO2, INC4
968	fxcpmadd	f8,  B2, A1, f8
969	nop
970	fxcsmadd	f12, B2, A1, f12
971	LFPDUX	A1,   AO, INC4
972
973	fxcpmadd	f1,  B1, A2, f1
974	nop
975	fxcsmadd	f5,  B1, A2, f5
976	LFPDUX	B1,   BO, INC4
977	fxcpmadd	f9,  B2, A2, f9
978	nop
979	fxcsmadd	f13, B2, A2, f13
980	LFPDUX	B2,  BO2, INC4
981
982	fxcpmadd	f0,  B3, A3, f0
983	nop
984	fxcsmadd	f4,  B3, A3, f4
985	LFPDUX	A2,  AO2, INC4
986	fxcpmadd	f8,  B4, A3, f8
987	nop
988	fxcsmadd	f12, B4, A3, f12
989	LFPDUX	A3,   AO, INC4
990
991	fxcpmadd	f1,  B3, A4, f1
992	nop
993	fxcsmadd	f5,  B3, A4, f5
994	LFPDUX	B3,   BO, INC4
995	fxcpmadd	f9,  B4, A4, f9
996	nop
997	fxcsmadd	f13, B4, A4, f13
998	LFPDUX	B4,  BO2, INC4
999
1000	fxcpmadd	f0,  B5, A5, f0
1001	nop
1002	fxcsmadd	f4,  B5, A5, f4
1003	LFPDUX	A4,  AO2, INC4
1004	fxcpmadd	f8,  B6, A5, f8
1005	nop
1006	fxcsmadd	f12, B6, A5, f12
1007	LFPDUX	A5,   AO, INC4
1008
1009	fxcpmadd	f1,  B5, A6, f1
1010	nop
1011	fxcsmadd	f5,  B5, A6, f5
1012	LFPDUX	B5,   BO, INC4
1013	fxcpmadd	f9,  B6, A6, f9
1014	nop
1015	fxcsmadd	f13, B6, A6, f13
1016	LFPDUX	B6,  BO2, INC4
1017
1018	fxcpmadd	f0,  A9,  A7, f0
1019	nop
1020	fxcsmadd	f4,  A9,  A7, f4
1021	LFPDUX	A6,  AO2, INC4
1022	fxcpmadd	f8,  A10, A7, f8
1023	nop
1024	fxcsmadd	f12, A10, A7, f12
1025	LFPDUX	A7,   AO, INC4
1026
1027	fxcpmadd	f1,  A9,  A8, f1
1028	nop
1029	fxcsmadd	f5,  A9,  A8, f5
1030	LFPDUX	A9,   BO, INC4
1031	fxcpmadd	f9,  A10, A8, f9
1032	nop
1033	fxcsmadd	f13, A10, A8, f13
1034	LFPDUX	A10, BO2, INC4
1035	bdnz+	.L22
1036	.align 4
1037
1038.L23:
1039	fxcpmadd	f0,  B1, A1, f0
1040	fxcsmadd	f4,  B1, A1, f4
1041	LFPDUX	A8,  AO2, INC4
1042	fxcpmadd	f8,  B2, A1, f8
1043	fxcsmadd	f12, B2, A1, f12
1044
1045	fxcpmadd	f1,  B1, A2, f1
1046	fxcsmadd	f5,  B1, A2, f5
1047	fxcpmadd	f9,  B2, A2, f9
1048	fxcsmadd	f13, B2, A2, f13
1049
1050	fxcpmadd	f0,  B3, A3, f0
1051	fxcsmadd	f4,  B3, A3, f4
1052	fxcpmadd	f8,  B4, A3, f8
1053	fxcsmadd	f12, B4, A3, f12
1054
1055	fxcpmadd	f1,  B3, A4, f1
1056	fxcsmadd	f5,  B3, A4, f5
1057	fxcpmadd	f9,  B4, A4, f9
1058	fxcsmadd	f13, B4, A4, f13
1059
1060	fxcpmadd	f0,  B5, A5, f0
1061	fxcsmadd	f4,  B5, A5, f4
1062	fxcpmadd	f8,  B6, A5, f8
1063	fxcsmadd	f12, B6, A5, f12
1064
1065	fxcpmadd	f1,  B5, A6, f1
1066	fxcsmadd	f5,  B5, A6, f5
1067	fxcpmadd	f9,  B6, A6, f9
1068	fxcsmadd	f13, B6, A6, f13
1069
1070	fxcpmadd	f0,  A9, A7, f0
1071	fxcsmadd	f4,  A9, A7, f4
1072	fxcpmadd	f8,  A10, A7, f8
1073	fxcsmadd	f12, A10, A7, f12
1074
1075	fxcpmadd	f1,  A9, A8, f1
1076	fxcsmadd	f5,  A9, A8, f5
1077	fxcpmadd	f9,  A10, A8, f9
1078	fxcsmadd	f13, A10, A8, f13
1079	.align 4
1080
1081.L24:
1082	lfd	AP,  ALPHA(SP)
1083#ifdef TRMMKERNEL
1084       fsmfp	AP, AP
1085#endif
1086
1087#if defined(TRMMKERNEL)
1088#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
1089	sub	TEMP, K, KK
1090#elif defined(LEFT)
1091	addi	TEMP, KK, 4
1092#else
1093	addi	TEMP, KK, 4
1094#endif
1095	andi.	TEMP,  TEMP,  3
1096	mtspr	CTR, TEMP
1097#else
1098	andi.	r0,  K,  3
1099	mtspr	CTR, r0
1100#endif
1101	ble+	.L28
1102
1103	LFPDUX	A1,  AO,  INC4
1104	LFPDUX	A2,  AO2, INC4
1105	LFPDUX	B1,  BO,  INC4
1106	LFPDUX	B2,  BO2, INC4
1107	bdz-	.L27
1108	.align 4
1109
1110.L26:
1111	fxcpmadd	f0,  B1, A1, f0
1112	fxcsmadd	f4,  B1, A1, f4
1113	fxcpmadd	f8,  B2, A1, f8
1114	fxcsmadd	f12, B2, A1, f12
1115	LFPDUX	A1,  AO,  INC4
1116
1117	fxcpmadd	f1,  B1, A2, f1
1118	fxcsmadd	f5,  B1, A2, f5
1119	LFPDUX	B1,  BO,  INC4
1120	fxcpmadd	f9,  B2, A2, f9
1121	fxcsmadd	f13, B2, A2, f13
1122	LFPDUX	A2,  AO2, INC4
1123	LFPDUX	B2,  BO2, INC4
1124	bdnz+	.L26
1125	.align 4
1126
1127.L27:
1128	fxcpmadd	f0,  B1, A1, f0
1129	fxcsmadd	f4,  B1, A1, f4
1130	fxcpmadd	f8,  B2, A1, f8
1131	fxcsmadd	f12, B2, A1, f12
1132
1133	fxcpmadd	f1,  B1, A2, f1
1134	fxcsmadd	f5,  B1, A2, f5
1135	fxcpmadd	f9,  B2, A2, f9
1136	fxcsmadd	f13, B2, A2, f13
1137	.align 4
1138
1139.L28:
1140#ifndef TRMMKERNEL
1141	LFPDUX	A1, CO1, INC2
1142	LFPDUX	B1, CO1, INC2
1143	LFPDUX	B3, CO2, INC2
1144	LFPDUX	A6, CO2, INC2
1145
1146	LFPDUX	B5, CO3, INC2
1147	LFPDUX	A8, CO3, INC2
1148	LFPDUX	A2, CO4, INC2
1149	LFPDUX	A4, CO4, INC2
1150
1151	fxcpmadd	f0,  AP, f0,  A1
1152	fxcpmadd	f1,  AP, f1,  B1
1153	fxcpmadd	f4,  AP, f4,  B3
1154	fxcpmadd	f5,  AP, f5,  A6
1155
1156	fxcpmadd	f8,  AP, f8,  B5
1157	fxcpmadd	f9,  AP, f9,  A8
1158	STFPDUX	f0,  CO1, INCM3
1159	fxcpmadd	f12, AP, f12, A2
1160 	STFPDUX	f1,  CO1, INC2
1161	fxcpmadd	f13, AP, f13, A4
1162	STFPDUX	f4,  CO2, INCM3
1163
1164	STFPDUX	f5,  CO2, INC2
1165	STFPDUX	f8,  CO3, INCM3
1166	STFPDUX	f9,  CO3, INC2
1167	STFPDUX	f12, CO4, INCM3
1168	STFPDUX	f13, CO4, INC2
1169#else
1170	fpmul	f0,  AP, f0
1171	fpmul	f1,  AP, f1
1172	fpmul	f4,  AP, f4
1173	fpmul	f5,  AP, f5
1174
1175	fpmul	f8,  AP, f8
1176	fpmul	f9,  AP, f9
1177	STFPDUX	f0,  CO1, INC2
1178	fpmul	f12, AP, f12
1179 	STFPDUX	f1,  CO1, INC2
1180	fpmul	f13, AP, f13
1181	STFPDUX	f4,  CO2, INC2
1182
1183	STFPDUX	f5,  CO2, INC2
1184	STFPDUX	f8,  CO3, INC2
1185	STFPDUX	f9,  CO3, INC2
1186	STFPDUX	f12, CO4, INC2
1187	STFPDUX	f13, CO4, INC2
1188#endif
1189
1190
1191#ifdef TRMMKERNEL
1192#if ( defined(LEFT) &&  defined(TRANSA)) || \
1193    (!defined(LEFT) && !defined(TRANSA))
1194	sub	TEMP, K, KK
1195#ifdef LEFT
1196	addi	TEMP, TEMP, -4
1197#else
1198	addi	TEMP, TEMP, -4
1199#endif
1200	slwi	r0,   TEMP, 2 + BASE_SHIFT
1201	slwi	TEMP, TEMP, 2 + BASE_SHIFT
1202	add	AO, AO, r0
1203	add	BO, BO, TEMP
1204#endif
1205
1206#ifdef LEFT
1207	addi	KK, KK, 4
1208#endif
1209#endif
1210
1211	li	r0, FZERO
1212	lfpsx	f0, SP, r0
1213	.align 4
1214
1215.L30:
1216	andi.	I, M,  2
1217	beq	.L40
1218
1219#if defined(TRMMKERNEL)
1220#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
1221	addi	AO2, AO,   2 * SIZE
1222	fpmr	f1,  f0
1223	addi	BO,  B,  - 4 * SIZE
1224	fpmr	f2,  f0
1225	addi	BO2, B,  - 2 * SIZE
1226	fpmr	f3, f0
1227#else
1228	slwi	TEMP, KK, 1 + BASE_SHIFT
1229	slwi	r0,   KK, 2 + BASE_SHIFT
1230	add	AO, AO, TEMP
1231	add	BO, B,  r0
1232
1233	addi	AO2, AO,   2 * SIZE
1234	fpmr	f1,  f0
1235	addi	BO,  BO, - 4 * SIZE
1236	fpmr	f2,  f0
1237	addi	BO2, BO,   2 * SIZE
1238	fpmr	f3, f0
1239#endif
1240
1241#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
1242	sub	TEMP, K, KK
1243#elif defined(LEFT)
1244	addi	TEMP, KK, 2
1245#else
1246	addi	TEMP, KK, 4
1247#endif
1248
1249	srawi.	r0,  TEMP,  2
1250	mtspr	CTR, r0
1251	ble	.L34
1252
1253#else
1254	addi	AO2, AO,   2 * SIZE
1255	fpmr	f1,  f0
1256	addi	BO,  B,  - 4 * SIZE
1257	fpmr	f2,  f0
1258	addi	BO2, B,  - 2 * SIZE
1259	fpmr	f3, f0
1260
1261	srawi.	r0,  K,  2
1262	mtspr	CTR, r0
1263	ble	.L34
1264#endif
1265
1266	LFPDUX	A1,  AO, INC4
1267	LFPDUX	B1,  BO, INC4
1268	LFPDUX	B2, BO2, INC4
1269	LFPDUX	A2, AO2, INC4
1270	LFPDUX	B3,  BO, INC4
1271	LFPDUX	B4, BO2, INC4
1272
1273	LFPDUX	A3,  AO, INC4
1274	LFPDUX	A5,  BO, INC4
1275	LFPDUX	A6, BO2, INC4
1276	LFPDUX	A4, AO2, INC4
1277	LFPDUX	A7,  BO, INC4
1278	LFPDUX	A8, BO2, INC4
1279	bdz-	.L33
1280	.align 4
1281
1282.L32:
1283	fxcpmadd	f0,  B1, A1, f0
1284	fxcsmadd	f1,  B1, A1, f1
1285	LFPDUX	B1,  BO, INC4
1286	fxcpmadd	f2,  B2, A1, f2
1287	fxcsmadd	f3,  B2, A1, f3
1288	LFPDUX	B2, BO2, INC4
1289	LFPDUX	A1,  AO, INC4
1290
1291	fxcpmadd	f0,  B3, A2, f0
1292	fxcsmadd	f1,  B3, A2, f1
1293	LFPDUX	B3,  BO, INC4
1294	fxcpmadd	f2,  B4, A2, f2
1295	fxcsmadd	f3,  B4, A2, f3
1296	LFPDUX	B4, BO2, INC4
1297	LFPDUX	A2, AO2, INC4
1298
1299	fxcpmadd	f0,  A5, A3, f0
1300	fxcsmadd	f1,  A5, A3, f1
1301	LFPDUX	A5,  BO, INC4
1302	fxcpmadd	f2,  A6, A3, f2
1303	fxcsmadd	f3,  A6, A3, f3
1304	LFPDUX	A6, BO2, INC4
1305	LFPDUX	A3,  AO, INC4
1306
1307	fxcpmadd	f0,  A7, A4, f0
1308	fxcsmadd	f1,  A7, A4, f1
1309	LFPDUX	A7,  BO, INC4
1310	fxcpmadd	f2,  A8, A4, f2
1311	fxcsmadd	f3,  A8, A4, f3
1312	LFPDUX	A8, BO2, INC4
1313	LFPDUX	A4, AO2, INC4
1314	bdnz+	.L32
1315	.align 4
1316
1317.L33:
1318	fxcpmadd	f0,  B1, A1, f0
1319	fxcsmadd	f1,  B1, A1, f1
1320	fxcpmadd	f2,  B2, A1, f2
1321	fxcsmadd	f3,  B2, A1, f3
1322
1323	fxcpmadd	f0,  B3, A2, f0
1324	fxcsmadd	f1,  B3, A2, f1
1325	fxcpmadd	f2,  B4, A2, f2
1326	fxcsmadd	f3,  B4, A2, f3
1327
1328	fxcpmadd	f0,  A5, A3, f0
1329	fxcsmadd	f1,  A5, A3, f1
1330	fxcpmadd	f2,  A6, A3, f2
1331	fxcsmadd	f3,  A6, A3, f3
1332
1333	fxcpmadd	f0,  A7, A4, f0
1334	fxcsmadd	f1,  A7, A4, f1
1335	fxcpmadd	f2,  A8, A4, f2
1336	fxcsmadd	f3,  A8, A4, f3
1337	.align 4
1338
1339.L34:
1340	lfd	AP,  ALPHA(SP)
1341#ifdef TRMMKERNEL
1342       fsmfp	AP, AP
1343#endif
1344
1345#if defined(TRMMKERNEL)
1346#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
1347	sub	TEMP, K, KK
1348#elif defined(LEFT)
1349	addi	TEMP, KK, 2
1350#else
1351	addi	TEMP, KK, 4
1352#endif
1353	andi.	TEMP,  TEMP,  3
1354	mtspr	CTR, TEMP
1355#else
1356	andi.	r0,  K,  3
1357	mtspr	CTR, r0
1358#endif
1359	ble+	.L38
1360
1361	LFPDX	A1,  AO,  INC4
1362	LFPDUX	B1,  BO,  INC4
1363	LFPDUX	B2,  BO2, INC4
1364	add	AO, AO, INC2
1365	bdz-	.L37
1366	.align 4
1367
1368.L36:
1369	fxcpmadd	f0,  B1, A1, f0
1370	fxcsmadd	f1,  B1, A1, f1
1371	LFPDUX	B1,  BO,  INC4
1372	fxcpmadd	f2,  B2, A1, f2
1373	fxcsmadd	f3,  B2, A1, f3
1374	LFPDX	A1,  AO,  INC4
1375	LFPDUX	B2,  BO2, INC4
1376	add	AO, AO, INC2
1377	bdnz+	.L36
1378	.align 4
1379
1380.L37:
1381	fxcpmadd	f0,  B1, A1, f0
1382	fxcsmadd	f1,  B1, A1, f1
1383	fxcpmadd	f2,  B2, A1, f2
1384	fxcsmadd	f3,  B2, A1, f3
1385	.align 4
1386
1387.L38:
1388#ifndef TRMMKERNEL
1389	LFPDX	A1, CO1, INC2
1390	LFPDX	A2, CO2, INC2
1391	LFPDX	A3, CO3, INC2
1392	LFPDX	A4, CO4, INC2
1393
1394	fxcpmadd	f0, AP, f0, A1
1395	fxcpmadd	f1, AP, f1, A2
1396	fxcpmadd	f2, AP, f2, A3
1397	fxcpmadd	f3, AP, f3, A4
1398#else
1399	fpmul	f0, AP, f0
1400	fpmul	f1, AP, f1
1401	fpmul	f2, AP, f2
1402	fpmul	f3, AP, f3
1403#endif
1404
1405	STFPDUX	f0,  CO1, INC2
1406	STFPDUX	f1,  CO2, INC2
1407	STFPDUX	f2,  CO3, INC2
1408	STFPDUX	f3,  CO4, INC2
1409
1410#ifdef TRMMKERNEL
1411#if ( defined(LEFT) &&  defined(TRANSA)) || \
1412    (!defined(LEFT) && !defined(TRANSA))
1413	sub	TEMP, K, KK
1414#ifdef LEFT
1415	addi	TEMP, TEMP, -2
1416#else
1417	addi	TEMP, TEMP, -4
1418#endif
1419	slwi	r0,   TEMP, 1 + BASE_SHIFT
1420	slwi	TEMP, TEMP, 2 + BASE_SHIFT
1421	add	AO, AO, r0
1422	add	BO, BO, TEMP
1423#endif
1424
1425#ifdef LEFT
1426	addi	KK, KK, 2
1427#endif
1428#endif
1429
1430	li	r0, FZERO
1431	lfpsx	f0, SP, r0
1432	.align 4
1433
/*
 * .L40 - .L48: single-row remainder of the current 4-column panel.
 * Executed only when bit 0 of M is set; otherwise control goes to .L49.
 * f0-f3 accumulate products over the k dimension, are scaled by alpha
 * (and, in the non-TRMM build, added to the values read through CO1..CO4)
 * and one value is written through each of CO1..CO4.
 * NOTE(review): assumes f0 is zero on entry (it is loaded from the FZERO
 * stack slot right before this label) - confirm FZERO holds 0.0.
 * NOTE(review): assumes the FP2 convention that fxcpmadd scales by the
 * primary half of its first source and fxcsmadd by the secondary half -
 * confirm against the ISA reference.
 */
1434.L40:
1435	andi.	I, M,  1
1436	beq	.L49
1437
1438#if defined(TRMMKERNEL)
1439#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
1440	addi	AO2, AO,   2 * SIZE
1441	fpmr	f1,  f0
1442	addi	BO,  B,  - 4 * SIZE
1443	fpmr	f2,  f0
1444	addi	BO2, B,  - 2 * SIZE
1445	fpmr	f3,  f0
1446#else
/* Start KK steps into the panels: A offset is KK << BASE_SHIFT, B offset is KK << (2 + BASE_SHIFT). */
1447	slwi	TEMP, KK, 0 + BASE_SHIFT
1448	slwi	r0,   KK, 2 + BASE_SHIFT
1449	add	AO, AO, TEMP
1450	add	BO, B,  r0
1451
1452	addi	AO2, AO,   2 * SIZE
1453	fpmr	f1,  f0
1454	addi	BO,  BO, - 4 * SIZE
1455	fpmr	f2,  f0
1456	addi	BO2, BO,   2 * SIZE
1457	fpmr	f3,  f0
1458#endif
1459
/* TEMP = number of k steps for this tile; it depends on LEFT/TRANSA. */
1460#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
1461	sub	TEMP, K, KK
1462#elif defined(LEFT)
1463	addi	TEMP, KK, 1
1464#else
1465	addi	TEMP, KK, 4
1466#endif
/* CTR = TEMP / 8 unrolled iterations; if none, go straight to the remainder at .L44. */
1467	srawi.	r0,  TEMP,  3
1468	mtspr	CTR, r0
1469	ble	.L44
1470
1471#else
1472	addi	AO2, AO,   2 * SIZE
1473	fpmr	f1,  f0
1474	addi	BO,  B,  - 4 * SIZE
1475	fpmr	f2,  f0
1476	addi	BO2, B,  - 2 * SIZE
1477	fpmr	f3,  f0
1478
/* CTR = K / 8 unrolled iterations; if none, go straight to the remainder at .L44. */
1479	srawi.	r0,  K,  3
1480	mtspr	CTR, r0
1481	ble	.L44
1482#endif
1483
/* Preload the operands of the first unrolled iteration. */
1484	LFPDUX	A1,  AO,  INC4
1485	LFPDUX	B1,  BO,  INC4
1486	LFPDUX	B2,  BO2, INC4
1487	LFPDUX	A2, AO2,  INC4
1488	LFPDUX	B3,  BO,  INC4
1489	LFPDUX	B4,  BO2, INC4
1490
1491	LFPDUX	A3,  AO,  INC4
1492	LFPDUX	A5,  BO,  INC4
1493	LFPDUX	A6,  BO2, INC4
1494	LFPDUX	A4, AO2,  INC4
1495	LFPDUX	A7,  BO,  INC4
1496	LFPDUX	A8,  BO2, INC4
1497	bdz-	.L43
1498	.align 4
1499
/* Unrolled loop: each multiply-add group is interleaved with the loads for a later group. */
/* A5-A8 are used as extra B operand registers here (they are loaded through BO/BO2). */
1500.L42:
1501	fxcpmadd	f0,  A1, B1, f0
1502	LFPDUX	B1,  BO,  INC4
1503	fxcpmadd	f1,  A1, B2, f1
1504	LFPDUX	B2,  BO2, INC4
1505	fxcsmadd	f2,  A1, B3, f2
1506	LFPDUX	B3,  BO,  INC4
1507	fxcsmadd	f3,  A1, B4, f3
1508	LFPDUX	B4,  BO2, INC4
1509	LFPDUX	A1,  AO,  INC4
1510
1511	fxcpmadd	f0,  A2, A5, f0
1512	LFPDUX	A5,  BO,  INC4
1513	fxcpmadd	f1,  A2, A6, f1
1514	LFPDUX	A6,  BO2, INC4
1515	fxcsmadd	f2,  A2, A7, f2
1516	LFPDUX	A7,  BO,  INC4
1517	fxcsmadd	f3,  A2, A8, f3
1518	LFPDUX	A8,  BO2, INC4
1519	LFPDUX	A2, AO2,  INC4
1520
1521	fxcpmadd	f0,  A3, B1, f0
1522	LFPDUX	B1,  BO,  INC4
1523	fxcpmadd	f1,  A3, B2, f1
1524	LFPDUX	B2,  BO2, INC4
1525	fxcsmadd	f2,  A3, B3, f2
1526	LFPDUX	B3,  BO,  INC4
1527	fxcsmadd	f3,  A3, B4, f3
1528	LFPDUX	B4,  BO2, INC4
1529	LFPDUX	A3,  AO,  INC4
1530
1531	fxcpmadd	f0,  A4, A5, f0
1532	LFPDUX	A5,  BO,  INC4
1533	fxcpmadd	f1,  A4, A6, f1
1534	LFPDUX	A6,  BO2, INC4
1535	fxcsmadd	f2,  A4, A7, f2
1536	LFPDUX	A7,  BO,  INC4
1537	fxcsmadd	f3,  A4, A8, f3
1538	LFPDUX	A8,  BO2, INC4
1539	LFPDUX	A4, AO2,  INC4
1540	bdnz+	.L42
1541	.align 4
1542
/* Drain: finish the last unrolled iteration; only the B operands still needed are loaded, A is not read again. */
1543.L43:
1544	fxcpmadd	f0,  A1, B1, f0
1545	LFPDUX	B1,  BO,  INC4
1546	fxcpmadd	f1,  A1, B2, f1
1547	LFPDUX	B2,  BO2, INC4
1548	fxcsmadd	f2,  A1, B3, f2
1549	LFPDUX	B3,  BO,  INC4
1550	fxcsmadd	f3,  A1, B4, f3
1551	LFPDUX	B4,  BO2, INC4
1552
1553	fxcpmadd	f0,  A2, A5, f0
1554	LFPDUX	A5,  BO,  INC4
1555	fxcpmadd	f1,  A2, A6, f1
1556	LFPDUX	A6,  BO2, INC4
1557	fxcsmadd	f2,  A2, A7, f2
1558	LFPDUX	A7,  BO,  INC4
1559	fxcsmadd	f3,  A2, A8, f3
1560	LFPDUX	A8,  BO2, INC4
1561
1562	fxcpmadd	f0,  A3, B1, f0
1563	fxcpmadd	f1,  A3, B2, f1
1564	fxcsmadd	f2,  A3, B3, f2
1565	fxcsmadd	f3,  A3, B4, f3
1566
1567	fxcpmadd	f0,  A4, A5, f0
1568	fxcpmadd	f1,  A4, A6, f1
1569	fxcsmadd	f2,  A4, A7, f2
1570	fxcsmadd	f3,  A4, A8, f3
1571	.align 4
1572
/* Load alpha into AP, then set CTR to the leftover k steps (count & 7). */
1573.L44:
1574	lfd	AP,  ALPHA(SP)
1575#ifdef TRMMKERNEL
/* TRMM path multiplies with fpmul below, so alpha is copied into both halves of AP. */
1576       fsmfp	AP, AP
1577#endif
1578
1579#if defined(TRMMKERNEL)
1580#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
1581	sub	TEMP, K, KK
1582#elif defined(LEFT)
1583	addi	TEMP, KK, 1
1584#else
1585	addi	TEMP, KK, 4
1586#endif
1587	andi.	TEMP,  TEMP,  7
1588	mtspr	CTR, TEMP
1589#else
1590	andi.	r0,  K,  7
1591	mtspr	CTR, r0
1592#endif
/* No leftover steps: go to the write-back. */
1593	ble+	.L48
1594
/* A is read with the non-updating LFDX and AO is then advanced by INC explicitly. */
1595	LFDX	A1,  AO,  INC4
1596	LFPDUX	B1,  BO,  INC4
1597	LFPDUX	B2,  BO2, INC4
1598	add	AO, AO, INC
1599	bdz-	.L47
1600	.align 4
1601
/* Remainder loop: one k step per iteration, accumulating into f0/f1 only. */
1602.L46:
1603	fxcpmadd	f0,  A1, B1, f0
1604	LFPDUX	B1,  BO,  INC4
1605	fxcpmadd	f1,  A1, B2, f1
1606	LFDX	A1,  AO,  INC4
1607	LFPDUX	B2,  BO2, INC4
1608	add	AO, AO, INC
1609	bdnz+	.L46
1610	.align 4
1611
/* Final remainder step (operands already loaded). */
1612.L47:
1613	fxcpmadd	f0,  A1, B1, f0
1614	fxcpmadd	f1,  A1, B2, f1
1615	.align 4
1616
/* Write-back: fold f2/f3 into f0/f1, apply alpha, store one value per column pointer. */
1617.L48:
1618#ifndef TRMMKERNEL
1619	LFDX	A1, CO1, INC2
1620	LFDX	A2, CO2, INC2
1621	LFDX	A3, CO3, INC2
1622	LFDX	A4, CO4, INC2
1623
1624	fpadd	f0, f0, f2
1625	fpadd	f1, f1, f3
1626
/* Pack the CO2/CO4 values into the secondary halves of A1/A3 so one multiply-add covers two columns. */
1627	fsmfp	A1, A2
1628	fsmfp	A3, A4
1629
1630	fxcpmadd	f0,  AP, f0,  A1
1631	fxcpmadd	f1,  AP, f1,  A3
1632#else
1633	fpadd	f0, f0, f2
1634	fpadd	f1, f1, f3
1635
1636	fpmul	f0,  AP, f0
1637	fpmul	f1,  AP, f1
1638#endif
1639
/* Primary halves go to CO1/CO3, secondary halves to CO2/CO4. */
/* NOTE(review): these look like non-updating store forms, so CO1..CO4 are presumably left unchanged - confirm macro definitions. */
1640	STFDX	f0,  CO1, INC2
1641	STFSDX	f0,  CO2, INC2
1642	STFDX	f1,  CO3, INC2
1643	STFSDX	f1,  CO4, INC2
1644
1645#ifdef TRMMKERNEL
1646#if ( defined(LEFT) &&  defined(TRANSA)) || \
1647    (!defined(LEFT) && !defined(TRANSA))
/* Advance AO/BO by the k steps this tile did not consume: K - KK - (1 or 4), scaled like the entry offsets. */
1648	sub	TEMP, K, KK
1649#ifdef LEFT
1650	addi	TEMP, TEMP, -1
1651#else
1652	addi	TEMP, TEMP, -4
1653#endif
1654	slwi	r0,   TEMP, 0 + BASE_SHIFT
1655	slwi	TEMP, TEMP, 2 + BASE_SHIFT
1656	add	AO, AO, r0
1657	add	BO, BO, TEMP
1658#endif
1659
1660#ifdef LEFT
1661	addi	KK, KK, 1
1662#endif
1663#endif
1664	.align 4
1665
/*
 * .L49: tail of one pass over a 4-column panel.
 * For a TRMM build without LEFT, KK advances by 4. B is set to
 * BO + 4 * SIZE, J is decremented, and control returns to .L10 while J > 0.
 */
1666.L49:
1667#if defined(TRMMKERNEL) && !defined(LEFT)
1668	addi	KK, KK, 4
1669#endif
1670
1671	addi	B,  BO, 4 * SIZE
1672
1673	addic.	J, J, -1
1674	bgt+	.L10
1675	.align 4
1676
/*
 * .L50: set-up for the 2-column panel, taken only when bit 1 of N is set
 * (otherwise control goes to .L90).
 * CO1 = C, CO2 = C + LDC, and C is advanced past both columns.
 * AO restarts at A - 2 * SIZE (the loads below pre-increment by INC2),
 * f0 is reloaded from the FZERO stack slot, and I = M / 8 counts the
 * 8-row tiles; if there are none, control goes to .L60.
 */
1677.L50:
1678	andi.	J, N,  2
1679	beq	.L90
1680
1681	mr	CO1, C
1682	add	CO2, C,   LDC
1683	add	C,   CO2, LDC
1684
1685#if defined(TRMMKERNEL) &&  defined(LEFT)
/* Left-side TRMM restarts KK from OFFSET for every panel. */
1686	mr	KK, OFFSET
1687#endif
1688
1689	addi	AO, A, -2 * SIZE
1690
1691	li	r0, FZERO
1692	lfpsx	f0, SP, r0
1693
1694	srawi.	I, M,  3
1695	ble	.L60
1696	.align 4
1697
/*
 * .L51 - .L58: one 8-row tile of the 2-column panel; repeated I times.
 * f0-f3 accumulate the products formed with fxcpmadd and are stored
 * through CO1; f4-f7 accumulate the fxcsmadd products and are stored
 * through CO2. Each B register pair feeds one k step, each group of four
 * A register pairs supplies the 8 rows of that step.
 * NOTE(review): assumes f0 is zero on entry (reloaded from the FZERO slot
 * before every pass) - confirm FZERO holds 0.0.
 */
1698.L51:
1699#if defined(TRMMKERNEL)
1700#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
1701	fpmr	f4,  f0
1702	addi	BO,  B,  - 2 * SIZE
1703 	fpmr	f1,  f0
1704	fpmr	f5,  f0
1705	fpmr	f2,  f0
1706	fpmr	f6,  f0
1707#else
/* Start KK steps into the panels: A offset is KK << (3 + BASE_SHIFT), B offset is KK << (1 + BASE_SHIFT). */
1708	slwi	TEMP, KK, 3 + BASE_SHIFT
1709	slwi	r0,   KK, 1 + BASE_SHIFT
1710	add	AO, AO, TEMP
1711	add	BO, B,  r0
1712
1713	fpmr	f4,  f0
1714	addi	BO,  BO,  - 2 * SIZE
1715 	fpmr	f1,  f0
1716	fpmr	f5,  f0
1717	fpmr	f2,  f0
1718	fpmr	f6,  f0
1719#endif
1720
1721
/* TEMP = number of k steps for this tile; it depends on LEFT/TRANSA. */
1722#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
1723	sub	TEMP, K, KK
1724#elif defined(LEFT)
1725	addi	TEMP, KK, 8
1726#else
1727	addi	TEMP, KK, 2
1728#endif
/* CTR = TEMP / 4 unrolled iterations; if none, go to the remainder at .L54. */
1729	srawi.	r0,  TEMP,  2
1730	fpmr	f3,  f0
1731	mtspr	CTR, r0
1732	fpmr	f7,  f0
1733	ble	.L54
1734#else
1735	fpmr	f4,  f0
1736	addi	BO,  B,  - 2 * SIZE
1737 	fpmr	f1,  f0
1738	fpmr	f5,  f0
1739	fpmr	f2,  f0
1740	fpmr	f6,  f0
1741
/* CTR = K / 4 unrolled iterations; if none, go to the remainder at .L54. */
1742	srawi.	r0,  K,  2
1743	fpmr	f3,  f0
1744	mtspr	CTR, r0
1745	fpmr	f7,  f0
1746	ble	.L54
1747#endif
1748
/* Preload B1-B3 and A1-A8 for the first unrolled iteration. */
1749	LFPDUX	B1,  BO,  INC2
1750	LFPDUX	A1,  AO,  INC2
1751	LFPDUX	A2,  AO,  INC2
1752	LFPDUX	B2,  BO,  INC2
1753	LFPDUX	A3,  AO,  INC2
1754	LFPDUX	A4,  AO,  INC2
1755
1756	LFPDUX	B3,  BO,  INC2
1757	LFPDUX	A5,  AO,  INC2
1758	LFPDUX	A6,  AO,  INC2
1759	LFPDUX	A7,  AO,  INC2
1760	LFPDUX	A8,  AO,  INC2
1761	bdz-	.L53
1762	.align 4
1763
/* Unrolled loop: four k steps (B1, B2, B3, B4) per iteration, loads interleaved with the multiply-adds. */
/* NOTE(review): the nop instructions look like issue-slot padding for the hand scheduling - confirm before removing. */
1764.L52:
1765	fxcpmadd	f0,  B1, A1, f0
1766	LFPDUX	B4,  BO,  INC2
1767	fxcsmadd	f4,  B1, A1, f4
1768	LFPDUX	A1,  AO,  INC2
1769	fxcpmadd	f1,  B1, A2, f1
1770	nop
1771	fxcsmadd	f5,  B1, A2, f5
1772	LFPDUX	A2,  AO,  INC2
1773
1774	fxcpmadd	f2,  B1, A3, f2
1775	nop
1776	fxcsmadd	f6,  B1, A3, f6
1777	LFPDUX	A3,  AO,  INC2
1778	fxcpmadd	f3,  B1, A4, f3
1779	nop
1780	fxcsmadd	f7,  B1, A4, f7
1781	LFPDUX	A4,  AO,  INC2
1782
1783	fxcpmadd	f0,  B2, A5, f0
1784	LFPDUX	B1,  BO,  INC2
1785	fxcsmadd	f4,  B2, A5, f4
1786	LFPDUX	A5,  AO,  INC2
1787	fxcpmadd	f1,  B2, A6, f1
1788	nop
1789	fxcsmadd	f5,  B2, A6, f5
1790	LFPDUX	A6,  AO,  INC2
1791
1792	fxcpmadd	f2,  B2, A7, f2
1793	nop
1794	fxcsmadd	f6,  B2, A7, f6
1795	LFPDUX	A7,  AO,  INC2
1796	fxcpmadd	f3,  B2, A8, f3
1797	nop
1798	fxcsmadd	f7,  B2, A8, f7
1799	LFPDUX	A8,  AO,  INC2
1800
1801	fxcpmadd	f0,  B3, A1, f0
1802	LFPDUX	B2,  BO,  INC2
1803	fxcsmadd	f4,  B3, A1, f4
1804	LFPDUX	A1,  AO,  INC2
1805	fxcpmadd	f1,  B3, A2, f1
1806	nop
1807	fxcsmadd	f5,  B3, A2, f5
1808	LFPDUX	A2,  AO,  INC2
1809
1810	fxcpmadd	f2,  B3, A3, f2
1811	nop
1812	fxcsmadd	f6,  B3, A3, f6
1813	LFPDUX	A3,  AO,  INC2
1814	fxcpmadd	f3,  B3, A4, f3
1815	nop
1816	fxcsmadd	f7,  B3, A4, f7
1817	LFPDUX	A4,  AO,  INC2
1818
1819	fxcpmadd	f0,  B4, A5, f0
1820	LFPDUX	B3,  BO,  INC2
1821	fxcsmadd	f4,  B4, A5, f4
1822	LFPDUX	A5,  AO,  INC2
1823	fxcpmadd	f1,  B4, A6, f1
1824	nop
1825	fxcsmadd	f5,  B4, A6, f5
1826	LFPDUX	A6,  AO,  INC2
1827
1828	fxcpmadd	f2,  B4, A7, f2
1829	nop
1830	fxcsmadd	f6,  B4, A7, f6
1831	LFPDUX	A7,  AO,  INC2
1832	fxcpmadd	f3,  B4, A8, f3
1833	nop
1834	fxcsmadd	f7,  B4, A8, f7
1835	LFPDUX	A8,  AO,  INC2
1836	bdnz+	.L52
1837	.align 4
1838
/* Drain: last unrolled iteration; loads stop once the operands for its four k steps are in registers. */
1839.L53:
1840	fxcpmadd	f0,  B1, A1, f0
1841	LFPDUX	B4,  BO,  INC2
1842	fxcsmadd	f4,  B1, A1, f4
1843	LFPDUX	A1,  AO,  INC2
1844	fxcpmadd	f1,  B1, A2, f1
1845	nop
1846	fxcsmadd	f5,  B1, A2, f5
1847	LFPDUX	A2,  AO,  INC2
1848
1849	fxcpmadd	f2,  B1, A3, f2
1850	nop
1851	fxcsmadd	f6,  B1, A3, f6
1852	LFPDUX	A3,  AO,  INC2
1853	fxcpmadd	f3,  B1, A4, f3
1854	nop
1855	fxcsmadd	f7,  B1, A4, f7
1856	LFPDUX	A4,  AO,  INC2
1857
1858	fxcpmadd	f0,  B2, A5, f0
1859	nop
1860	fxcsmadd	f4,  B2, A5, f4
1861	LFPDUX	A5,  AO,  INC2
1862	fxcpmadd	f1,  B2, A6, f1
1863	nop
1864	fxcsmadd	f5,  B2, A6, f5
1865	LFPDUX	A6,  AO,  INC2
1866
1867	fxcpmadd	f2,  B2, A7, f2
1868	nop
1869	fxcsmadd	f6,  B2, A7, f6
1870	LFPDUX	A7,  AO,  INC2
1871	fxcpmadd	f3,  B2, A8, f3
1872	nop
1873	fxcsmadd	f7,  B2, A8, f7
1874	LFPDUX	A8,  AO,  INC2
1875
1876	fxcpmadd	f0,  B3, A1, f0
1877	fxcsmadd	f4,  B3, A1, f4
1878	fxcpmadd	f1,  B3, A2, f1
1879	fxcsmadd	f5,  B3, A2, f5
1880
1881	fxcpmadd	f2,  B3, A3, f2
1882	fxcsmadd	f6,  B3, A3, f6
1883	fxcpmadd	f3,  B3, A4, f3
1884	fxcsmadd	f7,  B3, A4, f7
1885
1886	fxcpmadd	f0,  B4, A5, f0
1887	fxcsmadd	f4,  B4, A5, f4
1888	fxcpmadd	f1,  B4, A6, f1
1889	fxcsmadd	f5,  B4, A6, f5
1890
1891	fxcpmadd	f2,  B4, A7, f2
1892	fxcsmadd	f6,  B4, A7, f6
1893	fxcpmadd	f3,  B4, A8, f3
1894	fxcsmadd	f7,  B4, A8, f7
1895	.align 4
1896
/* Load alpha into AP, then set CTR to the leftover k steps (count & 3). */
1897.L54:
1898	lfd	AP,  ALPHA(SP)
1899#ifdef TRMMKERNEL
/* TRMM path multiplies with fpmul below, so alpha is copied into both halves of AP. */
1900       fsmfp	AP, AP
1901#endif
1902
1903#if defined(TRMMKERNEL)
1904#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
1905	sub	TEMP, K, KK
1906#elif defined(LEFT)
1907	addi	TEMP, KK, 8
1908#else
1909	addi	TEMP, KK, 2
1910#endif
1911	andi.	TEMP,  TEMP,  3
1912	mtspr	CTR, TEMP
1913#else
1914	andi.	r0,  K,  3
1915	mtspr	CTR, r0
1916#endif
/* No leftover steps: go to the write-back. */
1917	ble+	.L58
1918
1919	LFPDUX	A1,  AO,  INC2
1920	LFPDUX	B1,  BO,  INC2
1921	LFPDUX	A2,  AO,  INC2
1922	LFPDUX	A3,  AO,  INC2
1923	LFPDUX	A4,  AO,  INC2
1924	bdz-	.L57
1925	.align 4
1926
/* Remainder loop: one k step per iteration. */
1927.L56:
1928	fxcpmadd	f0,  B1, A1, f0
1929	fxcsmadd	f4,  B1, A1, f4
1930	LFPDUX	A1,  AO,  INC2
1931	fxcpmadd	f1,  B1, A2, f1
1932	fxcsmadd	f5,  B1, A2, f5
1933	LFPDUX	A2,  AO,  INC2
1934
1935	fxcpmadd	f2,  B1, A3, f2
1936	fxcsmadd	f6,  B1, A3, f6
1937	LFPDUX	A3,  AO,  INC2
1938	fxcpmadd	f3,  B1, A4, f3
1939	fxcsmadd	f7,  B1, A4, f7
1940	LFPDUX	A4,  AO,  INC2
1941	LFPDUX	B1,  BO,  INC2
1942	bdnz+	.L56
1943	.align 4
1944
/* Final remainder step (operands already loaded). */
1945.L57:
1946	fxcpmadd	f0,  B1, A1, f0
1947	fxcsmadd	f4,  B1, A1, f4
1948	fxcpmadd	f1,  B1, A2, f1
1949	fxcsmadd	f5,  B1, A2, f5
1950
1951	fxcpmadd	f2,  B1, A3, f2
1952	fxcsmadd	f6,  B1, A3, f6
1953	fxcpmadd	f3,  B1, A4, f3
1954	fxcsmadd	f7,  B1, A4, f7
1955	.align 4
1956
/* Write-back: non-TRMM computes alpha * acc + C, TRMM computes alpha * acc. */
1957.L58:
1958#ifndef TRMMKERNEL
/* Read the existing values: four register pairs through CO1, four through CO2 (pointers advance). */
1959	LFPDUX	A1, CO1, INC2
1960	LFPDUX	B1, CO1, INC2
1961	LFPDUX	A3, CO1, INC2
1962   	LFPDUX	A5, CO1, INC2
1963
1964 	LFPDUX	B3, CO2, INC2
1965	LFPDUX	A6, CO2, INC2
1966	LFPDUX	A7, CO2, INC2
1967	LFPDUX	B2, CO2, INC2
1968
1969	fxcpmadd	f0,  AP, f0,  A1
1970	fxcpmadd	f1,  AP, f1,  B1
1971	fxcpmadd	f2,  AP, f2,  A3
1972	fxcpmadd	f3,  AP, f3,  A5
1973
1974	fxcpmadd	f4,  AP, f4,  B3
1975	fxcpmadd	f5,  AP, f5,  A6
/* NOTE(review): INCM7 presumably steps the pointer back so the stores land where the loads above read - confirm its definition. */
1976	STFPDUX	f0,  CO1, INCM7
1977	fxcpmadd	f6,  AP, f6,  A7
1978	STFPDUX	f1,  CO1, INC2
1979	fxcpmadd	f7,  AP, f7,  B2
1980	STFPDUX	f2,  CO1, INC2
1981	STFPDUX	f3,  CO1, INC2
1982	STFPDUX	f4,  CO2, INCM7
1983
1984	STFPDUX	f5,  CO2, INC2
1985	STFPDUX	f6,  CO2, INC2
1986	STFPDUX	f7,  CO2, INC2
1987#else
1988	fpmul	f0,  AP, f0
1989	fpmul	f1,  AP, f1
1990	fpmul	f2,  AP, f2
1991	fpmul	f3,  AP, f3
1992
1993	fpmul	f4,  AP, f4
1994	fpmul	f5,  AP, f5
1995	STFPDUX	f0,  CO1, INC2
1996	fpmul	f6,  AP, f6
1997	STFPDUX	f1,  CO1, INC2
1998	fpmul	f7,  AP, f7
1999	STFPDUX	f2,  CO1, INC2
2000	STFPDUX	f3,  CO1, INC2
2001	STFPDUX	f4,  CO2, INC2
2002
2003	STFPDUX	f5,  CO2, INC2
2004	STFPDUX	f6,  CO2, INC2
2005	STFPDUX	f7,  CO2, INC2
2006#endif
2007
2008
2009#ifdef TRMMKERNEL
2010#if ( defined(LEFT) &&  defined(TRANSA)) || \
2011    (!defined(LEFT) && !defined(TRANSA))
/* Advance AO/BO by the k steps this tile did not consume: K - KK - (8 or 2), scaled like the entry offsets. */
2012	sub	TEMP, K, KK
2013#ifdef LEFT
2014	addi	TEMP, TEMP, -8
2015#else
2016	addi	TEMP, TEMP, -2
2017#endif
2018	slwi	r0,   TEMP, 3 + BASE_SHIFT
2019	slwi	TEMP, TEMP, 1 + BASE_SHIFT
2020	add	AO, AO, r0
2021	add	BO, BO, TEMP
2022#endif
2023
2024#ifdef LEFT
2025	addi	KK, KK, 8
2026#endif
2027#endif
2028
/* Next 8-row tile: decrement I, reload f0 from the FZERO slot, loop while I > 0. */
2029	addic.	I, I, -1
2030	li	r0, FZERO
2031
2032	lfpsx	f0, SP, r0
2033	bgt+	.L51
2034	.align 4
2035
/*
 * .L60 - .L68: 4-row tile of the 2-column panel, executed only when bit 2
 * of M is set (otherwise control goes to .L70).
 * f0/f1 accumulate the fxcpmadd products and are stored through CO1;
 * f2/f3 accumulate the fxcsmadd products and are stored through CO2.
 * NOTE(review): assumes f0 is zero on entry (reloaded from the FZERO slot
 * before this label) - confirm FZERO holds 0.0.
 */
2036.L60:
2037	andi.	I, M,  4
2038	beq	.L70
2039
2040#if defined(TRMMKERNEL)
2041#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
2042	addi	BO,  B,  - 2 * SIZE
2043 	fpmr	f1,  f0
2044#else
/* Start KK steps into the panels: A offset is KK << (2 + BASE_SHIFT), B offset is KK << (1 + BASE_SHIFT). */
2045	slwi	TEMP, KK, 2 + BASE_SHIFT
2046	slwi	r0,   KK, 1 + BASE_SHIFT
2047	add	AO, AO, TEMP
2048	add	BO, B,  r0
2049
2050	addi	BO,  BO,  - 2 * SIZE
2051 	fpmr	f1,  f0
2052#endif
2053
/* TEMP = number of k steps for this tile; it depends on LEFT/TRANSA. */
2054#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2055	sub	TEMP, K, KK
2056#elif defined(LEFT)
2057	addi	TEMP, KK, 4
2058#else
2059	addi	TEMP, KK, 2
2060#endif
2061	fpmr	f2,  f0
/* CTR = TEMP / 4 unrolled iterations; if none, go to the remainder at .L64. */
2062	srawi.	r0,  TEMP,  2
2063	mtspr	CTR, r0
2064	fpmr	f3,  f0
2065	ble	.L64
2066#else
/* CTR = K / 4 unrolled iterations; if none, go to the remainder at .L64. */
2067	srawi.	r0,  K,  2
2068 	fpmr	f1,  f0
2069	addi	BO,  B,  - 2 * SIZE
2070	fpmr	f2,  f0
2071	mtspr	CTR, r0
2072	fpmr	f3,  f0
2073	ble	.L64
2074#endif
2075
/* Preload B1-B4 and A1-A8 for the first unrolled iteration. */
2076	LFPDUX	B1,  BO, INC2
2077	LFPDUX	A1,  AO, INC2
2078	LFPDUX	A2,  AO, INC2
2079	LFPDUX	B2,  BO, INC2
2080	LFPDUX	A3,  AO, INC2
2081	LFPDUX	A4,  AO, INC2
2082
2083	LFPDUX	B3,  BO, INC2
2084	LFPDUX	A5,  AO, INC2
2085	LFPDUX	A6,  AO, INC2
2086	LFPDUX	B4,  BO, INC2
2087	LFPDUX	A7,  AO, INC2
2088	LFPDUX	A8,  AO, INC2
2089	bdz-	.L63
2090	.align 4
2091
/* Unrolled loop: four k steps (B1..B4) per iteration; each step reloads its own operands for the next iteration. */
2092.L62:
2093	fxcpmadd	f0,  B1, A1, f0
2094	fxcsmadd	f2,  B1, A1, f2
2095	LFPDUX	A1,  AO, INC2
2096	fxcpmadd	f1,  B1, A2, f1
2097	fxcsmadd	f3,  B1, A2, f3
2098	LFPDUX	A2,  AO, INC2
2099	LFPDUX	B1,  BO, INC2
2100
2101	fxcpmadd	f0,  B2, A3, f0
2102	fxcsmadd	f2,  B2, A3, f2
2103	LFPDUX	A3,  AO, INC2
2104	fxcpmadd	f1,  B2, A4, f1
2105	fxcsmadd	f3,  B2, A4, f3
2106	LFPDUX	A4,  AO, INC2
2107	LFPDUX	B2,  BO, INC2
2108
2109	fxcpmadd	f0,  B3, A5, f0
2110	fxcsmadd	f2,  B3, A5, f2
2111	LFPDUX	A5,  AO, INC2
2112	fxcpmadd	f1,  B3, A6, f1
2113	fxcsmadd	f3,  B3, A6, f3
2114	LFPDUX	A6,  AO, INC2
2115	LFPDUX	B3,  BO, INC2
2116
2117	fxcpmadd	f0,  B4, A7, f0
2118	fxcsmadd	f2,  B4, A7, f2
2119	LFPDUX	A7,  AO, INC2
2120	fxcpmadd	f1,  B4, A8, f1
2121	fxcsmadd	f3,  B4, A8, f3
2122	LFPDUX	A8,  AO, INC2
2123	LFPDUX	B4,  BO, INC2
2124	bdnz+	.L62
2125	.align 4
2126
/* Drain: last unrolled iteration, no further loads. */
2127.L63:
2128	fxcpmadd	f0,  B1, A1, f0
2129	fxcsmadd	f2,  B1, A1, f2
2130	fxcpmadd	f1,  B1, A2, f1
2131	fxcsmadd	f3,  B1, A2, f3
2132
2133	fxcpmadd	f0,  B2, A3, f0
2134	fxcsmadd	f2,  B2, A3, f2
2135	fxcpmadd	f1,  B2, A4, f1
2136	fxcsmadd	f3,  B2, A4, f3
2137
2138	fxcpmadd	f0,  B3, A5, f0
2139	fxcsmadd	f2,  B3, A5, f2
2140	fxcpmadd	f1,  B3, A6, f1
2141	fxcsmadd	f3,  B3, A6, f3
2142
2143	fxcpmadd	f0,  B4, A7, f0
2144	fxcsmadd	f2,  B4, A7, f2
2145	fxcpmadd	f1,  B4, A8, f1
2146	fxcsmadd	f3,  B4, A8, f3
2147	.align 4
2148
/* Load alpha into AP, then set CTR to the leftover k steps (count & 3). */
2149.L64:
2150	lfd	AP,  ALPHA(SP)
2151#ifdef TRMMKERNEL
/* TRMM path multiplies with fpmul below, so alpha is copied into both halves of AP. */
2152       fsmfp	AP, AP
2153#endif
2154
2155#if defined(TRMMKERNEL)
2156#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2157	sub	TEMP, K, KK
2158#elif defined(LEFT)
2159	addi	TEMP, KK, 4
2160#else
2161	addi	TEMP, KK, 2
2162#endif
2163	andi.	TEMP,  TEMP,  3
2164	mtspr	CTR, TEMP
2165#else
2166	andi.	r0,  K,  3
2167	mtspr	CTR, r0
2168#endif
/* No leftover steps: go to the write-back. */
2169	ble+	.L68
2170
2171	LFPDUX	A1,  AO,  INC2
2172	LFPDUX	B1,  BO,  INC2
2173	LFPDUX	A2,  AO,  INC2
2174	bdz-	.L67
2175	.align 4
2176
/* Remainder loop: one k step per iteration. */
2177.L66:
2178	fxcpmadd	f0,  B1, A1, f0
2179	fxcsmadd	f2,  B1, A1, f2
2180	LFPDUX	A1,  AO,  INC2
2181	fxcpmadd	f1,  B1, A2, f1
2182	fxcsmadd	f3,  B1, A2, f3
2183	LFPDUX	B1,  BO,  INC2
2184	LFPDUX	A2,  AO,  INC2
2185	bdnz+	.L66
2186	.align 4
2187
/* Final remainder step (operands already loaded). */
2188.L67:
2189	fxcpmadd	f0,  B1, A1, f0
2190	fxcsmadd	f2,  B1, A1, f2
2191	fxcpmadd	f1,  B1, A2, f1
2192	fxcsmadd	f3,  B1, A2, f3
2193	.align 4
2194
/* Write-back: non-TRMM computes alpha * acc + C, TRMM computes alpha * acc. */
2195.L68:
2196#ifndef TRMMKERNEL
2197	LFPDUX	A1, CO1, INC2
2198	LFPDUX	A2, CO1, INC2
2199	LFPDUX	A3, CO2, INC2
2200	LFPDUX	A4, CO2, INC2
2201
2202	fxcpmadd	f0,  AP, f0,  A1
2203	fxcpmadd	f1,  AP, f1,  A2
2204	fxcpmadd	f2,  AP, f2,  A3
2205 	fxcpmadd	f3,  AP, f3,  A4
2206
/* NOTE(review): INCM3 presumably steps the pointer back so the stores land where the loads above read - confirm its definition. */
2207	STFPDUX	f0,  CO1, INCM3
2208	STFPDUX	f1,  CO1, INC2
2209	STFPDUX	f2,  CO2, INCM3
2210	STFPDUX	f3,  CO2, INC2
2211#else
2212	fpmul	f0,  AP, f0
2213	fpmul	f1,  AP, f1
2214	fpmul	f2,  AP, f2
2215 	fpmul	f3,  AP, f3
2216
2217	STFPDUX	f0,  CO1, INC2
2218	STFPDUX	f1,  CO1, INC2
2219	STFPDUX	f2,  CO2, INC2
2220	STFPDUX	f3,  CO2, INC2
2221#endif
2222
2223#ifdef TRMMKERNEL
2224#if ( defined(LEFT) &&  defined(TRANSA)) || \
2225    (!defined(LEFT) && !defined(TRANSA))
/* Advance AO/BO by the k steps this tile did not consume: K - KK - (4 or 2), scaled like the entry offsets. */
2226	sub	TEMP, K, KK
2227#ifdef LEFT
2228	addi	TEMP, TEMP, -4
2229#else
2230	addi	TEMP, TEMP, -2
2231#endif
2232	slwi	r0,   TEMP, 2 + BASE_SHIFT
2233	slwi	TEMP, TEMP, 1 + BASE_SHIFT
2234	add	AO, AO, r0
2235	add	BO, BO, TEMP
2236#endif
2237
2238#ifdef LEFT
2239	addi	KK, KK, 4
2240#endif
2241#endif
2242
/* Reload f0 from the FZERO slot for the next tile. */
2243	li	r0, FZERO
2244	lfpsx	f0, SP, r0
2245	.align 4
2246
/*
 * .L70 - .L78: 2-row tile of the 2-column panel, executed only when bit 1
 * of M is set (otherwise control goes to .L80).
 * The unrolled loop alternates between two accumulator sets (f0/f1 and
 * f2/f3) on successive k steps; they are summed at .L78. f0 is then
 * stored through CO1 and f1 through CO2.
 * NOTE(review): assumes f0 is zero on entry (reloaded from the FZERO slot
 * before this label) - confirm FZERO holds 0.0.
 */
2247.L70:
2248	andi.	I, M,  2
2249	beq	.L80
2250
2251#if defined(TRMMKERNEL)
2252#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
2253	addi	BO,  B,  - 2 * SIZE
2254	fpmr	f1,  f0
2255#else
/* Start KK steps into the panels: both A and B offsets are KK << (1 + BASE_SHIFT). */
2256	slwi	TEMP, KK, 1 + BASE_SHIFT
2257	slwi	r0,   KK, 1 + BASE_SHIFT
2258	add	AO, AO, TEMP
2259	add	BO, B,  r0
2260
2261	addi	BO,  BO,  - 2 * SIZE
2262	fpmr	f1,  f0
2263#endif
2264
/* TEMP = number of k steps for this tile (both non-subtracting branches add 2). */
2265#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2266	sub	TEMP, K, KK
2267#elif defined(LEFT)
2268	addi	TEMP, KK, 2
2269#else
2270	addi	TEMP, KK, 2
2271#endif
/* CTR = TEMP / 8 unrolled iterations; if none, go to the remainder at .L74. */
2272	srawi.	r0,  TEMP,  3
2273	fpmr	f2,  f0
2274	mtspr	CTR, r0
2275	fpmr	f3, f0
2276	ble	.L74
2277#else
2278	addi	BO,  B,  - 2 * SIZE
2279	fpmr	f1,  f0
2280
/* CTR = K / 8 unrolled iterations; if none, go to the remainder at .L74. */
2281	srawi.	r0,  K,  3
2282	fpmr	f2,  f0
2283	mtspr	CTR, r0
2284	fpmr	f3, f0
2285	ble	.L74
2286#endif
2287
2288
/* Preload eight A pairs and eight B pairs; A9/A10 serve as the seventh and eighth B operand registers. */
2289	LFPDUX	A1,  AO, INC2
2290	LFPDUX	B1,  BO, INC2
2291	LFPDUX	A2,  AO, INC2
2292	LFPDUX	B2,  BO, INC2
2293	LFPDUX	A3,  AO, INC2
2294	LFPDUX	B3,  BO, INC2
2295	LFPDUX	A4,  AO, INC2
2296	LFPDUX	B4,  BO, INC2
2297
2298	LFPDUX	A5,  AO, INC2
2299	LFPDUX	B5,  BO, INC2
2300	LFPDUX	A6,  AO, INC2
2301	LFPDUX	B6,  BO, INC2
2302	LFPDUX	A7,  AO, INC2
2303	LFPDUX	A9,  BO, INC2
2304	LFPDUX	A8,  AO, INC2
2305	LFPDUX	A10, BO, INC2
2306	bdz-	.L73
2307	.align 4
2308
/* Unrolled loop: eight k steps per iteration; odd steps feed f0/f1, even steps feed f2/f3. */
2309.L72:
2310	fxcpmadd	f0,  B1, A1, f0
2311	fxcsmadd	f1,  B1, A1, f1
2312	LFPDUX	A1,  AO, INC2
2313	LFPDUX	B1,  BO, INC2
2314	fxcpmadd	f2,  B2, A2, f2
2315	fxcsmadd	f3,  B2, A2, f3
2316	LFPDUX	A2,  AO, INC2
2317	LFPDUX	B2,  BO, INC2
2318
2319	fxcpmadd	f0,  B3, A3, f0
2320	fxcsmadd	f1,  B3, A3, f1
2321	LFPDUX	A3,  AO, INC2
2322	LFPDUX	B3,  BO, INC2
2323	fxcpmadd	f2,  B4, A4, f2
2324	fxcsmadd	f3,  B4, A4, f3
2325	LFPDUX	A4,  AO, INC2
2326	LFPDUX	B4,  BO, INC2
2327
2328	fxcpmadd	f0,  B5, A5, f0
2329	fxcsmadd	f1,  B5, A5, f1
2330	LFPDUX	A5,  AO, INC2
2331	LFPDUX	B5,  BO, INC2
2332	fxcpmadd	f2,  B6, A6, f2
2333	fxcsmadd	f3,  B6, A6, f3
2334	LFPDUX	A6,  AO, INC2
2335	LFPDUX	B6,  BO, INC2
2336
2337	fxcpmadd	f0,  A9,  A7, f0
2338	fxcsmadd	f1,  A9,  A7, f1
2339	LFPDUX	A7,  AO, INC2
2340	LFPDUX	A9,  BO, INC2
2341	fxcpmadd	f2,  A10, A8, f2
2342	fxcsmadd	f3,  A10, A8, f3
2343	LFPDUX	A8,  AO, INC2
2344	LFPDUX	A10, BO, INC2
2345	bdnz+	.L72
2346	.align 4
2347
/* Drain: last unrolled iteration, no further loads. */
2348.L73:
2349	fxcpmadd	f0,  B1, A1, f0
2350	fxcsmadd	f1,  B1, A1, f1
2351	fxcpmadd	f2,  B2, A2, f2
2352	fxcsmadd	f3,  B2, A2, f3
2353
2354	fxcpmadd	f0,  B3, A3, f0
2355	fxcsmadd	f1,  B3, A3, f1
2356	fxcpmadd	f2,  B4, A4, f2
2357	fxcsmadd	f3,  B4, A4, f3
2358
2359	fxcpmadd	f0,  B5, A5, f0
2360	fxcsmadd	f1,  B5, A5, f1
2361	fxcpmadd	f2,  B6, A6, f2
2362	fxcsmadd	f3,  B6, A6, f3
2363
2364	fxcpmadd	f0,  A9,  A7, f0
2365	fxcsmadd	f1,  A9,  A7, f1
2366	fxcpmadd	f2,  A10, A8, f2
2367	fxcsmadd	f3,  A10, A8, f3
2368	.align 4
2369
/* Load alpha into AP, then set CTR to the leftover k steps (count & 7). */
2370.L74:
2371	lfd	AP,  ALPHA(SP)
2372#ifdef TRMMKERNEL
/* TRMM path multiplies with fpmul below, so alpha is copied into both halves of AP. */
2373       fsmfp	AP, AP
2374#endif
2375
2376#if defined(TRMMKERNEL)
2377#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2378	sub	TEMP, K, KK
2379#elif defined(LEFT)
2380	addi	TEMP, KK, 2
2381#else
2382	addi	TEMP, KK, 2
2383#endif
2384	andi.	TEMP,  TEMP,  7
2385	mtspr	CTR, TEMP
2386#else
2387	andi.	r0,  K,  7
2388	mtspr	CTR, r0
2389#endif
/* No leftover steps: go to the write-back. */
2390	ble+	.L78
2391
2392	LFPDUX	A1,  AO,  INC2
2393	LFPDUX	B1,  BO,  INC2
2394	bdz-	.L77
2395	.align 4
2396
/* Remainder loop: one k step per iteration, accumulating into f0/f1 only. */
2397.L76:
2398	fxcpmadd	f0,  B1, A1, f0
2399	fxcsmadd	f1,  B1, A1, f1
2400	LFPDUX	A1,  AO,  INC2
2401	LFPDUX	B1,  BO,  INC2
2402	bdnz+	.L76
2403	.align 4
2404
/* Final remainder step (operands already loaded). */
2405.L77:
2406	fxcpmadd	f0,  B1, A1, f0
2407	fxcsmadd	f1,  B1, A1, f1
2408	.align 4
2409
/* Write-back: merge the two accumulator sets, apply alpha, store one pair per column pointer. */
2410.L78:
2411#ifndef TRMMKERNEL
/* NOTE(review): LFPDX looks like the non-updating load form, so CO1/CO2 still point before the tile for the stores below - confirm. */
2412	LFPDX	A1, CO1, INC2
2413	LFPDX	B3, CO2, INC2
2414
2415	fpadd	f0, f0, f2
2416	fpadd	f1, f1, f3
2417
2418	fxcpmadd	f0,  AP, f0,  A1
2419	fxcpmadd	f1,  AP, f1,  B3
2420#else
2421	fpadd	f0, f0, f2
2422	fpadd	f1, f1, f3
2423
2424	fpmul	f0,  AP, f0
2425	fpmul	f1,  AP, f1
2426#endif
2427
2428	STFPDUX	f0,  CO1, INC2
2429	STFPDUX	f1,  CO2, INC2
2430
2431#ifdef TRMMKERNEL
2432#if ( defined(LEFT) &&  defined(TRANSA)) || \
2433    (!defined(LEFT) && !defined(TRANSA))
/* Advance AO/BO by the k steps this tile did not consume: K - KK - 2, scaled like the entry offsets. */
2434	sub	TEMP, K, KK
2435#ifdef LEFT
2436	addi	TEMP, TEMP, -2
2437#else
2438	addi	TEMP, TEMP, -2
2439#endif
2440	slwi	r0,   TEMP, 1 + BASE_SHIFT
2441	slwi	TEMP, TEMP, 1 + BASE_SHIFT
2442	add	AO, AO, r0
2443	add	BO, BO, TEMP
2444#endif
2445
2446#ifdef LEFT
2447	addi	KK, KK, 2
2448#endif
2449#endif
2450
/* Reload f0 from the FZERO slot for the next tile. */
2451	li	r0, FZERO
2452	lfpsx	f0, SP, r0
2453	.align 4
2454
/*
 * .L80 - .L88: single-row remainder of the 2-column panel, executed only
 * when bit 0 of M is set (otherwise control goes to .L89).
 * Each A register pair supplies two k steps (primary half via fxcpmadd,
 * secondary half via fxcsmadd); each B register pair supplies one k step.
 * Four partial accumulators f0-f3 are summed at .L88; the primary half of
 * the result is stored through CO1 and the secondary half through CO2.
 * NOTE(review): assumes f0 is zero on entry (reloaded from the FZERO slot
 * before this label) - confirm FZERO holds 0.0.
 */
2455.L80:
2456	andi.	I, M,  1
2457	beq	.L89
2458
2459#if defined(TRMMKERNEL)
2460#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
2461	addi	BO,  B,  - 2 * SIZE
2462	fpmr	f1,  f0
2463	fpmr	f2,  f0
2464	fpmr	f3,  f0
2465#else
/* Start KK steps into the panels: A offset is KK << BASE_SHIFT, B offset is KK << (1 + BASE_SHIFT). */
2466	slwi	TEMP, KK, 0 + BASE_SHIFT
2467	slwi	r0,   KK, 1 + BASE_SHIFT
2468	add	AO, AO, TEMP
2469	add	BO, B,  r0
2470
2471	addi	BO,  BO,  - 2 * SIZE
2472	fpmr	f1,  f0
2473	fpmr	f2,  f0
2474	fpmr	f3,  f0
2475#endif
2476
/* TEMP = number of k steps for this tile; it depends on LEFT/TRANSA. */
2477#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2478	sub	TEMP, K, KK
2479#elif defined(LEFT)
2480	addi	TEMP, KK, 1
2481#else
2482	addi	TEMP, KK, 2
2483#endif
/* CTR = TEMP / 8 unrolled iterations; if none, go to the remainder at .L84. */
2484	srawi.	r0,  TEMP,  3
2485	mtspr	CTR, r0
2486	ble	.L84
2487#else
2488	addi	BO,  B,  - 2 * SIZE
2489	fpmr	f1,  f0
2490	fpmr	f2,  f0
2491	fpmr	f3,  f0
2492
/* CTR = K / 8 unrolled iterations; if none, go to the remainder at .L84. */
2493	srawi.	r0,  K,  3
2494	mtspr	CTR, r0
2495	ble	.L84
2496#endif
2497
/* Preload A1-A4 and B1-B4 for the first unrolled iteration. */
2498	LFPDUX	B1,  BO,  INC2
2499	LFPDUX	A1,  AO,  INC2
2500	LFPDUX	A2,  AO,  INC2
2501
2502	LFPDUX	B2,  BO,  INC2
2503	LFPDUX	A3,  AO,  INC2
2504	LFPDUX	A4,  AO,  INC2
2505
2506	LFPDUX	B3,  BO,  INC2
2507	LFPDUX	B4,  BO,  INC2
2508	bdz-	.L83
2509	.align 4
2510
/* Unrolled loop: eight k steps per iteration (A1..A4, two steps each), loads interleaved with the multiply-adds. */
2511.L82:
2512	fxcpmadd	f0,  A1, B1, f0
2513	LFPDUX	B1,  BO,  INC2
2514	fxcsmadd	f1,  A1, B2, f1
2515	LFPDUX	B2,  BO,  INC2
2516	LFPDUX	A1,  AO,  INC2
2517	fxcpmadd	f2,  A2, B3, f2
2518	LFPDUX	B3,  BO,  INC2
2519	fxcsmadd	f3,  A2, B4, f3
2520	LFPDUX	B4,  BO,  INC2
2521	LFPDUX	A2,  AO,  INC2
2522
2523	fxcpmadd	f0,  A3, B1, f0
2524	LFPDUX	B1,  BO,  INC2
2525	fxcsmadd	f1,  A3, B2, f1
2526	LFPDUX	B2,  BO,  INC2
2527	LFPDUX	A3,  AO,  INC2
2528	fxcpmadd	f2,  A4, B3, f2
2529	LFPDUX	B3,  BO,  INC2
2530	fxcsmadd	f3,  A4, B4, f3
2531	LFPDUX	B4,  BO,  INC2
2532	LFPDUX	A4,  AO,  INC2
2533	bdnz+	.L82
2534	.align 4
2535
/* Drain: last unrolled iteration; only the B operands still needed are loaded. */
2536.L83:
2537	fxcpmadd	f0,  A1, B1, f0
2538	LFPDUX	B1,  BO,  INC2
2539	fxcsmadd	f1,  A1, B2, f1
2540	LFPDUX	B2,  BO,  INC2
2541	fxcpmadd	f2,  A2, B3, f2
2542	LFPDUX	B3,  BO,  INC2
2543	fxcsmadd	f3,  A2, B4, f3
2544	LFPDUX	B4,  BO,  INC2
2545
2546	fxcpmadd	f0,  A3, B1, f0
2547	fxcsmadd	f1,  A3, B2, f1
2548	fxcpmadd	f2,  A4, B3, f2
2549	fxcsmadd	f3,  A4, B4, f3
2550	.align 4
2551
/* Load alpha into AP, then set CTR to the leftover k steps (count & 7). */
2552.L84:
2553	lfd	AP,  ALPHA(SP)
2554#ifdef TRMMKERNEL
/* TRMM path multiplies with fpmul below, so alpha is copied into both halves of AP. */
2555       fsmfp	AP, AP
2556#endif
2557
2558#if defined(TRMMKERNEL)
2559#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2560	sub	TEMP, K, KK
2561#elif defined(LEFT)
2562	addi	TEMP, KK, 1
2563#else
2564	addi	TEMP, KK, 2
2565#endif
2566	andi.	TEMP,  TEMP,  7
2567	mtspr	CTR, TEMP
2568#else
2569	andi.	r0,  K,  7
2570	mtspr	CTR, r0
2571#endif
/* No leftover steps: go to the write-back. */
2572	ble+	.L88
2573
/* A is read with the non-updating LFDX and AO is then advanced by INC explicitly. */
2574	LFDX	A1,  AO,  INC2
2575	LFPDUX	B1,  BO,  INC2
2576	add	AO, AO, INC
2577	bdz-	.L87
2578	.align 4
2579
/* Remainder loop: one k step per iteration, accumulating into f0 only. */
2580.L86:
2581	fxcpmadd	f0,  A1, B1, f0
2582	LFDX	A1,  AO,  INC2
2583	LFPDUX	B1,  BO,  INC2
2584	add	AO, AO, INC
2585	bdnz+	.L86
2586	.align 4
2587
/* Final remainder step (operands already loaded). */
2588.L87:
2589	fxcpmadd	f0,  A1, B1, f0
2590	.align 4
2591
/* Write-back: sum f0..f3 into f0, apply alpha, store both halves. */
2592.L88:
2593#ifndef TRMMKERNEL
2594	LFDX	A1, CO1, INC2
2595	LFDX	A2, CO2, INC2
2596
2597	fpadd	f0, f0, f1
2598	fpadd	f2, f2, f3
/* Pack the CO2 value into the secondary half of A1 so one multiply-add covers both columns. */
2599	fsmfp	A1, A2
2600	fpadd	f0, f0, f2
2601	fxcpmadd	f0,  AP, f0,  A1
2602#else
2603	fpadd	f0, f0, f1
2604	fpadd	f2, f2, f3
/* NOTE(review): A1/A2 are not loaded from C in this branch and A1 is not read again before the store; this fsmfp looks like a leftover from the non-TRMM branch - confirm before removing. */
2605	fsmfp	A1, A2
2606	fpadd	f0, f0, f2
2607	fpmul	f0,  AP, f0
2608#endif
2609
/* Primary half goes to CO1, secondary half to CO2. */
2610	STFDX	f0,  CO1, INC2
2611	STFSDX	f0,  CO2, INC2
2612
2613#ifdef TRMMKERNEL
2614#if ( defined(LEFT) &&  defined(TRANSA)) || \
2615    (!defined(LEFT) && !defined(TRANSA))
/* Advance AO/BO by the k steps this tile did not consume: K - KK - (1 or 2), scaled like the entry offsets. */
2616	sub	TEMP, K, KK
2617#ifdef LEFT
2618	addi	TEMP, TEMP, -1
2619#else
2620	addi	TEMP, TEMP, -2
2621#endif
2622	slwi	r0,   TEMP, 0 + BASE_SHIFT
2623	slwi	TEMP, TEMP, 1 + BASE_SHIFT
2624	add	AO, AO, r0
2625	add	BO, BO, TEMP
2626#endif
2627
2628#ifdef LEFT
2629	addi	KK, KK, 1
2630#endif
2631#endif
2632	.align 4
2633
/*
 * .L89: end of the 2-column panel.
 * For a TRMM build without LEFT, KK advances by 2; B is set to
 * BO + 2 * SIZE before falling through to .L90.
 */
2634.L89:
2635#if defined(TRMMKERNEL) && !defined(LEFT)
2636	addi	KK, KK, 2
2637#endif
2638
2639	addi	B,  BO, 2 * SIZE
2640	.align 4
2641
/*
 * .L90: set-up for the final single column, taken only when bit 0 of N is
 * set (otherwise control goes to .L999).
 * CO1 = C, AO restarts at A - 2 * SIZE (the loads below pre-increment by
 * INC2), f0 is reloaded from the FZERO stack slot, and I = M / 8 counts
 * the 8-row tiles; if there are none, control goes to .L100.
 */
2642.L90:
2643	andi.	J, N,  1
2644	beq	.L999
2645
2646#if defined(TRMMKERNEL) &&  defined(LEFT)
/* Left-side TRMM restarts KK from OFFSET for every panel. */
2647	mr	KK, OFFSET
2648#endif
2649
2650	mr	CO1, C
2651	addi	AO, A, -2 * SIZE
2652
2653	li	r0, FZERO
2654	lfpsx	f0, SP, r0
2655
2656	srawi.	I, M,  3
2657	ble	.L100
2658	.align 4
2659
/*
 * .L91 - .L98: one 8-row tile of the single-column panel; repeated I times.
 * Each B register pair supplies two k steps (primary half via fxcpmadd,
 * then secondary half via fxcsmadd); four A register pairs supply the 8
 * rows of one step. f0-f3 are scaled by alpha and stored through CO1.
 * NOTE(review): assumes f0 is zero on entry (reloaded from the FZERO slot
 * before every pass) - confirm FZERO holds 0.0.
 */
2660.L91:
2661#if defined(TRMMKERNEL)
2662#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
2663	addi	BO,  B,  - 2 * SIZE
2664 	fpmr	f1,  f0
2665#else
/* Start KK steps into the panels: A offset is KK << (3 + BASE_SHIFT), B offset is KK << BASE_SHIFT. */
2666	slwi	TEMP, KK, 3 + BASE_SHIFT
2667	slwi	r0,   KK, 0 + BASE_SHIFT
2668	add	AO, AO, TEMP
2669	add	BO, B,  r0
2670
2671	addi	BO,  BO,  - 2 * SIZE
2672 	fpmr	f1,  f0
2673#endif
2674
/* TEMP = number of k steps for this tile; it depends on LEFT/TRANSA. */
2675#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2676	sub	TEMP, K, KK
2677#elif defined(LEFT)
2678	addi	TEMP, KK, 8
2679#else
2680	addi	TEMP, KK, 1
2681#endif
2682	fpmr	f2,  f0
/* CTR = TEMP / 4 unrolled iterations; if none, go to the remainder at .L94. */
2683	srawi.	r0,  TEMP,  2
2684	fpmr	f3,  f0
2685	mtspr	CTR, r0
2686	ble	.L94
2687
2688#else
/* CTR = K / 4 unrolled iterations; if none, go to the remainder at .L94. */
2689	srawi.	r0,  K,  2
2690 	fpmr	f1,  f0
2691	addi	BO,  B,  - 2 * SIZE
2692	fpmr	f2,  f0
2693	fpmr	f3,  f0
2694	mtspr	CTR, r0
2695	ble	.L94
2696#endif
2697
/* Preload B1/B2 and A1-A8 for the first unrolled iteration. */
2698	LFPDUX	B1,  BO,  INC2
2699	LFPDUX	A1,  AO,  INC2
2700	LFPDUX	A2,  AO,  INC2
2701	LFPDUX	A3,  AO,  INC2
2702	LFPDUX	A4,  AO,  INC2
2703	LFPDUX	B2,  BO,  INC2
2704	LFPDUX	A5,  AO,  INC2
2705	LFPDUX	A6,  AO,  INC2
2706	LFPDUX	A7,  AO,  INC2
2707	LFPDUX	A8,  AO,  INC2
2708	bdz-	.L93
2709	.align 4
2710
/* Unrolled loop: four k steps per iteration (two from B1, two from B2), loads interleaved with the multiply-adds. */
2711.L92:
2712	fxcpmadd	f0,  B1, A1, f0
2713	LFPDUX	A1,  AO,  INC2
2714	fxcpmadd	f1,  B1, A2, f1
2715	LFPDUX	A2,  AO,  INC2
2716	fxcpmadd	f2,  B1, A3, f2
2717	LFPDUX	A3,  AO,  INC2
2718	fxcpmadd	f3,  B1, A4, f3
2719	LFPDUX	A4,  AO,  INC2
2720
2721	fxcsmadd	f0,  B1, A5, f0
2722	LFPDUX	A5,  AO,  INC2
2723	fxcsmadd	f1,  B1, A6, f1
2724	LFPDUX	A6,  AO,  INC2
2725	fxcsmadd	f2,  B1, A7, f2
2726	LFPDUX	A7,  AO,  INC2
2727	fxcsmadd	f3,  B1, A8, f3
2728	LFPDUX	A8,  AO,  INC2
2729	LFPDUX	B1,  BO,  INC2
2730
2731	fxcpmadd	f0,  B2, A1, f0
2732	LFPDUX	A1,  AO,  INC2
2733	fxcpmadd	f1,  B2, A2, f1
2734	LFPDUX	A2,  AO,  INC2
2735	fxcpmadd	f2,  B2, A3, f2
2736	LFPDUX	A3,  AO,  INC2
2737	fxcpmadd	f3,  B2, A4, f3
2738	LFPDUX	A4,  AO,  INC2
2739
2740	fxcsmadd	f0,  B2, A5, f0
2741	LFPDUX	A5,  AO,  INC2
2742	fxcsmadd	f1,  B2, A6, f1
2743	LFPDUX	A6,  AO,  INC2
2744	fxcsmadd	f2,  B2, A7, f2
2745	LFPDUX	A7,  AO,  INC2
2746	fxcsmadd	f3,  B2, A8, f3
2747	LFPDUX	A8,  AO,  INC2
2748	LFPDUX	B2,  BO,  INC2
2749	bdnz+	.L92
2750	.align 4
2751
/* Drain: last unrolled iteration; only the A operands still needed are loaded, B is not read again. */
2752.L93:
2753	fxcpmadd	f0,  B1, A1, f0
2754	LFPDUX	A1,  AO,  INC2
2755	fxcpmadd	f1,  B1, A2, f1
2756	LFPDUX	A2,  AO,  INC2
2757	fxcpmadd	f2,  B1, A3, f2
2758	LFPDUX	A3,  AO,  INC2
2759	fxcpmadd	f3,  B1, A4, f3
2760	LFPDUX	A4,  AO,  INC2
2761
2762	fxcsmadd	f0,  B1, A5, f0
2763	LFPDUX	A5,  AO,  INC2
2764	fxcsmadd	f1,  B1, A6, f1
2765	LFPDUX	A6,  AO,  INC2
2766	fxcsmadd	f2,  B1, A7, f2
2767	LFPDUX	A7,  AO,  INC2
2768	fxcsmadd	f3,  B1, A8, f3
2769	LFPDUX	A8,  AO,  INC2
2770
2771	fxcpmadd	f0,  B2, A1, f0
2772	fxcpmadd	f1,  B2, A2, f1
2773	fxcpmadd	f2,  B2, A3, f2
2774	fxcpmadd	f3,  B2, A4, f3
2775
2776	fxcsmadd	f0,  B2, A5, f0
2777	fxcsmadd	f1,  B2, A6, f1
2778	fxcsmadd	f2,  B2, A7, f2
2779	fxcsmadd	f3,  B2, A8, f3
2780	.align 4
2781
/* Load alpha into AP, then set CTR to the leftover k steps (count & 3). */
2782.L94:
2783	lfd	AP,  ALPHA(SP)
2784#ifdef TRMMKERNEL
/* TRMM path multiplies with fpmul below, so alpha is copied into both halves of AP. */
2785       fsmfp	AP, AP
2786#endif
2787
2788#if defined(TRMMKERNEL)
2789#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2790	sub	TEMP, K, KK
2791#elif defined(LEFT)
2792	addi	TEMP, KK, 8
2793#else
2794	addi	TEMP, KK, 1
2795#endif
2796	andi.	TEMP,  TEMP,  3
2797	mtspr	CTR, TEMP
2798#else
2799	andi.	r0,  K,  3
2800	mtspr	CTR, r0
2801#endif
/* No leftover steps: go to the write-back. */
2802	ble+	.L98
2803
/* B is read with the non-updating LFDX and BO is then advanced by INC explicitly. */
2804	LFDX	B1,  BO,  INC2
2805	LFPDUX	A1,  AO,  INC2
2806	LFPDUX	A2,  AO,  INC2
2807	LFPDUX	A3,  AO,  INC2
2808	LFPDUX	A4,  AO,  INC2
2809	add	BO, BO, INC
2810	bdz-	.L97
2811	.align 4
2812
/* Remainder loop: one k step per iteration. */
2813.L96:
2814	fxcpmadd	f0,  B1, A1, f0
2815	LFPDUX	A1,  AO,  INC2
2816	fxcpmadd	f1,  B1, A2, f1
2817	LFPDUX	A2,  AO,  INC2
2818	fxcpmadd	f2,  B1, A3, f2
2819	LFPDUX	A3,  AO,  INC2
2820	fxcpmadd	f3,  B1, A4, f3
2821	LFDX	B1,  BO,  INC2
2822	LFPDUX	A4,  AO,  INC2
2823	add	BO, BO, INC
2824	bdnz+	.L96
2825	.align 4
2826
/* Final remainder step (operands already loaded). */
2827.L97:
2828	fxcpmadd	f0,  B1, A1, f0
2829	fxcpmadd	f1,  B1, A2, f1
2830	fxcpmadd	f2,  B1, A3, f2
2831	fxcpmadd	f3,  B1, A4, f3
2832	.align 4
2833
/* Write-back: non-TRMM computes alpha * acc + C, TRMM computes alpha * acc. */
2834.L98:
2835#ifndef TRMMKERNEL
2836	LFPDUX	A1, CO1, INC2
2837	LFPDUX	B1, CO1, INC2
2838	LFPDUX	A3, CO1, INC2
2839   	LFPDUX	A5, CO1, INC2
2840
2841	fxcpmadd	f0,  AP, f0,  A1
2842	fxcpmadd	f1,  AP, f1,  B1
2843	fxcpmadd	f2,  AP, f2,  A3
2844	fxcpmadd	f3,  AP, f3,  A5
2845
/* NOTE(review): INCM7 presumably steps the pointer back so the stores land where the loads above read - confirm its definition. */
2846	STFPDUX	f0,  CO1, INCM7
2847	STFPDUX	f1,  CO1, INC2
2848	STFPDUX	f2,  CO1, INC2
2849	STFPDUX	f3,  CO1, INC2
2850#else
2851	fpmul	f0,  AP, f0
2852	fpmul	f1,  AP, f1
2853	fpmul	f2,  AP, f2
2854	fpmul	f3,  AP, f3
2855
2856	STFPDUX	f0,  CO1, INC2
2857	STFPDUX	f1,  CO1, INC2
2858	STFPDUX	f2,  CO1, INC2
2859	STFPDUX	f3,  CO1, INC2
2860#endif
2861
2862#ifdef TRMMKERNEL
2863#if ( defined(LEFT) &&  defined(TRANSA)) || \
2864    (!defined(LEFT) && !defined(TRANSA))
/* Advance AO/BO by the k steps this tile did not consume: K - KK - (8 or 1), scaled like the entry offsets. */
2865	sub	TEMP, K, KK
2866#ifdef LEFT
2867	addi	TEMP, TEMP, -8
2868#else
2869	addi	TEMP, TEMP, -1
2870#endif
2871	slwi	r0,   TEMP, 3 + BASE_SHIFT
2872	slwi	TEMP, TEMP, 0 + BASE_SHIFT
2873	add	AO, AO, r0
2874	add	BO, BO, TEMP
2875#endif
2876
2877#ifdef LEFT
2878	addi	KK, KK, 8
2879#endif
2880#endif
2881
/* Next 8-row tile: decrement I, reload f0 from the FZERO slot, loop while I > 0. */
2882	addic.	I, I, -1
2883	li	r0, FZERO
2884
2885	lfpsx	f0, SP, r0
2886	bgt+	.L91
2887	.align 4
2888
2889.L100:
2890	andi.	I, M,  4
2891	beq	.L110
2892
2893#if defined(TRMMKERNEL)
2894#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
2895	addi	BO,  B,  - 2 * SIZE
2896	fpmr	f1,  f0
2897	fpmr	f2,  f0
2898	fpmr	f3, f0
2899#else
2900	slwi	TEMP, KK, 2 + BASE_SHIFT
2901	slwi	r0,   KK, 0 + BASE_SHIFT
2902	add	AO, AO, TEMP
2903	add	BO, B,  r0
2904
2905	fpmr	f1,  f0
2906	addi	BO,  BO,  - 2 * SIZE
2907	fpmr	f2,  f0
2908	fpmr	f3, f0
2909#endif
2910
2911#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
2912	sub	TEMP, K, KK
2913#elif defined(LEFT)
2914	addi	TEMP, KK, 4
2915#else
2916	addi	TEMP, KK, 1
2917#endif
2918	srawi.	r0,  TEMP,  3
2919	mtspr	CTR, r0
2920	ble	.L104
2921#else
2922	addi	BO,  B,  - 2 * SIZE
2923	fpmr	f1,  f0
2924	fpmr	f2,  f0
2925	fpmr	f3, f0
2926
2927	srawi.	r0,  K,  3
2928	mtspr	CTR, r0
2929	ble	.L104
2930#endif
2931
2932	LFPDUX	B1,  BO,  INC2
2933	LFPDUX	A1,  AO,  INC2
2934	LFPDUX	A2,  AO,  INC2
2935	LFPDUX	A3,  AO,  INC2
2936	LFPDUX	A4,  AO,  INC2
2937	LFPDUX	B2,  BO,  INC2
2938	LFPDUX	A5,  AO,  INC2
2939	LFPDUX	A6,  AO,  INC2
2940	LFPDUX	A7,  AO,  INC2
2941	LFPDUX	A8,  AO,  INC2
2942	LFPDUX	B3,  BO,  INC2
2943	LFPDUX	B4,  BO,  INC2
2944
2945	bdz-	.L103
2946	.align 4
2947
/* .L102: steady-state loop, one pass per CTR count.  Each pass issues   */
/* 16 multiply-adds into f0-f3 (four groups, one per B1..B4; A1-A4 and   */
/* A5-A8 are each consumed twice) and reloads every operand it used:     */
/* 16 paired loads through AO and 4 through BO.                          */
/* NOTE(review): fxcpmadd / fxcsmadd presumably take the scalar factor   */
/* from the primary / secondary half of the B operand - confirm against  */
/* the FP2 instruction reference.                                        */
2948.L102:
2949	fxcpmadd	f0,  B1, A1, f0
2950	LFPDUX	A1,  AO,  INC2
2951	fxcpmadd	f1,  B1, A2, f1
2952	LFPDUX	A2,  AO,  INC2
2953	fxcsmadd	f2,  B1, A3, f2
2954	LFPDUX	A3,  AO,  INC2
2955	fxcsmadd	f3,  B1, A4, f3
2956	LFPDUX	A4,  AO,  INC2
2957	LFPDUX	B1,  BO,  INC2
2958
2959	fxcpmadd	f0,  B2, A5, f0
2960	LFPDUX	A5,  AO,  INC2
2961	fxcpmadd	f1,  B2, A6, f1
2962	LFPDUX	A6,  AO,  INC2
2963	fxcsmadd	f2,  B2, A7, f2
2964	LFPDUX	A7,  AO,  INC2
2965	fxcsmadd	f3,  B2, A8, f3
2966	LFPDUX	A8,  AO,  INC2
2967	LFPDUX	B2,  BO,  INC2
2968
2969	fxcpmadd	f0,  B3, A1, f0
2970	LFPDUX	A1,  AO,  INC2
2971	fxcpmadd	f1,  B3, A2, f1
2972	LFPDUX	A2,  AO,  INC2
2973	fxcsmadd	f2,  B3, A3, f2
2974	LFPDUX	A3,  AO,  INC2
2975	fxcsmadd	f3,  B3, A4, f3
2976	LFPDUX	A4,  AO,  INC2
2977	LFPDUX	B3,  BO,  INC2
2978
2979	fxcpmadd	f0,  B4, A5, f0
2980	LFPDUX	A5,  AO,  INC2
2981	fxcpmadd	f1,  B4, A6, f1
2982	LFPDUX	A6,  AO,  INC2
2983	fxcsmadd	f2,  B4, A7, f2
2984	LFPDUX	A7,  AO,  INC2
2985	fxcsmadd	f3,  B4, A8, f3
2986	LFPDUX	A8,  AO,  INC2
2987	LFPDUX	B4,  BO,  INC2
2988	bdnz+	.L102
2989	.align 4
2990
/* .L103: final pass.  Same 16 multiply-adds as .L102, but only the A    */
/* operands needed by the third and fourth groups are reloaded; nothing  */
/* more is read through BO.                                              */
2991.L103:
2992	fxcpmadd	f0,  B1, A1, f0
2993	LFPDUX	A1,  AO,  INC2
2994	fxcpmadd	f1,  B1, A2, f1
2995	LFPDUX	A2,  AO,  INC2
2996	fxcsmadd	f2,  B1, A3, f2
2997	LFPDUX	A3,  AO,  INC2
2998	fxcsmadd	f3,  B1, A4, f3
2999	LFPDUX	A4,  AO,  INC2
3000
3001	fxcpmadd	f0,  B2, A5, f0
3002	LFPDUX	A5,  AO,  INC2
3003	fxcpmadd	f1,  B2, A6, f1
3004	LFPDUX	A6,  AO,  INC2
3005	fxcsmadd	f2,  B2, A7, f2
3006	LFPDUX	A7,  AO,  INC2
3007	fxcsmadd	f3,  B2, A8, f3
3008	LFPDUX	A8,  AO,  INC2
3009
3010	fxcpmadd	f0,  B3, A1, f0
3011	fxcpmadd	f1,  B3, A2, f1
3012	fxcsmadd	f2,  B3, A3, f2
3013	fxcsmadd	f3,  B3, A4, f3
3014
3015	fxcpmadd	f0,  B4, A5, f0
3016	fxcpmadd	f1,  B4, A6, f1
3017	fxcsmadd	f2,  B4, A7, f2
3018	fxcsmadd	f3,  B4, A8, f3
3019	.align 4
3020
/* .L104: load ALPHA from the stack into AP, then run the leftover       */
/* (TEMP or K) & 7 steps one at a time in .L106/.L107.  A remainder of   */
/* zero branches directly to the write-back at .L108.                    */
3021.L104:
3022	lfd	AP,  ALPHA(SP)
3023#ifdef TRMMKERNEL
	/* NOTE(review): presumably copies alpha into the other half of AP  */
	/* so the paired fpmul in .L108 scales both lanes - confirm.        */
3024       fsmfp	AP, AP
3025#endif
3026
3027#if defined(TRMMKERNEL)
	/* Recompute the same step count that .L100 used. */
3028#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
3029	sub	TEMP, K, KK
3030#elif defined(LEFT)
3031	addi	TEMP, KK, 4
3032#else
3033	addi	TEMP, KK, 1
3034#endif
3035	andi.	TEMP,  TEMP,  7
3036	mtspr	CTR, TEMP
3037#else
3038	andi.	r0,  K,  7
3039	mtspr	CTR, r0
3040#endif
3041	ble+	.L108
3042
	/* Preload one step: two paired A loads and one scalar B load.     */
	/* LFDX does not update BO, so BO is advanced by INC explicitly.    */
3043	LFPDUX	A1,  AO,  INC2
3044	LFDX	B1,  BO,  INC2
3045	LFPDUX	A2,  AO,  INC2
3046	add	BO, BO, INC
3047	bdz-	.L107
3048	.align 4
3049
/* .L106: one step per pass, accumulating into f0 and f1 only. */
3050.L106:
3051	fxcpmadd	f0,  B1, A1, f0
3052	LFPDUX	A1,  AO,  INC2
3053	fxcpmadd	f1,  B1, A2, f1
3054	LFDX	B1,  BO,  INC2
3055	LFPDUX	A2,  AO,  INC2
3056	add	BO, BO, INC
3057	bdnz+	.L106
3058	.align 4
3059
/* .L107: last step, no further loads. */
3060.L107:
3061	fxcpmadd	f0,  B1, A1, f0
3062	fxcpmadd	f1,  B1, A2, f1
3063	.align 4
3064
/* .L108: write-back for the M & 4 case.  Folds the partial sums         */
/* (f0 += f2, f1 += f3), scales by AP and stores two pairs through CO1.  */
/* The non-TRMM build first loads two pairs through CO1 and uses them as */
/* the addend of the multiply-add; the TRMM build only multiplies.       */
3065.L108:
3066#ifndef TRMMKERNEL
3067	LFPDUX	A1, CO1, INC2
3068	LFPDUX	B1, CO1, INC2
3069
3070	fpadd	f0, f0, f2
3071	fpadd	f1, f1, f3
3072
3073	fxcpmadd	f0,  AP, f0,  A1
3074	fxcpmadd	f1,  AP, f1,  B1
3075
	/* INCM3 steps CO1 back so the first store lands on the pair read   */
	/* into A1 (the value of INCM3 on this path is set outside this     */
	/* section - confirm).                                              */
3076	STFPDUX	f0,  CO1, INCM3
3077	STFPDUX	f1,  CO1, INC2
3078#else
3079	fpadd	f0, f0, f2
3080	fpadd	f1, f1, f3
3081
3082	fpmul	f0,  AP, f0
3083	fpmul	f1,  AP, f1
3084
3085	STFPDUX	f0,  CO1, INC2
3086	STFPDUX	f1,  CO1, INC2
3087#endif
3088
3089
3090#ifdef TRMMKERNEL
	/* TRMM bookkeeping: for these variants advance AO and BO by        */
	/* (K - KK - 4 or 1) scaled for 4 and 1 elements per step.          */
3091#if ( defined(LEFT) &&  defined(TRANSA)) || \
3092    (!defined(LEFT) && !defined(TRANSA))
3093	sub	TEMP, K, KK
3094#ifdef LEFT
3095	addi	TEMP, TEMP, -4
3096#else
3097	addi	TEMP, TEMP, -1
3098#endif
3099	slwi	r0,   TEMP, 2 + BASE_SHIFT
3100	slwi	TEMP, TEMP, 0 + BASE_SHIFT
3101	add	AO, AO, r0
3102	add	BO, BO, TEMP
3103#endif
3104
3105#ifdef LEFT
3106	addi	KK, KK, 4
3107#endif
3108#endif
3109
	/* Reload f0 from the FZERO stack slot for the next block. */
3110	li	r0, FZERO
3111	lfpsx	f0, SP, r0
3112	.align 4
3113
/* .L110: taken only when bit 1 of M is set (andi. I, M, 2); otherwise   */
/* control skips to .L120.  Same structure as .L100: position BO (and    */
/* AO/BO by KK for some TRMM variants), seed f1-f3 from f0, and load CTR */
/* with (TEMP or K) >> 3 for the unrolled loop at .L112.                 */
3114.L110:
3115	andi.	I, M,  2
3116	beq	.L120
3117
3118#if defined(TRMMKERNEL)
3119#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
3120	addi	BO,  B,  - 2 * SIZE
3121	fpmr	f1,  f0
3122	fpmr	f2,  f0
3123	fpmr	f3,  f0
3124#else
	/* AO += KK << (1 + BASE_SHIFT); BO = B + (KK << BASE_SHIFT) - 2*SIZE */
3125	slwi	TEMP, KK, 1 + BASE_SHIFT
3126	slwi	r0,   KK, 0 + BASE_SHIFT
3127	add	AO, AO, TEMP
3128	add	BO, B,  r0
3129
3130	fpmr	f1,  f0
3131	addi	BO,  BO,  - 2 * SIZE
3132	fpmr	f2,  f0
3133	fpmr	f3,  f0
3134#endif
3135
	/* TEMP = step count: K - KK, KK + 2 (LEFT) or KK + 1 */
3136#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
3137	sub	TEMP, K, KK
3138#elif defined(LEFT)
3139	addi	TEMP, KK, 2
3140#else
3141	addi	TEMP, KK, 1
3142#endif
3143	srawi.	r0,  TEMP,  3
3144	mtspr	CTR, r0
3145	ble	.L114
3146#else
3147	addi	BO,  B,  - 2 * SIZE
3148	fpmr	f1,  f0
3149	fpmr	f2,  f0
3150	fpmr	f3,  f0
3151
3152	srawi.	r0,  K,  3
3153	mtspr	CTR, r0
3154	ble	.L114
3155#endif
3156
	/* Preload A1-A8 (via AO) and B1-B4 (via BO) for the first pass. */
3157	LFPDUX	A1,  AO,  INC2
3158	LFPDUX	A2,  AO,  INC2
3159	LFPDUX	B1,  BO,  INC2
3160
3161	LFPDUX	A3,  AO,  INC2
3162	LFPDUX	A4,  AO,  INC2
3163	LFPDUX	B2,  BO,  INC2
3164
3165	LFPDUX	A5,  AO,  INC2
3166	LFPDUX	A6,  AO,  INC2
3167	LFPDUX	B3,  BO,  INC2
3168
3169	LFPDUX	A7,  AO,  INC2
3170	LFPDUX	A8,  AO,  INC2
3171	LFPDUX	B4,  BO,  INC2
	/* A count of 1 skips the steady-state loop and drains in .L113. */
3172	bdz-	.L113
3173	.align 4
3174
/* .L112: steady-state loop, one pass per CTR count.  Each pass issues   */
/* 8 multiply-adds into f0-f3 (A1-A8 each used once, two per B1..B4)     */
/* and reloads all twelve operands for the next pass.                    */
3175.L112:
3176	fxcpmadd	f0,  B1, A1, f0
3177	LFPDUX	A1,  AO,  INC2
3178	fxcsmadd	f1,  B1, A2, f1
3179	LFPDUX	A2,  AO,  INC2
3180	LFPDUX	B1,  BO,  INC2
3181	fxcpmadd	f2,  B2, A3, f2
3182	LFPDUX	A3,  AO,  INC2
3183	fxcsmadd	f3,  B2, A4, f3
3184	LFPDUX	A4,  AO,  INC2
3185	LFPDUX	B2,  BO,  INC2
3186	fxcpmadd	f0,  B3, A5, f0
3187	LFPDUX	A5,  AO,  INC2
3188	fxcsmadd	f1,  B3, A6, f1
3189	LFPDUX	A6,  AO,  INC2
3190	LFPDUX	B3,  BO,  INC2
3191	fxcpmadd	f2,  B4, A7, f2
3192	LFPDUX	A7,  AO,  INC2
3193	fxcsmadd	f3,  B4, A8, f3
3194	LFPDUX	A8,  AO,  INC2
3195	LFPDUX	B4,  BO,  INC2
3196	bdnz+	.L112
3197	.align 4
3198
/* .L113: final pass, the same 8 multiply-adds with no further loads. */
3199.L113:
3200	fxcpmadd	f0,  B1, A1, f0
3201	fxcsmadd	f1,  B1, A2, f1
3202	fxcpmadd	f2,  B2, A3, f2
3203	fxcsmadd	f3,  B2, A4, f3
3204	fxcpmadd	f0,  B3, A5, f0
3205	fxcsmadd	f1,  B3, A6, f1
3206	fxcpmadd	f2,  B4, A7, f2
3207	fxcsmadd	f3,  B4, A8, f3
3208	.align 4
3209
/* .L114: load ALPHA into AP and run the leftover (TEMP or K) & 7 steps  */
/* one at a time in .L116/.L117.  A remainder of zero goes to .L118.     */
3210.L114:
3211	lfd	AP,  ALPHA(SP)
3212#ifdef TRMMKERNEL
	/* NOTE(review): presumably copies alpha into the other half of AP  */
	/* for the paired fpmul in .L118 - confirm.                         */
3213       fsmfp	AP, AP
3214#endif
3215
3216#if defined(TRMMKERNEL)
	/* Recompute the same step count that .L110 used. */
3217#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
3218	sub	TEMP, K, KK
3219#elif defined(LEFT)
3220	addi	TEMP, KK, 2
3221#else
3222	addi	TEMP, KK, 1
3223#endif
3224	andi.	TEMP,  TEMP,  7
3225	mtspr	CTR, TEMP
3226#else
3227	andi.	r0,  K,  7
3228	mtspr	CTR, r0
3229#endif
3230	ble+	.L118
3231
	/* Preload one step: one paired A load, one scalar B load; BO is    */
	/* advanced by INC because LFDX does not update it.                 */
3232	LFPDUX	A1,  AO,  INC2
3233	LFDX	B1,  BO,  INC2
3234	add	BO, BO, INC
3235	bdz-	.L117
3236	.align 4
3237
/* .L116: one step per pass, accumulating into f0 only. */
3238.L116:
3239	fxcpmadd	f0,  B1, A1, f0
3240	LFPDUX	A1,  AO,  INC2
3241	LFDX	B1,  BO,  INC2
3242	add	BO, BO, INC
3243	bdnz+	.L116
3244	.align 4
3245
/* .L117: last step, no further loads. */
3246.L117:
3247	fxcpmadd	f0,  B1, A1, f0
3248	.align 4
3249
/* .L118: write-back for the M & 2 case.  Sums f0-f3 into f0, scales by  */
/* AP into f1 and stores one pair through CO1.  f0 is reloaded from the  */
/* FZERO stack slot before the store, which is why the result is kept in */
/* f1.  The non-TRMM build loads the addend with LFPDX, which leaves CO1 */
/* unchanged, so the updating store writes the same location.            */
3250.L118:
3251#ifndef TRMMKERNEL
3252	LFPDX	A1, CO1, INC2
3253
3254	fpadd	f0, f0, f1
3255	fpadd	f2, f3, f2
3256	fpadd	f0, f0, f2
3257	fxcpmadd	f1,  AP, f0,  A1
3258
3259	li	r0, FZERO
3260	lfpsx	f0, SP, r0
3261
3262	STFPDUX	f1,  CO1, INC2
3263#else
3264	fpadd	f0, f0, f1
3265	fpadd	f2, f3, f2
3266	fpadd	f0, f0, f2
3267	fpmul	f1,  AP, f0
3268
3269	li	r0, FZERO
3270	lfpsx	f0, SP, r0
3271
3272	STFPDUX	f1,  CO1, INC2
3273#endif
3274
3275
3276#ifdef TRMMKERNEL
	/* TRMM bookkeeping: for these variants advance AO and BO by        */
	/* (K - KK - 2 or 1) scaled for 2 and 1 elements per step.          */
3277#if ( defined(LEFT) &&  defined(TRANSA)) || \
3278    (!defined(LEFT) && !defined(TRANSA))
3279	sub	TEMP, K, KK
3280#ifdef LEFT
3281	addi	TEMP, TEMP, -2
3282#else
3283	addi	TEMP, TEMP, -1
3284#endif
3285	slwi	r0,   TEMP, 1 + BASE_SHIFT
3286	slwi	TEMP, TEMP, 0 + BASE_SHIFT
3287	add	AO, AO, r0
3288	add	BO, BO, TEMP
3289#endif
3290
3291#ifdef LEFT
3292	addi	KK, KK, 2
3293#endif
3294#endif
3295	.align 4
3296
/* .L120: taken only when bit 0 of M is set (andi. I, M, 1); otherwise   */
/* control goes to the epilogue at .L999.  Positions BO (and AO/BO by KK */
/* for some TRMM variants), seeds f1-f3 from f0 and loads CTR with       */
/* (TEMP or K) >> 3 for the unrolled loop at .L122.                      */
3297.L120:
3298	andi.	I, M,  1
3299	beq	.L999
3300
3301#if defined(TRMMKERNEL)
3302#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
3303	addi	BO,  B,  - 2 * SIZE
3304	fpmr	f1,  f0
3305	fpmr	f2,  f0
3306	fpmr	f3,  f0
3307#else
	/* AO and BO are both offset by KK << BASE_SHIFT here. */
3308	slwi	TEMP, KK, 0 + BASE_SHIFT
3309	slwi	r0,   KK, 0 + BASE_SHIFT
3310	add	AO, AO, TEMP
3311	add	BO, B,  r0
3312
3313	fpmr	f1,  f0
3314	addi	BO,  BO,  - 2 * SIZE
3315	fpmr	f2,  f0
3316	fpmr	f3,  f0
3317#endif
3318
	/* TEMP = step count: K - KK, or KK + 1 (both remaining branches    */
	/* add 1).                                                          */
3319#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
3320	sub	TEMP, K, KK
3321#elif defined(LEFT)
3322	addi	TEMP, KK, 1
3323#else
3324	addi	TEMP, KK, 1
3325#endif
3326	srawi.	r0,  TEMP,  3
3327	mtspr	CTR, r0
3328	ble	.L124
3329#else
3330	addi	BO,  B,  - 2 * SIZE
3331	fpmr	f1,  f0
3332	fpmr	f2,  f0
3333	fpmr	f3,  f0
3334
3335	srawi.	r0,  K,  3
3336	mtspr	CTR, r0
3337	ble	.L124
3338#endif
3339
	/* Preload four pairs through AO and four through BO. */
3340	LFPDUX	A1,  AO,  INC2
3341	LFPDUX	B1,  BO,  INC2
3342	LFPDUX	A2,  AO,  INC2
3343	LFPDUX	B2,  BO,  INC2
3344	LFPDUX	A3,  AO,  INC2
3345	LFPDUX	B3,  BO,  INC2
3346	LFPDUX	A4,  AO,  INC2
3347	LFPDUX	B4,  BO,  INC2
	/* A count of 1 skips the steady-state loop and drains in .L123. */
3348	bdz-	.L123
3349	.align 4
3350
/* .L122: steady-state loop, one pass per CTR count.  Four paired        */
/* multiply-adds (A1*B1 .. A4*B4) go into the separate accumulators      */
/* f0-f3, each followed by reloads of the operands it used.              */
3351.L122:
3352	fpmadd	f0,  A1, B1, f0
3353	LFPDUX	A1,  AO,  INC2
3354	LFPDUX	B1,  BO,  INC2
3355	fpmadd	f1,  A2, B2, f1
3356	LFPDUX	A2,  AO,  INC2
3357	LFPDUX	B2,  BO,  INC2
3358	fpmadd	f2,  A3, B3, f2
3359	LFPDUX	A3,  AO,  INC2
3360	LFPDUX	B3,  BO,  INC2
3361	fpmadd	f3,  A4, B4, f3
3362	LFPDUX	A4,  AO,  INC2
3363	LFPDUX	B4,  BO,  INC2
3364	bdnz+	.L122
3365	.align 4
3366
/* .L123: final pass, the same four multiply-adds with no further loads. */
3367.L123:
3368	fpmadd	f0,  A1, B1, f0
3369	fpmadd	f1,  A2, B2, f1
3370	fpmadd	f2,  A3, B3, f2
3371	fpmadd	f3,  A4, B4, f3
3372	.align 4
3373
/* .L124: load ALPHA into AP and run the leftover (TEMP or K) & 7 steps  */
/* with scalar loads and scalar fmadd in .L126/.L127.  A remainder of    */
/* zero goes to .L128.                                                   */
3374.L124:
3375	lfd	AP,  ALPHA(SP)
3376#ifdef TRMMKERNEL
	/* NOTE(review): presumably copies alpha into the other half of AP  */
	/* for the fpmul in .L128 - confirm.                                */
3377       fsmfp	AP, AP
3378#endif
3379
3380#if defined(TRMMKERNEL)
	/* Recompute the same step count that .L120 used. */
3381#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
3382	sub	TEMP, K, KK
3383#elif defined(LEFT)
3384	addi	TEMP, KK, 1
3385#else
3386	addi	TEMP, KK, 1
3387#endif
3388	andi.	TEMP,  TEMP,  7
3389	mtspr	CTR, TEMP
3390#else
3391	andi.	r0,  K,  7
3392	mtspr	CTR, r0
3393#endif
3394	ble+	.L128
3395
	/* LFDX does not update its base, so AO and BO are advanced by INC  */
	/* explicitly after each load.                                      */
3396	LFDX	A1,  AO,  INC2
3397	LFDX	B1,  BO,  INC2
3398	add	AO, AO, INC
3399	add	BO, BO, INC
3400	bdz-	.L127
3401	.align 4
3402
/* .L126: one scalar multiply-add per pass into f0. */
3403.L126:
3404	fmadd	f0,  A1, B1, f0
3405	LFDX	A1,  AO,  INC2
3406	LFDX	B1,  BO,  INC2
3407	add	AO, AO, INC
3408	add	BO, BO, INC
3409	bdnz+	.L126
3410	.align 4
3411
/* .L127: last step, no further loads. */
3412.L127:
3413	fmadd	f0,  A1, B1, f0
3414	.align 4
3415
/* .L128: write-back for the M & 1 case.  Sums f0-f3 into f0, then adds  */
/* f1 (written by fsmtp from f0) to f0 with a scalar fadd, scales by AP  */
/* and stores one scalar through CO1.  No TRMM pointer bookkeeping       */
/* follows; control falls into the epilogue at .L999.                    */
/* NOTE(review): fsmtp presumably moves the secondary half of f0 into    */
/* the primary half of f1 so the fadd combines both lanes - confirm.     */
3416.L128:
3417#ifndef TRMMKERNEL
	/* The addend is loaded without updating CO1. */
3418	LFDX	A1, CO1, INC2
3419	fpadd	f0, f0, f1
3420	fpadd	f2, f2, f3
3421	fpadd	f0, f0, f2
3422	fsmtp	f1, f0
3423	fadd	f0, f0, f1
3424	fmadd	f0,  AP, f0,  A1
3425#else
3426	fpadd	f0, f0, f1
3427	fpadd	f2, f2, f3
3428	fpadd	f0, f0, f2
3429	fsmtp	f1, f0
3430	fadd	f0, f0, f1
3431	fpmul	f0,  AP, f0
3432#endif
3433	STFDUX	f0,  CO1, INC2
3434	.align 4
3435
/* .L999: epilogue.  Reloads r14-r31 and f14-f31 from the stack and      */
/* returns with blr.                                                     */
3436.L999:
	/* Bias SP by 12 so each lwzu (pre-increment by 4) walks 18         */
	/* consecutive words starting at the original SP + 16.              */
3437	addi	SP, SP, 12
3438
3439	lwzu	r14,   4(SP)
3440	lwzu	r15,   4(SP)
3441
3442	lwzu	r16,   4(SP)
3443	lwzu	r17,   4(SP)
3444	lwzu	r18,   4(SP)
3445	lwzu	r19,   4(SP)
3446
3447	lwzu	r20,   4(SP)
3448	lwzu	r21,   4(SP)
3449	lwzu	r22,   4(SP)
3450	lwzu	r23,   4(SP)
3451
3452	lwzu	r24,   4(SP)
3453	lwzu	r25,   4(SP)
3454	lwzu	r26,   4(SP)
3455	lwzu	r27,   4(SP)
3456
3457	lwzu	r28,   4(SP)
3458	lwzu	r29,   4(SP)
3459	lwzu	r30,   4(SP)
3460	lwzu	r31,   4(SP)
3461
	/* Undo the 12-byte bias, then walk the FP save slots with a        */
	/* stride of 16 bytes held in r0 (f31 first, f14 last).             */
3462	subi	SP, SP, 12
3463	li	r0, 16
3464
3465	lfpdux	f31, SP, r0
3466	lfpdux	f30, SP, r0
3467	lfpdux	f29, SP, r0
3468	lfpdux	f28, SP, r0
3469	lfpdux	f27, SP, r0
3470	lfpdux	f26, SP, r0
3471	lfpdux	f25, SP, r0
3472	lfpdux	f24, SP, r0
3473	lfpdux	f23, SP, r0
3474	lfpdux	f22, SP, r0
3475	lfpdux	f21, SP, r0
3476	lfpdux	f20, SP, r0
3477	lfpdux	f19, SP, r0
3478	lfpdux	f18, SP, r0
3479	lfpdux	f17, SP, r0
3480	lfpdux	f16, SP, r0
3481	lfpdux	f15, SP, r0
3482	lfpdux	f14, SP, r0
	/* Step past the last 16-byte slot and return. */
3483	addi	SP, SP, 16
3484	blr
3485	.align 4
3486
/* .L1000: entry of the code path whose C accesses use scalar            */
/* LFDUX/LFSDUX/STFDUX/STFSDUX with one-element strides (the condition   */
/* that selects this path is outside this section).  Sets the negative   */
/* strides INCM1/3/5/7 to -1/-3/-5/-7 elements, biases C by one element  */
/* for the pre-increment accesses, and computes J = N >> 2; when that is */
/* <= 0 control goes to .L1050.                                          */
3487.L1000:
3488	li	INCM1, -1 * SIZE
3489	li	INCM3, -3 * SIZE
3490	li	INCM5, -5 * SIZE
3491	li	INCM7, -7 * SIZE
3492
3493	addi	C, C, - 1 * SIZE
3494	srawi.	J, N,  2
3495	ble	.L1050
3496	.align 4
3497
/* .L1010: head of the four-column pass.  CO1..CO4 become four pointers  */
/* spaced LDC apart starting at C, and C itself advances by 4 * LDC.     */
/* AO restarts at A - 4*SIZE, f0 is loaded from the FZERO stack slot,    */
/* and I = M >> 3 counts the blocks handled at .L1011; when I <= 0       */
/* control goes to .L1020.                                               */
3498.L1010:
3499	mr	CO1, C
3500	add	CO2, C,   LDC
3501	add	CO3, CO2, LDC
3502	add	CO4, CO3, LDC
3503	add	C,   CO4, LDC
3504
3505#if defined(TRMMKERNEL) &&  defined(LEFT)
	/* TRMM with LEFT: KK restarts from OFFSET for every pass. */
3506	mr	KK, OFFSET
3507#endif
3508
3509	addi	AO, A, -4 * SIZE
3510
3511	li	r0, FZERO
3512	lfpsx	f0, SP, r0
3513
3514	srawi.	I, M,  3
3515	ble	.L1020
3516	.align 4
3517
/* .L1011: set-up for one block that accumulates into f0-f15.  Two       */
/* pointers walk each operand stream with stride INC4: AO / AO2          */
/* (AO + 2*SIZE) and BO / BO2 (BO + 2*SIZE).  CTR is loaded with         */
/* (TEMP or K) >> 2, the pass count of the 4-step loop at .L1012; when   */
/* that is <= 0 control goes to .L1014 with only f0, f1, f4, f8 and f12  */
/* initialised (.L1014 initialises the rest on that path).               */
3518.L1011:
3519#if defined(TRMMKERNEL)
3520#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
3521	addi	AO2, AO,   2 * SIZE
3522	fpmr	f4,  f0
3523	addi	BO,  B,  - 4 * SIZE
3524	fpmr	f8,  f0
3525	addi	BO2, B,  - 2 * SIZE
3526	fpmr	f12, f0
3527#else
	/* AO += KK << (3 + BASE_SHIFT); BO = B + (KK << (2 + BASE_SHIFT)) */
3528	slwi	TEMP, KK, 3 + BASE_SHIFT
3529	slwi	r0,   KK, 2 + BASE_SHIFT
3530	add	AO, AO, TEMP
3531	add	BO, B,  r0
3532
3533	addi	AO2, AO,   2 * SIZE
3534	fpmr	f4,  f0
3535	addi	BO,  BO, - 4 * SIZE
3536	fpmr	f8,  f0
3537	addi	BO2, BO,   2 * SIZE
3538	fpmr	f12, f0
3539#endif
3540
3541
	/* TEMP = step count: K - KK, KK + 8 (LEFT) or KK + 4 */
3542#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
3543	sub	TEMP, K, KK
3544#elif defined(LEFT)
3545	addi	TEMP, KK, 8
3546#else
3547	addi	TEMP, KK, 4
3548#endif
3549	srawi.	TEMP,  TEMP,  2
3550 	fpmr	f1,  f0
3551	mtspr	CTR, TEMP
3552	ble	.L1014
3553
3554#else
3555	addi	AO2, AO,   2 * SIZE
3556	fpmr	f4,  f0
3557	addi	BO,  B,  - 4 * SIZE
3558	fpmr	f8,  f0
3559	addi	BO2, B,  - 2 * SIZE
3560	fpmr	f12, f0
3561
3562	srawi.	r0,  K,  2
3563 	fpmr	f1,  f0
3564	mtspr	CTR, r0
3565	ble	.L1014
3566#endif
3567
	/* Preload the operands the first loop pass consumes, interleaved   */
	/* with seeding the remaining accumulators from f0.                 */
3568	LFPDUX	A1,  AO, INC4
3569	fpmr	f5,  f0
3570	LFPDUX	A3,  AO, INC4
3571	fpmr	f9,  f0
3572	LFPDUX	B1,  BO, INC4
3573	fpmr	f13, f0
3574
3575	LFPDUX	A5,  AO, INC4
3576	fpmr	f2,  f0
3577	LFPDUX	A6,  AO, INC4
3578	fpmr	f6,  f0
3579	LFPDUX	B3,  BO, INC4
3580	fpmr	f10, f0
3581	LFPDUX	A7,  AO, INC4
3582	fpmr	f14, f0
3583
3584	LFPDUX	A8,  AO, INC4
3585	fpmr	f3,  f0
3586	LFPDUX	B5,  BO, INC4
3587	fpmr	f7,  f0
3588	LFPDUX	A9,  AO, INC4
3589	fpmr	f11, f0
3590	LFPDUX	A2, AO2, INC4
3591	fpmr	f15, f0
3592	LFPDUX	B2, BO2, INC4
	/* A count of 1 skips the steady-state loop and drains in .L1013. */
3593	bdz-	.L1013
3594	.align 4
3595
/* .L1012: steady-state loop, one pass per CTR count.  The pass is four  */
/* sub-steps (marked 1-4); each issues 16 multiply-adds, one into each   */
/* of f0-f15, pairing two B registers with four A registers.  Loads for  */
/* later sub-steps and for the next pass are interleaved, so every       */
/* register is rewritten only after its last use in the pass.            */
/* NOTE(review): the nops presumably pad issue slots between the         */
/* multiply-adds - confirm before removing any.                          */
3596.L1012:
3597
3598## 1 ##
3599	fxcpmadd	f0,  B1, A1, f0
3600	nop
3601	fxcsmadd	f4,  B1, A1, f4
3602	nop
3603	fxcpmadd	f8,  B2, A1, f8
3604	LFPDUX	B4, BO2, INC4
3605	fxcsmadd	f12, B2, A1, f12
3606	LFPDUX	B6,  BO, INC4
3607
3608	fxcpmadd	f1,  B1, A2, f1
3609	nop
3610	fxcsmadd	f5,  B1, A2, f5
3611	LFPDUX	A4, AO2, INC4
3612	fxcpmadd	f9,  B2, A2, f9
3613	LFPDUX	A10, AO, INC4
3614	fxcsmadd	f13, B2, A2, f13
3615	nop
3616
3617	fxcpmadd	f2,  B1, A3, f2
3618	nop
3619	fxcsmadd	f6,  B1, A3, f6
3620	nop
3621	fxcpmadd	f10, B2, A3, f10
3622	nop
3623	fxcsmadd	f14, B2, A3, f14
3624	nop
3625
3626	fxcpmadd	f3,  B1, A4, f3
3627	nop
3628	fxcsmadd	f7,  B1, A4, f7
3629	LFPDUX	A2, AO2, INC4
3630	fxcpmadd	f11, B2, A4, f11
3631	LFPDUX	A1,  AO, INC4
3632	fxcsmadd	f15, B2, A4, f15
3633	nop
3634
3635## 2 ##
3636
3637	fxcpmadd	f0,  B3, A5, f0
3638	nop
3639	fxcsmadd	f4,  B3, A5, f4
3640	nop
3641	fxcpmadd	f8,  B4, A5, f8
3642	LFPDUX	B2, BO2, INC4
3643	fxcsmadd	f12, B4, A5, f12
3644	LFPDUX	B1,  BO, INC4
3645
3646	fxcpmadd	f1,  B3, A2, f1
3647	nop
3648	fxcsmadd	f5,  B3, A2, f5
3649	LFPDUX	A4, AO2, INC4
3650	fxcpmadd	f9,  B4, A2, f9
3651	LFPDUX	A3,  AO, INC4
3652	fxcsmadd	f13, B4, A2, f13
3653	nop
3654
3655	fxcpmadd	f2,  B3, A6, f2
3656	nop
3657	fxcsmadd	f6,  B3, A6, f6
3658	nop
3659	fxcpmadd	f10, B4, A6, f10
3660	nop
3661	fxcsmadd	f14, B4, A6, f14
3662	nop
3663
3664	fxcpmadd	f3,  B3, A4, f3
3665	nop
3666	fxcsmadd	f7,  B3, A4, f7
3667	LFPDUX	A2, AO2, INC4
3668	fxcpmadd	f11, B4, A4, f11
3669	LFPDUX	A5,  AO, INC4
3670	fxcsmadd	f15, B4, A4, f15
3671	nop
3672
3673## 3 ##
3674
3675	fxcpmadd	f0,  B5, A7, f0
3676	nop
3677	fxcsmadd	f4,  B5, A7, f4
3678	nop
3679	fxcpmadd	f8,  B2, A7, f8
3680	LFPDUX	B4, BO2, INC4
3681	fxcsmadd	f12, B2, A7, f12
3682	LFPDUX	B3,  BO, INC4
3683
3684	fxcpmadd	f1,  B5, A2, f1
3685	nop
3686	fxcsmadd	f5,  B5, A2, f5
3687	LFPDUX	A4, AO2, INC4
3688	fxcpmadd	f9,  B2, A2, f9
3689	LFPDUX	A6,  AO, INC4
3690	fxcsmadd	f13, B2, A2, f13
3691	nop
3692
3693	fxcpmadd	f2,  B5, A8, f2
3694	nop
3695	fxcsmadd	f6,  B5, A8, f6
3696	nop
3697	fxcpmadd	f10, B2, A8, f10
3698	nop
3699	fxcsmadd	f14, B2, A8, f14
3700	nop
3701
3702	fxcpmadd	f3,  B5, A4, f3
3703	nop
3704	fxcsmadd	f7,  B5, A4, f7
3705	LFPDUX	A2, AO2, INC4
3706	fxcpmadd	f11, B2, A4, f11
3707	LFPDUX	A7,  AO, INC4
3708	fxcsmadd	f15, B2, A4, f15
3709	nop
3710
3711## 4 ##
3712	fxcpmadd	f0,  B6, A9, f0
3713	nop
3714	fxcsmadd	f4,  B6, A9, f4
3715	nop
3716	fxcpmadd	f8,  B4, A9, f8
3717	LFPDUX	B2, BO2, INC4
3718	fxcsmadd	f12, B4, A9, f12
3719	LFPDUX	B5,  BO, INC4
3720
3721	fxcpmadd	f1,  B6, A2, f1
3722	nop
3723	fxcsmadd	f5,  B6, A2, f5
3724	LFPDUX	A4, AO2, INC4
3725	fxcpmadd	f9,  B4, A2, f9
3726	LFPDUX	A8,  AO, INC4
3727	fxcsmadd	f13, B4, A2, f13
3728	nop
3729
3730	fxcpmadd	f2,  B6, A10, f2
3731	nop
3732	fxcsmadd	f6,  B6, A10, f6
3733	nop
3734	fxcpmadd	f10, B4, A10, f10
3735	nop
3736	fxcsmadd	f14, B4, A10, f14
3737	nop
3738
3739	fxcpmadd	f3,  B6, A4, f3
3740	LFPDUX	A2, AO2, INC4
3741	fxcsmadd	f7,  B6, A4, f7
3742	LFPDUX	A9,  AO, INC4
3743	fxcpmadd	f11, B4, A4, f11
3744	nop
3745	fxcsmadd	f15, B4, A4, f15
3746	bdnz+	.L1012
3747	.align 4
3748
/* .L1013: final pass of the 4-step loop.  The arithmetic matches        */
/* .L1012, but the slots that would prefetch the next pass are reused:   */
/* the non-TRMM build loads values through CO1 and CO2 into registers    */
/* whose operand has already been consumed (A1, B1, A3, A5, B3, A6, A7,  */
/* B2), and the TRMM build keeps a nop in the same slot.  .L1018 relies  */
/* on these registers and on where CO1/CO2 are left.                     */
3749.L1013:
3750## 1 ##
3751
3752	fxcpmadd	f0,  B1, A1, f0
3753	nop
3754	fxcsmadd	f4,  B1, A1, f4
3755	nop
3756	fxcpmadd	f8,  B2, A1, f8
3757	LFPDUX	B4, BO2, INC4
3758	fxcsmadd	f12, B2, A1, f12
3759	LFPDUX	B6,  BO, INC4
3760
3761	fxcpmadd	f1,  B1, A2, f1
3762	nop
3763	fxcsmadd	f5,  B1, A2, f5
3764	LFPDUX	A4, AO2, INC4
3765	fxcpmadd	f9,  B2, A2, f9
3766	LFPDUX	A10, AO, INC4
3767	fxcsmadd	f13, B2, A2, f13
3768	nop
3769
3770	fxcpmadd	f2,  B1, A3, f2
3771	nop
3772	fxcsmadd	f6,  B1, A3, f6
3773	nop
3774	fxcpmadd	f10, B2, A3, f10
3775	nop
3776	fxcsmadd	f14, B2, A3, f14
3777	nop
3778
3779	fxcpmadd	f3,  B1, A4, f3
3780	nop
3781	fxcsmadd	f7,  B1, A4, f7
3782	LFPDUX	A2, AO2, INC4
3783	fxcpmadd	f11, B2, A4, f11
3784#ifndef TRMMKERNEL
	/* First load through CO1 (stride INC, then INC2 for the next three) */
3785	LFDUX	A1, CO1, INC
3786#else
3787	nop
3788#endif
3789	fxcsmadd	f15, B2, A4, f15
3790	nop
3791
3792## 2 ##
3793
3794	fxcpmadd	f0,  B3, A5, f0
3795	nop
3796	fxcsmadd	f4,  B3, A5, f4
3797	nop
3798	fxcpmadd	f8,  B4, A5, f8
3799	LFPDUX	B2, BO2, INC4
3800	fxcsmadd	f12, B4, A5, f12
3801#ifndef TRMMKERNEL
3802	LFDUX	B1, CO1, INC2
3803#else
3804	nop
3805#endif
3806
3807	fxcpmadd	f1,  B3, A2, f1
3808	nop
3809	fxcsmadd	f5,  B3, A2, f5
3810	LFPDUX	A4, AO2, INC4
3811	fxcpmadd	f9,  B4, A2, f9
3812#ifndef TRMMKERNEL
3813	LFDUX	A3, CO1, INC2
3814#else
3815	nop
3816#endif
3817	fxcsmadd	f13, B4, A2, f13
3818	nop
3819
3820	fxcpmadd	f2,  B3, A6, f2
3821	nop
3822	fxcsmadd	f6,  B3, A6, f6
3823	nop
3824	fxcpmadd	f10, B4, A6, f10
3825	nop
3826	fxcsmadd	f14, B4, A6, f14
3827	nop
3828
3829	fxcpmadd	f3,  B3, A4, f3
3830	nop
3831	fxcsmadd	f7,  B3, A4, f7
3832	LFPDUX	A2, AO2, INC4
3833	fxcpmadd	f11, B4, A4, f11
3834#ifndef TRMMKERNEL
3835   	LFDUX	A5, CO1, INC2
3836#else
3837	nop
3838#endif
3839	fxcsmadd	f15, B4, A4, f15
3840	nop
3841
3842## 3 ##
3843
3844	fxcpmadd	f0,  B5, A7, f0
3845	nop
3846	fxcsmadd	f4,  B5, A7, f4
3847	nop
3848	fxcpmadd	f8,  B2, A7, f8
3849	LFPDUX	B4, BO2, INC4
3850	fxcsmadd	f12, B2, A7, f12
3851#ifndef TRMMKERNEL
	/* Step CO1 back by INCM5 and load again into the same four         */
	/* registers with LFSDUX.                                           */
	/* NOTE(review): presumably fills their secondary halves - confirm. */
3852	LFSDUX	A1, CO1, INCM5
3853#else
3854	nop
3855#endif
3856
3857	fxcpmadd	f1,  B5, A2, f1
3858	nop
3859	fxcsmadd	f5,  B5, A2, f5
3860	LFPDUX	A4, AO2, INC4
3861	fxcpmadd	f9,  B2, A2, f9
3862#ifndef TRMMKERNEL
3863	LFSDUX	B1, CO1, INC2
3864#else
3865	nop
3866#endif
3867	fxcsmadd	f13, B2, A2, f13
3868	nop
3869
3870	fxcpmadd	f2,  B5, A8, f2
3871	nop
3872	fxcsmadd	f6,  B5, A8, f6
3873	nop
3874	fxcpmadd	f10, B2, A8, f10
3875	nop
3876	fxcsmadd	f14, B2, A8, f14
3877	nop
3878
3879	fxcpmadd	f3,  B5, A4, f3
3880	nop
3881	fxcsmadd	f7,  B5, A4, f7
3882	LFPDUX	A2, AO2, INC4
3883	fxcpmadd	f11, B2, A4, f11
3884#ifndef TRMMKERNEL
3885	LFSDUX	A3, CO1, INC2
3886#else
3887	nop
3888#endif
3889	fxcsmadd	f15, B2, A4, f15
3890	nop
3891
3892## 4 ##
3893
3894	fxcpmadd	f0,  B6, A9, f0
3895	nop
3896	fxcsmadd	f4,  B6, A9, f4
3897	nop
3898	fxcpmadd	f8,  B4, A9, f8
3899#ifndef TRMMKERNEL
3900	LFSDUX	A5, CO1, INC2
3901#else
3902	nop
3903#endif
3904	fxcsmadd	f12, B4, A9, f12
3905#ifndef TRMMKERNEL
	/* First load through CO2; the LFSDUX loads through CO2 are issued */
	/* at the top of .L1018.                                            */
3906	LFDUX	B3, CO2, INC
3907#else
3908	nop
3909#endif
3910
3911	fxcpmadd	f1,  B6, A2, f1
3912	nop
3913	fxcsmadd	f5,  B6, A2, f5
3914	LFPDUX	A4, AO2, INC4
3915	fxcpmadd	f9,  B4, A2, f9
3916#ifndef TRMMKERNEL
3917	LFDUX	A6, CO2, INC2
3918#else
3919	nop
3920#endif
3921	fxcsmadd	f13, B4, A2, f13
3922	nop
3923
3924	fxcpmadd	f2,  B6, A10, f2
3925	nop
3926	fxcsmadd	f6,  B6, A10, f6
3927	nop
3928	fxcpmadd	f10, B4, A10, f10
3929	nop
3930	fxcsmadd	f14, B4, A10, f14
3931#ifndef TRMMKERNEL
3932	LFDUX	A7, CO2, INC2
3933#else
3934	nop
3935#endif
3936
3937	fxcpmadd	f3,  B6, A4, f3
3938	nop
3939	fxcsmadd	f7,  B6, A4, f7
3940	nop
3941	fxcpmadd	f11, B4, A4, f11
3942	nop
3943	fxcsmadd	f15, B4, A4, f15
3944#ifndef TRMMKERNEL
3945	LFDUX	B2, CO2, INC2
3946#else
3947	nop
3948#endif
3949	.align 4
3950
/* .L1014: load ALPHA into AP and set CTR = (TEMP or K) & 3 for the      */
/* single-step loop at .L1015.  A remainder of zero goes to .L1018.      */
/* When the step count is > 3 the 4-step loop has run (it already seeded */
/* f2-f15 and, in the non-TRMM build, issued the CO1/CO2 loads), so      */
/* control jumps to .L1015.  Otherwise the fall-through code below does  */
/* that work here.                                                       */
3951.L1014:
3952	lfd	AP,  ALPHA(SP)
3953#ifdef TRMMKERNEL
	/* NOTE(review): presumably copies alpha into the other half of AP  */
	/* for the paired fpmul in .L1018 - confirm.                        */
3954       fsmfp	AP, AP
3955#endif
3956
3957#if defined(TRMMKERNEL)
	/* Recompute the same step count that .L1011 used. */
3958#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
3959	sub	TEMP, K, KK
3960#elif defined(LEFT)
3961	addi	TEMP, KK, 8
3962#else
3963	addi	TEMP, KK, 4
3964#endif
3965	andi.	r0,  TEMP,  3
3966	mtspr	CTR, r0
3967	ble+	.L1018
3968
3969	cmpwi	cr0, TEMP, 3
3970	bgt+	.L1015
3971#else
3972	andi.	r0,  K,  3
3973	mtspr	CTR, r0
3974	ble+	.L1018
3975
3976	cmpwi	cr0, K, 3
3977	bgt+	.L1015
3978#endif
3979
3980#ifndef TRMMKERNEL
	/* Same CO1/CO2 load sequence and destination registers as in       */
	/* .L1013, interleaved with seeding the accumulators from f0.       */
3981	LFDUX	A1, CO1, INC
3982	fpmr	f5,  f0
3983	LFDUX	B1, CO1, INC2
3984	fpmr	f9,  f0
3985	LFDUX	A3, CO1, INC2
3986	fpmr	f13, f0
3987   	LFDUX	A5, CO1, INC2
3988	fpmr	f2,  f0
3989
3990	LFSDUX	A1, CO1, INCM5
3991	fpmr	f6,  f0
3992	LFSDUX	B1, CO1, INC2
3993	fpmr	f10, f0
3994	LFSDUX	A3, CO1, INC2
3995	fpmr	f14, f0
3996	LFSDUX	A5, CO1, INC2
3997	fpmr	f3,  f0
3998
3999	LFDUX	B3, CO2, INC
4000	fpmr	f7,  f0
4001	LFDUX	A6, CO2, INC2
4002	fpmr	f11, f0
4003	LFDUX	A7, CO2, INC2
4004	fpmr	f15, f0
4005	LFDUX	B2, CO2, INC2
4006#else
	/* TRMM: no loads, only the accumulator seeding. */
4007	fpmr	f5,  f0
4008	fpmr	f9,  f0
4009	fpmr	f13, f0
4010	fpmr	f2,  f0
4011
4012	fpmr	f6,  f0
4013	fpmr	f10, f0
4014	fpmr	f14, f0
4015	fpmr	f3,  f0
4016
4017	fpmr	f7,  f0
4018	fpmr	f11, f0
4019	fpmr	f15, f0
4020	nop
4021#endif
4022	.align 4
4023
/* .L1015: preload for the single-step remainder loop.  Only A2, A4, A10 */
/* and B4 are used as operands here, so the values loaded through        */
/* CO1/CO2 into A1, B1, A3, A5, B3, A6, A7 and B2 (non-TRMM build) stay  */
/* intact for .L1018.                                                    */
4024.L1015:
4025	LFPDUX	A2,  AO,  INC4
4026	LFPDUX	A4,  AO2, INC4
4027	LFPDUX	A10, BO,  INC4
4028	LFPDUX	B4,  BO2, INC4
4029	bdz-	.L1017
4030	.align 4
4031
/* .L1016: one step per pass, 16 multiply-adds into f0-f15.  A2 and A4   */
/* are each reloaded mid-step, so every step makes two loads through AO  */
/* and two through AO2, then reloads A10/B4 for the next step.           */
4032.L1016:
4033	fxcpmadd	f0,  A10, A2, f0
4034	fxcsmadd	f4,  A10, A2, f4
4035	fxcpmadd	f8,  B4, A2, f8
4036	fxcsmadd	f12, B4, A2, f12
4037	LFPDUX	A2, AO,  INC4
4038
4039	fxcpmadd	f1,  A10, A4, f1
4040	fxcsmadd	f5,  A10, A4, f5
4041	fxcpmadd	f9,  B4, A4, f9
4042	fxcsmadd	f13, B4, A4, f13
4043	LFPDUX	A4, AO2, INC4
4044
4045	fxcpmadd	f2,  A10, A2, f2
4046	fxcsmadd	f6,  A10, A2, f6
4047	fxcpmadd	f10, B4, A2, f10
4048	fxcsmadd	f14, B4, A2, f14
4049	LFPDUX	A2, AO,  INC4
4050
4051	fxcpmadd	f3,  A10, A4, f3
4052	fxcsmadd	f7,  A10, A4, f7
4053	LFPDUX	A10, BO,  INC4
4054	fxcpmadd	f11, B4, A4, f11
4055	fxcsmadd	f15, B4, A4, f15
4056	LFPDUX	A4, AO2, INC4
4057	LFPDUX	B4, BO2, INC4
4058	bdnz+	.L1016
4059	.align 4
4060
/* .L1017: last step; only the two mid-step A reloads remain. */
4061.L1017:
4062	fxcpmadd	f0,  A10, A2, f0
4063	fxcsmadd	f4,  A10, A2, f4
4064	fxcpmadd	f8,  B4, A2, f8
4065	fxcsmadd	f12, B4, A2, f12
4066	LFPDUX	A2, AO,  INC4
4067
4068	fxcpmadd	f1,  A10, A4, f1
4069	fxcsmadd	f5,  A10, A4, f5
4070	fxcpmadd	f9,  B4, A4, f9
4071	fxcsmadd	f13, B4, A4, f13
4072	LFPDUX	A4, AO2, INC4
4073
4074	fxcpmadd	f2,  A10, A2, f2
4075	fxcsmadd	f6,  A10, A2, f6
4076	fxcpmadd	f10, B4, A2, f10
4077	fxcsmadd	f14, B4, A2, f14
4078
4079	fxcpmadd	f3,  A10, A4, f3
4080	fxcsmadd	f7,  A10, A4, f7
4081	fxcpmadd	f11, B4, A4, f11
4082	fxcsmadd	f15, B4, A4, f15
4083	.align 4
4084
/* .L1018: write-back of f0-f15 through CO1..CO4 (four accumulators per  */
/* pointer), followed by TRMM bookkeeping and the loop-back to .L1011.   */
/* Non-TRMM build: finishes the loads through CO2, performs the full     */
/* LFDUX / LFSDUX sequences through CO3 and CO4, and computes            */
/* AP * acc + loaded value.  The loads leave each pointer 7 elements     */
/* past its starting position, so the first store of each group rewinds  */
/* with INCM7.  TRMM build: multiplies by AP only and stores forward     */
/* with stride INC.                                                      */
4085.L1018:
4086#ifndef TRMMKERNEL
4087	LFSDUX	B3, CO2, INCM5
4088	LFSDUX	A6, CO2, INC2
4089	LFSDUX	A7, CO2, INC2
4090	LFSDUX	B2, CO2, INC2
4091
4092	LFDUX	B5, CO3, INC
4093	LFDUX	A8, CO3, INC2
4094	LFDUX	A9, CO3, INC2
4095 	LFDUX	B4, CO3, INC2
4096
4097	LFSDUX	B5, CO3, INCM5
4098	LFSDUX	A8, CO3, INC2
4099	LFSDUX	A9, CO3, INC2
4100	LFSDUX	B4, CO3, INC2
4101
4102	LFDUX	A2,  CO4, INC
4103	LFDUX	A4,  CO4, INC2
4104
	/* A1 is consumed here and then reused as a destination for the     */
	/* loads through CO4 below.                                         */
4105	fxcpmadd	f0,  AP, f0,  A1
4106	LFDUX	A10, CO4, INC2
4107	LFDUX	A1,  CO4, INC2
4108
4109	fxcpmadd	f1,  AP, f1,  B1
4110	LFSDUX	A2,  CO4, INCM5
4111	LFSDUX	A4,  CO4, INC2
4112
4113	fxcpmadd	f2,  AP, f2,  A3
4114	LFSDUX	A10, CO4, INC2
4115	LFSDUX	A1,  CO4, INC2
4116
4117	fxcpmadd	f3,  AP, f3,  A5
4118	STFDUX	f0,  CO1, INCM7
4119	STFSDUX	f0,  CO1, INC
4120
4121	fxcpmadd	f4,  AP, f4,  B3
4122	STFDUX	f1,  CO1, INC
4123	STFSDUX	f1,  CO1, INC
4124
4125	fxcpmadd	f5,  AP, f5,  A6
4126	STFDUX	f2,  CO1, INC
4127	STFSDUX	f2,  CO1, INC
4128
4129	fxcpmadd	f6,  AP, f6,  A7
4130	STFDUX	f3,  CO1, INC
4131	STFSDUX	f3,  CO1, INC
4132
4133	fxcpmadd	f7,  AP, f7,  B2
4134	STFDUX	f4,  CO2, INCM7
4135	STFSDUX	f4,  CO2, INC
4136
4137	fxcpmadd	f8,  AP, f8,  B5
4138	STFDUX	f5,  CO2, INC
4139	STFSDUX	f5,  CO2, INC
4140
4141	fxcpmadd	f9,  AP, f9,  A8
4142	STFDUX	f6,  CO2, INC
4143	STFSDUX	f6,  CO2, INC
4144
4145	fxcpmadd	f10, AP, f10, A9
4146	STFDUX	f7,  CO2, INC
4147	STFSDUX	f7,  CO2, INC
4148
4149	fxcpmadd	f11, AP, f11, B4
4150	STFDUX	f8,  CO3, INCM7
4151	STFSDUX	f8,  CO3, INC
4152
4153	fxcpmadd	f12, AP, f12, A2
4154	STFDUX	f9,  CO3, INC
4155	STFSDUX	f9,  CO3, INC
4156
4157	fxcpmadd	f13, AP, f13, A4
4158	STFDUX	f10, CO3, INC
4159	STFSDUX	f10, CO3, INC
4160
4161	fxcpmadd	f14, AP, f14, A10
4162	STFDUX	f11, CO3, INC
4163	STFSDUX	f11, CO3, INC
4164
4165	fxcpmadd	f15, AP, f15, A1
4166	STFDUX	f12, CO4, INCM7
4167#else
4168	fpmul	f0,  AP, f0
4169	fpmul	f1,  AP, f1
4170	fpmul	f2,  AP, f2
4171	fpmul	f3,  AP, f3
4172
4173	STFDUX	f0,  CO1, INC
4174	STFSDUX	f0,  CO1, INC
4175
4176	fpmul	f4,  AP, f4
4177	STFDUX	f1,  CO1, INC
4178	STFSDUX	f1,  CO1, INC
4179
4180	fpmul	f5,  AP, f5
4181	STFDUX	f2,  CO1, INC
4182	STFSDUX	f2,  CO1, INC
4183
4184	fpmul	f6,  AP, f6
4185	STFDUX	f3,  CO1, INC
4186	STFSDUX	f3,  CO1, INC
4187
4188	fpmul	f7,  AP, f7
4189	STFDUX	f4,  CO2, INC
4190	STFSDUX	f4,  CO2, INC
4191
4192	fpmul	f8,  AP, f8
4193	STFDUX	f5,  CO2, INC
4194	STFSDUX	f5,  CO2, INC
4195
4196	fpmul	f9,  AP, f9
4197	STFDUX	f6,  CO2, INC
4198	STFSDUX	f6,  CO2, INC
4199
4200	fpmul	f10, AP, f10
4201	STFDUX	f7,  CO2, INC
4202	STFSDUX	f7,  CO2, INC
4203
4204	fpmul	f11, AP, f11
4205	STFDUX	f8,  CO3, INC
4206	STFSDUX	f8,  CO3, INC
4207
4208	fpmul	f12, AP, f12
4209	STFDUX	f9,  CO3, INC
4210	STFSDUX	f9,  CO3, INC
4211
4212	fpmul	f13, AP, f13
4213	STFDUX	f10, CO3, INC
4214	STFSDUX	f10, CO3, INC
4215
4216	fpmul	f14, AP, f14
4217	STFDUX	f11, CO3, INC
4218	STFSDUX	f11, CO3, INC
4219
4220	fpmul	f15, AP, f15
4221	STFDUX	f12, CO4, INC
4222#endif
4223
	/* Remaining stores through CO4, shared by both builds. */
4224	STFSDUX	f12, CO4, INC
4225	STFDUX	f13, CO4, INC
4226	STFSDUX	f13, CO4, INC
4227	STFDUX	f14, CO4, INC
4228	STFSDUX	f14, CO4, INC
4229	STFDUX	f15, CO4, INC
4230	STFSDUX	f15, CO4, INC
4231
4232#ifdef TRMMKERNEL
	/* TRMM bookkeeping: for these variants advance AO and BO by        */
	/* (K - KK - 8 or 4) scaled for 8 and 4 elements per step.          */
4233#if ( defined(LEFT) &&  defined(TRANSA)) || \
4234    (!defined(LEFT) && !defined(TRANSA))
4235	sub	TEMP, K, KK
4236#ifdef LEFT
4237	addi	TEMP, TEMP, -8
4238#else
4239	addi	TEMP, TEMP, -4
4240#endif
4241	slwi	r0,   TEMP, 3 + BASE_SHIFT
4242	slwi	TEMP, TEMP, 2 + BASE_SHIFT
4243	add	AO, AO, r0
4244	add	BO, BO, TEMP
4245#endif
4246
4247#ifdef LEFT
4248	addi	KK, KK, 8
4249#endif
4250#endif
4251
	/* Decrement the block counter I, reload f0 from the FZERO slot,    */
	/* and loop back to .L1011 while I > 0.                             */
4252	addic.	I, I, -1
4253	li	r0, FZERO
4254
4255	lfpsx	f0, SP, r0
4256	bgt+	.L1011
4257	.align 4
4258
/* .L1020: taken only when bit 2 of M is set (andi. I, M, 4); otherwise  */
/* control skips to .L1030.  Uses eight accumulators (f0, f1, f4, f5,    */
/* f8, f9, f12, f13), all seeded from f0, and the AO/AO2, BO/BO2         */
/* pointer pairs with stride INC4.  CTR = (TEMP or K) >> 2 for the       */
/* 4-step loop at .L1022; a count <= 0 goes to .L1024.                   */
4259.L1020:
4260	andi.	I, M,  4
4261	beq	.L1030
4262
4263#if defined(TRMMKERNEL)
4264#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
4265	addi	AO2, AO,   2 * SIZE
4266	fpmr	f4,  f0
4267	addi	BO,  B,  - 4 * SIZE
4268	fpmr	f8,  f0
4269	addi	BO2, B,  - 2 * SIZE
4270	fpmr	f12, f0
4271#else
	/* AO and BO are both offset by KK << (2 + BASE_SHIFT) here. */
4272	slwi	TEMP, KK, 2 + BASE_SHIFT
4273	slwi	r0,   KK, 2 + BASE_SHIFT
4274	add	AO, AO, TEMP
4275	add	BO, B,  r0
4276
4277	addi	AO2, AO,   2 * SIZE
4278	fpmr	f4,  f0
4279	addi	BO,  BO,  - 4 * SIZE
4280	fpmr	f8,  f0
4281	addi	BO2, BO,    2 * SIZE
4282	fpmr	f12, f0
4283#endif
4284
	/* TEMP = step count: K - KK, or KK + 4 (both remaining branches    */
	/* add 4).                                                          */
4285#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
4286	sub	TEMP, K, KK
4287#elif defined(LEFT)
4288	addi	TEMP, KK, 4
4289#else
4290	addi	TEMP, KK, 4
4291#endif
4292
4293	srawi.	TEMP,  TEMP,  2
4294 	fpmr	f1,  f0
4295	fpmr	f5,  f0
4296	fpmr	f9,  f0
4297	mtspr	CTR, TEMP
4298	fpmr	f13, f0
4299	ble	.L1024
4300#else
4301	addi	AO2, AO,   2 * SIZE
4302	fpmr	f4,  f0
4303	addi	BO,  B,  - 4 * SIZE
4304	fpmr	f8,  f0
4305	addi	BO2, B,  - 2 * SIZE
4306	fpmr	f12, f0
4307
4308	srawi.	r0,  K,  2
4309 	fpmr	f1,  f0
4310	fpmr	f5,  f0
4311	fpmr	f9,  f0
4312	mtspr	CTR, r0
4313	fpmr	f13, f0
4314	ble	.L1024
4315#endif
4316
	/* Preload all operands of the first pass except A8, which the      */
	/* loop body (and the drain at .L1023) loads itself.  A9 and A10    */
	/* are loaded through BO and BO2 here.                              */
4317	LFPDUX	A1,   AO, INC4
4318	LFPDUX	B1,   BO, INC4
4319	LFPDUX	A2,  AO2, INC4
4320	LFPDUX	B2,  BO2, INC4
4321	LFPDUX	A3,   AO, INC4
4322	LFPDUX	B3,   BO, INC4
4323	LFPDUX	A4,  AO2, INC4
4324	LFPDUX	B4,  BO2, INC4
4325
4326	LFPDUX	A5,   AO, INC4
4327	LFPDUX	B5,   BO, INC4
4328	LFPDUX	A6,  AO2, INC4
4329	LFPDUX	B6,  BO2, INC4
4330	LFPDUX	A7,   AO, INC4
4331	LFPDUX	A9,   BO, INC4
4332	LFPDUX	A10, BO2, INC4
	/* A count of 1 skips the steady-state loop and drains in .L1023. */
4333	bdz-	.L1023
4334	.align 4
4335
/* .L1022: steady-state loop, one pass per CTR count.  Four steps of 8   */
/* multiply-adds each (32 per pass) into f0, f1, f4, f5, f8, f9, f12 and */
/* f13.  The B-side operands of the four steps are (B1,B2), (B3,B4),     */
/* (B5,B6) and (A9,A10).  Each operand is reloaded right after its last  */
/* use in the pass.                                                      */
4336.L1022:
4337	fxcpmadd	f0,  B1, A1, f0
4338	nop
4339	fxcsmadd	f4,  B1, A1, f4
4340	LFPDUX	A8,  AO2, INC4
4341	fxcpmadd	f8,  B2, A1, f8
4342	nop
4343	fxcsmadd	f12, B2, A1, f12
4344	LFPDUX	A1,   AO, INC4
4345
4346	fxcpmadd	f1,  B1, A2, f1
4347	nop
4348	fxcsmadd	f5,  B1, A2, f5
4349	LFPDUX	B1,   BO, INC4
4350	fxcpmadd	f9,  B2, A2, f9
4351	nop
4352	fxcsmadd	f13, B2, A2, f13
4353	LFPDUX	B2,  BO2, INC4
4354
4355	fxcpmadd	f0,  B3, A3, f0
4356	nop
4357	fxcsmadd	f4,  B3, A3, f4
4358	LFPDUX	A2,  AO2, INC4
4359	fxcpmadd	f8,  B4, A3, f8
4360	nop
4361	fxcsmadd	f12, B4, A3, f12
4362	LFPDUX	A3,   AO, INC4
4363
4364	fxcpmadd	f1,  B3, A4, f1
4365	nop
4366	fxcsmadd	f5,  B3, A4, f5
4367	LFPDUX	B3,   BO, INC4
4368	fxcpmadd	f9,  B4, A4, f9
4369	nop
4370	fxcsmadd	f13, B4, A4, f13
4371	LFPDUX	B4,  BO2, INC4
4372
4373	fxcpmadd	f0,  B5, A5, f0
4374	nop
4375	fxcsmadd	f4,  B5, A5, f4
4376	LFPDUX	A4,  AO2, INC4
4377	fxcpmadd	f8,  B6, A5, f8
4378	nop
4379	fxcsmadd	f12, B6, A5, f12
4380	LFPDUX	A5,   AO, INC4
4381
4382	fxcpmadd	f1,  B5, A6, f1
4383	nop
4384	fxcsmadd	f5,  B5, A6, f5
4385	LFPDUX	B5,   BO, INC4
4386	fxcpmadd	f9,  B6, A6, f9
4387	nop
4388	fxcsmadd	f13, B6, A6, f13
4389	LFPDUX	B6,  BO2, INC4
4390
4391	fxcpmadd	f0,  A9,  A7, f0
4392	nop
4393	fxcsmadd	f4,  A9,  A7, f4
4394	LFPDUX	A6,  AO2, INC4
4395	fxcpmadd	f8,  A10, A7, f8
4396	nop
4397	fxcsmadd	f12, A10, A7, f12
4398	LFPDUX	A7,   AO, INC4
4399
4400	fxcpmadd	f1,  A9,  A8, f1
4401	nop
4402	fxcsmadd	f5,  A9,  A8, f5
4403	LFPDUX	A9,   BO, INC4
4404	fxcpmadd	f9,  A10, A8, f9
4405	nop
4406	fxcsmadd	f13, A10, A8, f13
4407	LFPDUX	A10, BO2, INC4
4408	bdnz+	.L1022
4409	.align 4
4410
/* .L1023: final pass.  The same 32 multiply-adds; the only load left is */
/* A8, which the preload in .L1020 did not fetch.                        */
4411.L1023:
4412	fxcpmadd	f0,  B1, A1, f0
4413	fxcsmadd	f4,  B1, A1, f4
4414	LFPDUX	A8,  AO2, INC4
4415	fxcpmadd	f8,  B2, A1, f8
4416	fxcsmadd	f12, B2, A1, f12
4417
4418	fxcpmadd	f1,  B1, A2, f1
4419	fxcsmadd	f5,  B1, A2, f5
4420	fxcpmadd	f9,  B2, A2, f9
4421	fxcsmadd	f13, B2, A2, f13
4422
4423	fxcpmadd	f0,  B3, A3, f0
4424	fxcsmadd	f4,  B3, A3, f4
4425	fxcpmadd	f8,  B4, A3, f8
4426	fxcsmadd	f12, B4, A3, f12
4427
4428	fxcpmadd	f1,  B3, A4, f1
4429	fxcsmadd	f5,  B3, A4, f5
4430	fxcpmadd	f9,  B4, A4, f9
4431	fxcsmadd	f13, B4, A4, f13
4432
4433	fxcpmadd	f0,  B5, A5, f0
4434	fxcsmadd	f4,  B5, A5, f4
4435	fxcpmadd	f8,  B6, A5, f8
4436	fxcsmadd	f12, B6, A5, f12
4437
4438	fxcpmadd	f1,  B5, A6, f1
4439	fxcsmadd	f5,  B5, A6, f5
4440	fxcpmadd	f9,  B6, A6, f9
4441	fxcsmadd	f13, B6, A6, f13
4442
4443	fxcpmadd	f0,  A9, A7, f0
4444	fxcsmadd	f4,  A9, A7, f4
4445	fxcpmadd	f8,  A10, A7, f8
4446	fxcsmadd	f12, A10, A7, f12
4447
4448	fxcpmadd	f1,  A9, A8, f1
4449	fxcsmadd	f5,  A9, A8, f5
4450	fxcpmadd	f9,  A10, A8, f9
4451	fxcsmadd	f13, A10, A8, f13
4452	.align 4
4453
/* .L1024: load ALPHA into AP and run the leftover (TEMP or K) & 3 steps */
/* one at a time in .L1026/.L1027.  A remainder of zero goes to .L1028.  */
4454.L1024:
4455	lfd	AP,  ALPHA(SP)
4456#ifdef TRMMKERNEL
	/* NOTE(review): presumably copies alpha into the other half of AP  */
	/* for the paired fpmul in .L1028 - confirm.                        */
4457       fsmfp	AP, AP
4458#endif
4459
4460#if defined(TRMMKERNEL)
	/* Recompute the same step count that .L1020 used. */
4461#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
4462	sub	TEMP, K, KK
4463#elif defined(LEFT)
4464	addi	TEMP, KK, 4
4465#else
4466	addi	TEMP, KK, 4
4467#endif
4468	andi.	TEMP,  TEMP,  3
4469	mtspr	CTR, TEMP
4470#else
4471	andi.	r0,  K,  3
4472	mtspr	CTR, r0
4473#endif
4474	ble+	.L1028
4475
	/* Preload one step: one pair from each of AO, AO2, BO and BO2. */
4476	LFPDUX	A1,  AO,  INC4
4477	LFPDUX	A2,  AO2, INC4
4478	LFPDUX	B1,  BO,  INC4
4479	LFPDUX	B2,  BO2, INC4
4480	bdz-	.L1027
4481	.align 4
4482
/* .L1026: one step per pass, 8 multiply-adds, operands reloaded as they */
/* are consumed.                                                         */
4483.L1026:
4484	fxcpmadd	f0,  B1, A1, f0
4485	fxcsmadd	f4,  B1, A1, f4
4486	fxcpmadd	f8,  B2, A1, f8
4487	fxcsmadd	f12, B2, A1, f12
4488	LFPDUX	A1,  AO,  INC4
4489
4490	fxcpmadd	f1,  B1, A2, f1
4491	fxcsmadd	f5,  B1, A2, f5
4492	LFPDUX	B1,  BO,  INC4
4493	fxcpmadd	f9,  B2, A2, f9
4494	fxcsmadd	f13, B2, A2, f13
4495	LFPDUX	A2,  AO2, INC4
4496	LFPDUX	B2,  BO2, INC4
4497	bdnz+	.L1026
4498	.align 4
4499
/* .L1027: last step, no further loads. */
4500.L1027:
4501	fxcpmadd	f0,  B1, A1, f0
4502	fxcsmadd	f4,  B1, A1, f4
4503	fxcpmadd	f8,  B2, A1, f8
4504	fxcsmadd	f12, B2, A1, f12
4505
4506	fxcpmadd	f1,  B1, A2, f1
4507	fxcsmadd	f5,  B1, A2, f5
4508	fxcpmadd	f9,  B2, A2, f9
4509	fxcsmadd	f13, B2, A2, f13
4510	.align 4
4511
/* .L1028: write the accumulators f0/f1, f4/f5, f8/f9, f12/f13 out through  */
/* CO1, CO2, CO3, CO4 respectively.                                         */
/*  - non-TRMM: the existing values are first loaded through CO1..CO4, then */
/*    combined as fxcpmadd(AP, acc, loaded) and stored.  The INCM1 / INCM3  */
/*    strides step the CO pointers backwards between the load and store     */
/*    phases (presumably negative multiples of SIZE - defined elsewhere).   */
/*  - TRMM: accumulators are only multiplied by AP (fpmul) and stored.      */
/* Afterwards the TRMM build adjusts AO/BO/KK, and f0 is reloaded from the  */
/* stack slot at offset FZERO.                                              */
4512.L1028:
4513#ifndef TRMMKERNEL
4514	LFDUX	A1, CO1, INC
4515	LFDUX	B1, CO1, INC2
4516	LFDUX	B3, CO2, INC
4517	LFDUX	A6, CO2, INC2
4518
4519	LFSDUX	A1, CO1, INCM1
4520	LFSDUX	B1, CO1, INC2
4521	LFSDUX	B3, CO2, INCM1
4522	LFSDUX	A6, CO2, INC2
4523
4524	LFDUX	B5, CO3, INC
4525	LFDUX	A8, CO3, INC2
4526	LFDUX	A2, CO4, INC
4527	LFDUX	A4, CO4, INC2
4528
4529	fxcpmadd	f0,  AP, f0,  A1
4530	LFSDUX	B5, CO3, INCM1
4531	LFSDUX	A8, CO3, INC2
4532
4533	fxcpmadd	f1,  AP, f1,  B1
4534	LFSDUX	A2,  CO4, INCM1
4535	LFSDUX	A4,  CO4, INC2
4536
4537	fxcpmadd	f4,  AP, f4,  B3
4538	STFDUX	f0,  CO1, INCM3
4539	STFSDUX	f0,  CO1, INC
4540
4541	fxcpmadd	f5,  AP, f5,  A6
4542	STFDUX	f1,  CO1, INC
4543	STFSDUX	f1,  CO1, INC
4544
4545	fxcpmadd	f8,  AP, f8,  B5
4546	STFDUX	f4,  CO2, INCM3
4547	STFSDUX	f4,  CO2, INC
4548
4549	fxcpmadd	f9,  AP, f9,  A8
4550	STFDUX	f5,  CO2, INC
4551	STFSDUX	f5,  CO2, INC
4552
4553	fxcpmadd	f12, AP, f12, A2
4554	STFDUX	f8,  CO3, INCM3
4555	STFSDUX	f8,  CO3, INC
4556
4557	fxcpmadd	f13, AP, f13, A4
4558	STFDUX	f9,  CO3, INC
4559	STFSDUX	f9,  CO3, INC
4560
4561	STFDUX	f12, CO4, INCM3
4562	STFSDUX	f12, CO4, INC
4563
4564	STFDUX	f13, CO4, INC
4565	STFSDUX	f13, CO4, INC
4566#else
4567	fpmul	f0,  AP, f0
4568	fpmul	f1,  AP, f1
4569
4570	fpmul	f4,  AP, f4
4571	STFDUX	f0,  CO1, INC
4572	STFSDUX	f0,  CO1, INC
4573
4574	fpmul	f5,  AP, f5
4575	STFDUX	f1,  CO1, INC
4576	STFSDUX	f1,  CO1, INC
4577
4578	fpmul	f8,  AP, f8
4579	STFDUX	f4,  CO2, INC
4580	STFSDUX	f4,  CO2, INC
4581
4582	fpmul	f9,  AP, f9
4583	STFDUX	f5,  CO2, INC
4584	STFSDUX	f5,  CO2, INC
4585
4586	fpmul	f12, AP, f12
4587	STFDUX	f8,  CO3, INC
4588	STFSDUX	f8,  CO3, INC
4589
4590	fpmul	f13, AP, f13
4591	STFDUX	f9,  CO3, INC
4592	STFSDUX	f9,  CO3, INC
4593
4594	STFDUX	f12, CO4, INC
4595	STFSDUX	f12, CO4, INC
4596
4597	STFDUX	f13, CO4, INC
4598	STFSDUX	f13, CO4, INC
4599#endif
4600
4601
/* TRMM only: advance AO and BO by (K - KK - 4) scaled by 2 + BASE_SHIFT   */
/* each, then (LEFT) bump KK by 4.                                          */
4602#ifdef TRMMKERNEL
4603#if ( defined(LEFT) &&  defined(TRANSA)) || \
4604    (!defined(LEFT) && !defined(TRANSA))
4605	sub	TEMP, K, KK
4606#ifdef LEFT
4607	addi	TEMP, TEMP, -4
4608#else
4609	addi	TEMP, TEMP, -4
4610#endif
4611	slwi	r0,   TEMP, 2 + BASE_SHIFT
4612	slwi	TEMP, TEMP, 2 + BASE_SHIFT
4613	add	AO, AO, r0
4614	add	BO, BO, TEMP
4615#endif
4616
4617#ifdef LEFT
4618	addi	KK, KK, 4
4619#endif
4620#endif
4621
/* Reload f0 from FZERO(SP); presumably a stored zero used to clear the    */
/* accumulators of the next tile - confirm where FZERO is initialised.     */
4622	li	r0, FZERO
4623	lfpsx	f0, SP, r0
4624	.align 4
4625
/* .L1030: handled only when bit 1 of M is set (otherwise -> .L1040).       */
/* Sets AO2 = AO + 2*SIZE and BO/BO2 relative to B (TRMM variants first     */
/* offset AO by KK << (1 + BASE_SHIFT) and B by KK << (2 + BASE_SHIFT)),    */
/* copies f0 into f1..f3, and loads CTR with the length >> 2.  If that      */
/* count is <= 0 the unrolled loop is skipped (-> .L1034).  Otherwise two   */
/* operand sets (A1/A2/B1..B4 and A3/A4/A5..A8) are preloaded.              */
4626.L1030:
4627	andi.	I, M,  2
4628	beq	.L1040
4629
4630#if defined(TRMMKERNEL)
4631#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
4632	addi	AO2, AO,   2 * SIZE
4633	fpmr	f1,  f0
4634	addi	BO,  B,  - 4 * SIZE
4635	fpmr	f2,  f0
4636	addi	BO2, B,  - 2 * SIZE
4637	fpmr	f3, f0
4638#else
4639	slwi	TEMP, KK, 1 + BASE_SHIFT
4640	slwi	r0,   KK, 2 + BASE_SHIFT
4641	add	AO, AO, TEMP
4642	add	BO, B,  r0
4643
4644	addi	AO2, AO,   2 * SIZE
4645	fpmr	f1,  f0
4646	addi	BO,  BO, - 4 * SIZE
4647	fpmr	f2,  f0
4648	addi	BO2, BO,   2 * SIZE
4649	fpmr	f3, f0
4650#endif
4651
4652#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
4653	sub	TEMP, K, KK
4654#elif defined(LEFT)
4655	addi	TEMP, KK, 2
4656#else
4657	addi	TEMP, KK, 4
4658#endif
4659
4660	srawi.	r0,  TEMP,  2
4661	mtspr	CTR, r0
4662	ble	.L1034
4663
4664#else
4665	addi	AO2, AO,   2 * SIZE
4666	fpmr	f1,  f0
4667	addi	BO,  B,  - 4 * SIZE
4668	fpmr	f2,  f0
4669	addi	BO2, B,  - 2 * SIZE
4670	fpmr	f3, f0
4671
4672	srawi.	r0,  K,  2
4673	mtspr	CTR, r0
4674	ble	.L1034
4675#endif
4676
4677	LFPDUX	A1,  AO, INC4
4678	LFPDUX	B1,  BO, INC4
4679	LFPDUX	B2, BO2, INC4
4680	LFPDUX	A2, AO2, INC4
4681	LFPDUX	B3,  BO, INC4
4682	LFPDUX	B4, BO2, INC4
4683
4684	LFPDUX	A3,  AO, INC4
4685	LFPDUX	A5,  BO, INC4
4686	LFPDUX	A6, BO2, INC4
4687	LFPDUX	A4, AO2, INC4
4688	LFPDUX	A7,  BO, INC4
4689	LFPDUX	A8, BO2, INC4
4690	bdz-	.L1033
4691	.align 4
4692
/* .L1032: unrolled main loop for the tile accumulated in f0..f3.  Four     */
/* multiply-add groups per pass (A operands A1, A2, A3, A4 against B pairs  */
/* B1/B2, B3/B4, A5/A6, A7/A8); each group reloads its own operands for     */
/* the following pass.  Note A5..A8 hold B-side data here.                  */
4693.L1032:
4694	fxcpmadd	f0,  B1, A1, f0
4695	fxcsmadd	f1,  B1, A1, f1
4696	LFPDUX	B1,  BO, INC4
4697	fxcpmadd	f2,  B2, A1, f2
4698	fxcsmadd	f3,  B2, A1, f3
4699	LFPDUX	B2, BO2, INC4
4700	LFPDUX	A1,  AO, INC4
4701
4702	fxcpmadd	f0,  B3, A2, f0
4703	fxcsmadd	f1,  B3, A2, f1
4704	LFPDUX	B3,  BO, INC4
4705	fxcpmadd	f2,  B4, A2, f2
4706	fxcsmadd	f3,  B4, A2, f3
4707	LFPDUX	B4, BO2, INC4
4708	LFPDUX	A2, AO2, INC4
4709
4710	fxcpmadd	f0,  A5, A3, f0
4711	fxcsmadd	f1,  A5, A3, f1
4712	LFPDUX	A5,  BO, INC4
4713	fxcpmadd	f2,  A6, A3, f2
4714	fxcsmadd	f3,  A6, A3, f3
4715	LFPDUX	A6, BO2, INC4
4716	LFPDUX	A3,  AO, INC4
4717
4718	fxcpmadd	f0,  A7, A4, f0
4719	fxcsmadd	f1,  A7, A4, f1
4720	LFPDUX	A7,  BO, INC4
4721	fxcpmadd	f2,  A8, A4, f2
4722	fxcsmadd	f3,  A8, A4, f3
4723	LFPDUX	A8, BO2, INC4
4724	LFPDUX	A4, AO2, INC4
4725	bdnz+	.L1032
4726	.align 4
4727
/* .L1033: drain of the .L1032 loop - the same four multiply-add groups on  */
/* the operands already in registers, with no further loads.                */
4728.L1033:
4729	fxcpmadd	f0,  B1, A1, f0
4730	fxcsmadd	f1,  B1, A1, f1
4731	fxcpmadd	f2,  B2, A1, f2
4732	fxcsmadd	f3,  B2, A1, f3
4733
4734	fxcpmadd	f0,  B3, A2, f0
4735	fxcsmadd	f1,  B3, A2, f1
4736	fxcpmadd	f2,  B4, A2, f2
4737	fxcsmadd	f3,  B4, A2, f3
4738
4739	fxcpmadd	f0,  A5, A3, f0
4740	fxcsmadd	f1,  A5, A3, f1
4741	fxcpmadd	f2,  A6, A3, f2
4742	fxcsmadd	f3,  A6, A3, f3
4743
4744	fxcpmadd	f0,  A7, A4, f0
4745	fxcsmadd	f1,  A7, A4, f1
4746	fxcpmadd	f2,  A8, A4, f2
4747	fxcsmadd	f3,  A8, A4, f3
4748	.align 4
4749
/* .L1034: leftover-K setup for the f0..f3 tile.  AP is loaded from         */
/* ALPHA(SP); CTR = length & 3.  No leftover -> .L1038.                     */
/* A is read with the non-update form (LFPDX at AO + INC4) and AO is then   */
/* advanced by INC2 explicitly, whereas BO/BO2 use update-form loads.       */
4750.L1034:
4751	lfd	AP,  ALPHA(SP)
4752#ifdef TRMMKERNEL
4753       fsmfp	AP, AP
4754#endif
4755
4756#if defined(TRMMKERNEL)
4757#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
4758	sub	TEMP, K, KK
4759#elif defined(LEFT)
4760	addi	TEMP, KK, 2
4761#else
4762	addi	TEMP, KK, 4
4763#endif
4764	andi.	TEMP,  TEMP,  3
4765	mtspr	CTR, TEMP
4766#else
4767	andi.	r0,  K,  3
4768	mtspr	CTR, r0
4769#endif
4770	ble+	.L1038
4771
4772	LFPDX	A1,  AO,  INC4
4773	LFPDUX	B1,  BO,  INC4
4774	LFPDUX	B2,  BO2, INC4
4775	add	AO, AO, INC2
4776	bdz-	.L1037
4777	.align 4
4778
/* .L1036: leftover-K loop body for the f0..f3 tile: four multiply-adds on  */
/* A1 with B1/B2, then reload A1, B1, B2 and step AO by INC2.               */
4779.L1036:
4780	fxcpmadd	f0,  B1, A1, f0
4781	fxcsmadd	f1,  B1, A1, f1
4782	LFPDUX	B1,  BO,  INC4
4783	fxcpmadd	f2,  B2, A1, f2
4784	fxcsmadd	f3,  B2, A1, f3
4785	LFPDX	A1,  AO,  INC4
4786	LFPDUX	B2,  BO2, INC4
4787	add	AO, AO, INC2
4788	bdnz+	.L1036
4789	.align 4
4790
/* .L1037: final leftover step for the f0..f3 tile (no further loads).      */
4791.L1037:
4792	fxcpmadd	f0,  B1, A1, f0
4793	fxcsmadd	f1,  B1, A1, f1
4794	fxcpmadd	f2,  B2, A1, f2
4795	fxcsmadd	f3,  B2, A1, f3
4796	.align 4
4797
/* .L1038: write f0, f1, f2, f3 out through CO1, CO2, CO3, CO4 (one pair    */
/* per pointer).  Non-TRMM loads the existing pair first and stores         */
/* fxcpmadd(AP, acc, loaded), stepping back with INCM1 before the store;    */
/* TRMM stores AP * acc directly.  Then TRMM pointer/KK bookkeeping and a   */
/* reload of f0 from the stack slot at offset FZERO.                        */
4798.L1038:
4799#ifndef TRMMKERNEL
4800	LFDUX	A1, CO1, INC
4801	LFDUX	A2, CO2, INC
4802	LFDUX	A3, CO3, INC
4803	LFDUX	A4, CO4, INC
4804
4805	LFSDUX	A1, CO1, INC
4806	LFSDUX	A2, CO2, INC
4807	LFSDUX	A3, CO3, INC
4808	LFSDUX	A4, CO4, INC
4809
4810	fxcpmadd	f0, AP, f0, A1
4811	fxcpmadd	f1, AP, f1, A2
4812	fxcpmadd	f2, AP, f2, A3
4813	fxcpmadd	f3, AP, f3, A4
4814
4815	STFDUX	f0,  CO1, INCM1
4816	STFSDUX	f0,  CO1, INC
4817
4818	STFDUX	f1,  CO2, INCM1
4819	STFSDUX	f1,  CO2, INC
4820
4821	STFDUX	f2,  CO3, INCM1
4822	STFSDUX	f2,  CO3, INC
4823
4824	STFDUX	f3,  CO4, INCM1
4825	STFSDUX	f3,  CO4, INC
4826#else
4827	fpmul	f0, AP, f0
4828	fpmul	f1, AP, f1
4829	fpmul	f2, AP, f2
4830	fpmul	f3, AP, f3
4831
4832	STFDUX	f0,  CO1, INC
4833	STFSDUX	f0,  CO1, INC
4834
4835	STFDUX	f1,  CO2, INC
4836	STFSDUX	f1,  CO2, INC
4837
4838	STFDUX	f2,  CO3, INC
4839	STFSDUX	f2,  CO3, INC
4840
4841	STFDUX	f3,  CO4, INC
4842	STFSDUX	f3,  CO4, INC
4843#endif
4844
4845
/* TRMM only: AO += (K - KK - n) << (1 + BASE_SHIFT) and                   */
/* BO += (K - KK - n) << (2 + BASE_SHIFT), n = 2 with LEFT else 4;         */
/* with LEFT, KK += 2.                                                      */
4846#ifdef TRMMKERNEL
4847#if ( defined(LEFT) &&  defined(TRANSA)) || \
4848    (!defined(LEFT) && !defined(TRANSA))
4849	sub	TEMP, K, KK
4850#ifdef LEFT
4851	addi	TEMP, TEMP, -2
4852#else
4853	addi	TEMP, TEMP, -4
4854#endif
4855	slwi	r0,   TEMP, 1 + BASE_SHIFT
4856	slwi	TEMP, TEMP, 2 + BASE_SHIFT
4857	add	AO, AO, r0
4858	add	BO, BO, TEMP
4859#endif
4860
4861#ifdef LEFT
4862	addi	KK, KK, 2
4863#endif
4864#endif
4865
4866	li	r0, FZERO
4867	lfpsx	f0, SP, r0
4868	.align 4
4869
/* .L1040: handled only when bit 0 of M is set (otherwise -> .L1049).       */
/* Same pointer setup as the previous tile except that the TRMM offset      */
/* applied to AO is KK << (0 + BASE_SHIFT).  CTR = length >> 3; a count     */
/* <= 0 skips to .L1044.  Otherwise A1..A4 and eight B-side registers       */
/* (B1..B4, A5..A8) are preloaded.                                          */
4870.L1040:
4871	andi.	I, M,  1
4872	beq	.L1049
4873
4874#if defined(TRMMKERNEL)
4875#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
4876	addi	AO2, AO,   2 * SIZE
4877	fpmr	f1,  f0
4878	addi	BO,  B,  - 4 * SIZE
4879	fpmr	f2,  f0
4880	addi	BO2, B,  - 2 * SIZE
4881	fpmr	f3,  f0
4882#else
4883	slwi	TEMP, KK, 0 + BASE_SHIFT
4884	slwi	r0,   KK, 2 + BASE_SHIFT
4885	add	AO, AO, TEMP
4886	add	BO, B,  r0
4887
4888	addi	AO2, AO,   2 * SIZE
4889	fpmr	f1,  f0
4890	addi	BO,  BO, - 4 * SIZE
4891	fpmr	f2,  f0
4892	addi	BO2, BO,   2 * SIZE
4893	fpmr	f3,  f0
4894#endif
4895
4896#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
4897	sub	TEMP, K, KK
4898#elif defined(LEFT)
4899	addi	TEMP, KK, 1
4900#else
4901	addi	TEMP, KK, 4
4902#endif
4903	srawi.	r0,  TEMP,  3
4904	mtspr	CTR, r0
4905	ble	.L1044
4906
4907#else
4908	addi	AO2, AO,   2 * SIZE
4909	fpmr	f1,  f0
4910	addi	BO,  B,  - 4 * SIZE
4911	fpmr	f2,  f0
4912	addi	BO2, B,  - 2 * SIZE
4913	fpmr	f3,  f0
4914
4915	srawi.	r0,  K,  3
4916	mtspr	CTR, r0
4917	ble	.L1044
4918#endif
4919
4920	LFPDUX	A1,  AO,  INC4
4921	LFPDUX	B1,  BO,  INC4
4922	LFPDUX	B2,  BO2, INC4
4923	LFPDUX	A2, AO2,  INC4
4924	LFPDUX	B3,  BO,  INC4
4925	LFPDUX	B4,  BO2, INC4
4926
4927	LFPDUX	A3,  AO,  INC4
4928	LFPDUX	A5,  BO,  INC4
4929	LFPDUX	A6,  BO2, INC4
4930	LFPDUX	A4, AO2,  INC4
4931	LFPDUX	A7,  BO,  INC4
4932	LFPDUX	A8,  BO2, INC4
4933	bdz-	.L1043
4934	.align 4
4935
/* .L1042: unrolled main loop for the single-row tile.  Here the A register */
/* is the first multiplicand: fxcpmadd feeds f0/f1 and fxcsmadd feeds       */
/* f2/f3, so each A register is consumed by four multiply-adds.  f0+f2 and  */
/* f1+f3 are summed later at .L1048.  Four A registers per pass, with every */
/* B-side register reloaded right after its use.                            */
4936.L1042:
4937	fxcpmadd	f0,  A1, B1, f0
4938	LFPDUX	B1,  BO,  INC4
4939	fxcpmadd	f1,  A1, B2, f1
4940	LFPDUX	B2,  BO2, INC4
4941	fxcsmadd	f2,  A1, B3, f2
4942	LFPDUX	B3,  BO,  INC4
4943	fxcsmadd	f3,  A1, B4, f3
4944	LFPDUX	B4,  BO2, INC4
4945	LFPDUX	A1,  AO,  INC4
4946
4947	fxcpmadd	f0,  A2, A5, f0
4948	LFPDUX	A5,  BO,  INC4
4949	fxcpmadd	f1,  A2, A6, f1
4950	LFPDUX	A6,  BO2, INC4
4951	fxcsmadd	f2,  A2, A7, f2
4952	LFPDUX	A7,  BO,  INC4
4953	fxcsmadd	f3,  A2, A8, f3
4954	LFPDUX	A8,  BO2, INC4
4955	LFPDUX	A2, AO2,  INC4
4956
4957	fxcpmadd	f0,  A3, B1, f0
4958	LFPDUX	B1,  BO,  INC4
4959	fxcpmadd	f1,  A3, B2, f1
4960	LFPDUX	B2,  BO2, INC4
4961	fxcsmadd	f2,  A3, B3, f2
4962	LFPDUX	B3,  BO,  INC4
4963	fxcsmadd	f3,  A3, B4, f3
4964	LFPDUX	B4,  BO2, INC4
4965	LFPDUX	A3,  AO,  INC4
4966
4967	fxcpmadd	f0,  A4, A5, f0
4968	LFPDUX	A5,  BO,  INC4
4969	fxcpmadd	f1,  A4, A6, f1
4970	LFPDUX	A6,  BO2, INC4
4971	fxcsmadd	f2,  A4, A7, f2
4972	LFPDUX	A7,  BO,  INC4
4973	fxcsmadd	f3,  A4, A8, f3
4974	LFPDUX	A8,  BO2, INC4
4975	LFPDUX	A4, AO2,  INC4
4976	bdnz+	.L1042
4977	.align 4
4978
/* .L1043: drain of the .L1042 loop.  The first two groups still reload the */
/* B-side registers (needed by the A3/A4 groups); the last two groups only  */
/* compute.  No A register is reloaded here.                                */
4979.L1043:
4980	fxcpmadd	f0,  A1, B1, f0
4981	LFPDUX	B1,  BO,  INC4
4982	fxcpmadd	f1,  A1, B2, f1
4983	LFPDUX	B2,  BO2, INC4
4984	fxcsmadd	f2,  A1, B3, f2
4985	LFPDUX	B3,  BO,  INC4
4986	fxcsmadd	f3,  A1, B4, f3
4987	LFPDUX	B4,  BO2, INC4
4988
4989	fxcpmadd	f0,  A2, A5, f0
4990	LFPDUX	A5,  BO,  INC4
4991	fxcpmadd	f1,  A2, A6, f1
4992	LFPDUX	A6,  BO2, INC4
4993	fxcsmadd	f2,  A2, A7, f2
4994	LFPDUX	A7,  BO,  INC4
4995	fxcsmadd	f3,  A2, A8, f3
4996	LFPDUX	A8,  BO2, INC4
4997
4998	fxcpmadd	f0,  A3, B1, f0
4999	fxcpmadd	f1,  A3, B2, f1
5000	fxcsmadd	f2,  A3, B3, f2
5001	fxcsmadd	f3,  A3, B4, f3
5002
5003	fxcpmadd	f0,  A4, A5, f0
5004	fxcpmadd	f1,  A4, A6, f1
5005	fxcsmadd	f2,  A4, A7, f2
5006	fxcsmadd	f3,  A4, A8, f3
5007	.align 4
5008
/* .L1044: leftover-K setup for the single-row tile.  AP from ALPHA(SP);    */
/* CTR = length & 7.  No leftover -> .L1048.  A is read with a scalar       */
/* non-update load (LFDX at AO + INC4) and AO is then advanced by INC.      */
5009.L1044:
5010	lfd	AP,  ALPHA(SP)
5011#ifdef TRMMKERNEL
5012       fsmfp	AP, AP
5013#endif
5014
5015#if defined(TRMMKERNEL)
5016#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
5017	sub	TEMP, K, KK
5018#elif defined(LEFT)
5019	addi	TEMP, KK, 1
5020#else
5021	addi	TEMP, KK, 4
5022#endif
5023	andi.	TEMP,  TEMP,  7
5024	mtspr	CTR, TEMP
5025#else
5026	andi.	r0,  K,  7
5027	mtspr	CTR, r0
5028#endif
5029	ble+	.L1048
5030
5031	LFDX	A1,  AO,  INC4
5032	LFPDUX	B1,  BO,  INC4
5033	LFPDUX	B2,  BO2, INC4
5034	add	AO, AO, INC
5035	bdz-	.L1047
5036	.align 4
5037
/* .L1046: leftover-K loop body for the single-row tile; only f0 and f1     */
/* accumulate here (f2/f3 are untouched), then A1/B1/B2 are reloaded.       */
5038.L1046:
5039	fxcpmadd	f0,  A1, B1, f0
5040	LFPDUX	B1,  BO,  INC4
5041	fxcpmadd	f1,  A1, B2, f1
5042	LFDX	A1,  AO,  INC4
5043	LFPDUX	B2,  BO2, INC4
5044	add	AO, AO, INC
5045	bdnz+	.L1046
5046	.align 4
5047
/* .L1047: final leftover step for the single-row tile (no further loads).  */
5048.L1047:
5049	fxcpmadd	f0,  A1, B1, f0
5050	fxcpmadd	f1,  A1, B2, f1
5051	.align 4
5052
/* .L1048: fold the partial sums (f0 += f2, f1 += f3), scale by AP and      */
/* store.  f0 is written through CO1 (STFDUX) and CO2 (STFSDUX); f1 through */
/* CO3 and CO4.  Non-TRMM first gathers the existing values into A1 (from   */
/* CO1/CO2) and B3 (from CO3/CO4) with non-update loads so the CO pointers  */
/* still point before the element when the update-form stores run.         */
5053.L1048:
5054#ifndef TRMMKERNEL
5055	LFDX	A1, CO1, INC
5056	LFDX	B3, CO3, INC
5057	LFSDX	A1, CO2, INC
5058	LFSDX	B3, CO4, INC
5059
5060	fpadd	f0, f0, f2
5061	fpadd	f1, f1, f3
5062
5063	fxcpmadd	f0,  AP, f0,  A1
5064	fxcpmadd	f1,  AP, f1,  B3
5065#else
5066	fpadd	f0, f0, f2
5067	fpadd	f1, f1, f3
5068
5069	fpmul	f0,  AP, f0
5070	fpmul	f1,  AP, f1
5071#endif
5072
5073	STFDUX	f0,  CO1, INC
5074	STFSDUX	f0,  CO2, INC
5075	STFDUX	f1,  CO3, INC
5076	STFSDUX	f1,  CO4, INC
5077
/* TRMM only: AO += (K - KK - n) << (0 + BASE_SHIFT) and                   */
/* BO += (K - KK - n) << (2 + BASE_SHIFT), n = 1 with LEFT else 4;         */
/* with LEFT, KK += 1.                                                      */
5078#ifdef TRMMKERNEL
5079#if ( defined(LEFT) &&  defined(TRANSA)) || \
5080    (!defined(LEFT) && !defined(TRANSA))
5081	sub	TEMP, K, KK
5082#ifdef LEFT
5083	addi	TEMP, TEMP, -1
5084#else
5085	addi	TEMP, TEMP, -4
5086#endif
5087	slwi	r0,   TEMP, 0 + BASE_SHIFT
5088	slwi	TEMP, TEMP, 2 + BASE_SHIFT
5089	add	AO, AO, r0
5090	add	BO, BO, TEMP
5091#endif
5092
5093#ifdef LEFT
5094	addi	KK, KK, 1
5095#endif
5096#endif
5097	.align 4
5098
/* .L1049: end of one pass of the J loop.  For TRMM without LEFT, KK += 4.  */
/* B is advanced to BO + 4*SIZE, J is decremented, and control returns to   */
/* .L1010 (outside this view) while J stays positive.                       */
5099.L1049:
5100#if defined(TRMMKERNEL) && !defined(LEFT)
5101	addi	KK, KK, 4
5102#endif
5103
5104	addi	B,  BO, 4 * SIZE
5105
5106	addic.	J, J, -1
5107	bgt+	.L1010
5108	.align 4
5109
/* .L1050: section executed only when bit 1 of N is set (else -> .L1090).   */
/* CO1 = C, CO2 = C + LDC, and C is advanced by 2 * LDC.  For TRMM with     */
/* LEFT, KK restarts from OFFSET.  AO = A - 2*SIZE (update-form loads with  */
/* stride INC2 are used below), f0 is reloaded from FZERO(SP), and          */
/* I = M >> 3; when that is <= 0 the first tile loop is skipped (.L1060).   */
5110.L1050:
5111	andi.	J, N,  2
5112	beq	.L1090
5113
5114	mr	CO1, C
5115	add	CO2, C,   LDC
5116	add	C,   CO2, LDC
5117
5118#if defined(TRMMKERNEL) &&  defined(LEFT)
5119	mr	KK, OFFSET
5120#endif
5121
5122	addi	AO, A, -2 * SIZE
5123
5124	li	r0, FZERO
5125	lfpsx	f0, SP, r0
5126
5127	srawi.	I, M,  3
5128	ble	.L1060
5129	.align 4
5130
/* .L1051: top of the I loop (I = M >> 3) for the tile accumulated in       */
/* f0..f7.  Copies f0 into f1..f7, points BO at B - 2*SIZE (TRMM variants   */
/* first offset AO by KK << (3 + BASE_SHIFT) and B by                       */
/* KK << (1 + BASE_SHIFT)), and sets CTR = length >> 2.  A count <= 0       */
/* skips to .L1054; otherwise B1..B3 and A1..A8 are preloaded.              */
5131.L1051:
5132#if defined(TRMMKERNEL)
5133#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
5134	fpmr	f4,  f0
5135	addi	BO,  B,  - 2 * SIZE
5136 	fpmr	f1,  f0
5137	fpmr	f5,  f0
5138	fpmr	f2,  f0
5139	fpmr	f6,  f0
5140#else
5141	slwi	TEMP, KK, 3 + BASE_SHIFT
5142	slwi	r0,   KK, 1 + BASE_SHIFT
5143	add	AO, AO, TEMP
5144	add	BO, B,  r0
5145
5146	fpmr	f4,  f0
5147	addi	BO,  BO,  - 2 * SIZE
5148 	fpmr	f1,  f0
5149	fpmr	f5,  f0
5150	fpmr	f2,  f0
5151	fpmr	f6,  f0
5152#endif
5153
5154
5155#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
5156	sub	TEMP, K, KK
5157#elif defined(LEFT)
5158	addi	TEMP, KK, 8
5159#else
5160	addi	TEMP, KK, 2
5161#endif
5162	srawi.	r0,  TEMP,  2
5163	fpmr	f3,  f0
5164	mtspr	CTR, r0
5165	fpmr	f7,  f0
5166	ble	.L1054
5167#else
5168	fpmr	f4,  f0
5169	addi	BO,  B,  - 2 * SIZE
5170 	fpmr	f1,  f0
5171	fpmr	f5,  f0
5172	fpmr	f2,  f0
5173	fpmr	f6,  f0
5174
5175	srawi.	r0,  K,  2
5176	fpmr	f3,  f0
5177	mtspr	CTR, r0
5178	fpmr	f7,  f0
5179	ble	.L1054
5180#endif
5181
5182	LFPDUX	B1,  BO,  INC2
5183	LFPDUX	A1,  AO,  INC2
5184	LFPDUX	A2,  AO,  INC2
5185	LFPDUX	B2,  BO,  INC2
5186	LFPDUX	A3,  AO,  INC2
5187	LFPDUX	A4,  AO,  INC2
5188
5189	LFPDUX	B3,  BO,  INC2
5190	LFPDUX	A5,  AO,  INC2
5191	LFPDUX	A6,  AO,  INC2
5192	LFPDUX	A7,  AO,  INC2
5193	LFPDUX	A8,  AO,  INC2
5194	bdz-	.L1053
5195	.align 4
5196
/* .L1052: unrolled main loop for the f0..f7 tile.  Four groups per pass,   */
/* one per B register (B1, B2, B3, B4); each group issues eight             */
/* multiply-adds against A1..A4 or A5..A8 and reloads the A registers it    */
/* consumed.  B4 is loaded at the top of the pass; B1..B3 are reloaded for  */
/* the next pass.  The nop instructions are part of the original            */
/* scheduling (presumably issue-slot padding) and must stay in place.       */
5197.L1052:
5198	fxcpmadd	f0,  B1, A1, f0
5199	LFPDUX	B4,  BO,  INC2
5200	fxcsmadd	f4,  B1, A1, f4
5201	LFPDUX	A1,  AO,  INC2
5202	fxcpmadd	f1,  B1, A2, f1
5203	nop
5204	fxcsmadd	f5,  B1, A2, f5
5205	LFPDUX	A2,  AO,  INC2
5206
5207	fxcpmadd	f2,  B1, A3, f2
5208	nop
5209	fxcsmadd	f6,  B1, A3, f6
5210	LFPDUX	A3,  AO,  INC2
5211	fxcpmadd	f3,  B1, A4, f3
5212	nop
5213	fxcsmadd	f7,  B1, A4, f7
5214	LFPDUX	A4,  AO,  INC2
5215
5216	fxcpmadd	f0,  B2, A5, f0
5217	LFPDUX	B1,  BO,  INC2
5218	fxcsmadd	f4,  B2, A5, f4
5219	LFPDUX	A5,  AO,  INC2
5220	fxcpmadd	f1,  B2, A6, f1
5221	nop
5222	fxcsmadd	f5,  B2, A6, f5
5223	LFPDUX	A6,  AO,  INC2
5224
5225	fxcpmadd	f2,  B2, A7, f2
5226	nop
5227	fxcsmadd	f6,  B2, A7, f6
5228	LFPDUX	A7,  AO,  INC2
5229	fxcpmadd	f3,  B2, A8, f3
5230	nop
5231	fxcsmadd	f7,  B2, A8, f7
5232	LFPDUX	A8,  AO,  INC2
5233
5234	fxcpmadd	f0,  B3, A1, f0
5235	LFPDUX	B2,  BO,  INC2
5236	fxcsmadd	f4,  B3, A1, f4
5237	LFPDUX	A1,  AO,  INC2
5238	fxcpmadd	f1,  B3, A2, f1
5239	nop
5240	fxcsmadd	f5,  B3, A2, f5
5241	LFPDUX	A2,  AO,  INC2
5242
5243	fxcpmadd	f2,  B3, A3, f2
5244	nop
5245	fxcsmadd	f6,  B3, A3, f6
5246	LFPDUX	A3,  AO,  INC2
5247	fxcpmadd	f3,  B3, A4, f3
5248	nop
5249	fxcsmadd	f7,  B3, A4, f7
5250	LFPDUX	A4,  AO,  INC2
5251
5252	fxcpmadd	f0,  B4, A5, f0
5253	LFPDUX	B3,  BO,  INC2
5254	fxcsmadd	f4,  B4, A5, f4
5255	LFPDUX	A5,  AO,  INC2
5256	fxcpmadd	f1,  B4, A6, f1
5257	nop
5258	fxcsmadd	f5,  B4, A6, f5
5259	LFPDUX	A6,  AO,  INC2
5260
5261	fxcpmadd	f2,  B4, A7, f2
5262	nop
5263	fxcsmadd	f6,  B4, A7, f6
5264	LFPDUX	A7,  AO,  INC2
5265	fxcpmadd	f3,  B4, A8, f3
5266	nop
5267	fxcsmadd	f7,  B4, A8, f7
5268	LFPDUX	A8,  AO,  INC2
5269	bdnz+	.L1052
5270	.align 4
5271
/* .L1053: drain of the .L1052 loop.  The B1 and B2 groups still load B4    */
/* and reload A1..A8 (needed by the B3 and B4 groups); the last two groups  */
/* only compute.  B1..B3 are not reloaded here.                             */
5272.L1053:
5273	fxcpmadd	f0,  B1, A1, f0
5274	LFPDUX	B4,  BO,  INC2
5275	fxcsmadd	f4,  B1, A1, f4
5276	LFPDUX	A1,  AO,  INC2
5277	fxcpmadd	f1,  B1, A2, f1
5278	nop
5279	fxcsmadd	f5,  B1, A2, f5
5280	LFPDUX	A2,  AO,  INC2
5281
5282	fxcpmadd	f2,  B1, A3, f2
5283	nop
5284	fxcsmadd	f6,  B1, A3, f6
5285	LFPDUX	A3,  AO,  INC2
5286	fxcpmadd	f3,  B1, A4, f3
5287	nop
5288	fxcsmadd	f7,  B1, A4, f7
5289	LFPDUX	A4,  AO,  INC2
5290
5291	fxcpmadd	f0,  B2, A5, f0
5292	nop
5293	fxcsmadd	f4,  B2, A5, f4
5294	LFPDUX	A5,  AO,  INC2
5295	fxcpmadd	f1,  B2, A6, f1
5296	nop
5297	fxcsmadd	f5,  B2, A6, f5
5298	LFPDUX	A6,  AO,  INC2
5299
5300	fxcpmadd	f2,  B2, A7, f2
5301	nop
5302	fxcsmadd	f6,  B2, A7, f6
5303	LFPDUX	A7,  AO,  INC2
5304	fxcpmadd	f3,  B2, A8, f3
5305	nop
5306	fxcsmadd	f7,  B2, A8, f7
5307	LFPDUX	A8,  AO,  INC2
5308
5309	fxcpmadd	f0,  B3, A1, f0
5310	fxcsmadd	f4,  B3, A1, f4
5311	fxcpmadd	f1,  B3, A2, f1
5312	fxcsmadd	f5,  B3, A2, f5
5313
5314	fxcpmadd	f2,  B3, A3, f2
5315	fxcsmadd	f6,  B3, A3, f6
5316	fxcpmadd	f3,  B3, A4, f3
5317	fxcsmadd	f7,  B3, A4, f7
5318
5319	fxcpmadd	f0,  B4, A5, f0
5320	fxcsmadd	f4,  B4, A5, f4
5321	fxcpmadd	f1,  B4, A6, f1
5322	fxcsmadd	f5,  B4, A6, f5
5323
5324	fxcpmadd	f2,  B4, A7, f2
5325	fxcsmadd	f6,  B4, A7, f6
5326	fxcpmadd	f3,  B4, A8, f3
5327	fxcsmadd	f7,  B4, A8, f7
5328	.align 4
5329
/* .L1054: leftover-K setup for the f0..f7 tile.  AP from ALPHA(SP);        */
/* CTR = length & 3.  No leftover -> .L1058; otherwise preload A1..A4 and   */
/* B1, and go straight to .L1057 when only one step remains.                */
5330.L1054:
5331	lfd	AP,  ALPHA(SP)
5332#ifdef TRMMKERNEL
5333       fsmfp	AP, AP
5334#endif
5335
5336#if defined(TRMMKERNEL)
5337#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
5338	sub	TEMP, K, KK
5339#elif defined(LEFT)
5340	addi	TEMP, KK, 8
5341#else
5342	addi	TEMP, KK, 2
5343#endif
5344	andi.	TEMP,  TEMP,  3
5345	mtspr	CTR, TEMP
5346#else
5347	andi.	r0,  K,  3
5348	mtspr	CTR, r0
5349#endif
5350	ble+	.L1058
5351
5352	LFPDUX	A1,  AO,  INC2
5353	LFPDUX	B1,  BO,  INC2
5354	LFPDUX	A2,  AO,  INC2
5355	LFPDUX	A3,  AO,  INC2
5356	LFPDUX	A4,  AO,  INC2
5357	bdz-	.L1057
5358	.align 4
5359
/* .L1056: leftover-K loop body for the f0..f7 tile: eight multiply-adds of */
/* B1 against A1..A4, each A register reloaded after use, B1 reloaded last. */
5360.L1056:
5361	fxcpmadd	f0,  B1, A1, f0
5362	fxcsmadd	f4,  B1, A1, f4
5363	LFPDUX	A1,  AO,  INC2
5364	fxcpmadd	f1,  B1, A2, f1
5365	fxcsmadd	f5,  B1, A2, f5
5366	LFPDUX	A2,  AO,  INC2
5367
5368	fxcpmadd	f2,  B1, A3, f2
5369	fxcsmadd	f6,  B1, A3, f6
5370	LFPDUX	A3,  AO,  INC2
5371	fxcpmadd	f3,  B1, A4, f3
5372	fxcsmadd	f7,  B1, A4, f7
5373	LFPDUX	A4,  AO,  INC2
5374	LFPDUX	B1,  BO,  INC2
5375	bdnz+	.L1056
5376	.align 4
5377
/* .L1057: final leftover step for the f0..f7 tile (no further loads).      */
5378.L1057:
5379	fxcpmadd	f0,  B1, A1, f0
5380	fxcsmadd	f4,  B1, A1, f4
5381	fxcpmadd	f1,  B1, A2, f1
5382	fxcsmadd	f5,  B1, A2, f5
5383
5384	fxcpmadd	f2,  B1, A3, f2
5385	fxcsmadd	f6,  B1, A3, f6
5386	fxcpmadd	f3,  B1, A4, f3
5387	fxcsmadd	f7,  B1, A4, f7
5388	.align 4
5389
/* .L1058: write f0..f3 out through CO1 and f4..f7 through CO2.             */
/*  - non-TRMM: existing values are gathered first (LFDUX then LFSDUX into  */
/*    the same registers, with INCM5 stepping back between the two phases), */
/*    combined with fxcpmadd(AP, acc, loaded), and stored after rewinding   */
/*    each pointer with INCM7.                                              */
/*  - TRMM: AP * acc is stored directly.                                    */
/* Then TRMM pointer/KK bookkeeping, I is decremented, f0 is reloaded from  */
/* FZERO(SP), and the I loop repeats at .L1051 while I stays positive.      */
5390.L1058:
5391#ifndef TRMMKERNEL
5392	LFDUX	A1, CO1, INC
5393	LFDUX	B1, CO1, INC2
5394	LFDUX	A3, CO1, INC2
5395   	LFDUX	A5, CO1, INC2
5396
5397	LFSDUX	A1, CO1, INCM5
5398	LFSDUX	B1, CO1, INC2
5399	LFSDUX	A3, CO1, INC2
5400	LFSDUX	A5, CO1, INC2
5401
5402 	LFDUX	B3, CO2, INC
5403	LFDUX	A6, CO2, INC2
5404	LFDUX	A7, CO2, INC2
5405	LFDUX	B2, CO2, INC2
5406
5407	fxcpmadd	f0,  AP, f0,  A1
5408	LFSDUX	B3, CO2, INCM5
5409	LFSDUX	A6, CO2, INC2
5410	fxcpmadd	f1,  AP, f1,  B1
5411	LFSDUX	A7, CO2, INC2
5412	LFSDUX	B2, CO2, INC2
5413
5414	fxcpmadd	f2,  AP, f2,  A3
5415	STFDUX	f0,  CO1, INCM7
5416	STFSDUX	f0,  CO1, INC
5417
5418	fxcpmadd	f3,  AP, f3,  A5
5419	STFDUX	f1,  CO1, INC
5420	STFSDUX	f1,  CO1, INC
5421
5422	fxcpmadd	f4,  AP, f4,  B3
5423	STFDUX	f2,  CO1, INC
5424	STFSDUX	f2,  CO1, INC
5425
5426	fxcpmadd	f5,  AP, f5,  A6
5427	STFDUX	f3,  CO1, INC
5428	STFSDUX	f3,  CO1, INC
5429
5430	fxcpmadd	f6,  AP, f6,  A7
5431	STFDUX	f4,  CO2, INCM7
5432	STFSDUX	f4,  CO2, INC
5433
5434	fxcpmadd	f7,  AP, f7,  B2
5435	STFDUX	f5,  CO2, INC
5436	STFSDUX	f5,  CO2, INC
5437
5438	STFDUX	f6,  CO2, INC
5439	STFSDUX	f6,  CO2, INC
5440
5441	STFDUX	f7,  CO2, INC
5442	STFSDUX	f7,  CO2, INC
5443#else
5444	fpmul	f0,  AP, f0
5445	fpmul	f1,  AP, f1
5446
5447	fpmul	f2,  AP, f2
5448	STFDUX	f0,  CO1, INC
5449	STFSDUX	f0,  CO1, INC
5450
5451	fpmul	f3,  AP, f3
5452	STFDUX	f1,  CO1, INC
5453	STFSDUX	f1,  CO1, INC
5454
5455	fpmul	f4,  AP, f4
5456	STFDUX	f2,  CO1, INC
5457	STFSDUX	f2,  CO1, INC
5458
5459	fpmul	f5,  AP, f5
5460	STFDUX	f3,  CO1, INC
5461	STFSDUX	f3,  CO1, INC
5462
5463	fpmul	f6,  AP, f6
5464	STFDUX	f4,  CO2, INC
5465	STFSDUX	f4,  CO2, INC
5466
5467	fpmul	f7,  AP, f7
5468	STFDUX	f5,  CO2, INC
5469	STFSDUX	f5,  CO2, INC
5470
5471	STFDUX	f6,  CO2, INC
5472	STFSDUX	f6,  CO2, INC
5473
5474	STFDUX	f7,  CO2, INC
5475	STFSDUX	f7,  CO2, INC
5476#endif
5477
5478
/* TRMM only: AO += (K - KK - n) << (3 + BASE_SHIFT) and                   */
/* BO += (K - KK - n) << (1 + BASE_SHIFT), n = 8 with LEFT else 2;         */
/* with LEFT, KK += 8.                                                      */
5479#ifdef TRMMKERNEL
5480#if ( defined(LEFT) &&  defined(TRANSA)) || \
5481    (!defined(LEFT) && !defined(TRANSA))
5482	sub	TEMP, K, KK
5483#ifdef LEFT
5484	addi	TEMP, TEMP, -8
5485#else
5486	addi	TEMP, TEMP, -2
5487#endif
5488	slwi	r0,   TEMP, 3 + BASE_SHIFT
5489	slwi	TEMP, TEMP, 1 + BASE_SHIFT
5490	add	AO, AO, r0
5491	add	BO, BO, TEMP
5492#endif
5493
5494#ifdef LEFT
5495	addi	KK, KK, 8
5496#endif
5497#endif
5498
/* addic. sets CR0 for the bgt+ below; li and lfpsx in between do not      */
/* modify CR0.                                                              */
5499	addic.	I, I, -1
5500	li	r0, FZERO
5501
5502	lfpsx	f0, SP, r0
5503	bgt+	.L1051
5504	.align 4
5505
/* .L1060: handled only when bit 2 of M is set (otherwise -> .L1070).       */
/* Accumulators are f0..f3 (f1..f3 copied from f0).  BO = B - 2*SIZE, with  */
/* TRMM variants first offsetting AO by KK << (2 + BASE_SHIFT) and B by     */
/* KK << (1 + BASE_SHIFT).  CTR = length >> 2; a count <= 0 skips to        */
/* .L1064.  Otherwise B1..B4 and A1..A8 are preloaded.                      */
5506.L1060:
5507	andi.	I, M,  4
5508	beq	.L1070
5509
5510#if defined(TRMMKERNEL)
5511#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
5512	addi	BO,  B,  - 2 * SIZE
5513 	fpmr	f1,  f0
5514#else
5515	slwi	TEMP, KK, 2 + BASE_SHIFT
5516	slwi	r0,   KK, 1 + BASE_SHIFT
5517	add	AO, AO, TEMP
5518	add	BO, B,  r0
5519
5520	addi	BO,  BO,  - 2 * SIZE
5521 	fpmr	f1,  f0
5522#endif
5523
5524#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
5525	sub	TEMP, K, KK
5526#elif defined(LEFT)
5527	addi	TEMP, KK, 4
5528#else
5529	addi	TEMP, KK, 2
5530#endif
5531	fpmr	f2,  f0
5532	srawi.	r0,  TEMP,  2
5533	mtspr	CTR, r0
5534	fpmr	f3,  f0
5535	ble	.L1064
5536#else
5537	srawi.	r0,  K,  2
5538 	fpmr	f1,  f0
5539	addi	BO,  B,  - 2 * SIZE
5540	fpmr	f2,  f0
5541	mtspr	CTR, r0
5542	fpmr	f3,  f0
5543	ble	.L1064
5544#endif
5545
5546	LFPDUX	B1,  BO, INC2
5547	LFPDUX	A1,  AO, INC2
5548	LFPDUX	A2,  AO, INC2
5549	LFPDUX	B2,  BO, INC2
5550	LFPDUX	A3,  AO, INC2
5551	LFPDUX	A4,  AO, INC2
5552
5553	LFPDUX	B3,  BO, INC2
5554	LFPDUX	A5,  AO, INC2
5555	LFPDUX	A6,  AO, INC2
5556	LFPDUX	B4,  BO, INC2
5557	LFPDUX	A7,  AO, INC2
5558	LFPDUX	A8,  AO, INC2
5559	bdz-	.L1063
5560	.align 4
5561
/* .L1062: unrolled main loop for this f0..f3 tile.  Four groups per pass   */
/* (B1 with A1/A2, B2 with A3/A4, B3 with A5/A6, B4 with A7/A8); fxcpmadd   */
/* feeds f0/f1, fxcsmadd feeds f2/f3.  Every operand is reloaded right      */
/* after its group for the next pass.                                       */
5562.L1062:
5563	fxcpmadd	f0,  B1, A1, f0
5564	fxcsmadd	f2,  B1, A1, f2
5565	LFPDUX	A1,  AO, INC2
5566	fxcpmadd	f1,  B1, A2, f1
5567	fxcsmadd	f3,  B1, A2, f3
5568	LFPDUX	A2,  AO, INC2
5569	LFPDUX	B1,  BO, INC2
5570
5571	fxcpmadd	f0,  B2, A3, f0
5572	fxcsmadd	f2,  B2, A3, f2
5573	LFPDUX	A3,  AO, INC2
5574	fxcpmadd	f1,  B2, A4, f1
5575	fxcsmadd	f3,  B2, A4, f3
5576	LFPDUX	A4,  AO, INC2
5577	LFPDUX	B2,  BO, INC2
5578
5579	fxcpmadd	f0,  B3, A5, f0
5580	fxcsmadd	f2,  B3, A5, f2
5581	LFPDUX	A5,  AO, INC2
5582	fxcpmadd	f1,  B3, A6, f1
5583	fxcsmadd	f3,  B3, A6, f3
5584	LFPDUX	A6,  AO, INC2
5585	LFPDUX	B3,  BO, INC2
5586
5587	fxcpmadd	f0,  B4, A7, f0
5588	fxcsmadd	f2,  B4, A7, f2
5589	LFPDUX	A7,  AO, INC2
5590	fxcpmadd	f1,  B4, A8, f1
5591	fxcsmadd	f3,  B4, A8, f3
5592	LFPDUX	A8,  AO, INC2
5593	LFPDUX	B4,  BO, INC2
5594	bdnz+	.L1062
5595	.align 4
5596
/* .L1063: drain of the .L1062 loop - the same four groups on the operands  */
/* already in registers, with no further loads.                             */
5597.L1063:
5598	fxcpmadd	f0,  B1, A1, f0
5599	fxcsmadd	f2,  B1, A1, f2
5600	fxcpmadd	f1,  B1, A2, f1
5601	fxcsmadd	f3,  B1, A2, f3
5602
5603	fxcpmadd	f0,  B2, A3, f0
5604	fxcsmadd	f2,  B2, A3, f2
5605	fxcpmadd	f1,  B2, A4, f1
5606	fxcsmadd	f3,  B2, A4, f3
5607
5608	fxcpmadd	f0,  B3, A5, f0
5609	fxcsmadd	f2,  B3, A5, f2
5610	fxcpmadd	f1,  B3, A6, f1
5611	fxcsmadd	f3,  B3, A6, f3
5612
5613	fxcpmadd	f0,  B4, A7, f0
5614	fxcsmadd	f2,  B4, A7, f2
5615	fxcpmadd	f1,  B4, A8, f1
5616	fxcsmadd	f3,  B4, A8, f3
5617	.align 4
5618
/* .L1064: leftover-K setup for this f0..f3 tile.  AP from ALPHA(SP);       */
/* CTR = length & 3.  No leftover -> .L1068; otherwise preload A1, B1, A2   */
/* and go straight to .L1067 when only one step remains.                    */
5619.L1064:
5620	lfd	AP,  ALPHA(SP)
5621#ifdef TRMMKERNEL
5622       fsmfp	AP, AP
5623#endif
5624
5625#if defined(TRMMKERNEL)
5626#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
5627	sub	TEMP, K, KK
5628#elif defined(LEFT)
5629	addi	TEMP, KK, 4
5630#else
5631	addi	TEMP, KK, 2
5632#endif
5633	andi.	TEMP,  TEMP,  3
5634	mtspr	CTR, TEMP
5635#else
5636	andi.	r0,  K,  3
5637	mtspr	CTR, r0
5638#endif
5639	ble+	.L1068
5640
5641	LFPDUX	A1,  AO,  INC2
5642	LFPDUX	B1,  BO,  INC2
5643	LFPDUX	A2,  AO,  INC2
5644	bdz-	.L1067
5645	.align 4
5646
/* .L1066: leftover-K loop body: four multiply-adds of B1 against A1/A2,    */
/* then reload A1, B1 and A2 for the next pass.                             */
5647.L1066:
5648	fxcpmadd	f0,  B1, A1, f0
5649	fxcsmadd	f2,  B1, A1, f2
5650	LFPDUX	A1,  AO,  INC2
5651	fxcpmadd	f1,  B1, A2, f1
5652	fxcsmadd	f3,  B1, A2, f3
5653	LFPDUX	B1,  BO,  INC2
5654	LFPDUX	A2,  AO,  INC2
5655	bdnz+	.L1066
5656	.align 4
5657
/* .L1067: final leftover step (no further loads).                          */
5658.L1067:
5659	fxcpmadd	f0,  B1, A1, f0
5660	fxcsmadd	f2,  B1, A1, f2
5661	fxcpmadd	f1,  B1, A2, f1
5662	fxcsmadd	f3,  B1, A2, f3
5663	.align 4
5664
/* .L1068: write f0/f1 out through CO1 and f2/f3 through CO2.  Non-TRMM     */
/* gathers the existing values into A1..A4 (INCM1 steps back between the    */
/* LFDUX and LFSDUX phases), applies fxcpmadd(AP, acc, loaded) and rewinds  */
/* each pointer with INCM3 before storing; TRMM stores AP * acc directly.   */
/* Then TRMM pointer/KK bookkeeping and a reload of f0 from FZERO(SP).      */
5665.L1068:
5666#ifndef TRMMKERNEL
5667	LFDUX	A1, CO1, INC
5668	LFDUX	A2, CO1, INC2
5669	LFDUX	A3, CO2, INC
5670	LFDUX	A4, CO2, INC2
5671
5672	LFSDUX	A1, CO1, INCM1
5673	LFSDUX	A2, CO1, INC2
5674	LFSDUX	A3, CO2, INCM1
5675	LFSDUX	A4, CO2, INC2
5676
5677	fxcpmadd	f0,  AP, f0,  A1
5678	fxcpmadd	f1,  AP, f1,  A2
5679	fxcpmadd	f2,  AP, f2,  A3
5680	STFDUX	f0,  CO1, INCM3
5681	STFSDUX	f0,  CO1, INC
5682
5683 	fxcpmadd	f3,  AP, f3,  A4
5684	STFDUX	f1,  CO1, INC
5685	STFSDUX	f1,  CO1, INC
5686
5687	STFDUX	f2,  CO2, INCM3
5688	STFSDUX	f2,  CO2, INC
5689
5690	STFDUX	f3,  CO2, INC
5691	STFSDUX	f3,  CO2, INC
5692#else
5693	fpmul	f0,  AP, f0
5694	fpmul	f1,  AP, f1
5695	fpmul	f2,  AP, f2
5696	STFDUX	f0,  CO1, INC
5697	STFSDUX	f0,  CO1, INC
5698
5699 	fpmul	f3,  AP, f3
5700	STFDUX	f1,  CO1, INC
5701	STFSDUX	f1,  CO1, INC
5702
5703	STFDUX	f2,  CO2, INC
5704	STFSDUX	f2,  CO2, INC
5705
5706	STFDUX	f3,  CO2, INC
5707	STFSDUX	f3,  CO2, INC
5708#endif
5709
5710
/* TRMM only: AO += (K - KK - n) << (2 + BASE_SHIFT) and                   */
/* BO += (K - KK - n) << (1 + BASE_SHIFT), n = 4 with LEFT else 2;         */
/* with LEFT, KK += 4.                                                      */
5711#ifdef TRMMKERNEL
5712#if ( defined(LEFT) &&  defined(TRANSA)) || \
5713    (!defined(LEFT) && !defined(TRANSA))
5714	sub	TEMP, K, KK
5715#ifdef LEFT
5716	addi	TEMP, TEMP, -4
5717#else
5718	addi	TEMP, TEMP, -2
5719#endif
5720	slwi	r0,   TEMP, 2 + BASE_SHIFT
5721	slwi	TEMP, TEMP, 1 + BASE_SHIFT
5722	add	AO, AO, r0
5723	add	BO, BO, TEMP
5724#endif
5725
5726#ifdef LEFT
5727	addi	KK, KK, 4
5728#endif
5729#endif
5730
5731	li	r0, FZERO
5732	lfpsx	f0, SP, r0
5733	.align 4
5734
/* .L1070: handled only when bit 1 of M is set (otherwise -> .L1080).       */
/* BO = B - 2*SIZE, with TRMM variants first offsetting both AO and B by    */
/* KK << (1 + BASE_SHIFT).  f1..f3 are copied from f0; CTR = length >> 3,   */
/* and a count <= 0 skips to .L1074.  Otherwise eight A registers           */
/* (A1..A8) and eight B-side registers (B1..B6, A9, A10) are preloaded.     */
5735.L1070:
5736	andi.	I, M,  2
5737	beq	.L1080
5738
5739#if defined(TRMMKERNEL)
5740#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
5741	addi	BO,  B,  - 2 * SIZE
5742	fpmr	f1,  f0
5743#else
5744	slwi	TEMP, KK, 1 + BASE_SHIFT
5745	slwi	r0,   KK, 1 + BASE_SHIFT
5746	add	AO, AO, TEMP
5747	add	BO, B,  r0
5748
5749	addi	BO,  BO,  - 2 * SIZE
5750	fpmr	f1,  f0
5751#endif
5752
5753#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
5754	sub	TEMP, K, KK
5755#elif defined(LEFT)
5756	addi	TEMP, KK, 2
5757#else
5758	addi	TEMP, KK, 2
5759#endif
5760	srawi.	r0,  TEMP,  3
5761	fpmr	f2,  f0
5762	mtspr	CTR, r0
5763	fpmr	f3, f0
5764	ble	.L1074
5765#else
5766	addi	BO,  B,  - 2 * SIZE
5767	fpmr	f1,  f0
5768
5769	srawi.	r0,  K,  3
5770	fpmr	f2,  f0
5771	mtspr	CTR, r0
5772	fpmr	f3, f0
5773	ble	.L1074
5774#endif
5775
5776
5777	LFPDUX	A1,  AO, INC2
5778	LFPDUX	B1,  BO, INC2
5779	LFPDUX	A2,  AO, INC2
5780	LFPDUX	B2,  BO, INC2
5781	LFPDUX	A3,  AO, INC2
5782	LFPDUX	B3,  BO, INC2
5783	LFPDUX	A4,  AO, INC2
5784	LFPDUX	B4,  BO, INC2
5785
5786	LFPDUX	A5,  AO, INC2
5787	LFPDUX	B5,  BO, INC2
5788	LFPDUX	A6,  AO, INC2
5789	LFPDUX	B6,  BO, INC2
5790	LFPDUX	A7,  AO, INC2
5791	LFPDUX	A9,  BO, INC2
5792	LFPDUX	A8,  AO, INC2
5793	LFPDUX	A10, BO, INC2
5794	bdz-	.L1073
5795	.align 4
5796
/* .L1072: unrolled main loop.  Eight A/B pairs per pass; pairs alternate   */
/* between the accumulator sets f0/f1 and f2/f3, which are summed later at  */
/* .L1078.  Each pair is reloaded immediately after use.  A9 and A10 hold   */
/* B-side data here.                                                        */
5797.L1072:
5798	fxcpmadd	f0,  B1, A1, f0
5799	fxcsmadd	f1,  B1, A1, f1
5800	LFPDUX	A1,  AO, INC2
5801	LFPDUX	B1,  BO, INC2
5802	fxcpmadd	f2,  B2, A2, f2
5803	fxcsmadd	f3,  B2, A2, f3
5804	LFPDUX	A2,  AO, INC2
5805	LFPDUX	B2,  BO, INC2
5806
5807	fxcpmadd	f0,  B3, A3, f0
5808	fxcsmadd	f1,  B3, A3, f1
5809	LFPDUX	A3,  AO, INC2
5810	LFPDUX	B3,  BO, INC2
5811	fxcpmadd	f2,  B4, A4, f2
5812	fxcsmadd	f3,  B4, A4, f3
5813	LFPDUX	A4,  AO, INC2
5814	LFPDUX	B4,  BO, INC2
5815
5816	fxcpmadd	f0,  B5, A5, f0
5817	fxcsmadd	f1,  B5, A5, f1
5818	LFPDUX	A5,  AO, INC2
5819	LFPDUX	B5,  BO, INC2
5820	fxcpmadd	f2,  B6, A6, f2
5821	fxcsmadd	f3,  B6, A6, f3
5822	LFPDUX	A6,  AO, INC2
5823	LFPDUX	B6,  BO, INC2
5824
5825	fxcpmadd	f0,  A9,  A7, f0
5826	fxcsmadd	f1,  A9,  A7, f1
5827	LFPDUX	A7,  AO, INC2
5828	LFPDUX	A9,  BO, INC2
5829	fxcpmadd	f2,  A10, A8, f2
5830	fxcsmadd	f3,  A10, A8, f3
5831	LFPDUX	A8,  AO, INC2
5832	LFPDUX	A10, BO, INC2
5833	bdnz+	.L1072
5834	.align 4
5835
/* .L1073: drain of the .L1072 loop - the same eight pairs on the operands  */
/* already in registers, with no further loads.                             */
5836.L1073:
5837	fxcpmadd	f0,  B1, A1, f0
5838	fxcsmadd	f1,  B1, A1, f1
5839	fxcpmadd	f2,  B2, A2, f2
5840	fxcsmadd	f3,  B2, A2, f3
5841
5842	fxcpmadd	f0,  B3, A3, f0
5843	fxcsmadd	f1,  B3, A3, f1
5844	fxcpmadd	f2,  B4, A4, f2
5845	fxcsmadd	f3,  B4, A4, f3
5846
5847	fxcpmadd	f0,  B5, A5, f0
5848	fxcsmadd	f1,  B5, A5, f1
5849	fxcpmadd	f2,  B6, A6, f2
5850	fxcsmadd	f3,  B6, A6, f3
5851
5852	fxcpmadd	f0,  A9,  A7, f0
5853	fxcsmadd	f1,  A9,  A7, f1
5854	fxcpmadd	f2,  A10, A8, f2
5855	fxcsmadd	f3,  A10, A8, f3
5856	.align 4
5857
/* .L1074: leftover-K setup.  AP from ALPHA(SP); CTR = length & 7.          */
/* No leftover -> .L1078; otherwise preload A1/B1 and go straight to        */
/* .L1077 when only one step remains.                                       */
5858.L1074:
5859	lfd	AP,  ALPHA(SP)
5860#ifdef TRMMKERNEL
5861       fsmfp	AP, AP
5862#endif
5863
5864#if defined(TRMMKERNEL)
5865#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
5866	sub	TEMP, K, KK
5867#elif defined(LEFT)
5868	addi	TEMP, KK, 2
5869#else
5870	addi	TEMP, KK, 2
5871#endif
5872	andi.	TEMP,  TEMP,  7
5873	mtspr	CTR, TEMP
5874#else
5875	andi.	r0,  K,  7
5876	mtspr	CTR, r0
5877#endif
5878	ble+	.L1078
5879
5880	LFPDUX	A1,  AO,  INC2
5881	LFPDUX	B1,  BO,  INC2
5882	bdz-	.L1077
5883	.align 4
5884
/* .L1076: leftover-K loop body: accumulate into f0/f1 only, then reload    */
/* A1 and B1.                                                               */
5885.L1076:
5886	fxcpmadd	f0,  B1, A1, f0
5887	fxcsmadd	f1,  B1, A1, f1
5888	LFPDUX	A1,  AO,  INC2
5889	LFPDUX	B1,  BO,  INC2
5890	bdnz+	.L1076
5891	.align 4
5892
/* .L1077: final leftover step (no further loads).                          */
5893.L1077:
5894	fxcpmadd	f0,  B1, A1, f0
5895	fxcsmadd	f1,  B1, A1, f1
5896	.align 4
5897
/* .L1078: fold the partial sums (f0 += f2, f1 += f3), then write f0        */
/* through CO1 and f1 through CO2.  Non-TRMM gathers the existing pair into */
/* A1 / B3 first, applies fxcpmadd(AP, acc, loaded) and steps back with     */
/* INCM1 before storing; TRMM stores AP * acc directly.  Then TRMM          */
/* pointer/KK bookkeeping and a reload of f0 from FZERO(SP).                */
5898.L1078:
5899#ifndef TRMMKERNEL
5900	LFDUX	A1, CO1, INC
5901	LFDUX	B3, CO2, INC
5902	LFSDUX	A1, CO1, INC
5903	LFSDUX	B3, CO2, INC
5904
5905	fpadd	f0, f0, f2
5906	fpadd	f1, f1, f3
5907
5908	fxcpmadd	f0,  AP, f0,  A1
5909	fxcpmadd	f1,  AP, f1,  B3
5910
5911	STFDUX	f0,  CO1, INCM1
5912	STFSDUX	f0,  CO1, INC
5913	STFDUX	f1,  CO2, INCM1
5914	STFSDUX	f1,  CO2, INC
5915#else
5916	fpadd	f0, f0, f2
5917	fpadd	f1, f1, f3
5918
5919	fpmul	f0,  AP, f0
5920	fpmul	f1,  AP, f1
5921
5922	STFDUX	f0,  CO1, INC
5923	STFSDUX	f0,  CO1, INC
5924	STFDUX	f1,  CO2, INC
5925	STFSDUX	f1,  CO2, INC
5926#endif
5927
5928
/* TRMM only: AO and BO both advance by (K - KK - 2) << (1 + BASE_SHIFT);  */
/* with LEFT, KK += 2.                                                      */
5929#ifdef TRMMKERNEL
5930#if ( defined(LEFT) &&  defined(TRANSA)) || \
5931    (!defined(LEFT) && !defined(TRANSA))
5932	sub	TEMP, K, KK
5933#ifdef LEFT
5934	addi	TEMP, TEMP, -2
5935#else
5936	addi	TEMP, TEMP, -2
5937#endif
5938	slwi	r0,   TEMP, 1 + BASE_SHIFT
5939	slwi	TEMP, TEMP, 1 + BASE_SHIFT
5940	add	AO, AO, r0
5941	add	BO, BO, TEMP
5942#endif
5943
5944#ifdef LEFT
5945	addi	KK, KK, 2
5946#endif
5947#endif
5948
5949	li	r0, FZERO
5950	lfpsx	f0, SP, r0
5951	.align 4
5952
/* .L1080: handled only when bit 0 of M is set (otherwise -> .L1089).       */
/* BO = B - 2*SIZE, with TRMM variants first offsetting AO by               */
/* KK << (0 + BASE_SHIFT) and B by KK << (1 + BASE_SHIFT).  f1..f3 are      */
/* copied from f0; CTR = length >> 3, and a count <= 0 skips to .L1084.     */
/* Otherwise A1..A4 and B1..B4 are preloaded.                               */
5953.L1080:
5954	andi.	I, M,  1
5955	beq	.L1089
5956
5957#if defined(TRMMKERNEL)
5958#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
5959	addi	BO,  B,  - 2 * SIZE
5960	fpmr	f1,  f0
5961	fpmr	f2,  f0
5962	fpmr	f3,  f0
5963#else
5964	slwi	TEMP, KK, 0 + BASE_SHIFT
5965	slwi	r0,   KK, 1 + BASE_SHIFT
5966	add	AO, AO, TEMP
5967	add	BO, B,  r0
5968
5969	addi	BO,  BO,  - 2 * SIZE
5970	fpmr	f1,  f0
5971	fpmr	f2,  f0
5972	fpmr	f3,  f0
5973#endif
5974
5975#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
5976	sub	TEMP, K, KK
5977#elif defined(LEFT)
5978	addi	TEMP, KK, 1
5979#else
5980	addi	TEMP, KK, 2
5981#endif
5982	srawi.	r0,  TEMP,  3
5983	mtspr	CTR, r0
5984	ble	.L1084
5985#else
5986	addi	BO,  B,  - 2 * SIZE
5987	fpmr	f1,  f0
5988	fpmr	f2,  f0
5989	fpmr	f3,  f0
5990
5991	srawi.	r0,  K,  3
5992	mtspr	CTR, r0
5993	ble	.L1084
5994#endif
5995
5996	LFPDUX	B1,  BO,  INC2
5997	LFPDUX	A1,  AO,  INC2
5998	LFPDUX	A2,  AO,  INC2
5999
6000	LFPDUX	B2,  BO,  INC2
6001	LFPDUX	A3,  AO,  INC2
6002	LFPDUX	A4,  AO,  INC2
6003
6004	LFPDUX	B3,  BO,  INC2
6005	LFPDUX	B4,  BO,  INC2
6006	bdz-	.L1083
6007	.align 4
6008
/* .L1082: unrolled main loop.  Each A register (A1..A4) feeds two          */
/* multiply-adds - fxcpmadd with one B register, fxcsmadd with the next -   */
/* spread over four accumulators f0..f3.  Each B register is reloaded       */
/* right after its use and is used twice per pass.                          */
6009.L1082:
6010	fxcpmadd	f0,  A1, B1, f0
6011	LFPDUX	B1,  BO,  INC2
6012	fxcsmadd	f1,  A1, B2, f1
6013	LFPDUX	B2,  BO,  INC2
6014	LFPDUX	A1,  AO,  INC2
6015	fxcpmadd	f2,  A2, B3, f2
6016	LFPDUX	B3,  BO,  INC2
6017	fxcsmadd	f3,  A2, B4, f3
6018	LFPDUX	B4,  BO,  INC2
6019	LFPDUX	A2,  AO,  INC2
6020
6021	fxcpmadd	f0,  A3, B1, f0
6022	LFPDUX	B1,  BO,  INC2
6023	fxcsmadd	f1,  A3, B2, f1
6024	LFPDUX	B2,  BO,  INC2
6025	LFPDUX	A3,  AO,  INC2
6026	fxcpmadd	f2,  A4, B3, f2
6027	LFPDUX	B3,  BO,  INC2
6028	fxcsmadd	f3,  A4, B4, f3
6029	LFPDUX	B4,  BO,  INC2
6030	LFPDUX	A4,  AO,  INC2
6031	bdnz+	.L1082
6032	.align 4
6033
6034.L1083:
6035	fxcpmadd	f0,  A1, B1, f0
6036	LFPDUX	B1,  BO,  INC2
6037	fxcsmadd	f1,  A1, B2, f1
6038	LFPDUX	B2,  BO,  INC2
6039	fxcpmadd	f2,  A2, B3, f2
6040	LFPDUX	B3,  BO,  INC2
6041	fxcsmadd	f3,  A2, B4, f3
6042	LFPDUX	B4,  BO,  INC2
6043
6044	fxcpmadd	f0,  A3, B1, f0
6045	fxcsmadd	f1,  A3, B2, f1
6046	fxcpmadd	f2,  A4, B3, f2
6047	fxcsmadd	f3,  A4, B4, f3
6048	.align 4
6049
6050.L1084:
6051	lfd	AP,  ALPHA(SP)
6052#ifdef TRMMKERNEL
6053       fsmfp	AP, AP
6054#endif
6055
6056#if defined(TRMMKERNEL)
6057#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
6058	sub	TEMP, K, KK
6059#elif defined(LEFT)
6060	addi	TEMP, KK, 1
6061#else
6062	addi	TEMP, KK, 2
6063#endif
6064	andi.	TEMP,  TEMP,  7
6065	mtspr	CTR, TEMP
6066#else
6067	andi.	r0,  K,  7
6068	mtspr	CTR, r0
6069#endif
6070	ble+	.L1088
6071
6072	LFDX	A1,  AO,  INC2
6073	LFPDUX	B1,  BO,  INC2
6074	add	AO, AO, INC
6075	bdz-	.L1087
6076	.align 4
6077
6078.L1086:
6079	fxcpmadd	f0,  A1, B1, f0
6080	LFDX	A1,  AO,  INC2
6081	LFPDUX	B1,  BO,  INC2
6082	add	AO, AO, INC
6083	bdnz+	.L1086
6084	.align 4
6085
6086.L1087:
6087	fxcpmadd	f0,  A1, B1, f0
6088	.align 4
6089
6090.L1088:
6091#ifndef TRMMKERNEL
6092	LFDX	A1, CO1, INC
6093	LFDX	A2, CO2, INC
6094
6095	fpadd	f0, f0, f1
6096	fpadd	f2, f2, f3
6097	fsmfp	A1, A2
6098	fpadd	f0, f0, f2
6099	fxcpmadd	f0,  AP, f0,  A1
6100#else
6101	fpadd	f0, f0, f1
6102	fpadd	f2, f2, f3
6103	fsmfp	A1, A2
6104	fpadd	f0, f0, f2
6105	fpmul	f0,  AP, f0
6106#endif
6107
6108	STFDUX	f0,  CO1, INC
6109	STFSDUX	f0,  CO2, INC
6110
6111#ifdef TRMMKERNEL
6112#if ( defined(LEFT) &&  defined(TRANSA)) || \
6113    (!defined(LEFT) && !defined(TRANSA))
6114	sub	TEMP, K, KK
6115#ifdef LEFT
6116	addi	TEMP, TEMP, -1
6117#else
6118	addi	TEMP, TEMP, -2
6119#endif
6120	slwi	r0,   TEMP, 0 + BASE_SHIFT
6121	slwi	TEMP, TEMP, 1 + BASE_SHIFT
6122	add	AO, AO, r0
6123	add	BO, BO, TEMP
6124#endif
6125
6126#ifdef LEFT
6127	addi	KK, KK, 1
6128#endif
6129#endif
6130	.align 4
6131
/*
 * .L1089 -- bookkeeping after the last tile of the current column pair.
 * In the non-LEFT TRMM build KK is advanced by 2.  B is then set to
 * BO + 2 * SIZE, which undoes the -2 * SIZE bias the tile setups apply to BO
 * ("addi BO, B, - 2 * SIZE").
 */
6132.L1089:
6133#if defined(TRMMKERNEL) && !defined(LEFT)
6134	addi	KK, KK, 2
6135#endif
6136
6137	addi	B,  BO, 2 * SIZE
6138	.align 4
6139
/*
 * .L1090 -- entry for the single leftover column (N & 1).
 * If bit 0 of N is clear there is nothing left and control goes to .L10999
 * (register restore and return).  Otherwise: KK is reset to OFFSET in the
 * LEFT TRMM build, CO1 is pointed at C, AO at A - 2 * SIZE, f0 is reloaded
 * from the FZERO slot on the stack, and I = M >> 3 counts the passes of the
 * .L1091 tile loop (skipped via .L10100 when that count is <= 0).
 */
6140.L1090:
6141	andi.	J, N,  1
6142	beq	.L10999
6143
6144#if defined(TRMMKERNEL) &&  defined(LEFT)
6145	mr	KK, OFFSET
6146#endif
6147
6148	mr	CO1, C
6149	addi	AO, A, -2 * SIZE
6150
6151	li	r0, FZERO
6152	lfpsx	f0, SP, r0
6153
6154	srawi.	I, M,  3
6155	ble	.L10100
6156	.align 4
6157
/*
 * .L1091 -- tile loop for the single-column case, executed I = M >> 3 times.
 * Each pass accumulates into f0..f3 (initialised from f0, which holds the
 * FZERO value on entry) and finishes with eight stores through CO1, i.e. it
 * produces eight consecutive results.  The k loop is unrolled by 4
 * (CTR = count >> 2), with the remainder (count & 3) handled at .L1094.
 */
6158.L1091:
6159#if defined(TRMMKERNEL)
6160#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
6161	addi	BO,  B,  - 2 * SIZE
6162 	fpmr	f1,  f0
6163#else
/* Skip KK steps: AO += KK << (3 + BASE_SHIFT), BO = B + (KK << (0 + BASE_SHIFT)). */
6164	slwi	TEMP, KK, 3 + BASE_SHIFT
6165	slwi	r0,   KK, 0 + BASE_SHIFT
6166	add	AO, AO, TEMP
6167	add	BO, B,  r0
6168
6169	addi	BO,  BO,  - 2 * SIZE
6170 	fpmr	f1,  f0
6171#endif
6172
/* TEMP = number of k steps for this tile: K - KK, KK + 8 (LEFT) or KK + 1. */
6173#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
6174	sub	TEMP, K, KK
6175#elif defined(LEFT)
6176	addi	TEMP, KK, 8
6177#else
6178	addi	TEMP, KK, 1
6179#endif
/* The fpmr/mtspr between srawi. and ble do not modify CR0. */
6180	fpmr	f2,  f0
6181	srawi.	r0,  TEMP,  2
6182	fpmr	f3,  f0
6183	mtspr	CTR, r0
6184	ble	.L1094
6185
6186#else
6187	srawi.	r0,  K,  2
6188 	fpmr	f1,  f0
6189	addi	BO,  B,  - 2 * SIZE
6190	fpmr	f2,  f0
6191	fpmr	f3,  f0
6192	mtspr	CTR, r0
6193	ble	.L1094
6194#endif
6195
/* Preload for the first pass: B1, B2 from BO and A1-A8 from AO. */
6196	LFPDUX	B1,  BO,  INC2
6197	LFPDUX	A1,  AO,  INC2
6198	LFPDUX	A2,  AO,  INC2
6199	LFPDUX	A3,  AO,  INC2
6200	LFPDUX	A4,  AO,  INC2
6201	LFPDUX	B2,  BO,  INC2
6202	LFPDUX	A5,  AO,  INC2
6203	LFPDUX	A6,  AO,  INC2
6204	LFPDUX	A7,  AO,  INC2
6205	LFPDUX	A8,  AO,  INC2
6206	bdz-	.L1093
6207	.align 4
6208
/*
 * .L1092 -- main loop, 16 multiply-adds per pass.  A1-A4 are combined with
 * B1/B2 through fxcpmadd and A5-A8 through fxcsmadd (presumably the primary
 * and secondary half of the B register respectively -- confirm against the
 * FP2 ISA).  Every operand is reloaded right after its use.
 */
6209.L1092:
6210	fxcpmadd	f0,  B1, A1, f0
6211	LFPDUX	A1,  AO,  INC2
6212	fxcpmadd	f1,  B1, A2, f1
6213	LFPDUX	A2,  AO,  INC2
6214	fxcpmadd	f2,  B1, A3, f2
6215	LFPDUX	A3,  AO,  INC2
6216	fxcpmadd	f3,  B1, A4, f3
6217	LFPDUX	A4,  AO,  INC2
6218
6219	fxcsmadd	f0,  B1, A5, f0
6220	LFPDUX	A5,  AO,  INC2
6221	fxcsmadd	f1,  B1, A6, f1
6222	LFPDUX	A6,  AO,  INC2
6223	fxcsmadd	f2,  B1, A7, f2
6224	LFPDUX	A7,  AO,  INC2
6225	fxcsmadd	f3,  B1, A8, f3
6226	LFPDUX	A8,  AO,  INC2
6227	LFPDUX	B1,  BO,  INC2
6228
6229	fxcpmadd	f0,  B2, A1, f0
6230	LFPDUX	A1,  AO,  INC2
6231	fxcpmadd	f1,  B2, A2, f1
6232	LFPDUX	A2,  AO,  INC2
6233	fxcpmadd	f2,  B2, A3, f2
6234	LFPDUX	A3,  AO,  INC2
6235	fxcpmadd	f3,  B2, A4, f3
6236	LFPDUX	A4,  AO,  INC2
6237
6238	fxcsmadd	f0,  B2, A5, f0
6239	LFPDUX	A5,  AO,  INC2
6240	fxcsmadd	f1,  B2, A6, f1
6241	LFPDUX	A6,  AO,  INC2
6242	fxcsmadd	f2,  B2, A7, f2
6243	LFPDUX	A7,  AO,  INC2
6244	fxcsmadd	f3,  B2, A8, f3
6245	LFPDUX	A8,  AO,  INC2
6246	LFPDUX	B2,  BO,  INC2
6247	bdnz+	.L1092
6248	.align 4
6249
/*
 * .L1093 -- last pass of the unrolled loop: the B1 half still reloads A1-A8
 * for the B2 half, which then runs without issuing any further loads.
 */
6250.L1093:
6251	fxcpmadd	f0,  B1, A1, f0
6252	LFPDUX	A1,  AO,  INC2
6253	fxcpmadd	f1,  B1, A2, f1
6254	LFPDUX	A2,  AO,  INC2
6255	fxcpmadd	f2,  B1, A3, f2
6256	LFPDUX	A3,  AO,  INC2
6257	fxcpmadd	f3,  B1, A4, f3
6258	LFPDUX	A4,  AO,  INC2
6259
6260	fxcsmadd	f0,  B1, A5, f0
6261	LFPDUX	A5,  AO,  INC2
6262	fxcsmadd	f1,  B1, A6, f1
6263	LFPDUX	A6,  AO,  INC2
6264	fxcsmadd	f2,  B1, A7, f2
6265	LFPDUX	A7,  AO,  INC2
6266	fxcsmadd	f3,  B1, A8, f3
6267	LFPDUX	A8,  AO,  INC2
6268
6269	fxcpmadd	f0,  B2, A1, f0
6270	fxcpmadd	f1,  B2, A2, f1
6271	fxcpmadd	f2,  B2, A3, f2
6272	fxcpmadd	f3,  B2, A4, f3
6273
6274	fxcsmadd	f0,  B2, A5, f0
6275	fxcsmadd	f1,  B2, A6, f1
6276	fxcsmadd	f2,  B2, A7, f2
6277	fxcsmadd	f3,  B2, A8, f3
6278	.align 4
6279
/*
 * .L1094 -- load the scale factor from ALPHA(SP) into AP (duplicated across
 * both halves with fsmfp in the TRMM build, presumably) and set CTR to the
 * remaining step count (count & 3).
 */
6280.L1094:
6281	lfd	AP,  ALPHA(SP)
6282#ifdef TRMMKERNEL
6283       fsmfp	AP, AP
6284#endif
6285
6286#if defined(TRMMKERNEL)
6287#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
6288	sub	TEMP, K, KK
6289#elif defined(LEFT)
6290	addi	TEMP, KK, 8
6291#else
6292	addi	TEMP, KK, 1
6293#endif
6294	andi.	TEMP,  TEMP,  3
6295	mtspr	CTR, TEMP
6296#else
6297	andi.	r0,  K,  3
6298	mtspr	CTR, r0
6299#endif
/* No remainder steps: go straight to the write-back. */
6300	ble+	.L1098
6301
/*
 * Remainder: one k step per pass.  B is read with a non-updating LFDX and BO
 * is advanced by INC with an explicit add; A1-A4 are read as pairs.
 */
6302	LFDX	B1,  BO,  INC2
6303	LFPDUX	A1,  AO,  INC2
6304	LFPDUX	A2,  AO,  INC2
6305	LFPDUX	A3,  AO,  INC2
6306	LFPDUX	A4,  AO,  INC2
6307	add	BO, BO, INC
6308	bdz-	.L1097
6309	.align 4
6310
6311.L1096:
6312	fxcpmadd	f0,  B1, A1, f0
6313	LFPDUX	A1,  AO,  INC2
6314	fxcpmadd	f1,  B1, A2, f1
6315	LFPDUX	A2,  AO,  INC2
6316	fxcpmadd	f2,  B1, A3, f2
6317	LFPDUX	A3,  AO,  INC2
6318	fxcpmadd	f3,  B1, A4, f3
6319	LFDX	B1,  BO,  INC2
6320	LFPDUX	A4,  AO,  INC2
6321	add	BO, BO, INC
6322	bdnz+	.L1096
6323	.align 4
6324
/* .L1097 -- consume the operands loaded by the final remainder pass. */
6325.L1097:
6326	fxcpmadd	f0,  B1, A1, f0
6327	fxcpmadd	f1,  B1, A2, f1
6328	fxcpmadd	f2,  B1, A3, f2
6329	fxcpmadd	f3,  B1, A4, f3
6330	.align 4
6331
/*
 * .L1098 -- write-back of the eight results held in f0..f3.
 * Without TRMMKERNEL the existing values are first gathered from CO1 into
 * A1, B1, A3, A5 (LFDUX then LFSDUX, presumably filling the primary and the
 * secondary halves; INCM5 and INCM7 presumably step CO1 backwards -- confirm
 * against their definitions) and each result is AP * acc + C.  With
 * TRMMKERNEL each result is AP * acc.
 */
6332.L1098:
6333#ifndef TRMMKERNEL
6334	LFDUX	A1, CO1, INC
6335	LFDUX	B1, CO1, INC2
6336	LFDUX	A3, CO1, INC2
6337   	LFDUX	A5, CO1, INC2
6338
6339	LFSDUX	A1, CO1, INCM5
6340	LFSDUX	B1, CO1, INC2
6341	LFSDUX	A3, CO1, INC2
6342	LFSDUX	A5, CO1, INC2
6343
6344	fxcpmadd	f0,  AP, f0,  A1
6345	fxcpmadd	f1,  AP, f1,  B1
6346	fxcpmadd	f2,  AP, f2,  A3
6347	STFDUX	f0,  CO1, INCM7
6348	STFSDUX	f0,  CO1, INC
6349
6350	fxcpmadd	f3,  AP, f3,  A5
6351#else
6352	fpmul	f0,  AP, f0
6353	fpmul	f1,  AP, f1
6354	fpmul	f2,  AP, f2
6355	STFDUX	f0,  CO1, INC
6356	STFSDUX	f0,  CO1, INC
6357
6358	fpmul	f3,  AP, f3
6359#endif
6360
6361	STFDUX	f1,  CO1, INC
6362	STFSDUX	f1,  CO1, INC
6363
6364	STFDUX	f2,  CO1, INC
6365	STFSDUX	f2,  CO1, INC
6366
6367	STFDUX	f3,  CO1, INC
6368	STFSDUX	f3,  CO1, INC
6369
6370#ifdef TRMMKERNEL
6371#if ( defined(LEFT) &&  defined(TRANSA)) || \
6372    (!defined(LEFT) && !defined(TRANSA))
/*
 * Advance AO and BO over the k steps this tile did not consume:
 * TEMP = K - KK - 8 (LEFT) or K - KK - 1; AO += TEMP << (3 + BASE_SHIFT),
 * BO += TEMP << (0 + BASE_SHIFT).
 */
6373	sub	TEMP, K, KK
6374#ifdef LEFT
6375	addi	TEMP, TEMP, -8
6376#else
6377	addi	TEMP, TEMP, -1
6378#endif
6379	slwi	r0,   TEMP, 3 + BASE_SHIFT
6380	slwi	TEMP, TEMP, 0 + BASE_SHIFT
6381	add	AO, AO, r0
6382	add	BO, BO, TEMP
6383#endif
6384
6385#ifdef LEFT
6386	addi	KK, KK, 8
6387#endif
6388#endif
6389
/*
 * Count down the tile counter and reload f0 from the FZERO slot for the next
 * tile; li and lfpsx sit between addic. and bgt+ and leave CR0 untouched.
 */
6390	addic.	I, I, -1
6391	li	r0, FZERO
6392
6393	lfpsx	f0, SP, r0
6394	bgt+	.L1091
6395	.align 4
6396
/*
 * .L10100 -- four-result tile of the single-column case, run when bit 2 of M
 * is set (otherwise branch to .L10110).  Accumulates into f0..f3; f0+f2 and
 * f1+f3 are combined at .L10108 and written with four stores through CO1.
 * The k loop is unrolled by 8 (CTR = count >> 3), remainder (count & 7) at
 * .L10104.
 */
6397.L10100:
6398	andi.	I, M,  4
6399	beq	.L10110
6400
6401#if defined(TRMMKERNEL)
6402#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
6403	addi	BO,  B,  - 2 * SIZE
6404	fpmr	f1,  f0
6405	fpmr	f2,  f0
6406	fpmr	f3, f0
6407#else
/* Skip KK steps: AO += KK << (2 + BASE_SHIFT), BO = B + (KK << (0 + BASE_SHIFT)). */
6408	slwi	TEMP, KK, 2 + BASE_SHIFT
6409	slwi	r0,   KK, 0 + BASE_SHIFT
6410	add	AO, AO, TEMP
6411	add	BO, B,  r0
6412
6413	fpmr	f1,  f0
6414	addi	BO,  BO,  - 2 * SIZE
6415	fpmr	f2,  f0
6416	fpmr	f3, f0
6417#endif
6418
/* TEMP = number of k steps for this tile: K - KK, KK + 4 (LEFT) or KK + 1. */
6419#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
6420	sub	TEMP, K, KK
6421#elif defined(LEFT)
6422	addi	TEMP, KK, 4
6423#else
6424	addi	TEMP, KK, 1
6425#endif
6426	srawi.	r0,  TEMP,  3
6427	mtspr	CTR, r0
6428	ble	.L10104
6429#else
6430	addi	BO,  B,  - 2 * SIZE
6431	fpmr	f1,  f0
6432	fpmr	f2,  f0
6433	fpmr	f3, f0
6434
6435	srawi.	r0,  K,  3
6436	mtspr	CTR, r0
6437	ble	.L10104
6438#endif
6439
/* Preload for the first pass: B1-B4 from BO and A1-A8 from AO. */
6440	LFPDUX	B1,  BO,  INC2
6441	LFPDUX	A1,  AO,  INC2
6442	LFPDUX	A2,  AO,  INC2
6443	LFPDUX	A3,  AO,  INC2
6444	LFPDUX	A4,  AO,  INC2
6445	LFPDUX	B2,  BO,  INC2
6446	LFPDUX	A5,  AO,  INC2
6447	LFPDUX	A6,  AO,  INC2
6448	LFPDUX	A7,  AO,  INC2
6449	LFPDUX	A8,  AO,  INC2
6450	LFPDUX	B3,  BO,  INC2
6451	LFPDUX	B4,  BO,  INC2
6452
6453	bdz-	.L10103
6454	.align 4
6455
/*
 * .L10102 -- main loop, 16 multiply-adds per pass.  For each B register the
 * fxcpmadd pair feeds f0/f1 and the fxcsmadd pair feeds f2/f3 (presumably the
 * primary and secondary half of B respectively -- confirm against the FP2
 * ISA).  Every operand is reloaded right after its use.
 */
6456.L10102:
6457	fxcpmadd	f0,  B1, A1, f0
6458	LFPDUX	A1,  AO,  INC2
6459	fxcpmadd	f1,  B1, A2, f1
6460	LFPDUX	A2,  AO,  INC2
6461	fxcsmadd	f2,  B1, A3, f2
6462	LFPDUX	A3,  AO,  INC2
6463	fxcsmadd	f3,  B1, A4, f3
6464	LFPDUX	A4,  AO,  INC2
6465	LFPDUX	B1,  BO,  INC2
6466
6467	fxcpmadd	f0,  B2, A5, f0
6468	LFPDUX	A5,  AO,  INC2
6469	fxcpmadd	f1,  B2, A6, f1
6470	LFPDUX	A6,  AO,  INC2
6471	fxcsmadd	f2,  B2, A7, f2
6472	LFPDUX	A7,  AO,  INC2
6473	fxcsmadd	f3,  B2, A8, f3
6474	LFPDUX	A8,  AO,  INC2
6475	LFPDUX	B2,  BO,  INC2
6476
6477	fxcpmadd	f0,  B3, A1, f0
6478	LFPDUX	A1,  AO,  INC2
6479	fxcpmadd	f1,  B3, A2, f1
6480	LFPDUX	A2,  AO,  INC2
6481	fxcsmadd	f2,  B3, A3, f2
6482	LFPDUX	A3,  AO,  INC2
6483	fxcsmadd	f3,  B3, A4, f3
6484	LFPDUX	A4,  AO,  INC2
6485	LFPDUX	B3,  BO,  INC2
6486
6487	fxcpmadd	f0,  B4, A5, f0
6488	LFPDUX	A5,  AO,  INC2
6489	fxcpmadd	f1,  B4, A6, f1
6490	LFPDUX	A6,  AO,  INC2
6491	fxcsmadd	f2,  B4, A7, f2
6492	LFPDUX	A7,  AO,  INC2
6493	fxcsmadd	f3,  B4, A8, f3
6494	LFPDUX	A8,  AO,  INC2
6495	LFPDUX	B4,  BO,  INC2
6496	bdnz+	.L10102
6497	.align 4
6498
/*
 * .L10103 -- last pass of the unrolled loop: the B1/B2 half still reloads
 * A1-A8 for the B3/B4 half, which then runs without issuing further loads.
 */
6499.L10103:
6500	fxcpmadd	f0,  B1, A1, f0
6501	LFPDUX	A1,  AO,  INC2
6502	fxcpmadd	f1,  B1, A2, f1
6503	LFPDUX	A2,  AO,  INC2
6504	fxcsmadd	f2,  B1, A3, f2
6505	LFPDUX	A3,  AO,  INC2
6506	fxcsmadd	f3,  B1, A4, f3
6507	LFPDUX	A4,  AO,  INC2
6508
6509	fxcpmadd	f0,  B2, A5, f0
6510	LFPDUX	A5,  AO,  INC2
6511	fxcpmadd	f1,  B2, A6, f1
6512	LFPDUX	A6,  AO,  INC2
6513	fxcsmadd	f2,  B2, A7, f2
6514	LFPDUX	A7,  AO,  INC2
6515	fxcsmadd	f3,  B2, A8, f3
6516	LFPDUX	A8,  AO,  INC2
6517
6518	fxcpmadd	f0,  B3, A1, f0
6519	fxcpmadd	f1,  B3, A2, f1
6520	fxcsmadd	f2,  B3, A3, f2
6521	fxcsmadd	f3,  B3, A4, f3
6522
6523	fxcpmadd	f0,  B4, A5, f0
6524	fxcpmadd	f1,  B4, A6, f1
6525	fxcsmadd	f2,  B4, A7, f2
6526	fxcsmadd	f3,  B4, A8, f3
6527	.align 4
6528
/*
 * .L10104 -- load the scale factor from ALPHA(SP) into AP (duplicated across
 * both halves with fsmfp in the TRMM build, presumably) and set CTR to the
 * remaining step count (count & 7).
 */
6529.L10104:
6530	lfd	AP,  ALPHA(SP)
6531#ifdef TRMMKERNEL
6532       fsmfp	AP, AP
6533#endif
6534
6535#if defined(TRMMKERNEL)
6536#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
6537	sub	TEMP, K, KK
6538#elif defined(LEFT)
6539	addi	TEMP, KK, 4
6540#else
6541	addi	TEMP, KK, 1
6542#endif
6543	andi.	TEMP,  TEMP,  7
6544	mtspr	CTR, TEMP
6545#else
6546	andi.	r0,  K,  7
6547	mtspr	CTR, r0
6548#endif
/* No remainder steps: go straight to the write-back. */
6549	ble+	.L10108
6550
/*
 * Remainder: one k step per pass.  B is read with a non-updating LFDX and BO
 * is advanced by INC with an explicit add; A1/A2 are read as pairs.
 */
6551	LFPDUX	A1,  AO,  INC2
6552	LFDX	B1,  BO,  INC2
6553	LFPDUX	A2,  AO,  INC2
6554	add	BO, BO, INC
6555	bdz-	.L10107
6556	.align 4
6557
6558.L10106:
6559	fxcpmadd	f0,  B1, A1, f0
6560	LFPDUX	A1,  AO,  INC2
6561	fxcpmadd	f1,  B1, A2, f1
6562	LFDX	B1,  BO,  INC2
6563	LFPDUX	A2,  AO,  INC2
6564	add	BO, BO, INC
6565	bdnz+	.L10106
6566	.align 4
6567
/* .L10107 -- consume the operands loaded by the final remainder pass. */
6568.L10107:
6569	fxcpmadd	f0,  B1, A1, f0
6570	fxcpmadd	f1,  B1, A2, f1
6571	.align 4
6572
/*
 * .L10108 -- write-back of the four results.  f2 is folded into f0 and f3
 * into f1.  Without TRMMKERNEL the existing values are first gathered from
 * CO1 into A1 and B1 (INCM1 and INCM3 presumably step CO1 backwards --
 * confirm against their definitions) and each result is AP * acc + C; with
 * TRMMKERNEL each result is AP * acc.
 */
6573.L10108:
6574#ifndef TRMMKERNEL
6575	LFDUX	A1, CO1, INC
6576	LFDUX	B1, CO1, INC2
6577	LFSDUX	A1, CO1, INCM1
6578	LFSDUX	B1, CO1, INC2
6579
6580	fpadd	f0, f0, f2
6581	fpadd	f1, f1, f3
6582
6583	fxcpmadd	f0,  AP, f0,  A1
6584	fxcpmadd	f1,  AP, f1,  B1
6585
6586	STFDUX	f0,  CO1, INCM3
6587	STFSDUX	f0,  CO1, INC
6588#else
6589	fpadd	f0, f0, f2
6590	fpadd	f1, f1, f3
6591
6592	fpmul	f0,  AP, f0
6593	fpmul	f1,  AP, f1
6594
6595	STFDUX	f0,  CO1, INC
6596	STFSDUX	f0,  CO1, INC
6597#endif
6598
6599	STFDUX	f1,  CO1, INC
6600	STFSDUX	f1,  CO1, INC
6601
6602#ifdef TRMMKERNEL
6603#if ( defined(LEFT) &&  defined(TRANSA)) || \
6604    (!defined(LEFT) && !defined(TRANSA))
/*
 * Advance AO and BO over the k steps this tile did not consume:
 * TEMP = K - KK - 4 (LEFT) or K - KK - 1; AO += TEMP << (2 + BASE_SHIFT),
 * BO += TEMP << (0 + BASE_SHIFT).
 */
6605	sub	TEMP, K, KK
6606#ifdef LEFT
6607	addi	TEMP, TEMP, -4
6608#else
6609	addi	TEMP, TEMP, -1
6610#endif
6611	slwi	r0,   TEMP, 2 + BASE_SHIFT
6612	slwi	TEMP, TEMP, 0 + BASE_SHIFT
6613	add	AO, AO, r0
6614	add	BO, BO, TEMP
6615#endif
6616
6617#ifdef LEFT
6618	addi	KK, KK, 4
6619#endif
6620#endif
6621
/* Reload f0 from the FZERO slot so the next tile starts from that value. */
6622	li	r0, FZERO
6623	lfpsx	f0, SP, r0
6624	.align 4
6625
/*
 * .L10110 -- two-result tile of the single-column case, run when bit 1 of M
 * is set (otherwise branch to .L10120).  Four accumulators f0..f3 are summed
 * at .L10118 and the result is written with two stores through CO1.
 * The k loop is unrolled by 8 (CTR = count >> 3), remainder (count & 7) at
 * .L10114.
 */
6626.L10110:
6627	andi.	I, M,  2
6628	beq	.L10120
6629
6630#if defined(TRMMKERNEL)
6631#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
6632	addi	BO,  B,  - 2 * SIZE
6633	fpmr	f1,  f0
6634	fpmr	f2,  f0
6635	fpmr	f3,  f0
6636#else
/* Skip KK steps: AO += KK << (1 + BASE_SHIFT), BO = B + (KK << (0 + BASE_SHIFT)). */
6637	slwi	TEMP, KK, 1 + BASE_SHIFT
6638	slwi	r0,   KK, 0 + BASE_SHIFT
6639	add	AO, AO, TEMP
6640	add	BO, B,  r0
6641
6642	fpmr	f1,  f0
6643	addi	BO,  BO,  - 2 * SIZE
6644	fpmr	f2,  f0
6645	fpmr	f3,  f0
6646#endif
6647
/* TEMP = number of k steps for this tile: K - KK, KK + 2 (LEFT) or KK + 1. */
6648#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
6649	sub	TEMP, K, KK
6650#elif defined(LEFT)
6651	addi	TEMP, KK, 2
6652#else
6653	addi	TEMP, KK, 1
6654#endif
6655	srawi.	r0,  TEMP,  3
6656	mtspr	CTR, r0
6657	ble	.L10114
6658#else
6659	addi	BO,  B,  - 2 * SIZE
6660	fpmr	f1,  f0
6661	fpmr	f2,  f0
6662	fpmr	f3,  f0
6663
6664	srawi.	r0,  K,  3
6665	mtspr	CTR, r0
6666	ble	.L10114
6667#endif
6668
/* Preload for the first pass: A1-A8 from AO and B1-B4 from BO. */
6669	LFPDUX	A1,  AO,  INC2
6670	LFPDUX	A2,  AO,  INC2
6671	LFPDUX	B1,  BO,  INC2
6672
6673	LFPDUX	A3,  AO,  INC2
6674	LFPDUX	A4,  AO,  INC2
6675	LFPDUX	B2,  BO,  INC2
6676
6677	LFPDUX	A5,  AO,  INC2
6678	LFPDUX	A6,  AO,  INC2
6679	LFPDUX	B3,  BO,  INC2
6680
6681	LFPDUX	A7,  AO,  INC2
6682	LFPDUX	A8,  AO,  INC2
6683	LFPDUX	B4,  BO,  INC2
6684	bdz-	.L10113
6685	.align 4
6686
/*
 * .L10112 -- main loop, 8 multiply-adds per pass: each B register is used
 * once through fxcpmadd and once through fxcsmadd (presumably its primary
 * and secondary half -- confirm against the FP2 ISA), each time with a
 * different A register.  Every operand is reloaded right after its use.
 */
6687.L10112:
6688	fxcpmadd	f0,  B1, A1, f0
6689	LFPDUX	A1,  AO,  INC2
6690	fxcsmadd	f1,  B1, A2, f1
6691	LFPDUX	A2,  AO,  INC2
6692	LFPDUX	B1,  BO,  INC2
6693	fxcpmadd	f2,  B2, A3, f2
6694	LFPDUX	A3,  AO,  INC2
6695	fxcsmadd	f3,  B2, A4, f3
6696	LFPDUX	A4,  AO,  INC2
6697	LFPDUX	B2,  BO,  INC2
6698	fxcpmadd	f0,  B3, A5, f0
6699	LFPDUX	A5,  AO,  INC2
6700	fxcsmadd	f1,  B3, A6, f1
6701	LFPDUX	A6,  AO,  INC2
6702	LFPDUX	B3,  BO,  INC2
6703	fxcpmadd	f2,  B4, A7, f2
6704	LFPDUX	A7,  AO,  INC2
6705	fxcsmadd	f3,  B4, A8, f3
6706	LFPDUX	A8,  AO,  INC2
6707	LFPDUX	B4,  BO,  INC2
6708	bdnz+	.L10112
6709	.align 4
6710
/* .L10113 -- last pass of the unrolled loop: arithmetic only, no loads. */
6711.L10113:
6712	fxcpmadd	f0,  B1, A1, f0
6713	fxcsmadd	f1,  B1, A2, f1
6714	fxcpmadd	f2,  B2, A3, f2
6715	fxcsmadd	f3,  B2, A4, f3
6716	fxcpmadd	f0,  B3, A5, f0
6717	fxcsmadd	f1,  B3, A6, f1
6718	fxcpmadd	f2,  B4, A7, f2
6719	fxcsmadd	f3,  B4, A8, f3
6720	.align 4
6721
/*
 * .L10114 -- load the scale factor from ALPHA(SP) into AP (duplicated across
 * both halves with fsmfp in the TRMM build, presumably) and set CTR to the
 * remaining step count (count & 7).
 */
6722.L10114:
6723	lfd	AP,  ALPHA(SP)
6724#ifdef TRMMKERNEL
6725       fsmfp	AP, AP
6726#endif
6727
6728#if defined(TRMMKERNEL)
6729#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
6730	sub	TEMP, K, KK
6731#elif defined(LEFT)
6732	addi	TEMP, KK, 2
6733#else
6734	addi	TEMP, KK, 1
6735#endif
6736	andi.	TEMP,  TEMP,  7
6737	mtspr	CTR, TEMP
6738#else
6739	andi.	r0,  K,  7
6740	mtspr	CTR, r0
6741#endif
/* No remainder steps: go straight to the write-back. */
6742	ble+	.L10118
6743
/*
 * Remainder: one k step per pass.  B is read with a non-updating LFDX and BO
 * is advanced by INC with an explicit add; A1 is read as a pair.
 */
6744	LFPDUX	A1,  AO,  INC2
6745	LFDX	B1,  BO,  INC2
6746	add	BO, BO, INC
6747	bdz-	.L10117
6748	.align 4
6749
6750.L10116:
6751	fxcpmadd	f0,  B1, A1, f0
6752	LFPDUX	A1,  AO,  INC2
6753	LFDX	B1,  BO,  INC2
6754	add	BO, BO, INC
6755	bdnz+	.L10116
6756	.align 4
6757
/* .L10117 -- consume the operands loaded by the final remainder pass. */
6758.L10117:
6759	fxcpmadd	f0,  B1, A1, f0
6760	.align 4
6761
/*
 * .L10118 -- write-back of the two results.  f0..f3 are summed into f0, the
 * scaled result is placed in f1, and f0 is reloaded from the FZERO slot
 * before the stores so it is ready for the next tile.  Without TRMMKERNEL
 * the two existing values are read from CO1 into A1/A2 and merged by fsmfp,
 * and the result is AP * f0 + A1 (INCM1 presumably steps CO1 backwards --
 * confirm against its definition).  With TRMMKERNEL the result is AP * f0.
 */
6762.L10118:
6763#ifndef TRMMKERNEL
6764	LFDUX	A1, CO1, INC
6765	LFDUX	A2, CO1, INC
6766
6767	fpadd	f0, f0, f1
6768	fpadd	f2, f3, f2
6769	fsmfp	A1, A2
6770	fpadd	f0, f0, f2
6771	fxcpmadd	f1,  AP, f0,  A1
6772
6773	li	r0, FZERO
6774	lfpsx	f0, SP, r0
6775
6776	STFDUX	f1,  CO1, INCM1
6777	STFSDUX	f1,  CO1, INC
6778#else
6779	fpadd	f0, f0, f1
6780	fpadd	f2, f3, f2
/*
 * NOTE(review): A1/A2 are not loaded from C in this branch and A1 is not read
 * by the fpmul or the stores below; this fsmfp looks like a leftover from the
 * non-TRMM sequence -- confirm before removing.
 */
6781	fsmfp	A1, A2
6782	fpadd	f0, f0, f2
6783	fpmul	f1,  AP, f0
6784
6785	li	r0, FZERO
6786	lfpsx	f0, SP, r0
6787
6788	STFDUX	f1,  CO1, INC
6789	STFSDUX	f1,  CO1, INC
6790#endif
6791
6792
6793#ifdef TRMMKERNEL
6794#if ( defined(LEFT) &&  defined(TRANSA)) || \
6795    (!defined(LEFT) && !defined(TRANSA))
/*
 * Advance AO and BO over the k steps this tile did not consume:
 * TEMP = K - KK - 2 (LEFT) or K - KK - 1; AO += TEMP << (1 + BASE_SHIFT),
 * BO += TEMP << (0 + BASE_SHIFT).
 */
6796	sub	TEMP, K, KK
6797#ifdef LEFT
6798	addi	TEMP, TEMP, -2
6799#else
6800	addi	TEMP, TEMP, -1
6801#endif
6802	slwi	r0,   TEMP, 1 + BASE_SHIFT
6803	slwi	TEMP, TEMP, 0 + BASE_SHIFT
6804	add	AO, AO, r0
6805	add	BO, BO, TEMP
6806#endif
6807
6808#ifdef LEFT
6809	addi	KK, KK, 2
6810#endif
6811#endif
6812	.align 4
6813
/*
 * .L10120 -- single-result tile of the single-column case, run when bit 0 of
 * M is set (otherwise branch to .L10999).  This is a plain dot product of
 * the A and B streams: the unrolled loop uses paired fpmadd on four register
 * pairs (CTR = count >> 3), the remainder loop (count & 7) uses scalar fmadd
 * on f0, and .L10128 reduces everything to one value stored through CO1.
 */
6814.L10120:
6815	andi.	I, M,  1
6816	beq	.L10999
6817
6818#if defined(TRMMKERNEL)
6819#if (defined(LEFT) &&  defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
6820	addi	BO,  B,  - 2 * SIZE
6821	fpmr	f1,  f0
6822	fpmr	f2,  f0
6823	fpmr	f3,  f0
6824#else
/* Skip KK steps on both streams: KK << (0 + BASE_SHIFT) bytes each. */
6825	slwi	TEMP, KK, 0 + BASE_SHIFT
6826	slwi	r0,   KK, 0 + BASE_SHIFT
6827	add	AO, AO, TEMP
6828	add	BO, B,  r0
6829
6830	fpmr	f1,  f0
6831	addi	BO,  BO,  - 2 * SIZE
6832	fpmr	f2,  f0
6833	fpmr	f3,  f0
6834#endif
6835
/*
 * TEMP = number of k steps: K - KK or KK + 1.  The #elif and #else arms are
 * identical here (both add 1).
 */
6836#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
6837	sub	TEMP, K, KK
6838#elif defined(LEFT)
6839	addi	TEMP, KK, 1
6840#else
6841	addi	TEMP, KK, 1
6842#endif
6843	srawi.	r0,  TEMP,  3
6844	mtspr	CTR, r0
6845	ble	.L10124
6846#else
6847	addi	BO,  B,  - 2 * SIZE
6848	fpmr	f1,  f0
6849	fpmr	f2,  f0
6850	fpmr	f3,  f0
6851
6852	srawi.	r0,  K,  3
6853	mtspr	CTR, r0
6854	ble	.L10124
6855#endif
6856
/* Preload for the first pass: A1-A4 from AO and B1-B4 from BO. */
6857	LFPDUX	A1,  AO,  INC2
6858	LFPDUX	B1,  BO,  INC2
6859	LFPDUX	A2,  AO,  INC2
6860	LFPDUX	B2,  BO,  INC2
6861	LFPDUX	A3,  AO,  INC2
6862	LFPDUX	B3,  BO,  INC2
6863	LFPDUX	A4,  AO,  INC2
6864	LFPDUX	B4,  BO,  INC2
6865	bdz-	.L10123
6866	.align 4
6867
/* .L10122 -- main loop: four paired multiply-adds, each followed by reloads. */
6868.L10122:
6869	fpmadd	f0,  A1, B1, f0
6870	LFPDUX	A1,  AO,  INC2
6871	LFPDUX	B1,  BO,  INC2
6872	fpmadd	f1,  A2, B2, f1
6873	LFPDUX	A2,  AO,  INC2
6874	LFPDUX	B2,  BO,  INC2
6875	fpmadd	f2,  A3, B3, f2
6876	LFPDUX	A3,  AO,  INC2
6877	LFPDUX	B3,  BO,  INC2
6878	fpmadd	f3,  A4, B4, f3
6879	LFPDUX	A4,  AO,  INC2
6880	LFPDUX	B4,  BO,  INC2
6881	bdnz+	.L10122
6882	.align 4
6883
/* .L10123 -- last pass of the unrolled loop: arithmetic only, no loads. */
6884.L10123:
6885	fpmadd	f0,  A1, B1, f0
6886	fpmadd	f1,  A2, B2, f1
6887	fpmadd	f2,  A3, B3, f2
6888	fpmadd	f3,  A4, B4, f3
6889	.align 4
6890
/*
 * .L10124 -- load the scale factor from ALPHA(SP) into AP and set CTR to the
 * remaining step count (count & 7).
 */
6891.L10124:
6892	lfd	AP,  ALPHA(SP)
6893#ifdef TRMMKERNEL
6894       fsmfp	AP, AP
6895#endif
6896
6897#if defined(TRMMKERNEL)
6898#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
6899	sub	TEMP, K, KK
6900#elif defined(LEFT)
6901	addi	TEMP, KK, 1
6902#else
6903	addi	TEMP, KK, 1
6904#endif
6905	andi.	TEMP,  TEMP,  7
6906	mtspr	CTR, TEMP
6907#else
6908	andi.	r0,  K,  7
6909	mtspr	CTR, r0
6910#endif
/* No remainder steps: go straight to the write-back. */
6911	ble+	.L10128
6912
/*
 * Remainder: one k step per pass.  Both operands are read with non-updating
 * LFDX, and AO/BO are advanced by INC with explicit adds.
 */
6913	LFDX	A1,  AO,  INC2
6914	LFDX	B1,  BO,  INC2
6915	add	AO, AO, INC
6916	add	BO, BO, INC
6917	bdz-	.L10127
6918	.align 4
6919
6920.L10126:
6921	fmadd	f0,  A1, B1, f0
6922	LFDX	A1,  AO,  INC2
6923	LFDX	B1,  BO,  INC2
6924	add	AO, AO, INC
6925	add	BO, BO, INC
6926	bdnz+	.L10126
6927	.align 4
6928
/* .L10127 -- consume the operands loaded by the final remainder pass. */
6929.L10127:
6930	fmadd	f0,  A1, B1, f0
6931	.align 4
6932
/*
 * .L10128 -- reduction and write-back.  f0..f3 are summed into f0, then
 * fsmtp + fadd fold the two halves of f0 together (fsmtp presumably moves
 * the secondary half of f0 into the primary half of f1 -- confirm against
 * the FP2 ISA).  Without TRMMKERNEL the result is AP * f0 + (value read from
 * CO1); with TRMMKERNEL it is AP * f0.
 */
6933.L10128:
6934#ifndef TRMMKERNEL
6935	LFDX	A1, CO1, INC
6936	fpadd	f0, f0, f1
6937	fpadd	f2, f2, f3
6938	fpadd	f0, f0, f2
6939	fsmtp	f1, f0
6940	fadd	f0, f0, f1
6941	fmadd	f0,  AP, f0,  A1
6942	STFDUX	f0,  CO1, INC
6943#else
6944	fpadd	f0, f0, f1
6945	fpadd	f2, f2, f3
6946	fpadd	f0, f0, f2
6947	fsmtp	f1, f0
6948	fadd	f0, f0, f1
6949	fmul	f0,  AP, f0
6950	STFDUX	f0,  CO1, INC
6951#endif
6952	.align 4
6953
/*
 * .L10999 -- common exit: restore the saved registers and return.
 * SP is first advanced by 12 so that the first lwzu (which pre-increments by
 * 4) reads from 16 bytes above the incoming SP; the 16 bytes skipped
 * presumably hold the ALPHA/FZERO scratch slots -- confirm against the
 * prologue.  r14..r31 are then reloaded from 18 consecutive words.
 */
6954.L10999:
6955	addi	SP, SP, 12
6956
6957	lwzu	r14,   4(SP)
6958	lwzu	r15,   4(SP)
6959
6960	lwzu	r16,   4(SP)
6961	lwzu	r17,   4(SP)
6962	lwzu	r18,   4(SP)
6963	lwzu	r19,   4(SP)
6964
6965	lwzu	r20,   4(SP)
6966	lwzu	r21,   4(SP)
6967	lwzu	r22,   4(SP)
6968	lwzu	r23,   4(SP)
6969
6970	lwzu	r24,   4(SP)
6971	lwzu	r25,   4(SP)
6972	lwzu	r26,   4(SP)
6973	lwzu	r27,   4(SP)
6974
6975	lwzu	r28,   4(SP)
6976	lwzu	r29,   4(SP)
6977	lwzu	r30,   4(SP)
6978	lwzu	r31,   4(SP)
6979
/*
 * Undo the +12 bias, then reload f31 down to f14 with update-form paired
 * loads that step SP by r0 = 16 bytes each.
 */
6980	subi	SP, SP, 12
6981	li	r0, 16
6982
6983	lfpdux	f31, SP, r0
6984	lfpdux	f30, SP, r0
6985	lfpdux	f29, SP, r0
6986	lfpdux	f28, SP, r0
6987	lfpdux	f27, SP, r0
6988	lfpdux	f26, SP, r0
6989	lfpdux	f25, SP, r0
6990	lfpdux	f24, SP, r0
6991	lfpdux	f23, SP, r0
6992	lfpdux	f22, SP, r0
6993	lfpdux	f21, SP, r0
6994	lfpdux	f20, SP, r0
6995	lfpdux	f19, SP, r0
6996	lfpdux	f18, SP, r0
6997	lfpdux	f17, SP, r0
6998	lfpdux	f16, SP, r0
6999	lfpdux	f15, SP, r0
7000	lfpdux	f14, SP, r0
/* Step past the last 16-byte slot and return to the caller. */
7001	addi	SP, SP, 16
7002	blr
7003
7004
7005	EPILOGUE
7006#endif
7007