1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
/* Integer register aliases and stack offsets used by the kernel body below. */
/* Discard any earlier definition of ZERO; it is redefined as r31 at the end of this list. */
42#undef ZERO
43
/* Byte offsets from SP once the prologue has finished.  The prologue's last    */
/* stores are two zero words followed by f2 and then f1 (8 bytes each), so f1   */
/* sits at SP+0, f2 at SP+8 and the zero words start at SP+16.  FZERO is the    */
/* offset the kernel passes to lfpsx to reload f0 before each tile.             */
/* NOTE(review): ALPHA is not referenced in the visible part of the kernel;     */
/* presumably it names the slot holding the saved f1 -- confirm.                */
44#define ALPHA    0
45#define FZERO	16
46
/* Problem dimensions.  The kernel branches to .L999 if any of them is <= 0. */
47#define	M	r3
48#define	N	r4
49#define	K	r5
50
/* Matrix pointers, leading dimension of C and the triangular offset.           */
/* LDC is shifted left by ZBASE_SHIFT in the prologue before it is used.        */
/* NOTE(review): these five names are only defined here for linux / FreeBSD;    */
/* no definition for other platforms is visible in this part of the file.       */
51#if defined(linux) || defined(__FreeBSD__)
52#define A	r6
53#define	B	r7
54#define	C	r8
55#define	LDC	r9
56#define OFFSET	r10
57#endif
58
/* TEMP  : scratch for address and trip-count arithmetic.                       */
/* AORIG : base pointer into A used by the LN and RT variants (A - 4 * SIZE).   */
/* KK    : running offset initialised from OFFSET according to LN/LT/RN/RT.     */
59#define TEMP	r11
60#define AORIG	r12
61#define KK	r14
/* Index-register constants loaded once after the argument checks:              */
/* INCMn = -n * SIZE, INC = 1 * SIZE, INC2 = 2 * SIZE, INC4 = 4 * SIZE.         */
62#define INCM1	r15
63#define INCM3	r16
64#define INCM5	r17
65#define INCM7	r18
66#define INC2	r19
67#define INC	r20
68#define INC4	r21
69
/* I receives the M-derived tile counts (M & 1, M & 2, M >> 2); J = N >> 1.     */
/* AO / BO walk the A and B operands; AO2 / BO2 are second pointers kept        */
/* 2 * SIZE ahead of AO / BO.                                                   */
70#define	I	r22
71#define J	r23
72#define AO	r24
73#define BO	r25
74#define AO2	r26
75#define	BO2	r27
76
/* Output pointers for the current pair of columns: CO1 = C, CO2 = C + LDC.     */
/* ZERO (r31) is not referenced in the visible part of the kernel.              */
77#define	CO1	r28
78#define CO2	r29
79#define	ZERO	r31
80
81#ifndef NEEDPARAM
82
/* Floating-point register aliases for operands streamed in through the         */
/* AO/AO2 and BO/BO2 pointers.  f0-f15 are used directly, without aliases, as   */
/* the accumulators of the inner loops.                                         */
/* The A/B prefix is not strict: some loops load values through BO/BO2 into     */
/* A5..A10, and the solve steps refer to f16-f23 by number, which overlaps      */
/* A1..A8.                                                                      */
83#define A1	f16
84#define A2	f17
85#define A3	f18
86#define A4	f19
87#define A5	f20
88#define A6	f21
89#define A7	f22
90#define A8	f23
91#define A9	f24
92#define A10	f25
93
94#define B1	f26
95#define B2	f27
96#define B3	f28
97#define B4	f29
98#define B5	f30
99#define B6	f31
100
/* AP shares f31 with B6; it is not referenced in the visible part of the kernel. */
101#define AP	B6
102
/* FXCPMADD / FXCSMADD are the two multiply-add mnemonics issued by the         */
/* accumulation loops.  The instructions they expand to depend on CONJ and,     */
/* when CONJ is defined, on whether this is a left-side (LN/LT) build.          */
103#ifndef CONJ
104#define FXCPMADD	fxcpmadd
105#define FXCSMADD	fxcxnpma
106#else
/* CONJ defined: the LN/LT builds use a different pair from the other builds. */
107#if defined(LN) || defined(LT)
108#define FXCPMADD	fxcpnsma
109#define FXCSMADD	fxcxma
110#else
111#define FXCPMADD	fxcpmadd
112#define FXCSMADD	fxcxnsma
113#endif
114#endif
115
/* FXCXNPMA / FXCXNSMA are used by the solve steps that follow each             */
/* accumulation.  With CONJ defined the two underlying mnemonics are exchanged. */
116#ifndef CONJ
117#define FXCXNPMA	fxcxnpma
118#define FXCXNSMA	fxcxnsma
119#else
120#define FXCXNPMA	fxcxnsma
121#define FXCXNSMA	fxcxnpma
122#endif
123
124
125	PROLOGUE
126	PROFCODE
127
128	li	r0, -16
129
130	stfpdux	f14, SP, r0
131	stfpdux	f15, SP, r0
132	stfpdux	f16, SP, r0
133	stfpdux	f17, SP, r0
134	stfpdux	f18, SP, r0
135	stfpdux	f19, SP, r0
136	stfpdux	f20, SP, r0
137	stfpdux	f21, SP, r0
138	stfpdux	f22, SP, r0
139	stfpdux	f23, SP, r0
140	stfpdux	f24, SP, r0
141	stfpdux	f25, SP, r0
142	stfpdux	f26, SP, r0
143	stfpdux	f27, SP, r0
144	stfpdux	f28, SP, r0
145	stfpdux	f29, SP, r0
146	stfpdux	f30, SP, r0
147	stfpdux	f31, SP, r0
148
149	stwu	r31,  -4(SP)
150	stwu	r30,  -4(SP)
151	stwu	r29,  -4(SP)
152	stwu	r28,  -4(SP)
153
154	stwu	r27,  -4(SP)
155	stwu	r26,  -4(SP)
156	stwu	r25,  -4(SP)
157	stwu	r24,  -4(SP)
158
159	stwu	r23,  -4(SP)
160	stwu	r22,  -4(SP)
161	stwu	r21,  -4(SP)
162	stwu	r20,  -4(SP)
163
164	stwu	r19,  -4(SP)
165	stwu	r18,  -4(SP)
166	stwu	r17,  -4(SP)
167	stwu	r16,  -4(SP)
168
169	stwu	r15,  -4(SP)
170	stwu	r14,  -4(SP)
171
172	li	r0,   0
173	stwu	r0,   -4(SP)
174	stwu	r0,   -4(SP)
175
176	stfdu	f2,   -8(SP)
177	stfdu	f1,   -8(SP)
178
179	slwi	LDC, LDC, ZBASE_SHIFT
180
181	cmpwi	cr0, M, 0
182	ble	.L999
183	cmpwi	cr0, N, 0
184	ble	.L999
185	cmpwi	cr0, K, 0
186	ble	.L999
187
188	li	INC,    1 * SIZE
189	li	INC2,   2 * SIZE
190	li	INC4,   4 * SIZE
191	li	INCM1, -1 * SIZE
192	li	INCM3, -3 * SIZE
193	li	INCM5, -5 * SIZE
194	li	INCM7, -7 * SIZE
195
196	addi	C, C, - 1 * SIZE
197
198#ifdef LN
199	mullw	r0, M, K
200	slwi	r0, r0, ZBASE_SHIFT
201	add	A, A, r0
202
203	slwi	r0, M, ZBASE_SHIFT
204	add	C, C, r0
205#endif
206
207#ifdef RN
208	neg	KK, OFFSET
209#endif
210
211#ifdef RT
212	mullw	r0, N, K
213	slwi	r0, r0, ZBASE_SHIFT
214	add	B, B, r0
215
216	mullw	r0, N, LDC
217	add	C, C, r0
218
219	sub	KK, N, OFFSET
220#endif
221
222	srawi.	J, N,  1
223	ble	.L50
224	.align 4
225
226.L10:
227#ifdef RT
228	slwi	r0, K, 1 + ZBASE_SHIFT
229	sub	B, B, r0
230
231	slwi	r0, LDC, 1
232	sub	C, C, r0
233#endif
234
235	mr	CO1, C
236	add	CO2, C,   LDC
237
238#ifdef LN
239	add	KK, M, OFFSET
240#endif
241
242#ifdef LT
243	mr	KK, OFFSET
244#endif
245
246#if defined(LN) || defined(RT)
247	addi	AORIG, A, -4 * SIZE
248#else
249	addi	AO, A, -4 * SIZE
250#endif
251#ifndef RT
252	add	C,  CO2, LDC
253#endif
254
255	li	r0, FZERO
256	lfpsx	f0, SP, r0
257
258
259	andi.	I, M,  1
260	beq	.L20
261
262#if defined(LT) || defined(RN)
263	addi	AO2, AO,   2 * SIZE
264	fpmr	f1,  f0
265	addi	BO,  B,  - 4 * SIZE
266	fpmr	f2,  f0
267	addi	BO2, B,  - 2 * SIZE
268	fpmr	f3, f0
269
270	srawi.	r0,  KK,  2
271	mtspr	CTR, r0
272	ble	.L34
273#else
274#ifdef LN
275	slwi	r0,   K,  0 + ZBASE_SHIFT
276	sub	AORIG, AORIG, r0
277#endif
278
279	slwi	r0  , KK, 0 + ZBASE_SHIFT
280	slwi	TEMP, KK, 1 + ZBASE_SHIFT
281	add	AO, AORIG, r0
282	add	BO, B,     TEMP
283
284	sub	TEMP, K, KK
285
286	addi	AO2, AO,   2 * SIZE
287	fpmr	f1,  f0
288	addi	BO,  BO,  - 4 * SIZE
289	fpmr	f2,  f0
290	addi	BO2, BO,    2 * SIZE
291	fpmr	f3, f0
292
293	srawi.	r0,  TEMP,  2
294	mtspr	CTR, r0
295	ble	.L34
296#endif
297
298	LFPDUX	A1,  AO, INC4
299	LFPDUX	B1,  BO, INC4
300	LFPDUX	B2, BO2, INC4
301	LFPDUX	A2, AO2, INC4
302	LFPDUX	B3,  BO, INC4
303	LFPDUX	B4, BO2, INC4
304
305	LFPDUX	A3,  AO, INC4
306	LFPDUX	A5,  BO, INC4
307	LFPDUX	A6, BO2, INC4
308	LFPDUX	A4, AO2, INC4
309	LFPDUX	A7,  BO, INC4
310	LFPDUX	A8, BO2, INC4
311	bdz-	.L33
312	.align 4
313
314.L32:
315	FXCPMADD	f0,  B1, A1, f0
316	FXCSMADD	f1,  B1, A1, f1
317	LFPDUX	B1,  BO, INC4
318	FXCPMADD	f2,  B2, A1, f2
319	FXCSMADD	f3,  B2, A1, f3
320	LFPDUX	B2, BO2, INC4
321	LFPDUX	A1,  AO, INC4
322
323	FXCPMADD	f0,  B3, A2, f0
324	FXCSMADD	f1,  B3, A2, f1
325	LFPDUX	B3,  BO, INC4
326	FXCPMADD	f2,  B4, A2, f2
327	FXCSMADD	f3,  B4, A2, f3
328	LFPDUX	B4, BO2, INC4
329	LFPDUX	A2, AO2, INC4
330
331	FXCPMADD	f0,  A5, A3, f0
332	FXCSMADD	f1,  A5, A3, f1
333	LFPDUX	A5,  BO, INC4
334	FXCPMADD	f2,  A6, A3, f2
335	FXCSMADD	f3,  A6, A3, f3
336	LFPDUX	A6, BO2, INC4
337	LFPDUX	A3,  AO, INC4
338
339	FXCPMADD	f0,  A7, A4, f0
340	FXCSMADD	f1,  A7, A4, f1
341	LFPDUX	A7,  BO, INC4
342	FXCPMADD	f2,  A8, A4, f2
343	FXCSMADD	f3,  A8, A4, f3
344	LFPDUX	A8, BO2, INC4
345	LFPDUX	A4, AO2, INC4
346	bdnz+	.L32
347	.align 4
348
349.L33:
350	FXCPMADD	f0,  B1, A1, f0
351	FXCSMADD	f1,  B1, A1, f1
352	FXCPMADD	f2,  B2, A1, f2
353	FXCSMADD	f3,  B2, A1, f3
354
355	FXCPMADD	f0,  B3, A2, f0
356	FXCSMADD	f1,  B3, A2, f1
357	FXCPMADD	f2,  B4, A2, f2
358	FXCSMADD	f3,  B4, A2, f3
359
360	FXCPMADD	f0,  A5, A3, f0
361	FXCSMADD	f1,  A5, A3, f1
362	FXCPMADD	f2,  A6, A3, f2
363	FXCSMADD	f3,  A6, A3, f3
364
365	FXCPMADD	f0,  A7, A4, f0
366	FXCSMADD	f1,  A7, A4, f1
367	FXCPMADD	f2,  A8, A4, f2
368	FXCSMADD	f3,  A8, A4, f3
369	.align 4
370
371.L34:
372#if defined(LT) || defined(RN)
373	andi.	r0,  KK,  3
374	mtspr	CTR, r0
375	ble+	.L38
376#else
377	andi.	r0, TEMP, 3
378	mtspr	CTR, r0
379	ble+	.L38
380#endif
381
382	LFPDX	A1,  AO,  INC4
383	LFPDUX	B1,  BO,  INC4
384	LFPDUX	B2,  BO2, INC4
385	add	AO, AO, INC2
386	bdz-	.L37
387	.align 4
388
389.L36:
390	FXCPMADD	f0,  B1, A1, f0
391	FXCSMADD	f1,  B1, A1, f1
392	LFPDUX	B1,  BO,  INC4
393	FXCPMADD	f2,  B2, A1, f2
394	FXCSMADD	f3,  B2, A1, f3
395	LFPDX	A1,  AO,  INC4
396	LFPDUX	B2,  BO2, INC4
397	add	AO, AO, INC2
398	bdnz+	.L36
399	.align 4
400
401.L37:
402	FXCPMADD	f0,  B1, A1, f0
403	FXCSMADD	f1,  B1, A1, f1
404	FXCPMADD	f2,  B2, A1, f2
405	FXCSMADD	f3,  B2, A1, f3
406	.align 4
407
408.L38:
409	fpadd	f0, f0, f1
410	fpadd	f2, f2, f3
411
412#if defined(LN) || defined(RT)
413#ifdef LN
414	subi	r0, KK, 1
415#else
416	subi	r0, KK, 2
417#endif
418	slwi	TEMP, r0, 0 + ZBASE_SHIFT
419	slwi	r0,   r0, 1 + ZBASE_SHIFT
420	add	AO, AORIG, TEMP
421	add	BO, B,     r0
422	addi	BO,  BO, - 4 * SIZE
423#endif
424
425	addi	AO2, AO,   2 * SIZE
426	addi	BO2, BO,   2 * SIZE
427
428#if defined(LN) || defined(LT)
429	LFPDX	f16, BO,  INC4
430	LFPDX	f17, BO2, INC4
431#else
432	LFPDX	f16, AO,  INC4
433	LFPDX	f17, AO2, INC4
434#endif
435
436	fpsub	f0,  f16,  f0
437	fpsub	f2,  f17,  f2
438
439#ifdef LN
440	LFPDX	A1,  AO,  INC4
441
442	fxpmul	  f4,  A1, f0
443	fxpmul	  f5,  A1, f2
444	FXCXNPMA  f0,  A1, f0,  f4
445	FXCXNPMA  f2,  A1, f2,  f5
446#endif
447
448#ifdef LT
449	LFPDX	A1,  AO,  INC4
450
451	fxpmul	  f4,  A1, f0
452	fxpmul	  f5,  A1, f2
453	FXCXNPMA  f0,  A1, f0, f4
454	FXCXNPMA  f2,  A1, f2, f5
455#endif
456
457#ifdef RN
458	LFPDUX	A1,  BO,  INC4
459	LFPDUX	A2,  BO2, INC4
460	add	BO,  BO,  INC4
461	LFPDUX	A3,  BO2, INC4
462
463	subi	BO,  BO,   8 * SIZE
464	subi	BO2, BO2,  8 * SIZE
465
466	fxpmul	  f4,  A1, f0
467	FXCXNPMA  f0,  A1, f0, f4
468
469	fxcpnmsub f2,  A2, f0, f2
470	FXCXNSMA  f2,  A2, f0, f2
471
472	fxpmul	  f4,  A3, f2
473	FXCXNPMA  f2,  A3, f2,  f4
474#endif
475
476#ifdef RT
477	LFPDUX	A1,  BO,  INC4
478	add	BO2, BO2, INC4
479	LFPDUX	A2,  BO,  INC4
480	LFPDUX	A3,  BO2, INC4
481
482	subi	BO,  BO,   8 * SIZE
483	subi	BO2, BO2,  8 * SIZE
484
485	fxpmul	  f4,  A3, f2
486	FXCXNPMA  f2,  A3, f2,  f4
487
488	fxcpnmsub f0,  A2, f2,  f0
489	FXCXNSMA  f0,  A2, f2,  f0
490
491	fxpmul	  f4,  A1, f0
492	FXCXNPMA  f0,  A1, f0,  f4
493#endif
494
495#ifdef LN
496	subi	CO1, CO1, 2 * SIZE
497	subi	CO2, CO2, 2 * SIZE
498#endif
499
500#if defined(LN) || defined(LT)
501	STFPDX	f0,  BO,  INC4
502	STFPDX	f2,  BO2, INC4
503#else
504	STFPDX	f0,  AO,  INC4
505	STFPDX	f2,  AO2, INC4
506#endif
507
508	STFDUX	f0,  CO1, INC
509	STFSDUX	f0,  CO1, INC
510	STFDUX	f2,  CO2, INC
511	STFSDUX	f2,  CO2, INC
512
513#ifdef LN
514	subi	CO1, CO1, 2 * SIZE
515	subi	CO2, CO2, 2 * SIZE
516#endif
517
518#ifdef RT
519	slwi	r0, K, 0 + ZBASE_SHIFT
520	add	AORIG, AORIG, r0
521#endif
522
523#if defined(LT) || defined(RN)
524	sub	TEMP, K, KK
525	slwi	r0,   TEMP, 0 + ZBASE_SHIFT
526	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
527	add	AO, AO, r0
528	add	BO, BO, TEMP
529#endif
530
531#ifdef LT
532	addi	KK, KK, 1
533#endif
534
535#ifdef LN
536	subi	KK, KK, 1
537#endif
538
539	li	r0, FZERO
540	lfpsx	f0, SP, r0
541	.align 4
542
543.L20:
544	andi.	I, M,  2
545	beq	.L30
546
547#if defined(LT) || defined(RN)
548	addi	AO2, AO,   2 * SIZE
549	fpmr	f4,  f0
550	addi	BO,  B,  - 4 * SIZE
551	fpmr	f8,  f0
552	addi	BO2, B,  - 2 * SIZE
553	fpmr	f12, f0
554
555	srawi.	r0,  KK,  2
556 	fpmr	f1,  f0
557	fpmr	f5,  f0
558	fpmr	f9,  f0
559	mtspr	CTR, r0
560	fpmr	f13, f0
561	ble	.L24
562#else
563#ifdef LN
564	slwi	r0,   K,  1 + ZBASE_SHIFT
565	sub	AORIG, AORIG, r0
566#endif
567
568	slwi	r0  , KK, 1 + ZBASE_SHIFT
569	add	AO, AORIG, r0
570	add	BO, B,     r0
571
572	sub	TEMP, K, KK
573
574	addi	AO2, AO,   2 * SIZE
575	fpmr	f4,  f0
576	addi	BO,  BO,  - 4 * SIZE
577	fpmr	f8,  f0
578	addi	BO2, BO,    2 * SIZE
579	fpmr	f12, f0
580
581 	fpmr	f1,  f0
582	fpmr	f5,  f0
583	fpmr	f9,  f0
584	fpmr	f13, f0
585	srawi.	r0,  TEMP,  2
586	mtspr	CTR, r0
587	ble	.L24
588#endif
589
590	LFPDUX	A1,   AO, INC4
591	LFPDUX	B1,   BO, INC4
592	LFPDUX	A2,  AO2, INC4
593	LFPDUX	B2,  BO2, INC4
594	LFPDUX	A3,   AO, INC4
595	LFPDUX	B3,   BO, INC4
596	LFPDUX	A4,  AO2, INC4
597	LFPDUX	B4,  BO2, INC4
598
599	LFPDUX	A5,   AO, INC4
600	LFPDUX	B5,   BO, INC4
601	LFPDUX	A6,  AO2, INC4
602	LFPDUX	B6,  BO2, INC4
603	LFPDUX	A7,   AO, INC4
604	LFPDUX	A9,   BO, INC4
605	LFPDUX	A10, BO2, INC4
606	bdz-	.L23
607	.align 4
608
609.L22:
610	FXCPMADD	f0,  B1, A1, f0
611	nop
612	FXCSMADD	f4,  B1, A1, f4
613	LFPDUX	A8,  AO2, INC4
614	FXCPMADD	f8,  B2, A1, f8
615	nop
616	FXCSMADD	f12, B2, A1, f12
617	LFPDUX	A1,   AO, INC4
618
619	FXCPMADD	f1,  B1, A2, f1
620	nop
621	FXCSMADD	f5,  B1, A2, f5
622	LFPDUX	B1,   BO, INC4
623	FXCPMADD	f9,  B2, A2, f9
624	nop
625	FXCSMADD	f13, B2, A2, f13
626	LFPDUX	B2,  BO2, INC4
627
628	FXCPMADD	f0,  B3, A3, f0
629	nop
630	FXCSMADD	f4,  B3, A3, f4
631	LFPDUX	A2,  AO2, INC4
632	FXCPMADD	f8,  B4, A3, f8
633	nop
634	FXCSMADD	f12, B4, A3, f12
635	LFPDUX	A3,   AO, INC4
636
637	FXCPMADD	f1,  B3, A4, f1
638	nop
639	FXCSMADD	f5,  B3, A4, f5
640	LFPDUX	B3,   BO, INC4
641	FXCPMADD	f9,  B4, A4, f9
642	nop
643	FXCSMADD	f13, B4, A4, f13
644	LFPDUX	B4,  BO2, INC4
645
646	FXCPMADD	f0,  B5, A5, f0
647	nop
648	FXCSMADD	f4,  B5, A5, f4
649	LFPDUX	A4,  AO2, INC4
650	FXCPMADD	f8,  B6, A5, f8
651	nop
652	FXCSMADD	f12, B6, A5, f12
653	LFPDUX	A5,   AO, INC4
654
655	FXCPMADD	f1,  B5, A6, f1
656	nop
657	FXCSMADD	f5,  B5, A6, f5
658	LFPDUX	B5,   BO, INC4
659	FXCPMADD	f9,  B6, A6, f9
660	nop
661	FXCSMADD	f13, B6, A6, f13
662	LFPDUX	B6,  BO2, INC4
663
664	FXCPMADD	f0,  A9,  A7, f0
665	nop
666	FXCSMADD	f4,  A9,  A7, f4
667	LFPDUX	A6,  AO2, INC4
668	FXCPMADD	f8,  A10, A7, f8
669	nop
670	FXCSMADD	f12, A10, A7, f12
671	LFPDUX	A7,   AO, INC4
672
673	FXCPMADD	f1,  A9,  A8, f1
674	nop
675	FXCSMADD	f5,  A9,  A8, f5
676	LFPDUX	A9,   BO, INC4
677	FXCPMADD	f9,  A10, A8, f9
678	nop
679	FXCSMADD	f13, A10, A8, f13
680	LFPDUX	A10, BO2, INC4
681	bdnz+	.L22
682	.align 4
683
684.L23:
685	FXCPMADD	f0,  B1, A1, f0
686	FXCSMADD	f4,  B1, A1, f4
687	LFPDUX	A8,  AO2, INC4
688	FXCPMADD	f8,  B2, A1, f8
689	FXCSMADD	f12, B2, A1, f12
690
691	FXCPMADD	f1,  B1, A2, f1
692	FXCSMADD	f5,  B1, A2, f5
693	FXCPMADD	f9,  B2, A2, f9
694	FXCSMADD	f13, B2, A2, f13
695
696	FXCPMADD	f0,  B3, A3, f0
697	FXCSMADD	f4,  B3, A3, f4
698	FXCPMADD	f8,  B4, A3, f8
699	FXCSMADD	f12, B4, A3, f12
700
701	FXCPMADD	f1,  B3, A4, f1
702	FXCSMADD	f5,  B3, A4, f5
703	FXCPMADD	f9,  B4, A4, f9
704	FXCSMADD	f13, B4, A4, f13
705
706	FXCPMADD	f0,  B5, A5, f0
707	FXCSMADD	f4,  B5, A5, f4
708	FXCPMADD	f8,  B6, A5, f8
709	FXCSMADD	f12, B6, A5, f12
710
711	FXCPMADD	f1,  B5, A6, f1
712	FXCSMADD	f5,  B5, A6, f5
713	FXCPMADD	f9,  B6, A6, f9
714	FXCSMADD	f13, B6, A6, f13
715
716	FXCPMADD	f0,  A9, A7, f0
717	FXCSMADD	f4,  A9, A7, f4
718	FXCPMADD	f8,  A10, A7, f8
719	FXCSMADD	f12, A10, A7, f12
720
721	FXCPMADD	f1,  A9, A8, f1
722	FXCSMADD	f5,  A9, A8, f5
723	FXCPMADD	f9,  A10, A8, f9
724	FXCSMADD	f13, A10, A8, f13
725	.align 4
726
727.L24:
728#if defined(LT) || defined(RN)
729	andi.	r0,  KK,  3
730	mtspr	CTR, r0
731	ble+	.L28
732#else
733	andi.	r0, TEMP, 3
734	mtspr	CTR, r0
735	ble+	.L28
736#endif
737
738	LFPDUX	A1,  AO,  INC4
739	LFPDUX	A2,  AO2, INC4
740	LFPDUX	B1,  BO,  INC4
741	LFPDUX	B2,  BO2, INC4
742	bdz-	.L27
743	.align 4
744
745.L26:
746	FXCPMADD	f0,  B1, A1, f0
747	FXCSMADD	f4,  B1, A1, f4
748	FXCPMADD	f8,  B2, A1, f8
749	FXCSMADD	f12, B2, A1, f12
750	LFPDUX	A1,  AO,  INC4
751
752	FXCPMADD	f1,  B1, A2, f1
753	FXCSMADD	f5,  B1, A2, f5
754	LFPDUX	B1,  BO,  INC4
755	FXCPMADD	f9,  B2, A2, f9
756	FXCSMADD	f13, B2, A2, f13
757	LFPDUX	A2,  AO2, INC4
758	LFPDUX	B2,  BO2, INC4
759	bdnz+	.L26
760	.align 4
761
762.L27:
763	FXCPMADD	f0,  B1, A1, f0
764	FXCSMADD	f4,  B1, A1, f4
765	FXCPMADD	f8,  B2, A1, f8
766	FXCSMADD	f12, B2, A1, f12
767
768	FXCPMADD	f1,  B1, A2, f1
769	FXCSMADD	f5,  B1, A2, f5
770	FXCPMADD	f9,  B2, A2, f9
771	FXCSMADD	f13, B2, A2, f13
772	.align 4
773
774.L28:
775	fpadd	f0, f0, f4
776	fpadd	f8, f8, f12
777	fpadd	f1, f1, f5
778	fpadd	f9, f9, f13
779
780#if defined(LN) || defined(RT)
781#ifdef LN
782	subi	r0, KK, 2
783#else
784	subi	r0, KK, 2
785#endif
786	slwi	r0,   r0, 1 + ZBASE_SHIFT
787	add	AO, AORIG, r0
788	add	BO, B,     r0
789	addi	AO2, AO,   2 * SIZE
790	addi	BO,  BO, - 4 * SIZE
791	addi	BO2, BO,   2 * SIZE
792#endif
793
794#if defined(LN) || defined(LT)
795	LFPDUX	f16, BO,  INC4
796	LFPDUX	f18, BO2, INC4
797	LFPDUX	f17, BO,  INC4
798	LFPDUX	f19, BO2, INC4
799
800	subi	BO,  BO,   8 * SIZE
801	subi	BO2, BO2,  8 * SIZE
802#else
803	LFPDUX	f16, AO,  INC4
804	LFPDUX	f17, AO2, INC4
805	LFPDUX	f18, AO,  INC4
806	LFPDUX	f19, AO2, INC4
807
808	subi	AO,  AO,   8 * SIZE
809	subi	AO2, AO2,  8 * SIZE
810#endif
811
812	fpsub	f0,  f16,  f0
813	fpsub	f1,  f17,  f1
814	fpsub	f8,  f18,  f8
815	fpsub	f9,  f19,  f9
816
817#ifdef LN
818	LFPDUX	A1,  AO,  INC4
819	add	AO2, AO2, INC4
820	LFPDUX	A2,  AO,  INC4
821	LFPDUX	A3,  AO2, INC4
822
823	subi	AO,  AO,   8 * SIZE
824	subi	AO2, AO2,  8 * SIZE
825
826	fxpmul	  f4,  A3, f1
827	fxpmul	  f5,  A3, f9
828	FXCXNPMA  f1,  A3, f1,  f4
829	FXCXNPMA  f9,  A3, f9,  f5
830
831	fxcpnmsub f0,  A2, f1,  f0
832	fxcpnmsub f8,  A2, f9,  f8
833	FXCXNSMA  f0,  A2, f1,  f0
834	FXCXNSMA  f8,  A2, f9,  f8
835
836	fxpmul	  f4,  A1, f0
837	fxpmul	  f5,  A1, f8
838	FXCXNPMA  f0,  A1, f0,  f4
839	FXCXNPMA  f8,  A1, f8,  f5
840#endif
841
842#ifdef LT
843	LFPDUX	A1,  AO,  INC4
844	LFPDUX	A2,  AO2, INC4
845	add	AO,  AO,  INC4
846	LFPDUX	A3,  AO2, INC4
847
848	subi	AO,  AO,   8 * SIZE
849	subi	AO2, AO2,  8 * SIZE
850
851	fxpmul	  f4,  A1, f0
852	fxpmul	  f5,  A1, f8
853	FXCXNPMA  f0,  A1, f0, f4
854	FXCXNPMA  f8,  A1, f8, f5
855
856	fxcpnmsub f1,  A2, f0, f1
857	fxcpnmsub f9,  A2, f8, f9
858	FXCXNSMA  f1,  A2, f0, f1
859	FXCXNSMA  f9,  A2, f8, f9
860
861	fxpmul	  f6,  A3, f1
862	fxpmul	  f7,  A3, f9
863	FXCXNPMA  f1,  A3, f1, f6
864	FXCXNPMA  f9,  A3, f9, f7
865#endif
866
867#ifdef RN
868	LFPDUX	A1,  BO,  INC4
869	LFPDUX	A2,  BO2, INC4
870	add	BO,  BO,  INC4
871	LFPDUX	A3,  BO2, INC4
872
873	subi	BO,  BO,   8 * SIZE
874	subi	BO2, BO2,  8 * SIZE
875
876	fxpmul	  f4,  A1, f0
877	fxpmul	  f5,  A1, f1
878
879	FXCXNPMA  f0,  A1, f0, f4
880	FXCXNPMA  f1,  A1, f1, f5
881
882	fxcpnmsub f8,  A2, f0, f8
883	fxcpnmsub f9,  A2, f1, f9
884
885	FXCXNSMA  f8,  A2, f0, f8
886	FXCXNSMA  f9,  A2, f1, f9
887
888	fxpmul	  f4,  A3, f8
889	fxpmul	  f5,  A3, f9
890
891	FXCXNPMA  f8,  A3, f8,  f4
892	FXCXNPMA  f9,  A3, f9,  f5
893#endif
894
895#ifdef RT
896	LFPDUX	A1,  BO,  INC4
897	add	BO2, BO2, INC4
898	LFPDUX	A2,  BO,  INC4
899	LFPDUX	A3,  BO2, INC4
900
901	subi	BO,  BO,   8 * SIZE
902	subi	BO2, BO2,  8 * SIZE
903
904	fxpmul	  f4,  A3, f8
905	fxpmul	  f5,  A3, f9
906
907	FXCXNPMA  f8,  A3, f8,  f4
908	FXCXNPMA  f9,  A3, f9,  f5
909
910	fxcpnmsub f0,  A2, f8,  f0
911	fxcpnmsub f1,  A2, f9,  f1
912
913	FXCXNSMA  f0,  A2, f8,  f0
914	FXCXNSMA  f1,  A2, f9,  f1
915
916	fxpmul	  f4,  A1, f0
917	fxpmul	  f5,  A1, f1
918
919	FXCXNPMA  f0,  A1, f0,  f4
920	FXCXNPMA  f1,  A1, f1,  f5
921#endif
922
923#ifdef LN
924	subi	CO1, CO1, 4 * SIZE
925	subi	CO2, CO2, 4 * SIZE
926#endif
927
928#if defined(LN) || defined(LT)
929	STFPDUX	f0,  BO,  INC4
930	STFPDUX	f8,  BO2, INC4
931	STFPDUX	f1,  BO,  INC4
932	STFPDUX	f9,  BO2, INC4
933
934	subi	BO,  BO,   8 * SIZE
935	subi	BO2, BO2,  8 * SIZE
936#else
937	STFPDUX	f0,  AO,  INC4
938	STFPDUX	f1,  AO2, INC4
939	STFPDUX	f8,  AO,  INC4
940	STFPDUX	f9,  AO2, INC4
941
942	subi	AO,  AO,   8 * SIZE
943	subi	AO2, AO2,  8 * SIZE
944#endif
945
946	STFDUX	f0,  CO1, INC
947	STFSDUX	f0,  CO1, INC
948	STFDUX	f1,  CO1, INC
949	STFSDUX	f1,  CO1, INC
950
951	STFDUX	f8,  CO2, INC
952	STFSDUX	f8,  CO2, INC
953	STFDUX	f9,  CO2, INC
954	STFSDUX	f9,  CO2, INC
955
956#ifdef LN
957	subi	CO1, CO1, 4 * SIZE
958	subi	CO2, CO2, 4 * SIZE
959#endif
960
961#ifdef RT
962	slwi	r0, K, 1 + ZBASE_SHIFT
963	add	AORIG, AORIG, r0
964#endif
965
966#if defined(LT) || defined(RN)
967	sub	TEMP, K, KK
968	slwi	r0,   TEMP, 1 + ZBASE_SHIFT
969	add	AO, AO, r0
970	add	BO, BO, r0
971#endif
972
973#ifdef LT
974	addi	KK, KK, 2
975#endif
976
977#ifdef LN
978	subi	KK, KK, 2
979#endif
980
981	li	r0, FZERO
982	lfpsx	f0, SP, r0
983	.align 4
984
985.L30:
986	srawi.	I, M,  2
987	ble	.L49
988	.align 4
989
990.L11:
991#if defined(LT) || defined(RN)
992
993	addi	AO2, AO,   2 * SIZE
994	fpmr	f4,  f0
995	addi	BO,  B,  - 4 * SIZE
996	fpmr	f8,  f0
997	addi	BO2, B,  - 2 * SIZE
998	fpmr	f12, f0
999
1000	fpmr	f5,  f0
1001	fpmr	f9,  f0
1002	fpmr	f13, f0
1003	fpmr	f2,  f0
1004
1005	fpmr	f6,  f0
1006	fpmr	f10, f0
1007	fpmr	f14, f0
1008	fpmr	f3,  f0
1009
1010	fpmr	f7,  f0
1011	fpmr	f11, f0
1012	fpmr	f15, f0
1013
1014	srawi.	r0,  KK,  2
1015 	fpmr	f1,  f0
1016	mtspr	CTR, r0
1017	ble	.L14
1018#else
1019
1020#ifdef LN
1021	slwi	r0,   K,  2 + ZBASE_SHIFT
1022	sub	AORIG, AORIG, r0
1023#endif
1024
1025	slwi	r0  , KK, 2 + ZBASE_SHIFT
1026	slwi	TEMP, KK, 1 + ZBASE_SHIFT
1027	add	AO, AORIG, r0
1028	add	BO, B,     TEMP
1029
1030	sub	TEMP, K, KK
1031
1032	fpmr	f5,  f0
1033	fpmr	f9,  f0
1034	fpmr	f13, f0
1035	fpmr	f2,  f0
1036
1037	fpmr	f6,  f0
1038	fpmr	f10, f0
1039	fpmr	f14, f0
1040	fpmr	f3,  f0
1041
1042	fpmr	f7,  f0
1043	fpmr	f11, f0
1044	fpmr	f15, f0
1045
1046	addi	AO2, AO,    2 * SIZE
1047	fpmr	f4,  f0
1048	addi	BO,  BO,  - 4 * SIZE
1049	fpmr	f8,  f0
1050	addi	BO2, BO,    2 * SIZE
1051	fpmr	f12, f0
1052
1053	srawi.	r0,  TEMP,  2
1054 	fpmr	f1,  f0
1055	mtspr	CTR, r0
1056	ble	.L14
1057#endif
1058
1059	LFPDUX	A1,  AO, INC4
1060	fpmr	f5,  f0
1061	LFPDUX	A3,  AO, INC4
1062	fpmr	f9,  f0
1063	LFPDUX	B1,  BO, INC4
1064	fpmr	f13, f0
1065
1066	LFPDUX	A5,  AO, INC4
1067	fpmr	f2,  f0
1068	LFPDUX	A6,  AO, INC4
1069	fpmr	f6,  f0
1070	LFPDUX	B3,  BO, INC4
1071	fpmr	f10, f0
1072	LFPDUX	A7,  AO, INC4
1073	fpmr	f14, f0
1074
1075	LFPDUX	A8,  AO, INC4
1076	fpmr	f3,  f0
1077	LFPDUX	B5,  BO, INC4
1078	fpmr	f7,  f0
1079	LFPDUX	A9,  AO, INC4
1080	fpmr	f11, f0
1081	LFPDUX	A2, AO2, INC4
1082	fpmr	f15, f0
1083	LFPDUX	B2, BO2, INC4
1084	bdz-	.L13
1085	.align 4
1086
1087.L12:
1088
1089## 1 ##
1090	FXCPMADD	f0,  B1, A1, f0
1091	nop
1092	FXCSMADD	f4,  B1, A1, f4
1093	nop
1094	FXCPMADD	f8,  B2, A1, f8
1095	LFPDUX	B4, BO2, INC4
1096	FXCSMADD	f12, B2, A1, f12
1097	LFPDUX	B6,  BO, INC4
1098
1099	FXCPMADD	f1,  B1, A2, f1
1100	nop
1101	FXCSMADD	f5,  B1, A2, f5
1102	LFPDUX	A4, AO2, INC4
1103	FXCPMADD	f9,  B2, A2, f9
1104	LFPDUX	A10, AO, INC4
1105	FXCSMADD	f13, B2, A2, f13
1106	nop
1107
1108	FXCPMADD	f2,  B1, A3, f2
1109	nop
1110	FXCSMADD	f6,  B1, A3, f6
1111	nop
1112	FXCPMADD	f10, B2, A3, f10
1113	nop
1114	FXCSMADD	f14, B2, A3, f14
1115	nop
1116
1117	FXCPMADD	f3,  B1, A4, f3
1118	nop
1119	FXCSMADD	f7,  B1, A4, f7
1120	LFPDUX	A2, AO2, INC4
1121	FXCPMADD	f11, B2, A4, f11
1122	LFPDUX	A1,  AO, INC4
1123	FXCSMADD	f15, B2, A4, f15
1124	nop
1125
1126## 2 ##
1127
1128	FXCPMADD	f0,  B3, A5, f0
1129	nop
1130	FXCSMADD	f4,  B3, A5, f4
1131	nop
1132	FXCPMADD	f8,  B4, A5, f8
1133	LFPDUX	B2, BO2, INC4
1134	FXCSMADD	f12, B4, A5, f12
1135	LFPDUX	B1,  BO, INC4
1136
1137	FXCPMADD	f1,  B3, A2, f1
1138	nop
1139	FXCSMADD	f5,  B3, A2, f5
1140	LFPDUX	A4, AO2, INC4
1141	FXCPMADD	f9,  B4, A2, f9
1142	LFPDUX	A3,  AO, INC4
1143	FXCSMADD	f13, B4, A2, f13
1144	nop
1145
1146	FXCPMADD	f2,  B3, A6, f2
1147	nop
1148	FXCSMADD	f6,  B3, A6, f6
1149	nop
1150	FXCPMADD	f10, B4, A6, f10
1151	nop
1152	FXCSMADD	f14, B4, A6, f14
1153	nop
1154
1155	FXCPMADD	f3,  B3, A4, f3
1156	nop
1157	FXCSMADD	f7,  B3, A4, f7
1158	LFPDUX	A2, AO2, INC4
1159	FXCPMADD	f11, B4, A4, f11
1160	LFPDUX	A5,  AO, INC4
1161	FXCSMADD	f15, B4, A4, f15
1162	nop
1163
1164## 3 ##
1165
1166	FXCPMADD	f0,  B5, A7, f0
1167	nop
1168	FXCSMADD	f4,  B5, A7, f4
1169	nop
1170	FXCPMADD	f8,  B2, A7, f8
1171	LFPDUX	B4, BO2, INC4
1172	FXCSMADD	f12, B2, A7, f12
1173	LFPDUX	B3,  BO, INC4
1174
1175	FXCPMADD	f1,  B5, A2, f1
1176	nop
1177	FXCSMADD	f5,  B5, A2, f5
1178	LFPDUX	A4, AO2, INC4
1179	FXCPMADD	f9,  B2, A2, f9
1180	LFPDUX	A6,  AO, INC4
1181	FXCSMADD	f13, B2, A2, f13
1182	nop
1183
1184	FXCPMADD	f2,  B5, A8, f2
1185	nop
1186	FXCSMADD	f6,  B5, A8, f6
1187	nop
1188	FXCPMADD	f10, B2, A8, f10
1189	nop
1190	FXCSMADD	f14, B2, A8, f14
1191	nop
1192
1193	FXCPMADD	f3,  B5, A4, f3
1194	nop
1195	FXCSMADD	f7,  B5, A4, f7
1196	LFPDUX	A2, AO2, INC4
1197	FXCPMADD	f11, B2, A4, f11
1198	LFPDUX	A7,  AO, INC4
1199	FXCSMADD	f15, B2, A4, f15
1200	nop
1201
1202## 4 ##
1203	FXCPMADD	f0,  B6, A9, f0
1204	nop
1205	FXCSMADD	f4,  B6, A9, f4
1206	nop
1207	FXCPMADD	f8,  B4, A9, f8
1208	LFPDUX	B2, BO2, INC4
1209	FXCSMADD	f12, B4, A9, f12
1210	LFPDUX	B5,  BO, INC4
1211
1212	FXCPMADD	f1,  B6, A2, f1
1213	nop
1214	FXCSMADD	f5,  B6, A2, f5
1215	LFPDUX	A4, AO2, INC4
1216	FXCPMADD	f9,  B4, A2, f9
1217	LFPDUX	A8,  AO, INC4
1218	FXCSMADD	f13, B4, A2, f13
1219	nop
1220
1221	FXCPMADD	f2,  B6, A10, f2
1222	nop
1223	FXCSMADD	f6,  B6, A10, f6
1224	nop
1225	FXCPMADD	f10, B4, A10, f10
1226	nop
1227	FXCSMADD	f14, B4, A10, f14
1228	nop
1229
1230	FXCPMADD	f3,  B6, A4, f3
1231	LFPDUX	A2, AO2, INC4
1232	FXCSMADD	f7,  B6, A4, f7
1233	LFPDUX	A9,  AO, INC4
1234	FXCPMADD	f11, B4, A4, f11
1235	nop
1236	FXCSMADD	f15, B4, A4, f15
1237	bdnz+	.L12
1238	.align 4
1239
1240.L13:
1241## 1 ##
1242
1243	FXCPMADD	f0,  B1, A1, f0
1244	nop
1245	FXCSMADD	f4,  B1, A1, f4
1246	nop
1247	FXCPMADD	f8,  B2, A1, f8
1248	LFPDUX	B4, BO2, INC4
1249	FXCSMADD	f12, B2, A1, f12
1250	LFPDUX	B6,  BO, INC4
1251
1252	FXCPMADD	f1,  B1, A2, f1
1253	nop
1254	FXCSMADD	f5,  B1, A2, f5
1255	LFPDUX	A4, AO2, INC4
1256	FXCPMADD	f9,  B2, A2, f9
1257	LFPDUX	A10, AO, INC4
1258	FXCSMADD	f13, B2, A2, f13
1259	nop
1260
1261	FXCPMADD	f2,  B1, A3, f2
1262	nop
1263	FXCSMADD	f6,  B1, A3, f6
1264	nop
1265	FXCPMADD	f10, B2, A3, f10
1266	nop
1267	FXCSMADD	f14, B2, A3, f14
1268	nop
1269
1270	FXCPMADD	f3,  B1, A4, f3
1271	nop
1272	FXCSMADD	f7,  B1, A4, f7
1273	LFPDUX	A2, AO2, INC4
1274	FXCPMADD	f11, B2, A4, f11
1275	nop
1276	FXCSMADD	f15, B2, A4, f15
1277	nop
1278
1279## 2 ##
1280
1281	FXCPMADD	f0,  B3, A5, f0
1282	nop
1283	FXCSMADD	f4,  B3, A5, f4
1284	nop
1285	FXCPMADD	f8,  B4, A5, f8
1286	LFPDUX	B2, BO2, INC4
1287	FXCSMADD	f12, B4, A5, f12
1288	nop
1289
1290	FXCPMADD	f1,  B3, A2, f1
1291	nop
1292	FXCSMADD	f5,  B3, A2, f5
1293	LFPDUX	A4, AO2, INC4
1294	FXCPMADD	f9,  B4, A2, f9
1295	nop
1296	FXCSMADD	f13, B4, A2, f13
1297	nop
1298
1299	FXCPMADD	f2,  B3, A6, f2
1300	nop
1301	FXCSMADD	f6,  B3, A6, f6
1302	nop
1303	FXCPMADD	f10, B4, A6, f10
1304	nop
1305	FXCSMADD	f14, B4, A6, f14
1306	nop
1307
1308	FXCPMADD	f3,  B3, A4, f3
1309	nop
1310	FXCSMADD	f7,  B3, A4, f7
1311	LFPDUX	A2, AO2, INC4
1312	FXCPMADD	f11, B4, A4, f11
1313	nop
1314	FXCSMADD	f15, B4, A4, f15
1315	nop
1316
1317## 3 ##
1318
1319	FXCPMADD	f0,  B5, A7, f0
1320	nop
1321	FXCSMADD	f4,  B5, A7, f4
1322	nop
1323	FXCPMADD	f8,  B2, A7, f8
1324	LFPDUX	B4, BO2, INC4
1325	FXCSMADD	f12, B2, A7, f12
1326	nop
1327
1328	FXCPMADD	f1,  B5, A2, f1
1329	nop
1330	FXCSMADD	f5,  B5, A2, f5
1331	LFPDUX	A4, AO2, INC4
1332	FXCPMADD	f9,  B2, A2, f9
1333	nop
1334	FXCSMADD	f13, B2, A2, f13
1335	nop
1336
1337	FXCPMADD	f2,  B5, A8, f2
1338	nop
1339	FXCSMADD	f6,  B5, A8, f6
1340	nop
1341	FXCPMADD	f10, B2, A8, f10
1342	nop
1343	FXCSMADD	f14, B2, A8, f14
1344	nop
1345
1346	FXCPMADD	f3,  B5, A4, f3
1347	nop
1348	FXCSMADD	f7,  B5, A4, f7
1349	LFPDUX	A2, AO2, INC4
1350	FXCPMADD	f11, B2, A4, f11
1351	nop
1352	FXCSMADD	f15, B2, A4, f15
1353	nop
1354
1355## 4 ##
1356
1357	FXCPMADD	f0,  B6, A9, f0
1358	nop
1359	FXCSMADD	f4,  B6, A9, f4
1360	nop
1361	FXCPMADD	f8,  B4, A9, f8
1362	nop
1363	FXCSMADD	f12, B4, A9, f12
1364	nop
1365
1366	FXCPMADD	f1,  B6, A2, f1
1367	nop
1368	FXCSMADD	f5,  B6, A2, f5
1369	LFPDUX	A4, AO2, INC4
1370	FXCPMADD	f9,  B4, A2, f9
1371	nop
1372	FXCSMADD	f13, B4, A2, f13
1373	nop
1374
1375	FXCPMADD	f2,  B6, A10, f2
1376	nop
1377	FXCSMADD	f6,  B6, A10, f6
1378	nop
1379	FXCPMADD	f10, B4, A10, f10
1380	nop
1381	FXCSMADD	f14, B4, A10, f14
1382	nop
1383
1384	FXCPMADD	f3,  B6, A4, f3
1385	nop
1386	FXCSMADD	f7,  B6, A4, f7
1387	nop
1388	FXCPMADD	f11, B4, A4, f11
1389	nop
1390	FXCSMADD	f15, B4, A4, f15
1391	nop
1392	.align 4
1393
1394.L14:
1395#if defined(LT) || defined(RN)
1396	andi.	r0,  KK,  3
1397	mtspr	CTR, r0
1398	ble+	.L18
1399#else
1400	andi.	r0, TEMP, 3
1401	mtspr	CTR, r0
1402	ble+	.L18
1403#endif
1404
1405.L15:
1406	LFPDUX	A2,  AO,  INC4
1407	LFPDUX	A4,  AO2, INC4
1408	LFPDUX	A10, BO,  INC4
1409	LFPDUX	B4,  BO2, INC4
1410	bdz-	.L17
1411	.align 4
1412
1413.L16:
1414	FXCPMADD	f0,  A10, A2, f0
1415	FXCSMADD	f4,  A10, A2, f4
1416	FXCPMADD	f8,  B4, A2, f8
1417	FXCSMADD	f12, B4, A2, f12
1418	LFPDUX	A2, AO,  INC4
1419
1420	FXCPMADD	f1,  A10, A4, f1
1421	FXCSMADD	f5,  A10, A4, f5
1422	FXCPMADD	f9,  B4, A4, f9
1423	FXCSMADD	f13, B4, A4, f13
1424	LFPDUX	A4, AO2, INC4
1425
1426	FXCPMADD	f2,  A10, A2, f2
1427	FXCSMADD	f6,  A10, A2, f6
1428	FXCPMADD	f10, B4, A2, f10
1429	FXCSMADD	f14, B4, A2, f14
1430	LFPDUX	A2, AO,  INC4
1431
1432	FXCPMADD	f3,  A10, A4, f3
1433	FXCSMADD	f7,  A10, A4, f7
1434	LFPDUX	A10, BO,  INC4
1435	FXCPMADD	f11, B4, A4, f11
1436	FXCSMADD	f15, B4, A4, f15
1437	LFPDUX	A4, AO2, INC4
1438	LFPDUX	B4, BO2, INC4
1439	bdnz+	.L16
1440	.align 4
1441
1442.L17:
1443	FXCPMADD	f0,  A10, A2, f0
1444	FXCSMADD	f4,  A10, A2, f4
1445	FXCPMADD	f8,  B4, A2, f8
1446	FXCSMADD	f12, B4, A2, f12
1447	LFPDUX	A2, AO,  INC4
1448
1449	FXCPMADD	f1,  A10, A4, f1
1450	FXCSMADD	f5,  A10, A4, f5
1451	FXCPMADD	f9,  B4, A4, f9
1452	FXCSMADD	f13, B4, A4, f13
1453	LFPDUX	A4, AO2, INC4
1454
1455	FXCPMADD	f2,  A10, A2, f2
1456	FXCSMADD	f6,  A10, A2, f6
1457	FXCPMADD	f10, B4, A2, f10
1458	FXCSMADD	f14, B4, A2, f14
1459
1460	FXCPMADD	f3,  A10, A4, f3
1461	FXCSMADD	f7,  A10, A4, f7
1462	FXCPMADD	f11, B4, A4, f11
1463	FXCSMADD	f15, B4, A4, f15
1464	.align 4
1465
1466.L18:
1467	fpadd	f0,  f0,  f4
1468	fpadd	f8,  f8,  f12
1469	fpadd	f1,  f1,  f5
1470	fpadd	f9,  f9,  f13
1471
1472	fpadd	f2,  f2,  f6
1473	fpadd	f10, f10, f14
1474	fpadd	f3,  f3,  f7
1475	fpadd	f11, f11, f15
1476
1477#if defined(LN) || defined(RT)
1478#ifdef LN
1479	subi	r0, KK, 4
1480#else
1481	subi	r0, KK, 2
1482#endif
1483	slwi	TEMP, r0, 2 + ZBASE_SHIFT
1484	slwi	r0,   r0, 1 + ZBASE_SHIFT
1485	add	AO, AORIG, TEMP
1486	add	BO, B,     r0
1487	addi	AO2, AO,   2 * SIZE
1488	addi	BO,  BO, - 4 * SIZE
1489	addi	BO2, BO,   2 * SIZE
1490#endif
1491
1492#if defined(LN) || defined(LT)
1493	LFPDUX	f16, BO,  INC4
1494	LFPDUX	f20, BO2, INC4
1495	LFPDUX	f17, BO,  INC4
1496	LFPDUX	f21, BO2, INC4
1497	LFPDUX	f18, BO,  INC4
1498	LFPDUX	f22, BO2, INC4
1499	LFPDUX	f19, BO,  INC4
1500	LFPDUX	f23, BO2, INC4
1501
1502	subi	BO,  BO,  16 * SIZE
1503	subi	BO2, BO2, 16 * SIZE
1504#else
1505	LFPDUX	f16, AO,  INC4
1506	LFPDUX	f17, AO2, INC4
1507	LFPDUX	f18, AO,  INC4
1508	LFPDUX	f19, AO2, INC4
1509	LFPDUX	f20, AO,  INC4
1510	LFPDUX	f21, AO2, INC4
1511	LFPDUX	f22, AO,  INC4
1512	LFPDUX	f23, AO2, INC4
1513
1514	subi	AO,  AO,  16 * SIZE
1515	subi	AO2, AO2, 16 * SIZE
1516#endif
1517
1518	fpsub	f0,  f16,  f0
1519	fpsub	f1,  f17,  f1
1520	fpsub	f2,  f18,  f2
1521	fpsub	f3,  f19,  f3
1522
1523	fpsub	f8,  f20,  f8
1524	fpsub	f9,  f21,  f9
1525	fpsub	f10, f22,  f10
1526	fpsub	f11, f23,  f11
1527
/* LN variant of the .L18 solve step.
 * Loads ten coefficient registers A1..A10 through AO/AO2; the plain `add`
 * instructions step over slots that are not loaded.  Each pointer moves by
 * 8 * INC4 in total and is then moved back by 32 * SIZE.
 * Rows are processed from the last pair (f3/f11) to the first (f0/f8):
 * an fxpmul + FXCXNPMA pair rescales a row by one coefficient (A10, A6, A3,
 * A1), and fxcpnmsub + FXCXNSMA pairs update the rows still to be processed.
 * NOTE(review): FXCXNPMA/FXCXNSMA are macros defined outside this chunk; the
 * pairing looks like a complex multiply / complex multiply-subtract -- confirm. */
1528#ifdef LN
1529	LFPDUX	A1,  AO,  INC4
1530	add	AO2, AO2, INC4
1531	add	AO,  AO,  INC4
1532	add	AO2, AO2, INC4
1533
1534	LFPDUX	A2,  AO,  INC4
1535	LFPDUX	A3,  AO2, INC4
1536	add	AO,  AO,  INC4
1537	add	AO2, AO2, INC4
1538
1539	LFPDUX	A4,  AO,  INC4
1540	LFPDUX	A5,  AO2, INC4
1541	LFPDUX	A6,  AO,  INC4
1542	add	AO2, AO2, INC4
1543
1544	LFPDUX	A7,  AO,  INC4
1545	LFPDUX	A8,  AO2, INC4
1546	LFPDUX	A9,  AO,  INC4
1547	LFPDUX	A10, AO2, INC4
1548
1549	subi	AO,  AO,  32 * SIZE
1550	subi	AO2, AO2, 32 * SIZE
1551
/* rows f3/f11: rescale by A10 (f4/f5 are scratch) */
1552	fxpmul	  f4,  A10, f3
1553	fxpmul	  f5,  A10, f11
1554	FXCXNPMA  f3,  A10, f3,  f4
1555	FXCXNPMA  f11, A10, f11, f5
1556
/* update rows 2, 1, 0 from f3/f11 using A9, A8, A7 */
1557	fxcpnmsub f2,  A9, f3,  f2
1558	fxcpnmsub f10, A9, f11, f10
1559	FXCXNSMA  f2,  A9, f3,  f2
1560	FXCXNSMA  f10, A9, f11, f10
1561
1562	fxcpnmsub f1,  A8, f3,  f1
1563	fxcpnmsub f9,  A8, f11, f9
1564	FXCXNSMA  f1,  A8, f3,  f1
1565	FXCXNSMA  f9,  A8, f11, f9
1566
1567	fxcpnmsub f0,  A7, f3,  f0
1568	fxcpnmsub f8,  A7, f11, f8
1569	FXCXNSMA  f0,  A7, f3,  f0
1570	FXCXNSMA  f8,  A7, f11, f8
1571
/* rows f2/f10: rescale by A6, then update rows 1 and 0 using A5, A4 */
1572	fxpmul	  f4,  A6, f2
1573	fxpmul	  f5,  A6, f10
1574	FXCXNPMA  f2,  A6, f2,  f4
1575	FXCXNPMA  f10, A6, f10, f5
1576
1577	fxcpnmsub f1,  A5, f2,  f1
1578	fxcpnmsub f9,  A5, f10, f9
1579	FXCXNSMA  f1,  A5, f2,  f1
1580	FXCXNSMA  f9,  A5, f10, f9
1581
1582	fxcpnmsub f0,  A4, f2,  f0
1583	fxcpnmsub f8,  A4, f10, f8
1584	FXCXNSMA  f0,  A4, f2,  f0
1585	FXCXNSMA  f8,  A4, f10, f8
1586
/* rows f1/f9: rescale by A3, then update row 0 using A2 */
1587	fxpmul	  f4,  A3, f1
1588	fxpmul	  f5,  A3, f9
1589	FXCXNPMA  f1,  A3, f1,  f4
1590	FXCXNPMA  f9,  A3, f9,  f5
1591
1592	fxcpnmsub f0,  A2, f1,  f0
1593	fxcpnmsub f8,  A2, f9,  f8
1594	FXCXNSMA  f0,  A2, f1,  f0
1595	FXCXNSMA  f8,  A2, f9,  f8
1596
/* rows f0/f8: rescale by A1 */
1597	fxpmul	  f4,  A1, f0
1598	fxpmul	  f5,  A1, f8
1599	FXCXNPMA  f0,  A1, f0,  f4
1600	FXCXNPMA  f8,  A1, f8,  f5
1601#endif
1602
/* LT variant of the .L18 solve step.
 * Loads A1..A10 through AO/AO2 (plain `add` steps over slots that are not
 * loaded; each pointer moves 8 * INC4 and is then moved back 32 * SIZE).
 * Rows are processed from the first pair (f0/f8) to the last (f3/f11):
 * fxpmul + FXCXNPMA rescales a row by A1, A5, A8, A10 in turn, and
 * fxcpnmsub + FXCXNSMA pairs update the rows that follow.
 * NOTE(review): the FXCX* macros are defined outside this chunk -- confirm
 * their exact arithmetic there. */
1603#ifdef LT
1604	LFPDUX	A1,  AO,  INC4
1605	LFPDUX	A2,  AO2, INC4
1606	LFPDUX	A3,  AO,  INC4
1607	LFPDUX	A4,  AO2, INC4
1608
1609	add	AO,  AO,  INC4
1610	LFPDUX	A5,  AO2, INC4
1611	LFPDUX	A6,  AO,  INC4
1612	LFPDUX	A7,  AO2, INC4
1613
1614	add	AO,  AO,  INC4
1615	add	AO2, AO2, INC4
1616	LFPDUX	A8,  AO,  INC4
1617	LFPDUX	A9,  AO2, INC4
1618
1619	add	AO,  AO,  INC4
1620	add	AO2, AO2, INC4
1621	add	AO,  AO,  INC4
1622	LFPDUX	A10, AO2, INC4
1623
1624	subi	AO,  AO,  32 * SIZE
1625	subi	AO2, AO2, 32 * SIZE
1626
/* rows f0/f8: rescale by A1, then update rows 1, 2, 3 using A2, A3, A4 */
1627	fxpmul	  f4,  A1, f0
1628	fxpmul	  f5,  A1, f8
1629	FXCXNPMA  f0,  A1, f0, f4
1630	FXCXNPMA  f8,  A1, f8, f5
1631
1632	fxcpnmsub f1,  A2, f0, f1
1633	fxcpnmsub f9,  A2, f8, f9
1634	FXCXNSMA  f1,  A2, f0, f1
1635	FXCXNSMA  f9,  A2, f8, f9
1636
1637	fxcpnmsub f2,  A3, f0, f2
1638	fxcpnmsub f10, A3, f8, f10
1639	FXCXNSMA  f2,  A3, f0, f2
1640	FXCXNSMA  f10, A3, f8, f10
1641
1642	fxcpnmsub f3,  A4, f0, f3
1643	fxcpnmsub f11, A4, f8, f11
1644	FXCXNSMA  f3,  A4, f0, f3
1645	FXCXNSMA  f11, A4, f8, f11
1646
/* rows f1/f9: rescale by A5 (f6/f7 scratch), then update rows 2, 3 using A6, A7 */
1647	fxpmul	  f6,  A5, f1
1648	fxpmul	  f7,  A5, f9
1649	FXCXNPMA  f1,  A5, f1, f6
1650	FXCXNPMA  f9,  A5, f9, f7
1651
1652	fxcpnmsub f2,  A6, f1, f2
1653	fxcpnmsub f10, A6, f9, f10
1654	FXCXNSMA  f2,  A6, f1, f2
1655	FXCXNSMA  f10, A6, f9, f10
1656
1657	fxcpnmsub f3,  A7, f1, f3
1658	fxcpnmsub f11, A7, f9, f11
1659	FXCXNSMA  f3,  A7, f1, f3
1660	FXCXNSMA  f11, A7, f9, f11
1661
/* rows f2/f10: rescale by A8, then update row 3 using A9 */
1662	fxpmul	  f4,  A8, f2
1663	fxpmul	  f5,  A8, f10
1664	FXCXNPMA  f2,  A8, f2,  f4
1665	FXCXNPMA  f10, A8, f10, f5
1666
1667	fxcpnmsub f3,  A9, f2,  f3
1668	fxcpnmsub f11, A9, f10, f11
1669	FXCXNSMA  f3,  A9, f2,  f3
1670	FXCXNSMA  f11, A9, f10, f11
1671
/* rows f3/f11: rescale by A10 */
1672	fxpmul	  f6,  A10, f3
1673	fxpmul	  f7,  A10, f11
1674	FXCXNPMA  f3,  A10, f3,  f6
1675	FXCXNPMA  f11, A10, f11, f7
1676#endif
1677
/* RN variant of the .L18 solve step.
 * Loads three coefficient registers through BO/BO2 (one BO slot is stepped
 * over with `add`), then moves both pointers back by 8 * SIZE.
 * f0-f3 are rescaled by A1, f8-f11 are updated from f0-f3 using A2, and
 * f8-f11 are finally rescaled by A3.
 * NOTE(review): the FXCX* macros are defined outside this chunk -- confirm
 * their exact arithmetic there. */
1678#ifdef RN
1679	LFPDUX	A1,  BO,  INC4
1680	LFPDUX	A2,  BO2, INC4
1681	add	BO,  BO,  INC4
1682	LFPDUX	A3,  BO2, INC4
1683
1684	subi	BO,  BO,   8 * SIZE
1685	subi	BO2, BO2,  8 * SIZE
1686
/* f0-f3: rescale by A1 (f4-f7 are scratch) */
1687	fxpmul	  f4,  A1, f0
1688	fxpmul	  f5,  A1, f1
1689	fxpmul	  f6,  A1, f2
1690	fxpmul	  f7,  A1, f3
1691
1692	FXCXNPMA  f0,  A1, f0, f4
1693	FXCXNPMA  f1,  A1, f1, f5
1694	FXCXNPMA  f2,  A1, f2, f6
1695	FXCXNPMA  f3,  A1, f3, f7
1696
/* f8-f11: update from f0-f3 using A2 */
1697	fxcpnmsub f8,  A2, f0, f8
1698	fxcpnmsub f9,  A2, f1, f9
1699	fxcpnmsub f10, A2, f2, f10
1700	fxcpnmsub f11, A2, f3, f11
1701
1702	FXCXNSMA  f8,  A2, f0, f8
1703	FXCXNSMA  f9,  A2, f1, f9
1704	FXCXNSMA  f10, A2, f2, f10
1705	FXCXNSMA  f11, A2, f3, f11
1706
/* f8-f11: rescale by A3 */
1707	fxpmul	  f4,  A3, f8
1708	fxpmul	  f5,  A3, f9
1709	fxpmul	  f6,  A3, f10
1710	fxpmul	  f7,  A3, f11
1711
1712	FXCXNPMA  f8,  A3, f8,  f4
1713	FXCXNPMA  f9,  A3, f9,  f5
1714	FXCXNPMA  f10, A3, f10, f6
1715	FXCXNPMA  f11, A3, f11, f7
1716#endif
1717
/* RT variant of the .L18 solve step.
 * Loads three coefficient registers through BO/BO2 (one BO2 slot is stepped
 * over with `add`), then moves both pointers back by 8 * SIZE.
 * The order mirrors the RN variant: f8-f11 are rescaled by A3 first, f0-f3
 * are updated from f8-f11 using A2, and f0-f3 are finally rescaled by A1.
 * NOTE(review): the FXCX* macros are defined outside this chunk -- confirm
 * their exact arithmetic there. */
1718#ifdef RT
1719	LFPDUX	A1,  BO,  INC4
1720	add	BO2, BO2, INC4
1721	LFPDUX	A2,  BO,  INC4
1722	LFPDUX	A3,  BO2, INC4
1723
1724	subi	BO,  BO,   8 * SIZE
1725	subi	BO2, BO2,  8 * SIZE
1726
/* f8-f11: rescale by A3 (f4-f7 are scratch) */
1727	fxpmul	  f4,  A3, f8
1728	fxpmul	  f5,  A3, f9
1729	fxpmul	  f6,  A3, f10
1730	fxpmul	  f7,  A3, f11
1731
1732	FXCXNPMA  f8,  A3, f8,  f4
1733	FXCXNPMA  f9,  A3, f9,  f5
1734	FXCXNPMA  f10, A3, f10, f6
1735	FXCXNPMA  f11, A3, f11, f7
1736
/* f0-f3: update from f8-f11 using A2 */
1737	fxcpnmsub f0,  A2, f8,  f0
1738	fxcpnmsub f1,  A2, f9,  f1
1739	fxcpnmsub f2,  A2, f10, f2
1740	fxcpnmsub f3,  A2, f11, f3
1741
1742	FXCXNSMA  f0,  A2, f8,  f0
1743	FXCXNSMA  f1,  A2, f9,  f1
1744	FXCXNSMA  f2,  A2, f10, f2
1745	FXCXNSMA  f3,  A2, f11, f3
1746
/* f0-f3: rescale by A1 */
1747	fxpmul	  f4,  A1, f0
1748	fxpmul	  f5,  A1, f1
1749	fxpmul	  f6,  A1, f2
1750	fxpmul	  f7,  A1, f3
1751
1752	FXCXNPMA  f0,  A1, f0,  f4
1753	FXCXNPMA  f1,  A1, f1,  f5
1754	FXCXNPMA  f2,  A1, f2,  f6
1755	FXCXNPMA  f3,  A1, f3,  f7
1756#endif
1757
/* Write-back for the tile solved in .L18, then advance to the next tile.
 * The eight result registers are stored back to the packed buffer they were
 * loaded from and to the output through CO1 (f0-f3) and CO2 (f8-f11). */
/* LN: move CO1/CO2 back by 8 * SIZE before the output stores */
1758#ifdef LN
1759	subi	CO1, CO1, 8 * SIZE
1760	subi	CO2, CO2, 8 * SIZE
1761#endif
1762
/* Store to the packed buffer in the same register/pointer order used by the
 * loads (BO/BO2 for LN/LT, AO/AO2 otherwise), then rewind by 16 * SIZE. */
1763#if defined(LN) || defined(LT)
1764	STFPDUX	f0,  BO,  INC4
1765	STFPDUX	f8,  BO2, INC4
1766	STFPDUX	f1,  BO,  INC4
1767	STFPDUX	f9,  BO2, INC4
1768	STFPDUX	f2,  BO,  INC4
1769	STFPDUX	f10, BO2, INC4
1770	STFPDUX	f3,  BO,  INC4
1771	STFPDUX	f11, BO2, INC4
1772
1773	subi	BO,  BO,  16 * SIZE
1774	subi	BO2, BO2, 16 * SIZE
1775#else
1776	STFPDUX	f0,  AO,  INC4
1777	STFPDUX	f1,  AO2, INC4
1778	STFPDUX	f2,  AO,  INC4
1779	STFPDUX	f3,  AO2, INC4
1780	STFPDUX	f8,  AO,  INC4
1781	STFPDUX	f9,  AO2, INC4
1782	STFPDUX	f10, AO,  INC4
1783	STFPDUX	f11, AO2, INC4
1784
1785	subi	AO,  AO,  16 * SIZE
1786	subi	AO2, AO2, 16 * SIZE
1787#endif
1788
/* Each register goes out through an STFDUX/STFSDUX pair, stepping by INC */
1789	STFDUX	f0,  CO1, INC
1790	STFSDUX	f0,  CO1, INC
1791	STFDUX	f1,  CO1, INC
1792	STFSDUX	f1,  CO1, INC
1793	STFDUX	f2,  CO1, INC
1794	STFSDUX	f2,  CO1, INC
1795	STFDUX	f3,  CO1, INC
1796	STFSDUX	f3,  CO1, INC
1797
1798	STFDUX	f8,  CO2, INC
1799	STFSDUX	f8,  CO2, INC
1800	STFDUX	f9,  CO2, INC
1801	STFSDUX	f9,  CO2, INC
1802	STFDUX	f10, CO2, INC
1803	STFSDUX	f10, CO2, INC
1804	STFDUX	f11, CO2, INC
1805	STFSDUX	f11, CO2, INC
1806
/* LN: move CO1/CO2 back by 8 * SIZE again after the stores */
1807#ifdef LN
1808	subi	CO1, CO1, 8 * SIZE
1809	subi	CO2, CO2, 8 * SIZE
1810#endif
1811
/* RT: AORIG += K << (2 + ZBASE_SHIFT) */
1812#ifdef RT
1813	slwi	r0, K, 2 + ZBASE_SHIFT
1814	add	AORIG, AORIG, r0
1815#endif
1816
/* LT/RN: advance AO and BO by the (K - KK) remainder, scaled by
 * 2 + ZBASE_SHIFT and 1 + ZBASE_SHIFT respectively */
1817#if defined(LT) || defined(RN)
1818	sub	TEMP, K, KK
1819	slwi	r0,   TEMP, 2 + ZBASE_SHIFT
1820	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
1821	add	AO, AO, r0
1822	add	BO, BO, TEMP
1823#endif
1824
/* KK moves by 4: up for LT, down for LN */
1825#ifdef LT
1826	addi	KK, KK, 4
1827#endif
1828
1829#ifdef LN
1830	subi	KK, KK, 4
1831#endif
1832
/* Decrement the tile counter I, reload f0 from SP + FZERO, and loop back to
 * .L11 (outside this chunk) while I > 0.  li/lfpsx do not alter the condition
 * set by addic., so the bgt+ tests the decremented I. */
1833	addic.	I, I, -1
1834	li	r0, FZERO
1835
1836	lfpsx	f0, SP, r0
1837	bgt+	.L11
1838	.align 4
1839
/* .L49: bottom of the J loop.  Advances B and KK for the next pass and
 * branches back to .L10 (outside this chunk) while J > 0. */
1840.L49:
/* LN: B += K << (1 + ZBASE_SHIFT) */
1841#ifdef LN
1842	slwi	r0, K, 1 + ZBASE_SHIFT
1843	add	B, B, r0
1844#endif
1845
/* LT/RN: B continues from where BO stopped, plus 4 * SIZE */
1846#if defined(LT) || defined(RN)
1847	addi	B,  BO, 4 * SIZE
1848#endif
1849
/* KK moves by 2: up for RN, down for RT */
1850#ifdef RN
1851	addi	KK, KK, 2
1852#endif
1853
1854#ifdef RT
1855	subi	KK, KK, 2
1856#endif
1857
1858	addic.	J, J, -1
1859	bgt+	.L10
1860	.align 4
1861
/* .L50: handles the case where bit 0 of N is set (otherwise jumps to .L999).
 * Sets up C/CO1, KK and AO/AORIG for this pass, then -- if bit 0 of M is
 * set -- initialises the accumulators and pre-loads operands for the
 * eight-way unrolled loop at .L72. */
1862.L50:
1863	andi.	J, N,  1
1864	beq	.L999
1865
/* RT: step B back by K << ZBASE_SHIFT and C back by LDC */
1866#ifdef RT
1867	slwi	r0, K, 0 + ZBASE_SHIFT
1868	sub	B, B, r0
1869
1870	sub	C, C, LDC
1871#endif
1872
1873	mr	CO1, C
1874
1875#ifdef LN
1876	add	KK, M, OFFSET
1877#endif
1878
1879#ifdef LT
1880	mr	KK, OFFSET
1881#endif
1882
/* LN/RT keep the A base in AORIG; the other variants use AO directly.
 * Both are biased by -2 * SIZE. */
1883#if defined(LN) || defined(RT)
1884	addi	AORIG, A, -2 * SIZE
1885#else
1886	addi	AO, A, -2 * SIZE
1887#endif
/* NOTE(review): CO2 is not assigned anywhere in this path within the visible
 * code, and C is not read again before .L999 -- confirm this update is only
 * a leftover from the two-column path. */
1888#ifndef RT
1889	add	C,  CO2, LDC
1890#endif
/* f0 is loaded from SP + FZERO and used below to initialise accumulators */
1891	li	r0, FZERO
1892	lfpsx	f0, SP, r0
1893
/* Skip to .L60 when bit 0 of M is clear */
1894	andi.	I, M,  1
1895	beq	.L60
1896
/* Copy f0 into f1-f3 and set CTR to the trip count of the unrolled loop:
 * KK >> 3 for LT/RN, (K - KK) >> 3 for LN/RT (TEMP keeps K - KK for .L74).
 * A count of zero or less goes straight to .L74. */
1897#if defined(LT) || defined(RN)
1898	addi	BO,  B,  - 2 * SIZE
1899	fpmr	f1,  f0
1900	fpmr	f2,  f0
1901	fpmr	f3,  f0
1902	srawi.	r0, KK,  3
1903	mtspr	CTR, r0
1904	ble	.L74
1905#else
1906#ifdef LN
1907	slwi	r0,   K,  0 + ZBASE_SHIFT
1908	sub	AORIG, AORIG, r0
1909#endif
1910
1911	slwi	TEMP, KK, 0 + ZBASE_SHIFT
1912	add	AO, AORIG, TEMP
1913	add	BO, B,     TEMP
1914
1915	sub	TEMP, K, KK
1916
1917	addi	BO,  BO,  - 2 * SIZE
1918	fpmr	f1,  f0
1919	fpmr	f2,  f0
1920	fpmr	f3,  f0
1921	srawi.	r0, TEMP,  3
1922	mtspr	CTR, r0
1923	ble	.L74
1924#endif
1925
/* Pre-load eight A operands (A1-A8) and eight B operands (B1-B6, A9, A10:
 * A9/A10 are used as extra B-side registers here). */
1926	LFPDUX	A1,  AO, INC2
1927	LFPDUX	B1,  BO, INC2
1928	LFPDUX	A2,  AO, INC2
1929	LFPDUX	B2,  BO, INC2
1930	LFPDUX	A3,  AO, INC2
1931	LFPDUX	B3,  BO, INC2
1932	LFPDUX	A4,  AO, INC2
1933	LFPDUX	B4,  BO, INC2
1934
1935	LFPDUX	A5,  AO, INC2
1936	LFPDUX	B5,  BO, INC2
1937	LFPDUX	A6,  AO, INC2
1938	LFPDUX	B6,  BO, INC2
1939	LFPDUX	A7,  AO, INC2
1940	LFPDUX	A9,  BO, INC2
1941	LFPDUX	A8,  AO, INC2
1942	LFPDUX	A10, BO, INC2
/* Decrement CTR; if it reaches zero only the drain at .L73 is needed */
1943	bdz-	.L73
1944	.align 4
1945
/* .L72: eight-way unrolled accumulation loop.
 * Each pass consumes the eight pre-loaded A/B operand pairs -- accumulating
 * alternately into f0/f1 and f2/f3 -- and reloads all sixteen registers for
 * the next pass.  Repeats while the decremented CTR is non-zero. */
1946.L72:
1947	FXCPMADD	f0,  B1, A1, f0
1948	FXCSMADD	f1,  B1, A1, f1
1949	LFPDUX	A1,  AO, INC2
1950	LFPDUX	B1,  BO, INC2
1951	FXCPMADD	f2,  B2, A2, f2
1952	FXCSMADD	f3,  B2, A2, f3
1953	LFPDUX	A2,  AO, INC2
1954	LFPDUX	B2,  BO, INC2
1955
1956	FXCPMADD	f0,  B3, A3, f0
1957	FXCSMADD	f1,  B3, A3, f1
1958	LFPDUX	A3,  AO, INC2
1959	LFPDUX	B3,  BO, INC2
1960	FXCPMADD	f2,  B4, A4, f2
1961	FXCSMADD	f3,  B4, A4, f3
1962	LFPDUX	A4,  AO, INC2
1963	LFPDUX	B4,  BO, INC2
1964
1965	FXCPMADD	f0,  B5, A5, f0
1966	FXCSMADD	f1,  B5, A5, f1
1967	LFPDUX	A5,  AO, INC2
1968	LFPDUX	B5,  BO, INC2
1969	FXCPMADD	f2,  B6, A6, f2
1970	FXCSMADD	f3,  B6, A6, f3
1971	LFPDUX	A6,  AO, INC2
1972	LFPDUX	B6,  BO, INC2
1973
/* A9/A10 hold B-side operands in this loop */
1974	FXCPMADD	f0,  A9,  A7, f0
1975	FXCSMADD	f1,  A9,  A7, f1
1976	LFPDUX	A7,  AO, INC2
1977	LFPDUX	A9,  BO, INC2
1978	FXCPMADD	f2,  A10, A8, f2
1979	FXCSMADD	f3,  A10, A8, f3
1980	LFPDUX	A8,  AO, INC2
1981	LFPDUX	A10, BO, INC2
1982
1983	bdnz+	.L72
1984	.align 4
1985
/* .L73: drain step for the .L72 loop.  Consumes the eight operand pairs
 * that are already in registers without issuing any further loads, then
 * falls through to .L74. */
1986.L73:
1987	FXCPMADD	f0,  B1, A1, f0
1988	FXCSMADD	f1,  B1, A1, f1
1989	FXCPMADD	f2,  B2, A2, f2
1990	FXCSMADD	f3,  B2, A2, f3
1991
1992	FXCPMADD	f0,  B3, A3, f0
1993	FXCSMADD	f1,  B3, A3, f1
1994	FXCPMADD	f2,  B4, A4, f2
1995	FXCSMADD	f3,  B4, A4, f3
1996
1997	FXCPMADD	f0,  B5, A5, f0
1998	FXCSMADD	f1,  B5, A5, f1
1999	FXCPMADD	f2,  B6, A6, f2
2000	FXCSMADD	f3,  B6, A6, f3
2001
2002	FXCPMADD	f0,  A9,  A7, f0
2003	FXCSMADD	f1,  A9,  A7, f1
2004	FXCPMADD	f2,  A10, A8, f2
2005	FXCSMADD	f3,  A10, A8, f3
2006	.align 4
2007
/* .L74: remainder setup after the eight-way unrolled loop.
 * CTR = count & 7, where count is KK (LT/RN) or TEMP (LN/RT).
 * A zero remainder skips to .L78; otherwise one A/B pair is pre-loaded
 * for the single-step loop at .L76. */
2008.L74:
2009#if defined(LT) || defined(RN)
2010	andi.	r0,  KK,  7
2011	mtspr	CTR, r0
2012	ble+	.L78
2013#else
2014	andi.	r0, TEMP, 7
2015	mtspr	CTR, r0
2016	ble+	.L78
2017#endif
2018
2019	LFPDUX	A1,  AO,  INC2
2020	LFPDUX	B1,  BO,  INC2
/* Decrement CTR; if it reaches zero only the drain at .L77 is needed */
2021	bdz-	.L77
2022	.align 4
2023
/* .L76: single-step remainder loop.  Accumulates one A1/B1 pair into f0/f1
 * and loads the next pair; repeats while the decremented CTR is non-zero. */
2024.L76:
2025	FXCPMADD	f0,  B1, A1, f0
2026	FXCSMADD	f1,  B1, A1, f1
2027	LFPDUX	A1,  AO,  INC2
2028	LFPDUX	B1,  BO,  INC2
2029	bdnz+	.L76
2030	.align 4
2031
/* .L77: drain step for .L76 -- accumulates the last loaded A1/B1 pair
 * without issuing another load. */
2032.L77:
2033	FXCPMADD	f0,  B1, A1, f0
2034	FXCSMADD	f1,  B1, A1, f1
2035	.align 4
2036
/* .L78: finish the single-element tile.
 * Folds f2/f3 into f0/f1 and then f1 into f0, subtracts the result from the
 * stored operand, applies one coefficient, and writes the value back to the
 * packed buffer and to the output through CO1. */
2037.L78:
2038	fpadd	f0, f0, f2
2039	fpadd	f1, f1, f3
2040
2041	fpadd	f0, f0, f1
2042
/* LN/RT: AO = AORIG + ((KK - 1) << ZBASE_SHIFT), BO likewise from B, less
 * 2 * SIZE.  Both #ifdef branches below compute the same KK - 1. */
2043#if defined(LN) || defined(RT)
2044#ifdef LN
2045	subi	r0, KK, 1
2046#else
2047	subi	r0, KK, 1
2048#endif
2049	slwi	TEMP, r0, 0 + ZBASE_SHIFT
2050	add	AO, AORIG, TEMP
2051	add	BO, B,     TEMP
2052	addi	BO,  BO, - 2 * SIZE
2053#endif
2054
/* Load the stored operand via the non-update macro LFPDX (BO for LN/LT,
 * AO otherwise) */
2055#if defined(LN) || defined(LT)
2056	LFPDX	f16, BO,  INC2
2057#else
2058	LFPDX	f16, AO,  INC2
2059#endif
2060
/* f0 = loaded operand - accumulated sum */
2061	fpsub	f0,  f16,  f0
2062
/* All four variants apply the same fxpmul + FXCXNPMA pair with a single
 * coefficient A1; they differ only in whether A1 is read through AO (LN/LT)
 * or BO (RN/RT).
 * NOTE(review): FXCXNPMA is a macro defined outside this chunk -- confirm
 * its exact arithmetic there. */
2063#ifdef LN
2064	LFPDX	A1,  AO,  INC2
2065
2066	fxpmul	  f4,  A1, f0
2067	FXCXNPMA  f0,  A1, f0,  f4
2068#endif
2069
2070#ifdef LT
2071	LFPDX	A1,  AO,  INC2
2072
2073	fxpmul	  f4,  A1, f0
2074	FXCXNPMA  f0,  A1, f0, f4
2075#endif
2076
2077#ifdef RN
2078	LFPDX	A1,  BO,  INC2
2079
2080	fxpmul	  f4,  A1, f0
2081	FXCXNPMA  f0,  A1, f0, f4
2082#endif
2083
2084#ifdef RT
2085	LFPDX	A1,  BO,  INC2
2086
2087	fxpmul	  f4,  A1, f0
2088	FXCXNPMA  f0,  A1, f0,  f4
2089#endif
2090
/* LN: move CO1 back by 2 * SIZE before the output store */
2091#ifdef LN
2092	subi	CO1, CO1, 2 * SIZE
2093#endif
2094
/* Store the result back to the buffer it was loaded from */
2095#if defined(LN) || defined(LT)
2096	STFPDX	f0,  BO,  INC2
2097#else
2098	STFPDX	f0,  AO,  INC2
2099#endif
2100
/* Write f0 to the output with an STFDUX/STFSDUX pair */
2101	STFDUX	f0,  CO1, INC
2102	STFSDUX	f0,  CO1, INC
2103
/* LN: move CO1 back by 2 * SIZE again after the store */
2104#ifdef LN
2105	subi	CO1, CO1, 2 * SIZE
2106#endif
2107
/* RT: AORIG += K << ZBASE_SHIFT */
2108#ifdef RT
2109	slwi	r0, K, 0 + ZBASE_SHIFT
2110	add	AORIG, AORIG, r0
2111#endif
2112
/* LT/RN: advance AO and BO by (K - KK) << ZBASE_SHIFT */
2113#if defined(LT) || defined(RN)
2114	sub	TEMP, K, KK
2115	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
2116	add	AO, AO, TEMP
2117	add	BO, BO, TEMP
2118#endif
2119
/* KK moves by 1: up for LT, down for LN */
2120#ifdef LT
2121	addi	KK, KK, 1
2122#endif
2123
2124#ifdef LN
2125	subi	KK, KK, 1
2126#endif
2127
/* Reload f0 from SP + FZERO for the next tile's accumulator setup */
2128	li	r0, FZERO
2129	lfpsx	f0, SP, r0
2130	.align 4
2131
/* .L60: handles the case where bit 1 of M is set (otherwise jumps to .L70).
 * Initialises accumulators f1-f3 from f0, positions AO/BO, sets CTR to the
 * trip count of the four-way unrolled loop at .L62 and pre-loads its
 * operands. */
2132.L60:
2133	andi.	I, M,  2
2134	beq	.L70
2135
/* CTR = KK >> 2 for LT/RN, (K - KK) >> 2 for LN/RT (TEMP keeps K - KK for
 * .L64).  A count of zero or less goes straight to .L64. */
2136#if defined(LT) || defined(RN)
2137 	fpmr	f1,  f0
2138	addi	BO,  B,  - 2 * SIZE
2139	fpmr	f2,  f0
2140	fpmr	f3,  f0
2141	srawi.	r0,  KK,  2
2142	mtspr	CTR, r0
2143	ble	.L64
2144#else
/* LN: step AORIG back by K << (1 + ZBASE_SHIFT) first */
2145#ifdef LN
2146	slwi	r0,   K,  1 + ZBASE_SHIFT
2147	sub	AORIG, AORIG, r0
2148#endif
2149
/* AO = AORIG + (KK << (1 + ZBASE_SHIFT)), BO = B + (KK << ZBASE_SHIFT) */
2150	slwi	r0  , KK, 1 + ZBASE_SHIFT
2151	slwi	TEMP, KK, 0 + ZBASE_SHIFT
2152	add	AO, AORIG, r0
2153	add	BO, B,     TEMP
2154
2155	sub	TEMP, K, KK
2156
2157 	fpmr	f1,  f0
2158	addi	BO,  BO,  - 2 * SIZE
2159	fpmr	f2,  f0
2160	fpmr	f3,  f0
2161	srawi.	r0,  TEMP,  2
2162	mtspr	CTR, r0
2163	ble	.L64
2164#endif
2165
/* Pre-load four B operands and eight A operands (two A per B) */
2166	LFPDUX	B1,  BO, INC2
2167	LFPDUX	A1,  AO, INC2
2168	LFPDUX	A2,  AO, INC2
2169	LFPDUX	B2,  BO, INC2
2170	LFPDUX	A3,  AO, INC2
2171	LFPDUX	A4,  AO, INC2
2172
2173	LFPDUX	B3,  BO, INC2
2174	LFPDUX	A5,  AO, INC2
2175	LFPDUX	A6,  AO, INC2
2176	LFPDUX	B4,  BO, INC2
2177	LFPDUX	A7,  AO, INC2
2178	LFPDUX	A8,  AO, INC2
/* Decrement CTR; if it reaches zero only the drain at .L63 is needed */
2179	bdz-	.L63
2180	.align 4
2181
/* .L62: four-way unrolled accumulation loop.
 * Each of the four groups pairs one B register with two A registers:
 * the first A accumulates into f0/f2, the second into f1/f3.  All twelve
 * operand registers are reloaded for the next pass.  Repeats while the
 * decremented CTR is non-zero. */
2182.L62:
2183	FXCPMADD	f0,  B1, A1, f0
2184	FXCSMADD	f2,  B1, A1, f2
2185	LFPDUX	A1,  AO, INC2
2186	FXCPMADD	f1,  B1, A2, f1
2187	FXCSMADD	f3,  B1, A2, f3
2188	LFPDUX	A2,  AO, INC2
2189	LFPDUX	B1,  BO, INC2
2190
2191	FXCPMADD	f0,  B2, A3, f0
2192	FXCSMADD	f2,  B2, A3, f2
2193	LFPDUX	A3,  AO, INC2
2194	FXCPMADD	f1,  B2, A4, f1
2195	FXCSMADD	f3,  B2, A4, f3
2196	LFPDUX	A4,  AO, INC2
2197	LFPDUX	B2,  BO, INC2
2198
2199	FXCPMADD	f0,  B3, A5, f0
2200	FXCSMADD	f2,  B3, A5, f2
2201	LFPDUX	A5,  AO, INC2
2202	FXCPMADD	f1,  B3, A6, f1
2203	FXCSMADD	f3,  B3, A6, f3
2204	LFPDUX	A6,  AO, INC2
2205	LFPDUX	B3,  BO, INC2
2206
2207	FXCPMADD	f0,  B4, A7, f0
2208	FXCSMADD	f2,  B4, A7, f2
2209	LFPDUX	A7,  AO, INC2
2210	FXCPMADD	f1,  B4, A8, f1
2211	FXCSMADD	f3,  B4, A8, f3
2212	LFPDUX	A8,  AO, INC2
2213	LFPDUX	B4,  BO, INC2
2214	bdnz+	.L62
2215	.align 4
2216
/* .L63: drain step for the .L62 loop.  Consumes the four B / eight A
 * operands already in registers without further loads, then falls through
 * to .L64. */
2217.L63:
2218	FXCPMADD	f0,  B1, A1, f0
2219	FXCSMADD	f2,  B1, A1, f2
2220	FXCPMADD	f1,  B1, A2, f1
2221	FXCSMADD	f3,  B1, A2, f3
2222
2223	FXCPMADD	f0,  B2, A3, f0
2224	FXCSMADD	f2,  B2, A3, f2
2225	FXCPMADD	f1,  B2, A4, f1
2226	FXCSMADD	f3,  B2, A4, f3
2227
2228	FXCPMADD	f0,  B3, A5, f0
2229	FXCSMADD	f2,  B3, A5, f2
2230	FXCPMADD	f1,  B3, A6, f1
2231	FXCSMADD	f3,  B3, A6, f3
2232
2233	FXCPMADD	f0,  B4, A7, f0
2234	FXCSMADD	f2,  B4, A7, f2
2235	FXCPMADD	f1,  B4, A8, f1
2236	FXCSMADD	f3,  B4, A8, f3
2237	.align 4
2238
/* .L64: remainder setup after the four-way unrolled loop.
 * CTR = count & 3, where count is KK (LT/RN) or TEMP (LN/RT).
 * A zero remainder skips to .L68; otherwise one B and two A operands are
 * pre-loaded for the single-step loop at .L66. */
2239.L64:
2240#if defined(LT) || defined(RN)
2241	andi.	r0,  KK,  3
2242	mtspr	CTR, r0
2243	ble+	.L68
2244#else
2245	andi.	r0, TEMP, 3
2246	mtspr	CTR, r0
2247	ble+	.L68
2248#endif
2249
2250	LFPDUX	A1,  AO,  INC2
2251	LFPDUX	B1,  BO,  INC2
2252	LFPDUX	A2,  AO,  INC2
/* Decrement CTR; if it reaches zero only the drain at .L67 is needed */
2253	bdz-	.L67
2254	.align 4
2255
/* .L66: single-step remainder loop.  Accumulates B1 against A1 (into f0/f2)
 * and A2 (into f1/f3), reloading all three registers; repeats while the
 * decremented CTR is non-zero. */
2256.L66:
2257	FXCPMADD	f0,  B1, A1, f0
2258	FXCSMADD	f2,  B1, A1, f2
2259	LFPDUX	A1,  AO,  INC2
2260	FXCPMADD	f1,  B1, A2, f1
2261	FXCSMADD	f3,  B1, A2, f3
2262	LFPDUX	B1,  BO,  INC2
2263	LFPDUX	A2,  AO,  INC2
2264	bdnz+	.L66
2265	.align 4
2266
/* .L67: drain step for .L66 -- accumulates the last loaded B1/A1/A2
 * operands without issuing further loads. */
2267.L67:
2268	FXCPMADD	f0,  B1, A1, f0
2269	FXCSMADD	f2,  B1, A1, f2
2270	FXCPMADD	f1,  B1, A2, f1
2271	FXCSMADD	f3,  B1, A2, f3
2272	.align 4
2273
/* .L68: end of accumulation for the two-element tile.
 * Folds f2/f3 into f0/f1, repositions AO/BO for LN/RT, loads the two stored
 * operands and replaces f0/f1 with (stored operand - accumulated sum). */
2274.L68:
2275	fpadd	f0, f0, f2
2276	fpadd	f1, f1, f3
2277
/* LN/RT: r0 = KK - 2 (LN) or KK - 1 (RT); AO = AORIG + (r0 << (1 + ZBASE_SHIFT)),
 * BO = B + (r0 << ZBASE_SHIFT) - 2 * SIZE */
2278#if defined(LN) || defined(RT)
2279#ifdef LN
2280	subi	r0, KK, 2
2281#else
2282	subi	r0, KK, 1
2283#endif
2284	slwi	TEMP, r0, 1 + ZBASE_SHIFT
2285	slwi	r0,   r0, 0 + ZBASE_SHIFT
2286	add	AO, AORIG, TEMP
2287	add	BO, B,     r0
2288	addi	BO,  BO, - 2 * SIZE
2289#endif
2290
/* Load f16/f17 through BO (LN/LT) or AO (RN/RT), then rewind by 4 * SIZE */
2291#if defined(LN) || defined(LT)
2292	LFPDUX	f16, BO,  INC2
2293	LFPDUX	f17, BO,  INC2
2294
2295	subi	BO,  BO,   4 * SIZE
2296#else
2297	LFPDUX	f16, AO,  INC2
2298	LFPDUX	f17, AO,  INC2
2299
2300	subi	AO,  AO,   4 * SIZE
2301#endif
2302
/* accumulator = loaded operand - accumulator */
2303	fpsub	f0,  f16,  f0
2304	fpsub	f1,  f17,  f1
2305
/* Solve step for the two-element tile; exactly one of the four variants is
 * expected to be compiled in.
 * NOTE(review): FXCXNPMA/FXCXNSMA are macros defined outside this chunk;
 * paired with fxpmul / fxcpnmsub they look like a complex multiply and a
 * complex multiply-subtract -- confirm against their definitions. */
/* LN: load A1..A3 through AO (one slot stepped over with `add`), rewind by
 * 8 * SIZE, then process f1 before f0: rescale f1 by A3, update f0 from f1
 * using A2, rescale f0 by A1. */
2306#ifdef LN
2307	LFPDUX	A1,  AO,  INC2
2308	add	AO,  AO,  INC2
2309	LFPDUX	A2,  AO,  INC2
2310	LFPDUX	A3,  AO,  INC2
2311
2312	subi	AO,  AO,   8 * SIZE
2313
2314	fxpmul	  f4,  A3, f1
2315	FXCXNPMA  f1,  A3, f1,  f4
2316
2317	fxcpnmsub f0,  A2, f1,  f0
2318	FXCXNSMA  f0,  A2, f1,  f0
2319
2320	fxpmul	  f4,  A1, f0
2321	FXCXNPMA  f0,  A1, f0,  f4
2322#endif
2323
/* LT: load A1..A3 through AO (one slot stepped over with `add`), rewind by
 * 8 * SIZE, then process f0 before f1: rescale f0 by A1, update f1 from f0
 * using A2, rescale f1 by A3. */
2324#ifdef LT
2325	LFPDUX	A1,  AO,  INC2
2326	LFPDUX	A2,  AO,  INC2
2327	add	AO,  AO,  INC2
2328	LFPDUX	A3,  AO,  INC2
2329
2330	subi	AO,  AO,   8 * SIZE
2331
2332	fxpmul	  f4,  A1, f0
2333	FXCXNPMA  f0,  A1, f0, f4
2334
2335	fxcpnmsub f1,  A2, f0, f1
2336	FXCXNSMA  f1,  A2, f0, f1
2337
2338	fxpmul	  f6,  A3, f1
2339	FXCXNPMA  f1,  A3, f1, f6
2340#endif
2341
/* RN and RT: a single coefficient A1 is read through BO (no pointer update)
 * and both f0 and f1 are rescaled by it. */
2342#ifdef RN
2343	LFPDX	A1,  BO,  INC2
2344
2345	fxpmul	  f4,  A1, f0
2346	fxpmul	  f5,  A1, f1
2347
2348	FXCXNPMA  f0,  A1, f0, f4
2349	FXCXNPMA  f1,  A1, f1, f5
2350#endif
2351
2352#ifdef RT
2353	LFPDX	A1,  BO,  INC2
2354
2355	fxpmul	  f4,  A1, f0
2356	fxpmul	  f5,  A1, f1
2357
2358	FXCXNPMA  f0,  A1, f0,  f4
2359	FXCXNPMA  f1,  A1, f1,  f5
2360#endif
2361
/* Write-back for the two-element tile: store f0/f1 to the packed buffer and
 * to the output through CO1, then advance pointers and KK. */
/* LN: move CO1 back by 4 * SIZE before the output stores */
2362#ifdef LN
2363	subi	CO1, CO1, 4 * SIZE
2364#endif
2365
/* Store back through BO (LN/LT) or AO (RN/RT), then rewind by 4 * SIZE */
2366#if defined(LN) || defined(LT)
2367	STFPDUX	f0,  BO,  INC2
2368	STFPDUX	f1,  BO,  INC2
2369
2370	subi	BO,  BO,   4 * SIZE
2371#else
2372	STFPDUX	f0,  AO,  INC2
2373	STFPDUX	f1,  AO,  INC2
2374
2375	subi	AO,  AO,   4 * SIZE
2376#endif
2377
/* Each register goes out through an STFDUX/STFSDUX pair, stepping by INC */
2378	STFDUX	f0,  CO1, INC
2379	STFSDUX	f0,  CO1, INC
2380	STFDUX	f1,  CO1, INC
2381	STFSDUX	f1,  CO1, INC
2382
/* LN: move CO1 back by 4 * SIZE again after the stores */
2383#ifdef LN
2384	subi	CO1, CO1, 4 * SIZE
2385#endif
2386
/* RT: AORIG += K << (1 + ZBASE_SHIFT) */
2387#ifdef RT
2388	slwi	r0, K, 1 + ZBASE_SHIFT
2389	add	AORIG, AORIG, r0
2390#endif
2391
/* LT/RN: advance AO by (K - KK) << (1 + ZBASE_SHIFT) and BO by
 * (K - KK) << ZBASE_SHIFT */
2392#if defined(LT) || defined(RN)
2393	sub	TEMP, K, KK
2394	slwi	r0,   TEMP, 1 + ZBASE_SHIFT
2395	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
2396	add	AO, AO, r0
2397	add	BO, BO, TEMP
2398#endif
2399
/* KK moves by 2: up for LT, down for LN */
2400#ifdef LT
2401	addi	KK, KK, 2
2402#endif
2403
2404#ifdef LN
2405	subi	KK, KK, 2
2406#endif
2407
/* Reload f0 from SP + FZERO for the next tile's accumulator setup */
2408	li	r0, FZERO
2409	lfpsx	f0, SP, r0
2410	.align 4
2411
/* .L70: I = M >> 2 is the trip count of the four-element tile loop at .L51;
 * a count of zero or less skips to .L89. */
2412.L70:
2413	srawi.	I, M,  2
2414	ble	.L89
2415	.align 4
2416
/* .L51: top of the four-element tile loop (runs I times).
 * Copies f0 into accumulators f1-f7, positions AO/BO, sets CTR to the trip
 * count of the four-way unrolled loop at .L52 and pre-loads its operands. */
2417.L51:
/* CTR = KK >> 2 for LT/RN, (K - KK) >> 2 for LN/RT (TEMP keeps K - KK for
 * .L54).  A count of zero or less goes straight to .L54. */
2418#if defined(LT) || defined(RN)
2419	fpmr	f4,  f0
2420	addi	BO,  B,  - 2 * SIZE
2421 	fpmr	f1,  f0
2422	fpmr	f5,  f0
2423	fpmr	f2,  f0
2424	fpmr	f6,  f0
2425	fpmr	f3,  f0
2426	fpmr	f7,  f0
2427	srawi.	r0,  KK,  2
2428	mtspr	CTR, r0
2429	ble	.L54
2430#else
2431
/* LN: step AORIG back by K << (2 + ZBASE_SHIFT) first */
2432#ifdef LN
2433	slwi	r0,   K,  2 + ZBASE_SHIFT
2434	sub	AORIG, AORIG, r0
2435#endif
2436
/* AO = AORIG + (KK << (2 + ZBASE_SHIFT)), BO = B + (KK << ZBASE_SHIFT) */
2437	slwi	r0  , KK, 2 + ZBASE_SHIFT
2438	slwi	TEMP, KK, 0 + ZBASE_SHIFT
2439	add	AO, AORIG, r0
2440	add	BO, B,     TEMP
2441
2442	sub	TEMP, K, KK
2443
2444	fpmr	f4,  f0
2445	addi	BO,  BO,  - 2 * SIZE
2446 	fpmr	f1,  f0
2447	fpmr	f5,  f0
2448	fpmr	f2,  f0
2449	fpmr	f6,  f0
2450	fpmr	f3,  f0
2451	fpmr	f7,  f0
2452	srawi.	r0, TEMP,  2
2453	mtspr	CTR, r0
2454	ble	.L54
2455#endif
2456
/* Pre-load B1-B3 and A1-A8; B4 is loaded at the top of .L52 / .L53 */
2457	LFPDUX	B1,  BO,  INC2
2458	LFPDUX	A1,  AO,  INC2
2459	LFPDUX	A2,  AO,  INC2
2460	LFPDUX	B2,  BO,  INC2
2461	LFPDUX	A3,  AO,  INC2
2462	LFPDUX	A4,  AO,  INC2
2463
2464	LFPDUX	B3,  BO,  INC2
2465	LFPDUX	A5,  AO,  INC2
2466	LFPDUX	A6,  AO,  INC2
2467	LFPDUX	A7,  AO,  INC2
2468	LFPDUX	A8,  AO,  INC2
/* Decrement CTR; if it reaches zero only the drain at .L53 is needed */
2469	bdz-	.L53
2470	.align 4
2471
/* .L52: four-way unrolled accumulation loop for the four-element tile.
 * Four groups per pass, one per B register (B1, B2, B3, B4); each group
 * pairs that B with four A registers and accumulates into f0-f3 (FXCPMADD)
 * and f4-f7 (FXCSMADD).  A1-A4 serve B1 and B3, A5-A8 serve B2 and B4, so
 * every A register is reloaded twice per pass (16 AO loads, 4 BO loads).
 * The nop instructions occupy the slots where no load is issued.
 * Repeats while the decremented CTR is non-zero. */
2472.L52:
/* group 1: B1 with A1-A4; B4 for group 4 is loaded here */
2473	FXCPMADD	f0,  B1, A1, f0
2474	LFPDUX	B4,  BO,  INC2
2475	FXCSMADD	f4,  B1, A1, f4
2476	LFPDUX	A1,  AO,  INC2
2477	FXCPMADD	f1,  B1, A2, f1
2478	nop
2479	FXCSMADD	f5,  B1, A2, f5
2480	LFPDUX	A2,  AO,  INC2
2481
2482	FXCPMADD	f2,  B1, A3, f2
2483	nop
2484	FXCSMADD	f6,  B1, A3, f6
2485	LFPDUX	A3,  AO,  INC2
2486	FXCPMADD	f3,  B1, A4, f3
2487	nop
2488	FXCSMADD	f7,  B1, A4, f7
2489	LFPDUX	A4,  AO,  INC2
2490
/* group 2: B2 with A5-A8; B1 is reloaded for the next pass */
2491	FXCPMADD	f0,  B2, A5, f0
2492	LFPDUX	B1,  BO,  INC2
2493	FXCSMADD	f4,  B2, A5, f4
2494	LFPDUX	A5,  AO,  INC2
2495	FXCPMADD	f1,  B2, A6, f1
2496	nop
2497	FXCSMADD	f5,  B2, A6, f5
2498	LFPDUX	A6,  AO,  INC2
2499
2500	FXCPMADD	f2,  B2, A7, f2
2501	nop
2502	FXCSMADD	f6,  B2, A7, f6
2503	LFPDUX	A7,  AO,  INC2
2504	FXCPMADD	f3,  B2, A8, f3
2505	nop
2506	FXCSMADD	f7,  B2, A8, f7
2507	LFPDUX	A8,  AO,  INC2
2508
/* group 3: B3 with the reloaded A1-A4; B2 is reloaded for the next pass */
2509	FXCPMADD	f0,  B3, A1, f0
2510	LFPDUX	B2,  BO,  INC2
2511	FXCSMADD	f4,  B3, A1, f4
2512	LFPDUX	A1,  AO,  INC2
2513	FXCPMADD	f1,  B3, A2, f1
2514	nop
2515	FXCSMADD	f5,  B3, A2, f5
2516	LFPDUX	A2,  AO,  INC2
2517
2518	FXCPMADD	f2,  B3, A3, f2
2519	nop
2520	FXCSMADD	f6,  B3, A3, f6
2521	LFPDUX	A3,  AO,  INC2
2522	FXCPMADD	f3,  B3, A4, f3
2523	nop
2524	FXCSMADD	f7,  B3, A4, f7
2525	LFPDUX	A4,  AO,  INC2
2526
/* group 4: B4 with the reloaded A5-A8; B3 is reloaded for the next pass */
2527	FXCPMADD	f0,  B4, A5, f0
2528	LFPDUX	B3,  BO,  INC2
2529	FXCSMADD	f4,  B4, A5, f4
2530	LFPDUX	A5,  AO,  INC2
2531	FXCPMADD	f1,  B4, A6, f1
2532	nop
2533	FXCSMADD	f5,  B4, A6, f5
2534	LFPDUX	A6,  AO,  INC2
2535
2536	FXCPMADD	f2,  B4, A7, f2
2537	nop
2538	FXCSMADD	f6,  B4, A7, f6
2539	LFPDUX	A7,  AO,  INC2
2540	FXCPMADD	f3,  B4, A8, f3
2541	nop
2542	FXCSMADD	f7,  B4, A8, f7
2543	LFPDUX	A8,  AO,  INC2
2544	bdnz+	.L52
2545	.align 4
2546
/* .L53: drain step for the .L52 loop.
 * Groups 1 and 2 (B1, B2) still load B4 and the second set of A1-A8 that
 * groups 3 and 4 need; groups 3 and 4 (B3, B4) issue no loads.  Falls
 * through to .L54. */
2547.L53:
/* group 1: B1 with A1-A4, loading B4 and reloading A1-A4 */
2548	FXCPMADD	f0,  B1, A1, f0
2549	LFPDUX	B4,  BO,  INC2
2550	FXCSMADD	f4,  B1, A1, f4
2551	LFPDUX	A1,  AO,  INC2
2552	FXCPMADD	f1,  B1, A2, f1
2553	nop
2554	FXCSMADD	f5,  B1, A2, f5
2555	LFPDUX	A2,  AO,  INC2
2556
2557	FXCPMADD	f2,  B1, A3, f2
2558	nop
2559	FXCSMADD	f6,  B1, A3, f6
2560	LFPDUX	A3,  AO,  INC2
2561	FXCPMADD	f3,  B1, A4, f3
2562	nop
2563	FXCSMADD	f7,  B1, A4, f7
2564	LFPDUX	A4,  AO,  INC2
2565
/* group 2: B2 with A5-A8, reloading A5-A8 (no further B load) */
2566	FXCPMADD	f0,  B2, A5, f0
2567	nop
2568	FXCSMADD	f4,  B2, A5, f4
2569	LFPDUX	A5,  AO,  INC2
2570	FXCPMADD	f1,  B2, A6, f1
2571	nop
2572	FXCSMADD	f5,  B2, A6, f5
2573	LFPDUX	A6,  AO,  INC2
2574
2575	FXCPMADD	f2,  B2, A7, f2
2576	nop
2577	FXCSMADD	f6,  B2, A7, f6
2578	LFPDUX	A7,  AO,  INC2
2579	FXCPMADD	f3,  B2, A8, f3
2580	nop
2581	FXCSMADD	f7,  B2, A8, f7
2582	LFPDUX	A8,  AO,  INC2
2583
/* groups 3 and 4: B3 with A1-A4, B4 with A5-A8, no loads */
2584	FXCPMADD	f0,  B3, A1, f0
2585	FXCSMADD	f4,  B3, A1, f4
2586	FXCPMADD	f1,  B3, A2, f1
2587	FXCSMADD	f5,  B3, A2, f5
2588
2589	FXCPMADD	f2,  B3, A3, f2
2590	FXCSMADD	f6,  B3, A3, f6
2591	FXCPMADD	f3,  B3, A4, f3
2592	FXCSMADD	f7,  B3, A4, f7
2593
2594	FXCPMADD	f0,  B4, A5, f0
2595	FXCSMADD	f4,  B4, A5, f4
2596	FXCPMADD	f1,  B4, A6, f1
2597	FXCSMADD	f5,  B4, A6, f5
2598
2599	FXCPMADD	f2,  B4, A7, f2
2600	FXCSMADD	f6,  B4, A7, f6
2601	FXCPMADD	f3,  B4, A8, f3
2602	FXCSMADD	f7,  B4, A8, f7
2603	.align 4
2604
/* .L54: remainder setup after the four-way unrolled loop.
 * CTR = count & 3, where count is KK (LT/RN) or TEMP (LN/RT).
 * A zero remainder skips to .L58; otherwise one B and four A operands are
 * pre-loaded for the single-step loop at .L56. */
2605.L54:
2606#if defined(LT) || defined(RN)
2607	andi.	r0,  KK,  3
2608	mtspr	CTR, r0
2609	ble+	.L58
2610#else
2611	andi.	r0, TEMP, 3
2612	mtspr	CTR, r0
2613	ble+	.L58
2614#endif
2615
2616	LFPDUX	A1,  AO,  INC2
2617	LFPDUX	B1,  BO,  INC2
2618	LFPDUX	A2,  AO,  INC2
2619	LFPDUX	A3,  AO,  INC2
2620	LFPDUX	A4,  AO,  INC2
/* Decrement CTR; if it reaches zero only the drain at .L57 is needed */
2621	bdz-	.L57
2622	.align 4
2623
/* .L56: single-step remainder loop.  Accumulates B1 against A1-A4 into
 * f0-f3 (FXCPMADD) and f4-f7 (FXCSMADD), reloading all five registers;
 * repeats while the decremented CTR is non-zero. */
2624.L56:
2625	FXCPMADD	f0,  B1, A1, f0
2626	FXCSMADD	f4,  B1, A1, f4
2627	LFPDUX	A1,  AO,  INC2
2628	FXCPMADD	f1,  B1, A2, f1
2629	FXCSMADD	f5,  B1, A2, f5
2630	LFPDUX	A2,  AO,  INC2
2631
2632	FXCPMADD	f2,  B1, A3, f2
2633	FXCSMADD	f6,  B1, A3, f6
2634	LFPDUX	A3,  AO,  INC2
2635	FXCPMADD	f3,  B1, A4, f3
2636	FXCSMADD	f7,  B1, A4, f7
2637	LFPDUX	A4,  AO,  INC2
2638	LFPDUX	B1,  BO,  INC2
2639	bdnz+	.L56
2640	.align 4
2641
/* .L57: drain step for .L56 -- accumulates the last loaded B1/A1-A4
 * operands without issuing further loads. */
2642.L57:
2643	FXCPMADD	f0,  B1, A1, f0
2644	FXCSMADD	f4,  B1, A1, f4
2645	FXCPMADD	f1,  B1, A2, f1
2646	FXCSMADD	f5,  B1, A2, f5
2647
2648	FXCPMADD	f2,  B1, A3, f2
2649	FXCSMADD	f6,  B1, A3, f6
2650	FXCPMADD	f3,  B1, A4, f3
2651	FXCSMADD	f7,  B1, A4, f7
2652	.align 4
2653
/* .L58: end of accumulation for the four-element tile.
 * Folds f4-f7 into f0-f3, repositions AO/BO for LN/RT, loads the four
 * stored operands and replaces f0-f3 with (stored operand - accumulated
 * sum). */
2654.L58:
2655	fpadd	f0,  f0,  f4
2656	fpadd	f1,  f1,  f5
2657	fpadd	f2,  f2,  f6
2658	fpadd	f3,  f3,  f7
2659
/* LN/RT: r0 = KK - 4 (LN) or KK - 1 (RT); AO = AORIG + (r0 << (2 + ZBASE_SHIFT)),
 * BO = B + (r0 << ZBASE_SHIFT) - 2 * SIZE */
2660#if defined(LN) || defined(RT)
2661#ifdef LN
2662	subi	r0, KK, 4
2663#else
2664	subi	r0, KK, 1
2665#endif
2666	slwi	TEMP, r0, 2 + ZBASE_SHIFT
2667	slwi	r0,   r0, 0 + ZBASE_SHIFT
2668	add	AO, AORIG, TEMP
2669	add	BO, B,     r0
2670	addi	BO,  BO, - 2 * SIZE
2671#endif
2672
/* Load f16-f19 through BO (LN/LT) or AO (RN/RT), then rewind by 8 * SIZE */
2673#if defined(LN) || defined(LT)
2674	LFPDUX	f16, BO,  INC2
2675	LFPDUX	f17, BO,  INC2
2676	LFPDUX	f18, BO,  INC2
2677	LFPDUX	f19, BO,  INC2
2678
2679	subi	BO,  BO,   8 * SIZE
2680#else
2681	LFPDUX	f16, AO,  INC2
2682	LFPDUX	f17, AO,  INC2
2683	LFPDUX	f18, AO,  INC2
2684	LFPDUX	f19, AO,  INC2
2685
2686	subi	AO,  AO,   8 * SIZE
2687#endif
2688
/* accumulator = loaded operand - accumulator */
2689	fpsub	f0,  f16,  f0
2690	fpsub	f1,  f17,  f1
2691	fpsub	f2,  f18,  f2
2692	fpsub	f3,  f19,  f3
2693
/* LN variant of the .L58 solve step.
 * Loads A1..A10 through AO; the plain `add` instructions step over slots
 * that are not loaded (16 * INC2 in total), then AO is moved back by
 * 32 * SIZE.  Rows are processed from f3 down to f0: fxpmul + FXCXNPMA
 * rescales a row by A10, A6, A3, A1 in turn and fxcpnmsub + FXCXNSMA pairs
 * update the rows still to be processed.
 * NOTE(review): the FXCX* macros are defined outside this chunk -- confirm
 * their exact arithmetic there. */
2694#ifdef LN
2695	LFPDUX	A1,  AO,  INC2
2696	add	AO,  AO,  INC2
2697	add	AO,  AO,  INC2
2698	add	AO,  AO,  INC2
2699
2700	LFPDUX	A2,  AO,  INC2
2701	LFPDUX	A3,  AO,  INC2
2702	add	AO,  AO,  INC2
2703	add	AO,  AO,  INC2
2704
2705	LFPDUX	A4,  AO,  INC2
2706	LFPDUX	A5,  AO,  INC2
2707	LFPDUX	A6,  AO,  INC2
2708	add	AO,  AO,  INC2
2709
2710	LFPDUX	A7,  AO,  INC2
2711	LFPDUX	A8,  AO,  INC2
2712	LFPDUX	A9,  AO,  INC2
2713	LFPDUX	A10, AO,  INC2
2714
2715	subi	AO,  AO,  32 * SIZE
2716
/* f3: rescale by A10, then update f2, f1, f0 using A9, A8, A7 */
2717	fxpmul	  f4,  A10, f3
2718	FXCXNPMA  f3,  A10, f3,  f4
2719
2720	fxcpnmsub f2,  A9, f3,  f2
2721	FXCXNSMA  f2,  A9, f3,  f2
2722
2723	fxcpnmsub f1,  A8, f3,  f1
2724	FXCXNSMA  f1,  A8, f3,  f1
2725
2726	fxcpnmsub f0,  A7, f3,  f0
2727	FXCXNSMA  f0,  A7, f3,  f0
2728
/* f2: rescale by A6, then update f1, f0 using A5, A4 */
2729	fxpmul	  f4,  A6, f2
2730	FXCXNPMA  f2,  A6, f2,  f4
2731
2732	fxcpnmsub f1,  A5, f2,  f1
2733	FXCXNSMA  f1,  A5, f2,  f1
2734
2735	fxcpnmsub f0,  A4, f2,  f0
2736	FXCXNSMA  f0,  A4, f2,  f0
2737
/* f1: rescale by A3, then update f0 using A2 */
2738	fxpmul	  f4,  A3, f1
2739	FXCXNPMA  f1,  A3, f1,  f4
2740
2741	fxcpnmsub f0,  A2, f1,  f0
2742	FXCXNSMA  f0,  A2, f1,  f0
2743
/* f0: rescale by A1 */
2744	fxpmul	  f4,  A1, f0
2745	FXCXNPMA  f0,  A1, f0,  f4
2746#endif
2747
/* LT variant of the .L58 solve step.
 * Loads A1..A10 through AO (plain `add` steps over slots that are not
 * loaded; 16 * INC2 in total), then moves AO back by 32 * SIZE.
 * Rows are processed from f0 up to f3: fxpmul + FXCXNPMA rescales a row by
 * A1, A5, A8, A10 in turn and fxcpnmsub + FXCXNSMA pairs update the rows
 * that follow.
 * NOTE(review): the FXCX* macros are defined outside this chunk -- confirm
 * their exact arithmetic there. */
2748#ifdef LT
2749	LFPDUX	A1,  AO,  INC2
2750	LFPDUX	A2,  AO,  INC2
2751	LFPDUX	A3,  AO,  INC2
2752	LFPDUX	A4,  AO,  INC2
2753
2754	add	AO,  AO,  INC2
2755	LFPDUX	A5,  AO,  INC2
2756	LFPDUX	A6,  AO,  INC2
2757	LFPDUX	A7,  AO,  INC2
2758
2759	add	AO,  AO,  INC2
2760	add	AO,  AO,  INC2
2761	LFPDUX	A8,  AO,  INC2
2762	LFPDUX	A9,  AO,  INC2
2763
2764	add	AO,  AO,  INC2
2765	add	AO,  AO,  INC2
2766	add	AO,  AO,  INC2
2767	LFPDUX	A10, AO,  INC2
2768
2769	subi	AO,  AO,  32 * SIZE
2770
/* f0: rescale by A1, then update f1, f2, f3 using A2, A3, A4 */
2771	fxpmul	  f4,  A1, f0
2772	FXCXNPMA  f0,  A1, f0, f4
2773
2774	fxcpnmsub f1,  A2, f0, f1
2775	FXCXNSMA  f1,  A2, f0, f1
2776
2777	fxcpnmsub f2,  A3, f0, f2
2778	FXCXNSMA  f2,  A3, f0, f2
2779
2780	fxcpnmsub f3,  A4, f0, f3
2781	FXCXNSMA  f3,  A4, f0, f3
2782
/* f1: rescale by A5, then update f2, f3 using A6, A7 */
2783	fxpmul	  f6,  A5, f1
2784	FXCXNPMA  f1,  A5, f1, f6
2785
2786	fxcpnmsub f2,  A6, f1, f2
2787	FXCXNSMA  f2,  A6, f1, f2
2788
2789	fxcpnmsub f3,  A7, f1, f3
2790	FXCXNSMA  f3,  A7, f1, f3
2791
/* f2: rescale by A8, then update f3 using A9 */
2792	fxpmul	  f4,  A8, f2
2793	FXCXNPMA  f2,  A8, f2,  f4
2794
2795	fxcpnmsub f3,  A9, f2,  f3
2796	FXCXNSMA  f3,  A9, f2,  f3
2797
/* f3: rescale by A10 */
2798	fxpmul	  f6,  A10, f3
2799	FXCXNPMA  f3,  A10, f3,  f6
2800#endif
2801
/* RN and RT variants of the .L58 solve step.  Both read a single
 * coefficient A1 through BO (no pointer update) and rescale f0-f3 by it
 * with fxpmul + FXCXNPMA, using f4-f7 as scratch.
 * NOTE(review): FXCXNPMA is a macro defined outside this chunk -- confirm
 * its exact arithmetic there. */
2802#ifdef RN
2803	LFPDX	A1,  BO,  INC2
2804
2805	fxpmul	  f4,  A1, f0
2806	fxpmul	  f5,  A1, f1
2807	fxpmul	  f6,  A1, f2
2808	fxpmul	  f7,  A1, f3
2809
2810	FXCXNPMA  f0,  A1, f0, f4
2811	FXCXNPMA  f1,  A1, f1, f5
2812	FXCXNPMA  f2,  A1, f2, f6
2813	FXCXNPMA  f3,  A1, f3, f7
2814#endif
2815
2816#ifdef RT
2817	LFPDX	A1,  BO,  INC2
2818
2819	fxpmul	  f4,  A1, f0
2820	fxpmul	  f5,  A1, f1
2821	fxpmul	  f6,  A1, f2
2822	fxpmul	  f7,  A1, f3
2823
2824	FXCXNPMA  f0,  A1, f0,  f4
2825	FXCXNPMA  f1,  A1, f1,  f5
2826	FXCXNPMA  f2,  A1, f2,  f6
2827	FXCXNPMA  f3,  A1, f3,  f7
2828#endif
2829
/* Write-back for the four-element tile: store f0-f3 to the packed buffer
 * and to the output through CO1, advance pointers and KK, and loop back to
 * .L51 while tiles remain. */
/* LN: move CO1 back by 8 * SIZE before the output stores */
2830#ifdef LN
2831	subi	CO1, CO1, 8 * SIZE
2832#endif
2833
/* Store back through BO (LN/LT) or AO (RN/RT), then rewind by 8 * SIZE */
2834#if defined(LN) || defined(LT)
2835	STFPDUX	f0,  BO,  INC2
2836	STFPDUX	f1,  BO,  INC2
2837	STFPDUX	f2,  BO,  INC2
2838	STFPDUX	f3,  BO,  INC2
2839
2840	subi	BO,  BO,   8 * SIZE
2841#else
2842	STFPDUX	f0,  AO,  INC2
2843	STFPDUX	f1,  AO,  INC2
2844	STFPDUX	f2,  AO,  INC2
2845	STFPDUX	f3,  AO,  INC2
2846
2847	subi	AO,  AO,   8 * SIZE
2848#endif
2849
/* Each register goes out through an STFDUX/STFSDUX pair, stepping by INC */
2850	STFDUX	f0,  CO1, INC
2851	STFSDUX	f0,  CO1, INC
2852	STFDUX	f1,  CO1, INC
2853	STFSDUX	f1,  CO1, INC
2854	STFDUX	f2,  CO1, INC
2855	STFSDUX	f2,  CO1, INC
2856	STFDUX	f3,  CO1, INC
2857	STFSDUX	f3,  CO1, INC
2858
/* LN: move CO1 back by 8 * SIZE again after the stores */
2859#ifdef LN
2860	subi	CO1, CO1, 8 * SIZE
2861#endif
2862
/* RT: AORIG += K << (2 + ZBASE_SHIFT) */
2863#ifdef RT
2864	slwi	r0, K, 2 + ZBASE_SHIFT
2865	add	AORIG, AORIG, r0
2866#endif
2867
/* LT/RN: advance AO by (K - KK) << (2 + ZBASE_SHIFT) and BO by
 * (K - KK) << ZBASE_SHIFT */
2868#if defined(LT) || defined(RN)
2869	sub	TEMP, K, KK
2870	slwi	r0,   TEMP, 2 + ZBASE_SHIFT
2871	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
2872	add	AO, AO, r0
2873	add	BO, BO, TEMP
2874#endif
2875
/* KK moves by 4: up for LT, down for LN */
2876#ifdef LT
2877	addi	KK, KK, 4
2878#endif
2879
2880#ifdef LN
2881	subi	KK, KK, 4
2882#endif
2883
/* Decrement I, reload f0 from SP + FZERO, and repeat from .L51 while I > 0.
 * li/lfpsx do not alter the condition set by addic. */
2884	addic.	I, I, -1
2885	li	r0, FZERO
2886
2887	lfpsx	f0, SP, r0
2888	bgt+	.L51
2889	.align 4
2890
/* .L89: end of the single-column pass.  Advances B and KK the same way
 * .L49 does for the two-column pass, using the single-column amounts, then
 * falls through to the epilogue at .L999. */
2891.L89:
/* LN: B += K << ZBASE_SHIFT */
2892#ifdef LN
2893	slwi	r0, K, 0 + ZBASE_SHIFT
2894	add	B, B, r0
2895#endif
2896
/* LT/RN: B continues from where BO stopped, plus 2 * SIZE */
2897#if defined(LT) || defined(RN)
2898	addi	B,  BO, 2 * SIZE
2899#endif
2900
/* KK moves by 1: up for RN, down for RT */
2901#ifdef RN
2902	addi	KK, KK, 1
2903#endif
2904
2905#ifdef RT
2906	subi	KK, KK, 1
2907#endif
2908	.align 4
2909
/* .L999: common exit.  Restores integer registers r14-r31 and floating-point
 * registers f31 down to f14 from the stack area addressed by SP, then
 * returns with blr.
 * NOTE(review): the matching save sequence is outside this chunk; the
 * offsets used here (20, 4-byte steps, -12, 16-byte steps, +16) must mirror
 * it -- confirm against the prologue. */
2910.L999:
2911	addi	SP, SP, 20
2912
/* Eighteen word loads with a 4-byte pre-increment of SP: r14 .. r31 */
2913	lwzu	r14,   4(SP)
2914	lwzu	r15,   4(SP)
2915
2916	lwzu	r16,   4(SP)
2917	lwzu	r17,   4(SP)
2918	lwzu	r18,   4(SP)
2919	lwzu	r19,   4(SP)
2920
2921	lwzu	r20,   4(SP)
2922	lwzu	r21,   4(SP)
2923	lwzu	r22,   4(SP)
2924	lwzu	r23,   4(SP)
2925
2926	lwzu	r24,   4(SP)
2927	lwzu	r25,   4(SP)
2928	lwzu	r26,   4(SP)
2929	lwzu	r27,   4(SP)
2930
2931	lwzu	r28,   4(SP)
2932	lwzu	r29,   4(SP)
2933	lwzu	r30,   4(SP)
2934	lwzu	r31,   4(SP)
2935
/* Step SP back by 12 and use r0 = 16 as the stride for the FP restores */
2936	subi	SP, SP, 12
2937	li	r0, 16
2938
/* Eighteen loads with update, 16 bytes apart: f31 down to f14 */
2939	lfpdux	f31, SP, r0
2940	lfpdux	f30, SP, r0
2941	lfpdux	f29, SP, r0
2942	lfpdux	f28, SP, r0
2943	lfpdux	f27, SP, r0
2944	lfpdux	f26, SP, r0
2945	lfpdux	f25, SP, r0
2946	lfpdux	f24, SP, r0
2947	lfpdux	f23, SP, r0
2948	lfpdux	f22, SP, r0
2949	lfpdux	f21, SP, r0
2950	lfpdux	f20, SP, r0
2951	lfpdux	f19, SP, r0
2952	lfpdux	f18, SP, r0
2953	lfpdux	f17, SP, r0
2954	lfpdux	f16, SP, r0
2955	lfpdux	f15, SP, r0
2956	lfpdux	f14, SP, r0
2957	addi	SP, SP, 16
2958	blr
2959	.align 4
2960
2961
/* EPILOGUE is a macro defined outside this chunk */
2962	EPILOGUE
2963#endif
2964