1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
/* Integer-register aliases used throughout this kernel. */
/* ZERO is redefined further down as a register name, so any earlier
   definition (presumably from common.h) is dropped first. */
42#undef ZERO
43
/* Byte offsets from SP into the scratch area pushed at the end of the
   prologue: f1 is stored at SP+0 and f2 at SP+8 (ALPHA), and two zero
   words follow at SP+16 (FZERO). */
44#define ALPHA    0
45#define FZERO	16
46
/* Sizes. The prologue returns early when any of them is <= 0. */
47#define	M	r3
48#define	N	r4
49#define	K	r5
50
/* Pointer, stride and offset arguments. Only a `linux` mapping is visible
   in this part of the file. */
51#ifdef linux
52#define A	r6
53#define	B	r7
54#define	C	r8
55#define	LDC	r9
56#define OFFSET	r10
57#endif
58
/* TEMP:  scratch.
   AORIG: base from which AO is recomputed in the LN / RT variants.
   KK:    running offset, stepped per row block (LN/LT) or per column (RN/RT). */
59#define TEMP	r11
60#define AORIG	r12
61#define KK	r14
/* Constant byte strides, loaded once after the size checks:
   INC = 1*SIZE, INC2 = 2*SIZE, INC4 = 4*SIZE, INCMn = -n*SIZE. */
62#define INCM1	r15
63#define INCM3	r16
64#define INCM5	r17
65#define INCM7	r18
66#define INC2	r19
67#define INC	r20
68#define INC4	r21
69
/* I / J: loop counters derived from M and N.
   AO / BO: running read pointers; AO2 / BO2 are second read pointers used
   by the two-column path. */
70#define	I	r22
71#define J	r23
72#define AO	r24
73#define BO	r25
74#define AO2	r26
75#define	BO2	r27
76
/* CO1 / CO2: write pointers into C for the current column(s).
   ZERO: alias for r31; no use of it is visible in this part of the file. */
77#define	CO1	r28
78#define CO2	r29
79#define	ZERO	r31
80
81#ifndef NEEDPARAM
82
/* Floating-point register aliases: A1..A10 = f16..f25, B1..B6 = f26..f31.
   A* registers are normally loaded through AO and B* through BO, although
   the single-row loop also loads A9 / A10 through BO, and the solve steps
   load f16..f19 by number. */
83#define A1	f16
84#define A2	f17
85#define A3	f18
86#define A4	f19
87#define A5	f20
88#define A6	f21
89#define A7	f22
90#define A8	f23
91#define A9	f24
92#define A10	f25
93
94#define B1	f26
95#define B2	f27
96#define B3	f28
97#define B4	f29
98#define B5	f30
99#define B6	f31
100
/* AP shares f31 with B6; it is not referenced in this part of the file. */
101#define AP	B6
102
/* Multiply-add mnemonics used by the accumulation loops. Without CONJ the
   pair is fxcpmadd / fxcxnpma. With CONJ the pair depends on whether LN or
   LT is defined. */
103#ifndef CONJ
104#define FXCPMADD	fxcpmadd
105#define FXCSMADD	fxcxnpma
106#else
107#if defined(LN) || defined(LT)
108#define FXCPMADD	fxcpnsma
109#define FXCSMADD	fxcxma
110#else
111#define FXCPMADD	fxcpmadd
112#define FXCSMADD	fxcxnsma
113#endif
114#endif
115
/* Mnemonics used by the solve steps; defining CONJ swaps the two. */
116#ifndef CONJ
117#define FXCXNPMA	fxcxnpma
118#define FXCXNSMA	fxcxnsma
119#else
120#define FXCXNPMA	fxcxnsma
121#define FXCXNSMA	fxcxnpma
122#endif
123
124
/*
 * Kernel entry: push registers, build a small scratch area at SP, reject
 * empty problems, load the constant strides and position A / B / C / KK
 * for the variant being built (LN, LT, RN or RT).
 * NOTE(review): the matching restore is presumably at .L999, which is
 * outside this part of the file.
 */
125	PROLOGUE
126	PROFCODE
127
/* r0 = -16: each stfpdux below moves SP down by 16 bytes and stores one
   register pair at the new SP. */
128	li	r0, -16
129
/* Push f14..f31. */
130	stfpdux	f14, SP, r0
131	stfpdux	f15, SP, r0
132	stfpdux	f16, SP, r0
133	stfpdux	f17, SP, r0
134	stfpdux	f18, SP, r0
135	stfpdux	f19, SP, r0
136	stfpdux	f20, SP, r0
137	stfpdux	f21, SP, r0
138	stfpdux	f22, SP, r0
139	stfpdux	f23, SP, r0
140	stfpdux	f24, SP, r0
141	stfpdux	f25, SP, r0
142	stfpdux	f26, SP, r0
143	stfpdux	f27, SP, r0
144	stfpdux	f28, SP, r0
145	stfpdux	f29, SP, r0
146	stfpdux	f30, SP, r0
147	stfpdux	f31, SP, r0
148
/* Push r31..r14, one word each. */
149	stwu	r31,  -4(SP)
150	stwu	r30,  -4(SP)
151	stwu	r29,  -4(SP)
152	stwu	r28,  -4(SP)
153
154	stwu	r27,  -4(SP)
155	stwu	r26,  -4(SP)
156	stwu	r25,  -4(SP)
157	stwu	r24,  -4(SP)
158
159	stwu	r23,  -4(SP)
160	stwu	r22,  -4(SP)
161	stwu	r21,  -4(SP)
162	stwu	r20,  -4(SP)
163
164	stwu	r19,  -4(SP)
165	stwu	r18,  -4(SP)
166	stwu	r17,  -4(SP)
167	stwu	r16,  -4(SP)
168
169	stwu	r15,  -4(SP)
170	stwu	r14,  -4(SP)
171
/* Two zero words. After the two doubles pushed next they sit at SP+16,
   which is the FZERO offset used to reload zeros into f0. */
172	li	r0,   0
173	stwu	r0,   -4(SP)
174	stwu	r0,   -4(SP)
175
/* f2 is pushed first, then f1, so f1 ends at SP+0 (ALPHA) and f2 at SP+8. */
176	stfdu	f2,   -8(SP)
177	stfdu	f1,   -8(SP)
178
/* LDC <<= ZBASE_SHIFT (presumably element count to byte stride). */
179	slwi	LDC, LDC, ZBASE_SHIFT
180
/* Nothing to do unless M, N and K are all positive. */
181	cmpwi	cr0, M, 0
182	ble	.L999
183	cmpwi	cr0, N, 0
184	ble	.L999
185	cmpwi	cr0, K, 0
186	ble	.L999
187
/* Constant byte strides for the update-form loads and stores. */
188	li	INC,    1 * SIZE
189	li	INC2,   2 * SIZE
190	li	INC4,   4 * SIZE
191	li	INCM1, -1 * SIZE
192	li	INCM3, -3 * SIZE
193	li	INCM5, -5 * SIZE
194	li	INCM7, -7 * SIZE
195
/* Bias C down by one element: C is written with update-form stores that
   add INC to the pointer before storing. */
196	addi	C, C, - 1 * SIZE
197
198#ifdef LN
/* LN: A += (M * K) << ZBASE_SHIFT and C += M << ZBASE_SHIFT. The row blocks
   below then step AORIG and CO1 downwards from these end positions. */
199	mullw	r0, M, K
200	slwi	r0, r0, ZBASE_SHIFT
201	add	A, A, r0
202
203	slwi	r0, M, ZBASE_SHIFT
204	add	C, C, r0
205#endif
206
207#ifdef RN
/* RN: KK starts at -OFFSET and is incremented once per finished column. */
208	neg	KK, OFFSET
209#endif
210
211#ifdef RT
/* RT: B += (N * K) << ZBASE_SHIFT, C += N * LDC and KK = N - OFFSET. The
   column passes below step B, C and KK downwards from there. */
212	mullw	r0, N, K
213	slwi	r0, r0, ZBASE_SHIFT
214	add	B, B, r0
215
216	mullw	r0, N, LDC
217	add	C, C, r0
218
219	sub	KK, N, OFFSET
220#endif
221
/*
 * Single-column pass, taken only when N is odd (J = N & 1); otherwise
 * control goes straight to the column-pair loop at .L50.
 * Sets up the per-column pointers and KK, reloads zeros into f0, and then
 * either falls into the four-row loop at .L51 or skips to .L60 when
 * M >> 2 is zero.
 */
222	andi.	J, N,  1
223	beq	.L50
224
225#ifdef RT
/* RT walks backwards: step B back by K << ZBASE_SHIFT and C back by LDC. */
226	slwi	r0, K, 0 + ZBASE_SHIFT
227	sub	B, B, r0
228
229	sub	C, C, LDC
230#endif
231
/* CO1 is the only output pointer used by this single-column pass. */
232	mr	CO1, C
233
234#ifdef LN
235	add	KK, M, OFFSET
236#endif
237
238#ifdef LT
239	mr	KK, OFFSET
240#endif
241
/* The A pointer is biased down by 2*SIZE because the loads below use
   update-form addressing that adds INC2 before each access. */
242#if defined(LN) || defined(RT)
243	addi	AORIG, A, -2 * SIZE
244#else
245	addi	AO, A, -2 * SIZE
246#endif
247#ifndef RT
/* Advance C past the one column handled here. The base must be CO1: CO2 is
   not written anywhere on this path (it is first set in .L10), so using it
   would leave C pointing at an arbitrary address for the column-pair loop. */
248	add	C,  CO1, LDC
249#endif
/* f0 = zeros read from the scratch words at SP + FZERO. */
250	li	r0, FZERO
251	lfpsx	f0, SP, r0
252
/* I = M >> 2: number of four-row blocks. */
253	srawi.	I, M,  2
254	ble	.L60
255	.align 4
256
/*
 * .L51: start of one four-row block of the single-column pass.
 * Clears accumulators f1..f7 from f0 (f0 already holds zeros), positions
 * AO / BO, sets CTR to the number of four-step trips of .L52 and preloads
 * the first operands. Jumps to .L54 when there is no full trip, and to
 * .L53 when there is exactly one.
 */
257.L51:
258#if defined(LT) || defined(RN)
/* LT / RN: BO restarts at B (biased by 2*SIZE); trip count is KK >> 2. */
259	fpmr	f4,  f0
260	addi	BO,  B,  - 2 * SIZE
261 	fpmr	f1,  f0
262	fpmr	f5,  f0
263	fpmr	f2,  f0
264	fpmr	f6,  f0
265	fpmr	f3,  f0
266	fpmr	f7,  f0
267	srawi.	r0,  KK,  2
268	mtspr	CTR, r0
269	ble	.L54
270#else
271
272#ifdef LN
/* LN: move AORIG back by K << (2 + ZBASE_SHIFT), i.e. one four-row panel. */
273	slwi	r0,   K,  2 + ZBASE_SHIFT
274	sub	AORIG, AORIG, r0
275#endif
276
/* LN / RT: AO = AORIG + (KK << (2 + ZBASE_SHIFT)),
   BO = B + (KK << ZBASE_SHIFT) - 2*SIZE, and TEMP = K - KK is the number
   of steps to accumulate; trip count is TEMP >> 2. */
277	slwi	r0  , KK, 2 + ZBASE_SHIFT
278	slwi	TEMP, KK, 0 + ZBASE_SHIFT
279	add	AO, AORIG, r0
280	add	BO, B,     TEMP
281
282	sub	TEMP, K, KK
283
284	fpmr	f4,  f0
285	addi	BO,  BO,  - 2 * SIZE
286 	fpmr	f1,  f0
287	fpmr	f5,  f0
288	fpmr	f2,  f0
289	fpmr	f6,  f0
290	fpmr	f3,  f0
291	fpmr	f7,  f0
292	srawi.	r0, TEMP,  2
293	mtspr	CTR, r0
294	ble	.L54
295#endif
296
/* Preload B1..B3 and A1..A8 for the first trip (B4 is loaded inside it). */
297	LFPDUX	B1,  BO,  INC2
298	LFPDUX	A1,  AO,  INC2
299	LFPDUX	A2,  AO,  INC2
300	LFPDUX	B2,  BO,  INC2
301	LFPDUX	A3,  AO,  INC2
302	LFPDUX	A4,  AO,  INC2
303
304	LFPDUX	B3,  BO,  INC2
305	LFPDUX	A5,  AO,  INC2
306	LFPDUX	A6,  AO,  INC2
307	LFPDUX	A7,  AO,  INC2
308	LFPDUX	A8,  AO,  INC2
/* Decrement CTR; with a single trip go straight to the drain at .L53. */
309	bdz-	.L53
310	.align 4
311
/*
 * .L52: main accumulation loop of the four-row block, four steps per trip.
 * Each step uses one B register (B1, B2, B3, B4 in turn) against four A
 * registers and updates the accumulator pairs f0/f4, f1/f5, f2/f6, f3/f7
 * through FXCPMADD / FXCSMADD. Loads for later steps (4 through BO, 16
 * through AO per trip) are interleaved with the arithmetic.
 * NOTE(review): the nop lines presumably keep the instruction pairing
 * regular; do not remove them without measuring.
 */
312.L52:
/* Step 1: B1 with A1..A4. */
313	FXCPMADD	f0,  B1, A1, f0
314	LFPDUX	B4,  BO,  INC2
315	FXCSMADD	f4,  B1, A1, f4
316	LFPDUX	A1,  AO,  INC2
317	FXCPMADD	f1,  B1, A2, f1
318	nop
319	FXCSMADD	f5,  B1, A2, f5
320	LFPDUX	A2,  AO,  INC2
321
322	FXCPMADD	f2,  B1, A3, f2
323	nop
324	FXCSMADD	f6,  B1, A3, f6
325	LFPDUX	A3,  AO,  INC2
326	FXCPMADD	f3,  B1, A4, f3
327	nop
328	FXCSMADD	f7,  B1, A4, f7
329	LFPDUX	A4,  AO,  INC2
330
/* Step 2: B2 with A5..A8. */
331	FXCPMADD	f0,  B2, A5, f0
332	LFPDUX	B1,  BO,  INC2
333	FXCSMADD	f4,  B2, A5, f4
334	LFPDUX	A5,  AO,  INC2
335	FXCPMADD	f1,  B2, A6, f1
336	nop
337	FXCSMADD	f5,  B2, A6, f5
338	LFPDUX	A6,  AO,  INC2
339
340	FXCPMADD	f2,  B2, A7, f2
341	nop
342	FXCSMADD	f6,  B2, A7, f6
343	LFPDUX	A7,  AO,  INC2
344	FXCPMADD	f3,  B2, A8, f3
345	nop
346	FXCSMADD	f7,  B2, A8, f7
347	LFPDUX	A8,  AO,  INC2
348
/* Step 3: B3 with the reloaded A1..A4. */
349	FXCPMADD	f0,  B3, A1, f0
350	LFPDUX	B2,  BO,  INC2
351	FXCSMADD	f4,  B3, A1, f4
352	LFPDUX	A1,  AO,  INC2
353	FXCPMADD	f1,  B3, A2, f1
354	nop
355	FXCSMADD	f5,  B3, A2, f5
356	LFPDUX	A2,  AO,  INC2
357
358	FXCPMADD	f2,  B3, A3, f2
359	nop
360	FXCSMADD	f6,  B3, A3, f6
361	LFPDUX	A3,  AO,  INC2
362	FXCPMADD	f3,  B3, A4, f3
363	nop
364	FXCSMADD	f7,  B3, A4, f7
365	LFPDUX	A4,  AO,  INC2
366
/* Step 4: B4 with the reloaded A5..A8. */
367	FXCPMADD	f0,  B4, A5, f0
368	LFPDUX	B3,  BO,  INC2
369	FXCSMADD	f4,  B4, A5, f4
370	LFPDUX	A5,  AO,  INC2
371	FXCPMADD	f1,  B4, A6, f1
372	nop
373	FXCSMADD	f5,  B4, A6, f5
374	LFPDUX	A6,  AO,  INC2
375
376	FXCPMADD	f2,  B4, A7, f2
377	nop
378	FXCSMADD	f6,  B4, A7, f6
379	LFPDUX	A7,  AO,  INC2
380	FXCPMADD	f3,  B4, A8, f3
381	nop
382	FXCSMADD	f7,  B4, A8, f7
383	LFPDUX	A8,  AO,  INC2
/* Loop while CTR, after decrement, is non-zero. */
384	bdnz+	.L52
385	.align 4
386
/*
 * .L53: drain of the four-row main loop. Performs the same four steps as
 * one trip of .L52 but issues only the loads those steps still need
 * (B4 and one more round of A1..A8), so AO / BO stop right after the
 * last operand consumed.
 */
387.L53:
/* Step 1: B1 with A1..A4; reloads A1..A4 for step 3. */
388	FXCPMADD	f0,  B1, A1, f0
389	LFPDUX	B4,  BO,  INC2
390	FXCSMADD	f4,  B1, A1, f4
391	LFPDUX	A1,  AO,  INC2
392	FXCPMADD	f1,  B1, A2, f1
393	nop
394	FXCSMADD	f5,  B1, A2, f5
395	LFPDUX	A2,  AO,  INC2
396
397	FXCPMADD	f2,  B1, A3, f2
398	nop
399	FXCSMADD	f6,  B1, A3, f6
400	LFPDUX	A3,  AO,  INC2
401	FXCPMADD	f3,  B1, A4, f3
402	nop
403	FXCSMADD	f7,  B1, A4, f7
404	LFPDUX	A4,  AO,  INC2
405
/* Step 2: B2 with A5..A8; reloads A5..A8 for step 4. */
406	FXCPMADD	f0,  B2, A5, f0
407	nop
408	FXCSMADD	f4,  B2, A5, f4
409	LFPDUX	A5,  AO,  INC2
410	FXCPMADD	f1,  B2, A6, f1
411	nop
412	FXCSMADD	f5,  B2, A6, f5
413	LFPDUX	A6,  AO,  INC2
414
415	FXCPMADD	f2,  B2, A7, f2
416	nop
417	FXCSMADD	f6,  B2, A7, f6
418	LFPDUX	A7,  AO,  INC2
419	FXCPMADD	f3,  B2, A8, f3
420	nop
421	FXCSMADD	f7,  B2, A8, f7
422	LFPDUX	A8,  AO,  INC2
423
/* Steps 3 and 4: arithmetic only, no further loads. */
424	FXCPMADD	f0,  B3, A1, f0
425	FXCSMADD	f4,  B3, A1, f4
426	FXCPMADD	f1,  B3, A2, f1
427	FXCSMADD	f5,  B3, A2, f5
428
429	FXCPMADD	f2,  B3, A3, f2
430	FXCSMADD	f6,  B3, A3, f6
431	FXCPMADD	f3,  B3, A4, f3
432	FXCSMADD	f7,  B3, A4, f7
433
434	FXCPMADD	f0,  B4, A5, f0
435	FXCSMADD	f4,  B4, A5, f4
436	FXCPMADD	f1,  B4, A6, f1
437	FXCSMADD	f5,  B4, A6, f5
438
439	FXCPMADD	f2,  B4, A7, f2
440	FXCSMADD	f6,  B4, A7, f6
441	FXCPMADD	f3,  B4, A8, f3
442	FXCSMADD	f7,  B4, A8, f7
443	.align 4
444
/*
 * .L54 / .L56 / .L57: remainder of the four-row accumulation.
 * CTR = (KK or TEMP) & 3 leftover steps; none means skip to .L58.
 * .L56 runs one step per trip with the next operands loaded in the same
 * trip, and .L57 performs the final step without loading.
 */
445.L54:
446#if defined(LT) || defined(RN)
447	andi.	r0,  KK,  3
448	mtspr	CTR, r0
449	ble+	.L58
450#else
451	andi.	r0, TEMP, 3
452	mtspr	CTR, r0
453	ble+	.L58
454#endif
455
/* Preload one step: B1 and A1..A4. */
456	LFPDUX	A1,  AO,  INC2
457	LFPDUX	B1,  BO,  INC2
458	LFPDUX	A2,  AO,  INC2
459	LFPDUX	A3,  AO,  INC2
460	LFPDUX	A4,  AO,  INC2
461	bdz-	.L57
462	.align 4
463
464.L56:
465	FXCPMADD	f0,  B1, A1, f0
466	FXCSMADD	f4,  B1, A1, f4
467	LFPDUX	A1,  AO,  INC2
468	FXCPMADD	f1,  B1, A2, f1
469	FXCSMADD	f5,  B1, A2, f5
470	LFPDUX	A2,  AO,  INC2
471
472	FXCPMADD	f2,  B1, A3, f2
473	FXCSMADD	f6,  B1, A3, f6
474	LFPDUX	A3,  AO,  INC2
475	FXCPMADD	f3,  B1, A4, f3
476	FXCSMADD	f7,  B1, A4, f7
477	LFPDUX	A4,  AO,  INC2
/* B1 is reloaded last, after every use of the old value in this trip. */
478	LFPDUX	B1,  BO,  INC2
479	bdnz+	.L56
480	.align 4
481
482.L57:
483	FXCPMADD	f0,  B1, A1, f0
484	FXCSMADD	f4,  B1, A1, f4
485	FXCPMADD	f1,  B1, A2, f1
486	FXCSMADD	f5,  B1, A2, f5
487
488	FXCPMADD	f2,  B1, A3, f2
489	FXCSMADD	f6,  B1, A3, f6
490	FXCPMADD	f3,  B1, A4, f3
491	FXCSMADD	f7,  B1, A4, f7
492	.align 4
493
/*
 * .L58: finish one four-row block of the single-column pass.
 *   1. Fold the split accumulators: f0..f3 += f4..f7.
 *   2. Load the four packed right-hand-side pairs (from BO for LN / LT,
 *      from AO for RN / RT) and replace f0..f3 by (loaded - accumulated).
 *   3. Apply the variant-specific solve.
 *   4. Write the results back to the packed buffer and to C through CO1.
 *   5. Advance pointers and KK, reload zeros into f0 and loop to .L51
 *      while I > 0.
 * NOTE(review): each fxpmul + FXCXNPMA pair looks like a complex multiply
 * by a diagonal entry that the packing step has presumably inverted
 * already - confirm against the packing routine.
 */
494.L58:
495	fpadd	f0,  f0,  f4
496	fpadd	f1,  f1,  f5
497	fpadd	f2,  f2,  f6
498	fpadd	f3,  f3,  f7
499
500#if defined(LN) || defined(RT)
/* Reposition on the diagonal block: r0 = KK - 4 (LN) or KK - 1 (RT),
   AO = AORIG + (r0 << (2 + ZBASE_SHIFT)),
   BO = B + (r0 << ZBASE_SHIFT) - 2*SIZE. */
501#ifdef LN
502	subi	r0, KK, 4
503#else
504	subi	r0, KK, 1
505#endif
506	slwi	TEMP, r0, 2 + ZBASE_SHIFT
507	slwi	r0,   r0, 0 + ZBASE_SHIFT
508	add	AO, AORIG, TEMP
509	add	BO, B,     r0
510	addi	BO,  BO, - 2 * SIZE
511#endif
512
/* Load four pairs into f16..f19 (these registers double as A1..A4) and
   rewind the pointer by the 8*SIZE just consumed. */
513#if defined(LN) || defined(LT)
514	LFPDUX	f16, BO,  INC2
515	LFPDUX	f17, BO,  INC2
516	LFPDUX	f18, BO,  INC2
517	LFPDUX	f19, BO,  INC2
518
519	subi	BO,  BO,   8 * SIZE
520#else
521	LFPDUX	f16, AO,  INC2
522	LFPDUX	f17, AO,  INC2
523	LFPDUX	f18, AO,  INC2
524	LFPDUX	f19, AO,  INC2
525
526	subi	AO,  AO,   8 * SIZE
527#endif
528
529	fpsub	f0,  f16,  f0
530	fpsub	f1,  f17,  f1
531	fpsub	f2,  f18,  f2
532	fpsub	f3,  f19,  f3
533
534#ifdef LN
/* LN: read 10 of the 16 pair slots of the 4x4 block (slots 0, 4, 5,
   8, 9, 10, 12..15); the other six are skipped with plain pointer adds.
   AO is then rewound by the full 32*SIZE. */
535	LFPDUX	A1,  AO,  INC2
536	add	AO,  AO,  INC2
537	add	AO,  AO,  INC2
538	add	AO,  AO,  INC2
539
540	LFPDUX	A2,  AO,  INC2
541	LFPDUX	A3,  AO,  INC2
542	add	AO,  AO,  INC2
543	add	AO,  AO,  INC2
544
545	LFPDUX	A4,  AO,  INC2
546	LFPDUX	A5,  AO,  INC2
547	LFPDUX	A6,  AO,  INC2
548	add	AO,  AO,  INC2
549
550	LFPDUX	A7,  AO,  INC2
551	LFPDUX	A8,  AO,  INC2
552	LFPDUX	A9,  AO,  INC2
553	LFPDUX	A10, AO,  INC2
554
555	subi	AO,  AO,  32 * SIZE
556
/* Solve from the last row upwards: f3, then f2, f1, f0. Each solved row
   is eliminated from the rows above it before they are scaled. */
557	fxpmul	  f4,  A10, f3
558	FXCXNPMA  f3,  A10, f3,  f4
559
560	fxcpnmsub f2,  A9, f3,  f2
561	FXCXNSMA  f2,  A9, f3,  f2
562
563	fxcpnmsub f1,  A8, f3,  f1
564	FXCXNSMA  f1,  A8, f3,  f1
565
566	fxcpnmsub f0,  A7, f3,  f0
567	FXCXNSMA  f0,  A7, f3,  f0
568
569	fxpmul	  f4,  A6, f2
570	FXCXNPMA  f2,  A6, f2,  f4
571
572	fxcpnmsub f1,  A5, f2,  f1
573	FXCXNSMA  f1,  A5, f2,  f1
574
575	fxcpnmsub f0,  A4, f2,  f0
576	FXCXNSMA  f0,  A4, f2,  f0
577
578	fxpmul	  f4,  A3, f1
579	FXCXNPMA  f1,  A3, f1,  f4
580
581	fxcpnmsub f0,  A2, f1,  f0
582	FXCXNSMA  f0,  A2, f1,  f0
583
584	fxpmul	  f4,  A1, f0
585	FXCXNPMA  f0,  A1, f0,  f4
586#endif
587
588#ifdef LT
/* LT: read 10 of the 16 pair slots of the 4x4 block (slots 0..3, 5..7,
   10, 11, 15), skipping the other six, then rewind AO by 32*SIZE. */
589	LFPDUX	A1,  AO,  INC2
590	LFPDUX	A2,  AO,  INC2
591	LFPDUX	A3,  AO,  INC2
592	LFPDUX	A4,  AO,  INC2
593
594	add	AO,  AO,  INC2
595	LFPDUX	A5,  AO,  INC2
596	LFPDUX	A6,  AO,  INC2
597	LFPDUX	A7,  AO,  INC2
598
599	add	AO,  AO,  INC2
600	add	AO,  AO,  INC2
601	LFPDUX	A8,  AO,  INC2
602	LFPDUX	A9,  AO,  INC2
603
604	add	AO,  AO,  INC2
605	add	AO,  AO,  INC2
606	add	AO,  AO,  INC2
607	LFPDUX	A10, AO,  INC2
608
609	subi	AO,  AO,  32 * SIZE
610
/* Solve from the first row downwards: f0, then f1, f2, f3. */
611	fxpmul	  f4,  A1, f0
612	FXCXNPMA  f0,  A1, f0, f4
613
614	fxcpnmsub f1,  A2, f0, f1
615	FXCXNSMA  f1,  A2, f0, f1
616
617	fxcpnmsub f2,  A3, f0, f2
618	FXCXNSMA  f2,  A3, f0, f2
619
620	fxcpnmsub f3,  A4, f0, f3
621	FXCXNSMA  f3,  A4, f0, f3
622
623	fxpmul	  f6,  A5, f1
624	FXCXNPMA  f1,  A5, f1, f6
625
626	fxcpnmsub f2,  A6, f1, f2
627	FXCXNSMA  f2,  A6, f1, f2
628
629	fxcpnmsub f3,  A7, f1, f3
630	FXCXNSMA  f3,  A7, f1, f3
631
632	fxpmul	  f4,  A8, f2
633	FXCXNPMA  f2,  A8, f2,  f4
634
635	fxcpnmsub f3,  A9, f2,  f3
636	FXCXNSMA  f3,  A9, f2,  f3
637
638	fxpmul	  f6,  A10, f3
639	FXCXNPMA  f3,  A10, f3,  f6
640#endif
641
642#ifdef RN
/* RN: one entry read from BO + INC2 (BO itself is not updated) multiplies
   all four rows. */
643	LFPDX	A1,  BO,  INC2
644
645	fxpmul	  f4,  A1, f0
646	fxpmul	  f5,  A1, f1
647	fxpmul	  f6,  A1, f2
648	fxpmul	  f7,  A1, f3
649
650	FXCXNPMA  f0,  A1, f0, f4
651	FXCXNPMA  f1,  A1, f1, f5
652	FXCXNPMA  f2,  A1, f2, f6
653	FXCXNPMA  f3,  A1, f3, f7
654#endif
655
656#ifdef RT
/* RT: same instruction sequence as RN. */
657	LFPDX	A1,  BO,  INC2
658
659	fxpmul	  f4,  A1, f0
660	fxpmul	  f5,  A1, f1
661	fxpmul	  f6,  A1, f2
662	fxpmul	  f7,  A1, f3
663
664	FXCXNPMA  f0,  A1, f0,  f4
665	FXCXNPMA  f1,  A1, f1,  f5
666	FXCXNPMA  f2,  A1, f2,  f6
667	FXCXNPMA  f3,  A1, f3,  f7
668#endif
669
670#ifdef LN
/* LN writes C backwards: step CO1 back over the 8 elements stored below. */
671	subi	CO1, CO1, 8 * SIZE
672#endif
673
/* Write the solved values back to the packed buffer they were read from,
   then rewind that pointer. */
674#if defined(LN) || defined(LT)
675	STFPDUX	f0,  BO,  INC2
676	STFPDUX	f1,  BO,  INC2
677	STFPDUX	f2,  BO,  INC2
678	STFPDUX	f3,  BO,  INC2
679
680	subi	BO,  BO,   8 * SIZE
681#else
682	STFPDUX	f0,  AO,  INC2
683	STFPDUX	f1,  AO,  INC2
684	STFPDUX	f2,  AO,  INC2
685	STFPDUX	f3,  AO,  INC2
686
687	subi	AO,  AO,   8 * SIZE
688#endif
689
/* Store to C: each result register goes out as two consecutive elements
   (STFDUX then STFSDUX), CO1 advancing by INC before each store. */
690	STFDUX	f0,  CO1, INC
691	STFSDUX	f0,  CO1, INC
692	STFDUX	f1,  CO1, INC
693	STFSDUX	f1,  CO1, INC
694	STFDUX	f2,  CO1, INC
695	STFSDUX	f2,  CO1, INC
696	STFDUX	f3,  CO1, INC
697	STFSDUX	f3,  CO1, INC
698
699#ifdef LN
/* Undo the forward movement of the stores so the next block lands below. */
700	subi	CO1, CO1, 8 * SIZE
701#endif
702
703#ifdef RT
/* RT: next four-row panel of A. */
704	slwi	r0, K, 2 + ZBASE_SHIFT
705	add	AORIG, AORIG, r0
706#endif
707
708#if defined(LT) || defined(RN)
/* Skip the K - KK steps that were not accumulated in this block. */
709	sub	TEMP, K, KK
710	slwi	r0,   TEMP, 2 + ZBASE_SHIFT
711	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
712	add	AO, AO, r0
713	add	BO, BO, TEMP
714#endif
715
716#ifdef LT
717	addi	KK, KK, 4
718#endif
719
720#ifdef LN
721	subi	KK, KK, 4
722#endif
723
/* Next four-row block; f0 is reset to zeros before looping. The li and
   lfpsx do not touch CR0, so bgt+ still tests the result of addic. */
724	addic.	I, I, -1
725	li	r0, FZERO
726
727	lfpsx	f0, SP, r0
728	bgt+	.L51
729	.align 4
730
/*
 * .L60: two-row block of the single-column pass, taken when bit 1 of M is
 * set; otherwise skip to .L70.
 * Clears f1..f3 from f0, positions AO / BO, sets CTR to the number of
 * four-step trips and preloads B1..B4 and A1..A8.
 */
731.L60:
732	andi.	I, M,  2
733	beq	.L70
734
735#if defined(LT) || defined(RN)
736 	fpmr	f1,  f0
737	addi	BO,  B,  - 2 * SIZE
738	fpmr	f2,  f0
739	fpmr	f3,  f0
740	srawi.	r0,  KK,  2
741	mtspr	CTR, r0
742	ble	.L64
743#else
744#ifdef LN
/* LN: move AORIG back by K << (1 + ZBASE_SHIFT), i.e. one two-row panel. */
745	slwi	r0,   K,  1 + ZBASE_SHIFT
746	sub	AORIG, AORIG, r0
747#endif
748
/* AO = AORIG + (KK << (1 + ZBASE_SHIFT)), BO = B + (KK << ZBASE_SHIFT)
   - 2*SIZE, TEMP = K - KK steps to accumulate. */
749	slwi	r0  , KK, 1 + ZBASE_SHIFT
750	slwi	TEMP, KK, 0 + ZBASE_SHIFT
751	add	AO, AORIG, r0
752	add	BO, B,     TEMP
753
754	sub	TEMP, K, KK
755
756 	fpmr	f1,  f0
757	addi	BO,  BO,  - 2 * SIZE
758	fpmr	f2,  f0
759	fpmr	f3,  f0
760	srawi.	r0,  TEMP,  2
761	mtspr	CTR, r0
762	ble	.L64
763#endif
764
/* Preload all operands of the first trip. */
765	LFPDUX	B1,  BO, INC2
766	LFPDUX	A1,  AO, INC2
767	LFPDUX	A2,  AO, INC2
768	LFPDUX	B2,  BO, INC2
769	LFPDUX	A3,  AO, INC2
770	LFPDUX	A4,  AO, INC2
771
772	LFPDUX	B3,  BO, INC2
773	LFPDUX	A5,  AO, INC2
774	LFPDUX	A6,  AO, INC2
775	LFPDUX	B4,  BO, INC2
776	LFPDUX	A7,  AO, INC2
777	LFPDUX	A8,  AO, INC2
778	bdz-	.L63
779	.align 4
780
/*
 * .L62 / .L63: main loop and drain of the two-row block.
 * Four steps per trip; step n uses Bn with the A pair (A1,A2), (A3,A4),
 * (A5,A6), (A7,A8) and updates the accumulator pairs f0/f2 and f1/f3.
 * In .L62 every operand is reloaded right after its last use; .L63 repeats
 * the arithmetic of one trip without any loads.
 */
781.L62:
782	FXCPMADD	f0,  B1, A1, f0
783	FXCSMADD	f2,  B1, A1, f2
784	LFPDUX	A1,  AO, INC2
785	FXCPMADD	f1,  B1, A2, f1
786	FXCSMADD	f3,  B1, A2, f3
787	LFPDUX	A2,  AO, INC2
788	LFPDUX	B1,  BO, INC2
789
790	FXCPMADD	f0,  B2, A3, f0
791	FXCSMADD	f2,  B2, A3, f2
792	LFPDUX	A3,  AO, INC2
793	FXCPMADD	f1,  B2, A4, f1
794	FXCSMADD	f3,  B2, A4, f3
795	LFPDUX	A4,  AO, INC2
796	LFPDUX	B2,  BO, INC2
797
798	FXCPMADD	f0,  B3, A5, f0
799	FXCSMADD	f2,  B3, A5, f2
800	LFPDUX	A5,  AO, INC2
801	FXCPMADD	f1,  B3, A6, f1
802	FXCSMADD	f3,  B3, A6, f3
803	LFPDUX	A6,  AO, INC2
804	LFPDUX	B3,  BO, INC2
805
806	FXCPMADD	f0,  B4, A7, f0
807	FXCSMADD	f2,  B4, A7, f2
808	LFPDUX	A7,  AO, INC2
809	FXCPMADD	f1,  B4, A8, f1
810	FXCSMADD	f3,  B4, A8, f3
811	LFPDUX	A8,  AO, INC2
812	LFPDUX	B4,  BO, INC2
813	bdnz+	.L62
814	.align 4
815
/* Drain: last four steps on the operands already in registers. */
816.L63:
817	FXCPMADD	f0,  B1, A1, f0
818	FXCSMADD	f2,  B1, A1, f2
819	FXCPMADD	f1,  B1, A2, f1
820	FXCSMADD	f3,  B1, A2, f3
821
822	FXCPMADD	f0,  B2, A3, f0
823	FXCSMADD	f2,  B2, A3, f2
824	FXCPMADD	f1,  B2, A4, f1
825	FXCSMADD	f3,  B2, A4, f3
826
827	FXCPMADD	f0,  B3, A5, f0
828	FXCSMADD	f2,  B3, A5, f2
829	FXCPMADD	f1,  B3, A6, f1
830	FXCSMADD	f3,  B3, A6, f3
831
832	FXCPMADD	f0,  B4, A7, f0
833	FXCSMADD	f2,  B4, A7, f2
834	FXCPMADD	f1,  B4, A8, f1
835	FXCSMADD	f3,  B4, A8, f3
836	.align 4
837
/*
 * .L64 / .L66 / .L67: remainder of the two-row accumulation.
 * CTR = (KK or TEMP) & 3 leftover steps; none means skip to .L68.
 * .L66 runs one step per trip with operands for the next step loaded in
 * the same trip; .L67 performs the final step without loading.
 */
838.L64:
839#if defined(LT) || defined(RN)
840	andi.	r0,  KK,  3
841	mtspr	CTR, r0
842	ble+	.L68
843#else
844	andi.	r0, TEMP, 3
845	mtspr	CTR, r0
846	ble+	.L68
847#endif
848
849	LFPDUX	A1,  AO,  INC2
850	LFPDUX	B1,  BO,  INC2
851	LFPDUX	A2,  AO,  INC2
852	bdz-	.L67
853	.align 4
854
855.L66:
856	FXCPMADD	f0,  B1, A1, f0
857	FXCSMADD	f2,  B1, A1, f2
858	LFPDUX	A1,  AO,  INC2
859	FXCPMADD	f1,  B1, A2, f1
860	FXCSMADD	f3,  B1, A2, f3
861	LFPDUX	B1,  BO,  INC2
862	LFPDUX	A2,  AO,  INC2
863	bdnz+	.L66
864	.align 4
865
866.L67:
867	FXCPMADD	f0,  B1, A1, f0
868	FXCSMADD	f2,  B1, A1, f2
869	FXCPMADD	f1,  B1, A2, f1
870	FXCSMADD	f3,  B1, A2, f3
871	.align 4
872
/*
 * .L68: finish the two-row block of the single-column pass.
 * Folds the accumulators (f0 += f2, f1 += f3), subtracts them from the
 * two packed right-hand-side pairs, applies the variant-specific solve,
 * writes the result back to the packed buffer and to C, then advances
 * pointers and KK and reloads zeros into f0.
 */
873.L68:
874	fpadd	f0, f0, f2
875	fpadd	f1, f1, f3
876
877#if defined(LN) || defined(RT)
/* Reposition on the diagonal block: r0 = KK - 2 (LN) or KK - 1 (RT),
   AO = AORIG + (r0 << (1 + ZBASE_SHIFT)),
   BO = B + (r0 << ZBASE_SHIFT) - 2*SIZE. */
878#ifdef LN
879	subi	r0, KK, 2
880#else
881	subi	r0, KK, 1
882#endif
883	slwi	TEMP, r0, 1 + ZBASE_SHIFT
884	slwi	r0,   r0, 0 + ZBASE_SHIFT
885	add	AO, AORIG, TEMP
886	add	BO, B,     r0
887	addi	BO,  BO, - 2 * SIZE
888#endif
889
/* Load two pairs into f16 / f17 and rewind the pointer by 4*SIZE. */
890#if defined(LN) || defined(LT)
891	LFPDUX	f16, BO,  INC2
892	LFPDUX	f17, BO,  INC2
893
894	subi	BO,  BO,   4 * SIZE
895#else
896	LFPDUX	f16, AO,  INC2
897	LFPDUX	f17, AO,  INC2
898
899	subi	AO,  AO,   4 * SIZE
900#endif
901
902	fpsub	f0,  f16,  f0
903	fpsub	f1,  f17,  f1
904
905#ifdef LN
/* LN: read pair slots 0, 2 and 3 of the 2x2 block (slot 1 is skipped),
   rewind AO by 8*SIZE, then solve f1 first and f0 second. */
906	LFPDUX	A1,  AO,  INC2
907	add	AO,  AO,  INC2
908	LFPDUX	A2,  AO,  INC2
909	LFPDUX	A3,  AO,  INC2
910
911	subi	AO,  AO,   8 * SIZE
912
913	fxpmul	  f4,  A3, f1
914	FXCXNPMA  f1,  A3, f1,  f4
915
916	fxcpnmsub f0,  A2, f1,  f0
917	FXCXNSMA  f0,  A2, f1,  f0
918
919	fxpmul	  f4,  A1, f0
920	FXCXNPMA  f0,  A1, f0,  f4
921#endif
922
923#ifdef LT
/* LT: read pair slots 0, 1 and 3 of the 2x2 block (slot 2 is skipped),
   rewind AO by 8*SIZE, then solve f0 first and f1 second. */
924	LFPDUX	A1,  AO,  INC2
925	LFPDUX	A2,  AO,  INC2
926	add	AO,  AO,  INC2
927	LFPDUX	A3,  AO,  INC2
928
929	subi	AO,  AO,   8 * SIZE
930
931	fxpmul	  f4,  A1, f0
932	FXCXNPMA  f0,  A1, f0, f4
933
934	fxcpnmsub f1,  A2, f0, f1
935	FXCXNSMA  f1,  A2, f0, f1
936
937	fxpmul	  f6,  A3, f1
938	FXCXNPMA  f1,  A3, f1, f6
939#endif
940
941#ifdef RN
/* RN: one entry read from BO + INC2 (BO not updated) multiplies both rows. */
942	LFPDX	A1,  BO,  INC2
943
944	fxpmul	  f4,  A1, f0
945	fxpmul	  f5,  A1, f1
946
947	FXCXNPMA  f0,  A1, f0, f4
948	FXCXNPMA  f1,  A1, f1, f5
949#endif
950
951#ifdef RT
/* RT: same instruction sequence as RN. */
952	LFPDX	A1,  BO,  INC2
953
954	fxpmul	  f4,  A1, f0
955	fxpmul	  f5,  A1, f1
956
957	FXCXNPMA  f0,  A1, f0,  f4
958	FXCXNPMA  f1,  A1, f1,  f5
959#endif
960
961#ifdef LN
/* LN writes C backwards: step CO1 back over the 4 elements stored below. */
962	subi	CO1, CO1, 4 * SIZE
963#endif
964
/* Write the solved values back to the packed buffer, then rewind. */
965#if defined(LN) || defined(LT)
966	STFPDUX	f0,  BO,  INC2
967	STFPDUX	f1,  BO,  INC2
968
969	subi	BO,  BO,   4 * SIZE
970#else
971	STFPDUX	f0,  AO,  INC2
972	STFPDUX	f1,  AO,  INC2
973
974	subi	AO,  AO,   4 * SIZE
975#endif
976
/* Store to C: two consecutive elements per result register. */
977	STFDUX	f0,  CO1, INC
978	STFSDUX	f0,  CO1, INC
979	STFDUX	f1,  CO1, INC
980	STFSDUX	f1,  CO1, INC
981
982#ifdef LN
983	subi	CO1, CO1, 4 * SIZE
984#endif
985
986#ifdef RT
/* RT: step AORIG past this two-row panel. */
987	slwi	r0, K, 1 + ZBASE_SHIFT
988	add	AORIG, AORIG, r0
989#endif
990
991#if defined(LT) || defined(RN)
/* Skip the K - KK steps that were not accumulated in this block. */
992	sub	TEMP, K, KK
993	slwi	r0,   TEMP, 1 + ZBASE_SHIFT
994	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
995	add	AO, AO, r0
996	add	BO, BO, TEMP
997#endif
998
999#ifdef LT
1000	addi	KK, KK, 2
1001#endif
1002
1003#ifdef LN
1004	subi	KK, KK, 2
1005#endif
1006
/* f0 = zeros again for the next block. */
1007	li	r0, FZERO
1008	lfpsx	f0, SP, r0
1009	.align 4
1010
/*
 * .L70: single-row block of the single-column pass, taken when bit 0 of M
 * is set; otherwise skip to .L89.
 * Clears f1..f3 from f0, positions AO / BO, sets CTR to the number of
 * eight-step trips (count >> 3) and preloads eight A and eight B operands.
 */
1011.L70:
1012	andi.	I, M,  1
1013	beq	.L89
1014
1015#if defined(LT) || defined(RN)
1016	addi	BO,  B,  - 2 * SIZE
1017	fpmr	f1,  f0
1018	fpmr	f2,  f0
1019	fpmr	f3,  f0
1020	srawi.	r0, KK,  3
1021	mtspr	CTR, r0
1022	ble	.L74
1023#else
1024#ifdef LN
/* LN: move AORIG back by K << ZBASE_SHIFT, i.e. one single-row panel. */
1025	slwi	r0,   K,  0 + ZBASE_SHIFT
1026	sub	AORIG, AORIG, r0
1027#endif
1028
/* AO and BO both start KK << ZBASE_SHIFT bytes into their panels;
   TEMP = K - KK steps to accumulate. */
1029	slwi	TEMP, KK, 0 + ZBASE_SHIFT
1030	add	AO, AORIG, TEMP
1031	add	BO, B,     TEMP
1032
1033	sub	TEMP, K, KK
1034
1035	addi	BO,  BO,  - 2 * SIZE
1036	fpmr	f1,  f0
1037	fpmr	f2,  f0
1038	fpmr	f3,  f0
1039	srawi.	r0, TEMP,  3
1040	mtspr	CTR, r0
1041	ble	.L74
1042#endif
1043
/* Preload eight steps. The B side needs eight registers, so A9 and A10
   are borrowed for the seventh and eighth BO operands. */
1044	LFPDUX	A1,  AO, INC2
1045	LFPDUX	B1,  BO, INC2
1046	LFPDUX	A2,  AO, INC2
1047	LFPDUX	B2,  BO, INC2
1048	LFPDUX	A3,  AO, INC2
1049	LFPDUX	B3,  BO, INC2
1050	LFPDUX	A4,  AO, INC2
1051	LFPDUX	B4,  BO, INC2
1052
1053	LFPDUX	A5,  AO, INC2
1054	LFPDUX	B5,  BO, INC2
1055	LFPDUX	A6,  AO, INC2
1056	LFPDUX	B6,  BO, INC2
1057	LFPDUX	A7,  AO, INC2
1058	LFPDUX	A9,  BO, INC2
1059	LFPDUX	A8,  AO, INC2
1060	LFPDUX	A10, BO, INC2
1061	bdz-	.L73
1062	.align 4
1063
/*
 * .L72 / .L73: main loop and drain of the single-row block, eight steps
 * per trip. Odd-numbered steps update f0 / f1 and even-numbered steps
 * update f2 / f3, so consecutive steps do not depend on each other.
 * The BO operands of steps 7 and 8 live in A9 and A10.
 * .L73 repeats the arithmetic of one trip without any loads.
 */
1064.L72:
1065	FXCPMADD	f0,  B1, A1, f0
1066	FXCSMADD	f1,  B1, A1, f1
1067	LFPDUX	A1,  AO, INC2
1068	LFPDUX	B1,  BO, INC2
1069	FXCPMADD	f2,  B2, A2, f2
1070	FXCSMADD	f3,  B2, A2, f3
1071	LFPDUX	A2,  AO, INC2
1072	LFPDUX	B2,  BO, INC2
1073
1074	FXCPMADD	f0,  B3, A3, f0
1075	FXCSMADD	f1,  B3, A3, f1
1076	LFPDUX	A3,  AO, INC2
1077	LFPDUX	B3,  BO, INC2
1078	FXCPMADD	f2,  B4, A4, f2
1079	FXCSMADD	f3,  B4, A4, f3
1080	LFPDUX	A4,  AO, INC2
1081	LFPDUX	B4,  BO, INC2
1082
1083	FXCPMADD	f0,  B5, A5, f0
1084	FXCSMADD	f1,  B5, A5, f1
1085	LFPDUX	A5,  AO, INC2
1086	LFPDUX	B5,  BO, INC2
1087	FXCPMADD	f2,  B6, A6, f2
1088	FXCSMADD	f3,  B6, A6, f3
1089	LFPDUX	A6,  AO, INC2
1090	LFPDUX	B6,  BO, INC2
1091
1092	FXCPMADD	f0,  A9,  A7, f0
1093	FXCSMADD	f1,  A9,  A7, f1
1094	LFPDUX	A7,  AO, INC2
1095	LFPDUX	A9,  BO, INC2
1096	FXCPMADD	f2,  A10, A8, f2
1097	FXCSMADD	f3,  A10, A8, f3
1098	LFPDUX	A8,  AO, INC2
1099	LFPDUX	A10, BO, INC2
1100
1101	bdnz+	.L72
1102	.align 4
1103
/* Drain: last eight steps on the operands already in registers. */
1104.L73:
1105	FXCPMADD	f0,  B1, A1, f0
1106	FXCSMADD	f1,  B1, A1, f1
1107	FXCPMADD	f2,  B2, A2, f2
1108	FXCSMADD	f3,  B2, A2, f3
1109
1110	FXCPMADD	f0,  B3, A3, f0
1111	FXCSMADD	f1,  B3, A3, f1
1112	FXCPMADD	f2,  B4, A4, f2
1113	FXCSMADD	f3,  B4, A4, f3
1114
1115	FXCPMADD	f0,  B5, A5, f0
1116	FXCSMADD	f1,  B5, A5, f1
1117	FXCPMADD	f2,  B6, A6, f2
1118	FXCSMADD	f3,  B6, A6, f3
1119
1120	FXCPMADD	f0,  A9,  A7, f0
1121	FXCSMADD	f1,  A9,  A7, f1
1122	FXCPMADD	f2,  A10, A8, f2
1123	FXCSMADD	f3,  A10, A8, f3
1124	.align 4
1125
/*
 * .L74 / .L76 / .L77: remainder of the single-row accumulation.
 * CTR = (KK or TEMP) & 7 leftover steps; none means skip to .L78.
 * Only f0 / f1 are updated here. .L76 runs one step per trip and loads
 * the next operands; .L77 performs the final step without loading.
 */
1126.L74:
1127#if defined(LT) || defined(RN)
1128	andi.	r0,  KK,  7
1129	mtspr	CTR, r0
1130	ble+	.L78
1131#else
1132	andi.	r0, TEMP, 7
1133	mtspr	CTR, r0
1134	ble+	.L78
1135#endif
1136
1137	LFPDUX	A1,  AO,  INC2
1138	LFPDUX	B1,  BO,  INC2
1139	bdz-	.L77
1140	.align 4
1141
1142.L76:
1143	FXCPMADD	f0,  B1, A1, f0
1144	FXCSMADD	f1,  B1, A1, f1
1145	LFPDUX	A1,  AO,  INC2
1146	LFPDUX	B1,  BO,  INC2
1147	bdnz+	.L76
1148	.align 4
1149
1150.L77:
1151	FXCPMADD	f0,  B1, A1, f0
1152	FXCSMADD	f1,  B1, A1, f1
1153	.align 4
1154
/*
 * .L78: finish the single-row block of the single-column pass.
 * Folds the four accumulators into f0 (f0 += f2, f1 += f3, f0 += f1),
 * subtracts f0 from the packed right-hand-side pair, applies one multiply
 * by the entry read for the variant, writes the result to the packed
 * buffer and to C, then advances pointers and KK and reloads zeros.
 */
1155.L78:
1156	fpadd	f0, f0, f2
1157	fpadd	f1, f1, f3
1158
1159	fpadd	f0, f0, f1
1160
1161#if defined(LN) || defined(RT)
/* Both branches compute r0 = KK - 1; AO and BO are then placed
   r0 << ZBASE_SHIFT bytes into their panels (BO biased by 2*SIZE). */
1162#ifdef LN
1163	subi	r0, KK, 1
1164#else
1165	subi	r0, KK, 1
1166#endif
1167	slwi	TEMP, r0, 0 + ZBASE_SHIFT
1168	add	AO, AORIG, TEMP
1169	add	BO, B,     TEMP
1170	addi	BO,  BO, - 2 * SIZE
1171#endif
1172
/* Non-update load: the pointer stays where it is, so no rewind is needed. */
1173#if defined(LN) || defined(LT)
1174	LFPDX	f16, BO,  INC2
1175#else
1176	LFPDX	f16, AO,  INC2
1177#endif
1178
1179	fpsub	f0,  f16,  f0
1180
/* The four variants run the same two-instruction multiply and differ only
   in which pointer supplies the entry: AO for LN / LT, BO for RN / RT. */
1181#ifdef LN
1182	LFPDX	A1,  AO,  INC2
1183
1184	fxpmul	  f4,  A1, f0
1185	FXCXNPMA  f0,  A1, f0,  f4
1186#endif
1187
1188#ifdef LT
1189	LFPDX	A1,  AO,  INC2
1190
1191	fxpmul	  f4,  A1, f0
1192	FXCXNPMA  f0,  A1, f0, f4
1193#endif
1194
1195#ifdef RN
1196	LFPDX	A1,  BO,  INC2
1197
1198	fxpmul	  f4,  A1, f0
1199	FXCXNPMA  f0,  A1, f0, f4
1200#endif
1201
1202#ifdef RT
1203	LFPDX	A1,  BO,  INC2
1204
1205	fxpmul	  f4,  A1, f0
1206	FXCXNPMA  f0,  A1, f0,  f4
1207#endif
1208
1209#ifdef LN
/* LN writes C backwards: step CO1 back over the 2 elements stored below. */
1210	subi	CO1, CO1, 2 * SIZE
1211#endif
1212
/* Write the solved value back to the packed buffer (non-update store). */
1213#if defined(LN) || defined(LT)
1214	STFPDX	f0,  BO,  INC2
1215#else
1216	STFPDX	f0,  AO,  INC2
1217#endif
1218
/* Store to C as two consecutive elements. */
1219	STFDUX	f0,  CO1, INC
1220	STFSDUX	f0,  CO1, INC
1221
1222#ifdef LN
1223	subi	CO1, CO1, 2 * SIZE
1224#endif
1225
1226#ifdef RT
/* RT: step AORIG past this single-row panel. */
1227	slwi	r0, K, 0 + ZBASE_SHIFT
1228	add	AORIG, AORIG, r0
1229#endif
1230
1231#if defined(LT) || defined(RN)
/* Skip the K - KK steps that were not accumulated in this block. */
1232	sub	TEMP, K, KK
1233	slwi	TEMP, TEMP, 0 + ZBASE_SHIFT
1234	add	AO, AO, TEMP
1235	add	BO, BO, TEMP
1236#endif
1237
1238#ifdef LT
1239	addi	KK, KK, 1
1240#endif
1241
1242#ifdef LN
1243	subi	KK, KK, 1
1244#endif
1245
/* f0 = zeros again. */
1246	li	r0, FZERO
1247	lfpsx	f0, SP, r0
1248	.align 4
1249
/*
 * .L89: end of the single-column pass. Moves B to the next column panel
 * and steps KK for the right-side variants before falling into .L50.
 */
1250.L89:
1251#ifdef LN
/* LN: B += K << ZBASE_SHIFT (one column of the packed panel). */
1252	slwi	r0, K, 0 + ZBASE_SHIFT
1253	add	B, B, r0
1254#endif
1255
1256#if defined(LT) || defined(RN)
/* LT / RN: BO already points 2*SIZE before the next panel; undo the bias. */
1257	addi	B,  BO, 2 * SIZE
1258#endif
1259
1260#ifdef RN
1261	addi	KK, KK, 1
1262#endif
1263
1264#ifdef RT
1265	subi	KK, KK, 1
1266#endif
1267	.align 4
1268
/*
 * .L50: J = N >> 1 column pairs remain; leave through .L999 when none.
 * .L10: per-pair setup. Positions B and C (RT steps both backwards by two
 * columns first), sets CO1 / CO2 to the two output columns, initialises
 * KK for the left-side variants, biases the A pointer by 4*SIZE for the
 * INC4-strided loads, advances C past the pair (non-RT) and reloads zeros
 * into f0. Skips to .L20 when M >> 2 is zero.
 * NOTE(review): the branch that loops back to .L10 is outside this part
 * of the file.
 */
1269.L50:
1270	srawi.	J, N,  1
1271	ble	.L999
1272	.align 4
1273
1274.L10:
1275#ifdef RT
/* RT: B -= K << (1 + ZBASE_SHIFT) and C -= 2 * LDC. */
1276	slwi	r0, K, 1 + ZBASE_SHIFT
1277	sub	B, B, r0
1278
1279	slwi	r0, LDC, 1
1280	sub	C, C, r0
1281#endif
1282
1283	mr	CO1, C
1284	add	CO2, C,   LDC
1285
1286#ifdef LN
1287	add	KK, M, OFFSET
1288#endif
1289
1290#ifdef LT
1291	mr	KK, OFFSET
1292#endif
1293
1294#if defined(LN) || defined(RT)
1295	addi	AORIG, A, -4 * SIZE
1296#else
1297	addi	AO, A, -4 * SIZE
1298#endif
1299#ifndef RT
/* Next pair starts one LDC past the second column of this pair. */
1300	add	C,  CO2, LDC
1301#endif
1302
1303	li	r0, FZERO
1304	lfpsx	f0, SP, r0
1305
/* I = M >> 2: number of four-row blocks. */
1306	srawi.	I, M,  2
1307	ble	.L20
1308	.align 4
1309
/*
 * .L11: start of one four-row block of a column pair.
 * Clears the sixteen accumulators f1..f15 from f0 (f0 already holds
 * zeros), sets up two read pointers per operand (AO / AO2 and BO / BO2,
 * 2*SIZE apart and each advanced by INC4), sets CTR to the number of
 * four-step trips and preloads the first operands. Jumps to .L14 when
 * there is no full trip and to .L13 when there is exactly one.
 */
1310.L11:
1311#if defined(LT) || defined(RN)
1312
/* LT / RN: BO restarts at B - 4*SIZE, BO2 at B - 2*SIZE; trips = KK >> 2. */
1313	addi	AO2, AO,   2 * SIZE
1314	fpmr	f4,  f0
1315	addi	BO,  B,  - 4 * SIZE
1316	fpmr	f8,  f0
1317	addi	BO2, B,  - 2 * SIZE
1318	fpmr	f12, f0
1319
1320	fpmr	f5,  f0
1321	fpmr	f9,  f0
1322	fpmr	f13, f0
1323	fpmr	f2,  f0
1324
1325	fpmr	f6,  f0
1326	fpmr	f10, f0
1327	fpmr	f14, f0
1328	fpmr	f3,  f0
1329
1330	fpmr	f7,  f0
1331	fpmr	f11, f0
1332	fpmr	f15, f0
1333
1334	srawi.	r0,  KK,  2
1335 	fpmr	f1,  f0
1336	mtspr	CTR, r0
1337	ble	.L14
1338#else
1339
1340#ifdef LN
/* LN: move AORIG back by K << (2 + ZBASE_SHIFT), i.e. one four-row panel. */
1341	slwi	r0,   K,  2 + ZBASE_SHIFT
1342	sub	AORIG, AORIG, r0
1343#endif
1344
/* LN / RT: AO = AORIG + (KK << (2 + ZBASE_SHIFT)),
   BO = B + (KK << (1 + ZBASE_SHIFT)) - 4*SIZE, BO2 = BO + 2*SIZE,
   TEMP = K - KK steps to accumulate; trips = TEMP >> 2. */
1345	slwi	r0  , KK, 2 + ZBASE_SHIFT
1346	slwi	TEMP, KK, 1 + ZBASE_SHIFT
1347	add	AO, AORIG, r0
1348	add	BO, B,     TEMP
1349
1350	sub	TEMP, K, KK
1351
1352	fpmr	f5,  f0
1353	fpmr	f9,  f0
1354	fpmr	f13, f0
1355	fpmr	f2,  f0
1356
1357	fpmr	f6,  f0
1358	fpmr	f10, f0
1359	fpmr	f14, f0
1360	fpmr	f3,  f0
1361
1362	fpmr	f7,  f0
1363	fpmr	f11, f0
1364	fpmr	f15, f0
1365
1366	addi	AO2, AO,    2 * SIZE
1367	fpmr	f4,  f0
1368	addi	BO,  BO,  - 4 * SIZE
1369	fpmr	f8,  f0
1370	addi	BO2, BO,    2 * SIZE
1371	fpmr	f12, f0
1372
1373	srawi.	r0,  TEMP,  2
1374 	fpmr	f1,  f0
1375	mtspr	CTR, r0
1376	ble	.L14
1377#endif
1378
/* Preload for the first trip. The fpmr instructions interleaved here
   repeat the clearing of f2, f3, f5..f7, f9..f11 and f13..f15 already done
   on both preprocessor branches above. */
1379	LFPDUX	A1,  AO, INC4
1380	fpmr	f5,  f0
1381	LFPDUX	A3,  AO, INC4
1382	fpmr	f9,  f0
1383	LFPDUX	B1,  BO, INC4
1384	fpmr	f13, f0
1385
1386	LFPDUX	A5,  AO, INC4
1387	fpmr	f2,  f0
1388	LFPDUX	A6,  AO, INC4
1389	fpmr	f6,  f0
1390	LFPDUX	B3,  BO, INC4
1391	fpmr	f10, f0
1392	LFPDUX	A7,  AO, INC4
1393	fpmr	f14, f0
1394
1395	LFPDUX	A8,  AO, INC4
1396	fpmr	f3,  f0
1397	LFPDUX	B5,  BO, INC4
1398	fpmr	f7,  f0
1399	LFPDUX	A9,  AO, INC4
1400	fpmr	f11, f0
1401	LFPDUX	A2, AO2, INC4
1402	fpmr	f15, f0
1403	LFPDUX	B2, BO2, INC4
/* Decrement CTR; with a single trip go straight to the drain at .L13. */
1404	bdz-	.L13
1405	.align 4
1406
/*
 * .L12: main accumulation loop of the four-row, two-column block; four
 * steps per trip, marked "## 1 ##" to "## 4 ##" below.
 * In every step the BO-side operand (B1, B3, B5, B6 in turn) feeds
 * f0..f3 / f4..f7 and the BO2-side operand (B2, B4 alternating) feeds
 * f8..f11 / f12..f15, each against four A operands read alternately
 * through AO and AO2. Loads for later steps are interleaved with the
 * arithmetic, so statement order matters.
 * NOTE(review): the nop lines presumably keep the instruction pairing
 * regular; do not remove them without measuring.
 */
1407.L12:
1408
1409## 1 ##
1410	FXCPMADD	f0,  B1, A1, f0
1411	nop
1412	FXCSMADD	f4,  B1, A1, f4
1413	nop
1414	FXCPMADD	f8,  B2, A1, f8
1415	LFPDUX	B4, BO2, INC4
1416	FXCSMADD	f12, B2, A1, f12
1417	LFPDUX	B6,  BO, INC4
1418
1419	FXCPMADD	f1,  B1, A2, f1
1420	nop
1421	FXCSMADD	f5,  B1, A2, f5
1422	LFPDUX	A4, AO2, INC4
1423	FXCPMADD	f9,  B2, A2, f9
1424	LFPDUX	A10, AO, INC4
1425	FXCSMADD	f13, B2, A2, f13
1426	nop
1427
1428	FXCPMADD	f2,  B1, A3, f2
1429	nop
1430	FXCSMADD	f6,  B1, A3, f6
1431	nop
1432	FXCPMADD	f10, B2, A3, f10
1433	nop
1434	FXCSMADD	f14, B2, A3, f14
1435	nop
1436
1437	FXCPMADD	f3,  B1, A4, f3
1438	nop
1439	FXCSMADD	f7,  B1, A4, f7
1440	LFPDUX	A2, AO2, INC4
1441	FXCPMADD	f11, B2, A4, f11
1442	LFPDUX	A1,  AO, INC4
1443	FXCSMADD	f15, B2, A4, f15
1444	nop
1445
1446## 2 ##
1447
1448	FXCPMADD	f0,  B3, A5, f0
1449	nop
1450	FXCSMADD	f4,  B3, A5, f4
1451	nop
1452	FXCPMADD	f8,  B4, A5, f8
1453	LFPDUX	B2, BO2, INC4
1454	FXCSMADD	f12, B4, A5, f12
1455	LFPDUX	B1,  BO, INC4
1456
1457	FXCPMADD	f1,  B3, A2, f1
1458	nop
1459	FXCSMADD	f5,  B3, A2, f5
1460	LFPDUX	A4, AO2, INC4
1461	FXCPMADD	f9,  B4, A2, f9
1462	LFPDUX	A3,  AO, INC4
1463	FXCSMADD	f13, B4, A2, f13
1464	nop
1465
1466	FXCPMADD	f2,  B3, A6, f2
1467	nop
1468	FXCSMADD	f6,  B3, A6, f6
1469	nop
1470	FXCPMADD	f10, B4, A6, f10
1471	nop
1472	FXCSMADD	f14, B4, A6, f14
1473	nop
1474
1475	FXCPMADD	f3,  B3, A4, f3
1476	nop
1477	FXCSMADD	f7,  B3, A4, f7
1478	LFPDUX	A2, AO2, INC4
1479	FXCPMADD	f11, B4, A4, f11
1480	LFPDUX	A5,  AO, INC4
1481	FXCSMADD	f15, B4, A4, f15
1482	nop
1483
1484## 3 ##
1485
1486	FXCPMADD	f0,  B5, A7, f0
1487	nop
1488	FXCSMADD	f4,  B5, A7, f4
1489	nop
1490	FXCPMADD	f8,  B2, A7, f8
1491	LFPDUX	B4, BO2, INC4
1492	FXCSMADD	f12, B2, A7, f12
1493	LFPDUX	B3,  BO, INC4
1494
1495	FXCPMADD	f1,  B5, A2, f1
1496	nop
1497	FXCSMADD	f5,  B5, A2, f5
1498	LFPDUX	A4, AO2, INC4
1499	FXCPMADD	f9,  B2, A2, f9
1500	LFPDUX	A6,  AO, INC4
1501	FXCSMADD	f13, B2, A2, f13
1502	nop
1503
1504	FXCPMADD	f2,  B5, A8, f2
1505	nop
1506	FXCSMADD	f6,  B5, A8, f6
1507	nop
1508	FXCPMADD	f10, B2, A8, f10
1509	nop
1510	FXCSMADD	f14, B2, A8, f14
1511	nop
1512
1513	FXCPMADD	f3,  B5, A4, f3
1514	nop
1515	FXCSMADD	f7,  B5, A4, f7
1516	LFPDUX	A2, AO2, INC4
1517	FXCPMADD	f11, B2, A4, f11
1518	LFPDUX	A7,  AO, INC4
1519	FXCSMADD	f15, B2, A4, f15
1520	nop
1521
1522## 4 ##
1523	FXCPMADD	f0,  B6, A9, f0
1524	nop
1525	FXCSMADD	f4,  B6, A9, f4
1526	nop
1527	FXCPMADD	f8,  B4, A9, f8
1528	LFPDUX	B2, BO2, INC4
1529	FXCSMADD	f12, B4, A9, f12
1530	LFPDUX	B5,  BO, INC4
1531
1532	FXCPMADD	f1,  B6, A2, f1
1533	nop
1534	FXCSMADD	f5,  B6, A2, f5
1535	LFPDUX	A4, AO2, INC4
1536	FXCPMADD	f9,  B4, A2, f9
1537	LFPDUX	A8,  AO, INC4
1538	FXCSMADD	f13, B4, A2, f13
1539	nop
1540
1541	FXCPMADD	f2,  B6, A10, f2
1542	nop
1543	FXCSMADD	f6,  B6, A10, f6
1544	nop
1545	FXCPMADD	f10, B4, A10, f10
1546	nop
1547	FXCSMADD	f14, B4, A10, f14
1548	nop
1549
1550	FXCPMADD	f3,  B6, A4, f3
1551	LFPDUX	A2, AO2, INC4
1552	FXCSMADD	f7,  B6, A4, f7
1553	LFPDUX	A9,  AO, INC4
1554	FXCPMADD	f11, B4, A4, f11
1555	nop
1556	FXCSMADD	f15, B4, A4, f15
/* Loop while CTR, after decrement, is non-zero. */
1557	bdnz+	.L12
1558	.align 4
1559
/* .L13: final pass of the four-step unrolled K loop for the 4x2 tile.   */
/* It performs the same FXCPMADD/FXCSMADD accumulation into f0-f15 as    */
/* the "## n ##" steps of the loop above, but has no closing bdnz and    */
/* only issues the loads whose results are still consumed inside this    */
/* pass; every other load slot is left as a nop.  Falls through to .L14. */
/* FXCPMADD/FXCSMADD/LFPDUX are macros defined outside this chunk.       */
1560.L13:
1561## 1 ##
1562
1563	FXCPMADD	f0,  B1, A1, f0
1564	nop
1565	FXCSMADD	f4,  B1, A1, f4
1566	nop
1567	FXCPMADD	f8,  B2, A1, f8
1568	LFPDUX	B4, BO2, INC4
1569	FXCSMADD	f12, B2, A1, f12
1570	LFPDUX	B6,  BO, INC4
1571
1572	FXCPMADD	f1,  B1, A2, f1
1573	nop
1574	FXCSMADD	f5,  B1, A2, f5
1575	LFPDUX	A4, AO2, INC4
1576	FXCPMADD	f9,  B2, A2, f9
1577	LFPDUX	A10, AO, INC4
1578	FXCSMADD	f13, B2, A2, f13
1579	nop
1580
1581	FXCPMADD	f2,  B1, A3, f2
1582	nop
1583	FXCSMADD	f6,  B1, A3, f6
1584	nop
1585	FXCPMADD	f10, B2, A3, f10
1586	nop
1587	FXCSMADD	f14, B2, A3, f14
1588	nop
1589
1590	FXCPMADD	f3,  B1, A4, f3
1591	nop
1592	FXCSMADD	f7,  B1, A4, f7
1593	LFPDUX	A2, AO2, INC4
1594	FXCPMADD	f11, B2, A4, f11
1595	nop
1596	FXCSMADD	f15, B2, A4, f15
1597	nop
1598
1599## 2 ##
1600
1601	FXCPMADD	f0,  B3, A5, f0
1602	nop
1603	FXCSMADD	f4,  B3, A5, f4
1604	nop
1605	FXCPMADD	f8,  B4, A5, f8
1606	LFPDUX	B2, BO2, INC4
1607	FXCSMADD	f12, B4, A5, f12
1608	nop
1609
1610	FXCPMADD	f1,  B3, A2, f1
1611	nop
1612	FXCSMADD	f5,  B3, A2, f5
1613	LFPDUX	A4, AO2, INC4
1614	FXCPMADD	f9,  B4, A2, f9
1615	nop
1616	FXCSMADD	f13, B4, A2, f13
1617	nop
1618
1619	FXCPMADD	f2,  B3, A6, f2
1620	nop
1621	FXCSMADD	f6,  B3, A6, f6
1622	nop
1623	FXCPMADD	f10, B4, A6, f10
1624	nop
1625	FXCSMADD	f14, B4, A6, f14
1626	nop
1627
1628	FXCPMADD	f3,  B3, A4, f3
1629	nop
1630	FXCSMADD	f7,  B3, A4, f7
1631	LFPDUX	A2, AO2, INC4
1632	FXCPMADD	f11, B4, A4, f11
1633	nop
1634	FXCSMADD	f15, B4, A4, f15
1635	nop
1636
1637## 3 ##
1638
1639	FXCPMADD	f0,  B5, A7, f0
1640	nop
1641	FXCSMADD	f4,  B5, A7, f4
1642	nop
1643	FXCPMADD	f8,  B2, A7, f8
1644	LFPDUX	B4, BO2, INC4
1645	FXCSMADD	f12, B2, A7, f12
1646	nop
1647
1648	FXCPMADD	f1,  B5, A2, f1
1649	nop
1650	FXCSMADD	f5,  B5, A2, f5
1651	LFPDUX	A4, AO2, INC4
1652	FXCPMADD	f9,  B2, A2, f9
1653	nop
1654	FXCSMADD	f13, B2, A2, f13
1655	nop
1656
1657	FXCPMADD	f2,  B5, A8, f2
1658	nop
1659	FXCSMADD	f6,  B5, A8, f6
1660	nop
1661	FXCPMADD	f10, B2, A8, f10
1662	nop
1663	FXCSMADD	f14, B2, A8, f14
1664	nop
1665
1666	FXCPMADD	f3,  B5, A4, f3
1667	nop
1668	FXCSMADD	f7,  B5, A4, f7
1669	LFPDUX	A2, AO2, INC4
1670	FXCPMADD	f11, B2, A4, f11
1671	nop
1672	FXCSMADD	f15, B2, A4, f15
1673	nop
1674
1675## 4 ##
1676
	/* Last step: the only load left is A4 (consumed by the f3/f7/f11/f15 group below). */
1677	FXCPMADD	f0,  B6, A9, f0
1678	nop
1679	FXCSMADD	f4,  B6, A9, f4
1680	nop
1681	FXCPMADD	f8,  B4, A9, f8
1682	nop
1683	FXCSMADD	f12, B4, A9, f12
1684	nop
1685
1686	FXCPMADD	f1,  B6, A2, f1
1687	nop
1688	FXCSMADD	f5,  B6, A2, f5
1689	LFPDUX	A4, AO2, INC4
1690	FXCPMADD	f9,  B4, A2, f9
1691	nop
1692	FXCSMADD	f13, B4, A2, f13
1693	nop
1694
1695	FXCPMADD	f2,  B6, A10, f2
1696	nop
1697	FXCSMADD	f6,  B6, A10, f6
1698	nop
1699	FXCPMADD	f10, B4, A10, f10
1700	nop
1701	FXCSMADD	f14, B4, A10, f14
1702	nop
1703
1704	FXCPMADD	f3,  B6, A4, f3
1705	nop
1706	FXCSMADD	f7,  B6, A4, f7
1707	nop
1708	FXCPMADD	f11, B4, A4, f11
1709	nop
1710	FXCSMADD	f15, B4, A4, f15
1711	nop
1712	.align 4
1713
/* .L14-.L17: leftover K steps for the 4x2 tile, one k step per pass.    */
/* The trip count is the low two bits of KK (LT/RN builds) or of TEMP    */
/* (other builds), i.e. what the unrolled-by-4 loop did not cover.       */
1714.L14:
1715#if defined(LT) || defined(RN)
1716	andi.	r0,  KK,  3
1717	mtspr	CTR, r0
	/* andi. leaves a non-negative value, so "ble" is taken only when it is zero */
1718	ble+	.L18
1719#else
1720	andi.	r0, TEMP, 3
1721	mtspr	CTR, r0
1722	ble+	.L18
1723#endif
1724
	/* Preload the first operands.  A10 and B4 are filled from BO/BO2 here, */
	/* so A10 serves as a B-side operand in this loop.                      */
1725.L15:
1726	LFPDUX	A2,  AO,  INC4
1727	LFPDUX	A4,  AO2, INC4
1728	LFPDUX	A10, BO,  INC4
1729	LFPDUX	B4,  BO2, INC4
	/* exactly one leftover step: skip the looping body, run only the final step */
1730	bdz-	.L17
1731	.align 4
1732
	/* One k step; A2/A4 are reloaded mid-step for the second half of the */
	/* tile, then all four operands are refilled for the next pass.       */
1733.L16:
1734	FXCPMADD	f0,  A10, A2, f0
1735	FXCSMADD	f4,  A10, A2, f4
1736	FXCPMADD	f8,  B4, A2, f8
1737	FXCSMADD	f12, B4, A2, f12
1738	LFPDUX	A2, AO,  INC4
1739
1740	FXCPMADD	f1,  A10, A4, f1
1741	FXCSMADD	f5,  A10, A4, f5
1742	FXCPMADD	f9,  B4, A4, f9
1743	FXCSMADD	f13, B4, A4, f13
1744	LFPDUX	A4, AO2, INC4
1745
1746	FXCPMADD	f2,  A10, A2, f2
1747	FXCSMADD	f6,  A10, A2, f6
1748	FXCPMADD	f10, B4, A2, f10
1749	FXCSMADD	f14, B4, A2, f14
1750	LFPDUX	A2, AO,  INC4
1751
1752	FXCPMADD	f3,  A10, A4, f3
1753	FXCSMADD	f7,  A10, A4, f7
1754	LFPDUX	A10, BO,  INC4
1755	FXCPMADD	f11, B4, A4, f11
1756	FXCSMADD	f15, B4, A4, f15
1757	LFPDUX	A4, AO2, INC4
1758	LFPDUX	B4, BO2, INC4
1759	bdnz+	.L16
1760	.align 4
1761
	/* Final k step: same arithmetic, but only the two mid-step A loads remain. */
1762.L17:
1763	FXCPMADD	f0,  A10, A2, f0
1764	FXCSMADD	f4,  A10, A2, f4
1765	FXCPMADD	f8,  B4, A2, f8
1766	FXCSMADD	f12, B4, A2, f12
1767	LFPDUX	A2, AO,  INC4
1768
1769	FXCPMADD	f1,  A10, A4, f1
1770	FXCSMADD	f5,  A10, A4, f5
1771	FXCPMADD	f9,  B4, A4, f9
1772	FXCSMADD	f13, B4, A4, f13
1773	LFPDUX	A4, AO2, INC4
1774
1775	FXCPMADD	f2,  A10, A2, f2
1776	FXCSMADD	f6,  A10, A2, f6
1777	FXCPMADD	f10, B4, A2, f10
1778	FXCSMADD	f14, B4, A2, f14
1779
1780	FXCPMADD	f3,  A10, A4, f3
1781	FXCSMADD	f7,  A10, A4, f7
1782	FXCPMADD	f11, B4, A4, f11
1783	FXCSMADD	f15, B4, A4, f15
1784	.align 4
1785
/* .L18: finish the 4x2 tile.                                            */
/*   1. fold the two partial accumulators of every output,               */
/*   2. subtract the result from the values held in the packed buffer,   */
/*   3. run the substitution sequence selected by LN / LT / RN / RT,     */
/*   4. store the results to the packed buffer and to C (CO1 / CO2),     */
/*   5. advance pointers and KK, then loop back to .L11 while I > 0.     */
/* NOTE(review): throughout this block each "fxpmul + FXCXNPMA" pair     */
/* looks like a complex multiply by a diagonal coefficient and each      */
/* "fxcpnmsub + FXCXNSMA" pair like a complex multiply-subtract that     */
/* eliminates an already solved unknown; the FXCX* macros are defined    */
/* outside this chunk, so confirm against their definitions.             */
1786.L18:
1787	fpadd	f0,  f0,  f4
1788	fpadd	f8,  f8,  f12
1789	fpadd	f1,  f1,  f5
1790	fpadd	f9,  f9,  f13
1791
1792	fpadd	f2,  f2,  f6
1793	fpadd	f10, f10, f14
1794	fpadd	f3,  f3,  f7
1795	fpadd	f11, f11, f15
1796
	/* LN/RT: re-derive AO and BO from AORIG and B at offset KK - 4 (LN) or */
	/* KK - 2 (RT), scaled by 2 + ZBASE_SHIFT for A and 1 + ZBASE_SHIFT for */
	/* B.  BO carries a -4 * SIZE bias so the update-form accesses below    */
	/* (which add INC4 first) land on the intended element.                 */
1797#if defined(LN) || defined(RT)
1798#ifdef LN
1799	subi	r0, KK, 4
1800#else
1801	subi	r0, KK, 2
1802#endif
1803	slwi	TEMP, r0, 2 + ZBASE_SHIFT
1804	slwi	r0,   r0, 1 + ZBASE_SHIFT
1805	add	AO, AORIG, TEMP
1806	add	BO, B,     r0
1807	addi	AO2, AO,   2 * SIZE
1808	addi	BO,  BO, - 4 * SIZE
1809	addi	BO2, BO,   2 * SIZE
1810#endif
1811
	/* Load the eight current values of this tile from the packed buffer   */
	/* (BO/BO2 for LN/LT, AO/AO2 otherwise; note the different register    */
	/* interleave) and rewind: four update-form loads per pointer are      */
	/* undone by subtracting 16 * SIZE.                                    */
1812#if defined(LN) || defined(LT)
1813	LFPDUX	f16, BO,  INC4
1814	LFPDUX	f20, BO2, INC4
1815	LFPDUX	f17, BO,  INC4
1816	LFPDUX	f21, BO2, INC4
1817	LFPDUX	f18, BO,  INC4
1818	LFPDUX	f22, BO2, INC4
1819	LFPDUX	f19, BO,  INC4
1820	LFPDUX	f23, BO2, INC4
1821
1822	subi	BO,  BO,  16 * SIZE
1823	subi	BO2, BO2, 16 * SIZE
1824#else
1825	LFPDUX	f16, AO,  INC4
1826	LFPDUX	f17, AO2, INC4
1827	LFPDUX	f18, AO,  INC4
1828	LFPDUX	f19, AO2, INC4
1829	LFPDUX	f20, AO,  INC4
1830	LFPDUX	f21, AO2, INC4
1831	LFPDUX	f22, AO,  INC4
1832	LFPDUX	f23, AO2, INC4
1833
1834	subi	AO,  AO,  16 * SIZE
1835	subi	AO2, AO2, 16 * SIZE
1836#endif
1837
	/* f0-f3 / f8-f11 = loaded value - accumulated product */
1838	fpsub	f0,  f16,  f0
1839	fpsub	f1,  f17,  f1
1840	fpsub	f2,  f18,  f2
1841	fpsub	f3,  f19,  f3
1842
1843	fpsub	f8,  f20,  f8
1844	fpsub	f9,  f21,  f9
1845	fpsub	f10, f22,  f10
1846	fpsub	f11, f23,  f11
1847
	/* LN: walk 16 pair slots through AO/AO2 (8 steps each, rewound by     */
	/* 32 * SIZE) but load only ten of them into A1..A10; the bare "add"   */
	/* instructions step over the unused slots.  Substitution then runs    */
	/* from f3/f11 back to f0/f8.                                          */
1848#ifdef LN
1849	LFPDUX	A1,  AO,  INC4
1850	add	AO2, AO2, INC4
1851	add	AO,  AO,  INC4
1852	add	AO2, AO2, INC4
1853
1854	LFPDUX	A2,  AO,  INC4
1855	LFPDUX	A3,  AO2, INC4
1856	add	AO,  AO,  INC4
1857	add	AO2, AO2, INC4
1858
1859	LFPDUX	A4,  AO,  INC4
1860	LFPDUX	A5,  AO2, INC4
1861	LFPDUX	A6,  AO,  INC4
1862	add	AO2, AO2, INC4
1863
1864	LFPDUX	A7,  AO,  INC4
1865	LFPDUX	A8,  AO2, INC4
1866	LFPDUX	A9,  AO,  INC4
1867	LFPDUX	A10, AO2, INC4
1868
1869	subi	AO,  AO,  32 * SIZE
1870	subi	AO2, AO2, 32 * SIZE
1871
1872	fxpmul	  f4,  A10, f3
1873	fxpmul	  f5,  A10, f11
1874	FXCXNPMA  f3,  A10, f3,  f4
1875	FXCXNPMA  f11, A10, f11, f5
1876
1877	fxcpnmsub f2,  A9, f3,  f2
1878	fxcpnmsub f10, A9, f11, f10
1879	FXCXNSMA  f2,  A9, f3,  f2
1880	FXCXNSMA  f10, A9, f11, f10
1881
1882	fxcpnmsub f1,  A8, f3,  f1
1883	fxcpnmsub f9,  A8, f11, f9
1884	FXCXNSMA  f1,  A8, f3,  f1
1885	FXCXNSMA  f9,  A8, f11, f9
1886
1887	fxcpnmsub f0,  A7, f3,  f0
1888	fxcpnmsub f8,  A7, f11, f8
1889	FXCXNSMA  f0,  A7, f3,  f0
1890	FXCXNSMA  f8,  A7, f11, f8
1891
1892	fxpmul	  f4,  A6, f2
1893	fxpmul	  f5,  A6, f10
1894	FXCXNPMA  f2,  A6, f2,  f4
1895	FXCXNPMA  f10, A6, f10, f5
1896
1897	fxcpnmsub f1,  A5, f2,  f1
1898	fxcpnmsub f9,  A5, f10, f9
1899	FXCXNSMA  f1,  A5, f2,  f1
1900	FXCXNSMA  f9,  A5, f10, f9
1901
1902	fxcpnmsub f0,  A4, f2,  f0
1903	fxcpnmsub f8,  A4, f10, f8
1904	FXCXNSMA  f0,  A4, f2,  f0
1905	FXCXNSMA  f8,  A4, f10, f8
1906
1907	fxpmul	  f4,  A3, f1
1908	fxpmul	  f5,  A3, f9
1909	FXCXNPMA  f1,  A3, f1,  f4
1910	FXCXNPMA  f9,  A3, f9,  f5
1911
1912	fxcpnmsub f0,  A2, f1,  f0
1913	fxcpnmsub f8,  A2, f9,  f8
1914	FXCXNSMA  f0,  A2, f1,  f0
1915	FXCXNSMA  f8,  A2, f9,  f8
1916
1917	fxpmul	  f4,  A1, f0
1918	fxpmul	  f5,  A1, f8
1919	FXCXNPMA  f0,  A1, f0,  f4
1920	FXCXNPMA  f8,  A1, f8,  f5
1921#endif
1922
	/* LT: same 16-slot walk, loading the complementary ten slots;         */
	/* substitution runs from f0/f8 forward to f3/f11.                     */
1923#ifdef LT
1924	LFPDUX	A1,  AO,  INC4
1925	LFPDUX	A2,  AO2, INC4
1926	LFPDUX	A3,  AO,  INC4
1927	LFPDUX	A4,  AO2, INC4
1928
1929	add	AO,  AO,  INC4
1930	LFPDUX	A5,  AO2, INC4
1931	LFPDUX	A6,  AO,  INC4
1932	LFPDUX	A7,  AO2, INC4
1933
1934	add	AO,  AO,  INC4
1935	add	AO2, AO2, INC4
1936	LFPDUX	A8,  AO,  INC4
1937	LFPDUX	A9,  AO2, INC4
1938
1939	add	AO,  AO,  INC4
1940	add	AO2, AO2, INC4
1941	add	AO,  AO,  INC4
1942	LFPDUX	A10, AO2, INC4
1943
1944	subi	AO,  AO,  32 * SIZE
1945	subi	AO2, AO2, 32 * SIZE
1946
1947	fxpmul	  f4,  A1, f0
1948	fxpmul	  f5,  A1, f8
1949	FXCXNPMA  f0,  A1, f0, f4
1950	FXCXNPMA  f8,  A1, f8, f5
1951
1952	fxcpnmsub f1,  A2, f0, f1
1953	fxcpnmsub f9,  A2, f8, f9
1954	FXCXNSMA  f1,  A2, f0, f1
1955	FXCXNSMA  f9,  A2, f8, f9
1956
1957	fxcpnmsub f2,  A3, f0, f2
1958	fxcpnmsub f10, A3, f8, f10
1959	FXCXNSMA  f2,  A3, f0, f2
1960	FXCXNSMA  f10, A3, f8, f10
1961
1962	fxcpnmsub f3,  A4, f0, f3
1963	fxcpnmsub f11, A4, f8, f11
1964	FXCXNSMA  f3,  A4, f0, f3
1965	FXCXNSMA  f11, A4, f8, f11
1966
1967	fxpmul	  f6,  A5, f1
1968	fxpmul	  f7,  A5, f9
1969	FXCXNPMA  f1,  A5, f1, f6
1970	FXCXNPMA  f9,  A5, f9, f7
1971
1972	fxcpnmsub f2,  A6, f1, f2
1973	fxcpnmsub f10, A6, f9, f10
1974	FXCXNSMA  f2,  A6, f1, f2
1975	FXCXNSMA  f10, A6, f9, f10
1976
1977	fxcpnmsub f3,  A7, f1, f3
1978	fxcpnmsub f11, A7, f9, f11
1979	FXCXNSMA  f3,  A7, f1, f3
1980	FXCXNSMA  f11, A7, f9, f11
1981
1982	fxpmul	  f4,  A8, f2
1983	fxpmul	  f5,  A8, f10
1984	FXCXNPMA  f2,  A8, f2,  f4
1985	FXCXNPMA  f10, A8, f10, f5
1986
1987	fxcpnmsub f3,  A9, f2,  f3
1988	fxcpnmsub f11, A9, f10, f11
1989	FXCXNSMA  f3,  A9, f2,  f3
1990	FXCXNSMA  f11, A9, f10, f11
1991
1992	fxpmul	  f6,  A10, f3
1993	fxpmul	  f7,  A10, f11
1994	FXCXNPMA  f3,  A10, f3,  f6
1995	FXCXNPMA  f11, A10, f11, f7
1996#endif
1997
	/* RN: three coefficients from a four-slot walk through BO/BO2 (one BO */
	/* slot skipped, both pointers rewound by 8 * SIZE); the f0-f3 group   */
	/* is processed first, then f8-f11.                                    */
1998#ifdef RN
1999	LFPDUX	A1,  BO,  INC4
2000	LFPDUX	A2,  BO2, INC4
2001	add	BO,  BO,  INC4
2002	LFPDUX	A3,  BO2, INC4
2003
2004	subi	BO,  BO,   8 * SIZE
2005	subi	BO2, BO2,  8 * SIZE
2006
2007	fxpmul	  f4,  A1, f0
2008	fxpmul	  f5,  A1, f1
2009	fxpmul	  f6,  A1, f2
2010	fxpmul	  f7,  A1, f3
2011
2012	FXCXNPMA  f0,  A1, f0, f4
2013	FXCXNPMA  f1,  A1, f1, f5
2014	FXCXNPMA  f2,  A1, f2, f6
2015	FXCXNPMA  f3,  A1, f3, f7
2016
2017	fxcpnmsub f8,  A2, f0, f8
2018	fxcpnmsub f9,  A2, f1, f9
2019	fxcpnmsub f10, A2, f2, f10
2020	fxcpnmsub f11, A2, f3, f11
2021
2022	FXCXNSMA  f8,  A2, f0, f8
2023	FXCXNSMA  f9,  A2, f1, f9
2024	FXCXNSMA  f10, A2, f2, f10
2025	FXCXNSMA  f11, A2, f3, f11
2026
2027	fxpmul	  f4,  A3, f8
2028	fxpmul	  f5,  A3, f9
2029	fxpmul	  f6,  A3, f10
2030	fxpmul	  f7,  A3, f11
2031
2032	FXCXNPMA  f8,  A3, f8,  f4
2033	FXCXNPMA  f9,  A3, f9,  f5
2034	FXCXNPMA  f10, A3, f10, f6
2035	FXCXNPMA  f11, A3, f11, f7
2036#endif
2037
	/* RT: same four-slot walk with one BO2 slot skipped instead; the      */
	/* f8-f11 group is processed first, then f0-f3.                        */
2038#ifdef RT
2039	LFPDUX	A1,  BO,  INC4
2040	add	BO2, BO2, INC4
2041	LFPDUX	A2,  BO,  INC4
2042	LFPDUX	A3,  BO2, INC4
2043
2044	subi	BO,  BO,   8 * SIZE
2045	subi	BO2, BO2,  8 * SIZE
2046
2047	fxpmul	  f4,  A3, f8
2048	fxpmul	  f5,  A3, f9
2049	fxpmul	  f6,  A3, f10
2050	fxpmul	  f7,  A3, f11
2051
2052	FXCXNPMA  f8,  A3, f8,  f4
2053	FXCXNPMA  f9,  A3, f9,  f5
2054	FXCXNPMA  f10, A3, f10, f6
2055	FXCXNPMA  f11, A3, f11, f7
2056
2057	fxcpnmsub f0,  A2, f8,  f0
2058	fxcpnmsub f1,  A2, f9,  f1
2059	fxcpnmsub f2,  A2, f10, f2
2060	fxcpnmsub f3,  A2, f11, f3
2061
2062	FXCXNSMA  f0,  A2, f8,  f0
2063	FXCXNSMA  f1,  A2, f9,  f1
2064	FXCXNSMA  f2,  A2, f10, f2
2065	FXCXNSMA  f3,  A2, f11, f3
2066
2067	fxpmul	  f4,  A1, f0
2068	fxpmul	  f5,  A1, f1
2069	fxpmul	  f6,  A1, f2
2070	fxpmul	  f7,  A1, f3
2071
2072	FXCXNPMA  f0,  A1, f0,  f4
2073	FXCXNPMA  f1,  A1, f1,  f5
2074	FXCXNPMA  f2,  A1, f2,  f6
2075	FXCXNPMA  f3,  A1, f3,  f7
2076#endif
2077
	/* LN: step CO1/CO2 back by 8 * SIZE before the stores (and again after */
	/* them, below), so this variant moves backwards through C.             */
2078#ifdef LN
2079	subi	CO1, CO1, 8 * SIZE
2080	subi	CO2, CO2, 8 * SIZE
2081#endif
2082
	/* Write the results back to the packed buffer they were loaded from,  */
	/* using the same register interleave as the loads, then rewind.       */
2083#if defined(LN) || defined(LT)
2084	STFPDUX	f0,  BO,  INC4
2085	STFPDUX	f8,  BO2, INC4
2086	STFPDUX	f1,  BO,  INC4
2087	STFPDUX	f9,  BO2, INC4
2088	STFPDUX	f2,  BO,  INC4
2089	STFPDUX	f10, BO2, INC4
2090	STFPDUX	f3,  BO,  INC4
2091	STFPDUX	f11, BO2, INC4
2092
2093	subi	BO,  BO,  16 * SIZE
2094	subi	BO2, BO2, 16 * SIZE
2095#else
2096	STFPDUX	f0,  AO,  INC4
2097	STFPDUX	f1,  AO2, INC4
2098	STFPDUX	f2,  AO,  INC4
2099	STFPDUX	f3,  AO2, INC4
2100	STFPDUX	f8,  AO,  INC4
2101	STFPDUX	f9,  AO2, INC4
2102	STFPDUX	f10, AO,  INC4
2103	STFPDUX	f11, AO2, INC4
2104
2105	subi	AO,  AO,  16 * SIZE
2106	subi	AO2, AO2, 16 * SIZE
2107#endif
2108
	/* Store to C: each register is written with an STFDUX / STFSDUX pair  */
	/* (presumably primary half, then secondary half) at successive INC    */
	/* strides; f0-f3 go through CO1 and f8-f11 through CO2.               */
2109	STFDUX	f0,  CO1, INC
2110	STFSDUX	f0,  CO1, INC
2111	STFDUX	f1,  CO1, INC
2112	STFSDUX	f1,  CO1, INC
2113	STFDUX	f2,  CO1, INC
2114	STFSDUX	f2,  CO1, INC
2115	STFDUX	f3,  CO1, INC
2116	STFSDUX	f3,  CO1, INC
2117
2118	STFDUX	f8,  CO2, INC
2119	STFSDUX	f8,  CO2, INC
2120	STFDUX	f9,  CO2, INC
2121	STFSDUX	f9,  CO2, INC
2122	STFDUX	f10, CO2, INC
2123	STFSDUX	f10, CO2, INC
2124	STFDUX	f11, CO2, INC
2125	STFSDUX	f11, CO2, INC
2126
2127#ifdef LN
2128	subi	CO1, CO1, 8 * SIZE
2129	subi	CO2, CO2, 8 * SIZE
2130#endif
2131
	/* RT: AORIG += K << (2 + ZBASE_SHIFT) */
2132#ifdef RT
2133	slwi	r0, K, 2 + ZBASE_SHIFT
2134	add	AORIG, AORIG, r0
2135#endif
2136
	/* LT/RN: skip the K - KK steps not consumed by this tile in both      */
	/* packed streams.                                                     */
2137#if defined(LT) || defined(RN)
2138	sub	TEMP, K, KK
2139	slwi	r0,   TEMP, 2 + ZBASE_SHIFT
2140	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
2141	add	AO, AO, r0
2142	add	BO, BO, TEMP
2143#endif
2144
2145#ifdef LT
2146	addi	KK, KK, 4
2147#endif
2148
2149#ifdef LN
2150	subi	KK, KK, 4
2151#endif
2152
	/* I -= 1 sets CR0 for the bgt below; the li/lfpsx in between do not   */
	/* touch CR0.  f0 is reloaded from the stack slot at SP + FZERO        */
	/* (presumably a zero pair) to seed the next tile's accumulators.      */
2153	addic.	I, I, -1
2154	li	r0, FZERO
2155
2156	lfpsx	f0, SP, r0
2157	bgt+	.L11
2158	.align 4
2159
/* .L20-.L23: 2x2 tile, taken when bit 1 of M is set (otherwise jump to  */
/* .L30).  Sets up AO/AO2/BO/BO2, copies f0 into the accumulators        */
/* f4, f8, f12, f1, f5, f9, f13, then runs the K loop unrolled by four.  */
2160.L20:
2161	andi.	I, M,  2
2162	beq	.L30
2163
	/* LT/RN: BO restarts at B (with the -4 * SIZE bias expected by the    */
	/* update-form loads); trip count is KK >> 2.  The fpmr copies are     */
	/* interleaved with the integer setup.                                 */
2164#if defined(LT) || defined(RN)
2165	addi	AO2, AO,   2 * SIZE
2166	fpmr	f4,  f0
2167	addi	BO,  B,  - 4 * SIZE
2168	fpmr	f8,  f0
2169	addi	BO2, B,  - 2 * SIZE
2170	fpmr	f12, f0
2171
2172	srawi.	r0,  KK,  2
2173 	fpmr	f1,  f0
2174	fpmr	f5,  f0
2175	fpmr	f9,  f0
2176	mtspr	CTR, r0
2177	fpmr	f13, f0
2178	ble	.L24
2179#else
	/* LN only: move AORIG back by K << (1 + ZBASE_SHIFT) first. */
2180#ifdef LN
2181	slwi	r0,   K,  1 + ZBASE_SHIFT
2182	sub	AORIG, AORIG, r0
2183#endif
2184
	/* LN/RT: start both streams at offset KK << (1 + ZBASE_SHIFT) and     */
	/* iterate over TEMP = K - KK steps.                                   */
2185	slwi	r0  , KK, 1 + ZBASE_SHIFT
2186	add	AO, AORIG, r0
2187	add	BO, B,     r0
2188
2189	sub	TEMP, K, KK
2190
2191	addi	AO2, AO,   2 * SIZE
2192	fpmr	f4,  f0
2193	addi	BO,  BO,  - 4 * SIZE
2194	fpmr	f8,  f0
2195	addi	BO2, BO,    2 * SIZE
2196	fpmr	f12, f0
2197
2198 	fpmr	f1,  f0
2199	fpmr	f5,  f0
2200	fpmr	f9,  f0
2201	fpmr	f13, f0
2202	srawi.	r0,  TEMP,  2
2203	mtspr	CTR, r0
2204	ble	.L24
2205#endif
2206
	/* Preload operands for four k steps.  A9/A10 are filled from BO/BO2   */
	/* and act as the fourth step's B-side operands; A8 is loaded at the   */
	/* top of the loop body instead.                                       */
2207	LFPDUX	A1,   AO, INC4
2208	LFPDUX	B1,   BO, INC4
2209	LFPDUX	A2,  AO2, INC4
2210	LFPDUX	B2,  BO2, INC4
2211	LFPDUX	A3,   AO, INC4
2212	LFPDUX	B3,   BO, INC4
2213	LFPDUX	A4,  AO2, INC4
2214	LFPDUX	B4,  BO2, INC4
2215
2216	LFPDUX	A5,   AO, INC4
2217	LFPDUX	B5,   BO, INC4
2218	LFPDUX	A6,  AO2, INC4
2219	LFPDUX	B6,  BO2, INC4
2220	LFPDUX	A7,   AO, INC4
2221	LFPDUX	A9,   BO, INC4
2222	LFPDUX	A10, BO2, INC4
	/* a single unrolled pass: go straight to the non-reloading tail */
2223	bdz-	.L23
2224	.align 4
2225
	/* Loop body: four k steps; each operand register is reloaded for the  */
	/* next pass right after its last use in this one.                     */
2226.L22:
2227	FXCPMADD	f0,  B1, A1, f0
2228	nop
2229	FXCSMADD	f4,  B1, A1, f4
2230	LFPDUX	A8,  AO2, INC4
2231	FXCPMADD	f8,  B2, A1, f8
2232	nop
2233	FXCSMADD	f12, B2, A1, f12
2234	LFPDUX	A1,   AO, INC4
2235
2236	FXCPMADD	f1,  B1, A2, f1
2237	nop
2238	FXCSMADD	f5,  B1, A2, f5
2239	LFPDUX	B1,   BO, INC4
2240	FXCPMADD	f9,  B2, A2, f9
2241	nop
2242	FXCSMADD	f13, B2, A2, f13
2243	LFPDUX	B2,  BO2, INC4
2244
2245	FXCPMADD	f0,  B3, A3, f0
2246	nop
2247	FXCSMADD	f4,  B3, A3, f4
2248	LFPDUX	A2,  AO2, INC4
2249	FXCPMADD	f8,  B4, A3, f8
2250	nop
2251	FXCSMADD	f12, B4, A3, f12
2252	LFPDUX	A3,   AO, INC4
2253
2254	FXCPMADD	f1,  B3, A4, f1
2255	nop
2256	FXCSMADD	f5,  B3, A4, f5
2257	LFPDUX	B3,   BO, INC4
2258	FXCPMADD	f9,  B4, A4, f9
2259	nop
2260	FXCSMADD	f13, B4, A4, f13
2261	LFPDUX	B4,  BO2, INC4
2262
2263	FXCPMADD	f0,  B5, A5, f0
2264	nop
2265	FXCSMADD	f4,  B5, A5, f4
2266	LFPDUX	A4,  AO2, INC4
2267	FXCPMADD	f8,  B6, A5, f8
2268	nop
2269	FXCSMADD	f12, B6, A5, f12
2270	LFPDUX	A5,   AO, INC4
2271
2272	FXCPMADD	f1,  B5, A6, f1
2273	nop
2274	FXCSMADD	f5,  B5, A6, f5
2275	LFPDUX	B5,   BO, INC4
2276	FXCPMADD	f9,  B6, A6, f9
2277	nop
2278	FXCSMADD	f13, B6, A6, f13
2279	LFPDUX	B6,  BO2, INC4
2280
2281	FXCPMADD	f0,  A9,  A7, f0
2282	nop
2283	FXCSMADD	f4,  A9,  A7, f4
2284	LFPDUX	A6,  AO2, INC4
2285	FXCPMADD	f8,  A10, A7, f8
2286	nop
2287	FXCSMADD	f12, A10, A7, f12
2288	LFPDUX	A7,   AO, INC4
2289
2290	FXCPMADD	f1,  A9,  A8, f1
2291	nop
2292	FXCSMADD	f5,  A9,  A8, f5
2293	LFPDUX	A9,   BO, INC4
2294	FXCPMADD	f9,  A10, A8, f9
2295	nop
2296	FXCSMADD	f13, A10, A8, f13
2297	LFPDUX	A10, BO2, INC4
2298	bdnz+	.L22
2299	.align 4
2300
	/* Tail: last four k steps; only A8 still has to be fetched. */
2301.L23:
2302	FXCPMADD	f0,  B1, A1, f0
2303	FXCSMADD	f4,  B1, A1, f4
2304	LFPDUX	A8,  AO2, INC4
2305	FXCPMADD	f8,  B2, A1, f8
2306	FXCSMADD	f12, B2, A1, f12
2307
2308	FXCPMADD	f1,  B1, A2, f1
2309	FXCSMADD	f5,  B1, A2, f5
2310	FXCPMADD	f9,  B2, A2, f9
2311	FXCSMADD	f13, B2, A2, f13
2312
2313	FXCPMADD	f0,  B3, A3, f0
2314	FXCSMADD	f4,  B3, A3, f4
2315	FXCPMADD	f8,  B4, A3, f8
2316	FXCSMADD	f12, B4, A3, f12
2317
2318	FXCPMADD	f1,  B3, A4, f1
2319	FXCSMADD	f5,  B3, A4, f5
2320	FXCPMADD	f9,  B4, A4, f9
2321	FXCSMADD	f13, B4, A4, f13
2322
2323	FXCPMADD	f0,  B5, A5, f0
2324	FXCSMADD	f4,  B5, A5, f4
2325	FXCPMADD	f8,  B6, A5, f8
2326	FXCSMADD	f12, B6, A5, f12
2327
2328	FXCPMADD	f1,  B5, A6, f1
2329	FXCSMADD	f5,  B5, A6, f5
2330	FXCPMADD	f9,  B6, A6, f9
2331	FXCSMADD	f13, B6, A6, f13
2332
2333	FXCPMADD	f0,  A9, A7, f0
2334	FXCSMADD	f4,  A9, A7, f4
2335	FXCPMADD	f8,  A10, A7, f8
2336	FXCSMADD	f12, A10, A7, f12
2337
2338	FXCPMADD	f1,  A9, A8, f1
2339	FXCSMADD	f5,  A9, A8, f5
2340	FXCPMADD	f9,  A10, A8, f9
2341	FXCSMADD	f13, A10, A8, f13
2342	.align 4
2343
/* .L24-.L27: leftover K steps for the 2x2 tile (low two bits of KK for  */
/* LT/RN builds, of TEMP otherwise), one k step per pass.                */
2344.L24:
2345#if defined(LT) || defined(RN)
2346	andi.	r0,  KK,  3
2347	mtspr	CTR, r0
	/* andi. result is never negative, so this branches only on zero */
2348	ble+	.L28
2349#else
2350	andi.	r0, TEMP, 3
2351	mtspr	CTR, r0
2352	ble+	.L28
2353#endif
2354
	/* Preload one k step worth of operands. */
2355	LFPDUX	A1,  AO,  INC4
2356	LFPDUX	A2,  AO2, INC4
2357	LFPDUX	B1,  BO,  INC4
2358	LFPDUX	B2,  BO2, INC4
	/* only one leftover step: skip the reloading loop */
2359	bdz-	.L27
2360	.align 4
2361
	/* One k step plus the loads for the following one. */
2362.L26:
2363	FXCPMADD	f0,  B1, A1, f0
2364	FXCSMADD	f4,  B1, A1, f4
2365	FXCPMADD	f8,  B2, A1, f8
2366	FXCSMADD	f12, B2, A1, f12
2367	LFPDUX	A1,  AO,  INC4
2368
2369	FXCPMADD	f1,  B1, A2, f1
2370	FXCSMADD	f5,  B1, A2, f5
2371	LFPDUX	B1,  BO,  INC4
2372	FXCPMADD	f9,  B2, A2, f9
2373	FXCSMADD	f13, B2, A2, f13
2374	LFPDUX	A2,  AO2, INC4
2375	LFPDUX	B2,  BO2, INC4
2376	bdnz+	.L26
2377	.align 4
2378
	/* Final k step: arithmetic only, nothing more to fetch. */
2379.L27:
2380	FXCPMADD	f0,  B1, A1, f0
2381	FXCSMADD	f4,  B1, A1, f4
2382	FXCPMADD	f8,  B2, A1, f8
2383	FXCSMADD	f12, B2, A1, f12
2384
2385	FXCPMADD	f1,  B1, A2, f1
2386	FXCSMADD	f5,  B1, A2, f5
2387	FXCPMADD	f9,  B2, A2, f9
2388	FXCSMADD	f13, B2, A2, f13
2389	.align 4
2390
/* .L28: finish the 2x2 tile: fold the paired accumulators, subtract     */
/* from the packed-buffer values, run the LN / LT / RN / RT substitution,*/
/* store to the packed buffer and to C, then update pointers and KK.     */
/* NOTE(review): "fxpmul + FXCXNPMA" looks like a complex multiply by a  */
/* diagonal coefficient and "fxcpnmsub + FXCXNSMA" like a complex        */
/* multiply-subtract; the FXCX* macros are defined outside this chunk,   */
/* so confirm against their definitions.                                 */
2391.L28:
2392	fpadd	f0, f0, f4
2393	fpadd	f8, f8, f12
2394	fpadd	f1, f1, f5
2395	fpadd	f9, f9, f13
2396
	/* LN/RT: re-derive AO/BO at offset (KK - 2) << (1 + ZBASE_SHIFT).     */
	/* Both preprocessor arms below emit the same instruction.             */
2397#if defined(LN) || defined(RT)
2398#ifdef LN
2399	subi	r0, KK, 2
2400#else
2401	subi	r0, KK, 2
2402#endif
2403	slwi	r0,   r0, 1 + ZBASE_SHIFT
2404	add	AO, AORIG, r0
2405	add	BO, B,     r0
2406	addi	AO2, AO,   2 * SIZE
2407	addi	BO,  BO, - 4 * SIZE
2408	addi	BO2, BO,   2 * SIZE
2409#endif
2410
	/* Load the four current values of the tile (BO/BO2 for LN/LT, AO/AO2  */
	/* otherwise) and rewind: two update-form loads per pointer are undone */
	/* by 8 * SIZE.                                                        */
2411#if defined(LN) || defined(LT)
2412	LFPDUX	f16, BO,  INC4
2413	LFPDUX	f18, BO2, INC4
2414	LFPDUX	f17, BO,  INC4
2415	LFPDUX	f19, BO2, INC4
2416
2417	subi	BO,  BO,   8 * SIZE
2418	subi	BO2, BO2,  8 * SIZE
2419#else
2420	LFPDUX	f16, AO,  INC4
2421	LFPDUX	f17, AO2, INC4
2422	LFPDUX	f18, AO,  INC4
2423	LFPDUX	f19, AO2, INC4
2424
2425	subi	AO,  AO,   8 * SIZE
2426	subi	AO2, AO2,  8 * SIZE
2427#endif
2428
	/* result = loaded value - accumulated product */
2429	fpsub	f0,  f16,  f0
2430	fpsub	f1,  f17,  f1
2431	fpsub	f8,  f18,  f8
2432	fpsub	f9,  f19,  f9
2433
	/* LN: three coefficients from a four-slot walk (one AO2 slot skipped); */
	/* f1/f9 are processed first, then f0/f8.                               */
2434#ifdef LN
2435	LFPDUX	A1,  AO,  INC4
2436	add	AO2, AO2, INC4
2437	LFPDUX	A2,  AO,  INC4
2438	LFPDUX	A3,  AO2, INC4
2439
2440	subi	AO,  AO,   8 * SIZE
2441	subi	AO2, AO2,  8 * SIZE
2442
2443	fxpmul	  f4,  A3, f1
2444	fxpmul	  f5,  A3, f9
2445	FXCXNPMA  f1,  A3, f1,  f4
2446	FXCXNPMA  f9,  A3, f9,  f5
2447
2448	fxcpnmsub f0,  A2, f1,  f0
2449	fxcpnmsub f8,  A2, f9,  f8
2450	FXCXNSMA  f0,  A2, f1,  f0
2451	FXCXNSMA  f8,  A2, f9,  f8
2452
2453	fxpmul	  f4,  A1, f0
2454	fxpmul	  f5,  A1, f8
2455	FXCXNPMA  f0,  A1, f0,  f4
2456	FXCXNPMA  f8,  A1, f8,  f5
2457#endif
2458
	/* LT: one AO slot skipped instead; f0/f8 first, then f1/f9. */
2459#ifdef LT
2460	LFPDUX	A1,  AO,  INC4
2461	LFPDUX	A2,  AO2, INC4
2462	add	AO,  AO,  INC4
2463	LFPDUX	A3,  AO2, INC4
2464
2465	subi	AO,  AO,   8 * SIZE
2466	subi	AO2, AO2,  8 * SIZE
2467
2468	fxpmul	  f4,  A1, f0
2469	fxpmul	  f5,  A1, f8
2470	FXCXNPMA  f0,  A1, f0, f4
2471	FXCXNPMA  f8,  A1, f8, f5
2472
2473	fxcpnmsub f1,  A2, f0, f1
2474	fxcpnmsub f9,  A2, f8, f9
2475	FXCXNSMA  f1,  A2, f0, f1
2476	FXCXNSMA  f9,  A2, f8, f9
2477
2478	fxpmul	  f6,  A3, f1
2479	fxpmul	  f7,  A3, f9
2480	FXCXNPMA  f1,  A3, f1, f6
2481	FXCXNPMA  f9,  A3, f9, f7
2482#endif
2483
	/* RN: coefficients from BO/BO2 (one BO slot skipped); f0/f1 first,    */
	/* then f8/f9.                                                         */
2484#ifdef RN
2485	LFPDUX	A1,  BO,  INC4
2486	LFPDUX	A2,  BO2, INC4
2487	add	BO,  BO,  INC4
2488	LFPDUX	A3,  BO2, INC4
2489
2490	subi	BO,  BO,   8 * SIZE
2491	subi	BO2, BO2,  8 * SIZE
2492
2493	fxpmul	  f4,  A1, f0
2494	fxpmul	  f5,  A1, f1
2495
2496	FXCXNPMA  f0,  A1, f0, f4
2497	FXCXNPMA  f1,  A1, f1, f5
2498
2499	fxcpnmsub f8,  A2, f0, f8
2500	fxcpnmsub f9,  A2, f1, f9
2501
2502	FXCXNSMA  f8,  A2, f0, f8
2503	FXCXNSMA  f9,  A2, f1, f9
2504
2505	fxpmul	  f4,  A3, f8
2506	fxpmul	  f5,  A3, f9
2507
2508	FXCXNPMA  f8,  A3, f8,  f4
2509	FXCXNPMA  f9,  A3, f9,  f5
2510#endif
2511
	/* RT: one BO2 slot skipped; f8/f9 first, then f0/f1. */
2512#ifdef RT
2513	LFPDUX	A1,  BO,  INC4
2514	add	BO2, BO2, INC4
2515	LFPDUX	A2,  BO,  INC4
2516	LFPDUX	A3,  BO2, INC4
2517
2518	subi	BO,  BO,   8 * SIZE
2519	subi	BO2, BO2,  8 * SIZE
2520
2521	fxpmul	  f4,  A3, f8
2522	fxpmul	  f5,  A3, f9
2523
2524	FXCXNPMA  f8,  A3, f8,  f4
2525	FXCXNPMA  f9,  A3, f9,  f5
2526
2527	fxcpnmsub f0,  A2, f8,  f0
2528	fxcpnmsub f1,  A2, f9,  f1
2529
2530	FXCXNSMA  f0,  A2, f8,  f0
2531	FXCXNSMA  f1,  A2, f9,  f1
2532
2533	fxpmul	  f4,  A1, f0
2534	fxpmul	  f5,  A1, f1
2535
2536	FXCXNPMA  f0,  A1, f0,  f4
2537	FXCXNPMA  f1,  A1, f1,  f5
2538#endif
2539
	/* LN: step CO1/CO2 back by 4 * SIZE before the stores and again after. */
2540#ifdef LN
2541	subi	CO1, CO1, 4 * SIZE
2542	subi	CO2, CO2, 4 * SIZE
2543#endif
2544
	/* Write results back to the packed buffer they came from, then rewind. */
2545#if defined(LN) || defined(LT)
2546	STFPDUX	f0,  BO,  INC4
2547	STFPDUX	f8,  BO2, INC4
2548	STFPDUX	f1,  BO,  INC4
2549	STFPDUX	f9,  BO2, INC4
2550
2551	subi	BO,  BO,   8 * SIZE
2552	subi	BO2, BO2,  8 * SIZE
2553#else
2554	STFPDUX	f0,  AO,  INC4
2555	STFPDUX	f1,  AO2, INC4
2556	STFPDUX	f8,  AO,  INC4
2557	STFPDUX	f9,  AO2, INC4
2558
2559	subi	AO,  AO,   8 * SIZE
2560	subi	AO2, AO2,  8 * SIZE
2561#endif
2562
	/* Store to C: f0/f1 through CO1, f8/f9 through CO2, one STFDUX /      */
	/* STFSDUX pair per register at successive INC strides.                */
2563	STFDUX	f0,  CO1, INC
2564	STFSDUX	f0,  CO1, INC
2565	STFDUX	f1,  CO1, INC
2566	STFSDUX	f1,  CO1, INC
2567
2568	STFDUX	f8,  CO2, INC
2569	STFSDUX	f8,  CO2, INC
2570	STFDUX	f9,  CO2, INC
2571	STFSDUX	f9,  CO2, INC
2572
2573#ifdef LN
2574	subi	CO1, CO1, 4 * SIZE
2575	subi	CO2, CO2, 4 * SIZE
2576#endif
2577
	/* RT: AORIG += K << (1 + ZBASE_SHIFT) */
2578#ifdef RT
2579	slwi	r0, K, 1 + ZBASE_SHIFT
2580	add	AORIG, AORIG, r0
2581#endif
2582
	/* LT/RN: skip the remaining K - KK steps in both packed streams (same */
	/* byte offset for AO and BO here).                                    */
2583#if defined(LT) || defined(RN)
2584	sub	TEMP, K, KK
2585	slwi	r0,   TEMP, 1 + ZBASE_SHIFT
2586	add	AO, AO, r0
2587	add	BO, BO, r0
2588#endif
2589
2590#ifdef LT
2591	addi	KK, KK, 2
2592#endif
2593
2594#ifdef LN
2595	subi	KK, KK, 2
2596#endif
2597
	/* Reload f0 from the stack slot at SP + FZERO (presumably a zero pair) */
	/* so the next tile can seed its accumulators from it.                  */
2598	li	r0, FZERO
2599	lfpsx	f0, SP, r0
2600	.align 4
2601
/* .L30-.L33: 1x2 tile, taken when bit 0 of M is set (otherwise jump to  */
/* .L49).  Accumulates into f0-f3 (f1-f3 are seeded from f0) with the K  */
/* loop unrolled by four.                                                */
2602.L30:
2603	andi.	I, M,  1
2604	beq	.L49
2605
	/* LT/RN: BO restarts at B with the -4 * SIZE bias; trip count KK >> 2. */
2606#if defined(LT) || defined(RN)
2607	addi	AO2, AO,   2 * SIZE
2608	fpmr	f1,  f0
2609	addi	BO,  B,  - 4 * SIZE
2610	fpmr	f2,  f0
2611	addi	BO2, B,  - 2 * SIZE
2612	fpmr	f3, f0
2613
2614	srawi.	r0,  KK,  2
2615	mtspr	CTR, r0
2616	ble	.L34
2617#else
	/* LN only: move AORIG back by K << ZBASE_SHIFT first. */
2618#ifdef LN
2619	slwi	r0,   K,  0 + ZBASE_SHIFT
2620	sub	AORIG, AORIG, r0
2621#endif
2622
	/* LN/RT: AO starts KK << ZBASE_SHIFT past AORIG, BO starts            */
	/* KK << (1 + ZBASE_SHIFT) past B; iterate over TEMP = K - KK steps.   */
2623	slwi	r0  , KK, 0 + ZBASE_SHIFT
2624	slwi	TEMP, KK, 1 + ZBASE_SHIFT
2625	add	AO, AORIG, r0
2626	add	BO, B,     TEMP
2627
2628	sub	TEMP, K, KK
2629
2630	addi	AO2, AO,   2 * SIZE
2631	fpmr	f1,  f0
2632	addi	BO,  BO,  - 4 * SIZE
2633	fpmr	f2,  f0
2634	addi	BO2, BO,    2 * SIZE
2635	fpmr	f3, f0
2636
2637	srawi.	r0,  TEMP,  2
2638	mtspr	CTR, r0
2639	ble	.L34
2640#endif
2641
	/* Preload operands for four k steps.  A1..A4 come from AO/AO2; A5..A8 */
	/* are filled from BO/BO2 and act as B-side operands of steps 3 and 4. */
2642	LFPDUX	A1,  AO, INC4
2643	LFPDUX	B1,  BO, INC4
2644	LFPDUX	B2, BO2, INC4
2645	LFPDUX	A2, AO2, INC4
2646	LFPDUX	B3,  BO, INC4
2647	LFPDUX	B4, BO2, INC4
2648
2649	LFPDUX	A3,  AO, INC4
2650	LFPDUX	A5,  BO, INC4
2651	LFPDUX	A6, BO2, INC4
2652	LFPDUX	A4, AO2, INC4
2653	LFPDUX	A7,  BO, INC4
2654	LFPDUX	A8, BO2, INC4
	/* a single unrolled pass: go straight to the non-reloading tail */
2655	bdz-	.L33
2656	.align 4
2657
	/* Loop body: four k steps, each reloading its own operands for the    */
	/* next pass.                                                          */
2658.L32:
2659	FXCPMADD	f0,  B1, A1, f0
2660	FXCSMADD	f1,  B1, A1, f1
2661	LFPDUX	B1,  BO, INC4
2662	FXCPMADD	f2,  B2, A1, f2
2663	FXCSMADD	f3,  B2, A1, f3
2664	LFPDUX	B2, BO2, INC4
2665	LFPDUX	A1,  AO, INC4
2666
2667	FXCPMADD	f0,  B3, A2, f0
2668	FXCSMADD	f1,  B3, A2, f1
2669	LFPDUX	B3,  BO, INC4
2670	FXCPMADD	f2,  B4, A2, f2
2671	FXCSMADD	f3,  B4, A2, f3
2672	LFPDUX	B4, BO2, INC4
2673	LFPDUX	A2, AO2, INC4
2674
2675	FXCPMADD	f0,  A5, A3, f0
2676	FXCSMADD	f1,  A5, A3, f1
2677	LFPDUX	A5,  BO, INC4
2678	FXCPMADD	f2,  A6, A3, f2
2679	FXCSMADD	f3,  A6, A3, f3
2680	LFPDUX	A6, BO2, INC4
2681	LFPDUX	A3,  AO, INC4
2682
2683	FXCPMADD	f0,  A7, A4, f0
2684	FXCSMADD	f1,  A7, A4, f1
2685	LFPDUX	A7,  BO, INC4
2686	FXCPMADD	f2,  A8, A4, f2
2687	FXCSMADD	f3,  A8, A4, f3
2688	LFPDUX	A8, BO2, INC4
2689	LFPDUX	A4, AO2, INC4
2690	bdnz+	.L32
2691	.align 4
2692
	/* Tail: last four k steps, arithmetic only. */
2693.L33:
2694	FXCPMADD	f0,  B1, A1, f0
2695	FXCSMADD	f1,  B1, A1, f1
2696	FXCPMADD	f2,  B2, A1, f2
2697	FXCSMADD	f3,  B2, A1, f3
2698
2699	FXCPMADD	f0,  B3, A2, f0
2700	FXCSMADD	f1,  B3, A2, f1
2701	FXCPMADD	f2,  B4, A2, f2
2702	FXCSMADD	f3,  B4, A2, f3
2703
2704	FXCPMADD	f0,  A5, A3, f0
2705	FXCSMADD	f1,  A5, A3, f1
2706	FXCPMADD	f2,  A6, A3, f2
2707	FXCSMADD	f3,  A6, A3, f3
2708
2709	FXCPMADD	f0,  A7, A4, f0
2710	FXCSMADD	f1,  A7, A4, f1
2711	FXCPMADD	f2,  A8, A4, f2
2712	FXCSMADD	f3,  A8, A4, f3
2713	.align 4
2714
/* .L34-.L37: leftover K steps for the 1x2 tile (low two bits of KK for  */
/* LT/RN builds, of TEMP otherwise), one k step per pass.                */
2715.L34:
2716#if defined(LT) || defined(RN)
2717	andi.	r0,  KK,  3
2718	mtspr	CTR, r0
	/* andi. result is never negative, so this branches only on zero */
2719	ble+	.L38
2720#else
2721	andi.	r0, TEMP, 3
2722	mtspr	CTR, r0
2723	ble+	.L38
2724#endif
2725
	/* The A operand is fetched with the non-update form LFPDX at          */
	/* AO + INC4 and AO is then advanced separately by INC2; the B         */
	/* operands use the update form with INC4.                             */
2726	LFPDX	A1,  AO,  INC4
2727	LFPDUX	B1,  BO,  INC4
2728	LFPDUX	B2,  BO2, INC4
2729	add	AO, AO, INC2
	/* only one leftover step: skip the reloading loop */
2730	bdz-	.L37
2731	.align 4
2732
	/* One k step plus the loads for the following one. */
2733.L36:
2734	FXCPMADD	f0,  B1, A1, f0
2735	FXCSMADD	f1,  B1, A1, f1
2736	LFPDUX	B1,  BO,  INC4
2737	FXCPMADD	f2,  B2, A1, f2
2738	FXCSMADD	f3,  B2, A1, f3
2739	LFPDX	A1,  AO,  INC4
2740	LFPDUX	B2,  BO2, INC4
2741	add	AO, AO, INC2
2742	bdnz+	.L36
2743	.align 4
2744
	/* Final k step: arithmetic only. */
2745.L37:
2746	FXCPMADD	f0,  B1, A1, f0
2747	FXCSMADD	f1,  B1, A1, f1
2748	FXCPMADD	f2,  B2, A1, f2
2749	FXCSMADD	f3,  B2, A1, f3
2750	.align 4
2751
/* .L38: finish the 1x2 tile: fold f1 into f0 and f3 into f2, subtract   */
/* from the packed-buffer values, run the LN / LT / RN / RT substitution,*/
/* store to the packed buffer and to C, then update pointers and KK.     */
/* NOTE(review): "fxpmul + FXCXNPMA" looks like a complex multiply by a  */
/* diagonal coefficient and "fxcpnmsub + FXCXNSMA" like a complex        */
/* multiply-subtract; the FXCX* macros are defined outside this chunk,   */
/* so confirm against their definitions.                                 */
2752.L38:
2753	fpadd	f0, f0, f1
2754	fpadd	f2, f2, f3
2755
	/* LN/RT: re-derive AO/BO at offset KK - 1 (LN) or KK - 2 (RT), scaled */
	/* by ZBASE_SHIFT for A and 1 + ZBASE_SHIFT for B.                     */
2756#if defined(LN) || defined(RT)
2757#ifdef LN
2758	subi	r0, KK, 1
2759#else
2760	subi	r0, KK, 2
2761#endif
2762	slwi	TEMP, r0, 0 + ZBASE_SHIFT
2763	slwi	r0,   r0, 1 + ZBASE_SHIFT
2764	add	AO, AORIG, TEMP
2765	add	BO, B,     r0
2766	addi	BO,  BO, - 4 * SIZE
2767#endif
2768
	/* Secondary pointers are rebuilt for every variant here. */
2769	addi	AO2, AO,   2 * SIZE
2770	addi	BO2, BO,   2 * SIZE
2771
	/* Non-update loads: the pointers stay where they are, so no rewind is */
	/* needed after fetching the two current values.                       */
2772#if defined(LN) || defined(LT)
2773	LFPDX	f16, BO,  INC4
2774	LFPDX	f17, BO2, INC4
2775#else
2776	LFPDX	f16, AO,  INC4
2777	LFPDX	f17, AO2, INC4
2778#endif
2779
	/* result = loaded value - accumulated product */
2780	fpsub	f0,  f16,  f0
2781	fpsub	f2,  f17,  f2
2782
	/* LN and LT are identical for a single row: one coefficient A1        */
	/* applied to both f0 and f2.                                          */
2783#ifdef LN
2784	LFPDX	A1,  AO,  INC4
2785
2786	fxpmul	  f4,  A1, f0
2787	fxpmul	  f5,  A1, f2
2788	FXCXNPMA  f0,  A1, f0,  f4
2789	FXCXNPMA  f2,  A1, f2,  f5
2790#endif
2791
2792#ifdef LT
2793	LFPDX	A1,  AO,  INC4
2794
2795	fxpmul	  f4,  A1, f0
2796	fxpmul	  f5,  A1, f2
2797	FXCXNPMA  f0,  A1, f0, f4
2798	FXCXNPMA  f2,  A1, f2, f5
2799#endif
2800
	/* RN: three coefficients from a four-slot walk through BO/BO2 (one BO */
	/* slot skipped, pointers rewound by 8 * SIZE); f0 first, then f2.     */
2801#ifdef RN
2802	LFPDUX	A1,  BO,  INC4
2803	LFPDUX	A2,  BO2, INC4
2804	add	BO,  BO,  INC4
2805	LFPDUX	A3,  BO2, INC4
2806
2807	subi	BO,  BO,   8 * SIZE
2808	subi	BO2, BO2,  8 * SIZE
2809
2810	fxpmul	  f4,  A1, f0
2811	FXCXNPMA  f0,  A1, f0, f4
2812
2813	fxcpnmsub f2,  A2, f0, f2
2814	FXCXNSMA  f2,  A2, f0, f2
2815
2816	fxpmul	  f4,  A3, f2
2817	FXCXNPMA  f2,  A3, f2,  f4
2818#endif
2819
	/* RT: one BO2 slot skipped instead; f2 first, then f0. */
2820#ifdef RT
2821	LFPDUX	A1,  BO,  INC4
2822	add	BO2, BO2, INC4
2823	LFPDUX	A2,  BO,  INC4
2824	LFPDUX	A3,  BO2, INC4
2825
2826	subi	BO,  BO,   8 * SIZE
2827	subi	BO2, BO2,  8 * SIZE
2828
2829	fxpmul	  f4,  A3, f2
2830	FXCXNPMA  f2,  A3, f2,  f4
2831
2832	fxcpnmsub f0,  A2, f2,  f0
2833	FXCXNSMA  f0,  A2, f2,  f0
2834
2835	fxpmul	  f4,  A1, f0
2836	FXCXNPMA  f0,  A1, f0,  f4
2837#endif
2838
	/* LN: step CO1/CO2 back by 2 * SIZE before the stores and again after. */
2839#ifdef LN
2840	subi	CO1, CO1, 2 * SIZE
2841	subi	CO2, CO2, 2 * SIZE
2842#endif
2843
	/* Write both results back to the packed buffer (non-update stores). */
2844#if defined(LN) || defined(LT)
2845	STFPDX	f0,  BO,  INC4
2846	STFPDX	f2,  BO2, INC4
2847#else
2848	STFPDX	f0,  AO,  INC4
2849	STFPDX	f2,  AO2, INC4
2850#endif
2851
	/* Store to C: f0 through CO1, f2 through CO2, one STFDUX / STFSDUX    */
	/* pair each at successive INC strides.                                */
2852	STFDUX	f0,  CO1, INC
2853	STFSDUX	f0,  CO1, INC
2854	STFDUX	f2,  CO2, INC
2855	STFSDUX	f2,  CO2, INC
2856
2857#ifdef LN
2858	subi	CO1, CO1, 2 * SIZE
2859	subi	CO2, CO2, 2 * SIZE
2860#endif
2861
	/* RT: AORIG += K << ZBASE_SHIFT */
2862#ifdef RT
2863	slwi	r0, K, 0 + ZBASE_SHIFT
2864	add	AORIG, AORIG, r0
2865#endif
2866
	/* LT/RN: skip the remaining K - KK steps in both packed streams. */
2867#if defined(LT) || defined(RN)
2868	sub	TEMP, K, KK
2869	slwi	r0,   TEMP, 0 + ZBASE_SHIFT
2870	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT
2871	add	AO, AO, r0
2872	add	BO, BO, TEMP
2873#endif
2874
2875#ifdef LT
2876	addi	KK, KK, 1
2877#endif
2878
2879#ifdef LN
2880	subi	KK, KK, 1
2881#endif
2882
	/* Reload f0 from the stack slot at SP + FZERO (presumably a zero pair). */
2883	li	r0, FZERO
2884	lfpsx	f0, SP, r0
2885	.align 4
2886
/* .L49: bookkeeping after all row tiles of the current column pair:     */
/* advance B, adjust KK by 2 for the RN/RT builds, and loop back to .L10 */
/* while the decremented J is still positive.                            */
2887.L49:
	/* LN: B += K << (1 + ZBASE_SHIFT) */
2888#ifdef LN
2889	slwi	r0, K, 1 + ZBASE_SHIFT
2890	add	B, B, r0
2891#endif
2892
	/* LT/RN: BO carries a -4 * SIZE bias, so B = BO + 4 * SIZE */
2893#if defined(LT) || defined(RN)
2894	addi	B,  BO, 4 * SIZE
2895#endif
2896
2897#ifdef RN
2898	addi	KK, KK, 2
2899#endif
2900
2901#ifdef RT
2902	subi	KK, KK, 2
2903#endif
2904
2905	addic.	J, J, -1
2906	bgt+	.L10
2907	.align 4
2908
/* .L999: common exit.  Restores r14-r31 and f14-f31 from the stack      */
/* frame and returns.  The matching save sequence is outside this chunk; */
/* NOTE(review): confirm the 20 / 12 / 16 byte adjustments against it.   */
2909.L999:
	/* Step SP over the first 20 bytes of the frame, then pop r14..r31     */
	/* with lwzu, which advances SP by 4 before each load.                 */
2910	addi	SP, SP, 20
2911
2912	lwzu	r14,   4(SP)
2913	lwzu	r15,   4(SP)
2914
2915	lwzu	r16,   4(SP)
2916	lwzu	r17,   4(SP)
2917	lwzu	r18,   4(SP)
2918	lwzu	r19,   4(SP)
2919
2920	lwzu	r20,   4(SP)
2921	lwzu	r21,   4(SP)
2922	lwzu	r22,   4(SP)
2923	lwzu	r23,   4(SP)
2924
2925	lwzu	r24,   4(SP)
2926	lwzu	r25,   4(SP)
2927	lwzu	r26,   4(SP)
2928	lwzu	r27,   4(SP)
2929
2930	lwzu	r28,   4(SP)
2931	lwzu	r29,   4(SP)
2932	lwzu	r30,   4(SP)
2933	lwzu	r31,   4(SP)
2934
	/* Back SP up by 12 so that the first update-form load below (SP + 16) */
	/* reads 4 bytes past the r31 slot; r0 = 16 is the per-register stride */
	/* for the paired FP restores.                                         */
2935	subi	SP, SP, 12
2936	li	r0, 16
2937
	/* Pop f31 down to f14, one 16-byte pair per load. */
2938	lfpdux	f31, SP, r0
2939	lfpdux	f30, SP, r0
2940	lfpdux	f29, SP, r0
2941	lfpdux	f28, SP, r0
2942	lfpdux	f27, SP, r0
2943	lfpdux	f26, SP, r0
2944	lfpdux	f25, SP, r0
2945	lfpdux	f24, SP, r0
2946	lfpdux	f23, SP, r0
2947	lfpdux	f22, SP, r0
2948	lfpdux	f21, SP, r0
2949	lfpdux	f20, SP, r0
2950	lfpdux	f19, SP, r0
2951	lfpdux	f18, SP, r0
2952	lfpdux	f17, SP, r0
2953	lfpdux	f16, SP, r0
2954	lfpdux	f15, SP, r0
2955	lfpdux	f14, SP, r0
	/* step past the last restored pair and return */
2956	addi	SP, SP, 16
2957	blr
2958	.align 4
2959
2960
	/* EPILOGUE is a macro defined outside this chunk. */
2961	EPILOGUE
2962#endif
2963