1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
/* Register map, described by how each name is used in the code below.
     N      element count; drives the CTR loop counts and the remainder tests
     X      cursor into the vector (loads, and stores in the zero-fill paths)
     INCX   stride argument; shifted left by BASE_SHIFT at entry
     INCX2  INCX + INCX, the step applied by the update-form accesses
     XX     X + SIZE, second load cursor of the misaligned scalar paths
     Y, YY  store cursors, initialised to the same addresses as X and XX,
            so results are written back over the values that were read */
42#define N	r3
43#define X	r6
44#define INCX	r7
45
46#define INCX2	r4
47#define XX	r5
48#define Y	r8
49#define YY	r9
50
/* Scalar multiplier. The entry code combines the two with fsmfp so that the
   paired paths can use ALPHA alone; the scalar paths use both registers. */
51#define ALPHA	f1
52#define ALPHA_I	f2
53
/* A1-A8: values loaded from the vector. A1 additionally holds zero from
   entry until the first load, and is the value written by the zero-fill
   paths. A2 and A3 live in f16/f17, which the entry code saves. */
54#define A1	f0
55#define A2	f16
56#define A3	f17
57#define A4	f3
58#define A5	f4
59#define A6	f5
60#define A7	f6
61#define A8	f7
62
/* B1-B8: products / results waiting to be stored. B7 and B8 live in
   f14/f15, which the entry code saves. */
63#define B1	f8
64#define B2	f9
65#define B3	f10
66#define B4	f11
67#define B5	f12
68#define B6	f13
69#define B7	f14
70#define B8	f15
71
/* Entry point. PROLOGUE and PROFCODE are macros from common.h (not visible
   in this file); presumably they emit the symbol and optional profiling. */
72	PROLOGUE
73	PROFCODE
74
/* Save f14-f17 below SP, 16 bytes per register pair. These registers back
   B7, B8, A2 and A3 and are reloaded in reverse order at LL(999). */
75	li	r10, -16
76
77	stfpdux	f14, SP, r10
78	stfpdux	f15, SP, r10
79	stfpdux	f16, SP, r10
80	stfpdux	f17, SP, r10
81
/* Push four zero words (16 bytes) and load them back as a register pair,
   leaving A1 = 0. r10 is 0 here, so the load reads from SP itself. */
82	li	r10,   0
83	stwu	r10,   -4(SP)
84	stwu	r10,   -4(SP)
85	stwu	r10,   -4(SP)
86	stwu	r10,   -4(SP)
87
88	lfpdx	A1, SP, r10		# Zero clear
/* Presumably copies ALPHA_I into the secondary half of ALPHA so that ALPHA
   holds both parts of the multiplier -- confirm against the FP2 ISA. */
89	fsmfp	ALPHA, ALPHA_I
90
/* INCX <<= BASE_SHIFT (presumably element units -> bytes); INCX2 = 2 * INCX. */
91	slwi	INCX,  INCX, BASE_SHIFT
92	add	INCX2, INCX, INCX
93
/* N <= 0: nothing to do, go straight to the exit code. */
94	cmpwi	cr0, N, 0
95	ble	LL(999)
96
/* INCX != SIZE selects the general-stride code at LL(100); otherwise fall
   through into the unit-stride code. */
97	cmpwi	cr0, INCX, SIZE
98	bne	LL(100)
99
/* Unit stride. Compare ALPHA against the zero in A1: fcmpu tests the primary
   half, fscmp presumably the secondary half (confirm against the FP2 ISA).
   If either differs from zero go to the multiply code at LL(50); otherwise
   fall through and overwrite the vector with zeros without reading it. */
100	fcmpu	cr7, ALPHA, A1
101	bne	cr7, LL(50)
102
103	fscmp	cr7, ALPHA, A1
104	bne	cr7, LL(50)
105
/* X not a multiple of 2 * SIZE: paired stores cannot start at X, use LL(20). */
106	andi.	r0, X, 2 * SIZE - 1
107	bne	LL(20)
108
/* Pre-bias X by one step: each STFPDUX below advances X by INCX2 as part of
   the store (update form), so the first store lands on the original X. */
109	sub	X,  X, INCX2
110
/* CTR = N / 4; skip the unrolled loop when that is zero. */
111	srawi.	r0, N, 2
112	mtspr	CTR,  r0
113	beq-	LL(15)
114	.align 4
115
/* Main zero-fill loop: four paired stores (four elements) per iteration. */
116LL(12):
117	STFPDUX	A1,   X, INCX2
118	STFPDUX	A1,   X, INCX2
119	STFPDUX	A1,   X, INCX2
120	STFPDUX	A1,   X, INCX2
121	bdnz	LL(12)
122	.align 4
123
/* Remainder N & 3: bit 1 -> two more stores, bit 0 -> one more store. */
124LL(15):
125	andi.	r0,  N, 3
126	beq	LL(999)
127	andi.	r0,  N, 2
128	beq	LL(17)
129
130	STFPDUX	A1,   X, INCX2
131	STFPDUX	A1,   X, INCX2
132	.align 4
133
134LL(17):
135	andi.	r0,  N, 1
136	beq	LL(999)
137
138	STFPDUX	A1,   X, INCX2
139	b	LL(999)
140	.align 4
141
/* Unit stride, ALPHA == 0, X not aligned to 2 * SIZE.
   Strategy: store the first component on its own, shift X by SIZE so that
   the paired stores fall on 2 * SIZE boundaries (assumes X is at least
   SIZE-aligned -- TODO confirm), fill N - 1 pairs that each straddle two
   neighbouring elements, then store the last component on its own. */
142LL(20):
143	sub	X,  X, INCX2
144
/* Single non-update store at X + INCX2, i.e. the original X. */
145	STFDX	A1, X, INCX2
146	addi	X, X, SIZE
/* One element is accounted for by the two single stores; if none remain,
   only the trailing single store at LL(29) is needed. */
147	addi	N, N, -1
148	cmpwi	cr0, N, 0
149	ble	LL(29)
150
151	srawi.	r0, N, 2
152	mtspr	CTR,  r0
153	beq-	LL(25)
154	.align 4
155
/* Four paired stores per iteration. */
156LL(22):
157	STFPDUX	A1,   X, INCX2
158	STFPDUX	A1,   X, INCX2
159	STFPDUX	A1,   X, INCX2
160	STFPDUX	A1,   X, INCX2
161	bdnz	LL(22)
162	.align 4
163
/* Remainder of the decremented N: bit 1 -> two pairs, bit 0 -> one pair.
   Every exit from here goes through LL(29), never directly to LL(999). */
164LL(25):
165	andi.	r0,  N, 3
166	beq	LL(29)
167	andi.	r0,  N, 2
168	beq	LL(27)
169
170	STFPDUX	A1,   X, INCX2
171	STFPDUX	A1,   X, INCX2
172	.align 4
173
174LL(27):
175	andi.	r0,  N, 1
176	beq	LL(29)
177
178	STFPDUX	A1,   X, INCX2
179	.align 4
180
/* Final single store at X + INCX2: the last component of the last element. */
181LL(29):
182	STFDX	A1,   X, INCX2
183	b	LL(999)
184	.align 4
185
/* Unit stride, ALPHA != 0: multiply every element by ALPHA in place.
   Y (store cursor) and X (load cursor) start at the same address, both
   pre-biased by one step because all accesses below are update forms.

   Each element is handled by the pair
       fxpmul   B, ALPHA, A
       fxcxnpma B, ALPHA, A, B
   which is presumably the FP2 complex-multiply idiom (confirm against the
   FP2 ISA). The scalar code at LL(60) spells the same computation out as
       re' = ALPHA * re - ALPHA_I * im
       im' = ALPHA * im + ALPHA_I * re */
186LL(50):
187	sub	Y,  X, INCX2
188	sub	X,  X, INCX2
189
/* Misaligned X cannot use paired loads/stores: use the scalar code at LL(60). */
190	andi.	r0, X, 2 * SIZE - 1
191	bne	LL(60)
192
/* CTR = N / 8; skip straight to the remainder code when that is zero. */
193	srawi.	r0, N, 3
194	mtspr	CTR,  r0
195	beq-	LL(55)
196
/* Pipeline fill: load the first eight elements and start their products so
   that the loop body can overlap loads, arithmetic and stores. */
197	LFPDUX	A1,  X,  INCX2
198	LFPDUX	A2,  X,  INCX2
199	LFPDUX	A3,  X,  INCX2
200	LFPDUX	A4,  X,  INCX2
201
202	LFPDUX	A5,  X,  INCX2
203	fxpmul	B1,  ALPHA, A1
204	LFPDUX	A6,  X,  INCX2
205	fxpmul	B2,  ALPHA, A2
206	LFPDUX	A7,  X,  INCX2
207	fxpmul	B3,  ALPHA, A3
208	LFPDUX	A8,  X,  INCX2
209	fxpmul	B4,  ALPHA, A4
210 	fxpmul	B5,  ALPHA, A5
211
212	fxcxnpma B1, ALPHA, A1, B1
213	fxcxnpma B2, ALPHA, A2, B2
/* Only one group of eight: skip the steady-state loop and drain at LL(53). */
214	bdz	LL(53)
215	.align 4
216
/* Steady state: each iteration finishes and stores the eight results whose
   inputs were loaded earlier, while loading the next eight inputs and
   starting their products. Instruction order matters: every A register is
   reloaded only after its last use for the previous group. */
217LL(52):
218	fxcxnpma B3, ALPHA, A3, B3
219	LFPDUX	A1,  X,  INCX2
220	fxpmul	B6,  ALPHA, A6
221	STFPDUX	B1,   Y,  INCX2
222
223	fxcxnpma B4, ALPHA, A4, B4
224	LFPDUX	A2,  X,  INCX2
225	fxpmul	B7,  ALPHA, A7
226	STFPDUX	B2,   Y,  INCX2
227
228	fxcxnpma B5, ALPHA, A5, B5
229	LFPDUX	A3,  X,  INCX2
230	fxpmul	B8,  ALPHA, A8
231	STFPDUX	B3,   Y,  INCX2
232
233	fxcxnpma B6, ALPHA, A6, B6
234	LFPDUX	A4,  X,  INCX2
235	fxpmul	B1,  ALPHA, A1
236	STFPDUX	B4,   Y,  INCX2
237
238	fxcxnpma B7, ALPHA, A7, B7
239 	LFPDUX	A5,  X,  INCX2
240	fxpmul	B2,  ALPHA, A2
241	STFPDUX	B5,   Y,  INCX2
242
243	fxcxnpma B8, ALPHA, A8, B8
244	LFPDUX	A6,  X,  INCX2
245	fxpmul	B3,  ALPHA, A3
246	STFPDUX	B6,   Y,  INCX2
247
248	fxcxnpma B1, ALPHA, A1, B1
249	LFPDUX	A7,  X,  INCX2
250	fxpmul	B4,  ALPHA, A4
251	STFPDUX	B7,   Y,  INCX2
252
253	fxcxnpma B2, ALPHA, A2, B2
254	LFPDUX	A8,  X,  INCX2
255	fxpmul	B5,  ALPHA, A5
256	STFPDUX	B8,   Y,  INCX2
257	bdnz	LL(52)
258	.align 4
259
/* Pipeline drain: finish and store the last eight elements, no more loads. */
260LL(53):
261	fxcxnpma B3, ALPHA, A3, B3
262	fxpmul	B6,  ALPHA, A6
263	STFPDUX	B1,   Y,  INCX2
264
265	fxcxnpma B4, ALPHA, A4, B4
266	fxpmul	B7,  ALPHA, A7
267	STFPDUX	B2,   Y,  INCX2
268
269	fxcxnpma B5, ALPHA, A5, B5
270	fxpmul	B8,  ALPHA, A8
271	STFPDUX	B3,   Y,  INCX2
272
273	fxcxnpma B6, ALPHA, A6, B6
274	STFPDUX	B4,   Y,  INCX2
275	fxcxnpma B7, ALPHA, A7, B7
276	STFPDUX	B5,   Y,  INCX2
277	fxcxnpma B8, ALPHA, A8, B8
278	STFPDUX	B6,   Y,  INCX2
279	STFPDUX	B7,   Y,  INCX2
280	STFPDUX	B8,   Y,  INCX2
281	.align 4
282
/* Remainder N & 7, handled without pipelining: 4, then 2, then 1 element. */
283LL(55):
284	andi.	r0,  N, 7
285	beq	LL(999)
286
287	andi.	r0,  N, 4
288	beq	LL(56)
289
290	LFPDUX	A1,  X,  INCX2
291	LFPDUX	A2,  X,  INCX2
292	LFPDUX	A3,  X,  INCX2
293	LFPDUX	A4,  X,  INCX2
294
295	fxpmul	B1,  ALPHA, A1
296	fxpmul	B2,  ALPHA, A2
297	fxpmul	B3,  ALPHA, A3
298	fxpmul	B4,  ALPHA, A4
299
300	fxcxnpma B1, ALPHA, A1, B1
301	fxcxnpma B2, ALPHA, A2, B2
302	fxcxnpma B3, ALPHA, A3, B3
303	fxcxnpma B4, ALPHA, A4, B4
304
305	STFPDUX	B1,   Y,  INCX2
306	STFPDUX	B2,   Y,  INCX2
307	STFPDUX	B3,   Y,  INCX2
308	STFPDUX	B4,   Y,  INCX2
309	.align 4
310
/* Two remaining elements. */
311LL(56):
312	andi.	r0,  N, 2
313	beq	LL(57)
314
315	LFPDUX	A1,  X,  INCX2
316	LFPDUX	A2,  X,  INCX2
317
318	fxpmul	B1,  ALPHA, A1
319	fxpmul	B2,  ALPHA, A2
320
321	fxcxnpma B1, ALPHA, A1, B1
322	fxcxnpma B2, ALPHA, A2, B2
323
324	STFPDUX	B1,   Y,  INCX2
325	STFPDUX	B2,   Y,  INCX2
326	.align 4
327
/* One remaining element. */
328LL(57):
329	andi.	r0,  N, 1
330	beq	LL(999)
331
332	LFPDUX	A1,   X,  INCX2
333
334	fxpmul	B1,  ALPHA, A1
335	fxcxnpma B1, ALPHA, A1, B1
336
337	STFPDUX	B1,   Y,  INCX2
338	b	LL(999)
339	.align 4
340
/* Unit stride, ALPHA != 0, X not aligned to 2 * SIZE: scalar fallback.
   X/Y walk the first component of each element and XX/YY (one SIZE further)
   the second component; all four advance by INCX2 per access. With
   a = value from X and b = value from XX, every element is computed as
       Y  <- ALPHA * a - ALPHA_I * b     (fmul + fnmsub)
       YY <- ALPHA * b + ALPHA_I * a     (fmul + fmadd)
   i.e. a complex multiply with a as the real and b as the imaginary part. */
341LL(60):
342	addi	XX, X, SIZE
343	addi	YY, Y, SIZE
344
/* CTR = N / 4; skip to the remainder code when that is zero. */
345	srawi.	r0, N, 2
346	mtspr	CTR,  r0
347	beq-	LL(65)
348
/* Pipeline fill: load four elements (A1/A2 ... A7/A8) and start the
   products for the first ones. */
349	LFDUX	A1,  X,  INCX2
350	LFDUX	A2,  XX, INCX2
351	LFDUX	A3,  X,  INCX2
352	LFDUX	A4,  XX, INCX2
353
354	LFDUX	A5,  X,  INCX2
355	fmul	B1, ALPHA,   A1
356	LFDUX	A6,  XX, INCX2
357	fmul	B2, ALPHA_I, A1
358	LFDUX	A7,  X,  INCX2
359	fmul	B3, ALPHA,   A3
360	LFDUX	A8,  XX, INCX2
361	fmul	B4, ALPHA_I, A3
362
363	fmul	B5, ALPHA,   A5
364	fnmsub	B1, ALPHA_I, A2, B1
365	fmadd	B2, ALPHA  , A2, B2
/* Only one group of four: skip the steady-state loop and drain at LL(63). */
366	bdz	LL(63)
367	.align 4
368
/* Steady state: finish and store the four elements loaded earlier while
   loading the next four and starting their products. Odd-numbered B
   registers go to Y, even-numbered ones to YY. Instruction order matters:
   each A register is reloaded only after its last use for the previous
   group. */
369LL(62):
370	fnmsub	B3, ALPHA_I, A4, B3
371 	LFDUX	A1,  X,  INCX2
372	fmul	B6, ALPHA_I, A5
373	STFDUX	B1,  Y,  INCX2
374
375	fmadd	B4, ALPHA  , A4, B4
376	LFDUX	A2,  XX, INCX2
377	fmul	B7, ALPHA,   A7
378	STFDUX	B2,  YY, INCX2
379
380	fnmsub	B5, ALPHA_I, A6, B5
381	LFDUX	A3,  X,  INCX2
382	fmul	B8, ALPHA_I, A7
383	STFDUX	B3,  Y,  INCX2
384
385	fmadd	B6, ALPHA  , A6, B6
386	LFDUX	A4,  XX, INCX2
387	fmul	B1, ALPHA,   A1
388	STFDUX	B4,  YY, INCX2
389
390	fnmsub	B7, ALPHA_I, A8, B7
391	LFDUX	A5,  X,  INCX2
392	fmul	B2, ALPHA_I, A1
393	STFDUX	B5,  Y,  INCX2
394
395	fmadd	B8, ALPHA  , A8, B8
396	LFDUX	A6,  XX, INCX2
397	fmul	B3, ALPHA,   A3
398	STFDUX	B6,  YY, INCX2
399
400	fnmsub	B1, ALPHA_I, A2, B1
401	LFDUX	A7,  X,  INCX2
402	fmul	B4, ALPHA_I, A3
403	STFDUX	B7,  Y,  INCX2
404
405	fmadd	B2, ALPHA  , A2, B2
406	LFDUX	A8,  XX, INCX2
407	fmul	B5, ALPHA,   A5
408	STFDUX	B8,  YY, INCX2
409	bdnz	LL(62)
410	.align 4
411
/* Pipeline drain: finish and store the last four elements, no more loads. */
412LL(63):
413	fnmsub	B3, ALPHA_I, A4, B3
414	fmul	B6, ALPHA_I, A5
415	STFDUX	B1,  Y,  INCX2
416
417	fmadd	B4, ALPHA  , A4, B4
418	fmul	B7, ALPHA,   A7
419	STFDUX	B2,  YY, INCX2
420
421	fnmsub	B5, ALPHA_I, A6, B5
422	fmul	B8, ALPHA_I, A7
423	STFDUX	B3,  Y,  INCX2
424
425	fmadd	B6, ALPHA  , A6, B6
426	STFDUX	B4,  YY, INCX2
427	fnmsub	B7, ALPHA_I, A8, B7
428	STFDUX	B5,  Y,  INCX2
429	fmadd	B8, ALPHA  , A8, B8
430	STFDUX	B6,  YY, INCX2
431	STFDUX	B7,  Y,  INCX2
432	STFDUX	B8,  YY, INCX2
433	.align 4
434
/* Remainder N & 3: two elements, then one. These blocks pair the operands
   differently from the main loop (ALPHA times the element's own component
   first, then the ALPHA_I cross term) but produce the same two sums. */
435LL(65):
436	andi.	r0,  N, 3
437	beq	LL(999)
438	andi.	r0,  N, 2
439	beq	LL(67)
440
441	LFDUX	A1,  X,  INCX2
442	LFDUX	A2,  XX, INCX2
443	LFDUX	A3,  X,  INCX2
444	LFDUX	A4,  XX, INCX2
445
446	fmul	B1, ALPHA, A1
447	fmul	B2, ALPHA, A2
448	fmul	B3, ALPHA, A3
449	fmul	B4, ALPHA, A4
450
451	fnmsub	B1, ALPHA_I, A2, B1
452	fmadd	B2, ALPHA_I, A1, B2
453	fnmsub	B3, ALPHA_I, A4, B3
454	fmadd	B4, ALPHA_I, A3, B4
455
456	STFDUX	B1,  Y,  INCX2
457	STFDUX	B2,  YY, INCX2
458	STFDUX	B3,  Y,  INCX2
459	STFDUX	B4,  YY, INCX2
460	.align 4
461
/* One remaining element. */
462LL(67):
463	andi.	r0,  N, 1
464	beq	LL(999)
465
466	LFDUX	A1,   X,  INCX2
467	LFDUX	A2,   XX, INCX2
468
469	fmul	B1, ALPHA, A1
470	fmul	B2, ALPHA, A2
471	fnmsub	B1, ALPHA_I, A2, B1
472	fmadd	B2, ALPHA_I, A1, B2
473
474	STFDUX	B1,   Y,  INCX2
475	STFDUX	B2,   YY, INCX2
476	b	LL(999)
477	.align 4
478
479
/* General stride (INCX != SIZE). Same ALPHA == 0 test as the unit-stride
   code: fcmpu checks the primary half against the zero in A1, fscmp
   presumably the secondary half (confirm against the FP2 ISA). A non-zero
   ALPHA goes to the multiply code at LL(150); otherwise the elements are
   overwritten with zeros without being read. */
480LL(100):
481	fcmpu	cr7, ALPHA, A1
482	bne	cr7, LL(150)
483
484	fscmp	cr7, ALPHA, A1
485	bne	cr7, LL(150)
486
/* X not a multiple of 2 * SIZE: paired stores are not usable, go to LL(120). */
487	andi.	r0, X, 2 * SIZE - 1
488	bne	LL(120)
489
/* Pre-bias X by one step; each STFPDUX advances X by INCX2 with the store. */
490	sub	X,  X, INCX2
491
/* CTR = N / 4; skip the unrolled loop when that is zero. */
492	srawi.	r0, N, 2
493	mtspr	CTR,  r0
494	beq-	LL(115)
495	.align 4
496
/* Four paired stores (four elements) per iteration. */
497LL(112):
498	STFPDUX	A1,   X, INCX2
499	STFPDUX	A1,   X, INCX2
500	STFPDUX	A1,   X, INCX2
501	STFPDUX	A1,   X, INCX2
502	bdnz	LL(112)
503	.align 4
504
/* Remainder N & 3: bit 1 -> two more stores, bit 0 -> one more store. */
505LL(115):
506	andi.	r0,  N, 3
507	beq	LL(999)
508	andi.	r0,  N, 2
509	beq	LL(117)
510
511	STFPDUX	A1,   X, INCX2
512	STFPDUX	A1,   X, INCX2
513	.align 4
514
515LL(117):
516	andi.	r0,  N, 1
517	beq	LL(999)
518
519	STFPDUX	A1,   X, INCX2
520	b	LL(999)
521	.align 4
522
/* General stride, ALPHA == 0, X not aligned to 2 * SIZE: zero each element
   with two single stores. The two step registers are re-purposed so that
   one cursor can alternate between the components:
       INCX2 := INCX2 - SIZE   step from the second component of one element
                               to the first component of the next
       INCX  := SIZE           step from the first to the second component
   The two steps add up to the original INCX2 per element. X is pre-biased
   by the new INCX2 so the first update-form store lands on the original X. */
523LL(120):
524	subi	INCX2, INCX2, SIZE
525	li	INCX, SIZE
526
527	sub	X,  X, INCX2
528
/* CTR = N / 4; skip the unrolled loop when that is zero. */
529	srawi.	r0, N, 2
530	mtspr	CTR,  r0
531	beq-	LL(125)
532	.align 4
533
/* Four elements (eight single stores) per iteration. */
534LL(122):
535	STFDUX	A1,   X, INCX2
536	STFDUX	A1,   X, INCX
537	STFDUX	A1,   X, INCX2
538	STFDUX	A1,   X, INCX
539	STFDUX	A1,   X, INCX2
540	STFDUX	A1,   X, INCX
541	STFDUX	A1,   X, INCX2
542	STFDUX	A1,   X, INCX
543	bdnz	LL(122)
544	.align 4
545
/* Remainder N & 3: two elements, then one. */
546LL(125):
547	andi.	r0,  N, 3
548	beq	LL(999)
549	andi.	r0,  N, 2
550	beq	LL(127)
551
552	STFDUX	A1,   X, INCX2
553	STFDUX	A1,   X, INCX
554	STFDUX	A1,   X, INCX2
555	STFDUX	A1,   X, INCX
556	.align 4
557
558LL(127):
559	andi.	r0,  N, 1
560	beq	LL(999)
561
562	STFDUX	A1,   X, INCX2
563	STFDUX	A1,   X, INCX
564	b	LL(999)
565	.align 4
566
/* General stride, ALPHA != 0: multiply every element by ALPHA in place.
   Instruction for instruction this mirrors the unit-stride code at LL(50);
   only the value of INCX2 differs. Y (stores) and X (loads) start at the
   same address, both pre-biased by one step for the update-form accesses.

   Each element is handled by the pair
       fxpmul   B, ALPHA, A
       fxcxnpma B, ALPHA, A, B
   which is presumably the FP2 complex-multiply idiom (confirm against the
   FP2 ISA); the scalar code at LL(160) spells out the same computation. */
567LL(150):
568	sub	Y,  X, INCX2
569	sub	X,  X, INCX2
570
/* Misaligned X cannot use paired loads/stores: use the scalar code at LL(160). */
571	andi.	r0, X, 2 * SIZE - 1
572	bne	LL(160)
573
/* CTR = N / 8; skip straight to the remainder code when that is zero. */
574	srawi.	r0, N, 3
575	mtspr	CTR,  r0
576	beq-	LL(155)
577
/* Pipeline fill: load the first eight elements and start their products. */
578	LFPDUX	A1,  X,  INCX2
579	LFPDUX	A2,  X,  INCX2
580	LFPDUX	A3,  X,  INCX2
581	LFPDUX	A4,  X,  INCX2
582
583	LFPDUX	A5,  X,  INCX2
584	fxpmul	B1,  ALPHA, A1
585	LFPDUX	A6,  X,  INCX2
586	fxpmul	B2,  ALPHA, A2
587	LFPDUX	A7,  X,  INCX2
588	fxpmul	B3,  ALPHA, A3
589	LFPDUX	A8,  X,  INCX2
590	fxpmul	B4,  ALPHA, A4
591 	fxpmul	B5,  ALPHA, A5
592
593	fxcxnpma B1, ALPHA, A1, B1
594	fxcxnpma B2, ALPHA, A2, B2
/* Only one group of eight: skip the steady-state loop and drain at LL(153). */
595	bdz	LL(153)
596	.align 4
597
/* Steady state: each iteration finishes and stores the eight results whose
   inputs were loaded earlier, while loading the next eight inputs and
   starting their products. Instruction order matters: every A register is
   reloaded only after its last use for the previous group. */
598LL(152):
599	fxcxnpma B3, ALPHA, A3, B3
600	LFPDUX	A1,  X,  INCX2
601	fxpmul	B6,  ALPHA, A6
602	STFPDUX	B1,   Y,  INCX2
603
604	fxcxnpma B4, ALPHA, A4, B4
605	LFPDUX	A2,  X,  INCX2
606	fxpmul	B7,  ALPHA, A7
607	STFPDUX	B2,   Y,  INCX2
608
609	fxcxnpma B5, ALPHA, A5, B5
610	LFPDUX	A3,  X,  INCX2
611	fxpmul	B8,  ALPHA, A8
612	STFPDUX	B3,   Y,  INCX2
613
614	fxcxnpma B6, ALPHA, A6, B6
615	LFPDUX	A4,  X,  INCX2
616	fxpmul	B1,  ALPHA, A1
617	STFPDUX	B4,   Y,  INCX2
618
619	fxcxnpma B7, ALPHA, A7, B7
620 	LFPDUX	A5,  X,  INCX2
621	fxpmul	B2,  ALPHA, A2
622	STFPDUX	B5,   Y,  INCX2
623
624	fxcxnpma B8, ALPHA, A8, B8
625	LFPDUX	A6,  X,  INCX2
626	fxpmul	B3,  ALPHA, A3
627	STFPDUX	B6,   Y,  INCX2
628
629	fxcxnpma B1, ALPHA, A1, B1
630	LFPDUX	A7,  X,  INCX2
631	fxpmul	B4,  ALPHA, A4
632	STFPDUX	B7,   Y,  INCX2
633
634	fxcxnpma B2, ALPHA, A2, B2
635	LFPDUX	A8,  X,  INCX2
636	fxpmul	B5,  ALPHA, A5
637	STFPDUX	B8,   Y,  INCX2
638	bdnz	LL(152)
639	.align 4
640
/* Pipeline drain: finish and store the last eight elements, no more loads. */
641LL(153):
642	fxcxnpma B3, ALPHA, A3, B3
643	fxpmul	B6,  ALPHA, A6
644	STFPDUX	B1,   Y,  INCX2
645
646	fxcxnpma B4, ALPHA, A4, B4
647	fxpmul	B7,  ALPHA, A7
648	STFPDUX	B2,   Y,  INCX2
649
650	fxcxnpma B5, ALPHA, A5, B5
651	fxpmul	B8,  ALPHA, A8
652	STFPDUX	B3,   Y,  INCX2
653
654	fxcxnpma B6, ALPHA, A6, B6
655	STFPDUX	B4,   Y,  INCX2
656	fxcxnpma B7, ALPHA, A7, B7
657	STFPDUX	B5,   Y,  INCX2
658	fxcxnpma B8, ALPHA, A8, B8
659	STFPDUX	B6,   Y,  INCX2
660	STFPDUX	B7,   Y,  INCX2
661	STFPDUX	B8,   Y,  INCX2
662	.align 4
663
/* Remainder N & 7, handled without pipelining: 4, then 2, then 1 element. */
664LL(155):
665	andi.	r0,  N, 7
666	beq	LL(999)
667
668	andi.	r0,  N, 4
669	beq	LL(156)
670
671	LFPDUX	A1,  X,  INCX2
672	LFPDUX	A2,  X,  INCX2
673	LFPDUX	A3,  X,  INCX2
674	LFPDUX	A4,  X,  INCX2
675
676	fxpmul	B1,  ALPHA, A1
677	fxpmul	B2,  ALPHA, A2
678	fxpmul	B3,  ALPHA, A3
679	fxpmul	B4,  ALPHA, A4
680
681	fxcxnpma B1, ALPHA, A1, B1
682	fxcxnpma B2, ALPHA, A2, B2
683	fxcxnpma B3, ALPHA, A3, B3
684	fxcxnpma B4, ALPHA, A4, B4
685
686	STFPDUX	B1,   Y,  INCX2
687	STFPDUX	B2,   Y,  INCX2
688	STFPDUX	B3,   Y,  INCX2
689	STFPDUX	B4,   Y,  INCX2
690	.align 4
691
/* Two remaining elements. */
692LL(156):
693	andi.	r0,  N, 2
694	beq	LL(157)
695
696	LFPDUX	A1,  X,  INCX2
697	LFPDUX	A2,  X,  INCX2
698
699	fxpmul	B1,  ALPHA, A1
700	fxpmul	B2,  ALPHA, A2
701
702	fxcxnpma B1, ALPHA, A1, B1
703	fxcxnpma B2, ALPHA, A2, B2
704
705	STFPDUX	B1,   Y,  INCX2
706	STFPDUX	B2,   Y,  INCX2
707	.align 4
708
/* One remaining element. */
709LL(157):
710	andi.	r0,  N, 1
711	beq	LL(999)
712
713	LFPDUX	A1,   X,  INCX2
714
715	fxpmul	B1,  ALPHA, A1
716	fxcxnpma B1, ALPHA, A1, B1
717
718	STFPDUX	B1,   Y,  INCX2
719	b	LL(999)
720	.align 4
721
/* General stride, ALPHA != 0, X not aligned to 2 * SIZE: scalar fallback.
   Mirrors the unit-stride code at LL(60); only the value of INCX2 differs.
   X/Y walk the first component of each element and XX/YY (one SIZE further)
   the second component; all four advance by INCX2 per access. With
   a = value from X and b = value from XX, every element is computed as
       Y  <- ALPHA * a - ALPHA_I * b     (fmul + fnmsub)
       YY <- ALPHA * b + ALPHA_I * a     (fmul + fmadd)
   i.e. a complex multiply with a as the real and b as the imaginary part. */
722LL(160):
723	addi	XX, X, SIZE
724	addi	YY, Y, SIZE
725
/* CTR = N / 4; skip to the remainder code when that is zero. */
726	srawi.	r0, N, 2
727	mtspr	CTR,  r0
728	beq-	LL(165)
729
/* Pipeline fill: load four elements (A1/A2 ... A7/A8) and start the
   products for the first ones. */
730	LFDUX	A1,  X,  INCX2
731	LFDUX	A2,  XX, INCX2
732	LFDUX	A3,  X,  INCX2
733	LFDUX	A4,  XX, INCX2
734
735	LFDUX	A5,  X,  INCX2
736	fmul	B1, ALPHA,   A1
737	LFDUX	A6,  XX, INCX2
738	fmul	B2, ALPHA_I, A1
739	LFDUX	A7,  X,  INCX2
740	fmul	B3, ALPHA,   A3
741	LFDUX	A8,  XX, INCX2
742	fmul	B4, ALPHA_I, A3
743
744	fmul	B5, ALPHA,   A5
745	fnmsub	B1, ALPHA_I, A2, B1
746	fmadd	B2, ALPHA  , A2, B2
/* Only one group of four: skip the steady-state loop and drain at LL(163). */
747	bdz	LL(163)
748
749	.align 4
750
/* Steady state: finish and store the four elements loaded earlier while
   loading the next four and starting their products. Odd-numbered B
   registers go to Y, even-numbered ones to YY. Instruction order matters:
   each A register is reloaded only after its last use for the previous
   group. */
751LL(162):
752	fnmsub	B3, ALPHA_I, A4, B3
753 	LFDUX	A1,  X,  INCX2
754	fmul	B6, ALPHA_I, A5
755	STFDUX	B1,  Y,  INCX2
756
757	fmadd	B4, ALPHA  , A4, B4
758	LFDUX	A2,  XX, INCX2
759	fmul	B7, ALPHA,   A7
760	STFDUX	B2,  YY, INCX2
761
762	fnmsub	B5, ALPHA_I, A6, B5
763	LFDUX	A3,  X,  INCX2
764	fmul	B8, ALPHA_I, A7
765	STFDUX	B3,  Y,  INCX2
766
767	fmadd	B6, ALPHA  , A6, B6
768	LFDUX	A4,  XX, INCX2
769	fmul	B1, ALPHA,   A1
770	STFDUX	B4,  YY, INCX2
771
772	fnmsub	B7, ALPHA_I, A8, B7
773	LFDUX	A5,  X,  INCX2
774	fmul	B2, ALPHA_I, A1
775	STFDUX	B5,  Y,  INCX2
776
777	fmadd	B8, ALPHA  , A8, B8
778	LFDUX	A6,  XX, INCX2
779	fmul	B3, ALPHA,   A3
780	STFDUX	B6,  YY, INCX2
781
782	fnmsub	B1, ALPHA_I, A2, B1
783	LFDUX	A7,  X,  INCX2
784	fmul	B4, ALPHA_I, A3
785	STFDUX	B7,  Y,  INCX2
786
787	fmadd	B2, ALPHA  , A2, B2
788	LFDUX	A8,  XX, INCX2
789	fmul	B5, ALPHA,   A5
790	STFDUX	B8,  YY, INCX2
791	bdnz	LL(162)
792	.align 4
793
/* Pipeline drain: finish and store the last four elements, no more loads. */
794LL(163):
795	fnmsub	B3, ALPHA_I, A4, B3
796	fmul	B6, ALPHA_I, A5
797	STFDUX	B1,  Y,  INCX2
798
799	fmadd	B4, ALPHA  , A4, B4
800	fmul	B7, ALPHA,   A7
801	STFDUX	B2,  YY, INCX2
802
803	fnmsub	B5, ALPHA_I, A6, B5
804	fmul	B8, ALPHA_I, A7
805	STFDUX	B3,  Y,  INCX2
806
807	fmadd	B6, ALPHA  , A6, B6
808	STFDUX	B4,  YY, INCX2
809	fnmsub	B7, ALPHA_I, A8, B7
810	STFDUX	B5,  Y,  INCX2
811	fmadd	B8, ALPHA  , A8, B8
812	STFDUX	B6,  YY, INCX2
813	STFDUX	B7,  Y,  INCX2
814	STFDUX	B8,  YY, INCX2
815	.align 4
816
/* Remainder N & 3: two elements, then one. These blocks pair the operands
   differently from the main loop (ALPHA times the element's own component
   first, then the ALPHA_I cross term) but produce the same two sums. */
817LL(165):
818	andi.	r0,  N, 3
819	beq	LL(999)
820	andi.	r0,  N, 2
821	beq	LL(167)
822
823	LFDUX	A1,  X,  INCX2
824	LFDUX	A2,  XX, INCX2
825	LFDUX	A3,  X,  INCX2
826	LFDUX	A4,  XX, INCX2
827
828	fmul	B1, ALPHA, A1
829	fmul	B2, ALPHA, A2
830	fmul	B3, ALPHA, A3
831	fmul	B4, ALPHA, A4
832
833	fnmsub	B1, ALPHA_I, A2, B1
834	fmadd	B2, ALPHA_I, A1, B2
835	fnmsub	B3, ALPHA_I, A4, B3
836	fmadd	B4, ALPHA_I, A3, B4
837
838	STFDUX	B1,  Y,  INCX2
839	STFDUX	B2,  YY, INCX2
840	STFDUX	B3,  Y,  INCX2
841	STFDUX	B4,  YY, INCX2
842	.align 4
843
/* One remaining element. Unlike the other paths there is no branch at the
   end: execution falls through into the exit code at LL(999). */
844LL(167):
845	andi.	r0,  N, 1
846	beq	LL(999)
847
848	LFDUX	A1,   X,  INCX2
849	LFDUX	A2,   XX, INCX2
850
851	fmul	B1, ALPHA, A1
852	fmul	B2, ALPHA, A2
853	fnmsub	B1, ALPHA_I, A2, B1
854	fmadd	B2, ALPHA_I, A1, B2
855
856	STFDUX	B1,   Y,  INCX2
857	STFDUX	B2,   YY, INCX2
858	.align 4
859
/* Common exit. SP currently points at the 16-byte zero scratch area pushed
   at entry. Each lfpdux first advances SP by 16 and then loads, so the
   first load steps over the scratch area and the four loads restore
   f17, f16, f15, f14 in the reverse of the order they were saved. The
   final addi releases the last 16-byte slot, returning SP to its entry
   value (4 * 16 + 16 bytes were pushed, the same amount is popped). */
860LL(999):
861	li	r10, 16
862
863	lfpdux	f17, SP, r10
864	lfpdux	f16, SP, r10
865	lfpdux	f15, SP, r10
866	lfpdux	f14, SP, r10
867
868	addi	SP, SP,  16
869	blr
870
/* EPILOGUE is a macro from common.h (not visible in this file). */
871	EPILOGUE
872