1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
42#ifdef XDOUBLE
43#define PREFETCH_SIZE ( 8 *  8 +  4)
44#elif defined(DOUBLE)
45#define PREFETCH_SIZE (16 *  8 +  8)
46#else
47#define PREFETCH_SIZE (32 *  8 + 16)
48#endif
49
50#define N	r32
51#define X1	r33
52#define INCX	r34
53#define Y1	r35
54#define INCY	r36
55
56#define PREX	r2
57#define PREY	r3
58
59#define I	r14
60#define J	r15
61#define Y2	r16
62#define X2	r17
63
64#define INCX16	r18
65#define INCY16	r19
66
67#define PR	r30
68#define ARLC	r31
69
70#define C	f8
71#define S	f9
72
73	PROLOGUE
74	.prologue
75	PROFCODE
76	{ .mmi
77	adds	r29 = 16, r12
78	add	INCX = INCX, INCX
79	.save ar.lc, ARLC
80	mov	ARLC = ar.lc
81	}
82	{ .mib
83	cmp.lt	p0, p6 = r0, N
84	shr	I =  N, 3
85	(p6) br.ret.spnt.many b0
86	}
87	;;
88	.body
89	{ .mmi
90#ifdef XDOUBLE
91	LDFD	S = [r29]
92#else
93	nop	__LINE__
94#endif
95	add	INCY = INCY, INCY
96	mov	PR = pr
97	}
98	{ .mmi
99	mov	X2 = X1
100	mov	Y2 = Y1
101	mov	pr.rot= 0
102	}
103	;;
104	{ .mmi
105	shladd	INCX = INCX, BASE_SHIFT, r0
106	shladd	INCY = INCY, BASE_SHIFT, r0
107	mov	ar.ec= 3
108	}
109	{ .mmi
110	adds	I = -1, I
111	cmp.eq	p16, p0 = r0, r0
112	and	J =  7, N
113	}
114	;;
115	{ .mmi
116#ifndef XDOUBLE
117	shladd	INCX16 = INCX, 3, r0
118	shladd	INCY16 = INCY, 3, r0
119#else
120	shladd	INCX16 = INCX, 2, r0
121	shladd	INCY16 = INCY, 2, r0
122#endif
123	nop	__LINE__
124	}
125	{ .mmi
126	adds	INCX = -SIZE, INCX
127	adds	INCY = -SIZE, INCY
128	nop	__LINE__
129	}
130	;;
131	{ .mmi
132	adds	PREX = PREFETCH_SIZE * SIZE, X1
133	adds	PREY = PREFETCH_SIZE * SIZE, Y1
134	mov	ar.lc = I
135	}
136	{ .mib
137	cmp.eq	p6 ,p0  =   -1, I
138	tbit.z	p0, p12 = N, 2
139	(p6) br.cond.dpnt  .L15
140	}
141	;;
142	.align 32
143
144.L12:
145	{ .mmf
146	(p19) STFD	[Y2] = f15
147	(p16) lfetch.excl.nt1 [PREX], INCX16
148	(p18) FMPY	f15  = C, f91
149	}
150	{ .mmf
151	(p16) LDFD	f32  = [X1], SIZE
152	(p19) add	Y2 = Y2, INCY
153	(p18) FNMA	f11  = S, f37, f11
154	}
155	;;
156	{ .mmf
157	(p18) STFD	[X2] = f6
158	(p16) lfetch.excl.nt1 [PREY], INCY16
159	(p18) FMA	f12  = C, f40, f12
160	}
161	{ .mmf
162	(p17) LDFD	f114 = [Y1], INCY
163	(p18) adds	X2 = SIZE, X2
164	(p18) FMPY	f6   = S, f94
165	}
166	;;
167	{ .mmf
168	(p18) STFD	[Y2] = f7
169	(p16) LDFD	f35  = [X1], INCX
170	(p18) FNMA	f13  = S, f40, f13
171	}
172	{ .mmf
173	nop   __LINE__
174	(p18) adds	Y2 = SIZE, Y2
175	(p18) FMPY	f7   = C, f94
176	}
177	;;
178	{ .mmf
179	(p18) STFD	[X2] = f10
180	(p17) LDFD	f117 = [Y1], SIZE
181	(p18) FMA	f14  = C, f43, f14
182	}
183	{ .mmf
184	(p18) add	X2 = X2, INCX
185	nop   __LINE__
186	(p18) FMPY	f10  = S, f97
187	}
188	;;
189	{ .mmf
190	(p18) STFD	[Y2] = f11
191	(p16) LDFD	f38  = [X1], SIZE
192	(p18) FNMA	f15  = S, f43, f15
193	}
194	{ .mmf
195	(p18) add	Y2 = Y2, INCY
196	nop   __LINE__
197	(p18) FMPY	f11  = C, f97
198	}
199	;;
200	{ .mmf
201	(p18) STFD	[X2] = f12
202	(p17) LDFD	f120 = [Y1], INCY
203	(p18) FMPY	f12  = S, f100
204	}
205	{ .mmf
206	(p18) adds	X2 = SIZE, X2
207	nop   __LINE__
208	(p18) FMA	f6   = C, f46, f6
209	}
210	;;
211	{ .mmf
212	(p18) STFD	[Y2] = f13
213	(p16) LDFD	f41  = [X1], INCX
214	(p18) FMPY	f13  = C, f100
215	}
216	{ .mmf
217	(p18) adds	Y2 = SIZE, Y2
218	nop   __LINE__
219	(p18) FNMA	f7   = S, f46, f7
220	}
221	;;
222	{ .mmf
223	(p18) STFD	[X2] = f14
224	(p17) LDFD	f123 = [Y1], SIZE
225	(p18) FMPY	f14  = S, f103
226	}
227	{ .mmf
228	(p18) add	X2 = X2, INCX
229	nop   __LINE__
230	(p18) FMA	f10  = C, f49, f10
231	}
232	;;
233	{ .mmf
234	(p18) STFD	[Y2] = f15
235	(p16) LDFD	f44  = [X1], SIZE
236	(p18) FMPY	f15  = C, f103
237	}
238	{ .mmf
239	(p18) add	Y2 = Y2, INCY
240	nop   __LINE__
241	(p18) FNMA	f11  = S, f49, f11
242	}
243	;;
244	{ .mmf
245	(p18) STFD	[X2] = f6
246	(p17) LDFD	f126 = [Y1], INCY
247	(p18) FMA	f12  = C, f52, f12
248	}
249	{ .mmf
250	(p18) adds	X2 = SIZE, X2
251	nop   __LINE__
252	(p18) FMPY	f6   = S, f106
253	}
254	;;
255	{ .mmf
256	(p18) STFD	[Y2] = f7
257	(p16) LDFD	f47  = [X1], INCX
258	(p18) FNMA	f13  = S, f52, f13
259	}
260	{ .mmf
261	(p18) adds	Y2 = SIZE, Y2
262	nop   __LINE__
263	(p18) FMPY	f7   = C, f106
264	}
265	;;
266	{ .mmf
267	(p18) STFD	[X2] = f10
268	(p16) LDFD	f80  = [Y1], SIZE
269	(p18) FMA	f14  = C, f55, f14
270	}
271	{ .mmf
272	(p18) add	X2 = X2, INCX
273	nop   __LINE__
274	(p18) FMPY	f10  = S, f109
275	}
276	;;
277	{ .mmf
278	(p18) STFD	[Y2] = f11
279	(p16) LDFD	f50  = [X1], SIZE
280	(p18) FNMA	f15  = S, f55, f15
281	}
282	{ .mmf
283	(p18) add	Y2 = Y2, INCY
284	nop   __LINE__
285	(p18) FMPY	f11  = C, f109
286	}
287	;;
288	{ .mmf
289	(p18) STFD	[X2] = f12
290	(p16) LDFD	f83  = [Y1], INCY
291	(p18) FMPY	f12  = S, f112
292	}
293	{ .mmf
294	(p18) adds	X2 = SIZE, X2
295	nop   __LINE__
296	(p18) FMA	f6   = C, f58, f6
297	}
298	;;
299	{ .mmf
300	(p18) STFD	[Y2] = f13
301	(p16) LDFD	f53  = [X1], INCX
302	(p18) FMPY	f13  = C, f112
303	}
304	{ .mmf
305	(p18) adds	Y2 = SIZE, Y2
306	nop   __LINE__
307	(p18) FNMA	f7   = S, f58, f7
308	}
309	;;
310	{ .mmf
311	(p18) STFD	[X2] = f14
312	(p16) LDFD	f86  = [Y1], SIZE
313	(p18) FMPY	f14  = S, f115
314	}
315	{ .mmf
316	(p18) add	X2 = X2, INCX
317	nop   __LINE__
318	(p18) FMA	f10  = C, f61, f10
319	}
320	;;
321	{ .mmf
322	(p18) STFD	[Y2] = f15
323	(p16) LDFD	f56  = [X1], SIZE
324	(p18) FMPY	f15  = C, f115
325	}
326	{ .mmf
327	(p18) add	Y2 = Y2, INCY
328	nop   __LINE__
329	(p18) FNMA	f11  = S, f61, f11
330	}
331	;;
332#ifndef XDOUBLE
333	{ .mmf
334	(p18) STFD	[X2] = f6
335	(p16) LDFD	f89  = [Y1], INCY
336	(p18) FMA	f12  = C, f64, f12
337	}
338	{ .mmf
339	(p18) adds	X2 = SIZE, X2
340	nop   __LINE__
341	(p18) FMPY	f6   = S, f118
342	}
343	;;
344	{ .mmf
345	(p18) STFD	[Y2] = f7
346	(p16) LDFD	f59  = [X1], INCX
347	(p18) FNMA	f13  = S, f64, f13
348	}
349	{ .mmf
350	(p18) adds	Y2 = SIZE, Y2
351	nop   __LINE__
352	(p18) FMPY	f7   = C, f118
353	}
354	;;
355#else
356	{ .mmf
357	(p18) STFD	[X2] = f6
358	(p16) lfetch.excl.nt1 [PREY], INCY16
359	(p18) FMA	f12  = C, f64, f12
360	}
361	{ .mmf
362	(p16) LDFD	f89  = [Y1], INCY
363	(p18) adds	X2 = SIZE, X2
364	(p18) FMPY	f6   = S, f118
365	}
366	;;
367	{ .mmf
368	(p18) STFD	[Y2] = f7
369	(p16) lfetch.excl.nt1 [PREX], INCX16
370	(p18) FNMA	f13  = S, f64, f13
371	}
372	{ .mmf
373	(p16) LDFD	f59  = [X1], INCX
374	(p18) adds	Y2 = SIZE, Y2
375	(p18) FMPY	f7   = C, f118
376	}
377	;;
378#endif
379	{ .mmf
380	(p18) STFD	[X2] = f10
381	(p16) LDFD	f92  = [Y1], SIZE
382	(p18) FMA	f14  = C, f67, f14
383	}
384	{ .mmf
385	(p18) add	X2 = X2, INCX
386	nop   __LINE__
387	(p18) FMPY	f10  = S, f121
388	}
389	;;
390	{ .mmf
391	(p18) STFD	[Y2] = f11
392	(p16) LDFD	f62  = [X1], SIZE
393	(p18) FNMA	f15  = S, f67, f15
394	}
395	{ .mmf
396	(p18) add	Y2 = Y2, INCY
397	nop   __LINE__
398	(p18) FMPY	f11  = C, f121
399	}
400	;;
401	{ .mmf
402	(p18) STFD	[X2] = f12
403	(p16) LDFD	f95  = [Y1], INCY
404	(p18) FMPY	f12  = S, f124
405	}
406	{ .mmf
407	(p18) adds	X2 = SIZE, X2
408	nop   __LINE__
409	(p18) FMA	f6   = C, f70, f6
410	}
411	;;
412	{ .mmf
413	(p18) STFD	[Y2] = f13
414	(p16) LDFD	f65  = [X1], INCX
415	(p18) FMPY	f13  = C, f124
416	}
417	{ .mmf
418	(p18) adds	Y2 = SIZE, Y2
419	nop   __LINE__
420	(p18) FNMA	f7   = S, f70, f7
421	}
422	;;
423	{ .mmf
424	(p18) STFD	[X2] = f14
425	(p16) LDFD	f98  = [Y1], SIZE
426	(p18) FMPY	f14  = S, f127
427	}
428	{ .mmf
429	(p18) add	X2 = X2, INCX
430	nop   __LINE__
431	(p18) FMA	f10  = C, f73, f10
432	}
433	;;
434	{ .mmf
435	(p18) STFD	[Y2] = f15
436	(p16) LDFD	f68  = [X1], SIZE
437	(p18) FMPY	f15  = C, f127
438	}
439	{ .mmf
440	(p18) add	Y2 = Y2, INCY
441	nop   __LINE__
442	(p18) FNMA	f11  = S, f73, f11
443	}
444	;;
445	{ .mmf
446	(p18) STFD	[X2] = f6
447	(p16) LDFD	f101 = [Y1], INCY
448	(p18) FMA	f12  = C, f76, f12
449	}
450	{ .mmf
451	(p18) adds	X2 = SIZE, X2
452	nop   __LINE__
453	(p17) FMPY	f6   = S, f81
454	}
455	;;
456	{ .mmf
457	(p18) STFD	[Y2] = f7
458	(p16) LDFD	f71  = [X1], INCX
459	(p18) FNMA	f13  = S, f76, f13
460	}
461	{ .mmf
462	(p18) adds	Y2 = SIZE, Y2
463	nop   __LINE__
464	(p17) FMPY	f7   = C, f81
465	}
466	;;
467	{ .mmf
468	(p18) STFD	[X2] = f10
469	(p16) LDFD	f104 = [Y1], SIZE
470	(p18) FMA	f14  = C, f79, f14
471	}
472	{ .mmf
473	(p18) add	X2 = X2, INCX
474	nop   __LINE__
475	(p17) FMPY	f10  = S, f84
476	}
477	;;
478	{ .mmf
479	(p18) STFD	[Y2] = f11
480	(p16) LDFD	f74  = [X1], SIZE
481	(p18) FNMA	f15  = S, f79, f15
482	}
483	{ .mmf
484	(p18) add	Y2 = Y2, INCY
485	nop   __LINE__
486	(p17) FMPY	f11  = C, f84
487	}
488	;;
489	{ .mmf
490	(p18) STFD	[X2] = f12
491	(p16) LDFD	f107 = [Y1], INCY
492	(p17) FMPY	f12  = S, f87
493	}
494	{ .mmf
495	(p18) adds	X2 = SIZE, X2
496	nop   __LINE__
497	(p17) FMA	f6   = C, f33, f6
498	}
499	;;
500	{ .mmf
501	(p18) STFD	[Y2] = f13
502	(p16) LDFD	f77  = [X1], INCX
503	(p17) FMPY	f13  = C, f87
504	}
505	{ .mmf
506	(p18) adds	Y2 = SIZE, Y2
507	nop   __LINE__
508	(p17) FNMA	f7   = S, f33, f7
509	}
510	;;
511	{ .mmf
512	(p18) STFD	[X2] = f14
513	(p16) LDFD	f110 = [Y1], SIZE
514	(p17) FMPY	f14  = S, f90
515	}
516	{ .mfb
517	(p18) add	X2 = X2, INCX
518	(p17) FMA	f10  = C, f36, f10
519	br.ctop.sptk.few .L12
520	}
521	;;
522	{ .mmi
523	(p19) STFD	[Y2] = f15
524	(p19) add	Y2 = Y2, INCY
525	nop   __LINE__
526	}
527	{ .mmi
528	nop   __LINE__
529	nop   __LINE__
530	nop   __LINE__
531	}
532	;;
533	.align 32
534
535.L15:
536	{ .mmi
537	(p12) LDFD	f40  = [Y1], SIZE
538	(p12) LDFD	f32  = [X1], SIZE
539	mov	ar.lc = ARLC
540	}
541	;;
542	{ .mmi
543	(p12) LDFD	f41  = [Y1], INCY
544	(p12) LDFD	f33  = [X1], INCX
545	mov	pr = PR, -65474
546	}
547	;;
548	{ .mmb
549	(p12) LDFD	f42  = [Y1], SIZE
550	cmp.eq	p7, p0  =   r0, J
551	(p7) br.ret.sptk.many b0
552	}
553	;;
554	{ .mmf
555	(p12) LDFD	f43  = [Y1], INCY
556	nop   __LINE__
557	(p12) FMPY	f6   = S, f40
558	}
559	;;
560	{ .mmf
561	(p12) LDFD	f34  = [X1], SIZE
562	nop   __LINE__
563	(p12) FMPY	f7   = C, f40
564	}
565	;;
566	{ .mmf
567	(p12) LDFD	f44  = [Y1], SIZE
568	nop   __LINE__
569	(p12) FMPY	f10  = S, f41
570	}
571	;;
572	{ .mmf
573	(p12) LDFD	f35  = [X1], INCX
574	nop   __LINE__
575	(p12) FMPY	f11  = C, f41
576	}
577	;;
578	{ .mmf
579	(p12) LDFD	f45  = [Y1], INCY
580	nop   __LINE__
581	(p12) FMPY	f12  = S, f42
582	}
583	{ .mmf
584	nop   __LINE__
585	nop   __LINE__
586	(p12) FMA	f6   = C, f32, f6
587	}
588	;;
589	{ .mmf
590	(p12) LDFD	f36  = [X1], SIZE
591	nop   __LINE__
592	(p12) FMPY	f13  = C, f42
593	}
594	{ .mmf
595	nop   __LINE__
596	nop   __LINE__
597	(p12) FNMA	f7   = S, f32, f7
598	}
599	;;
600	{ .mmf
601	(p12) LDFD	f46  = [Y1], SIZE
602	nop   __LINE__
603	(p12) FMPY	f14  = S, f43
604	}
605	{ .mmf
606	nop   __LINE__
607	nop   __LINE__
608	(p12) FMA	f10  = C, f33, f10
609	}
610	;;
611	{ .mmf
612	(p12) LDFD	f37  = [X1], INCX
613	nop   __LINE__
614	(p12) FMPY	f15  = C, f43
615	}
616	{ .mmf
617	nop   __LINE__
618	nop   __LINE__
619	(p12) FNMA	f11  = S, f33, f11
620	}
621	;;
622	{ .mmf
623	(p12) STFD	[X2] = f6, SIZE
624	(p12) LDFD	f47  = [Y1], INCY
625	(p12) FMA	f12  = C, f34, f12
626	}
627	{ .mfi
628	nop   __LINE__
629	(p12) FMPY	f6   = S, f44
630	tbit.z	p0, p13 = N, 1
631	}
632	;;
633	{ .mmf
634	(p12) STFD	[Y2] = f7, SIZE
635	(p12) LDFD	f38  = [X1], SIZE
636	(p12) FNMA	f13  = S, f34, f13
637	}
638	{ .mmf
639	nop   __LINE__
640	nop   __LINE__
641	(p12) FMPY	f7   = C, f44
642	}
643	;;
644	{ .mmf
645	(p12) STFD	[X2] = f10
646	(p13) LDFD	f52  = [Y1], SIZE
647	(p12) FMA	f14  = C, f35, f14
648	}
649	{ .mmf
650	(p12) add	X2 = X2, INCX
651	nop   __LINE__
652	(p12) FMPY	f10  = S, f45
653	}
654	;;
655	{ .mmf
656	(p12) STFD	[Y2] = f11
657	(p12) LDFD	f39  = [X1], INCX
658	(p12) FNMA	f15  = S, f35, f15
659	}
660	{ .mmf
661	(p12) add	Y2 = Y2, INCY
662	nop   __LINE__
663	(p12) FMPY	f11  = C, f45
664	}
665	;;
666	{ .mmf
667	(p12) STFD	[X2] = f12, SIZE
668	(p13) LDFD	f53  = [Y1], INCY
669	(p12) FMPY	f12  = S, f46
670	}
671	{ .mmf
672	nop	__LINE__
673	nop   __LINE__
674	(p12) FMA	f6   = C, f36, f6
675	}
676	;;
677	{ .mmf
678	(p12) STFD	[Y2] = f13, SIZE
679	(p13) LDFD	f48  = [X1], SIZE
680	(p12) FMPY	f13  = C, f46
681	}
682	{ .mmf
683	nop   __LINE__
684	nop   __LINE__
685	(p12) FNMA	f7   = S, f36, f7
686	}
687	;;
688	{ .mmf
689	(p12) STFD	[X2] = f14
690	(p13) LDFD	f54  = [Y1], SIZE
691	(p12) FMPY	f14  = S, f47
692	}
693	{ .mmf
694	(p12) add	X2 = X2, INCX
695	nop   __LINE__
696	(p12) FMA	f10  = C, f37, f10
697	}
698	;;
699	{ .mmf
700	(p12) STFD	[Y2] = f15
701	(p13) LDFD	f49  = [X1], INCX
702	(p12) FMPY	f15  = C, f47
703	}
704	{ .mfi
705	(p12) add	Y2 = Y2, INCY
706	(p12) FNMA	f11  = S, f37, f11
707	tbit.z	p0, p14 = N, 0
708	}
709	;;
710	{ .mmf
711	(p12) STFD	[X2] = f6, SIZE
712	(p13) LDFD	f55  = [Y1], INCY
713	(p12) FMA	f12  = C, f38, f12
714	}
715	{ .mmf
716	nop   __LINE__
717	nop   __LINE__
718	(p13) FMPY	f6   = S, f52
719	}
720	;;
721	{ .mmf
722	(p12) STFD	[Y2] = f7, SIZE
723	(p13) LDFD	f50  = [X1], SIZE
724	(p12) FNMA	f13  = S, f38, f13
725	}
726	{ .mmf
727	nop   __LINE__
728	nop   __LINE__
729	(p13) FMPY	f7   = C, f52
730	}
731	;;
732	{ .mmf
733	(p12) STFD	[X2] = f10
734	(p14) LDFD	f58  = [Y1], SIZE
735	(p12) FMA	f14  = C, f39, f14
736	}
737	{ .mmf
738	(p12) add	X2 = X2, INCX
739	nop   __LINE__
740	(p13) FMPY	f10  = S, f53
741	}
742	;;
743	{ .mmf
744	(p12) STFD	[Y2] = f11
745	(p13) LDFD	f51  = [X1], INCX
746	(p12) FNMA	f15  = S, f39, f15
747	}
748	{ .mmf
749	(p12) add	Y2 = Y2, INCY
750	nop   __LINE__
751	(p13) FMPY	f11  = C, f53
752	}
753	;;
754	{ .mmf
755	(p12) STFD	[X2] = f12, SIZE
756	(p14) LDFD	f59  = [Y1], INCY
757	(p13) FMPY	f12  = S, f54
758	}
759	{ .mmf
760	nop   __LINE__
761	nop   __LINE__
762	(p13) FMA	f6   = C, f48, f6
763	}
764	;;
765	{ .mmf
766	(p12) STFD	[Y2] = f13, SIZE
767	(p14) LDFD	f56  = [X1], SIZE
768	(p13) FMPY	f13  = C, f54
769	}
770	{ .mmf
771	nop   __LINE__
772	nop   __LINE__
773	(p13) FNMA	f7   = S, f48, f7
774	}
775	;;
776	{ .mmf
777	(p12) STFD	[X2] = f14
778	(p12) add	X2 = X2, INCX
779	(p13) FMPY	f14  = S, f55
780	}
781	{ .mmf
782	nop   __LINE__
783	nop   __LINE__
784	(p13) FMA	f10  = C, f49, f10
785	}
786	;;
787	{ .mmf
788	(p12) STFD	[Y2] = f15
789	(p14) LDFD	f57  = [X1], INCX
790	(p13) FMPY	f15  = C, f55
791	}
792	{ .mmf
793	(p12) add	Y2 = Y2, INCY
794	nop   __LINE__
795	(p13) FNMA	f11  = S, f49, f11
796	}
797	;;
798	{ .mmf
799	(p13) STFD	[X2] = f6, SIZE
800	nop   __LINE__
801	(p13) FMA	f12  = C, f50, f12
802	}
803	{ .mmf
804	nop   __LINE__
805	nop   __LINE__
806	(p14) FMPY	f6   = S, f58
807	}
808	;;
809	{ .mmf
810	(p13) STFD	[Y2] = f7, SIZE
811	nop   __LINE__
812	(p13) FNMA	f13  = S, f50, f13
813	}
814	{ .mmf
815	nop   __LINE__
816	nop   __LINE__
817	(p14) FMPY	f7   = C, f58
818	}
819	;;
820	{ .mmf
821	(p13) STFD	[X2] = f10
822	(p13) add	X2 = X2, INCX
823	(p13) FMA	f14  = C, f51, f14
824	}
825	{ .mmf
826	nop   __LINE__
827	nop   __LINE__
828	(p14) FMPY	f10  = S, f59
829	}
830	;;
831	{ .mmf
832	(p13) STFD	[Y2] = f11
833	(p13) add	Y2 = Y2, INCY
834	(p13) FNMA	f15  = S, f51, f15
835	}
836	{ .mmf
837	nop   __LINE__
838	nop   __LINE__
839	(p14) FMPY	f11  = C, f59
840	}
841	;;
842	{ .mmf
843	(p13) STFD	[X2] = f12, SIZE
844	nop   __LINE__
845	(p14) FMA	f6   = C, f56, f6
846	}
847	;;
848	{ .mmf
849	(p13) STFD	[Y2] = f13, SIZE
850	nop   __LINE__
851	(p14) FNMA	f7   = S, f56, f7
852	}
853	;;
854	{ .mmf
855	(p13) STFD	[X2] = f14
856	(p13) add	X2 = X2, INCX
857	(p14) FMA	f10  = C, f57, f10
858	}
859	;;
860	{ .mmf
861	(p13) STFD	[Y2] = f15
862	(p13) add	Y2 = Y2, INCY
863	(p14) FNMA	f11  = S, f57, f11
864	}
865	;;
866	{ .mmi
867	(p14) STFD	[X2] = f6, SIZE
868	(p14) STFD	[Y2] = f7, SIZE
869	nop   __LINE__
870	}
871	;;
872	{ .mmb
873	(p14) STFD	[X2] = f10
874	(p14) STFD	[Y2] = f11
875	br.ret.sptk.many b0
876	}
877	;;
878	EPILOGUE
879
880