1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
/*
 * Integer register aliases.  %i0-%i4 carry the routine's five arguments
 * as seen through the SPARC "in" registers; %i5 is used as a scratch
 * loop counter.
 */
#define N	%i0	/* number of complex elements to copy   */
#define X	%i1	/* source pointer                       */
#define INCX	%i2	/* source stride (scaled to bytes in the code) */
#define Y	%i3	/* destination pointer                  */
#define INCY	%i4	/* destination stride (scaled to bytes in the code) */
#define I	%i5	/* loop counter                         */

/*
 * Eight floating-point temporaries holding four complex values
 * (real/imaginary pairs) per unrolled iteration.  A double-precision
 * value occupies an even/odd %f register pair, so the DOUBLE build
 * steps through the even-numbered registers only.
 */
#ifndef DOUBLE
#define a1	%f0
#define a2	%f1
#define a3	%f2
#define a4	%f3
#define a5	%f4
#define a6	%f5
#define a7	%f6
#define a8	%f7
#else
#define a1	%f0
#define a2	%f2
#define a3	%f4
#define a4	%f6
#define a5	%f8
#define a6	%f10
#define a7	%f12
#define a8	%f14
#endif
68
/*
 * Complex vector copy kernel: Y[i] = X[i] for i = 0 .. N-1, where each
 * element is a pair of FLOATs (real part at offset 0, imaginary part at
 * offset 1 * SIZE).
 *
 * In:   N    = element count
 *       X    = source pointer,      INCX = source stride in elements
 *       Y    = destination pointer, INCY = destination stride in elements
 * Out:  %o0 (caller's view) = 0 on every exit path
 * Uses: I as loop counter, a1..a8 as FP temporaries, integer condition
 *       codes (%icc).  X, Y, INCX and INCY are modified in place.
 *
 * PROLOGUE, SAVESP, LDF, STF, SIZE, ZBASE_SHIFT and EPILOGUE come from
 * common.h.  NOTE(review): SAVESP is presumed to open the register
 * window that makes the arguments visible in %i0-%i4 -- confirm there.
 *
 * Layout: a unit-stride path (.LL11 / .LL16) and a general-stride path
 * (.LL51 / .LL56).  Each path runs a loop unrolled over four complex
 * elements followed by a one-element-at-a-time tail for N & 3.
 */
	PROLOGUE
	SAVESP

	/*
	 * Nothing to copy when N <= 0.  Without this guard a negative N
	 * skips the unrolled loop (N >> 2 < 0) but still leaves N & 3 in
	 * the range 1..3 for most values, so the tail loop would copy
	 * elements that were never requested.
	 */
	cmp	N, 0
	ble,pn	%icc, .LL59
	nop

	/* Convert the strides from element units to byte offsets. */
	sll	INCX, ZBASE_SHIFT, INCX
	sll	INCY, ZBASE_SHIFT, INCY

	/* Take the unit-stride path only when both strides equal one
	   complex element (2 * SIZE bytes); otherwise go to .LL50. */
	cmp	INCX, 2 * SIZE
	bne	.LL50
	nop
	cmp	INCY, 2 * SIZE
	bne	.LL50
	nop

	/* I = N / 4: number of unrolled iterations. */
	sra	N, 2, I
	cmp	I, 0
	ble,pn	%icc, .LL15
	nop

/* Prefetch distance, in FLOATs, ahead of the current X / Y position. */
#define PREFETCHSIZE 32

/* Unit stride, unrolled: four complex elements (8 FLOATs) per pass. */
.LL11:
	/* NOTE(review): both hints use fcn 0 although Y is only stored
	   to here; a write-oriented hint may suit Y better -- measure
	   before changing. */
	prefetch [X  + PREFETCHSIZE * SIZE], 0
	prefetch [Y  + PREFETCHSIZE * SIZE], 0

	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	LDF	[X +  2 * SIZE], a3
	LDF	[X +  3 * SIZE], a4
	LDF	[X +  4 * SIZE], a5
	LDF	[X +  5 * SIZE], a6
	LDF	[X +  6 * SIZE], a7
	LDF	[X +  7 * SIZE], a8

	/* Counter and pointer updates are interleaved with the stores;
	   the cmp result stays valid because nothing after it writes
	   the condition codes. */
	STF	a1, [Y +  0 * SIZE]
	add	I, -1, I
	STF	a2, [Y +  1 * SIZE]
	cmp	I, 0
	STF	a3, [Y +  2 * SIZE]
	add	X, 8 * SIZE, X
	STF	a4, [Y +  3 * SIZE]
	STF	a5, [Y +  4 * SIZE]
	STF	a6, [Y +  5 * SIZE]
	STF	a7, [Y +  6 * SIZE]
	STF	a8, [Y +  7 * SIZE]

	bg,pt	%icc, .LL11
	add	Y, 8 * SIZE, Y		/* delay slot: always executed */

/* Unit stride, tail: I = N & 3 remaining elements. */
.LL15:
	and	N, 3, I
	cmp	I,  0
	ble,a,pn %icc, .LL19
	nop

.LL16:
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	add	I, -1, I
	cmp	I, 0
	STF	a1, [Y +  0 * SIZE]
	add	X, 2 * SIZE, X
	STF	a2, [Y +  1 * SIZE]
	bg,pt	%icc, .LL16
	add	Y, 2 * SIZE, Y		/* delay slot: always executed */

.LL19:
	/*
	 * `return` restores the caller's register window before its
	 * delay slot runs, so the clr below writes the caller's %o0,
	 * i.e. the function result.  This used to be `clr %g0`, a no-op
	 * that left N in the result register while the strided exit
	 * (.LL59) returned 0; both exits now return 0.
	 */
	return	%i7 + 8
	clr	%o0

/* General stride, unrolled: four complex elements per pass. */
.LL50:
	sra	N, 2, I
	cmp	I, 0
	ble,pn	%icc, .LL55
	nop

.LL51:
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a3
	LDF	[X +  1 * SIZE], a4
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a5
	LDF	[X +  1 * SIZE], a6
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a7
	LDF	[X +  1 * SIZE], a8
	add	X, INCX, X

	STF	a1, [Y +  0 * SIZE]
	add	I, -1, I
	STF	a2, [Y +  1 * SIZE]
	add	Y, INCY, Y
	cmp	I, 0			/* tested by the bg below; later adds leave %icc alone */
	STF	a3, [Y +  0 * SIZE]
	STF	a4, [Y +  1 * SIZE]
	add	Y, INCY, Y
	STF	a5, [Y +  0 * SIZE]
	STF	a6, [Y +  1 * SIZE]
	add	Y, INCY, Y
	STF	a7, [Y +  0 * SIZE]
	STF	a8, [Y +  1 * SIZE]

	bg,pt	%icc, .LL51
	add	Y, INCY, Y		/* delay slot: always executed */

/* General stride, tail: I = N & 3 remaining elements. */
.LL55:
	and	N, 3, I
	cmp	I,  0
	ble,a,pn %icc, .LL59
	nop

.LL56:
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	add	I, -1, I
	cmp	I, 0
	add	X, INCX, X
	STF	a1, [Y +  0 * SIZE]
	STF	a2, [Y +  1 * SIZE]
	bg,pt	%icc, .LL56
	add	Y, INCY, Y		/* delay slot: always executed */

.LL59:
	/* Return 0; the delay-slot clr targets the caller's %o0. */
	return	%i7 + 8
	clr	%o0

	EPILOGUE
197