1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
/*
 * Register aliases used by the code in this file.
 *
 * Integer registers: the five incoming arguments live in %i0-%i4 and
 * %i5 serves as the loop counter.
 */
#define N       %i0
#define X       %i1
#define INCX    %i2
#define Y       %i3
#define INCY    %i4
#define I       %i5

/*
 * Floating-point scratch registers a1..a16.
 *
 * With DOUBLE defined every alias names an even-numbered register
 * (%f0, %f2, ... %f30); otherwise the aliases map onto the consecutive
 * registers %f0 .. %f15.
 */
#if defined(DOUBLE)

#define a1      %f0
#define a2      %f2
#define a3      %f4
#define a4      %f6
#define a5      %f8
#define a6      %f10
#define a7      %f12
#define a8      %f14
#define a9      %f16
#define a10     %f18
#define a11     %f20
#define a12     %f22
#define a13     %f24
#define a14     %f26
#define a15     %f28
#define a16     %f30

#else

#define a1      %f0
#define a2      %f1
#define a3      %f2
#define a4      %f3
#define a5      %f4
#define a6      %f5
#define a7      %f6
#define a8      %f7
#define a9      %f8
#define a10     %f9
#define a11     %f10
#define a12     %f11
#define a13     %f12
#define a14     %f13
#define a15     %f14
#define a16     %f15

#endif
84
/*
 * Vector copy kernel:  y[k * incy] = x[k * incx]   for k = 0 .. N-1
 *
 * Registers (aliases are #defined earlier in this file):
 *   N     %i0   number of elements to copy
 *   X     %i1   source pointer, advanced as elements are read
 *   INCX  %i2   source stride in elements; scaled to bytes on entry
 *   Y     %i3   destination pointer, advanced as elements are written
 *   INCY  %i4   destination stride in elements; scaled to bytes on entry
 *   I     %i5   loop counter
 *   a1-a8       floating-point scratch for the 8-way unrolled loops
 *
 * Returns 0 in the caller's %o0 on every path.
 *
 * PROLOGUE, SAVESP, EPILOGUE, LDF, STF, SIZE and BASE_SHIFT come from
 * common.h.  The code assumes SAVESP opens a new register window (the
 * arguments are read from %i registers and the exits use "return") and
 * that (1 << BASE_SHIFT) == SIZE -- TODO confirm against common.h.
 *
 * NOTE(review): counts are derived with "sra" and tested through %icc,
 * i.e. they are handled as 32-bit quantities.  Confirm this is intended
 * for builds where N is a 64-bit integer.
 */
	PROLOGUE
	SAVESP

	/*
	 * Nothing to do for N <= 0.  Without this guard a negative N skips
	 * the unrolled loop, but "and N, 7" can still produce a positive
	 * tail count (N = -1 gives 7), so elements that were never
	 * requested would be copied.
	 */
	cmp	N, 0
	ble,pn	%icc, .LL59
	nop

	/* Convert both strides from elements to bytes. */
	sll	INCX, BASE_SHIFT, INCX
	sll	INCY, BASE_SHIFT, INCY

	/* Take the contiguous path only when both strides equal one element. */
	cmp	INCX, SIZE
	bne	.LL50
	nop
	cmp	INCY, SIZE
	bne	.LL50
	nop

	/* ---- Unit stride: I = N / 8 unrolled iterations ---- */
	sra	N, 3, I
	cmp	I, 0
	ble,pn	%icc, .LL15
	nop

/* Prefetch distance, in elements, ahead of the current X / Y position. */
#define PREFETCHSIZE 32

.LL11:
	/* Load eight consecutive elements of X. */
	LDF	[X +  0 * SIZE], a1
	prefetch [X  + PREFETCHSIZE * SIZE], 0
	LDF	[X +  1 * SIZE], a2
	LDF	[X +  2 * SIZE], a3
	LDF	[X +  3 * SIZE], a4
	LDF	[X +  4 * SIZE], a5
	LDF	[X +  5 * SIZE], a6
	LDF	[X +  6 * SIZE], a7
	LDF	[X +  7 * SIZE], a8

	/*
	 * Store them to eight consecutive elements of Y.
	 * NOTE(review): Y is only written here, yet the prefetch uses
	 * function code 0 (the same hint as the X stream); a write-oriented
	 * hint may suit better -- measure before changing.
	 */
	STF	a1, [Y +  0 * SIZE]
	prefetch [Y  + PREFETCHSIZE * SIZE], 0
	STF	a2, [Y +  1 * SIZE]
	STF	a3, [Y +  2 * SIZE]
	STF	a4, [Y +  3 * SIZE]
	STF	a5, [Y +  4 * SIZE]
	STF	a6, [Y +  5 * SIZE]
	STF	a7, [Y +  6 * SIZE]
	STF	a8, [Y +  7 * SIZE]

	/* The adds below do not touch the condition codes set by cmp. */
	add	I, -1, I
	cmp	I, 0
	add	Y,  8 * SIZE, Y
	add	X,  8 * SIZE, X

	bg,pt	%icc, .LL11
	nop


	/* ---- Unit stride: remaining N % 8 elements, one at a time ---- */
.LL15:
	and	N,  7, I
	cmp	I,  0
	ble,a,pn %icc, .LL19
	nop

.LL16:
	LDF	[X +  0 * SIZE], a1
	add	I, -1, I
	cmp	I, 0
	add	X, 1 * SIZE, X
	STF	a1, [Y +  0 * SIZE]
	bg,pt	%icc, .LL16
	add	Y, 1 * SIZE, Y		/* delay slot: advance Y */

.LL19:
	/*
	 * Return 0.  "return" restores the caller's register window, so the
	 * delay-slot instruction writes the caller's %o0.  This used to be
	 * "clr %g0", a no-op on the hardwired zero register that left the
	 * return value undefined on this path only.
	 */
	return	%i7 + 8
	clr	%o0

	/* ---- General stride: I = N / 8 unrolled iterations ---- */
.LL50:
	sra	N, 3, I
	cmp	I, 0
	ble,pn	%icc, .LL55
	nop

.LL51:
	/* Gather eight elements, stepping X by INCX bytes after each load. */
	LDF	[X +  0 * SIZE], a1
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a2
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a3
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a4
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a5
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a6
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a7
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a8
	add	X, INCX, X

	/*
	 * Scatter them, stepping Y by INCY bytes after each store.  The
	 * counter update and compare are interleaved with the stores; the
	 * plain adds that follow leave the condition codes intact for the
	 * branch at the bottom.
	 */
	STF	a1, [Y +  0 * SIZE]
	add	Y, INCY, Y
	add	I, -1, I
	STF	a2, [Y +  0 * SIZE]
	add	Y, INCY, Y
	cmp	I, 0
	STF	a3, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a4, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a5, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a6, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a7, [Y +  0 * SIZE]
	add	Y, INCY, Y
	STF	a8, [Y +  0 * SIZE]

	bg,pt	%icc, .LL51
	add	Y, INCY, Y		/* delay slot: step past the 8th store */

	/* ---- General stride: remaining N % 8 elements ---- */
.LL55:
	and	N, 7, I
	cmp	I,  0
	ble,a,pn %icc, .LL59
	nop

.LL56:
	LDF	[X +  0 * SIZE], a1
	add	I, -1, I
	cmp	I, 0
	add	X, INCX, X
	STF	a1, [Y +  0 * SIZE]
	bg,pt	%icc, .LL56
	add	Y, INCY, Y		/* delay slot: advance Y */

.LL59:
	/* Return 0 in the caller's %o0 (written after the window restore). */
	return	%i7 + 8
	clr	%o0

	EPILOGUE
219