/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/* This file is run through the C preprocessor before assembly.       */
/* ASSEMBLER is defined ahead of the include; common.h is expected to */
/* supply the macros used by the routine in this file (PROLOGUE,      */
/* PROFCODE, EPILOGUE, LL(), LFDUX, FADD, LDINT, SP, SIZE and         */
/* BASE_SHIFT) -- TODO confirm against common.h.                      */
#define ASSEMBLER
#include "common.h"

/* Integer argument registers.                                        */
/* N:    element count; under F_INTERFACE the register instead holds  */
/*       the address of the count and is dereferenced on entry.       */
#define N	r3
/* X:    address of the first vector element.                         */
#define X	r4
/* INCX: stride between elements, in elements; same by-address rule   */
/*       as N under F_INTERFACE.                                      */
#define INCX	r5
/* PREX: byte offset ahead of X used as the dcbt prefetch distance.   */
#define PREX	r6

/* ATTR is not referenced by the routine below.                       */
#define	ATTR	r7

/* FZERO is loaded with 0.0 on entry and copied into f1..f7; f0 then  */
/* doubles as the first partial-sum accumulator.                      */
#define FZERO	f0

/* Frame size in bytes: f14..f31 are saved at offsets 0..143 and the  */
/* word at offset 144 is used as scratch to materialise 0.0.          */
#define STACKSIZE 160

/*--------------------------------------------------------------------*/
/* Sum of absolute values of a strided vector.                        */
/*                                                                    */
/* In:   N    (r3)  element count                                     */
/*       X    (r4)  address of the first element                      */
/*       INCX (r5)  stride between elements, in elements              */
/*       With F_INTERFACE defined, N and INCX arrive as addresses and */
/*       are loaded through LDINT before use.                         */
/* Out:  f1 = |x[0]| + |x[INCX]| + ... over N elements.               */
/*       f1 is the FP return register in the PowerPC calling          */
/*       conventions.  The result is 0.0 when N <= 0 or when the      */
/*       scaled stride is <= 0.                                       */
/*                                                                    */
/* Register use:                                                      */
/*       f0..f7    eight independent partial sums                     */
/*       f8..f15   elements loaded for the first half of a 16-block   */
/*       f24..f31  elements loaded for the second half of a 16-block  */
/*       f16..f23  absolute values waiting to be accumulated          */
/*       r0, CTR   loop counts;  PREX (r6) prefetch distance          */
/*                                                                    */
/* Stack: STACKSIZE bytes are reserved below the incoming SP;         */
/*       f14..f31 are saved there and restored before return.         */
/*                                                                    */
/* The element width depends on the LFDUX / FADD / SIZE / BASE_SHIFT  */
/* macros, presumably selected by DOUBLE in common.h -- TODO confirm. */
/*--------------------------------------------------------------------*/
	PROLOGUE
	PROFCODE

	/* Reserve the frame; r0 = 0 is later stored to build 0.0. */
	addi	SP, SP, -STACKSIZE
	li	r0,   0

	/* Save f14..f31, all of which are overwritten by the main loop. */
	stfd	f14,    0(SP)
	stfd	f15,    8(SP)
	stfd	f16,   16(SP)
	stfd	f17,   24(SP)

	stfd	f18,   32(SP)
	stfd	f19,   40(SP)
	stfd	f20,   48(SP)
	stfd	f21,   56(SP)

	stfd	f22,   64(SP)
	stfd	f23,   72(SP)
	stfd	f24,   80(SP)
	stfd	f25,   88(SP)

	stfd	f26,   96(SP)
	stfd	f27,  104(SP)
	stfd	f28,  112(SP)
	stfd	f29,  120(SP)

	stfd	f30,  128(SP)
	stfd	f31,  136(SP)

	/* FZERO = 0.0: store an all-zero word and reload it as a float. */
	stw	r0,   144(SP)
	lfs	FZERO,144(SP)

#ifdef F_INTERFACE
	/* Arguments are passed by address: fetch the actual values. */
	LDINT	N,    0(N)
	LDINT	INCX, 0(INCX)
#endif

	/* Scale the stride by BASE_SHIFT (presumably log2 of the element */
	/* size, giving a byte stride), clear accumulators f1..f7, and    */
	/* set the prefetch distance to 3 * 16 elements worth of bytes.   */
	slwi	INCX, INCX, BASE_SHIFT
	fmr	f1,  FZERO
	li	PREX, 3 * 16 * SIZE
	fmr	f2,  FZERO
	/* Pre-bias X by one stride: every load below goes through LFDUX, */
	/* assumed to be an update-form indexed load that advances X by   */
	/* INCX as part of the access.                                    */
	sub	X, X, INCX
	fmr	f3,  FZERO
	fmr	f4,  FZERO
	fmr	f5,  FZERO
	fmr	f6,  FZERO
	/* Nothing to sum when N <= 0. */
	cmpwi	cr0, N, 0
	fmr	f7,  FZERO
	ble-	LL(999)

	/* A non-positive stride also yields a zero result. */
	cmpwi	cr0, INCX, 0
	ble-	LL(999)

	/* CTR = N / 16 = number of full 16-element blocks; skip to the */
	/* remainder loop when there are none.                          */
	srawi.	r0, N, 4
	mtspr	CTR,  r0
	beq-	LL(150)

	/* Pipeline fill: load the first block of 16 elements and take   */
	/* the absolute value of its first half into f16..f23.           */
	LFDUX	f8,    X, INCX
	LFDUX	f9,    X, INCX
	LFDUX	f10,   X, INCX
	LFDUX	f11,   X, INCX
	LFDUX	f12,   X, INCX
	LFDUX	f13,   X, INCX
	LFDUX	f14,   X, INCX
	LFDUX	f15,   X, INCX
	fabs	f16, f8

	LFDUX	f24,   X, INCX
	fabs	f17, f9
	LFDUX	f25,   X, INCX
	fabs	f18, f10
	LFDUX	f26,   X, INCX
	fabs	f19, f11
	LFDUX	f27,   X, INCX
	fabs	f20, f12
	LFDUX	f28,   X, INCX
	fabs	f21, f13
	LFDUX	f29,   X, INCX
	fabs	f22, f14
	LFDUX	f30,   X, INCX
	fabs	f23, f15
	LFDUX	f31,   X, INCX
	/* Only one block in total: go straight to the drain code. */
	bdz	LL(120)
	.align 4

/* Steady state.  Each pass loads the next 16 elements while adding   */
/* the 16 absolute values derived from the previous loads.  Every     */
/* four-instruction group is: load, accumulate, filler, fabs.  The    */
/* nop fillers presumably keep the hand-tuned issue grouping; under   */
/* PPCG4 some of them become dcbt prefetch hints at X + PREX.         */
LL(110):
	/* First half: reload f8..f15, add |previous f8..f15| and take */
	/* the absolute value of the previous f24..f31.                */
	LFDUX	f8,    X, INCX
	FADD	f0, f0, f16
#ifdef PPCG4
	dcbt	X, PREX
#else
	nop
#endif
	fabs	f16, f24

	LFDUX	f9,    X, INCX
	FADD	f1, f1, f17
	nop
	fabs	f17, f25

	LFDUX	f10,   X, INCX
	FADD	f2, f2, f18
	nop
	fabs	f18, f26
	LFDUX	f11,   X, INCX
	FADD	f3, f3, f19
	nop
	fabs	f19, f27

	LFDUX	f12,   X, INCX
	FADD	f4, f4, f20
#if defined(PPCG4) && defined(DOUBLE)
	dcbt	X, PREX
#else
	nop
#endif
	fabs	f20, f28

	LFDUX	f13,   X, INCX
	FADD	f5, f5, f21
	nop
	fabs	f21, f29

	LFDUX	f14,   X, INCX
	FADD	f6, f6, f22
	nop
	fabs	f22, f30
	LFDUX	f15,   X, INCX
	FADD	f7, f7, f23
	nop
	fabs	f23, f31

	/* Second half: reload f24..f31, add |previous f24..f31| and take */
	/* the absolute value of the f8..f15 values just loaded.          */
	LFDUX	f24,   X, INCX
	FADD	f0, f0, f16
#ifdef PPCG4
	dcbt	X, PREX
#else
	nop
#endif
	fabs	f16, f8
	LFDUX	f25,   X, INCX
	FADD	f1, f1, f17
	nop
	fabs	f17, f9

	LFDUX	f26,   X, INCX
	FADD	f2, f2, f18
	nop
	fabs	f18, f10
	LFDUX	f27,   X, INCX
	FADD	f3, f3, f19
	nop
	fabs	f19, f11

	LFDUX	f28,   X, INCX
	FADD	f4, f4, f20
#if defined(PPCG4) && defined(DOUBLE)
	dcbt	X, PREX
#else
	nop
#endif
	fabs	f20, f12

	LFDUX	f29,   X, INCX
	FADD	f5, f5, f21
	nop
	fabs	f21, f13

	LFDUX	f30,   X, INCX
	FADD	f6, f6, f22
	nop
	fabs	f22, f14

	LFDUX	f31,   X, INCX
	FADD	f7, f7, f23
	fabs	f23, f15
	bdnz	LL(110)
	.align 4

/* Pipeline drain: no further loads.  Add the pending absolute values */
/* in f16..f23, then those of the last eight loaded elements          */
/* (f24..f31).                                                        */
LL(120):
	FADD	f0, f0, f16
	fabs	f16, f24
	FADD	f1, f1, f17
	fabs	f17, f25

	FADD	f2, f2, f18
	fabs	f18, f26
	FADD	f3, f3, f19
	fabs	f19, f27

	FADD	f4, f4, f20
	fabs	f20, f28
	FADD	f5, f5, f21
	fabs	f21, f29

	FADD	f6, f6, f22
	fabs	f22, f30
	FADD	f7, f7, f23
	fabs	f23, f31

	FADD	f0, f0, f16
	FADD	f1, f1, f17
	FADD	f2, f2, f18
	FADD	f3, f3, f19

	FADD	f4, f4, f20
	FADD	f5, f5, f21
	FADD	f6, f6, f22
	FADD	f7, f7, f23
	.align 4

/* Remainder: CTR = N mod 16 elements still to process. */
LL(150):
	andi.	r0,  N, 15
	mtspr	CTR, r0
	beq	LL(999)
	.align 4

/* One element per pass, accumulated into f0 only. */
LL(160):
	LFDUX	f8,    X, INCX
	fabs	f8, f8
	FADD	f0,  f0, f8
	bdnz	LL(160)
	.align 4

/* Common exit: pairwise-reduce the eight partial sums into f1, then  */
/* restore the saved registers and release the frame.                 */
LL(999):
	FADD	f0,  f0,  f1
	FADD	f2,  f2,  f3
	FADD	f4,  f4,  f5
	FADD	f6,  f6,  f7

	FADD	f0,  f0,  f2
	FADD	f4,  f4,  f6
	/* The final sum goes to f1, the return value. */
	FADD	f1,  f0,  f4

	lfd	f14,    0(SP)
	lfd	f15,    8(SP)
	lfd	f16,   16(SP)
	lfd	f17,   24(SP)

	lfd	f18,   32(SP)
	lfd	f19,   40(SP)
	lfd	f20,   48(SP)
	lfd	f21,   56(SP)

	lfd	f22,   64(SP)
	lfd	f23,   72(SP)
	lfd	f24,   80(SP)
	lfd	f25,   88(SP)

	lfd	f26,   96(SP)
	lfd	f27,  104(SP)
	lfd	f28,  112(SP)
	lfd	f29,  120(SP)

	lfd	f30,  128(SP)
	lfd	f31,  136(SP)

	addi	SP, SP, STACKSIZE
	blr

	EPILOGUE
