1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
/* Integer inputs. N is the element count, X the address of the first
   element, INCX the stride between elements. When F_INTERFACE is defined
   the code reads N and INCX through pointers. After the first element
   has been handled INCX is overwritten with the constant SIZE. */
42#define N	r3
43#define X	r4
44#define INCX	r5
45
/* Derived byte step: first 2 * (INCX << BASE_SHIFT), later reduced by
   SIZE so that it can be paired with a SIZE step in the load sequence. */
46#define INCX2	r6
47
/* Four running accumulators. C1 (f1) holds the value that is live when
   the routine executes blr. */
48#define C1	f1
49#define C2	f0
50#define C3	f2
51#define C4	f3
52
/* Raw values loaded from X. */
53#define A1	f4
54#define A2	f5
55#define A3	f6
56#define A4	f7
57#define A5	f8
58#define A6	f9
59#define A7	f10
60#define A8	f11
61
/* Differences (candidate minus accumulator) used as the selector operand
   of fsel / fpsel. F3 and F4 are f14/f15, which the routine saves on
   entry and restores on exit. */
62#define F1	f12
63#define F2	f13
64#define F3	f14
65#define F4	f15
66
/* Sums of absolute values produced by fpadd in the unrolled loop
   (f16-f19, saved and restored by the routine). */
67#define T1	f16
68#define T2	f17
69#define T3	f18
70#define T4	f19
71
/* Absolute values of A1-A8 produced by fpabs in the unrolled loop
   (f20-f27, saved and restored by the routine). */
72#define B1	f20
73#define B2	f21
74#define B3	f22
75#define B4	f23
76#define B5	f24
77#define B6	f25
78#define B7	f26
79#define B8	f27
80
81
/*
 * Strided reduction over N elements starting at X, each element being two
 * consecutive SIZE-wide values.
 *
 * For every element the two values are loaded, their absolute values are
 * added, and the sum is compared with a running accumulator. The compare
 * is always "candidate - accumulator" followed by a select that keeps the
 * accumulator when that difference is >= 0 and takes the candidate
 * otherwise, so the accumulator ends up holding the smallest sum seen.
 * The final value is left in C1 (f1) when the routine returns.
 * NOTE(review): the two values per element are presumably the real and
 * imaginary parts of a complex number - confirm against the build rules,
 * the routine name is supplied by PROLOGUE and is not visible here.
 *
 * Early exits: when N <= 0 or the scaled INCX <= 0, C1 still holds the
 * zero loaded from the stack and that is what is returned.
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LL(), LDINT, LFD, LFDUX, LFSDUX, SIZE and
 * BASE_SHIFT are defined outside this file (common.h).
 * NOTE(review): the comments below assume LFDUX / LFSDUX are update-form
 * indexed loads (X += step, then load) that fill the primary / secondary
 * half of a paired register respectively - TODO confirm in common.h.
 */
82	PROLOGUE
83	PROFCODE
84
/* Push f14-f27 (the registers behind F3, F4, T1-T4 and B1-B8). Each
   update-form store moves SP down by 16 and writes one register pair. */
85	li	r10, -16
86
87	stfpdux	f14, SP, r10
88	stfpdux	f15, SP, r10
89
90	stfpdux	f16, SP, r10
91	stfpdux	f17, SP, r10
92	stfpdux	f18, SP, r10
93	stfpdux	f19, SP, r10
94
95	stfpdux	f20, SP, r10
96	stfpdux	f21, SP, r10
97	stfpdux	f22, SP, r10
98	stfpdux	f23, SP, r10
99
100	stfpdux	f24, SP, r10
101	stfpdux	f25, SP, r10
102	stfpdux	f26, SP, r10
103	stfpdux	f27, SP, r10
104
/* Push four zero words (16 bytes); they are read back below to clear C1.
   r10 is left at 0 and doubles as the index for that load. */
105	li	r10,   0
106	stwu	r10,   -4(SP)
107	stwu	r10,   -4(SP)
108	stwu	r10,   -4(SP)
109	stwu	r10,   -4(SP)
110
/* With F_INTERFACE, N and INCX hold addresses and are dereferenced. */
111#ifdef F_INTERFACE
112	LDINT	N,    0(N)
113	LDINT	INCX, 0(INCX)
114#endif
115
116	lfpdx	C1, SP, r10		# Zero clear
117
/* INCX <- INCX << BASE_SHIFT, INCX2 <- twice that.
   NOTE(review): presumably INCX2 is the byte distance between successive
   two-value elements - depends on BASE_SHIFT from common.h. */
118	slwi	INCX,  INCX, BASE_SHIFT
119	add	INCX2, INCX, INCX
120
/* Nothing to do for N <= 0 or a non-positive scaled stride: leave with
   C1 == 0. */
121	cmpwi	cr0, N, 0
122	ble	LL(999)
123	cmpwi	cr0, INCX, 0
124	ble	LL(999)
125
/* First element is handled with scalar instructions and seeds C1. */
126	LFD	A1, 0 * SIZE(X)
127	LFD	A2, 1 * SIZE(X)
128	add	X, X, INCX2
129
130	fabs	A1, A1
131	fabs	A2, A2
132
/* N now counts the remaining elements; if there are none the seed value
   is already the answer. */
133	addi	N, N, -1
134	cmpwi	cr0, N, 0
135	fadd	C1, A1, A2
136	ble	LL(999)
137
/* Set up the two-step walk used by every load pair from here on:
   INCX2 becomes (element step - SIZE) and INCX becomes SIZE, so an
   INCX2 step followed by an INCX step advances exactly one element.
   X is pulled back by the reduced INCX2 so that the first INCX2 step
   lands on the element that follows the one consumed above.
   fsmfp / fpmr spread the seed to both halves of C1 and to C2-C4.
   r0 = N >> 3 is the number of 8-element batches; it is moved to CTR and,
   via the record form of srawi, drives the beq- that skips the unrolled
   loop when it is zero. */
138	subi	INCX2, INCX2, SIZE
139	fsmfp	C1, C1
140	li	INCX, SIZE
141	fpmr	C2, C1
142	sub	X,  X, INCX2
143	fpmr	C3, C1
144	srawi.	r0, N, 3
145	fpmr	C4, C1
146	mtspr	CTR,  r0
147	beq-	LL(105)
148
/* Preload the first batch of eight elements. A1/A2 and A3/A4 take the
   first and second element in their primary halves and the third and
   fourth in their secondary halves; A5-A8 do the same for the next four. */
149	LFDUX	A1,   X, INCX2
150	LFDUX	A2,   X, INCX
151	LFDUX	A3,   X, INCX2
152	LFDUX	A4,   X, INCX
153
154	LFSDUX	A1,   X, INCX2
155	LFSDUX	A2,   X, INCX
156	LFSDUX	A3,   X, INCX2
157	LFSDUX	A4,   X, INCX
158
159	LFDUX	A5,   X, INCX2
160	LFDUX	A6,   X, INCX
161	LFDUX	A7,   X, INCX2
162	LFDUX	A8,   X, INCX
163
164	LFSDUX	A5,   X, INCX2
165	LFSDUX	A6,   X, INCX
166	LFSDUX	A7,   X, INCX2
167	LFSDUX	A8,   X, INCX
/* CTR is decremented here; a single batch goes straight to the drain. */
168	bdz	LL(103)
169	.align 4
170
/* Steady state: arithmetic on the batch already in A1-A8 is interleaved
   with the loads of the next batch. Each A register is copied to its B
   counterpart by fpabs before the load that overwrites it, so the
   instruction order in this loop must not be changed. */
171LL(102):
172	fpabs	B1, A1
173	LFDUX	A1,   X, INCX2
174	fpabs	B2, A2
175	LFDUX	A2,   X, INCX
176	fpabs	B3, A3
177	LFDUX	A3,   X, INCX2
178	fpabs	B4, A4
179	LFDUX	A4,   X, INCX
180
181	fpabs	B5, A5
182	LFSDUX	A1,   X, INCX2
183	fpabs	B6, A6
184	LFSDUX	A2,   X, INCX
185	fpabs	B7, A7
186	LFSDUX	A3,   X, INCX2
187	fpabs	B8, A8
188	LFSDUX	A4,   X, INCX
189
/* T1-T4: per-element sums of the two absolute values (two elements per
   paired register). */
190	fpadd	T1, B1, B2
191	LFDUX	A5,   X, INCX2
192	fpadd	T2, B3, B4
193	LFDUX	A6,   X, INCX
194	fpadd	T3, B5, B6
195	LFDUX	A7,   X, INCX2
196	fpadd	T4, B7, B8
197	LFDUX	A8,   X, INCX
198
/* F = candidate - accumulator, used as the selector below. */
199	fpsub	F1, T1, C1
200	LFSDUX	A5,   X, INCX2
201	fpsub	F2, T2, C2
202	LFSDUX	A6,   X, INCX
203	fpsub	F3, T3, C3
204	LFSDUX	A7,   X, INCX2
205	fpsub	F4, T4, C4
206	LFSDUX	A8,   X, INCX
207
/* Keep Cn where Tn - Cn >= 0, otherwise replace it with Tn. */
208	fpsel	C1, F1, C1, T1
209	fpsel	C2, F2, C2, T2
210	fpsel	C3, F3, C3, T3
211	fpsel	C4, F4, C4, T4
212	bdnz	LL(102)
213	.align 4
214
/* Drain: same arithmetic as the loop body for the batch that is already
   loaded, without issuing further loads. */
215LL(103):
216	fpabs	B1, A1
217	fpabs	B2, A2
218	fpabs	B3, A3
219	fpabs	B4, A4
220
221	fpabs	B5, A5
222	fpabs	B6, A6
223	fpabs	B7, A7
224	fpabs	B8, A8
225
226	fpadd	T1, B1, B2
227	fpadd	T2, B3, B4
228	fpadd	T3, B5, B6
229	fpadd	T4, B7, B8
230
231	fpsub	F1, T1, C1
232	fpsub	F2, T2, C2
233	fpsub	F3, T3, C3
234	fpsub	F4, T4, C4
235
236	fpsel	C1, F1, C1, T1
237	fpsel	C2, F2, C2, T2
238	fpsel	C3, F3, C3, T3
239	fpsel	C4, F4, C4, T4
240	.align 4
241
/* Remainder: the low three bits of N select blocks of 4, 2 and 1
   elements. No remainder at all goes directly to the final reduction. */
242LL(105):
243	andi.	r0,  N, 7
244	beq	LL(998)
245
246	andi.	r0,  N, 4
247	beq	LL(106)
248
/* Four elements: two in the primary halves, two in the secondary halves;
   results are folded into C1 and C2. */
249	LFDUX	A1,   X, INCX2
250	LFDUX	A2,   X, INCX
251	LFDUX	A3,   X, INCX2
252	LFDUX	A4,   X, INCX
253
254	LFSDUX	A1,   X, INCX2
255	LFSDUX	A2,   X, INCX
256	LFSDUX	A3,   X, INCX2
257	LFSDUX	A4,   X, INCX
258
259	fpabs	A1, A1
260	fpabs	A2, A2
261	fpabs	A3, A3
262	fpabs	A4, A4
263
264	fpadd	A1, A1, A2
265	fpadd	A3, A3, A4
266
267	fpsub	F1, A1, C1
268	fpsub	F2, A3, C2
269
270	fpsel	C1, F1, C1, A1
271	fpsel	C2, F2, C2, A3
272	.align 4
273
/* Two elements: one per half of A1/A2, folded into C1. */
274LL(106):
275	andi.	r0,  N, 2
276	beq	LL(107)
277
278	LFDUX	A1,   X, INCX2
279	LFDUX	A2,   X, INCX
280	LFSDUX	A1,   X, INCX2
281	LFSDUX	A2,   X, INCX
282
283	fpabs	A1, A1
284	fpabs	A2, A2
285
286	fpadd	A1, A1, A2
287
288	fpsub	F1, A1, C1
289	fpsel	C1, F1, C1, A1
290	.align 4
291
/* Last single element: scalar instructions, so only the primary half of
   C1 takes part in the compare and select. */
292LL(107):
293	andi.	r0,  N, 1
294	beq	LL(998)
295
296	LFDUX	A1,   X, INCX2
297	LFDUX	A2,   X, INCX
298
299	fabs	A1, A1
300	fabs	A2, A2
301
302	fadd	A1, A1, A2
303
304	fsub	F1, A1, C1
305	fsel	C1, F1, C1, A1
306	.align 4
307
/* Final reduction: fold C2 into C1 and C4 into C3, then C3 into C1, each
   time keeping the smaller value per half. fsmtp then brings the
   secondary half of C1 into the primary half of C2 so that one scalar
   compare/select leaves the overall result in the primary half of C1. */
308LL(998):
309	fpsub	F1,  C2, C1
310	fpsub	F2,  C4, C3
311
312	fpsel	C1,  F1,  C1,  C2
313	fpsel	C3,  F2,  C3,  C4
314
315	fpsub	F1,  C3, C1
316	fpsel	C1,  F1,  C1,  C3
317
318	fsmtp	C2, C1
319
320	fsub	F1,  C2, C1
321	fsel	C1,  F1,  C1,  C2
322	.align 4
323
/* Common exit. SP points at the 16-byte zero slot; every update-form load
   first steps SP up by 16, so the first one reads the slot written for
   f27 and the registers come back in reverse order of the saves. The
   closing addi releases the slot that held f14. */
324LL(999):
325	li	r10, 16
326
327	lfpdux	f27, SP, r10
328	lfpdux	f26, SP, r10
329	lfpdux	f25, SP, r10
330	lfpdux	f24, SP, r10
331
332	lfpdux	f23, SP, r10
333	lfpdux	f22, SP, r10
334	lfpdux	f21, SP, r10
335	lfpdux	f20, SP, r10
336
337	lfpdux	f19, SP, r10
338	lfpdux	f18, SP, r10
339	lfpdux	f17, SP, r10
340	lfpdux	f16, SP, r10
341
342	lfpdux	f15, SP, r10
343	lfpdux	f14, SP, r10
344	addi	SP, SP,  16
345	blr
346
347	EPILOGUE
348