1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
/* Register aliases for the values the routine below consumes as inputs. */
/* N: element count (holds the address of the count when F_INTERFACE is defined). */
42#define N	r3
/* X: address of the first element of the input vector. */
43#define X	r4
/* INCX: stride between elements (address of the stride under F_INTERFACE); */
/* the routine shifts it left by BASE_SHIFT before using it as a load index. */
44#define INCX	r5
45
/* PREA is not referenced by any instruction visible in this file. */
46#define PREA	r8
47
/* FZERO: loaded with 0.0 on entry. It aliases f1, the register in which the */
/* routine leaves its result, so the early-exit paths return 0.0.            */
48#define FZERO	f1
49
/* Stack bytes reserved by the routine: offsets 0..143 hold f14-f31 (18 x 8  */
/* bytes) and the word at offset 144 is scratch used to materialise 0.0.     */
50#define STACKSIZE 160
51
/*
 * Minimum of a strided floating-point vector.
 *
 * In:   N    (r3)  element count
 *       X    (r4)  address of the first element
 *       INCX (r5)  stride in elements
 *       With F_INTERFACE defined, N and INCX are addresses and are
 *       dereferenced through LDINT.
 * Out:  f1 = smallest element of X; f1 = 0.0 when N <= 0 or INCX <= 0
 *       (f1 is the FP return register under the PowerPC ELF ABIs).
 * Uses: r0, CTR, cr0, f0-f13 freely; f14-f31 are saved and restored on a
 *       STACKSIZE-byte frame that is released before returning.
 *
 * Method: eight running minima (f0-f7) are all seeded with the first element.
 * The remaining N-1 elements are consumed 16 at a time by a software-pipelined
 * loop, then (N-1) & 15 leftovers are folded in one by one, and finally the
 * eight lanes are reduced pairwise into f1.
 *
 * Every comparison is the pair
 *       fsub  d, acc, x
 *       fsel  acc, d, x, acc
 * i.e. acc = (acc - x >= 0.0) ? x : acc, which keeps the smaller value.
 *
 * NOTE(review): PROLOGUE, PROFCODE, EPILOGUE, LDINT, LFDUX, LL() and
 * BASE_SHIFT are defined outside this file (presumably via common.h).
 * LFDUX is used here as a load-with-update-indexed (X += INCX, then load
 * from X) - confirm against its definition.
 */
52	PROLOGUE
53	PROFCODE
54
	/* Reserve the frame; r0 = 0 is stored later to build the 0.0 constant. */
55	addi	SP, SP, -STACKSIZE
56	li	r0,   0
57
	/* Save f14-f31: all of them are overwritten below. */
58	stfd	f14,    0(SP)
59	stfd	f15,    8(SP)
60	stfd	f16,   16(SP)
61	stfd	f17,   24(SP)
62
63	stfd	f18,   32(SP)
64	stfd	f19,   40(SP)
65	stfd	f20,   48(SP)
66	stfd	f21,   56(SP)
67
68	stfd	f22,   64(SP)
69	stfd	f23,   72(SP)
70	stfd	f24,   80(SP)
71	stfd	f25,   88(SP)
72
73	stfd	f26,   96(SP)
74	stfd	f27,  104(SP)
75	stfd	f28,  112(SP)
76	stfd	f29,  120(SP)
77
78	stfd	f30,  128(SP)
79	stfd	f31,  136(SP)
80
	/* FZERO (= f1) = 0.0: write an all-zero word and read it back as a single. */
81	stw	r0,   144(SP)
82	lfs	FZERO,144(SP)
83
84#ifdef F_INTERFACE
	/* Arguments arrive by reference: replace each pointer with its value. */
85	LDINT	N,    0(N)
86	LDINT	INCX, 0(INCX)
87#endif
88
	/* Scale the stride by 2^BASE_SHIFT (presumably elements -> bytes). */
89	slwi	INCX, INCX, BASE_SHIFT
90
	/* Bias X back one stride so the first update-form load lands on X[0]. */
91	sub	X, X, INCX
92
	/* Nothing to do for N <= 0 or a non-positive stride: return the 0.0 in f1. */
	/* Both tests are 32-bit signed compares (cmpwi).                          */
93	cmpwi	cr0, N, 0
94	ble-	LL(9999)
95	cmpwi	cr0, INCX, 0
96	ble-	LL(9999)
97
	/* f1 = first element; it seeds all eight running minima f0-f7. */
98	LFDUX	f1, X, INCX
99
100	fmr	f0, f1
	/* N now counts the elements still to be read. */
101	subi	N, N, 1
102	fmr	f2, f1
103	fmr	f3, f1
104	fmr	f4, f1
105	fmr	f5, f1
	/* CTR = remaining / 16 = number of 16-element blocks; the record form */
	/* sets cr0 so the beq- below skips the unrolled path when it is zero. */
106	srawi.	r0, N, 4
107	fmr	f6, f1
108	mtspr	CTR,  r0
109	fmr	f7, f1
110	beq-	LL(150)
111
	/* Pipeline fill: load the first block into f16-f31 (16 elements). */
112	LFDUX	f16,   X, INCX
113	LFDUX	f17,   X, INCX
114	LFDUX	f18,   X, INCX
115	LFDUX	f19,   X, INCX
116	LFDUX	f20,   X, INCX
117	LFDUX	f21,   X, INCX
118	LFDUX	f22,   X, INCX
119	LFDUX	f23,   X, INCX
120
	/* While loading the second half (f24-f31), start the comparisons for */
	/* the first half: f8-f15 = lane minimum - element.                   */
121	LFDUX	f24,   X, INCX
122	fsub	f8,  f0,  f16
123	LFDUX	f25,   X, INCX
124	fsub	f9,  f1,  f17
125	LFDUX	f26,   X, INCX
126	fsub	f10, f2,  f18
127	LFDUX	f27,   X, INCX
128	fsub	f11, f3,  f19
129	LFDUX	f28,   X, INCX
130	fsub	f12, f4,  f20
131	LFDUX	f29,   X, INCX
132	fsub	f13, f5,  f21
133	LFDUX	f30,   X, INCX
134	fsub	f14, f6,  f22
135	LFDUX	f31,   X, INCX
136	fsub	f15, f7,  f23
	/* Only one block in total: go straight to the drain code. */
137	bdz	LL(120)
138	.align 4
139
	/* Steady state. On entry f16-f31 hold an unprocessed block and f8-f15  */
	/* hold the differences for its first half. Each iteration finishes     */
	/* that block, loads the next 16 elements into the registers as they    */
	/* are freed, and leaves the same invariant for the next pass.          */
140LL(110):
	/* Lanes 0-3, first half: select, reload f16-f19, diff vs. second half. */
141	fsel	f0,  f8,  f16,  f0
142	LFDUX	f16,   X, INCX
143	fsub	f8,  f0,  f24
144	fsel	f1,  f9,  f17,  f1
145	LFDUX	f17,   X, INCX
146	fsub	f9,  f1,  f25
147	fsel	f2,  f10, f18,  f2
148	LFDUX	f18,   X, INCX
149	fsub	f10, f2,  f26
150	fsel	f3,  f11, f19,  f3
151	LFDUX	f19,   X, INCX
152	fsub	f11, f3,  f27
153
	/* Lanes 4-7, first half: select, reload f20-f23, diff vs. second half. */
154	fsel	f4,  f12, f20, f4
155	LFDUX	f20,   X, INCX
156	fsub	f12, f4,  f28
157	fsel	f5,  f13, f21, f5
158	LFDUX	f21,   X, INCX
159	fsub	f13, f5,  f29
160	fsel	f6,  f14, f22, f6
161	LFDUX	f22,   X, INCX
162	fsub	f14, f6,  f30
163	fsel	f7,  f15, f23, f7
164	LFDUX	f23,   X, INCX
165	fsub	f15, f7,  f31
166
	/* Lanes 0-3, second half: select, reload f24-f27, diff vs. new first half. */
167	fsel	f0,  f8,  f24, f0
168	LFDUX	f24,   X, INCX
169	fsub	f8,  f0,  f16
170	fsel	f1,  f9,  f25, f1
171	LFDUX	f25,   X, INCX
172	fsub	f9,  f1,  f17
173	fsel	f2,  f10, f26, f2
174	LFDUX	f26,   X, INCX
175	fsub	f10, f2,  f18
176	fsel	f3,  f11, f27, f3
177	LFDUX	f27,   X, INCX
178	fsub	f11, f3,  f19
179
	/* Lanes 4-7, second half: select, reload f28-f31, diff vs. new first half. */
180	fsel	f4,  f12, f28, f4
181	LFDUX	f28,   X, INCX
182	fsub	f12, f4,  f20
183	fsel	f5,  f13, f29, f5
184	LFDUX	f29,   X, INCX
185	fsub	f13, f5,  f21
186	fsel	f6,  f14, f30, f6
187	LFDUX	f30,   X, INCX
188	fsub	f14, f6,  f22
189	fsel	f7,  f15, f31, f7
190	LFDUX	f31,   X, INCX
191	fsub	f15, f7,  f23
192	bdnz	LL(110)
193	.align 4
194
	/* Pipeline drain: fold in the last loaded block (f16-f31) without */
	/* issuing any further loads.                                      */
195LL(120):
196	fsel	f0,  f8,  f16, f0
197	fsub	f8,  f0,  f24
198	fsel	f1,  f9,  f17, f1
199	fsub	f9,  f1,  f25
200	fsel	f2,  f10, f18, f2
201	fsub	f10, f2,  f26
202	fsel	f3,  f11, f19, f3
203	fsub	f11, f3,  f27
204
205	fsel	f4,  f12, f20, f4
206	fsub	f12, f4,  f28
207	fsel	f5,  f13, f21, f5
208	fsub	f13, f5,  f29
209	fsel	f6,  f14, f22, f6
210	fsub	f14, f6,  f30
211	fsel	f7,  f15, f23, f7
212	fsub	f15, f7,  f31
213
214	fsel	f0,  f8,  f24, f0
215	fsel	f1,  f9,  f25, f1
216	fsel	f2,  f10, f26, f2
217	fsel	f3,  f11, f27, f3
218	fsel	f4,  f12, f28, f4
219	fsel	f5,  f13, f29, f5
220	fsel	f6,  f14, f30, f6
221	fsel	f7,  f15, f31, f7
222	.align 4
223
	/* Tail: CTR = remaining & 15 elements that did not fill a block. */
224LL(150):
225	andi.	r0,  N, 15
226	mtspr	CTR, r0
227	beq	LL(999)
228	.align 4
229
	/* One element per iteration, folded into lane f1 only; f8 and f16 */
	/* are free to use as temporaries here.                            */
230LL(160):
231	LFDUX	f8,    X, INCX
232	fsub	f16, f1, f8
233	fsel	f1, f16, f8, f1
234	bdnz	LL(160)
235	.align 4
236
	/* Reduce the eight lanes to one: 8 -> 4 (f0, f2, f4, f6) -> 2 (f0, f4) */
	/* -> 1, with the final select writing the result to f1.                */
237LL(999):
238	fsub	f8,  f0,  f1
239	fsub	f9,  f2,  f3
240	fsub	f10, f4,  f5
241	fsub	f11, f6,  f7
242
243	fsel	f0,  f8,  f1,  f0
244	fsel	f2,  f9,  f3,  f2
245	fsel	f4,  f10, f5,  f4
246	fsel	f6,  f11, f7,  f6
247
248	fsub	f8,  f0,  f2
249	fsub	f9,  f4,  f6
250	fsel	f0,  f8,  f2,  f0
251	fsel	f4,  f9,  f6,  f4
252
253	fsub	f8,  f0,  f4
254	fsel	f1,  f8,  f4,  f0
255	.align 4
256
	/* Common exit (also the early-out target, where f1 still holds 0.0): */
	/* restore f14-f31, release the frame and return.                     */
257LL(9999):
258	lfd	f14,    0(SP)
259	lfd	f15,    8(SP)
260	lfd	f16,   16(SP)
261	lfd	f17,   24(SP)
262
263	lfd	f18,   32(SP)
264	lfd	f19,   40(SP)
265	lfd	f20,   48(SP)
266	lfd	f21,   56(SP)
267
268	lfd	f22,   64(SP)
269	lfd	f23,   72(SP)
270	lfd	f24,   80(SP)
271	lfd	f25,   88(SP)
272
273	lfd	f26,   96(SP)
274	lfd	f27,  104(SP)
275	lfd	f28,  112(SP)
276	lfd	f29,  120(SP)
277
278	lfd	f30,  128(SP)
279	lfd	f31,  136(SP)
280
281	addi	SP, SP, STACKSIZE
282	blr
283
284	EPILOGUE
285