/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"

/*
 * Register aliases for the routine in this file.
 *
 *   N     (r3)  element count
 *   XX    (r4)  store cursor used while scaling by a non-zero factor
 *   PRE   (r5)  byte offset passed to dcbtst (PPCG4 builds only)
 *   X           address of the vector being scaled
 *   INCX        stride of the vector
 *   FZERO (f0)  floating-point zero built by the routine
 *   ALPHA (f1)  scale factor
 *
 * r4 and r5 are overwritten by the routine, so whatever the caller
 * passed in them is not used.
 */
#define N	r3
#define XX	r4
#define PRE	r5

/*
 * X and INCX arrive in different registers depending on the platform.
 * `__linux__` is accepted next to the legacy `linux` macro because
 * strict ISO compiler modes only predefine the former.
 */
#if defined(linux) || defined(__linux__)
#ifndef __64BIT__
#define X r6
#define INCX r7
#else
#define X r7
#define INCX r8
#endif
#endif

/*
 * NOTE(review): the 32-bit DOUBLE case is shifted by one register,
 * presumably because of how that ABI passes the double-precision
 * scale factor -- confirm against the platform calling convention.
 */
#if defined(_AIX) || defined(__APPLE__)
#if !defined(__64BIT__) && defined(DOUBLE)
#define X r8
#define INCX r9
#else
#define X r7
#define INCX r8
#endif
#endif

/* Stop with a readable message instead of obscure assembler errors. */
#if !defined(X) || !defined(INCX)
#error "no X/INCX register mapping is defined for this platform"
#endif

#define FZERO	f0
#define ALPHA	f1

/*
 * Strided vector scale.
 *
 * Inputs (register aliases defined at the top of this file):
 *   N     - element count; the routine returns immediately when N <= 0
 *   ALPHA - scale factor
 *   X     - address of the first element
 *   INCX  - stride in elements; converted to bytes with BASE_SHIFT
 *
 * If ALPHA compares equal to zero, every visited element is overwritten
 * with zero and the vector is never read.  Otherwise each element is
 * loaded, multiplied by ALPHA and stored back to the same address.
 *
 * Scratch: r0, XX, PRE, CTR, cr0, FZERO, f2-f9.
 *
 * LFDUX, STFDUX, FMUL, BASE_SHIFT, SIZE, SP, LL(), PROLOGUE, PROFCODE
 * and EPILOGUE are supplied by common.h.  LFDUX/STFDUX are presumably
 * the update-form indexed load/store for the configured precision --
 * confirm in common.h.
 */
	PROLOGUE
	PROFCODE

	/*
	 * Build a floating-point zero: store an all-zero word in a
	 * temporary 8-byte stack slot and load it back as a single.
	 */
	addi	SP, SP, -8
	li	r0,   0

	stw	r0,      0(SP)
	lfs	FZERO,   0(SP)

	addi	SP, SP,  8

	/* Stride in bytes, and the prefetch distance used by dcbtst. */
	slwi	INCX, INCX, BASE_SHIFT
	li	PRE, 3 * 16 * SIZE

	/* Nothing to do for N <= 0. */
	cmpwi	cr0, N, 0
	blelr-	cr0

	/*
	 * Step X back by one stride so that the first update-form access
	 * (which adds INCX before touching memory) hits element 0.
	 */
	sub	X, X, INCX

	/* Non-zero (or unordered) ALPHA takes the multiply path. */
	fcmpu	cr0, FZERO, ALPHA
	bne-	cr0, LL(A1I1)

	/* ---- ALPHA == 0: fill with zero, 16 elements per iteration ---- */
	srawi.	r0, N, 4
	mtspr	CTR, r0
	beq-	cr0, LL(A0I1_Remain)
	.align 4

LL(A0I1_kernel):
#ifdef PPCG4
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX

#ifdef PPCG4
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	X, PRE
#endif

	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	STFDUX	FZERO, X, INCX
	bdnz	LL(A0I1_kernel)
	.align 4

	/* Zero-fill tail: the remaining N mod 16 elements, one at a time. */
LL(A0I1_Remain):
	andi.	r0,  N, 15
	mtspr	CTR, r0
	beqlr+
	.align 4

LL(A0I1_RemainKernel):
	STFDUX	FZERO,  X, INCX
	bdnz	LL(A0I1_RemainKernel)
	blr
	.align 4

	/* ---- ALPHA != 0: scale, 8 elements per iteration ---- */
LL(A1I1):
	/* XX is the store cursor; X keeps running ahead as the load cursor. */
	mr	XX, X

	srawi.	r0, N, 3
	mtspr	CTR, r0
	/*
	 * NOTE(review): the '+' hint predicts this branch as taken (fewer
	 * than 8 elements), unlike the '-' hint on the matching branch of
	 * the zero-fill path above -- confirm that this is intended.
	 */
	beq+	LL(A1I1_Remain)

	/* Pipeline preload: first four elements of the first group of 8. */
	LFDUX	f2, X, INCX
	LFDUX	f3, X, INCX
	LFDUX	f4, X, INCX
	LFDUX	f5, X, INCX
	/* Only one group of 8: go straight to the drain code. */
	bdz	LL(12)
	.align 4

	/*
	 * Steady state.  On entry f2-f5 hold four loaded, unscaled
	 * elements.  Each pass loads the other four of the group (f6-f9),
	 * scales and stores f2-f5, preloads f2-f5 for the next group,
	 * then scales and stores f6-f9.
	 */
LL(11):
	LFDUX	f6, X, INCX
	FMUL	f2, ALPHA, f2
	LFDUX	f7, X, INCX
	FMUL	f3, ALPHA, f3
	LFDUX	f8, X, INCX
	FMUL	f4, ALPHA, f4
	LFDUX	f9, X, INCX
	FMUL	f5, ALPHA, f5

#ifdef PPCG4
	dcbtst	X, PRE
#endif
	STFDUX	f2, XX, INCX
	STFDUX	f3, XX, INCX
	STFDUX	f4, XX, INCX
	STFDUX	f5, XX, INCX

	LFDUX	f2, X, INCX
	FMUL	f6, ALPHA, f6
	LFDUX	f3, X, INCX
	FMUL	f7, ALPHA, f7
	LFDUX	f4, X, INCX
	FMUL	f8, ALPHA, f8
	LFDUX	f5, X, INCX
	FMUL	f9, ALPHA, f9

	STFDUX	f6, XX, INCX
	STFDUX	f7, XX, INCX
	STFDUX	f8, XX, INCX
	STFDUX	f9, XX, INCX

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	X, PRE
#endif

	bdnz	LL(11)
	.align 4

	/*
	 * Drain: f2-f5 are already loaded.  Load the last four elements,
	 * scale all eight and store them; nothing further is preloaded.
	 */
LL(12):
	LFDUX	f6, X, INCX
	FMUL	f2, ALPHA, f2
	LFDUX	f7, X, INCX
	FMUL	f3, ALPHA, f3
	LFDUX	f8, X, INCX
	FMUL	f4, ALPHA, f4
	LFDUX	f9, X, INCX
	FMUL	f5, ALPHA, f5

	STFDUX	f2, XX, INCX
	FMUL	f6, ALPHA, f6
	STFDUX	f3, XX, INCX
	FMUL	f7, ALPHA, f7
	STFDUX	f4, XX, INCX
	FMUL	f8, ALPHA, f8
	STFDUX	f5, XX, INCX
	FMUL	f9, ALPHA, f9

	STFDUX	f6, XX, INCX
	STFDUX	f7, XX, INCX
	STFDUX	f8, XX, INCX
	STFDUX	f9, XX, INCX
	.align 4

	/* Scaling tail: the remaining N mod 8 elements, one at a time. */
LL(A1I1_Remain):
	andi.	r0,  N, 7
	mtspr	CTR, r0
	beqlr+
	.align 4

LL(A1I1_RemainKernel):
	LFDUX	f2,  X, INCX
	FMUL	f2, ALPHA, f2
	STFDUX	f2,  XX, INCX
	bdnz	LL(A1I1_RemainKernel)
	blr
	.align 4

	EPILOGUE
