1/*********************************************************************/
2/* Copyright 2009, 2010 The University of Texas at Austin.           */
3/* All rights reserved.                                              */
4/*                                                                   */
5/* Redistribution and use in source and binary forms, with or        */
6/* without modification, are permitted provided that the following   */
7/* conditions are met:                                               */
8/*                                                                   */
9/*   1. Redistributions of source code must retain the above         */
10/*      copyright notice, this list of conditions and the following  */
11/*      disclaimer.                                                  */
12/*                                                                   */
13/*   2. Redistributions in binary form must reproduce the above      */
14/*      copyright notice, this list of conditions and the following  */
15/*      disclaimer in the documentation and/or other materials       */
16/*      provided with the distribution.                              */
17/*                                                                   */
18/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
19/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
20/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
21/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
22/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
23/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
24/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
25/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
26/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
27/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
28/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
29/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
30/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
31/*    POSSIBILITY OF SUCH DAMAGE.                                    */
32/*                                                                   */
33/* The views and conclusions contained in the software and           */
34/* documentation are those of the authors and should not be          */
35/* interpreted as representing official policies, either expressed   */
36/* or implied, of The University of Texas at Austin.                 */
37/*********************************************************************/
38
39#define ASSEMBLER
40#include "common.h"
41
/* Argument registers read by this kernel, plus its single scratch register. */
#define N	ARG1		/* element count                                  */
#define X	ARG4		/* pointer to the vector, updated in place        */
#define INCX	ARG5		/* stride in elements; converted to bytes below   */

#define I	%rax		/* loop counter                                   */

#include "l1param.h"

/*
 * In-place complex scaling:  x[k] = alpha * x[k]  for k = 0 .. N-1.
 *
 * Inputs
 *   N     number of complex elements; N <= 0 returns without touching X.
 *   X     address of the first element (real part at 0 * SIZE, imaginary
 *         part at 1 * SIZE).
 *   INCX  stride between consecutive elements, in complex elements.
 *   alpha read from the caller's stack: real part at 8(%rsp), imaginary
 *         part at 24(%rsp).
 *
 * Assumptions to confirm against common.h (not visible here):
 *   - FLD pushes one value onto the x87 stack and FST stores AND pops;
 *     the stack only balances in .L32 if FST pops.
 *   - SIZE is 16 bytes: the zero-fill loops write 32 bytes per element
 *     and then advance X by 2 * SIZE.
 *   - EMMS is a macro that empties the x87/MMX tag word.
 *
 * Paths
 *   alpha == 0      -> X is overwritten with zero bytes using MMX stores
 *                      (.L12/.L16 for contiguous data, .L22/.L26 strided).
 *   anything else   -> x87 complex multiply, one element per iteration
 *                      (.L32).  This includes alpha containing a NaN.
 *
 * The x87 stack is left empty on every exit path.  Clobbers %rax, %mm0,
 * X, INCX and the flags.
 *
 * NOTE(review): the alpha == 0 path does not read X, so NaN/Inf entries
 * of X become 0 rather than NaN.  That behaviour is preserved here.
 */
	PROLOGUE
	PROFCODE

	/* Element stride -> byte stride (INCX is later added straight to X). */
	salq	$ZBASE_SHIFT, INCX

	FLD	 8(%rsp)		/* st0 = alpha_r                     */
	FLD	24(%rsp)		/* st0 = alpha_i, st1 = alpha_r      */

	testq	N, N
	jle	.L999			/* nothing to do; .L999 pops alpha   */

	/* st0 = |alpha_r| + |alpha_i|, with alpha_i / alpha_r kept below it. */
	fld	%st(1)
	fabs
	fld	%st(1)
	fabs
	faddp	%st, %st(1)

	/*
	 * Compare 0 against that sum.  ffreep only pops the x87 stack and
	 * leaves EFLAGS alone, so both branches below still see the result
	 * of fcomip.  After the ffreep: st0 = alpha_i, st1 = alpha_r.
	 */
	fldz
	fcomip	%st(1), %st
	ffreep	%st
	jne	.L30			/* alpha != 0: general multiply      */
	/*
	 * An unordered compare (alpha_r or alpha_i is NaN) reports
	 * ZF = PF = CF = 1, which jne alone treats as "equal to zero".
	 * Send it to the multiply path so the NaN propagates into X
	 * instead of X being silently zero-filled.
	 */
	jp	.L30

	/* ---- alpha == 0: fill X with zeros ------------------------------ */

	EMMS				/* drop alpha; x87 regs now free for MMX */

	pxor	%mm0, %mm0		/* mm0 = 8 zero bytes                */

	cmpq	$2 * SIZE, INCX		/* contiguous (stride of one element)? */
	jne	.L20

	movq	N,  I
	sarq	$2, I			/* I = N / 4 unrolled iterations     */
	jle	.L15
	ALIGN_4

.L12:					/* contiguous, 4 elements (128 bytes) per pass */
#ifdef PREFETCHW
	PREFETCHW (PREFETCHSIZE +  0) - PREOFFSET(X)
#endif

	movq	%mm0,   0(X)
	movq	%mm0,   8(X)
	movq	%mm0,  16(X)
	movq	%mm0,  24(X)
	movq	%mm0,  32(X)
	movq	%mm0,  40(X)
	movq	%mm0,  48(X)
	movq	%mm0,  56(X)
	movq	%mm0,  64(X)
	movq	%mm0,  72(X)
	movq	%mm0,  80(X)
	movq	%mm0,  88(X)
	movq	%mm0,  96(X)
	movq	%mm0, 104(X)
	movq	%mm0, 112(X)
	movq	%mm0, 120(X)
	addq	$8 * SIZE, X
	decq	I
	jg	.L12
	ALIGN_3

.L15:
	movq	N,  I
	andq	$3, I			/* I = N % 4 leftover elements       */
	jle	.L18
	ALIGN_2

.L16:					/* contiguous, 1 element (32 bytes) per pass */
	movq	%mm0,   0(X)
	movq	%mm0,   8(X)
	movq	%mm0,  16(X)
	movq	%mm0,  24(X)

	addq	$2 * SIZE, X
	decq	I
	jg	.L16

.L18:
	EMMS				/* leave MMX state clean for the caller */

	ret
	ALIGN_2

.L20:					/* strided zero fill                  */
	movq	N,  I
	sarq	$2, I			/* I = N / 4 unrolled iterations     */
	jle	.L25
	ALIGN_3

.L22:					/* strided, 4 elements per pass       */
	movq	%mm0,   0(X)
	movq	%mm0,   8(X)
	movq	%mm0,  16(X)
	movq	%mm0,  24(X)
	addq	INCX, X

	movq	%mm0,   0(X)
	movq	%mm0,   8(X)
	movq	%mm0,  16(X)
	movq	%mm0,  24(X)
	addq	INCX, X

	movq	%mm0,   0(X)
	movq	%mm0,   8(X)
	movq	%mm0,  16(X)
	movq	%mm0,  24(X)
	addq	INCX, X

	movq	%mm0,   0(X)
	movq	%mm0,   8(X)
	movq	%mm0,  16(X)
	movq	%mm0,  24(X)
	addq	INCX, X

	decq	I
	jg	.L22
	ALIGN_3

.L25:
	movq	N,  I
	andq	$3, I			/* I = N % 4 leftover elements       */
	jle	.L28
	ALIGN_3

.L26:					/* strided, 1 element per pass        */
	movq	%mm0,   0(X)
	movq	%mm0,   8(X)
	movq	%mm0,  16(X)
	movq	%mm0,  24(X)
	addq	INCX, X

	decq	I
	jg	.L26

.L28:
	EMMS				/* leave MMX state clean for the caller */

	ret
	ALIGN_3

	/* ---- general case: x[k] = alpha * x[k] -------------------------- */
	/* Invariant at the top of every iteration: st0 = alpha_i, st1 = alpha_r. */

.L30:
	movq	N, I
	ALIGN_2

.L32:
#ifdef PREFETCHW
	PREFETCHW (PREFETCHSIZE +  0) - PREOFFSET(X)
#endif

	/* New imaginary part: x_r * alpha_i + x_i * alpha_r. */
	FLD	0 * SIZE(X)		/* x_r                               */
	fmul	%st(1),%st		/* x_r * alpha_i                     */
	FLD	1 * SIZE(X)		/* x_i                               */
	fmul	%st(3),%st		/* x_i * alpha_r                     */
	faddp	%st,%st(1)

	/*
	 * New real part: x_r * alpha_r - x_i * alpha_i.  The imaginary
	 * result now sits above alpha, so alpha_i / alpha_r are one slot
	 * deeper than in the block above.  The subtraction direction relies
	 * on GNU as's AT&T treatment of fsubrp with a non-st0 destination.
	 */
	FLD	0 * SIZE(X)		/* x_r                               */
	fmul	%st(3),%st		/* x_r * alpha_r                     */
	FLD	1 * SIZE(X)		/* x_i                               */
	fmul	%st(3),%st		/* x_i * alpha_i                     */
	fsubrp	%st,%st(1)

	/* Both inputs were loaded before either store, so updating in place is safe. */
	FST	0 * SIZE(X)		/* store real, pop                   */
	FST	1 * SIZE(X)		/* store imaginary, pop              */
	addq	INCX, X
	decq	I
	jg	.L32
	ALIGN_2

.L999:
	/* Pop alpha_i and alpha_r so the x87 stack is empty on return. */
	ffreep	%st
	ffreep	%st

	ret

	EPILOGUE
224