/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

/* ASSEMBLER is defined before common.h is pulled in.
   NOTE(review): presumably it selects the assembler-side definitions in
   that header; confirm against common.h. */
#define ASSEMBLER
#include "common.h"

/*
 * Stack layout used to reach the arguments.
 *
 * STACK is the number of bytes occupied by the three registers the routine
 * pushes (%edi, %esi, %ebx: 3 * 4 = 12) before it reads its arguments.
 * ARGS is an additional displacement, currently zero.
 */
#define STACK	12
#define ARGS	 0

/*
 * Argument operands, addressed relative to %esp after the three pushes.
 * The leading constant is the offset of the argument at routine entry; the
 * first argument sits at 4 because the return address occupies 0(%esp).
 * Each macro expands to a complete memory operand such as 16(%esp), so it
 * is only valid while exactly STACK bytes have been pushed.
 */
#define M	 4 + STACK + ARGS(%esp)
#define X	 8 + STACK + ARGS(%esp)
#define INCX	12 + STACK + ARGS(%esp)
#define Y	16 + STACK + ARGS(%esp)
#define INCY	20 + STACK + ARGS(%esp)

/*
 * Vector copy kernel (32-bit x86, x87 FPU, AT&T syntax).
 *
 * Copies M elements from X to Y:
 *     Y[i * INCY] = X[i * INCX]    for 0 <= i < M
 * Nothing is copied when M <= 0.  %eax is 0 on return.
 *
 * Arguments are read from the stack through the M / X / INCX / Y / INCY
 * operand macros.  Strides arrive in elements and are scaled to bytes here.
 *
 * Register roles:
 *   %ebx = M                      %eax = loop counter
 *   %ecx = source pointer         %esi = source stride in bytes
 *   %edx = destination pointer    %edi = destination stride in bytes
 * %ebx, %esi and %edi are saved on entry and restored on exit; %eax, %ecx,
 * %edx and the flags are clobbered.
 *
 * NOTE(review): PROLOGUE, PROFCODE, EPILOGUE, EMMS, ALIGN_2, ALIGN_3, FLD,
 * FST, SIZE and BASE_SHIFT come from common.h and are not visible here.
 * The loops below assume that FLD pushes one element onto the x87 register
 * stack and that FST stores st(0) and pops it; confirm against common.h.
 */
	PROLOGUE

	pushl	%edi
	pushl	%esi
	pushl	%ebx			/* three pushes = 12 bytes = STACK */

	PROFCODE

	/* NOTE(review): presumably resets MMX state so that the x87 registers
	   are usable when called through these Fortran interfaces; confirm. */
#if defined(F_INTERFACE_GFORT) || defined(F_INTERFACE_G95)
	EMMS
#endif

	movl	M,    %ebx
	movl	X,    %ecx
	movl	INCX, %esi
	movl	Y,    %edx
	movl	INCY, %edi

	testl	%ebx, %ebx		/* if (m <= 0) return 0 */
	jle	.L999

	/* Scale both strides from elements to bytes.  An address scale factor
	   cannot exceed 8, so larger element sizes use a shift instead.
	   NOTE(review): assumes (1 << BASE_SHIFT) == SIZE; confirm. */
#if SIZE > 8
	sall	$BASE_SHIFT, %esi
	sall	$BASE_SHIFT, %edi
#else
	leal	(, %esi, SIZE), %esi
	leal	(, %edi, SIZE), %edi
#endif

	/* The contiguous path is taken only when both strides are exactly
	   one element; everything else goes to the strided path. */
	cmpl	$SIZE, %esi		/* incx != 1 ? */
	jne	.L100
	cmpl	$SIZE, %edi		/* incy != 1 ? */
	jne	.L100

/* ---- Contiguous path: 8 elements per iteration ---- */

	movl	%ebx, %eax		/* i = m / 8 */
	sarl	$3,   %eax
	/* Test ZF only: m > 0 here, so the quotient cannot be negative, and
	   OF (which jle would read) is undefined after a multi-bit SAR. */
	je	.L20
	ALIGN_2

.L11:
	/* Load in reverse order so that element 0 ends up in st(0) and the
	   stores below pop the elements in ascending order. */
	FLD	7 * SIZE(%ecx)
	FLD	6 * SIZE(%ecx)
	FLD	5 * SIZE(%ecx)
	FLD	4 * SIZE(%ecx)
	FLD	3 * SIZE(%ecx)
	FLD	2 * SIZE(%ecx)
	FLD	1 * SIZE(%ecx)
	FLD	0 * SIZE(%ecx)

	FST	0 * SIZE(%edx)
	FST	1 * SIZE(%edx)
	FST	2 * SIZE(%edx)
	FST	3 * SIZE(%edx)
	FST	4 * SIZE(%edx)
	FST	5 * SIZE(%edx)
	FST	6 * SIZE(%edx)
	FST	7 * SIZE(%edx)

	addl	$8 * SIZE, %ecx
	addl	$8 * SIZE, %edx
	decl	%eax
	jg	.L11
	ALIGN_2

.L20:
	movl	%ebx, %eax		/* i = m % 8 (0..7) */
	andl	$7,   %eax
	je	.L99			/* no remainder */
	ALIGN_2

.L21:
	/* Contiguous remainder: one element per iteration. */
	FLD	(%ecx)
	FST	(%edx)
	addl	$SIZE, %ecx
	addl	$SIZE, %edx
	decl	%eax
	jg	.L21

.L99:
	/* Exit for the contiguous path: return 0 and restore the saved
	   registers in reverse push order. */
	xorl	%eax,%eax
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
	ALIGN_3

/* ---- Strided path: 8 elements per iteration ---- */

.L100:
	movl	%ebx, %eax		/* i = m / 8 */
	sarl	$3,   %eax
	je	.L120			/* ZF only, see the contiguous path */
	ALIGN_2

.L111:
	/* Load x0..x7 in ascending order, stepping the source pointer by the
	   byte stride.  Afterwards st(0) = x7, st(1) = x6, ..., st(7) = x0. */
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx
	FLD	(%ecx)
	addl	%esi, %ecx

	/* The elements sit on the x87 stack in reverse order.  Four exchanges
	   bring x0..x3 to the top one at a time; once they are stored, the
	   remaining x4..x7 are already in ascending order. */
	fxch	%st(7)			/* stack: x0 x6 x5 x4 x3 x2 x1 x7 */
	FST	(%edx)			/* store x0 */
	addl	%edi, %edx

	fxch	%st(5)			/* stack: x1 x5 x4 x3 x2 x6 x7 */
	FST	(%edx)			/* store x1 */
	addl	%edi, %edx

	fxch	%st(3)			/* stack: x2 x4 x3 x5 x6 x7 */
	FST	(%edx)			/* store x2 */
	addl	%edi, %edx

	fxch	%st(1)			/* stack: x3 x4 x5 x6 x7 */
	FST	(%edx)			/* store x3 */
	addl	%edi, %edx

	FST	(%edx)			/* store x4 */
	addl	%edi, %edx

	FST	(%edx)			/* store x5 */
	addl	%edi, %edx

	FST	(%edx)			/* store x6 */
	addl	%edi, %edx

	FST	(%edx)			/* store x7 */
	addl	%edi, %edx

	decl	%eax
	jg	.L111

.L120:
	movl	%ebx, %eax		/* i = m % 8 (0..7) */
	andl	$7,   %eax
	je	.L999			/* no remainder */
	ALIGN_2

.L121:
	/* Strided remainder: one element per iteration. */
	FLD	(%ecx)
	FST	(%edx)
	addl	%esi, %ecx
	addl	%edi, %edx
	decl	%eax
	jg	.L121

.L999:
	/* Exit for the strided path and for m <= 0: return 0 and restore the
	   saved registers in reverse push order. */
	xorl	%eax,%eax
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

	EPILOGUE
