1
2/*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26/*
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
29 * in there will break the build on some platforms.
30 */
31
32#include "assyntax.h"
33#define MATH_ASM_PTR_SIZE 4
34#include "math/m_vector_asm.h"
35#include "xform_args.h"
36
37	SEG_TEXT
38
/*
 * Integer images of single-precision constants: 1065353216 is 0x3f800000,
 * the IEEE-754 encoding of 1.0f.  Neither constant is referenced by the
 * routines visible in this file.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/* Source point components: consecutive 4-byte floats addressed off ESI. */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)

/* Destination point components: consecutive 4-byte floats addressed off EDI. */
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)

/*
 * Matrix elements: sixteen consecutive 4-byte floats addressed off EDX,
 * MATn living at byte offset 4*n.
 */
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)

#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)

#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)

#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)
66
67
/*
 * _mesa_x86_transform_points3_general
 *
 * x87 transform of 3-component points by a full 4x4 matrix, writing
 * 4-component results.  For i = 0..3:
 *
 *	dst[i] = src[0]*m[i] + src[1]*m[4+i] + src[2]*m[8+i] + m[12+i]
 *
 * Registers inside the loop:
 *	ESI	current source point, stepped by the source stride in EAX
 *	EDI	current destination point, stepped by 16 bytes
 *	EDX	matrix base address (MAT0..MAT15)
 *	ECX	end of output: destination start + count*16
 *
 * The destination vector is not written at all when the source count is 0.
 *
 * FPU stack comments list st(0) first.  x y z w are the running sums for
 * DST0..DST3; p0..p3 are products waiting to be folded into them.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points3_general)
HIDDEN(_mesa_x86_transform_points3_general)
GLNAME(_mesa_x86_transform_points3_general):

/* Bytes pushed below; presumably read by the ARG_* macros -- TODO confirm. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_gr_done) )		/* no points: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Stamp the destination header: same count, 4 components. */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* From here on ESI/EDI address point data, not the vector headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = first byte past the last 16-byte output slot. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_gr_loop):

	/* src[0] * m[0..3] seeds the four sums. */
	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* x */
	FLD_S( SRC0 )
	FMUL_S( MAT1 )			/* y x */
	FLD_S( SRC0 )
	FMUL_S( MAT2 )			/* z y x */
	FLD_S( SRC0 )
	FMUL_S( MAT3 )			/* w z y x */

	/* src[1] * m[4..7] */
	FLD_S( SRC1 )
	FMUL_S( MAT4 )			/* p0 w z y x */
	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* p1 p0 w z y x */
	FLD_S( SRC1 )
	FMUL_S( MAT6 )			/* p2 p1 p0 w z y x */
	FLD_S( SRC1 )
	FMUL_S( MAT7 )			/* p3 p2 p1 p0 w z y x */

	FXCH( ST(3) )			/* p0 p2 p1 p3 w z y x */
	FADDP( ST0, ST(7) )		/* x += p0:  p2 p1 p3 w z y x */
	FXCH( ST(1) )			/* p1 p2 p3 w z y x */
	FADDP( ST0, ST(5) )		/* y += p1:  p2 p3 w z y x */
	FADDP( ST0, ST(3) )		/* z += p2:  p3 w z y x */
	FADDP( ST0, ST(1) )		/* w += p3:  w z y x */

	/* src[2] * m[8..11] */
	FLD_S( SRC2 )
	FMUL_S( MAT8 )			/* p0 w z y x */
	FLD_S( SRC2 )
	FMUL_S( MAT9 )			/* p1 p0 w z y x */
	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* p2 p1 p0 w z y x */
	FLD_S( SRC2 )
	FMUL_S( MAT11 )			/* p3 p2 p1 p0 w z y x */

	FXCH( ST(3) )			/* p0 p2 p1 p3 w z y x */
	FADDP( ST0, ST(7) )		/* x += p0:  p2 p1 p3 w z y x */
	FXCH( ST(1) )			/* p1 p2 p3 w z y x */
	FADDP( ST0, ST(5) )		/* y += p1:  p2 p3 w z y x */
	FADDP( ST0, ST(3) )		/* z += p2:  p3 w z y x */
	FADDP( ST0, ST(1) )		/* w += p3:  w z y x */

	/* Add m[12..15], rotating each sum to the top in turn. */
	FXCH( ST(3) )			/* x z y w */
	FADD_S( MAT12 )
	FXCH( ST(2) )			/* y z x w */
	FADD_S( MAT13 )
	FXCH( ST(1) )			/* z y x w */
	FADD_S( MAT14 )
	FXCH( ST(3) )			/* w y x z */
	FADD_S( MAT15 )

	/* Pop the four results into the destination point. */
	FXCH( ST(2) )			/* x y w z */
	FSTP_S( DST0 )			/* y w z */
	FSTP_S( DST1 )			/* w z */
	FXCH( ST(1) )			/* z w */
	FSTP_S( DST2 )			/* w */
	FSTP_S( DST3 )			/* (empty) */

LLBL(x86_p3_gr_skip):

	/* Next point: fixed 16-byte output step, caller-supplied input step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_gr_loop) )

LLBL(x86_p3_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
171
172
173
174
/*
 * _mesa_x86_transform_points3_perspective
 *
 * x87 transform of 3-component points that reads only matrix elements
 * 0, 5, 8, 9, 10 and 14, writing 4-component results:
 *
 *	dst[0] = src[0]*m[0] + src[2]*m[8]
 *	dst[1] = src[1]*m[5] + src[2]*m[9]
 *	dst[2] = src[2]*m[10] + m[14]
 *	dst[3] = -src[2]		(sign bit flipped with integer ops)
 *
 * Registers inside the loop:
 *	ESI	current source point, stepped by the source stride in EAX
 *	EDI	current destination point, stepped by 16 bytes
 *	EDX	matrix base address
 *	EBX	bit pattern of -src[2]
 *	ECX	end of output: destination start + count*16
 *
 * The destination vector is not written at all when the source count is 0.
 *
 * FPU stack comments list st(0) first.  x y z are the running sums for
 * DST0..DST2; p0..p2 are the three src[2] products.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points3_perspective)
HIDDEN(_mesa_x86_transform_points3_perspective)
GLNAME(_mesa_x86_transform_points3_perspective):

/* Bytes pushed below; presumably read by the ARG_* macros -- TODO confirm. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_pr_done) )		/* no points: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Stamp the destination header: same count, 4 components. */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* From here on ESI/EDI address point data, not the vector headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = first byte past the last 16-byte output slot. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_pr_loop):

	/* dst[3] = -src[2]: toggle bit 31 of the raw float, no FPU needed. */
	MOV_L( SRC2, EBX )
	XOR_L( CONST(-2147483648), EBX )

	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* x */

	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* y x */

	/* src[2] * m[8], m[9], m[10] */
	FLD_S( SRC2 )
	FMUL_S( MAT8 )			/* p0 y x */
	FLD_S( SRC2 )
	FMUL_S( MAT9 )			/* p1 p0 y x */
	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* p2 p1 p0 y x */

	FXCH( ST(2) )			/* p0 p1 p2 y x */
	FADDP( ST0, ST(4) )		/* x += p0:  p1 p2 y x */
	FADDP( ST0, ST(2) )		/* y += p1:  p2 y x */
	FLD_S( MAT14 )			/* m14 p2 y x */
	FXCH( ST(1) )			/* p2 m14 y x */
	FADDP( ST0, ST(1) )		/* z = m14 + p2:  z y x */

	FXCH( ST(2) )			/* x y z */
	FSTP_S( DST0 )			/* y z */
	FSTP_S( DST1 )			/* z */
	FSTP_S( DST2 )			/* (empty) */
	MOV_L( EBX, DST3 )

LLBL(x86_p3_pr_skip):

	/* Next point: fixed 16-byte output step, caller-supplied input step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_pr_loop) )

LLBL(x86_p3_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
252
253
254
255
/*
 * _mesa_x86_transform_points3_3d
 *
 * x87 transform of 3-component points by the upper three rows of each
 * matrix column, writing 3-component results.  For i = 0..2:
 *
 *	dst[i] = src[0]*m[i] + src[1]*m[4+i] + src[2]*m[8+i] + m[12+i]
 *
 * DST3 is never stored.
 *
 * Registers inside the loop:
 *	ESI	current source point, stepped by the source stride in EAX
 *	EDI	current destination point, stepped by 16 bytes
 *	EDX	matrix base address
 *	ECX	end of output: destination start + count*16
 *
 * The destination vector is not written at all when the source count is 0.
 *
 * FPU stack comments list st(0) first.  x y z are the running sums for
 * DST0..DST2; p0..p2 are products waiting to be folded into them.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points3_3d)
HIDDEN(_mesa_x86_transform_points3_3d)
GLNAME(_mesa_x86_transform_points3_3d):

/* Bytes pushed below; presumably read by the ARG_* macros -- TODO confirm. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_3dr_done) )		/* no points: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Stamp the destination header: same count, 3 components. */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* From here on ESI/EDI address point data, not the vector headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = first byte past the last 16-byte output slot. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_3dr_loop):

	/* src[0] * m[0..2] seeds the three sums. */
	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* x */
	FLD_S( SRC0 )
	FMUL_S( MAT1 )			/* y x */
	FLD_S( SRC0 )
	FMUL_S( MAT2 )			/* z y x */

	/* src[1] * m[4..6] */
	FLD_S( SRC1 )
	FMUL_S( MAT4 )			/* p0 z y x */
	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* p1 p0 z y x */
	FLD_S( SRC1 )
	FMUL_S( MAT6 )			/* p2 p1 p0 z y x */

	FXCH( ST(2) )			/* p0 p1 p2 z y x */
	FADDP( ST0, ST(5) )		/* x += p0:  p1 p2 z y x */
	FADDP( ST0, ST(3) )		/* y += p1:  p2 z y x */
	FADDP( ST0, ST(1) )		/* z += p2:  z y x */

	/* src[2] * m[8..10] */
	FLD_S( SRC2 )
	FMUL_S( MAT8 )			/* p0 z y x */
	FLD_S( SRC2 )
	FMUL_S( MAT9 )			/* p1 p0 z y x */
	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* p2 p1 p0 z y x */

	FXCH( ST(2) )			/* p0 p1 p2 z y x */
	FADDP( ST0, ST(5) )		/* x += p0:  p1 p2 z y x */
	FADDP( ST0, ST(3) )		/* y += p1:  p2 z y x */
	FADDP( ST0, ST(1) )		/* z += p2:  z y x */

	/* Add m[12..14], rotating each sum to the top in turn. */
	FXCH( ST(2) )			/* x y z */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* y x z */
	FADD_S( MAT13 )
	FXCH( ST(2) )			/* z x y */
	FADD_S( MAT14 )

	/* Pop the three results into the destination point. */
	FXCH( ST(1) )			/* x z y */
	FSTP_S( DST0 )			/* z y */
	FXCH( ST(1) )			/* y z */
	FSTP_S( DST1 )			/* z */
	FSTP_S( DST2 )			/* (empty) */

LLBL(x86_p3_3dr_skip):

	/* Next point: fixed 16-byte output step, caller-supplied input step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_3dr_loop) )

LLBL(x86_p3_3dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
346
347
348
349
/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * x87 scale-and-translate of 3-component points; only matrix elements
 * 0, 5, 10, 12, 13 and 14 are read.  Writes 3-component results:
 *
 *	dst[0] = src[0]*m[0]  + m[12]
 *	dst[1] = src[1]*m[5]  + m[13]
 *	dst[2] = src[2]*m[10] + m[14]
 *
 * DST3 is never stored.
 *
 * Registers inside the loop:
 *	ESI	current source point, stepped by the source stride in EAX
 *	EDI	current destination point, stepped by 16 bytes
 *	EDX	matrix base address
 *	ECX	end of output: destination start + count*16
 *
 * The destination vector is not written at all when the source count is 0.
 *
 * FPU stack comments list st(0) first.  sx sy sz are the scaled inputs,
 * x y z the finished outputs.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points3_3d_no_rot)
HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
GLNAME(_mesa_x86_transform_points3_3d_no_rot):

/* Bytes pushed below; presumably read by the ARG_* macros -- TODO confirm. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_3dnrr_done) )		/* no points: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Stamp the destination header: same count, 3 components. */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* From here on ESI/EDI address point data, not the vector headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = first byte past the last 16-byte output slot. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_3dnrr_loop):

	/* Scale each component by its diagonal matrix element. */
	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* sx */

	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* sy sx */

	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* sz sy sx */

	/* Translate: x in place, y and z by adding onto a loaded m13 / m14. */
	FXCH( ST(2) )			/* sx sy sz */
	FADD_S( MAT12 )			/* x sy sz */
	FLD_S( MAT13 )			/* m13 x sy sz */
	FXCH( ST(2) )			/* sy x m13 sz */
	FADDP( ST0, ST(2) )		/* y = m13 + sy:  x y sz */
	FLD_S( MAT14 )			/* m14 x y sz */
	FXCH( ST(3) )			/* sz x y m14 */
	FADDP( ST0, ST(3) )		/* z = m14 + sz:  x y z */

	FSTP_S( DST0 )			/* y z */
	FSTP_S( DST1 )			/* z */
	FSTP_S( DST2 )			/* (empty) */

LLBL(x86_p3_3dnrr_skip):

	/* Next point: fixed 16-byte output step, caller-supplied input step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_3dnrr_loop) )

LLBL(x86_p3_3dnrr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
419
420
421
422
/*
 * _mesa_x86_transform_points3_2d
 *
 * x87 transform of the first two components of 3-component points; only
 * matrix elements 0, 1, 4, 5, 12 and 13 are read.  The third component is
 * copied through unchanged.  Writes 3-component results:
 *
 *	dst[0] = src[0]*m[0] + src[1]*m[4] + m[12]
 *	dst[1] = src[0]*m[1] + src[1]*m[5] + m[13]
 *	dst[2] = src[2]			(raw 32-bit copy through EBX)
 *
 * DST3 is never stored.
 *
 * Registers inside the loop:
 *	ESI	current source point, stepped by the source stride in EAX
 *	EDI	current destination point, stepped by 16 bytes
 *	EDX	matrix base address
 *	EBX	bit pattern of src[2]
 *	ECX	end of output: destination start + count*16
 *
 * The destination vector is not written at all when the source count is 0.
 *
 * FPU stack comments list st(0) first.  x y are the running sums for
 * DST0/DST1; p0 p1 are the src[1] products.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points3_2d)
HIDDEN(_mesa_x86_transform_points3_2d)
GLNAME(_mesa_x86_transform_points3_2d):

/* Bytes pushed below; presumably read by the ARG_* macros -- TODO confirm. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_2dr_done) )		/* no points: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Stamp the destination header: same count, 3 components. */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* From here on ESI/EDI address point data, not the vector headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = first byte past the last 16-byte output slot. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_2dr_loop):

	/* Fetch src[2] up front; it is stored back untouched below. */
	MOV_L( SRC2, EBX )

	/* src[0] * m[0..1] seeds the two sums. */
	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* x */
	FLD_S( SRC0 )
	FMUL_S( MAT1 )			/* y x */

	/* src[1] * m[4..5] */
	FLD_S( SRC1 )
	FMUL_S( MAT4 )			/* p0 y x */
	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* p1 p0 y x */

	FXCH( ST(1) )			/* p0 p1 y x */
	FADDP( ST0, ST(3) )		/* x += p0:  p1 y x */
	FADDP( ST0, ST(1) )		/* y += p1:  y x */

	/* Add the translation m[12], m[13]. */
	FXCH( ST(1) )			/* x y */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* y x */
	FADD_S( MAT13 )

	FXCH( ST(1) )			/* x y */
	FSTP_S( DST0 )			/* y */
	FSTP_S( DST1 )			/* (empty) */
	MOV_L( EBX, DST2 )

LLBL(x86_p3_2dr_skip):

	/* Next point: fixed 16-byte output step, caller-supplied input step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_2dr_loop) )

LLBL(x86_p3_2dr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
497
498
499
500
/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * x87 scale-and-translate of the first two components of 3-component
 * points; only matrix elements 0, 5, 12 and 13 are read.  The third
 * component is copied through unchanged.  Writes 3-component results:
 *
 *	dst[0] = src[0]*m[0] + m[12]
 *	dst[1] = src[1]*m[5] + m[13]
 *	dst[2] = src[2]			(raw 32-bit copy through EBX)
 *
 * DST3 is never stored.
 *
 * Registers inside the loop:
 *	ESI	current source point, stepped by the source stride in EAX
 *	EDI	current destination point, stepped by 16 bytes
 *	EDX	matrix base address
 *	EBX	bit pattern of src[2]
 *	ECX	end of output: destination start + count*16
 *
 * The destination vector is not written at all when the source count is 0.
 *
 * FPU stack comments list st(0) first.  sx sy are the scaled inputs,
 * x y the finished outputs.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points3_2d_no_rot)
HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
GLNAME(_mesa_x86_transform_points3_2d_no_rot):

/* Bytes pushed below; presumably read by the ARG_* macros -- TODO confirm. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_2dnrr_done) )		/* no points: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	/* Stamp the destination header: same count, 3 components. */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* From here on ESI/EDI address point data, not the vector headers. */
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = first byte past the last 16-byte output slot. */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

ALIGNTEXT16
LLBL(x86_p3_2dnrr_loop):

	/* Fetch src[2] up front; it is stored back untouched below. */
	MOV_L( SRC2, EBX )

	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* sx */

	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* sy sx */

	/* Translate: x in place, y by adding onto a loaded m13. */
	FXCH( ST(1) )			/* sx sy */
	FADD_S( MAT12 )			/* x sy */
	FLD_S( MAT13 )			/* m13 x sy */

	FXCH( ST(2) )			/* sy x m13 */
	FADDP( ST0, ST(2) )		/* y = m13 + sy:  x y */

	FSTP_S( DST0 )			/* y */
	FSTP_S( DST1 )			/* (empty) */
	MOV_L( EBX, DST2 )

LLBL(x86_p3_2dnrr_skip):

	/* Next point: fixed 16-byte output step, caller-supplied input step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_2dnrr_loop) )

LLBL(x86_p3_2dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
568
569
570
571
/*
 * _mesa_x86_transform_points3_identity
 *
 * Copies the first three components of every source point to the
 * destination with plain 32-bit integer moves, so the bits are carried
 * over verbatim.  DST3 is never stored.
 *
 * The matrix argument is not read: the result does not depend on it.
 *
 * Registers inside the loop:
 *	ESI	current source point, stepped by the source stride in EAX
 *	EDI	current destination point, stepped by 16 bytes
 *	EBX, EBP, EDX	the three components in flight
 *	ECX	end of output: destination start + count*16
 *
 * The destination vector is not written at all when the source count is 0.
 * When source and destination data start at the same address the header
 * (flags, count, size) is still updated but the copy loop is skipped.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points3_identity)
HIDDEN(_mesa_x86_transform_points3_identity)
GLNAME(_mesa_x86_transform_points3_identity):

/* Bytes pushed below; presumably read by the ARG_* macros -- TODO confirm. */
#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_ir_done) )		/* no points: leave dest alone */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* Stamp the destination header: same count, 3 components. */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = count*16; becomes the end-of-output address below. */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* Same data start for source and destination: nothing to copy. */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p3_ir_done) )

ALIGNTEXT16
LLBL(x86_p3_ir_loop):

	/* Load all three components before storing any of them. */
	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EBP )
	MOV_L( SRC2, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EBP, DST1 )
	MOV_L( EDX, DST2 )

LLBL(x86_p3_ir_skip):

	/* Next point: fixed 16-byte output step, caller-supplied input step. */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_ir_loop) )

LLBL(x86_p3_ir_done):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
642
643#if defined (__ELF__) && defined (__linux__)
644	.section .note.GNU-stack,"",%progbits
645#endif
646