1
2/*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26/*
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
29 * in there will break the build on some platforms.
30 */
31
32#include "assyntax.h"
33#define MATH_ASM_PTR_SIZE 4
34#include "math/m_vector_asm.h"
35#include "xform_args.h"
36
37	SEG_TEXT
38
39#define FP_ONE		1065353216
40#define FP_ZERO		0
41
42#define SRC0		REGOFF(0, ESI)
43#define SRC1		REGOFF(4, ESI)
44#define SRC2		REGOFF(8, ESI)
45#define SRC3		REGOFF(12, ESI)
46#define DST0		REGOFF(0, EDI)
47#define DST1		REGOFF(4, EDI)
48#define DST2		REGOFF(8, EDI)
49#define DST3		REGOFF(12, EDI)
50#define MAT0		REGOFF(0, EDX)
51#define MAT1		REGOFF(4, EDX)
52#define MAT2		REGOFF(8, EDX)
53#define MAT3		REGOFF(12, EDX)
54#define MAT4		REGOFF(16, EDX)
55#define MAT5		REGOFF(20, EDX)
56#define MAT6		REGOFF(24, EDX)
57#define MAT7		REGOFF(28, EDX)
58#define MAT8		REGOFF(32, EDX)
59#define MAT9		REGOFF(36, EDX)
60#define MAT10		REGOFF(40, EDX)
61#define MAT11		REGOFF(44, EDX)
62#define MAT12		REGOFF(48, EDX)
63#define MAT13		REGOFF(52, EDX)
64#define MAT14		REGOFF(56, EDX)
65#define MAT15		REGOFF(60, EDX)
66
67
/*
 * _mesa_x86_transform_points2_general
 *
 * Transform an array of 2-component points by all 16 matrix elements that
 * apply to (x, y), producing 4-component results.  With x = SRC0, y = SRC1
 * and m[n] = MATn, each output point is:
 *
 *     DST0 = x*m[0] + y*m[4] + m[12]
 *     DST1 = x*m[1] + y*m[5] + m[13]
 *     DST2 = x*m[2] + y*m[6] + m[14]
 *     DST3 = x*m[3] + y*m[7] + m[15]
 *
 * Arguments are fetched from the stack through ARG_SOURCE, ARG_DEST and
 * ARG_MATRIX (xform_args.h); FRAME_OFFSET is the number of bytes pushed
 * here before those macros are used.
 *
 * Register roles:
 *     ESI  current source point, advanced by the source stride (EAX)
 *     EDI  current destination point, advanced by 16 bytes
 *     EDX  matrix base
 *     ECX  end of destination data = dest start + count*16
 * ESI and EDI are saved on entry and restored on exit.
 *
 * The destination's flags, count and size are written before the
 * empty-input test, so a source with count == 0 leaves the destination
 * describing zero points instead of whatever it held before.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
HIDDEN(_mesa_x86_transform_points2_general)
GLNAME( _mesa_x86_transform_points2_general ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the result metadata unconditionally (also for count == 0). */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* Nothing to transform?  MOV does not touch EFLAGS, TEST sets them. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

/*
 * Stack comments list the x87 stack from ST0 outwards.  F4..F7 are the
 * x-products that accumulate DST0..DST3; F0..F3 are the y-products that
 * get added into them.  All loads happen before any store.
 */
ALIGNTEXT16
LLBL(x86_p2_gr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FADD_S( MAT12 )
	FXCH( ST(2) )			/* F5 F6 F4 F7 */
	FADD_S( MAT13 )
	FXCH( ST(1) )			/* F6 F5 F4 F7 */
	FADD_S( MAT14 )
	FXCH( ST(3) )			/* F7 F5 F4 F6 */
	FADD_S( MAT15 )

	FXCH( ST(2) )			/* F4 F5 F7 F6 */
	FSTP_S( DST0 )			/* F5 F7 F6 */
	FSTP_S( DST1 )			/* F7 F6 */
	FXCH( ST(1) )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next input point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_gr_loop) )

LLBL(x86_p2_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
155
156
157
158
/*
 * _mesa_x86_transform_points2_perspective
 *
 * Transform an array of 2-component points using only matrix elements
 * 0, 5 and 14, producing 4-component results.  With x = SRC0, y = SRC1
 * and m[n] = MATn, each output point is:
 *
 *     DST0 = x*m[0]
 *     DST1 = y*m[5]
 *     DST2 = m[14]      (copied as a raw 32-bit value)
 *     DST3 = 0.0        (FP_ZERO bit pattern)
 *
 * Arguments are fetched from the stack through ARG_SOURCE, ARG_DEST and
 * ARG_MATRIX (xform_args.h); FRAME_OFFSET is the number of bytes pushed
 * here before those macros are used.
 *
 * Register roles:
 *     ESI  current source point, advanced by the source stride (EAX)
 *     EDI  current destination point, advanced by 16 bytes
 *     EDX  matrix base
 *     ECX  end of destination data = dest start + count*16
 *     EBX  bits of m[14], loaded once before the loop
 * ESI, EDI and EBX are saved on entry and restored on exit.
 *
 * The destination's flags, count and size are written before the
 * empty-input test, so a source with count == 0 leaves the destination
 * describing zero points instead of whatever it held before.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
HIDDEN(_mesa_x86_transform_points2_perspective)
GLNAME( _mesa_x86_transform_points2_perspective ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the result metadata unconditionally (also for count == 0). */
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	/* Nothing to transform?  MOV does not touch EFLAGS, TEST sets them. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

	MOV_L( MAT14, EBX )		/* loop-invariant DST2 value */

/* Stack comments list the x87 stack from ST0 outwards. */
ALIGNTEXT16
LLBL(x86_p2_pr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FSTP_S( DST0   )		/* F1 */
	FSTP_S( DST1   )		/* */
	MOV_L( EBX, DST2 )
	MOV_L( CONST(FP_ZERO), DST3 )

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next input point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_pr_loop) )

LLBL(x86_p2_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
221
222
223
224
/*
 * _mesa_x86_transform_points2_3d
 *
 * Transform an array of 2-component points into 3-component results.
 * With x = SRC0, y = SRC1 and m[n] = MATn, each output point is:
 *
 *     DST0 = x*m[0] + y*m[4] + m[12]
 *     DST1 = x*m[1] + y*m[5] + m[13]
 *     DST2 = x*m[2] + y*m[6] + m[14]
 *
 * DST3 is not written.
 *
 * Arguments are fetched from the stack through ARG_SOURCE, ARG_DEST and
 * ARG_MATRIX (xform_args.h); FRAME_OFFSET is the number of bytes pushed
 * here before those macros are used.
 *
 * Register roles:
 *     ESI  current source point, advanced by the source stride (EAX)
 *     EDI  current destination point, advanced by 16 bytes
 *     EDX  matrix base
 *     ECX  end of destination data = dest start + count*16
 * ESI and EDI are saved on entry and restored on exit.
 *
 * The destination's flags, count and size are written before the
 * empty-input test, so a source with count == 0 leaves the destination
 * describing zero points instead of whatever it held before.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
HIDDEN(_mesa_x86_transform_points2_3d)
GLNAME( _mesa_x86_transform_points2_3d ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the result metadata unconditionally (also for count == 0). */
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* Nothing to transform?  MOV does not touch EFLAGS, TEST sets them. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

/*
 * Stack comments list the x87 stack from ST0 outwards.  F4..F6 are the
 * x-products that accumulate DST0..DST2; F0..F2 are the y-products that
 * get added into them.  All loads happen before any store.
 */
ALIGNTEXT16
LLBL(x86_p2_3dr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	FXCH( ST(2) )			/* F4 F5 F6 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 F6 */
	FADD_S( MAT13 )
	FXCH( ST(2) )			/* F6 F4 F5 */
	FADD_S( MAT14 )

	FXCH( ST(1) )			/* F4 F6 F5 */
	FSTP_S( DST0 )			/* F6 F5 */
	FXCH( ST(1) )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next input point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dr_loop) )

LLBL(x86_p2_3dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
303
304
305
306
/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * Transform an array of 2-component points into 3-component results using
 * only matrix elements 0, 5, 12, 13 and 14.  With x = SRC0, y = SRC1 and
 * m[n] = MATn, each output point is:
 *
 *     DST0 = x*m[0] + m[12]
 *     DST1 = y*m[5] + m[13]
 *     DST2 = m[14]      (copied as a raw 32-bit value)
 *
 * DST3 is not written.
 *
 * Arguments are fetched from the stack through ARG_SOURCE, ARG_DEST and
 * ARG_MATRIX (xform_args.h); FRAME_OFFSET is the number of bytes pushed
 * here before those macros are used.
 *
 * Register roles:
 *     ESI  current source point, advanced by the source stride (EAX)
 *     EDI  current destination point, advanced by 16 bytes
 *     EDX  matrix base
 *     ECX  end of destination data = dest start + count*16
 *     EBX  bits of m[14], loaded once before the loop
 * ESI, EDI and EBX are saved on entry and restored on exit.
 *
 * The destination's flags, count and size are written before the
 * empty-input test, so a source with count == 0 leaves the destination
 * describing zero points instead of whatever it held before.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the result metadata unconditionally (also for count == 0). */
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* Nothing to transform?  MOV does not touch EFLAGS, TEST sets them. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

	MOV_L( MAT14, EBX )		/* loop-invariant DST2 value */

/*
 * Stack comments list the x87 stack from ST0 outwards.  F4 accumulates
 * DST0; F5 starts as m[13] and receives the y-product F1 to form DST1.
 */
ALIGNTEXT16
LLBL(x86_p2_3dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */
	MOV_L( EBX, DST2 )

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next input point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dnrr_loop) )

LLBL(x86_p2_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
373
374
375
376
/*
 * _mesa_x86_transform_points2_2d
 *
 * Transform an array of 2-component points into 2-component results.
 * With x = SRC0, y = SRC1 and m[n] = MATn, each output point is:
 *
 *     DST0 = x*m[0] + y*m[4] + m[12]
 *     DST1 = x*m[1] + y*m[5] + m[13]
 *
 * DST2 and DST3 are not written.
 *
 * Arguments are fetched from the stack through ARG_SOURCE, ARG_DEST and
 * ARG_MATRIX (xform_args.h); FRAME_OFFSET is the number of bytes pushed
 * here before those macros are used.
 *
 * Register roles:
 *     ESI  current source point, advanced by the source stride (EAX)
 *     EDI  current destination point, advanced by 16 bytes
 *     EDX  matrix base
 *     ECX  end of destination data = dest start + count*16
 * ESI and EDI are saved on entry and restored on exit.
 *
 * The destination's flags, count and size are written before the
 * empty-input test, so a source with count == 0 leaves the destination
 * describing zero points instead of whatever it held before.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
HIDDEN(_mesa_x86_transform_points2_2d)
GLNAME( _mesa_x86_transform_points2_2d ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the result metadata unconditionally (also for count == 0). */
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* Nothing to transform?  MOV does not touch EFLAGS, TEST sets them. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

/*
 * Stack comments list the x87 stack from ST0 outwards.  F4/F5 are the
 * x-products that accumulate DST0/DST1; F0/F1 are the y-products that
 * get added into them.  All loads happen before any store.
 */
ALIGNTEXT16
LLBL(x86_p2_2dr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	FXCH( ST(1) )			/* F4 F5 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 */
	FADD_S( MAT13 )

	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next input point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dr_loop) )

LLBL(x86_p2_2dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
446
447
448
449
/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * Transform an array of 2-component points into 2-component results using
 * only matrix elements 0, 5, 12 and 13.  With x = SRC0, y = SRC1 and
 * m[n] = MATn, each output point is:
 *
 *     DST0 = x*m[0] + m[12]
 *     DST1 = y*m[5] + m[13]
 *
 * DST2 and DST3 are not written.
 *
 * Arguments are fetched from the stack through ARG_SOURCE, ARG_DEST and
 * ARG_MATRIX (xform_args.h); FRAME_OFFSET is the number of bytes pushed
 * here before those macros are used.
 *
 * Register roles:
 *     ESI  current source point, advanced by the source stride (EAX)
 *     EDI  current destination point, advanced by 16 bytes
 *     EDX  matrix base
 *     ECX  end of destination data = dest start + count*16
 * ESI and EDI are saved on entry and restored on exit.
 *
 * The destination's flags, count and size are written before the
 * empty-input test, so a source with count == 0 leaves the destination
 * describing zero points instead of whatever it held before.
 *
 * The entry point uses ALIGNTEXT16, like every other routine in this file.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the result metadata unconditionally (also for count == 0). */
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* Nothing to transform?  MOV does not touch EFLAGS, TEST sets them. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

/*
 * Stack comments list the x87 stack from ST0 outwards.  F4 accumulates
 * DST0; F5 starts as m[13] and receives the y-product F1 to form DST1.
 */
ALIGNTEXT16
LLBL(x86_p2_2dnrr_loop):

	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0   )		/* F5 */
	FSTP_S( DST1   )		/* */

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next input point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dnrr_loop) )

LLBL(x86_p2_2dnrr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
511
512
513
514
/*
 * _mesa_x86_transform_points2_identity
 *
 * Copy an array of 2-component points unchanged:
 *
 *     DST0 = SRC0
 *     DST1 = SRC1
 *
 * The values are moved as raw 32-bit words through integer registers.
 * DST2 and DST3 are not written.  If the source and destination data start
 * at the same address the copy loop is skipped.
 *
 * Arguments are fetched from the stack through ARG_SOURCE and ARG_DEST
 * (xform_args.h); FRAME_OFFSET is the number of bytes pushed here before
 * those macros are used.  The matrix argument is not needed and is not
 * loaded.
 *
 * Register roles:
 *     ESI  current source point, advanced by the source stride (EAX)
 *     EDI  current destination point, advanced by 16 bytes
 *     ECX  end of destination data = dest start + count*16
 *     EBX, EDX  copy temporaries for the two components
 * ESI, EDI and EBX are saved on entry and restored on exit.
 *
 * The destination's flags, count and size are written before the
 * empty-input test, so a source with count == 0 leaves the destination
 * describing zero points instead of whatever it held before.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
HIDDEN(_mesa_x86_transform_points2_identity)
GLNAME( _mesa_x86_transform_points2_identity ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	/* Publish the result metadata unconditionally (also for count == 0). */
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	/* Nothing to copy?  MOV does not touch EFLAGS, TEST sets them. */
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p2_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

	/* Same data start on both sides: the points are already in place. */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p2_ir_done) )

ALIGNTEXT16
LLBL(x86_p2_ir_loop):

	/* Read both components before writing either one. */
	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next input point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_ir_loop) )

LLBL(x86_p2_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
572
/*
 * On Linux/ELF, emit an empty .note.GNU-stack section so the GNU toolchain
 * does not treat this object as requiring an executable stack.
 */
#if defined(__ELF__) && defined(__linux__)
	.section .note.GNU-stack,"",%progbits
#endif
576