1
2/*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26#ifdef USE_3DNOW_ASM
27#include "assyntax.h"
28#define MATH_ASM_PTR_SIZE 4
29#include "math/m_vector_asm.h"
30#include "xform_args.h"
31
32    SEG_TEXT
33
/*
 * Every routine visible in this file pushes exactly one register (ESI, 4
 * bytes) before it reads ARG_DEST / ARG_MATRIX / ARG_SOURCE; the second push
 * (EDI) only happens after all three arguments have been fetched.
 * NOTE(review): the ARG_* macros come from xform_args.h and presumably add
 * FRAME_OFFSET to their ESP-relative offsets -- confirm there before
 * changing the push order in any routine.
 */
34#define FRAME_OFFSET	4
35
36
/*
 * _mesa_3dnow_transform_points2_general
 *
 * Transforms 2-component source vertices (x0, x1) by a general matrix and
 * writes 4-component results.  With mRC naming the float at byte offset
 * 4*(4*R + C) from the matrix pointer:
 *
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x0*m02 + x1*m12 + m32
 *   r3 = x0*m03 + x1*m13 + m33
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 * Before the loop the destination gets V4F_SIZE = 4, VEC_SIZE_4 OR-ed into
 * its flags byte, and V4F_COUNT copied from the source.
 *
 * Loop registers:
 *   EAX = current source vertex (advanced by the source V4F_STRIDE in EDI)
 *   EDX = current destination vertex (advanced by a fixed 16 bytes)
 *   ESI = vertices remaining;  ECX = matrix pointer
 *   MM0-MM3 = matrix column pairs, MM4/MM5 = translation row
 *
 * ESI and EDI are saved/restored; EAX, ECX, EDX and MM0-MM7 are clobbered.
 * FEMMS is executed on every exit path.
 *
 * NOTE(review): each vertex is loaded from the source a second time after
 * r0/r1 have already been stored, so the result is only correct when that
 * store does not overwrite the source vertex.  Presumably callers never pass
 * overlapping source and destination storage -- confirm.
 */
37ALIGNTEXT16
38GLOBL GLNAME( _mesa_3dnow_transform_points2_general )
39HIDDEN(_mesa_3dnow_transform_points2_general)
40GLNAME( _mesa_3dnow_transform_points2_general ):
41    _CET_ENDBR
42    PUSH_L    ( ESI )
43
44    MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector          */
45    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
46    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
47    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
48    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
49    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
50    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
51
52    PUSH_L    ( EDI )
53
54    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data           */
55    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
56    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
57    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
58    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data         */
59
60    TEST_L    ( ESI, ESI )
61    JZ        ( LLBL( G3TPGR_3 ) )		/* nothing to do for count 0 */
62
63    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
64    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
65
66    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
67    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
68
69    MOVD      ( REGOFF(8, ECX), MM2 )	/*                 | m02             */
70    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )	/* m12             | m02             */
71
72    MOVD      ( REGOFF(12, ECX), MM3 )	/*                 | m03             */
73    PUNPCKLDQ ( REGOFF(28, ECX), MM3 )	/* m13             | m03             */
74
75    MOVQ      ( REGOFF(48, ECX), MM4 )	/* m31             | m30             */
76    MOVQ      ( REGOFF(56, ECX), MM5 )	/* m33             | m32             */
77
78ALIGNTEXT16
79LLBL( G3TPGR_2 ):
80
81    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */
82    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
83
84    PFMUL     ( MM0, MM6 )		/* x1*m10          | x0*m00          */
85    PFMUL     ( MM1, MM7 )		/* x1*m11          | x0*m01          */
86
87    PFACC     ( MM7, MM6 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
88    PFADD     ( MM4, MM6 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
89
90    MOVQ      ( MM6, REGIND(EDX) )	/* write r1, r0                      */
91    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0  (reloaded)  */
92
93    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
94    PFMUL     ( MM2, MM6 )		/* x1*m12          | x0*m02          */
95
96    PFMUL     ( MM3, MM7 )		/* x1*m13          | x0*m03          */
97    ADD_L     ( EDI, EAX )		/* next vertex                       */
98
99    PFACC     ( MM7, MM6 )		/* x0*m03+x1*m13   | x0*m02+x1*m12   */
100    PFADD     ( MM5, MM6 )		/* x0*...*m13+m33  | x0*...*m12+m32  */
101
102    MOVQ      ( MM6, REGOFF(8, EDX) )	/* write r3, r2                      */
103    ADD_L     ( CONST(16), EDX )	/* next r                            */
104
105    DEC_L     ( ESI )			/* decrement vertex counter          */
106    JNZ       ( LLBL( G3TPGR_2 ) )	/* cnt > 0 ? -> process next vertex  */
107
108LLBL( G3TPGR_3 ):
109
110    FEMMS
111    POP_L     ( EDI )
112    POP_L     ( ESI )
113    RET
114
115
116
117
/*
 * _mesa_3dnow_transform_points2_perspective
 *
 * Transforms 2-component source vertices (x0, x1) using only the matrix
 * entries at byte offsets 0 (m00), 20 (m11) and 56 (m32):
 *
 *   r0 = x0*m00
 *   r1 = x1*m11
 *   r2 = m32
 *   r3 = 0        (upper half of MM3, zeroed by the MOVD load)
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 * Before the loop the destination gets V4F_SIZE = 4, VEC_SIZE_4 OR-ed into
 * its flags byte, and V4F_COUNT copied from the source.
 *
 * Loop registers:
 *   EAX = current source vertex (advanced by the source V4F_STRIDE in EDI)
 *   EDX = current destination vertex (advanced by a fixed 16 bytes)
 *   ESI = vertices remaining;  ECX = matrix pointer
 *
 * ESI and EDI are saved/restored; EAX, ECX, EDX, MM0, MM3 and MM4 are
 * clobbered.  FEMMS is executed on every exit path.
 */
118ALIGNTEXT16
119GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )
120HIDDEN(_mesa_3dnow_transform_points2_perspective)
121GLNAME( _mesa_3dnow_transform_points2_perspective ):
122    _CET_ENDBR
123    PUSH_L    ( ESI )
124
125    MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector          */
126    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
127    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
128    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
129    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
130    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
131    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
132
133    PUSH_L    ( EDI )
134
135    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data           */
136    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
137    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
138    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
139    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data         */
140
141    TEST_L    ( ESI, ESI )
142    JZ        ( LLBL( G3TPPR_3 ) )		/* nothing to do for count 0 */
143
144    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
145    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
146
147    MOVD      ( REGOFF(56, ECX), MM3 )	/* 0               | m32             */
148
149ALIGNTEXT16
150LLBL( G3TPPR_2 ):
151
152    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
153    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
154
155    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
156    MOVQ      ( MM3, REGOFF(8, EDX) )	/* write r2  (=m32), r3 (=0)         */
157
158    ADD_L     ( EDI, EAX )		/* next vertex                       */
159    ADD_L     ( CONST(16), EDX )	/* next r                            */
160
161    DEC_L     ( ESI )			/* decrement vertex counter          */
162    JNZ       ( LLBL( G3TPPR_2 ) )	/* cnt > 0 ? -> process next vertex  */
163
164LLBL( G3TPPR_3 ):
165
166    FEMMS
167    POP_L     ( EDI )
168    POP_L     ( ESI )
169    RET
170
171
172
173
/*
 * _mesa_3dnow_transform_points2_3d
 *
 * Transforms 2-component source vertices (x0, x1) and writes 3-component
 * results.  With mRC naming the float at byte offset 4*(4*R + C) from the
 * matrix pointer:
 *
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x0*m02 + x1*m12 + m32
 *
 * r3 is not written: only the low dword of the second result is stored.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 * Before the loop the destination gets V4F_SIZE = 3, VEC_SIZE_3 OR-ed into
 * its flags byte, and V4F_COUNT copied from the source.
 *
 * Loop registers:
 *   EAX = current source vertex (advanced by the source V4F_STRIDE in EDI)
 *   EDX = current destination vertex (advanced by a fixed 16 bytes)
 *   ESI = vertices remaining;  ECX = matrix pointer
 *
 * ESI and EDI are saved/restored; EAX, ECX, EDX and MM0-MM2, MM4-MM7 are
 * clobbered.  FEMMS is executed on every exit path.
 *
 * NOTE(review): each vertex is loaded from the source a second time after
 * r0/r1 have already been stored, so the result is only correct when that
 * store does not overwrite the source vertex.  Presumably callers never pass
 * overlapping source and destination storage -- confirm.
 */
174ALIGNTEXT16
175GLOBL GLNAME( _mesa_3dnow_transform_points2_3d )
176HIDDEN(_mesa_3dnow_transform_points2_3d)
177GLNAME( _mesa_3dnow_transform_points2_3d ):
178    _CET_ENDBR
179    PUSH_L    ( ESI )
180
181    MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector          */
182    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
183    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
184    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
185    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
186    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
187    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
188
189    PUSH_L    ( EDI )
190
191    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data           */
192    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
193    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
194    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
195    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data         */
196
197    TEST_L    ( ESI, ESI )
198    JZ        ( LLBL( G3TP3R_3 ) )		/* nothing to do for count 0 */
199
200    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
201    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
202
203    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
204    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
205
206    MOVD      ( REGOFF(8, ECX), MM2 )	/*                 | m02             */
207    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )	/* m12             | m02             */
208
209    MOVQ      ( REGOFF(48, ECX), MM4 )	/* m31             | m30             */
210    MOVD      ( REGOFF(56, ECX), MM5 )	/*                 | m32             */
211
212ALIGNTEXT16
213LLBL( G3TP3R_2 ):
214
215    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */
216    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
217
218    PFMUL     ( MM0, MM6 )		/* x1*m10          | x0*m00          */
219    PFMUL     ( MM1, MM7 )		/* x1*m11          | x0*m01          */
220
221    PFACC     ( MM7, MM6 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
222    PFADD     ( MM4, MM6 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
223
224    MOVQ      ( MM6, REGIND(EDX) )	/* write r1, r0                      */
225    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0  (reloaded)  */
226
227    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
228    PFMUL     ( MM2, MM6 )		/* x1*m12          | x0*m02          */
229
230    PFACC     ( MM7, MM6 )		/* ***trash***     | x0*m02+x1*m12   */
231    PFADD     ( MM5, MM6 )		/* ***trash***     | x0*...*m12+m32  */
232
233    MOVD      ( MM6, REGOFF(8, EDX) )	/* write r2 (low dword only)         */
234    ADD_L     ( EDI, EAX )		/* next vertex                       */
235
236    ADD_L     ( CONST(16), EDX )	/* next r                            */
237    DEC_L     ( ESI )			/* decrement vertex counter          */
238
239    JNZ       ( LLBL( G3TP3R_2 ) )	/* cnt > 0 ? -> process next vertex  */
240
241LLBL( G3TP3R_3 ):
242
243    FEMMS
244    POP_L     ( EDI )
245    POP_L     ( ESI )
246    RET
247
248
249
250
/*
 * _mesa_3dnow_transform_points2_3d_no_rot
 *
 * Transforms 2-component source vertices (x0, x1) using only the matrix
 * entries at byte offsets 0 (m00), 20 (m11) and 48/52/56 (m30, m31, m32):
 *
 *   r0 = x0*m00 + m30
 *   r1 = x1*m11 + m31
 *   r2 = m32
 *
 * r3 is not written: r2 is stored with a 32-bit MOVD.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 * Before the loop the destination gets V4F_SIZE = 3, VEC_SIZE_3 OR-ed into
 * its flags byte, and V4F_COUNT copied from the source.
 *
 * Loop registers:
 *   EAX = current source vertex (advanced by the source V4F_STRIDE in EDI)
 *   EDX = current destination vertex (advanced by a fixed 16 bytes)
 *   ESI = vertices remaining;  ECX = matrix pointer
 *
 * ESI and EDI are saved/restored; EAX, ECX, EDX, MM0 and MM2-MM4 are
 * clobbered.  FEMMS is executed on every exit path.
 */
251ALIGNTEXT16
252GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )
253HIDDEN(_mesa_3dnow_transform_points2_3d_no_rot)
254GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):
255    _CET_ENDBR
256    PUSH_L    ( ESI )
257
258    MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector          */
259    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
260    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
261    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
262    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
263    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
264    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
265
266    PUSH_L    ( EDI )
267
268    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data           */
269    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
270    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
271    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
272    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data         */
273
274    TEST_L    ( ESI, ESI )
275    JZ        ( LLBL( G3TP3NRR_3 ) )		/* nothing to do for count 0 */
276
277    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
278    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
279
280    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
281    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
282
283ALIGNTEXT16
284LLBL( G3TP3NRR_2 ):
285
286    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
287    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
288
289    PFADD     ( MM2, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
290    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
291
292    MOVD      ( MM3, REGOFF(8, EDX) )	/* write r2 (= m32)                  */
293    ADD_L     ( EDI, EAX )		/* next vertex                       */
294
295    ADD_L     ( CONST(16), EDX )	/* next r                            */
296    DEC_L     ( ESI )			/* decrement vertex counter          */
297
298    JNZ       ( LLBL( G3TP3NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
299
300LLBL( G3TP3NRR_3 ):
301
302    FEMMS
303    POP_L     ( EDI )
304    POP_L     ( ESI )
305    RET
306
307
308
309
/*
 * _mesa_3dnow_transform_points2_2d
 *
 * Transforms 2-component source vertices (x0, x1) and writes 2-component
 * results.  With mRC naming the float at byte offset 4*(4*R + C) from the
 * matrix pointer:
 *
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *
 * Only r0/r1 (8 bytes) are stored per vertex; the destination pointer still
 * advances by 16 bytes.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 * Before the loop the destination gets V4F_SIZE = 2, VEC_SIZE_2 OR-ed into
 * its flags byte, and V4F_COUNT copied from the source.
 *
 * Loop registers:
 *   EAX = current source vertex (advanced by the source V4F_STRIDE in EDI)
 *   EDX = current destination vertex
 *   ESI = vertices remaining;  ECX = matrix pointer
 *
 * ESI and EDI are saved/restored; EAX, ECX, EDX, MM0-MM2, MM4 and MM5 are
 * clobbered.  FEMMS is executed on every exit path.
 */
310ALIGNTEXT16
311GLOBL GLNAME( _mesa_3dnow_transform_points2_2d )
312HIDDEN(_mesa_3dnow_transform_points2_2d)
313GLNAME( _mesa_3dnow_transform_points2_2d ):
314    _CET_ENDBR
315    PUSH_L    ( ESI )
316
317    MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector          */
318    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
319    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
320    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
321    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
322    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
323    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
324
325    PUSH_L    ( EDI )
326
327    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data           */
328    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
329    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
330    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
331    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data         */
332
333    TEST_L    ( ESI, ESI )
334    JZ        ( LLBL( G3TP2R_3 ) )		/* nothing to do for count 0 */
335
336    MOVQ      ( REGIND(ECX), MM0 )	/* m01             | m00             */
337    MOVQ      ( REGOFF(16, ECX), MM1 )	/* m11             | m10             */
338
339    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
340
341ALIGNTEXT16
342LLBL( G3TP2R_2 ):
343
344    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
345    MOVD      ( REGOFF(4, EAX), MM5 )	/*                 | x1              */
346
347    PUNPCKLDQ ( MM4, MM4 )		/* x0              | x0              */
348    ADD_L     ( EDI, EAX )		/* next vertex                       */
349
350    PFMUL     ( MM0, MM4 )		/* x0*m01          | x0*m00          */
351    PUNPCKLDQ ( MM5, MM5 )		/* x1              | x1              */
352
353    PFMUL     ( MM1, MM5 )		/* x1*m11          | x1*m10          */
354    PFADD     ( MM2, MM4 )		/* x0*m01+m31      | x0*m00+m30      */
355
356    PFADD     ( MM5, MM4 )		/* ...+x1*m11 = r1 | ...+x1*m10 = r0 */
357    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
358
359    ADD_L     ( CONST(16), EDX )	/* next r                            */
360    DEC_L     ( ESI )			/* decrement vertex counter          */
361
362    JNZ       ( LLBL( G3TP2R_2 ) )	/* cnt > 0 ? -> process next vertex  */
363
364LLBL( G3TP2R_3 ):
365
366    FEMMS
367    POP_L     ( EDI )
368    POP_L     ( ESI )
369    RET
370
371
372
373
/*
 * _mesa_3dnow_transform_points2_2d_no_rot
 *
 * Transforms 2-component source vertices (x0, x1) using only the matrix
 * entries at byte offsets 0 (m00), 20 (m11) and 48/52 (m30, m31):
 *
 *   r0 = x0*m00 + m30
 *   r1 = x1*m11 + m31
 *
 * Only r0/r1 (8 bytes) are stored per vertex; the destination pointer still
 * advances by 16 bytes.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 * Before the loop the destination gets V4F_SIZE = 2, VEC_SIZE_2 OR-ed into
 * its flags byte, and V4F_COUNT copied from the source.
 *
 * Loop registers:
 *   EAX = current source vertex (advanced by the source V4F_STRIDE in EDI)
 *   EDX = current destination vertex
 *   ESI = vertices remaining;  ECX = matrix pointer
 *
 * ESI and EDI are saved/restored; EAX, ECX, EDX, MM0, MM2 and MM4 are
 * clobbered.  FEMMS is executed on every exit path.
 */
374ALIGNTEXT16
375GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )
376HIDDEN(_mesa_3dnow_transform_points2_2d_no_rot)
377GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):
378    _CET_ENDBR
379    PUSH_L    ( ESI )
380
381    MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector          */
382    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
383    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
384    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
385    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
386    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
387    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
388
389    PUSH_L    ( EDI )
390
391    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data           */
392    MOV_L     ( ESI, ECX )			/* ECX = matrix              */
393    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
394    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
395    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data         */
396
397    TEST_L    ( ESI, ESI )
398    JZ        ( LLBL( G3TP2NRR_3 ) )		/* nothing to do for count 0 */
399
400    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
401    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
402
403    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
404
405ALIGNTEXT16
406LLBL( G3TP2NRR_2 ):
407
408    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
409    ADD_L     ( EDI, EAX )		/* next vertex                       */
410
411    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
412    PFADD     ( MM2, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
413
414    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
415    ADD_L     ( CONST(16), EDX )	/* next r                            */
416
417    DEC_L     ( ESI )			/* decrement vertex counter          */
418    JNZ       ( LLBL( G3TP2NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
419
420LLBL( G3TP2NRR_3 ):
421
422    FEMMS
423    POP_L     ( EDI )
424    POP_L     ( ESI )
425    RET
426
427
428
429
/*
 * _mesa_3dnow_transform_points2_identity
 *
 * Copies the two components (x0, x1) of every source vertex to r0/r1 of the
 * corresponding destination vertex; no matrix entry is read.
 *
 * Arguments are fetched through ARG_DEST, ARG_MATRIX and ARG_SOURCE.
 * Before the loop the destination gets V4F_SIZE = 2, VEC_SIZE_2 OR-ed into
 * its flags byte, and V4F_COUNT copied from the source.
 *
 * Loop registers:
 *   EAX = current source vertex (advanced by the source V4F_STRIDE in EDI)
 *   EDX = current destination vertex (advanced by a fixed 16 bytes)
 *   ESI = vertices remaining
 *   ECX = matrix pointer (loaded but never dereferenced here)
 *
 * ESI and EDI are saved/restored; EAX, ECX, EDX and MM0 are clobbered.
 * FEMMS is executed on every exit path.
 *
 * The zero-count guard must branch to the exit label G3TPIR_4.  Branching to
 * G3TPIR_3 (the loop head directly below the guard) would make the guard a
 * no-op: the loop body would run once, DEC_L would wrap ESI from 0 to
 * 0xFFFFFFFF, and the copy would continue far beyond both buffers.
 */
430ALIGNTEXT16
431GLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
432HIDDEN(_mesa_3dnow_transform_points2_identity)
433GLNAME( _mesa_3dnow_transform_points2_identity ):
434    _CET_ENDBR
435    PUSH_L    ( ESI )
436
437    MOV_L     ( ARG_DEST, ECX )	/* ECX = destination vector          */
438    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
439    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
440    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
441    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
442    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
443    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count */
444
445    PUSH_L    ( EDI )
446
447    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest data           */
448    MOV_L     ( ESI, ECX )			/* ECX = matrix (unused)     */
449    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count        */
450    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
451    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source data         */
452
453    TEST_L    ( ESI, ESI )
454    JZ        ( LLBL( G3TPIR_4 ) )		/* count == 0 -> skip the loop */
455
456ALIGNTEXT16
457LLBL( G3TPIR_3 ):
458
459    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
460    ADD_L     ( EDI, EAX )		/* next vertex                       */
461
462    MOVQ      ( MM0, REGIND(EDX) )	/* r1              | r0              */
463    ADD_L     ( CONST(16), EDX )	/* next r                            */
464
465    DEC_L     ( ESI )			/* decrement vertex counter          */
466    JNZ       ( LLBL( G3TPIR_3 ) )	/* cnt > 0 ? -> process next vertex  */
467
468LLBL( G3TPIR_4 ):
469
470    FEMMS
471    POP_L     ( EDI )
472    POP_L     ( ESI )
473    RET
474#endif
475
476#if defined (__ELF__) && defined (__linux__)
/*
 * Emit an empty .note.GNU-stack section (no flags) on ELF/Linux builds.
 * By GNU toolchain convention this marks the object as not requiring an
 * executable stack.  The directive sits outside the USE_3DNOW_ASM guard, so
 * it is emitted even when the routines above are compiled out.
 */
477	.section .note.GNU-stack,"",%progbits
478#endif
479