
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifdef USE_3DNOW_ASM
/*
 * 3DNow! versions of the 4-component point transform routines.  One entry
 * point is provided per matrix class (general, perspective, 3d, 3d_no_rot,
 * 2d, 2d_no_rot, identity).  The code is written with the assyntax.h
 * instruction macros and is only assembled when USE_3DNOW_ASM is defined.
 */
#include "assyntax.h"
/* NOTE(review): presumably selects 4-byte pointers when m_vector_asm.h
 * computes the V4F_* field offsets -- confirm against that header. */
#define MATH_ASM_PTR_SIZE 4
#include "math/m_vector_asm.h"
#include "xform_args.h"

    SEG_TEXT

/* NOTE(review): presumably the number of bytes pushed before the ARG_*
 * stack operands are read.  Every routine below pushes exactly one register
 * (ESI) before loading its three arguments and pushes EDI only afterwards,
 * which matches a value of 4 -- confirm against xform_args.h. */
#define FRAME_OFFSET	4


/*
 * _mesa_3dnow_transform_points4_general
 *
 * Multiplies every 4-float source vertex (x0 x1 x2 x3) by a full 4x4 matrix
 * and writes the results as consecutive 16-byte vertices to the destination:
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *     r3 = x0*m3 + x1*m7 + x2*m11 + x3*m15
 *
 * where mN is the float at byte offset 4*N from the matrix pointer.
 *
 * In:    ARG_DEST, ARG_MATRIX, ARG_SOURCE (stack operands defined in
 *        xform_args.h).  NOTE(review): presumably a destination vector,
 *        a 16-float matrix and a source vector -- confirm with the caller.
 * Out:   dest size field = 4, dest flags |= VEC_SIZE_4,
 *        dest count = source count.
 * Loop:  EAX = current source vertex, EDX = current result,
 *        ECX = matrix, ESI = vertices left, EDI = source stride (bytes).
 * Saves: ESI, EDI.  Clobbers EAX, ECX, EDX, MM0-MM7 and the flags; FEMMS is
 *        executed before returning, including when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
HIDDEN(_mesa_3dnow_transform_points4_general)
GLNAME( _mesa_3dnow_transform_points4_general ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = src count    */

    PUSH_L    ( EDI )			/* all ARG_* reads are done by now   */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first result        */
    MOV_L     ( ESI, ECX )		/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPGR_2 ) )	/* nothing to transform              */

    PREFETCHW ( REGIND(EDX) )

ALIGNTEXT16
LLBL( G3TPGR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM4 )	/* x3              | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGIND(EAX) )

    MOVQ      ( MM0, MM2 )		/* x1              | x0              */
    MOVQ      ( MM4, MM6 )		/* x3              | x2              */

    PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
    PUNPCKHDQ ( MM2, MM2 )		/* x1              | x1              */

    MOVQ      ( MM0, MM1 )		/* x0              | x0              */
    ADD_L     ( CONST(16), EDX )	/* next r                            */

    PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
    MOVQ      ( MM2, MM3 )		/* x1              | x1              */

    PFMUL     ( REGOFF(8, ECX), MM1 )	/* x0*m3           | x0*m2           */
    PUNPCKLDQ ( MM4, MM4 )		/* x2              | x2              */

    PFMUL     ( REGOFF(16, ECX), MM2 )	/* x1*m5           | x1*m4           */
    MOVQ      ( MM4, MM5 )		/* x2              | x2              */

    PFMUL     ( REGOFF(24, ECX), MM3 )	/* x1*m7           | x1*m6           */
    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */

    PFMUL     ( REGOFF(32, ECX), MM4 )	/* x2*m9           | x2*m8           */
    MOVQ      ( MM6, MM7 )		/* x3              | x3              */

    PFMUL     ( REGOFF(40, ECX), MM5 )	/* x2*m11          | x2*m10          */
    PFADD     ( MM0, MM2 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */

    PFMUL     ( REGOFF(48, ECX), MM6 )	/* x3*m13          | x3*m12          */
    PFADD     ( MM1, MM3 )		/* x0*m3+x1*m7     | x0*m2+x1*m6     */

    PFMUL     ( REGOFF(56, ECX), MM7 )	/* x3*m15          | x3*m14          */
    PFADD     ( MM4, MM6 )		/* x2*m9+x3*m13    | x2*m8+x3*m12    */

    PFADD     ( MM5, MM7 )		/* x2*m11+x3*m15   | x2*m10+x3*m14   */
    PFADD     ( MM2, MM6 )		/* r1              | r0              */

    PFADD     ( MM3, MM7 )		/* r3              | r2              */
    MOVQ      ( MM6, REGOFF(-16, EDX) )	/* write r0, r1                      */

    MOVQ      ( MM7, REGOFF(-8, EDX) )	/* write r2, r3                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TPGR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPGR_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Transforms every 4-float source vertex (x0 x1 x2 x3) using only the matrix
 * elements read below, and writes consecutive 16-byte results:
 *
 *     r0 = x0*m0  + x2*m8
 *     r1 = x1*m5  + x2*m9
 *     r2 = x2*m10 + x3*m14
 *     r3 = -x2
 *
 * where mN is the float at byte offset 4*N from the matrix pointer.  In the
 * register comments mRC stands for m(4*R + C), e.g. m32 is m14.
 *
 * In:    ARG_DEST, ARG_MATRIX, ARG_SOURCE (stack operands defined in
 *        xform_args.h).  NOTE(review): presumably a destination vector,
 *        a 16-float matrix and a source vector -- confirm with the caller.
 * Out:   dest size field = 4, dest flags |= VEC_SIZE_4,
 *        dest count = source count.
 * Loop:  EAX = current source vertex, EDX = current result,
 *        ESI = vertices left, EDI = source stride (bytes);
 *        MM0-MM2 hold matrix elements, MM7 holds zero.
 * Saves: ESI, EDI.  Clobbers EAX, ECX, EDX, MM0-MM7 and the flags; FEMMS is
 *        executed before returning, including when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
HIDDEN(_mesa_3dnow_transform_points4_perspective)
GLNAME( _mesa_3dnow_transform_points4_perspective ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = src count    */

    PUSH_L    ( EDI )			/* all ARG_* reads are done by now   */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first result        */
    MOV_L     ( ESI, ECX )		/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPPR_2 ) )	/* nothing to transform              */

    PREFETCH  ( REGIND(EAX) )
    PREFETCHW ( REGIND(EDX) )

    /* Loop-invariant matrix elements and the zero used for negation. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */

    MOVD      ( REGOFF(40, ECX), MM1 )	/*                 | m22             */
    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )	/* m32             | m22             */

    MOVQ      ( REGOFF(32, ECX), MM2 )	/* m21             | m20             */
    PXOR      ( MM7, MM7 )		/* 0               | 0               */

ALIGNTEXT16
LLBL( G3TPPR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
    MOVD      ( REGOFF(8, EAX), MM3 )	/*                 | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGOFF(32, EAX) )	/* 32 bytes past the next vertex;    */
					/* helps most when source is packed  */

    MOVQ      ( MM5, MM6 )		/* x3              | x2              */
    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */

    PUNPCKLDQ ( MM5, MM5 )		/* x2              | x2              */
    ADD_L     ( CONST(16), EDX )	/* next r                            */

    PFMUL     ( MM2, MM5 )		/* x2*m21          | x2*m20          */
    PFSUBR    ( MM7, MM3 )		/*                 | -x2             */

    PFMUL     ( MM1, MM6 )		/* x3*m32          | x2*m22          */
    PFADD     ( MM4, MM5 )		/* x1*m11+x2*m21   | x0*m00+x2*m20   */

    PFACC     ( MM3, MM6 )		/* -x2             | x2*m22+x3*m32   */
    MOVQ      ( MM5, REGOFF(-16, EDX) )	/* write r0, r1                      */

    MOVQ      ( MM6, REGOFF(-8, EDX) )	/* write r2, r3                      */
    DEC_L     ( ESI )			/* decrement vertex counter          */

    JNZ       ( LLBL( G3TPPR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPPR_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_3d
 *
 * Transforms every 4-float source vertex (x0 x1 x2 x3) by the first three
 * output components of the matrix, passes x3 through, and writes consecutive
 * 16-byte results:
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *     r3 = x3
 *
 * where mN is the float at byte offset 4*N from the matrix pointer.
 *
 * In:    ARG_DEST, ARG_MATRIX, ARG_SOURCE (stack operands defined in
 *        xform_args.h).  NOTE(review): presumably a destination vector,
 *        a 16-float matrix and a source vector -- confirm with the caller.
 * Out:   dest size field = 4, dest flags |= VEC_SIZE_4,
 *        dest count = source count.
 * Loop:  EAX = current source vertex, EDX = current result, ECX = matrix,
 *        ESI = vertices left, EDI = source stride (bytes);
 *        MM6 = m6|m2 and MM7 = m14|m10 stay loaded across iterations.
 * Saves: ESI, EDI.  Clobbers EAX, ECX, EDX, MM0-MM7 and the flags; FEMMS is
 *        executed before returning, including when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
HIDDEN(_mesa_3dnow_transform_points4_3d)
GLNAME( _mesa_3dnow_transform_points4_3d ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = src count    */

    PUSH_L    ( EDI )			/* all ARG_* reads are done by now   */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first result        */
    MOV_L     ( ESI, ECX )		/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3R_2 ) )	/* nothing to transform              */

    /* Loop-invariant matrix elements used for r2. */
    MOVD      ( REGOFF(8, ECX), MM6 )	/*                 | m2              */
    PUNPCKLDQ ( REGOFF(24, ECX), MM6 )	/* m6              | m2              */

    MOVD      ( REGOFF(40, ECX), MM7 )	/*                 | m10             */
    PUNPCKLDQ ( REGOFF(56, ECX), MM7 )	/* m14             | m10             */

ALIGNTEXT16
LLBL( G3TP3R_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
    PREFETCH  ( REGOFF(32, EAX) )	/* hopefully array is tightly packed */

    MOVQ      ( REGIND(EAX), MM2 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM3 )	/* x3              | x2              */

    MOVQ      ( MM2, MM0 )		/* x1              | x0              */
    MOVQ      ( MM3, MM4 )		/* x3              | x2              */

    MOVQ      ( MM0, MM1 )		/* x1              | x0              */
    MOVQ      ( MM4, MM5 )		/* x3              | x2              */

    PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
    PUNPCKHDQ ( MM1, MM1 )		/* x1              | x1              */

    PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
    PUNPCKLDQ ( MM3, MM3 )		/* x2              | x2              */

    PFMUL     ( REGOFF(16, ECX), MM1 )	/* x1*m5           | x1*m4           */
    PUNPCKHDQ ( MM4, MM4 )		/* x3              | x3              */

    PFMUL     ( MM6, MM2 )		/* x1*m6           | x0*m2           */
    PFADD     ( MM0, MM1 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */

    PFMUL     ( REGOFF(32, ECX), MM3 )	/* x2*m9           | x2*m8           */
    ADD_L     ( CONST(16), EDX )	/* next r                            */

    PFMUL     ( REGOFF(48, ECX), MM4 )	/* x3*m13          | x3*m12          */
    PFADD     ( MM1, MM3 )		/* x0*m1+..+x2*m9  | x0*m0+...+x2*m8 */

    PFMUL     ( MM7, MM5 )		/* x3*m14          | x2*m10          */
    PFADD     ( MM3, MM4 )		/* r1              | r0              */

    PFACC     ( MM2, MM5 )		/* x0*m2+x1*m6     | x2*m10+x3*m14   */
    MOVD      ( REGOFF(12, EAX), MM0 )	/*                 | x3              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PFACC     ( MM0, MM5 )		/* r3              | r2              */

    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP3R_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP3R_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Transforms every 4-float source vertex (x0 x1 x2 x3) using only the
 * diagonal and last-column matrix elements read below, passes x3 through,
 * and writes consecutive 16-byte results:
 *
 *     r0 = x0*m0  + x3*m12
 *     r1 = x1*m5  + x3*m13
 *     r2 = x2*m10 + x3*m14
 *     r3 = x3
 *
 * where mN is the float at byte offset 4*N from the matrix pointer.  In the
 * register comments mRC stands for m(4*R + C), e.g. m31 is m13.
 *
 * In:    ARG_DEST, ARG_MATRIX, ARG_SOURCE (stack operands defined in
 *        xform_args.h).  NOTE(review): presumably a destination vector,
 *        a 16-float matrix and a source vector -- confirm with the caller.
 * Out:   dest size field = 4, dest flags |= VEC_SIZE_4,
 *        dest count = source count.
 * Loop:  EAX = current source vertex, EDX = current result,
 *        ESI = vertices left, EDI = source stride (bytes);
 *        MM0-MM2 hold matrix elements.
 * Saves: ESI, EDI.  Clobbers EAX, ECX, EDX, MM0-MM2, MM4-MM7 and the flags;
 *        FEMMS is executed before returning, including when count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
    _CET_ENDBR
    PUSH_L    ( ESI )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = src count    */

    PUSH_L    ( EDI )			/* all ARG_* reads are done by now   */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first result        */
    MOV_L     ( ESI, ECX )		/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3NRR_2 ) )	/* nothing to transform              */

    /* Loop-invariant matrix elements. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */

    MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
    PUNPCKLDQ ( REGOFF(56, ECX), MM2 )	/* m32             | m22             */

    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */

ALIGNTEXT16
LLBL( G3TP3NRR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
    MOVD      ( REGOFF(12, EAX), MM7 )	/*                 | x3              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGOFF(32, EAX) )	/* 32 bytes past the next vertex;    */
					/* helps most when source is packed  */

    MOVQ      ( MM5, MM6 )		/* x3              | x2              */
    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */

    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
    PFMUL     ( MM2, MM5 )		/* x3*m32          | x2*m22          */

    PFMUL     ( MM1, MM6 )		/* x3*m31          | x3*m30          */
    PFACC     ( MM7, MM5 )		/* x3              | x2*m22+x3*m32   */

    PFADD     ( MM6, MM4 )		/* x1*m11+x3*m31   | x0*m00+x3*m30   */
    ADD_L     ( CONST(16), EDX )	/* next r                            */

    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP3NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP3NRR_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_2d
 *
 * Transforms the first two components of every 4-float source vertex
 * (x0 x1 x2 x3), copies x2 and x3 unchanged, and writes consecutive 16-byte
 * results:
 *
 *     r0 = x0*m0 + x1*m4 + x3*m12
 *     r1 = x0*m1 + x1*m5 + x3*m13
 *     r2 = x2
 *     r3 = x3
 *
 * where mN is the float at byte offset 4*N from the matrix pointer.  In the
 * register comments mRC stands for m(4*R + C), e.g. m10 is m4.
 *
 * In:    ARG_DEST, ARG_MATRIX, ARG_SOURCE (stack operands defined in
 *        xform_args.h).  NOTE(review): presumably a destination vector,
 *        a 16-float matrix and a source vector -- confirm with the caller.
 * Out:   dest size field = 4, dest flags |= VEC_SIZE_4,
 *        dest count = source count.
 * Loop:  EAX = current source vertex, EDX = current result,
 *        ESI = vertices left, EDI = source stride (bytes);
 *        MM0-MM2 hold matrix elements.
 * Saves: ESI, EDI.  Clobbers EAX, ECX, EDX, MM0-MM6 and the flags; FEMMS is
 *        executed before returning, including when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
HIDDEN(_mesa_3dnow_transform_points4_2d)
GLNAME( _mesa_3dnow_transform_points4_2d ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = src count    */

    PUSH_L    ( EDI )			/* all ARG_* reads are done by now   */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first result        */
    MOV_L     ( ESI, ECX )		/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2R_2 ) )	/* nothing to transform              */

    /* Loop-invariant matrix elements. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */

    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */

    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */

ALIGNTEXT16
LLBL( G3TP2R_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM3 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGIND(EAX) )

    MOVQ      ( MM3, MM4 )		/* x1              | x0              */
    MOVQ      ( MM5, MM6 )		/* x3              | x2              */

    PFMUL     ( MM1, MM4 )		/* x1*m11          | x0*m01          */
    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */

    PFMUL     ( MM0, MM3 )		/* x1*m10          | x0*m00          */
    ADD_L     ( CONST(16), EDX )	/* next r                            */

    PFACC     ( MM4, MM3 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
    PFMUL     ( MM2, MM6 )		/* x3*m31          | x3*m30          */

    PFADD     ( MM6, MM3 )		/* r1              | r0              */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */

    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TP2R_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP2R_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * Scales and offsets the first two components of every 4-float source vertex
 * (x0 x1 x2 x3), copies x2 and x3 unchanged, and writes consecutive 16-byte
 * results:
 *
 *     r0 = x0*m0 + x3*m12
 *     r1 = x1*m5 + x3*m13
 *     r2 = x2
 *     r3 = x3
 *
 * where mN is the float at byte offset 4*N from the matrix pointer.  In the
 * register comments mRC stands for m(4*R + C), e.g. m31 is m13.
 *
 * In:    ARG_DEST, ARG_MATRIX, ARG_SOURCE (stack operands defined in
 *        xform_args.h).  NOTE(review): presumably a destination vector,
 *        a 16-float matrix and a source vector -- confirm with the caller.
 * Out:   dest size field = 4, dest flags |= VEC_SIZE_4,
 *        dest count = source count.
 * Loop:  EAX = current source vertex, EDX = current result,
 *        ESI = vertices left, EDI = source stride (bytes);
 *        MM0 and MM1 hold matrix elements.
 * Saves: ESI, EDI.  Clobbers EAX, ECX, EDX, MM0, MM1, MM4-MM6 and the flags;
 *        FEMMS is executed before returning, including when count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = src count    */

    PUSH_L    ( EDI )			/* all ARG_* reads are done by now   */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first result        */
    MOV_L     ( ESI, ECX )		/* ECX = matrix                      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2NRR_3 ) )	/* nothing to transform              */

    /* Loop-invariant matrix elements. */
    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */

    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */

ALIGNTEXT16
LLBL( G3TP2NRR_2 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGIND(EAX) )

    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
    MOVQ      ( MM5, MM6 )		/* x3              | x2              */

    ADD_L     ( CONST(16), EDX )	/* next r                            */
    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */

    PFMUL     ( MM1, MM6 )		/* x3*m31          | x3*m30          */
    PFADD     ( MM4, MM6 )		/* x1*m11+x3*m31   | x0*m00+x3*m30   */

    MOVQ      ( MM6, REGOFF(-16, EDX) )	/* write r0, r1                      */
    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */

    DEC_L     ( ESI )			/* decrement vertex counter          */

    JNZ       ( LLBL( G3TP2NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP2NRR_3 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET




/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copies every 4-float source vertex unchanged into consecutive 16-byte
 * slots of the destination (r0..r3 = x0..x3).  The matrix argument is not
 * read: the copy does not depend on it, so the load of ARG_MATRIX that the
 * sibling routines perform is omitted here.
 *
 * In:    ARG_DEST, ARG_SOURCE (stack operands defined in xform_args.h).
 *        NOTE(review): presumably a destination vector and a source
 *        vector -- confirm with the caller.
 * Out:   dest size field = 4, dest flags |= VEC_SIZE_4,
 *        dest count = source count.
 * Loop:  EAX = current source vertex, EDX = current result,
 *        ESI = vertices left, EDI = source stride (bytes).
 * Saves: ESI, EDI.  Clobbers EAX, ECX, EDX, MM0, MM1 and the flags; FEMMS is
 *        executed before returning, including when the count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
HIDDEN(_mesa_3dnow_transform_points4_identity)
GLNAME( _mesa_3dnow_transform_points4_identity ):
    _CET_ENDBR
    PUSH_L    ( ESI )

    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = src count    */

    PUSH_L    ( EDI )			/* all ARG_* reads are done by now   */

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first result        */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first source vertex */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_2 ) )	/* nothing to copy                   */

ALIGNTEXT16
LLBL( G3TPIR_1 ):

    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */

    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    MOVQ      ( REGOFF(8, EAX), MM1 )	/* x3              | x2              */

    ADD_L     ( EDI, EAX )		/* next vertex                       */
    PREFETCH  ( REGIND(EAX) )

    ADD_L     ( CONST(16), EDX )	/* next r                            */
    MOVQ      ( MM0, REGOFF(-16, EDX) )	/* r1              | r0              */

    MOVQ      ( MM1, REGOFF(-8, EDX) )	/* r3              | r2              */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TPIR_1 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPIR_2 ):

    FEMMS
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
#endif /* USE_3DNOW_ASM */

/*
 * On Linux/ELF, emit an empty .note.GNU-stack section.  This sits outside
 * the USE_3DNOW_ASM guard, so the section is emitted even when the routines
 * above are compiled out.
 * NOTE(review): by GNU toolchain convention this marks the object as not
 * needing an executable stack -- see the GNU ld documentation.
 */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif
572