1 // stop compiling if NORECBUILD build (only for Visual Studio)
2
3 #ifdef __x86_64__
4
5 #if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD))
6
7 #include <assert.h>
8 #include "ix86-64.h"
9
10 PCSX2_ALIGNED16(static unsigned int p[4]);
11 PCSX2_ALIGNED16(static unsigned int p2[4]);
12 PCSX2_ALIGNED16(static float f[4]);
13
14
15 XMMSSEType g_xmmtypes[XMMREGS] = {0};
16
17 /********************/
18 /* SSE instructions */
19 /********************/
20
/*
 * Emitter helper macros.
 *
 * Every macro here expands inside a function whose parameters are literally
 * named `to` and `from`.  Most of them expand to SEVERAL statements, so they
 * must only be used inside a braced block (never as the unbraced body of an
 * if/else).  `code` arguments are parenthesized wherever they take part in
 * an expression so compound arguments evaluate as written.
 */

/* mem -> reg: check SSE support and the destination index, then hand the
 * nc-byte opcode `code` to MEMADDR_OP with `to` as register and `from` as
 * the memory operand. */
#define SSEMtoRv( nc, code, overb ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS ) ; \
	MEMADDR_OP(0, nc, code, true, to, from, overb)

/* mem -> reg with a two-byte opcode. */
#define SSEMtoR( code, overb ) SSEMtoRv(2, code, overb)

/* reg -> mem: same as SSEMtoRv with the roles of `to` and `from` swapped. */
#define SSERtoMv( nc, code, overb ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( from < XMMREGS) ; \
	MEMADDR_OP(0, nc, code, true, from, to, overb)

/* reg -> mem with a two-byte opcode. */
#define SSERtoM( code, overb ) SSERtoMv( 2, code, overb )

/* Scalar (0xF3-prefixed) mem -> reg: the prefix becomes the low byte of a
 * three-byte code. */
#define SSE_SS_MtoR( code, overb ) \
	SSEMtoRv(3, ((code) << 8) | 0xF3, overb)

/* Scalar (0xF3-prefixed) reg -> mem. */
#define SSE_SS_RtoM( code, overb ) \
	SSERtoMv(3, ((code) << 8) | 0xF3, overb)

/* reg -> reg: REX (via RexRB), the two-byte opcode, then a mod=3 ModRM. */
#define SSERtoR( code ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS && from < XMMREGS) ; \
	RexRB(0, to, from); \
	write16( code ); \
	ModRM( 3, to, from );

/* 0x66-prefixed mem -> reg: the prefix becomes the low byte of a three-byte
 * code. */
#define SSEMtoR66( code ) \
	SSEMtoRv( 3, ((code) << 8) | 0x66, 0 )

/* 0x66-prefixed reg -> mem. */
#define SSERtoM66( code ) \
	SSERtoMv( 3, ((code) << 8) | 0x66, 0 )

/* 0x66-prefixed reg -> reg. */
#define SSERtoR66( code ) \
	write8( 0x66 ); \
	SSERtoR( code );

/* 0x66-prefixed reg -> reg with `from` in the ModRM reg slot and `to` in the
 * rm slot (the reverse of SSERtoR66). */
#define _SSERtoR66( code ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS && from < XMMREGS) ; \
	write8( 0x66 ); \
	RexRB(0, from, to); \
	write16( code ); \
	ModRM( 3, from, to );

/* 0xF3-prefixed reg -> reg. */
#define SSE_SS_RtoR( code ) \
	assert( cpucaps.hasStreamingSIMDExtensions ); \
	assert( to < XMMREGS && from < XMMREGS) ; \
	write8( 0xf3 ); \
	RexRB(0, to, from); \
	write16( code ); \
	ModRM( 3, to, from );

/* Packed compare, mem -> reg: opcode 0xc20f followed by the predicate byte. */
#define CMPPSMtoR( op ) \
	SSEMtoR( 0xc20f, 1 ); \
	write8( op );

/* Packed compare, reg -> reg. */
#define CMPPSRtoR( op ) \
	SSERtoR( 0xc20f ); \
	write8( op );

/* Scalar compare, mem -> reg. */
#define CMPSSMtoR( op ) \
	SSE_SS_MtoR( 0xc20f, 1 ); \
	write8( op );

/* Scalar compare, reg -> reg. */
#define CMPSSRtoR( op ) \
	SSE_SS_RtoR( 0xc20f ); \
	write8( op );
89
90
91
92 void WriteRmOffset(x86IntRegType to, int offset);
93 void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset);
94
95 /* movups [r32][r32*scale] to xmm1 */
SSE_MOVUPSRmStoR(x86SSERegType to,x86IntRegType from,x86IntRegType from2,int scale)96 void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
97 {
98 assert( cpucaps.hasStreamingSIMDExtensions );
99 RexRXB(0, to, from2, from);
100 write16( 0x100f );
101 ModRM( 0, to, 0x4 );
102 SibSB( scale, from2, from );
103 }
104
105 /* movups xmm1 to [r32][r32*scale] */
SSE_MOVUPSRtoRmS(x86SSERegType to,x86IntRegType from,x86IntRegType from2,int scale)106 void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
107 {
108 assert( cpucaps.hasStreamingSIMDExtensions );
109 RexRXB(1, to, from2, from);
110 write16( 0x110f );
111 ModRM( 0, to, 0x4 );
112 SibSB( scale, from2, from );
113 }
114
115 /* movups [r32] to r32 */
SSE_MOVUPSRmtoR(x86IntRegType to,x86IntRegType from)116 void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from )
117 {
118 assert( cpucaps.hasStreamingSIMDExtensions );
119 RexRB(0, to, from);
120 write16( 0x100f );
121 ModRM( 0, to, from );
122 }
123
124 /* movups r32 to [r32] */
SSE_MOVUPSRtoRm(x86IntRegType to,x86IntRegType from)125 void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from )
126 {
127 assert( cpucaps.hasStreamingSIMDExtensions );
128 RexRB(0, from, to);
129 write16( 0x110f );
130 ModRM( 0, from, to );
131 }
132
133 /* movlps [r32] to r32 */
SSE_MOVLPSRmtoR(x86SSERegType to,x86IntRegType from)134 void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from )
135 {
136 assert( cpucaps.hasStreamingSIMDExtensions );
137 RexRB(1, to, from);
138 write16( 0x120f );
139 ModRM( 0, to, from );
140 }
141
/*
 * movlps [r32+offset] to xmm
 *
 * Opcode word 0x120f; the addressing bytes for `from` + `offset` are left
 * to WriteRmOffsetFrom.
 */
void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
    assert( cpucaps.hasStreamingSIMDExtensions );

    RexRB( 0, to, from );
    write16( 0x120F );
    WriteRmOffsetFrom( to, from, offset );
}
149
150 /* movaps r32 to [r32] */
SSE_MOVLPSRtoRm(x86IntRegType to,x86IntRegType from)151 void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from )
152 {
153 assert( cpucaps.hasStreamingSIMDExtensions );
154 RexRB(0, from, to);
155 write16( 0x130f );
156 ModRM( 0, from, to );
157 }
158
/*
 * movlps xmm to [r32+offset]
 *
 * Opcode word 0x130f.  `from` is passed in the register slot and `to` in the
 * base slot of RexRB / WriteRmOffsetFrom.
 * NOTE(review): the declared parameter types look swapped relative to that
 * usage (`to` is typed as an SSE register) -- confirm against the header.
 */
void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
{
    assert( cpucaps.hasStreamingSIMDExtensions );

    RexRB( 0, from, to );
    write16( 0x130F );
    WriteRmOffsetFrom( from, to, offset );
}
166
167 /* movaps [r32][r32*scale] to xmm1 */
SSE_MOVAPSRmStoR(x86SSERegType to,x86IntRegType from,x86IntRegType from2,int scale)168 void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
169 {
170 assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
171 RexRXB(0, to, from2, from);
172 write16( 0x280f );
173 ModRM( 0, to, 0x4 );
174 SibSB( scale, from2, from );
175 }
176
177 /* movaps xmm1 to [r32][r32*scale] */
SSE_MOVAPSRtoRmS(x86SSERegType to,x86IntRegType from,x86IntRegType from2,int scale)178 void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
179 {
180 assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
181 RexRXB(0, to, from2, from);
182 write16( 0x290f );
183 ModRM( 0, to, 0x4 );
184 SibSB( scale, from2, from );
185 }
186
187 // movaps [r32+offset] to r32
SSE_MOVAPSRmtoROffset(x86SSERegType to,x86IntRegType from,int offset)188 void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
189 {
190 assert( cpucaps.hasStreamingSIMDExtensions );
191 RexRB(0, to, from);
192 write16( 0x280f );
193 WriteRmOffsetFrom(to, from, offset);
194 }
195
196 // movaps r32 to [r32+offset]
SSE_MOVAPSRtoRmOffset(x86IntRegType to,x86SSERegType from,int offset)197 void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
198 {
199 assert( cpucaps.hasStreamingSIMDExtensions );
200 RexRB(0, from, to);
201 write16( 0x290f );
202 WriteRmOffsetFrom(from, to, offset);
203 }
204
205 // movdqa [r32+offset] to r32
SSE2_MOVDQARmtoROffset(x86SSERegType to,x86IntRegType from,int offset)206 void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
207 {
208 assert( cpucaps.hasStreamingSIMDExtensions );
209 write8(0x66);
210 RexRB(0, to, from);
211 write16( 0x6f0f );
212 WriteRmOffsetFrom(to, from, offset);
213 }
214
215 // movdqa r32 to [r32+offset]
SSE2_MOVDQARtoRmOffset(x86IntRegType to,x86SSERegType from,int offset)216 void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
217 {
218 assert( cpucaps.hasStreamingSIMDExtensions );
219 write8(0x66);
220 RexRB(0, from, to);
221 write16( 0x7f0f );
222 WriteRmOffsetFrom(from, to, offset);
223 }
224
225 // movups [r32+offset] to r32
SSE_MOVUPSRmtoROffset(x86SSERegType to,x86IntRegType from,int offset)226 void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
227 {
228 RexRB(0, to, from);
229 write16( 0x100f );
230 WriteRmOffsetFrom(to, from, offset);
231 }
232
233 // movups r32 to [r32+offset]
SSE_MOVUPSRtoRmOffset(x86SSERegType to,x86IntRegType from,int offset)234 void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
235 {
236 assert( cpucaps.hasStreamingSIMDExtensions );
237 RexRB(0, from, to);
238 write16( 0x110f );
239 WriteRmOffsetFrom(from, to, offset);
240 }
241
/*
 * MOVAPS: move aligned packed single-precision FP values.
 * Load uses opcode word 0x280f, store uses 0x290f.
 */
void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    SSEMtoR( 0x280F, 0 );
}

void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from )
{
    SSERtoM( 0x290F, 0 );
}

void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSERtoR( 0x280F );
}
248
/*
 * MOVUPS between an absolute memory address and an XMM register.
 * Load uses opcode word 0x100f, store uses 0x110f.
 */
void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    SSEMtoR( 0x100F, 0 );
}

void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from )
{
    SSERtoM( 0x110F, 0 );
}
251
/*
 * movsd xmm to xmm
 *
 * With SSE2 available: 0xf2 prefix followed by the reg-to-reg form of
 * opcode word 0x100f.  Otherwise the work is delegated to the SSE2EMU
 * variant.
 */
void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions ) {
        write8( 0xF2 );
        SSERtoR( 0x100F );
        return;
    }

    SSE2EMU_MOVSD_XMM_to_XMM( to, from );
}
260
/*
 * movq m64 to xmm
 *
 * With SSE2 available: the 0xf3-prefixed memory form of opcode word 0x7e0f.
 * Otherwise the work is delegated to the SSE2EMU variant.
 */
void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from )
{
    if( cpucaps.hasStreamingSIMD2Extensions ) {
        SSE_SS_MtoR( 0x7E0F, 0 );
        return;
    }

    SSE2EMU_MOVQ_M64_to_XMM( to, from );
}
268
/*
 * movq xmm to xmm
 *
 * With SSE2 available: the 0xf3-prefixed reg-to-reg form of opcode word
 * 0x7e0f.  Otherwise the work is delegated to the SSE2EMU variant.
 */
void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions ) {
        SSE_SS_RtoR( 0x7E0F );
        return;
    }

    SSE2EMU_MOVQ_XMM_to_XMM( to, from );
}
276
/*
 * movq xmm to m64
 *
 * With SSE2 available: the 0x66-prefixed store form of opcode word 0xd60f.
 * Without SSE2 it falls back to SSE_MOVLPS_XMM_to_M64.
 *
 * NOTE(review): `to` is declared u32 while other store helpers in this
 * x86-64 file take uptr; a 64-bit address would be truncated before it gets
 * here -- confirm against the header prototype.
 */
void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions ) {
        SSERtoM66( 0xD60F );
        return;
    }

    SSE_MOVLPS_XMM_to_M64( to, from );
}
284
#ifndef __x86_64__
/*
 * MMX <-> XMM quadword moves.  This whole file is wrapped in
 * `#ifdef __x86_64__`, so this section is compiled out here.
 */

/* movdq2q xmm to mm: 0xf2 prefix + reg-to-reg form of opcode word 0xd60f,
 * or the SSE2EMU variant when SSE2 is unavailable. */
void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions ) {
        write8( 0xF2 );
        SSERtoR( 0xD60F );
        return;
    }

    SSE2EMU_MOVDQ2Q_XMM_to_MM( to, from );
}

/* movq2dq mm to xmm: 0xf3-prefixed reg-to-reg form of opcode word 0xd60f,
 * or the SSE2EMU variant when SSE2 is unavailable. */
void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from )
{
    if( cpucaps.hasStreamingSIMD2Extensions ) {
        SSE_SS_RtoR( 0xD60F );
        return;
    }

    SSE2EMU_MOVQ2DQ_MM_to_XMM( to, from );
}
#endif
302
303 //**********************************************************************************/
304 //MOVSS: Move Scalar Single-Precision FP value *
305 //**********************************************************************************
SSE_MOVSS_M32_to_XMM(x86SSERegType to,uptr from)306 void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); }
SSE_MOVSS_XMM_to_M32(u32 to,x86SSERegType from)307 void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); }
/*
 * movss xmm to [r32]
 *
 * 0xf3 prefix, opcode word 0x110f, then a mod = 0 ModRM byte with `from` in
 * the reg field and `to` in the rm field.  Checks the SSE capability like
 * the macro-based MOVSS emitters do.
 */
void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from )
{
    assert( cpucaps.hasStreamingSIMDExtensions );

    write8( 0xf3 );
    RexRB( 0, from, to );
    write16( 0x110f );
    ModRM( 0, from, to );
}
315
/* movss xmm to xmm: 0xf3-prefixed reg-to-reg form of opcode word 0x100f. */
void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSE_SS_RtoR( 0x100F );
}
317
/*
 * movss [r32+offset] to xmm
 *
 * 0xf3 prefix, opcode word 0x100f, then the addressing bytes for
 * `from` + `offset`.  Checks the SSE capability like the macro-based MOVSS
 * emitters do.
 */
void SSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
    assert( cpucaps.hasStreamingSIMDExtensions );

    write8( 0xf3 );
    RexRB( 0, to, from );
    write16( 0x100f );
    WriteRmOffsetFrom( to, from, offset );
}
325
/*
 * movss xmm to [r32+offset]
 *
 * 0xf3 prefix, opcode word 0x110f, then the addressing bytes for
 * `to` + `offset` with `from` as the register operand.  Checks the SSE
 * capability like the macro-based MOVSS emitters do.
 */
void SSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
    assert( cpucaps.hasStreamingSIMDExtensions );

    write8( 0xf3 );
    RexRB( 0, from, to );
    write16( 0x110f );
    WriteRmOffsetFrom( from, to, offset );
}
333
/* maskmovdqu xmm, xmm: 0x66-prefixed reg-to-reg form of opcode word 0xf70f. */
void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSERtoR66( 0xF70F );
}
335 //**********************************************************************************/
336 //MOVLPS: Move low Packed Single-Precision FP *
337 //**********************************************************************************
SSE_MOVLPS_M64_to_XMM(x86SSERegType to,uptr from)338 void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); }
SSE_MOVLPS_XMM_to_M64(u32 to,x86SSERegType from)339 void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); }
340
/*
 * movlps [r32+offset] to xmm
 *
 * Opcode word 0x120f; WriteRmOffsetFrom supplies the addressing bytes for
 * `from` + `offset`.
 */
void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
    assert( cpucaps.hasStreamingSIMDExtensions );

    RexRB( 0, to, from );
    write16( 0x120F );
    WriteRmOffsetFrom( to, from, offset );
}
348
/*
 * movlps xmm to [r32+offset]
 *
 * Opcode word 0x130f; WriteRmOffsetFrom supplies the addressing bytes for
 * `to` + `offset` with `from` as the register operand.  Checks the SSE
 * capability like its load counterpart SSE_MOVLPS_RmOffset_to_XMM does.
 */
void SSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
    assert( cpucaps.hasStreamingSIMDExtensions );

    RexRB( 0, from, to );
    write16( 0x130f );
    WriteRmOffsetFrom( from, to, offset );
}
355
356 /////////////////////////////////////////////////////////////////////////////////////
357 //**********************************************************************************/
358 //MOVHPS: Move High Packed Single-Precision FP *
359 //**********************************************************************************
SSE_MOVHPS_M64_to_XMM(x86SSERegType to,uptr from)360 void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); }
SSE_MOVHPS_XMM_to_M64(u32 to,x86SSERegType from)361 void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); }
362
/*
 * movhps [r32+offset] to xmm
 *
 * Opcode word 0x160f; WriteRmOffsetFrom supplies the addressing bytes for
 * `from` + `offset`.
 */
void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
    assert( cpucaps.hasStreamingSIMDExtensions );

    RexRB( 0, to, from );
    write16( 0x160F );
    WriteRmOffsetFrom( to, from, offset );
}
370
/*
 * movhps xmm to [r32+offset]
 *
 * Opcode word 0x170f; WriteRmOffsetFrom supplies the addressing bytes for
 * `to` + `offset` with `from` as the register operand.
 */
void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
    assert( cpucaps.hasStreamingSIMDExtensions );

    RexRB( 0, from, to );
    write16( 0x170F );
    WriteRmOffsetFrom( from, to, offset );
}
378
379 /////////////////////////////////////////////////////////////////////////////////////
380 //**********************************************************************************/
381 //MOVLHPS: Moved packed Single-Precision FP low to high *
382 //**********************************************************************************
SSE_MOVLHPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)383 void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); }
384
385 //////////////////////////////////////////////////////////////////////////////////////
386 //**********************************************************************************/
387 //MOVHLPS: Moved packed Single-Precision FP High to Low *
388 //**********************************************************************************
SSE_MOVHLPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)389 void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); }
390
/* ANDPS: bit-wise logical AND for single FP (opcode word 0x540f). */
void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    SSEMtoR( 0x540F, 0 );
}

void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSERtoR( 0x540F );
}

/* ANDNPS: bit-wise logical AND NOT of single-precision FP values
 * (opcode word 0x550f). */
void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    SSEMtoR( 0x550F, 0 );
}

void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSERtoR( 0x550F );
}

/* RCPPS: packed single-precision FP reciprocal (opcode word 0x530f). */
void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSERtoR( 0x530F );
}

void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    SSEMtoR( 0x530F, 0 );
}

/* RCPSS: scalar form of the reciprocal (same opcode word, 0xf3-prefixed). */
void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSE_SS_RtoR( 0x530F );
}

void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from )
{
    SSE_SS_MtoR( 0x530F, 0 );
}

/* ORPS: bit-wise logical OR of single-precision FP data (opcode word 0x560f). */
void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    SSEMtoR( 0x560F, 0 );
}

void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSERtoR( 0x560F );
}

/* XORPS: bit-wise logical XOR of single-precision FP values
 * (opcode word 0x570f). */
void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    SSEMtoR( 0x570F, 0 );
}

void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSERtoR( 0x570F );
}
428
429 ///////////////////////////////////////////////////////////////////////////////////////
430 //**********************************************************************************/
431 //ADDPS : ADD Packed Single-Precision FP Values *
432 //**********************************************************************************
SSE_ADDPS_M128_to_XMM(x86SSERegType to,uptr from)433 void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); }
SSE_ADDPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)434 void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); }
435
436 ////////////////////////////////////////////////////////////////////////////////////
437 //**********************************************************************************/
438 //ADDSS : ADD Scalar Single-Precision FP Values *
439 //**********************************************************************************
SSE_ADDSS_M32_to_XMM(x86SSERegType to,uptr from)440 void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); }
SSE_ADDSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)441 void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); }
442
443 /////////////////////////////////////////////////////////////////////////////////////////
444 //**********************************************************************************/
445 //SUBPS: Packed Single-Precision FP Subtract *
446 //**********************************************************************************
SSE_SUBPS_M128_to_XMM(x86SSERegType to,uptr from)447 void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); }
SSE_SUBPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)448 void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); }
449
450 ///////////////////////////////////////////////////////////////////////////////////////
451 //**********************************************************************************/
452 //SUBSS : Scalar Single-Precision FP Subtract *
453 //**********************************************************************************
SSE_SUBSS_M32_to_XMM(x86SSERegType to,uptr from)454 void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); }
SSE_SUBSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)455 void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); }
456
457 /////////////////////////////////////////////////////////////////////////////////////////
458 //**********************************************************************************/
459 //MULPS : Packed Single-Precision FP Multiply *
460 //**********************************************************************************
SSE_MULPS_M128_to_XMM(x86SSERegType to,uptr from)461 void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); }
SSE_MULPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)462 void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); }
463
464 ////////////////////////////////////////////////////////////////////////////////////////
465 //**********************************************************************************/
466 //MULSS : Scalar Single-Precision FP Multiply *
467 //**********************************************************************************
SSE_MULSS_M32_to_XMM(x86SSERegType to,uptr from)468 void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); }
SSE_MULSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)469 void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); }
470
/*
 * Packed single-precision FP compare (CMPccPS).
 *
 * Each wrapper emits the compare opcode through CMPPSMtoR / CMPPSRtoR and
 * passes the predicate as the trailing byte:
 *   0 EQ, 1 LT, 2 LE, 3 UNORD, 4 NE, 5 NLT, 6 NLE, 7 ORD.
 *
 * Not provided here: the generic SSE_CMPPS_I8_to_XMM, SSE_CMPPS_M32_to_XMM
 * and SSE_CMPPS_XMM_to_XMM forms.
 */
void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    CMPPSMtoR( 0 );
}

void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPPSRtoR( 0 );
}

void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    CMPPSMtoR( 1 );
}

void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPPSRtoR( 1 );
}

void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    CMPPSMtoR( 2 );
}

void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPPSRtoR( 2 );
}

void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    CMPPSMtoR( 3 );
}

void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPPSRtoR( 3 );
}

void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    CMPPSMtoR( 4 );
}

void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPPSRtoR( 4 );
}

void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    CMPPSMtoR( 5 );
}

void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPPSRtoR( 5 );
}

void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    CMPPSMtoR( 6 );
}

void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPPSRtoR( 6 );
}

void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from )
{
    CMPPSMtoR( 7 );
}

void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPPSRtoR( 7 );
}
494
/*
 * Scalar single-precision FP compare (CMPccSS).
 *
 * Each wrapper emits the compare opcode through CMPSSMtoR / CMPSSRtoR and
 * passes the predicate as the trailing byte:
 *   0 EQ, 1 LT, 2 LE, 3 UNORD, 4 NE, 5 NLT, 6 NLE, 7 ORD.
 *
 * Not provided here: the generic SSE_CMPSS_I8_to_XMM, SSE_CMPSS_M32_to_XMM
 * and SSE_CMPSS_XMM_to_XMM forms.
 */
void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from )
{
    CMPSSMtoR( 0 );
}

void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPSSRtoR( 0 );
}

void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from )
{
    CMPSSMtoR( 1 );
}

void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPSSRtoR( 1 );
}

void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from )
{
    CMPSSMtoR( 2 );
}

void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPSSRtoR( 2 );
}

void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from )
{
    CMPSSMtoR( 3 );
}

void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPSSRtoR( 3 );
}

void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from )
{
    CMPSSMtoR( 4 );
}

void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPSSRtoR( 4 );
}

void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from )
{
    CMPSSMtoR( 5 );
}

void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPSSRtoR( 5 );
}

void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from )
{
    CMPSSMtoR( 6 );
}

void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPSSRtoR( 6 );
}

void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from )
{
    CMPSSMtoR( 7 );
}

void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    CMPSSRtoR( 7 );
}
518
/*
 * ucomiss m32 with xmm
 *
 * Emits the opcode described by VAROP2(0x0F, 0x2E) through MEMADDR_OP with
 * `to` as the register operand and `from` as the memory address.  Performs
 * the same capability and register-index checks as the SSEMtoR-based
 * emitters.
 */
void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from )
{
    assert( cpucaps.hasStreamingSIMDExtensions );
    assert( to < XMMREGS );

    MEMADDR_OP(0, VAROP2(0x0F, 0x2E), true, to, from, 0);
}
523
/*
 * ucomiss xmm with xmm
 *
 * Uses the shared SSERtoR helper, which emits RexRB, the opcode word 0x2e0f
 * and a mod = 3 ModRM byte -- the same sequence as before -- and adds the
 * capability and register-index asserts the other reg-to-reg emitters have.
 */
void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
{
    SSERtoR( 0x2e0f );
}
530
531 //////////////////////////////////////////////////////////////////////////////////////////
532 //**********************************************************************************/
533 //RSQRTPS : Packed Single-Precision FP Square Root Reciprocal *
534 //**********************************************************************************
SSE_RSQRTPS_M128_to_XMM(x86SSERegType to,uptr from)535 void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); }
SSE_RSQRTPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)536 void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x520f ); }
537
538 /////////////////////////////////////////////////////////////////////////////////////
539 //**********************************************************************************/
540 //RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal *
541 //**********************************************************************************
SSE_RSQRTSS_M32_to_XMM(x86SSERegType to,uptr from)542 void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); }
SSE_RSQRTSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)543 void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x520f ); }
544
545 ////////////////////////////////////////////////////////////////////////////////////
546 //**********************************************************************************/
547 //SQRTPS : Packed Single-Precision FP Square Root *
548 //**********************************************************************************
SSE_SQRTPS_M128_to_XMM(x86SSERegType to,uptr from)549 void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); }
SSE_SQRTPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)550 void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x510f ); }
551
552 //////////////////////////////////////////////////////////////////////////////////////
553 //**********************************************************************************/
554 //SQRTSS : Scalar Single-Precision FP Square Root *
555 //**********************************************************************************
SSE_SQRTSS_M32_to_XMM(x86SSERegType to,uptr from)556 void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); }
SSE_SQRTSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)557 void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x510f ); }
558
559 ////////////////////////////////////////////////////////////////////////////////////////
560 //**********************************************************************************/
561 //MAXPS: Return Packed Single-Precision FP Maximum *
562 //**********************************************************************************
SSE_MAXPS_M128_to_XMM(x86SSERegType to,uptr from)563 void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); }
SSE_MAXPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)564 void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); }
565
566 /////////////////////////////////////////////////////////////////////////////////////////
567 //**********************************************************************************/
568 //MAXSS: Return Scalar Single-Precision FP Maximum *
569 //**********************************************************************************
SSE_MAXSS_M32_to_XMM(x86SSERegType to,uptr from)570 void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); }
SSE_MAXSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)571 void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); }
572
573 #ifndef __x86_64__
574 /////////////////////////////////////////////////////////////////////////////////////////
575 //**********************************************************************************/
576 //CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion *
577 //**********************************************************************************
// CVTPI2PS xmm, m64 (0F 2A /r): two packed int32 in memory -> packed singles.
void SSE_CVTPI2PS_M64_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x2A0F, 0);
}

// CVTPI2PS xmm, mm (0F 2A /r): MMX register source.
void SSE_CVTPI2PS_MM_to_XMM(x86SSERegType to, x86MMXRegType from)
{
	SSERtoR(0x2A0F);
}
580
581 ///////////////////////////////////////////////////////////////////////////////////////////
582 //**********************************************************************************/
583 //CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion *
584 //**********************************************************************************
// CVTPS2PI mm, m64 (0F 2D /r): two packed singles in memory -> packed int32.
void SSE_CVTPS2PI_M64_to_MM(x86MMXRegType to, uptr from)
{
	SSEMtoR(0x2D0F, 0);
}

// CVTPS2PI mm, xmm (0F 2D /r): XMM register source.
void SSE_CVTPS2PI_XMM_to_MM(x86MMXRegType to, x86SSERegType from)
{
	SSERtoR(0x2D0F);
}
587 #endif
588
// CVTTSS2SI r32, m32 (F3 0F 2C /r): truncating scalar single -> int32, memory source.
void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from)
{
	SSE_SS_MtoR(0x2C0F, 0);
}

// CVTTSS2SI r32, xmm (F3 0F 2C /r): truncating scalar single -> int32, register source.
// Emitted by hand rather than through SSE_SS_RtoR, so no capability or
// register-range asserts run here.
void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	write8(0xF3);          // scalar-single prefix
	RexRB(0, to, from);    // W=0; presumably adds REX.R/REX.B for registers 8-15
	write16(0x2C0F);       // opcode bytes 0F 2C
	ModRM(3, to, from);    // mod=3: register-direct, reg=to, rm=from
}
597
// CVTSI2SS xmm, m32 (F3 0F 2A /r): int32 in memory -> scalar single.
void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x2A0F, 0);
}

// CVTSI2SS xmm, r32 (F3 0F 2A /r): int32 in a general register -> scalar single.
// Emitted by hand, so no capability or register-range asserts run here.
void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	write8(0xF3);          // scalar-single prefix
	RexRB(0, to, from);    // W=0; presumably adds REX.R/REX.B for registers 8-15
	write16(0x2A0F);       // opcode bytes 0F 2A
	ModRM(3, to, from);    // mod=3: register-direct, reg=to, rm=from
}
606
607 ///////////////////////////////////////////////////////////////////////////////////////////
608 //**********************************************************************************/
609 //CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion *
610 //**********************************************************************************
// CVTDQ2PS xmm, m128 (0F 5B /r): packed int32 -> packed single, memory source.
void SSE2_CVTDQ2PS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x5B0F, 0);
}

// CVTDQ2PS xmm, xmm (0F 5B /r): packed int32 -> packed single, register source.
void SSE2_CVTDQ2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x5B0F);
}
613
614 //**********************************************************************************/
615 //CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion *
616 //**********************************************************************************
// CVTPS2DQ xmm, m128 (66 0F 5B /r): packed single -> packed int32, memory source.
void SSE2_CVTPS2DQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x5B0F);
}

// CVTPS2DQ xmm, xmm (66 0F 5B /r): packed single -> packed int32, register source.
void SSE2_CVTPS2DQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x5B0F);
}
619
// CVTTPS2DQ xmm, xmm (F3 0F 5B /r): truncating packed single -> packed int32.
// The F3 prefix supplied by SSE_SS_RtoR is what distinguishes this from CVTDQ2PS.
void SSE2_CVTTPS2DQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x5B0F);
}
621 /////////////////////////////////////////////////////////////////////////////////////
622 //**********************************************************************************/
623 //MINPS: Return Packed Single-Precision FP Minimum *
624 //**********************************************************************************
// MINPS xmm, m128 (0F 5D /r): packed single-precision minimum, memory source.
void SSE_MINPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x5D0F, 0);
}

// MINPS xmm, xmm (0F 5D /r): packed single-precision minimum, register source.
void SSE_MINPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x5D0F);
}
627
628 //////////////////////////////////////////////////////////////////////////////////////////
629 //**********************************************************************************/
630 //MINSS: Return Scalar Single-Precision FP Minimum *
631 //**********************************************************************************
// MINSS xmm, m32 (F3 0F 5D /r): scalar single-precision minimum, memory source.
void SSE_MINSS_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x5D0F, 0);
}

// MINSS xmm, xmm (F3 0F 5D /r): scalar single-precision minimum, register source.
void SSE_MINSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x5D0F);
}
634
635 #ifndef __x86_64__
636 ///////////////////////////////////////////////////////////////////////////////////////////
637 //**********************************************************************************/
638 //PMAXSW: Packed Signed Integer Word Maximum *
639 //**********************************************************************************
640 //missing
641 // SSE_PMAXSW_M64_to_MM
642 // SSE2_PMAXSW_M128_to_XMM
643 // SSE2_PMAXSW_XMM_to_XMM
// PMAXSW mm, mm (0F EE /r): per-word signed maximum on MMX registers.
void SSE_PMAXSW_MM_to_MM(x86MMXRegType to, x86MMXRegType from)
{
	SSERtoR(0xEE0F);
}
645
646 ///////////////////////////////////////////////////////////////////////////////////////
647 //**********************************************************************************/
648 //PMINSW: Packed Signed Integer Word Minimum *
649 //**********************************************************************************
650 //missing
651 // SSE_PMINSW_M64_to_MM
652 // SSE2_PMINSW_M128_to_XMM
653 // SSE2_PMINSW_XMM_to_XMM
// PMINSW mm, mm (0F EA /r): per-word signed minimum on MMX registers.
void SSE_PMINSW_MM_to_MM(x86MMXRegType to, x86MMXRegType from)
{
	SSERtoR(0xEA0F);
}
655 #endif
656
657 //////////////////////////////////////////////////////////////////////////////////////
658 //**********************************************************************************/
659 //SHUFPS: Shuffle Packed Single-Precision FP Values *
660 //**********************************************************************************
// SHUFPS xmm, xmm, imm8 (0F C6 /r ib): the selector byte trails the ModRM byte.
void SSE_SHUFPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
	SSERtoR(0xC60F);
	write8(imm8);
}

// SHUFPS xmm, m128, imm8 (0F C6 /r ib).
// The trailing macro argument 1 presumably tells MEMADDR_OP that one immediate
// byte follows the address field -- confirm against its definition.
void SSE_SHUFPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
{
	SSEMtoR(0xC60F, 1);
	write8(imm8);
}
663
// SHUFPS xmm, [from + offset], imm8 (0F C6 /r ib): the memory operand is
// addressed through general register `from` plus a displacement.
void SSE_SHUFPS_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset, u8 imm8)
{
	RexRB(0, to, from);                     // W=0; presumably REX.R/REX.B for registers 8-15
	write16(0xC60F);                        // opcode bytes 0F C6
	WriteRmOffsetFrom(to, from, offset);    // ModRM (+ displacement) for [from + offset]
	write8(imm8);                           // shuffle selector
}
671
672 ////////////////////////////////////////////////////////////////////////////////////
673 //**********************************************************************************/
674 //PSHUFD: Shuffle Packed DoubleWords *
675 //**********************************************************************************
// PSHUFD xmm, xmm, imm8 (66 0F 70 /r ib).
// When the host reports no SSE2 support the work is delegated to
// SSE2EMU_PSHUFD_XMM_to_XMM instead of emitting the instruction.
void SSE2_PSHUFD_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
	if (cpucaps.hasStreamingSIMD2Extensions) {
		SSERtoR66(0x700F);
		write8(imm8);
		return;
	}

	SSE2EMU_PSHUFD_XMM_to_XMM(to, from, imm8);
}

// PSHUFD xmm, m128, imm8 (66 0F 70 /r ib): the 0x66 prefix is folded into the
// three-byte code passed to SSEMtoRv. No SSE2 fallback exists for this form.
void SSE2_PSHUFD_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
{
	SSEMtoRv(3, 0x700F66, 1);
	write8(imm8);
}
687
// PSHUFLW xmm, xmm, imm8 (F2 0F 70 /r ib): the F2 prefix is written by hand
// ahead of the generic register-register macro.
void SSE2_PSHUFLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
	write8(0xF2);
	SSERtoR(0x700F);
	write8(imm8);
}

// PSHUFLW xmm, m128, imm8 (F2 0F 70 /r ib): prefix folded into the 3-byte code.
void SSE2_PSHUFLW_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
{
	SSEMtoRv(3, 0x700FF2, 1);
	write8(imm8);
}

// PSHUFHW xmm, xmm, imm8 (F3 0F 70 /r ib): reuses the scalar-single macro
// only for the F3 prefix it supplies.
void SSE2_PSHUFHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
	SSE_SS_RtoR(0x700F);
	write8(imm8);
}

// PSHUFHW xmm, m128, imm8 (F3 0F 70 /r ib).
void SSE2_PSHUFHW_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
{
	SSE_SS_MtoR(0x700F, 1);
	write8(imm8);
}
692
693 ///////////////////////////////////////////////////////////////////////////////////
694 //**********************************************************************************/
695 //UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data *
696 //**********************************************************************************
// UNPCKLPS xmm, m128 (0F 14 /r): interleave low singles, memory source.
void SSE_UNPCKLPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x140F, 0);
}

// UNPCKLPS xmm, xmm (0F 14 /r): interleave low singles, register source.
void SSE_UNPCKLPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x140F);
}
699
700 ////////////////////////////////////////////////////////////////////////////////////////
701 //**********************************************************************************/
702 //UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data *
703 //**********************************************************************************
// UNPCKHPS xmm, m128 (0F 15 /r): interleave high singles, memory source.
void SSE_UNPCKHPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x150F, 0);
}

// UNPCKHPS xmm, xmm (0F 15 /r): interleave high singles, register source.
void SSE_UNPCKHPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x150F);
}
706
707 ////////////////////////////////////////////////////////////////////////////////////////
708 //**********************************************************************************/
709 //DIVPS : Packed Single-Precision FP Divide *
710 //**********************************************************************************
// DIVPS xmm, m128 (0F 5E /r): packed single-precision divide, memory source.
void SSE_DIVPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x5E0F, 0);
}

// DIVPS xmm, xmm (0F 5E /r): packed single-precision divide, register source.
void SSE_DIVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x5E0F);
}
713
714 //////////////////////////////////////////////////////////////////////////////////////
715 //**********************************************************************************/
716 //DIVSS : Scalar Single-Precision FP Divide *
717 //**********************************************************************************
// DIVSS xmm, m32 (F3 0F 5E /r): scalar single-precision divide, memory source.
void SSE_DIVSS_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x5E0F, 0);
}

// DIVSS xmm, xmm (F3 0F 5E /r): scalar single-precision divide, register source.
void SSE_DIVSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x5E0F);
}
720
721 /////////////////////////////////////////////////////////////////////////////////////////
722 //**********************************************************************************/
723 //STMXCSR : Store Streaming SIMD Extension Control/Status *
724 //**********************************************************************************
// STMXCSR m32 (0F AE /3): store the MXCSR control/status register to memory.
// The literal 3 is the /3 opcode extension occupying the ModRM reg field;
// `from` names the destination address despite its spelling.
void SSE_STMXCSR(uptr from)
{
	MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 3, from, 0);
}
728
729 /////////////////////////////////////////////////////////////////////////////////////
730 //**********************************************************************************/
731 //LDMXCSR : Load Streaming SIMD Extension Control/Status *
732 //**********************************************************************************
// LDMXCSR m32 (0F AE /2): load the MXCSR control/status register from memory.
// The literal 2 is the /2 opcode extension occupying the ModRM reg field.
void SSE_LDMXCSR(uptr from)
{
	MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 2, from, 0);
}
736
737 /////////////////////////////////////////////////////////////////////////////////////
738 //**********************************************************************************/
//PADDB,PADDW,PADDD,PADDQ : Add Packed Integers *
740 //**********************************************************************************
// PADDB xmm, xmm (66 0F FC /r): wrapping add of packed bytes.
void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFC0F);
}

// PADDB xmm, m128 (66 0F FC /r).
void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFC0F);
}

// PADDW xmm, xmm (66 0F FD /r): wrapping add of packed words.
void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFD0F);
}

// PADDW xmm, m128 (66 0F FD /r).
void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFD0F);
}

// PADDD xmm, xmm (66 0F FE /r): wrapping add of packed dwords.
void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFE0F);
}

// PADDD xmm, m128 (66 0F FE /r).
void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFE0F);
}

// PADDQ xmm, xmm (66 0F D4 /r): wrapping add of packed qwords.
void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD40F);
}

// PADDQ xmm, m128 (66 0F D4 /r).
void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD40F);
}
750
751 ///////////////////////////////////////////////////////////////////////////////////
752 //**********************************************************************************/
753 //PCMPxx: Compare Packed Integers *
754 //**********************************************************************************
// PCMPGTB xmm, xmm (66 0F 64 /r): signed greater-than on packed bytes.
void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x640F);
}

// PCMPGTB xmm, m128 (66 0F 64 /r).
void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x640F);
}

// PCMPGTW xmm, xmm (66 0F 65 /r): signed greater-than on packed words.
void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x650F);
}

// PCMPGTW xmm, m128 (66 0F 65 /r).
void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x650F);
}

// PCMPGTD xmm, xmm (66 0F 66 /r): signed greater-than on packed dwords.
void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x660F);
}

// PCMPGTD xmm, m128 (66 0F 66 /r).
void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x660F);
}

// PCMPEQB xmm, xmm (66 0F 74 /r): equality on packed bytes.
void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x740F);
}

// PCMPEQB xmm, m128 (66 0F 74 /r).
void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x740F);
}

// PCMPEQW xmm, xmm (66 0F 75 /r): equality on packed words.
void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x750F);
}

// PCMPEQW xmm, m128 (66 0F 75 /r).
void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x750F);
}
// PCMPEQD xmm, xmm (66 0F 76 /r): equality on packed dwords.
// Without SSE2 the SSE1 float compare SSE_CMPEQPS_XMM_to_XMM is emitted instead.
// NOTE(review): a float equality compare presumably differs from an integer
// compare for NaN and signed-zero bit patterns -- confirm callers tolerate that.
void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE_CMPEQPS_XMM_to_XMM(to, from);
		return;
	}

	SSERtoR66(0x760F);
}
774
// PCMPEQD xmm, m128 (66 0F 76 /r): equality on packed dwords, memory source.
// Without SSE2 the SSE1 float compare SSE_CMPEQPS_M128_to_XMM is emitted instead.
// NOTE(review): a float equality compare presumably differs from an integer
// compare for NaN and signed-zero bit patterns -- confirm callers tolerate that.
void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE_CMPEQPS_M128_to_XMM(to, from);
		return;
	}

	SSEMtoR66(0x760F);
}
784
785 ////////////////////////////////////////////////////////////////////////////////////////////
786 //**********************************************************************************/
787 //PEXTRW,PINSRW: Packed Extract/Insert Word *
788 //**********************************************************************************
// PEXTRW r32, xmm, imm8 (66 0F C5 /r ib): copy the word selected by imm8
// into a general register.
void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8)
{
	SSERtoR66(0xC50F);
	write8(imm8);
}

// PINSRW xmm, r32, imm8 (66 0F C4 /r ib): insert the low word of a general
// register at the word slot selected by imm8.
void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8)
{
	SSERtoR66(0xC40F);
	write8(imm8);
}
791
792 ////////////////////////////////////////////////////////////////////////////////////////////
793 //**********************************************************************************/
794 //PSUBx: Subtract Packed Integers *
795 //**********************************************************************************
// PSUBB xmm, xmm (66 0F F8 /r): wrapping subtract of packed bytes.
void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF80F);
}

// PSUBB xmm, m128 (66 0F F8 /r).
void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF80F);
}

// PSUBW xmm, xmm (66 0F F9 /r): wrapping subtract of packed words.
void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF90F);
}

// PSUBW xmm, m128 (66 0F F9 /r).
void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF90F);
}

// PSUBD xmm, xmm (66 0F FA /r): wrapping subtract of packed dwords.
void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFA0F);
}

// PSUBD xmm, m128 (66 0F FA /r).
void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFA0F);
}

// PSUBQ xmm, xmm (66 0F FB /r): wrapping subtract of packed qwords.
void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFB0F);
}

// PSUBQ xmm, m128 (66 0F FB /r).
void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFB0F);
}
804
805 ///////////////////////////////////////////////////////////////////////////////////////
806 //**********************************************************************************/
807 //MOVD: Move Dword(32bit) to /from XMM reg *
808 //**********************************************************************************
// MOVD xmm, m32 (66 0F 6E /r): load a dword from memory into an XMM register.
void SSE2_MOVD_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x6E0F);
}

// MOVD xmm, r32 (66 0F 6E /r): move a general register into an XMM register.
// Delegates to SSE2EMU_MOVD_R_to_XMM when the host reports no SSE2 support.
void SSE2_MOVD_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	if (cpucaps.hasStreamingSIMD2Extensions) {
		SSERtoR66(0x6E0F);
		return;
	}

	SSE2EMU_MOVD_R_to_XMM(to, from);
}
819
// MOVD xmm, [from] (66 0F 6E /r): load a dword through the pointer held in
// general register `from`.
// NOTE(review): with mod=0 the x86 encoding treats rm=4 as "SIB follows" and
// rm=5 as a disp32/RIP-relative form, so this presumably must not be called
// with an ESP/EBP-class base register -- confirm against callers.
void SSE2_MOVD_Rm_to_XMM(x86SSERegType to, x86IntRegType from)
{
	write8(0x66);          // operand-size prefix selects the XMM form
	RexRB(0, to, from);    // W=0; presumably REX.R/REX.B for registers 8-15
	write16(0x6E0F);       // opcode bytes 0F 6E
	ModRM(0, to, from);    // mod=0: register-indirect, no displacement
}
827
// MOVD xmm, [from + offset] (66 0F 6E /r): load a dword from base register
// plus displacement.
void SSE2_MOVD_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset)
{
	write8(0x66);                           // operand-size prefix selects the XMM form
	RexRB(0, to, from);                     // W=0; presumably REX.R/REX.B for registers 8-15
	write16(0x6E0F);                        // opcode bytes 0F 6E
	WriteRmOffsetFrom(to, from, offset);    // ModRM (+ displacement) for [from + offset]
}
835
// MOVD m32, xmm (66 0F 7E /r): store the low dword of an XMM register to memory.
// NOTE(review): `to` is declared u32 while the sibling memory forms take uptr;
// on a 64-bit host that would truncate an address above 4 GiB -- confirm
// against the header prototype before widening.
void SSE2_MOVD_XMM_to_M32(u32 to, x86SSERegType from)
{
	SSERtoM66(0x7E0F);
}

// MOVD r32, xmm (66 0F 7E /r): move the low dword of an XMM register into a
// general register. Uses the operand-swapped macro so the XMM source lands in
// the ModRM reg field. Delegates to SSE2EMU_MOVD_XMM_to_R without SSE2.
void SSE2_MOVD_XMM_to_R(x86IntRegType to, x86SSERegType from)
{
	if (cpucaps.hasStreamingSIMD2Extensions) {
		_SSERtoR66(0x7E0F);
		return;
	}

	SSE2EMU_MOVD_XMM_to_R(to, from);
}
845
// MOVD [to], xmm (66 0F 7E /r): store the low dword of an XMM register through
// the pointer held in general register `to`.
// NOTE(review): with mod=0 the x86 encoding treats rm=4 as "SIB follows" and
// rm=5 as a disp32/RIP-relative form, so this presumably must not be called
// with an ESP/EBP-class base register -- confirm against callers.
void SSE2_MOVD_XMM_to_Rm(x86IntRegType to, x86SSERegType from)
{
	write8(0x66);          // operand-size prefix selects the XMM form
	RexRB(0, from, to);    // operands swapped: the XMM source sits in the reg field
	write16(0x7E0F);       // opcode bytes 0F 7E
	ModRM(0, from, to);    // mod=0: register-indirect, no displacement
}
853
// MOVD [to + offset], xmm (66 0F 7E /r): store the low dword of an XMM register
// at base register plus displacement.
// Delegates to SSE2EMU_MOVD_XMM_to_RmOffset when the host reports no SSE2 support.
void SSE2_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_MOVD_XMM_to_RmOffset(to, from, offset);
		return;
	}

	write8(0x66);                           // operand-size prefix selects the XMM form
	RexRB(0, from, to);                     // operands swapped: XMM source in the reg field
	write16(0x7E0F);                        // opcode bytes 0F 7E
	WriteRmOffsetFrom(from, to, offset);    // ModRM (+ displacement) for [to + offset]
}
866
867 #ifdef __x86_64__
// MOVQ r64, xmm (66 REX.W 0F 7E /r): move the low qword of an XMM register
// into a 64-bit general register.
void SSE2_MOVQ_XMM_to_R(x86IntRegType to, x86SSERegType from)
{
	assert(from < XMMREGS);

	write8(0x66);          // mandatory prefix, must precede REX
	RexRB(1, from, to);    // first argument 1: presumably sets REX.W for the 64-bit form
	write16(0x7E0F);       // opcode bytes 0F 7E
	ModRM(3, from, to);    // register-direct; XMM source in reg, GPR destination in rm
}
876
// MOVQ xmm, r64 (66 REX.W 0F 6E /r): move a 64-bit general register into the
// low qword of an XMM register.
void SSE2_MOVQ_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	assert(to < XMMREGS);

	write8(0x66);          // mandatory prefix, must precede REX
	RexRB(1, to, from);    // first argument 1: presumably sets REX.W for the 64-bit form
	write16(0x6E0F);       // opcode bytes 0F 6E
	ModRM(3, to, from);    // register-direct; XMM destination in reg, GPR source in rm
}
885
886 #endif
887
888 ////////////////////////////////////////////////////////////////////////////////////
889 //**********************************************************************************/
890 //POR : SSE Bitwise OR *
891 //**********************************************************************************
// POR xmm, xmm (66 0F EB /r): to |= from.
void SSE2_POR_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xEB0F);
}

// POR xmm, m128 (66 0F EB /r).
void SSE2_POR_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xEB0F);
}
894
895 // logical and to &= from
// PAND xmm, xmm (66 0F DB /r): to &= from.
void SSE2_PAND_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xDB0F);
}

// PAND xmm, m128 (66 0F DB /r).
void SSE2_PAND_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xDB0F);
}
898
899 // to = (~to) & from
// PANDN xmm, xmm (66 0F DF /r): to = (~to) & from.
void SSE2_PANDN_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xDF0F);
}

// PANDN xmm, m128 (66 0F DF /r).
void SSE2_PANDN_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xDF0F);
}
902
903 /////////////////////////////////////////////////////////////////////////////////////
904 //**********************************************************************************/
905 //PXOR : SSE Bitwise XOR *
906 //**********************************************************************************
// PXOR xmm, xmm (66 0F EF /r): to ^= from.
void SSE2_PXOR_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xEF0F);
}

// PXOR xmm, m128 (66 0F EF /r).
void SSE2_PXOR_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xEF0F);
}
909 ///////////////////////////////////////////////////////////////////////////////////////
910
// MOVDQA xmm, m128 (66 0F 6F /r): aligned 128-bit load.
void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x6F0F);
}

// MOVDQA m128, xmm (66 0F 7F /r): aligned 128-bit store.
void SSE2_MOVDQA_XMM_to_M128(uptr to, x86SSERegType from)
{
	SSERtoM66(0x7F0F);
}

// MOVDQA xmm, xmm (66 0F 6F /r): register-to-register copy.
void SSE2_MOVDQA_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x6F0F);
}
914
// MOVDQU xmm, m128 (F3 0F 6F /r): unaligned 128-bit load. The scalar-single
// macros are reused only for the F3 prefix they supply.
void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x6F0F, 0);
}

// MOVDQU m128, xmm (F3 0F 7F /r): unaligned 128-bit store.
void SSE2_MOVDQU_XMM_to_M128(uptr to, x86SSERegType from)
{
	SSE_SS_RtoM(0x7F0F, 0);
}

// MOVDQU xmm, xmm (F3 0F 6F /r): register-to-register copy.
void SSE2_MOVDQU_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x6F0F);
}
918
919 // shift right logical
920
// PSRLW xmm, xmm (66 0F D1 /r): logical right shift of packed words, count in `from`.
void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD10F);
}

// PSRLW xmm, m128 (66 0F D1 /r): shift count read from memory.
void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD10F);
}

// PSRLW xmm, imm8 (66 0F 71 /2 ib): shift count given as an immediate.
void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x710F);    // group opcode bytes 0F 71
	ModRM(3, 2, to);    // register-direct; reg field carries opcode extension /2
	write8(imm8);       // shift count
}
931
// PSRLD xmm, xmm (66 0F D2 /r): logical right shift of packed dwords, count in `from`.
void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD20F);
}

// PSRLD xmm, m128 (66 0F D2 /r): shift count read from memory.
void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD20F);
}

// PSRLD xmm, imm8 (66 0F 72 /2 ib): shift count given as an immediate.
void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x720F);    // group opcode bytes 0F 72
	ModRM(3, 2, to);    // register-direct; reg field carries opcode extension /2
	write8(imm8);       // shift count
}
942
// PSRLQ xmm, xmm (66 0F D3 /r): logical right shift of packed qwords, count in `from`.
void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD30F);
}

// PSRLQ xmm, m128 (66 0F D3 /r): shift count read from memory.
void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD30F);
}

// PSRLQ xmm, imm8 (66 0F 73 /2 ib): shift count given as an immediate.
void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x730F);    // group opcode bytes 0F 73
	ModRM(3, 2, to);    // register-direct; reg field carries opcode extension /2
	write8(imm8);       // shift count
}
953
// PSRLDQ xmm, imm8 (66 0F 73 /3 ib): byte-wise right shift of the whole
// 128-bit register. Shares opcode 0F 73 with PSRLQ; only the /3 extension differs.
void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x730F);    // group opcode bytes 0F 73
	ModRM(3, 3, to);    // register-direct; reg field carries opcode extension /3
	write8(imm8);       // shift amount in bytes
}
962
963 // shift right arithmetic
964
// PSRAW xmm, xmm (66 0F E1 /r): arithmetic right shift of packed words, count in `from`.
void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xE10F);
}

// PSRAW xmm, m128 (66 0F E1 /r): shift count read from memory.
void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xE10F);
}

// PSRAW xmm, imm8 (66 0F 71 /4 ib): shift count given as an immediate.
void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x710F);    // group opcode bytes 0F 71
	ModRM(3, 4, to);    // register-direct; reg field carries opcode extension /4
	write8(imm8);       // shift count
}
975
// PSRAD xmm, xmm (66 0F E2 /r): arithmetic right shift of packed dwords, count in `from`.
void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xE20F);
}

// PSRAD xmm, m128 (66 0F E2 /r): shift count read from memory.
void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xE20F);
}

// PSRAD xmm, imm8 (66 0F 72 /4 ib): shift count given as an immediate.
void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x720F);    // group opcode bytes 0F 72
	ModRM(3, 4, to);    // register-direct; reg field carries opcode extension /4
	write8(imm8);       // shift count
}
986
987 // shift left logical
988
// PSLLW xmm, xmm (66 0F F1 /r): logical left shift of packed words, count in `from`.
void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF10F);
}

// PSLLW xmm, m128 (66 0F F1 /r): shift count read from memory.
void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF10F);
}

// PSLLW xmm, imm8 (66 0F 71 /6 ib): shift count given as an immediate.
void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x710F);    // group opcode bytes 0F 71
	ModRM(3, 6, to);    // register-direct; reg field carries opcode extension /6
	write8(imm8);       // shift count
}
999
// PSLLD xmm, xmm (66 0F F2 /r): logical left shift of packed dwords, count in `from`.
void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF20F);
}

// PSLLD xmm, m128 (66 0F F2 /r): shift count read from memory.
void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF20F);
}

// PSLLD xmm, imm8 (66 0F 72 /6 ib): shift count given as an immediate.
void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x720F);    // group opcode bytes 0F 72
	ModRM(3, 6, to);    // register-direct; reg field carries opcode extension /6
	write8(imm8);       // shift count
}
1010
// PSLLQ xmm, xmm (66 0F F3 /r): logical left shift of packed qwords, count in `from`.
void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF30F);
}

// PSLLQ xmm, m128 (66 0F F3 /r): shift count read from memory.
void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF30F);
}

// PSLLQ xmm, imm8 (66 0F 73 /6 ib): shift count given as an immediate.
void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x730F);    // group opcode bytes 0F 73
	ModRM(3, 6, to);    // register-direct; reg field carries opcode extension /6
	write8(imm8);       // shift count
}
1021
// PSLLDQ xmm, imm8 (66 0F 73 /7 ib): byte-wise left shift of the whole
// 128-bit register. Shares opcode 0F 73 with PSLLQ; only the /7 extension differs.
void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);       // operand-size prefix selects the XMM form
	RexB(0, to);        // presumably adds REX.B when `to` is xmm8-15
	write16(0x730F);    // group opcode bytes 0F 73
	ModRM(3, 7, to);    // register-direct; reg field carries opcode extension /7
	write8(imm8);       // shift amount in bytes
}
1030
1031
// PMAXSW xmm, xmm (66 0F EE /r): per-word signed maximum.
void SSE2_PMAXSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xEE0F);
}

// PMAXSW xmm, m128 (66 0F EE /r).
void SSE2_PMAXSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xEE0F);
}

// PMAXUB xmm, xmm (66 0F DE /r): per-byte unsigned maximum.
void SSE2_PMAXUB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xDE0F);
}

// PMAXUB xmm, m128 (66 0F DE /r).
void SSE2_PMAXUB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xDE0F);
}

// PMINSW xmm, xmm (66 0F EA /r): per-word signed minimum.
void SSE2_PMINSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xEA0F);
}

// PMINSW xmm, m128 (66 0F EA /r).
void SSE2_PMINSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xEA0F);
}

// PMINUB xmm, xmm (66 0F DA /r): per-byte unsigned minimum.
void SSE2_PMINUB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xDA0F);
}

// PMINUB xmm, m128 (66 0F DA /r).
void SSE2_PMINUB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xDA0F);
}
1043
1044 //
1045
// PADDSB xmm, xmm (66 0F EC /r): signed saturating add of packed bytes.
void SSE2_PADDSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xEC0F);
}

// PADDSB xmm, m128 (66 0F EC /r).
void SSE2_PADDSB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xEC0F);
}

// PADDSW xmm, xmm (66 0F ED /r): signed saturating add of packed words.
void SSE2_PADDSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xED0F);
}

// PADDSW xmm, m128 (66 0F ED /r).
void SSE2_PADDSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xED0F);
}

// PSUBSB xmm, xmm (66 0F E8 /r): signed saturating subtract of packed bytes.
void SSE2_PSUBSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xE80F);
}

// PSUBSB xmm, m128 (66 0F E8 /r).
void SSE2_PSUBSB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xE80F);
}

// PSUBSW xmm, xmm (66 0F E9 /r): signed saturating subtract of packed words.
void SSE2_PSUBSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xE90F);
}

// PSUBSW xmm, m128 (66 0F E9 /r).
void SSE2_PSUBSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xE90F);
}
1057
// PSUBUSB xmm, xmm (66 0F D8 /r): unsigned saturating subtract of packed bytes.
void SSE2_PSUBUSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD80F);
}

// PSUBUSB xmm, m128 (66 0F D8 /r).
void SSE2_PSUBUSB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD80F);
}

// PSUBUSW xmm, xmm (66 0F D9 /r): unsigned saturating subtract of packed words.
void SSE2_PSUBUSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD90F);
}

// PSUBUSW xmm, m128 (66 0F D9 /r).
void SSE2_PSUBUSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD90F);
}

// PADDUSB xmm, xmm (66 0F DC /r): unsigned saturating add of packed bytes.
void SSE2_PADDUSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xDC0F);
}

// PADDUSB xmm, m128 (66 0F DC /r).
void SSE2_PADDUSB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xDC0F);
}

// PADDUSW xmm, xmm (66 0F DD /r): unsigned saturating add of packed words.
void SSE2_PADDUSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xDD0F);
}

// PADDUSW xmm, m128 (66 0F DD /r).
void SSE2_PADDUSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xDD0F);
}
1067
1068 //**********************************************************************************/
//PACKSSWB,PACKSSDW,PACKUSWB: Pack with Signed/Unsigned Saturation
1070 //**********************************************************************************
// PACKSSWB xmm, xmm (opcode word 0x630F behind a 0x66 prefix).
void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x630F);
}

// PACKSSWB xmm, m128.
void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x630F);
}

// PACKSSDW xmm, xmm (opcode word 0x6B0F behind a 0x66 prefix).
void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x6B0F);
}

// PACKSSDW xmm, m128.
void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x6B0F);
}
1075
// PACKUSWB xmm, xmm (opcode word 0x670F behind a 0x66 prefix).
void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x670F);
}

// PACKUSWB xmm, m128.
void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x670F);
}
1078
//**********************************************************************************/
//PUNPCKL*/PUNPCKH* (BW, WD, DQ, QDQ): Unpack and interleave low / high elements
//**********************************************************************************
// PUNPCKLBW xmm, xmm (opcode word 0x600F behind a 0x66 prefix).
void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x600F);
}

// PUNPCKLBW xmm, m128.
void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x600F);
}

// PUNPCKHBW xmm, xmm (opcode word 0x680F behind a 0x66 prefix).
void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x680F);
}

// PUNPCKHBW xmm, m128.
void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x680F);
}
1087
// PUNPCKLWD xmm, xmm (opcode word 0x610F behind a 0x66 prefix).
void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x610F);
}

// PUNPCKLWD xmm, m128.
void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x610F);
}

// PUNPCKHWD xmm, xmm (opcode word 0x690F behind a 0x66 prefix).
void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x690F);
}

// PUNPCKHWD xmm, m128.
void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x690F);
}

// PUNPCKLDQ xmm, xmm (opcode word 0x620F behind a 0x66 prefix).
void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x620F);
}

// PUNPCKLDQ xmm, m128.
void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x620F);
}

// PUNPCKHDQ xmm, xmm (opcode word 0x6A0F behind a 0x66 prefix).
void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x6A0F);
}

// PUNPCKHDQ xmm, m128.
void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x6A0F);
}
1097
// PUNPCKLQDQ xmm, xmm (opcode word 0x6C0F behind a 0x66 prefix).
void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x6C0F);
}

// PUNPCKLQDQ xmm, m128.
void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x6C0F);
}

// PUNPCKHQDQ xmm, xmm (opcode word 0x6D0F behind a 0x66 prefix).
void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x6D0F);
}

// PUNPCKHQDQ xmm, m128.
void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x6D0F);
}
1103
// PMULLW xmm, xmm (opcode word 0xD50F behind a 0x66 prefix).
void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD50F);
}

// PMULLW xmm, m128.
void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD50F);
}

// PMULHW xmm, xmm (opcode word 0xE50F behind a 0x66 prefix).
void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xE50F);
}

// PMULHW xmm, m128.
void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xE50F);
}

// PMULUDQ xmm, xmm (opcode word 0xF40F behind a 0x66 prefix).
void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF40F);
}

// PMULUDQ xmm, m128.
void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF40F);
}
1111
// PMOVMSKB r32, xmm (opcode word 0xD70F behind a 0x66 prefix).
// The destination is an integer register; the shared SSERtoR macro still
// range-checks it against XMMREGS.
void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	SSERtoR66(0xD70F);
}

// MOVMSKPS r32, xmm (opcode word 0x500F, no prefix).
void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	SSERtoR(0x500F);
}

// MOVMSKPD r32, xmm (opcode word 0x500F behind a 0x66 prefix).
void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	SSERtoR66(0x500F);
}
1116
// HADDPS xmm, xmm: 0xF2 prefix byte followed by the plain register-register
// encoding of opcode word 0x7c0f.
void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	write8(0xf2);
	SSERtoR(0x7c0f);
}

// HADDPS xmm, m128: three-byte code 0x7c0fF2 through the memory-operand macro.
void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoRv(3, 0x7c0fF2, 0);
}
1119
// MOVSLDUP xmm, xmm.
// Uses SSE_SS_RtoR like SSE3_MOVSHDUP_XMM_to_XMM does: the macro writes the
// same byte sequence (0xF3, REX, opcode word, ModRM) that used to be spelled
// out by hand here, and additionally asserts SSE support and that both
// register indices are below XMMREGS.
void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x120f);
}
1126
// MOVSLDUP xmm, m128 (0xF3-prefixed opcode word 0x120f).
void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x120f, 0);
}

// MOVSHDUP xmm, xmm (0xF3-prefixed opcode word 0x160f).
void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x160f);
}

// MOVSHDUP xmm, m128.
void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x160f, 0);
}
1130
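// SSE-X: each wrapper emits the SSE2 integer form when the CPU reports SSE2 and
// the relevant register is tracked as XMMT_INT in g_xmmtypes; otherwise it emits
// the SSE float form.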
// Aligned 128-bit load: SSE2_MOVDQA_M128_to_XMM when SSE2 is present and `to`
// is tracked as XMMT_INT, SSE_MOVAPS_M128_to_XMM otherwise.
void SSEX_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_MOVAPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_MOVDQA_M128_to_XMM(to, from);
}
1137
// Aligned 128-bit store: SSE2_MOVDQA_XMM_to_M128 when SSE2 is present and
// `from` is tracked as XMMT_INT, SSE_MOVAPS_XMM_to_M128 otherwise.
void SSEX_MOVDQA_XMM_to_M128(uptr to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVAPS_XMM_to_M128(to, from);
		return;
	}
	SSE2_MOVDQA_XMM_to_M128(to, from);
}
1143
// Register copy: SSE2_MOVDQA_XMM_to_XMM when SSE2 is present and `from` is
// tracked as XMMT_INT, SSE_MOVAPS_XMM_to_XMM otherwise.
void SSEX_MOVDQA_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVAPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_MOVDQA_XMM_to_XMM(to, from);
}
1149
// Aligned load from [from + offset]: SSE2_MOVDQARmtoROffset when SSE2 is
// present and `to` is tracked as XMMT_INT, SSE_MOVAPSRmtoROffset otherwise.
void SSEX_MOVDQARmtoROffset(x86SSERegType to, x86IntRegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_MOVAPSRmtoROffset(to, from, offset);
		return;
	}
	SSE2_MOVDQARmtoROffset(to, from, offset);
}
1155
// Aligned store to [to + offset]: SSE2_MOVDQARtoRmOffset when SSE2 is present
// and `from` is tracked as XMMT_INT, SSE_MOVAPSRtoRmOffset otherwise.
void SSEX_MOVDQARtoRmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVAPSRtoRmOffset(to, from, offset);
		return;
	}
	SSE2_MOVDQARtoRmOffset(to, from, offset);
}
1161
// Unaligned 128-bit load: SSE2_MOVDQU_M128_to_XMM when SSE2 is present and
// `to` is tracked as XMMT_INT.
// The float fallback is MOVUPS rather than MOVAPS: callers choose the MOVDQU
// wrapper because the address may be unaligned, and MOVAPS faults on a memory
// operand that is not 16-byte aligned.
void SSEX_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[to] == XMMT_INT) {
		SSE2_MOVDQU_M128_to_XMM(to, from);
	} else {
		SSE_MOVUPS_M128_to_XMM(to, from);
	}
}
1167
// Unaligned 128-bit store: SSE2_MOVDQU_XMM_to_M128 when SSE2 is present and
// `from` is tracked as XMMT_INT.
// The float fallback is MOVUPS rather than MOVAPS: callers choose the MOVDQU
// wrapper because the address may be unaligned, and MOVAPS faults on a memory
// operand that is not 16-byte aligned.
void SSEX_MOVDQU_XMM_to_M128(uptr to, x86SSERegType from)
{
	if (cpucaps.hasStreamingSIMD2Extensions && g_xmmtypes[from] == XMMT_INT) {
		SSE2_MOVDQU_XMM_to_M128(to, from);
	} else {
		SSE_MOVUPS_XMM_to_M128(to, from);
	}
}
1173
// Register copy: SSE2_MOVDQU_XMM_to_XMM when SSE2 is present and `from` is
// tracked as XMMT_INT, SSE_MOVAPS_XMM_to_XMM otherwise.
void SSEX_MOVDQU_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVAPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_MOVDQU_XMM_to_XMM(to, from);
}
1179
// 32-bit load: SSE2_MOVD_M32_to_XMM when SSE2 is present and `to` is tracked
// as XMMT_INT, SSE_MOVSS_M32_to_XMM otherwise.
void SSEX_MOVD_M32_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_MOVSS_M32_to_XMM(to, from);
		return;
	}
	SSE2_MOVD_M32_to_XMM(to, from);
}
1185
// 32-bit store: SSE2_MOVD_XMM_to_M32 when SSE2 is present and `from` is
// tracked as XMMT_INT, SSE_MOVSS_XMM_to_M32 otherwise.
// NOTE(review): `to` is declared u32 here while the sibling memory wrappers
// take uptr; confirm against the header whether addresses can exceed 32 bits.
void SSEX_MOVD_XMM_to_M32(u32 to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVSS_XMM_to_M32(to, from);
		return;
	}
	SSE2_MOVD_XMM_to_M32(to, from);
}
1191
// 32-bit store through the address held in `to`: SSE2_MOVD_XMM_to_Rm when SSE2
// is present and `from` is tracked as XMMT_INT, SSE_MOVSS_XMM_to_Rm otherwise.
void SSEX_MOVD_XMM_to_Rm(x86IntRegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVSS_XMM_to_Rm(to, from);
		return;
	}
	SSE2_MOVD_XMM_to_Rm(to, from);
}
1197
// 32-bit load from [from + offset]: SSE2_MOVD_RmOffset_to_XMM when SSE2 is
// present and `to` is tracked as XMMT_INT, SSE_MOVSS_RmOffset_to_XMM otherwise.
void SSEX_MOVD_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
		return;
	}
	SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
}
1203
// 32-bit store to [to + offset]: SSE2_MOVD_XMM_to_RmOffset when SSE2 is
// present and `from` is tracked as XMMT_INT, SSE_MOVSS_XMM_to_RmOffset otherwise.
void SSEX_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
		return;
	}
	SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
}
1209
// Bitwise OR with memory: SSE2_POR_M128_to_XMM when SSE2 is present and `to`
// is tracked as XMMT_INT, SSE_ORPS_M128_to_XMM otherwise.
void SSEX_POR_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_ORPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_POR_M128_to_XMM(to, from);
}
1215
// Bitwise OR of two registers: SSE2_POR_XMM_to_XMM when SSE2 is present and
// `from` is tracked as XMMT_INT, SSE_ORPS_XMM_to_XMM otherwise.
void SSEX_POR_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_ORPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_POR_XMM_to_XMM(to, from);
}
1221
// Bitwise XOR with memory: SSE2_PXOR_M128_to_XMM when SSE2 is present and `to`
// is tracked as XMMT_INT, SSE_XORPS_M128_to_XMM otherwise.
void SSEX_PXOR_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_XORPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PXOR_M128_to_XMM(to, from);
}
1227
// Bitwise XOR of two registers: SSE2_PXOR_XMM_to_XMM when SSE2 is present and
// `from` is tracked as XMMT_INT, SSE_XORPS_XMM_to_XMM otherwise.
void SSEX_PXOR_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_XORPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PXOR_XMM_to_XMM(to, from);
}
1233
// Bitwise AND with memory: SSE2_PAND_M128_to_XMM when SSE2 is present and `to`
// is tracked as XMMT_INT, SSE_ANDPS_M128_to_XMM otherwise.
void SSEX_PAND_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_ANDPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PAND_M128_to_XMM(to, from);
}
1239
// Bitwise AND of two registers: SSE2_PAND_XMM_to_XMM when SSE2 is present and
// `from` is tracked as XMMT_INT, SSE_ANDPS_XMM_to_XMM otherwise.
void SSEX_PAND_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_ANDPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PAND_XMM_to_XMM(to, from);
}
1245
// AND-NOT with memory: SSE2_PANDN_M128_to_XMM when SSE2 is present and `to` is
// tracked as XMMT_INT, SSE_ANDNPS_M128_to_XMM otherwise.
void SSEX_PANDN_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_ANDNPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PANDN_M128_to_XMM(to, from);
}
1251
// AND-NOT of two registers: SSE2_PANDN_XMM_to_XMM when SSE2 is present and
// `from` is tracked as XMMT_INT, SSE_ANDNPS_XMM_to_XMM otherwise.
void SSEX_PANDN_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_ANDNPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PANDN_XMM_to_XMM(to, from);
}
1257
// Low dword interleave with memory: SSE2_PUNPCKLDQ_M128_to_XMM when SSE2 is
// present and `to` is tracked as XMMT_INT, SSE_UNPCKLPS_M128_to_XMM otherwise.
void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_UNPCKLPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
}
1263
// Low dword interleave of two registers: SSE2_PUNPCKLDQ_XMM_to_XMM when SSE2 is
// present and `from` is tracked as XMMT_INT, SSE_UNPCKLPS_XMM_to_XMM otherwise.
void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_UNPCKLPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
}
1269
// High dword interleave with memory: SSE2_PUNPCKHDQ_M128_to_XMM when SSE2 is
// present and `to` is tracked as XMMT_INT, SSE_UNPCKHPS_M128_to_XMM otherwise.
void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_UNPCKHPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
}
1275
// High dword interleave of two registers: SSE2_PUNPCKHDQ_XMM_to_XMM when SSE2
// is present and `from` is tracked as XMMT_INT, SSE_UNPCKHPS_XMM_to_XMM otherwise.
void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_UNPCKHPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
}
1281
// Moves the high quadword of `from` into the low quadword of `to`.
// Float path: a plain MOVHLPS. Integer path (SSE2 present and `from` tracked
// as XMMT_INT): PUNPCKHQDQ followed, when the registers differ, by a PSHUFD
// with 0x4e that swaps the two quadwords of `to`.
void SSEX_MOVHLPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVHLPS_XMM_to_XMM(to, from);
		return;
	}

	SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from);
	if (to == from) {
		return;
	}
	SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e);
}
1292
// SSE2 emulation: SSE2 operations rebuilt from SSE, x87 and integer instructions
// MOVSD xmm, xmm emulation built from two SHUFPS instructions, both using the
// quadword-swapping selector 0x4e: first against `from`, then `to` with itself.
void SSE2EMU_MOVSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const int swap_qwords = 0x4e;

	SSE_SHUFPS_XMM_to_XMM(to, from, swap_qwords);
	SSE_SHUFPS_XMM_to_XMM(to, to, swap_qwords);
}
1299
// MOVQ xmm, m64 emulation: clear the whole register with XORPS, then load the
// 64-bit memory operand with MOVLPS.
void SSE2EMU_MOVQ_M64_to_XMM(x86SSERegType to, uptr from)
{
	SSE_XORPS_XMM_to_XMM(to, to);	// to = 0
	SSE_MOVLPS_M64_to_XMM(to, from);
}
1305
// MOVQ xmm, xmm emulation: `to` receives the low quadword of `from` with its
// high quadword cleared.
// When both operands are the same register the source must be saved before
// the XORPS wipes it, so the low quadword is spilled to the static scratch
// buffer `p` and reloaded after clearing. Without this, `to == from` produced
// an all-zero register.
void SSE2EMU_MOVQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (to == from) {
		SSE_MOVLPS_XMM_to_M64((uptr)p, from);
		SSE_XORPS_XMM_to_XMM(to, to);
		SSE_MOVLPS_M64_to_XMM(to, (uptr)p);
		return;
	}

	SSE_XORPS_XMM_to_XMM(to, to);
	SSE2EMU_MOVSD_XMM_to_XMM(to, from);
}
1311
// MOVD xmm, [from + offset] emulation. The dword is fetched into EAX, the
// static scratch vector `p` is filled with {EAX, 0, 0, 0}, and `p` is then
// loaded into `to` with MOVAPS. The emitted code overwrites EAX.
void SSE2EMU_MOVD_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset)
{
	const uptr scratch = (uptr)p;

	MOV32RmtoROffset(EAX, from, offset);
	MOV32ItoM(scratch + 4, 0);
	MOV32ItoM(scratch + 8, 0);
	MOV32RtoM(scratch, EAX);
	MOV32ItoM(scratch + 12, 0);
	SSE_MOVAPS_M128_to_XMM(to, scratch);
}
1321
// MOVD [to + offset], xmm emulation: stores the low 32 bits of `from`.
// A MOVSS store writes the same four bytes directly. The previous sequence
// bounced the value through the scratch buffer and EAX, which overwrote the
// base register before it was used whenever `to` was EAX.
void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
}
1328
// MMX <-> XMM transfers for non-x86-64 builds only.
// NOTE(review): the whole file is wrapped in `#ifdef __x86_64__`, so this
// region appears to be dead here; confirm before relying on it.
#ifndef __x86_64__
extern void SetMMXstate();

// MOVDQ2Q emulation: spill the low quadword of `from` to the scratch buffer
// `p`, reload it into the MMX register, then call SetMMXstate().
void SSE2EMU_MOVDQ2Q_XMM_to_MM(x86MMXRegType to, x86SSERegType from)
{
	SSE_MOVLPS_XMM_to_M64(p, from);
	MOVQMtoR(to, p);
	SetMMXstate();
}

// MOVQ2DQ emulation: spill the MMX register to the scratch buffer `p`, reload
// it into the low quadword of `to`, then call SetMMXstate().
void SSE2EMU_MOVQ2DQ_MM_to_XMM(x86SSERegType to, x86MMXRegType from)
{
	MOVQRtoM(p, from);
	SSE_MOVLPS_M64_to_XMM(to, p);
	SetMMXstate();
}
#endif
1346
/****************************************************************************/
/*  SSE2 Emulated functions for SSE CPUs by kekko                           */
/****************************************************************************/
// PSHUFD xmm, xmm, imm8 emulation using only SSE1.
//
// SHUFPS picks result lanes 0-1 from its destination and lanes 2-3 from its
// source, each selected by a 2-bit field of imm8. When destination and source
// hold the same value this is exactly the PSHUFD lane selection, so the
// emulation is a copy (skipped when to == from) followed by a self-SHUFPS.
//
// This replaces a run-time sequence that decoded the constant imm8 through
// EAX/ECX/EBX and the scratch buffers p/p2. That sequence applied 32-bit
// ADD/SUB to registers loaded with 64-bit pointers and overwrote EBX.
void SSE2EMU_PSHUFD_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
	if (to != from) {
		SSE_MOVAPS_XMM_to_XMM(to, from);
	}
	SSE_SHUFPS_XMM_to_XMM(to, to, imm8);
}
1393
// MOVD r32, xmm emulation: `to` is first loaded with the address of the
// scratch vector `p`, the whole XMM register is stored there with MOVUPS, and
// the first dword is then read back into `to` through that same address.
void SSE2EMU_MOVD_XMM_to_R(x86IntRegType to, x86SSERegType from)
{
	/* XXX? */
	const uptr scratch = (uptr)&p;

	MOV64ItoR(to, scratch);
	SSE_MOVUPSRtoRm(to, from);
	MOV32RmtoR(to, to);
}
1400
// Helpers called by the conversion emulations below on non-x86-64 builds;
// they are declared here and defined outside this file.
#ifndef __x86_64__
extern void SetFPUstate();
extern void _freeMMXreg(int mmxreg);
#endif
1405
// CVTPS2DQ xmm, xmm emulation through the x87 unit: `from` is stored to the
// float scratch vector `f`, each lane is converted with an FLD32/FISTP32 pair
// into the integer scratch vector `p2`, and `p2` is loaded into `to`.
void SSE2EMU_CVTPS2DQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	int lane;

#ifndef __x86_64__
	SetFPUstate();
	_freeMMXreg(7);
#endif
	SSE_MOVAPS_XMM_to_M128((uptr)f, from);

	for (lane = 0; lane < 4; lane++) {
		FLD32((uptr)&f[lane]);
		FISTP32((uptr)&p2[lane]);
	}

	SSE_MOVAPS_M128_to_XMM(to, (uptr)p2);
}
1424
// CVTDQ2PS xmm, m128 emulation through the x87 unit: each of the four dwords
// at `from` is converted with a FILD32/FSTP32 pair into the float scratch
// vector `f`, which is then loaded into `to`.
void SSE2EMU_CVTDQ2PS_M128_to_XMM(x86SSERegType to, uptr from)
{
	int lane;

#ifndef __x86_64__
	SetFPUstate();
	_freeMMXreg(7);
#endif
	for (lane = 0; lane < 4; lane++) {
		FILD32(from + 4 * lane);
		FSTP32((uptr)&f[lane]);
	}

	SSE_MOVAPS_M128_to_XMM(to, (uptr)f);
}
1441
// MOVD m32, xmm emulation: the XMM register is stored to the scratch vector
// `p` through EAX, the first dword is read back into EAX, and EAX is written
// to `to`. The emitted code overwrites EAX.
void SSE2EMU_MOVD_XMM_to_M32(uptr to, x86SSERegType from)
{
	/* XXX? */
	const uptr scratch = (uptr)&p;

	MOV64ItoR(EAX, scratch);
	SSE_MOVUPSRtoRm(EAX, from);
	MOV32RmtoR(EAX, EAX);
	MOV32RtoM(to, EAX);
}
1449
// MOVD xmm, r32 emulation: the scratch vector `p` is filled with
// {from, 0, 0, 0} and then loaded into `to` with MOVAPS.
void SSE2EMU_MOVD_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	const uptr scratch = (uptr)p;

	MOV32ItoM(scratch + 4, 0);
	MOV32ItoM(scratch + 8, 0);
	MOV32RtoM(scratch, from);
	MOV32ItoM(scratch + 12, 0);
	SSE_MOVAPS_M128_to_XMM(to, scratch);
}
1457
1458 #endif
1459
1460 #endif
1461