1 // stop compiling if NORECBUILD build (only for Visual Studio)
2 
3 #ifdef __x86_64__
4 
5 #if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD))
6 
7 #include <assert.h>
8 #include "ix86-64.h"
9 
10 PCSX2_ALIGNED16(static unsigned int p[4]);
11 PCSX2_ALIGNED16(static unsigned int p2[4]);
12 PCSX2_ALIGNED16(static float f[4]);
13 
14 
15 XMMSSEType g_xmmtypes[XMMREGS] = {0};
16 
17 /********************/
18 /* SSE instructions */
19 /********************/
20 
/*
 * SSEMtoRv: shared body of the "memory operand -> XMM register" emitters.
 *
 * Expands to several statements and picks up `to` (XMM register) and `from`
 * (memory address) from the enclosing function, so it must be used inside a
 * braced function body that declares both names.
 *
 *   nc    - forwarded to MEMADDR_OP (presumably the number of opcode bytes)
 *   code  - opcode bytes packed into one integer
 *   overb - forwarded to MEMADDR_OP (presumably the number of bytes the
 *           caller emits after the address, e.g. an immediate)
 */
#define SSEMtoRv( nc, code, overb )                    \
	assert( cpucaps.hasStreamingSIMDExtensions );      \
	assert( to < XMMREGS );                            \
	MEMADDR_OP( 0, nc, code, true, to, from, overb )

/* SSEMtoR: SSEMtoRv specialised for plain two-byte opcodes. */
#define SSEMtoR( code, overb ) SSEMtoRv( 2, code, overb )
27 
/*
 * SSERtoMv: shared body of the "XMM register -> memory operand" emitters.
 *
 * Mirror image of SSEMtoRv: here `from` is the XMM register (range-checked)
 * and `to` is the memory address, both taken from the enclosing function.
 * Expands to several statements; use only inside a braced body.
 */
#define SSERtoMv( nc, code, overb )                    \
	assert( cpucaps.hasStreamingSIMDExtensions );      \
	assert( from < XMMREGS );                          \
	MEMADDR_OP( 0, nc, code, true, from, to, overb )
32 
/* SSERtoM: SSERtoMv specialised for plain two-byte opcodes.
 * (The definition previously ended in a stray line-continuation backslash that
 * spliced the following line into the macro; it has been removed.) */
#define SSERtoM( code, overb ) SSERtoMv( 2, code, overb )

/*
 * SSE_SS_MtoR: memory -> XMM form of an F3-prefixed (scalar single) opcode.
 * The 0xF3 prefix is packed below the two opcode bytes and the byte count
 * passed on is 3. `code` is parenthesised so that an expression argument
 * cannot be re-associated by the shift.
 */
#define SSE_SS_MtoR( code, overb ) \
	SSEMtoRv( 3, ((code) << 8) | 0xF3, overb )
37 
/*
 * SSE_SS_RtoM: XMM -> memory form of an F3-prefixed (scalar single) opcode.
 * The 0xF3 prefix is packed below the two opcode bytes and the byte count
 * passed on is 3. `code` is parenthesised so that an expression argument
 * cannot be re-associated by the shift.
 */
#define SSE_SS_RtoM( code, overb ) \
	SSERtoMv( 3, ((code) << 8) | 0xF3, overb )
40 
/*
 * SSERtoR: XMM -> XMM form of a two-byte opcode.
 *
 * Uses `to` and `from` from the enclosing function. Emits, in order: the REX
 * prefix handling (RexRB), the 16-bit opcode, and a register-direct ModRM
 * (mod = 3) with `to` in the reg field and `from` in the r/m field.
 * Expands to several statements and already ends in ';'.
 */
#define SSERtoR( code )                                \
	assert( cpucaps.hasStreamingSIMDExtensions );      \
	assert( to < XMMREGS && from < XMMREGS );          \
	RexRB( 0, to, from );                              \
	write16( code );                                   \
	ModRM( 3, to, from );
47 
/*
 * SSEMtoR66: memory -> XMM form of a 66-prefixed opcode.
 * The 0x66 prefix is packed below the two opcode bytes (byte count 3) and no
 * trailing bytes are announced (overb = 0). `code` is parenthesised so that an
 * expression argument cannot be re-associated by the shift.
 */
#define SSEMtoR66( code ) \
	SSEMtoRv( 3, ((code) << 8) | 0x66, 0 )
50 
/*
 * SSERtoM66: XMM -> memory form of a 66-prefixed opcode.
 * The 0x66 prefix is packed below the two opcode bytes (byte count 3) and no
 * trailing bytes are announced (overb = 0). `code` is parenthesised so that an
 * expression argument cannot be re-associated by the shift.
 */
#define SSERtoM66( code ) \
	SSERtoMv( 3, ((code) << 8) | 0x66, 0 )
53 
/*
 * SSERtoR66: XMM -> XMM form of a 66-prefixed opcode.
 * Writes the 0x66 prefix byte first, then expands SSERtoR so that the REX
 * handling and opcode follow the prefix.
 */
#define SSERtoR66( code )  \
	write8( 0x66 );        \
	SSERtoR( code );
57 
/*
 * _SSERtoR66: like SSERtoR66 but with the operand roles reversed - `from`
 * goes into the ModRM reg field and `to` into the r/m field (for opcodes whose
 * destination is encoded in r/m).
 * Expands to several statements and already ends in ';'.
 */
#define _SSERtoR66( code )                             \
	assert( cpucaps.hasStreamingSIMDExtensions );      \
	assert( to < XMMREGS && from < XMMREGS );          \
	write8( 0x66 );                                    \
	RexRB( 0, from, to );                              \
	write16( code );                                   \
	ModRM( 3, from, to );
65 
/*
 * SSE_SS_RtoR: XMM -> XMM form of an F3-prefixed (scalar single) opcode.
 * Emits 0xF3, the REX handling, the 16-bit opcode and a register-direct ModRM
 * with `to` in reg and `from` in r/m. Both names come from the enclosing
 * function. Expands to several statements and already ends in ';'.
 */
#define SSE_SS_RtoR( code )                            \
	assert( cpucaps.hasStreamingSIMDExtensions );      \
	assert( to < XMMREGS && from < XMMREGS );          \
	write8( 0xf3 );                                    \
	RexRB( 0, to, from );                              \
	write16( code );                                   \
	ModRM( 3, to, from );
73 
/*
 * CMPPSMtoR / CMPPSRtoR: packed compare (opcode 0F C2) followed by the
 * one-byte comparison predicate `op`.
 * The memory form passes overb = 1 because one byte (the predicate) is written
 * after the memory operand.
 */
#define CMPPSMtoR( op )        \
	SSEMtoR( 0xc20f, 1 );      \
	write8( op );

#define CMPPSRtoR( op )        \
	SSERtoR( 0xc20f );         \
	write8( op );
81 
/*
 * CMPSSMtoR / CMPSSRtoR: scalar compare (F3 0F C2) followed by the one-byte
 * comparison predicate `op`.
 * The memory form passes overb = 1 because one byte (the predicate) is written
 * after the memory operand.
 */
#define CMPSSMtoR( op )        \
	SSE_SS_MtoR( 0xc20f, 1 );  \
	write8( op );

#define CMPSSRtoR( op )        \
	SSE_SS_RtoR( 0xc20f );     \
	write8( op );
89 
90 
91 
92 void WriteRmOffset(x86IntRegType to, int offset);
93 void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset);
94 
95 /* movups [r32][r32*scale] to xmm1 */
SSE_MOVUPSRmStoR(x86SSERegType to,x86IntRegType from,x86IntRegType from2,int scale)96 void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
97 {
98 	assert( cpucaps.hasStreamingSIMDExtensions );
99     RexRXB(0, to, from2, from);
100 	write16( 0x100f );
101 	ModRM( 0, to, 0x4 );
102 	SibSB( scale, from2, from );
103 }
104 
105 /* movups xmm1 to [r32][r32*scale] */
SSE_MOVUPSRtoRmS(x86SSERegType to,x86IntRegType from,x86IntRegType from2,int scale)106 void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
107 {
108 	assert( cpucaps.hasStreamingSIMDExtensions );
109     RexRXB(1, to, from2, from);
110 	write16( 0x110f );
111 	ModRM( 0, to, 0x4 );
112 	SibSB( scale, from2, from );
113 }
114 
115 /* movups [r32] to r32 */
SSE_MOVUPSRmtoR(x86IntRegType to,x86IntRegType from)116 void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from )
117 {
118 	assert( cpucaps.hasStreamingSIMDExtensions );
119 	RexRB(0, to, from);
120 	write16( 0x100f );
121 	ModRM( 0, to, from );
122 }
123 
124 /* movups r32 to [r32] */
SSE_MOVUPSRtoRm(x86IntRegType to,x86IntRegType from)125 void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from )
126 {
127 	assert( cpucaps.hasStreamingSIMDExtensions );
128     RexRB(0, from, to);
129 	write16( 0x110f );
130 	ModRM( 0, from, to );
131 }
132 
133 /* movlps [r32] to r32 */
SSE_MOVLPSRmtoR(x86SSERegType to,x86IntRegType from)134 void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from )
135 {
136 	assert( cpucaps.hasStreamingSIMDExtensions );
137 	RexRB(1, to, from);
138 	write16( 0x120f );
139 	ModRM( 0, to, from );
140 }
141 
/*
 * movlps xmm(to), [from + offset]
 *
 * The ModRM/displacement bytes are produced by WriteRmOffsetFrom.
 */
void SSE_MOVLPSRmtoROffset(x86SSERegType to, x86IntRegType from, int offset)
{
	assert(cpucaps.hasStreamingSIMDExtensions);

	RexRB(0, to, from);
	write16(0x120f);
	WriteRmOffsetFrom(to, from, offset);
}
149 
150 /* movaps r32 to [r32] */
SSE_MOVLPSRtoRm(x86IntRegType to,x86IntRegType from)151 void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from )
152 {
153 	assert( cpucaps.hasStreamingSIMDExtensions );
154     RexRB(0, from, to);
155 	write16( 0x130f );
156 	ModRM( 0, from, to );
157 }
158 
/*
 * movlps [to + offset], xmm(from)
 *
 * NOTE(review): the declared parameter types look swapped - `to` is typed as
 * an SSE register but is used as the base register, while `from` is typed as
 * an integer register but is used as the XMM source. Behaviour is unaffected
 * as long as both types are plain integers; confirm against the header.
 */
void SSE_MOVLPSRtoRmOffset(x86SSERegType to, x86IntRegType from, int offset)
{
	assert(cpucaps.hasStreamingSIMDExtensions);

	RexRB(0, from, to);
	write16(0x130f);
	WriteRmOffsetFrom(from, to, offset);
}
166 
167 /* movaps [r32][r32*scale] to xmm1 */
SSE_MOVAPSRmStoR(x86SSERegType to,x86IntRegType from,x86IntRegType from2,int scale)168 void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
169 {
170 	assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
171     RexRXB(0, to, from2, from);
172 	write16( 0x280f );
173 	ModRM( 0, to, 0x4 );
174 	SibSB( scale, from2, from );
175 }
176 
177 /* movaps xmm1 to [r32][r32*scale] */
SSE_MOVAPSRtoRmS(x86SSERegType to,x86IntRegType from,x86IntRegType from2,int scale)178 void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
179 {
180 	assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
181     RexRXB(0, to, from2, from);
182 	write16( 0x290f );
183 	ModRM( 0, to, 0x4 );
184 	SibSB( scale, from2, from );
185 }
186 
187 // movaps [r32+offset] to r32
SSE_MOVAPSRmtoROffset(x86SSERegType to,x86IntRegType from,int offset)188 void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
189 {
190 	assert( cpucaps.hasStreamingSIMDExtensions );
191 	RexRB(0, to, from);
192 	write16( 0x280f );
193     WriteRmOffsetFrom(to, from, offset);
194 }
195 
196 // movaps r32 to [r32+offset]
SSE_MOVAPSRtoRmOffset(x86IntRegType to,x86SSERegType from,int offset)197 void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
198 {
199 	assert( cpucaps.hasStreamingSIMDExtensions );
200 	RexRB(0, from, to);
201 	write16( 0x290f );
202     WriteRmOffsetFrom(from, to, offset);
203 }
204 
205 // movdqa [r32+offset] to r32
SSE2_MOVDQARmtoROffset(x86SSERegType to,x86IntRegType from,int offset)206 void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
207 {
208 	assert( cpucaps.hasStreamingSIMDExtensions );
209 	write8(0x66);
210     RexRB(0, to, from);
211 	write16( 0x6f0f );
212     WriteRmOffsetFrom(to, from, offset);
213 }
214 
215 // movdqa r32 to [r32+offset]
SSE2_MOVDQARtoRmOffset(x86IntRegType to,x86SSERegType from,int offset)216 void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
217 {
218 	assert( cpucaps.hasStreamingSIMDExtensions );
219 	write8(0x66);
220     RexRB(0, from, to);
221 	write16( 0x7f0f );
222     WriteRmOffsetFrom(from, to, offset);
223 }
224 
225 // movups [r32+offset] to r32
SSE_MOVUPSRmtoROffset(x86SSERegType to,x86IntRegType from,int offset)226 void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
227 {
228 	RexRB(0, to, from);
229 	write16( 0x100f );
230     WriteRmOffsetFrom(to, from, offset);
231 }
232 
233 // movups r32 to [r32+offset]
SSE_MOVUPSRtoRmOffset(x86SSERegType to,x86IntRegType from,int offset)234 void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
235 {
236 	assert( cpucaps.hasStreamingSIMDExtensions );
237     RexRB(0, from, to);
238 	write16( 0x110f );
239     WriteRmOffsetFrom(from, to, offset);
240 }
241 
/*
 * MOVAPS: move aligned packed single-precision FP values.
 * Load uses opcode 0F 28, store uses 0F 29, register-to-register uses 0F 28.
 */
void SSE_MOVAPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x280f, 0);
}

void SSE_MOVAPS_XMM_to_M128(uptr to, x86SSERegType from)
{
	SSERtoM(0x290f, 0);
}

void SSE_MOVAPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x280f);
}
248 
/* MOVUPS with an absolute memory operand: load (0F 10) and store (0F 11). */
void SSE_MOVUPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x100f, 0);
}

void SSE_MOVUPS_XMM_to_M128(uptr to, x86SSERegType from)
{
	SSERtoM(0x110f, 0);
}
251 
/*
 * movsd xmm(to), xmm(from)
 *
 * Without SSE2 the work is delegated to SSE2EMU_MOVSD_XMM_to_XMM; otherwise
 * the 0xF2 prefix is written followed by the register form of opcode 0F 10.
 */
void SSE2_MOVSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_MOVSD_XMM_to_XMM(to, from);
		return;
	}

	write8(0xf2);
	SSERtoR(0x100f);
}
260 
/*
 * movq xmm(to), [from]
 *
 * Without SSE2 the work is delegated to SSE2EMU_MOVQ_M64_to_XMM; otherwise the
 * F3-prefixed opcode 0F 7E is emitted with a memory operand.
 */
void SSE2_MOVQ_M64_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_MOVQ_M64_to_XMM(to, from);
		return;
	}

	SSE_SS_MtoR(0x7e0f, 0);
}
268 
/*
 * movq xmm(to), xmm(from)
 *
 * Without SSE2 the work is delegated to SSE2EMU_MOVQ_XMM_to_XMM; otherwise the
 * F3-prefixed opcode 0F 7E is emitted in register form.
 */
void SSE2_MOVQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_MOVQ_XMM_to_XMM(to, from);
		return;
	}

	SSE_SS_RtoR(0x7e0f);
}
276 
/*
 * movq [to], xmm(from)
 *
 * Without SSE2 this falls back to SSE_MOVLPS_XMM_to_M64; otherwise the
 * 66-prefixed opcode 0F D6 is emitted with a memory destination.
 *
 * NOTE(review): `to` is a u32 while the load counterparts take a uptr; on a
 * 64-bit build this would truncate addresses above 4 GiB - confirm intent.
 */
void SSE2_MOVQ_XMM_to_M64(u32 to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE_MOVLPS_XMM_to_M64(to, from);
		return;
	}

	SSERtoM66(0xd60f);
}
284 
285 #ifndef __x86_64__
/*
 * movdq2q mm(to), xmm(from)
 *
 * Without SSE2 the work is delegated to SSE2EMU_MOVDQ2Q_XMM_to_MM; otherwise
 * the 0xF2 prefix is written followed by the register form of opcode 0F D6.
 *
 * This definition sits under "#ifndef __x86_64__" while the whole file is
 * wrapped in "#ifdef __x86_64__", so it is never compiled from this file.
 */
void SSE2_MOVDQ2Q_XMM_to_MM(x86MMXRegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_MOVDQ2Q_XMM_to_MM(to, from);
		return;
	}

	write8(0xf2);
	SSERtoR(0xd60f);
}
/*
 * movq2dq xmm(to), mm(from)
 *
 * Without SSE2 the work is delegated to SSE2EMU_MOVQ2DQ_MM_to_XMM; otherwise
 * the F3-prefixed opcode 0F D6 is emitted in register form.
 *
 * This definition sits under "#ifndef __x86_64__" while the whole file is
 * wrapped in "#ifdef __x86_64__", so it is never compiled from this file.
 */
void SSE2_MOVQ2DQ_MM_to_XMM(x86SSERegType to, x86MMXRegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_MOVQ2DQ_MM_to_XMM(to, from);
		return;
	}

	SSE_SS_RtoR(0xd60f);
}
301 #endif
302 
303 //**********************************************************************************/
304 //MOVSS: Move Scalar Single-Precision FP  value                                    *
305 //**********************************************************************************
SSE_MOVSS_M32_to_XMM(x86SSERegType to,uptr from)306 void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from )           { SSE_SS_MtoR( 0x100f, 0 ); }
SSE_MOVSS_XMM_to_M32(u32 to,x86SSERegType from)307 void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from )           { SSE_SS_RtoM( 0x110f, 0 ); }
/*
 * movss [to], xmm(from)
 *
 * Emits the 0xF3 prefix, the REX handling, opcode 0F 11 and a ModRM with
 * mod = 0, `from` in reg and `to` in r/m.
 * The SSE capability assert used by the other emitters in this file was
 * missing and is added for consistency.
 */
void SSE_MOVSS_XMM_to_Rm(x86IntRegType to, x86SSERegType from)
{
	assert(cpucaps.hasStreamingSIMDExtensions);

	write8(0xf3);
	RexRB(0, from, to);
	write16(0x110f);
	ModRM(0, from, to);
}
315 
/* movss xmm(to), xmm(from) - register form of F3 0F 10. */
void SSE_MOVSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x100f);
}
317 
/*
 * movss xmm(to), [from + offset]
 *
 * Emits the 0xF3 prefix, the REX handling, opcode 0F 10 and the
 * ModRM/displacement bytes from WriteRmOffsetFrom.
 * The SSE capability assert used by the other emitters in this file was
 * missing and is added for consistency.
 */
void SSE_MOVSS_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset)
{
	assert(cpucaps.hasStreamingSIMDExtensions);

	write8(0xf3);
	RexRB(0, to, from);
	write16(0x100f);
	WriteRmOffsetFrom(to, from, offset);
}
325 
/*
 * movss [to + offset], xmm(from)
 *
 * Emits the 0xF3 prefix, the REX handling, opcode 0F 11 and the
 * ModRM/displacement bytes from WriteRmOffsetFrom.
 * The SSE capability assert used by the other emitters in this file was
 * missing and is added for consistency.
 */
void SSE_MOVSS_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	assert(cpucaps.hasStreamingSIMDExtensions);

	write8(0xf3);
	RexRB(0, from, to);
	write16(0x110f);
	WriteRmOffsetFrom(from, to, offset);
}
333 
/*
 * maskmovdqu xmm(to), xmm(from) - 66-prefixed opcode 0F F7, register form.
 * NOTE(review): the function carries an SSE_ prefix although the encoding
 * emitted is the 66-prefixed (SSE2) one.
 */
void SSE_MASKMOVDQU_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xf70f);
}
335 //**********************************************************************************/
336 //MOVLPS: Move low Packed Single-Precision FP                                     *
337 //**********************************************************************************
SSE_MOVLPS_M64_to_XMM(x86SSERegType to,uptr from)338 void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from )          { SSEMtoR( 0x120f, 0 ); }
SSE_MOVLPS_XMM_to_M64(u32 to,x86SSERegType from)339 void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from )          { SSERtoM( 0x130f, 0 ); }
340 
/*
 * movlps xmm(to), [from + offset]
 *
 * Opcode 0F 12; the ModRM/displacement bytes come from WriteRmOffsetFrom.
 */
void SSE_MOVLPS_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset)
{
	assert(cpucaps.hasStreamingSIMDExtensions);

	RexRB(0, to, from);
	write16(0x120f);
	WriteRmOffsetFrom(to, from, offset);
}
348 
/*
 * movlps [to + offset], xmm(from)
 *
 * Opcode 0F 13; the ModRM/displacement bytes come from WriteRmOffsetFrom.
 * The SSE capability assert present in the matching load
 * (SSE_MOVLPS_RmOffset_to_XMM) was missing here and is added for consistency.
 */
void SSE_MOVLPS_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	assert(cpucaps.hasStreamingSIMDExtensions);

	RexRB(0, from, to);
	write16(0x130f);
	WriteRmOffsetFrom(from, to, offset);
}
355 
356 /////////////////////////////////////////////////////////////////////////////////////
357 //**********************************************************************************/
358 //MOVHPS: Move High Packed Single-Precision FP                                     *
359 //**********************************************************************************
SSE_MOVHPS_M64_to_XMM(x86SSERegType to,uptr from)360 void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from )          { SSEMtoR( 0x160f, 0 ); }
SSE_MOVHPS_XMM_to_M64(u32 to,x86SSERegType from)361 void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from )          { SSERtoM( 0x170f, 0 ); }
362 
/*
 * movhps xmm(to), [from + offset]
 *
 * Opcode 0F 16; the ModRM/displacement bytes come from WriteRmOffsetFrom.
 */
void SSE_MOVHPS_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset)
{
	assert(cpucaps.hasStreamingSIMDExtensions);

	RexRB(0, to, from);
	write16(0x160f);
	WriteRmOffsetFrom(to, from, offset);
}
370 
/*
 * movhps [to + offset], xmm(from)
 *
 * Opcode 0F 17; the ModRM/displacement bytes come from WriteRmOffsetFrom.
 */
void SSE_MOVHPS_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	assert(cpucaps.hasStreamingSIMDExtensions);

	RexRB(0, from, to);
	write16(0x170f);
	WriteRmOffsetFrom(from, to, offset);
}
378 
379 /////////////////////////////////////////////////////////////////////////////////////
380 //**********************************************************************************/
381 //MOVLHPS: Moved packed Single-Precision FP low to high                            *
382 //**********************************************************************************
SSE_MOVLHPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)383 void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )    { SSERtoR( 0x160f ); }
384 
385 //////////////////////////////////////////////////////////////////////////////////////
386 //**********************************************************************************/
387 //MOVHLPS: Moved packed Single-Precision FP High to Low                            *
388 //**********************************************************************************
SSE_MOVHLPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)389 void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )    { SSERtoR( 0x120f ); }
390 
391 ///////////////////////////////////////////////////////////////////////////////////
392 //**********************************************************************************/
393 //ANDPS: Logical Bit-wise  AND for Single FP                                        *
394 //**********************************************************************************
SSE_ANDPS_M128_to_XMM(x86SSERegType to,uptr from)395 void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR( 0x540f, 0 ); }
SSE_ANDPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)396 void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); }
397 
398 ///////////////////////////////////////////////////////////////////////////////////////
399 //**********************************************************************************/
400 //ANDNPS : Logical Bit-wise  AND NOT of Single-precision FP values                 *
401 //**********************************************************************************
SSE_ANDNPS_M128_to_XMM(x86SSERegType to,uptr from)402 void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from )          { SSEMtoR( 0x550f, 0 ); }
SSE_ANDNPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)403 void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); }
404 
/* RCPPS: packed single-precision FP reciprocal (opcode 0F 53). */
void SSE_RCPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x530f);
}

void SSE_RCPPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x530f, 0);
}
411 
/* RCPSS: scalar single-precision FP reciprocal (F3 0F 53). */
void SSE_RCPSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x530f);
}

void SSE_RCPSS_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x530f, 0);
}
414 
415 //////////////////////////////////////////////////////////////////////////////////////
416 //**********************************************************************************/
417 //ORPS : Bit-wise Logical OR of Single-Precision FP Data                            *
418 //**********************************************************************************
SSE_ORPS_M128_to_XMM(x86SSERegType to,uptr from)419 void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from )            { SSEMtoR( 0x560f, 0 ); }
SSE_ORPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)420 void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from )  { SSERtoR( 0x560f ); }
421 
422 /////////////////////////////////////////////////////////////////////////////////////
423 //**********************************************************************************/
424 //XORPS : Bitwise Logical XOR of Single-Precision FP Values                        *
425 //**********************************************************************************
SSE_XORPS_M128_to_XMM(x86SSERegType to,uptr from)426 void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR( 0x570f, 0 ); }
SSE_XORPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)427 void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); }
428 
429 ///////////////////////////////////////////////////////////////////////////////////////
430 //**********************************************************************************/
431 //ADDPS : ADD Packed Single-Precision FP Values                                    *
432 //**********************************************************************************
SSE_ADDPS_M128_to_XMM(x86SSERegType to,uptr from)433 void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR( 0x580f, 0 ); }
SSE_ADDPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)434 void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); }
435 
436 ////////////////////////////////////////////////////////////////////////////////////
437 //**********************************************************************************/
438 //ADDSS : ADD Scalar Single-Precision FP Values                                    *
439 //**********************************************************************************
SSE_ADDSS_M32_to_XMM(x86SSERegType to,uptr from)440 void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from )           { SSE_SS_MtoR( 0x580f, 0 ); }
SSE_ADDSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)441 void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); }
442 
443 /////////////////////////////////////////////////////////////////////////////////////////
444 //**********************************************************************************/
445 //SUBPS: Packed Single-Precision FP Subtract                                       *
446 //**********************************************************************************
SSE_SUBPS_M128_to_XMM(x86SSERegType to,uptr from)447 void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR( 0x5c0f, 0 ); }
SSE_SUBPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)448 void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); }
449 
450 ///////////////////////////////////////////////////////////////////////////////////////
451 //**********************************************************************************/
452 //SUBSS : Scalar  Single-Precision FP Subtract                                       *
453 //**********************************************************************************
SSE_SUBSS_M32_to_XMM(x86SSERegType to,uptr from)454 void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from )           { SSE_SS_MtoR( 0x5c0f, 0 ); }
SSE_SUBSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)455 void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); }
456 
457 /////////////////////////////////////////////////////////////////////////////////////////
458 //**********************************************************************************/
459 //MULPS : Packed Single-Precision FP Multiply                                      *
460 //**********************************************************************************
SSE_MULPS_M128_to_XMM(x86SSERegType to,uptr from)461 void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from )           { SSEMtoR( 0x590f, 0 ); }
SSE_MULPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)462 void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); }
463 
464 ////////////////////////////////////////////////////////////////////////////////////////
465 //**********************************************************************************/
466 //MULSS : Scalar  Single-Precision FP Multiply                                       *
467 //**********************************************************************************
SSE_MULSS_M32_to_XMM(x86SSERegType to,uptr from)468 void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from )           { SSE_SS_MtoR( 0x590f, 0 ); }
SSE_MULSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)469 void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); }
470 
/*
 * Packed single-precision FP compare (CMPccPS).
 *
 * Each wrapper emits opcode 0F C2 followed by a predicate byte:
 *   0 EQ, 1 LT, 2 LE, 3 UNORD, 4 NE, 5 NLT, 6 NLE, 7 ORD.
 *
 * Still missing: SSE_CMPPS_I8_to_XMM, SSE_CMPPS_M32_to_XMM,
 * SSE_CMPPS_XMM_to_XMM.
 */
void SSE_CMPEQPS_M128_to_XMM(x86SSERegType to, uptr from)             { CMPPSMtoR(0); }
void SSE_CMPEQPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)     { CMPPSRtoR(0); }
void SSE_CMPLTPS_M128_to_XMM(x86SSERegType to, uptr from)             { CMPPSMtoR(1); }
void SSE_CMPLTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)     { CMPPSRtoR(1); }
void SSE_CMPLEPS_M128_to_XMM(x86SSERegType to, uptr from)             { CMPPSMtoR(2); }
void SSE_CMPLEPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)     { CMPPSRtoR(2); }
void SSE_CMPUNORDPS_M128_to_XMM(x86SSERegType to, uptr from)          { CMPPSMtoR(3); }
void SSE_CMPUNORDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)  { CMPPSRtoR(3); }
void SSE_CMPNEPS_M128_to_XMM(x86SSERegType to, uptr from)             { CMPPSMtoR(4); }
void SSE_CMPNEPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)     { CMPPSRtoR(4); }
void SSE_CMPNLTPS_M128_to_XMM(x86SSERegType to, uptr from)            { CMPPSMtoR(5); }
void SSE_CMPNLTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)    { CMPPSRtoR(5); }
void SSE_CMPNLEPS_M128_to_XMM(x86SSERegType to, uptr from)            { CMPPSMtoR(6); }
void SSE_CMPNLEPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)    { CMPPSRtoR(6); }
void SSE_CMPORDPS_M128_to_XMM(x86SSERegType to, uptr from)            { CMPPSMtoR(7); }
void SSE_CMPORDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)    { CMPPSRtoR(7); }
494 
/*
 * Scalar single-precision FP compare (CMPccSS).
 *
 * Each wrapper emits F3 0F C2 followed by a predicate byte:
 *   0 EQ, 1 LT, 2 LE, 3 UNORD, 4 NE, 5 NLT, 6 NLE, 7 ORD.
 *
 * Still missing: SSE_CMPSS_I8_to_XMM, SSE_CMPSS_M32_to_XMM,
 * SSE_CMPSS_XMM_to_XMM.
 */
void SSE_CMPEQSS_M32_to_XMM(x86SSERegType to, uptr from)              { CMPSSMtoR(0); }
void SSE_CMPEQSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)     { CMPSSRtoR(0); }
void SSE_CMPLTSS_M32_to_XMM(x86SSERegType to, uptr from)              { CMPSSMtoR(1); }
void SSE_CMPLTSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)     { CMPSSRtoR(1); }
void SSE_CMPLESS_M32_to_XMM(x86SSERegType to, uptr from)              { CMPSSMtoR(2); }
void SSE_CMPLESS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)     { CMPSSRtoR(2); }
void SSE_CMPUNORDSS_M32_to_XMM(x86SSERegType to, uptr from)           { CMPSSMtoR(3); }
void SSE_CMPUNORDSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)  { CMPSSRtoR(3); }
void SSE_CMPNESS_M32_to_XMM(x86SSERegType to, uptr from)              { CMPSSMtoR(4); }
void SSE_CMPNESS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)     { CMPSSRtoR(4); }
void SSE_CMPNLTSS_M32_to_XMM(x86SSERegType to, uptr from)             { CMPSSMtoR(5); }
void SSE_CMPNLTSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)    { CMPSSRtoR(5); }
void SSE_CMPNLESS_M32_to_XMM(x86SSERegType to, uptr from)             { CMPSSMtoR(6); }
void SSE_CMPNLESS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)    { CMPSSRtoR(6); }
void SSE_CMPORDSS_M32_to_XMM(x86SSERegType to, uptr from)             { CMPSSMtoR(7); }
void SSE_CMPORDSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)    { CMPSSRtoR(7); }
518 
/*
 * ucomiss xmm(to), [from]
 *
 * Emits opcode 0F 2E with a memory operand through MEMADDR_OP.
 * The capability and register-range asserts that the SSEMtoR-based emitters
 * perform were missing here and are added for consistency.
 */
void SSE_UCOMISS_M32_to_XMM(x86SSERegType to, uptr from)
{
	assert(cpucaps.hasStreamingSIMDExtensions);
	assert(to < XMMREGS);

	MEMADDR_OP(0, VAROP2(0x0F, 0x2E), true, to, from, 0);
}
523 
/*
 * ucomiss xmm(to), xmm(from)
 *
 * Emits the REX handling, opcode 0F 2E and a register-direct ModRM.
 * The capability and register-range asserts that the SSERtoR-based emitters
 * perform were missing here and are added for consistency.
 */
void SSE_UCOMISS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	assert(cpucaps.hasStreamingSIMDExtensions);
	assert(to < XMMREGS && from < XMMREGS);

	RexRB(0, to, from);
	write16(0x2e0f);
	ModRM(3, to, from);
}
530 
531 //////////////////////////////////////////////////////////////////////////////////////////
532 //**********************************************************************************/
533 //RSQRTPS : Packed Single-Precision FP Square Root Reciprocal                      *
534 //**********************************************************************************
SSE_RSQRTPS_M128_to_XMM(x86SSERegType to,uptr from)535 void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from )          { SSEMtoR( 0x520f, 0 ); }
SSE_RSQRTPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)536 void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x520f ); }
537 
538 /////////////////////////////////////////////////////////////////////////////////////
539 //**********************************************************************************/
540 //RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal                      *
541 //**********************************************************************************
SSE_RSQRTSS_M32_to_XMM(x86SSERegType to,uptr from)542 void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from )          { SSE_SS_MtoR( 0x520f, 0 ); }
SSE_RSQRTSS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)543 void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SS_RtoR( 0x520f ); }
544 
545 ////////////////////////////////////////////////////////////////////////////////////
546 //**********************************************************************************/
547 //SQRTPS : Packed Single-Precision FP Square Root                                  *
548 //**********************************************************************************
SSE_SQRTPS_M128_to_XMM(x86SSERegType to,uptr from)549 void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from )          { SSEMtoR( 0x510f, 0 ); }
SSE_SQRTPS_XMM_to_XMM(x86SSERegType to,x86SSERegType from)550 void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x510f ); }
551 
552 //////////////////////////////////////////////////////////////////////////////////////
553 //**********************************************************************************/
554 //SQRTSS : Scalar Single-Precision FP Square Root                                  *
555 //**********************************************************************************
// SQRTSS xmm, m32 -- scalar form; SSE_SS_MtoR adds the 0xF3 prefix byte.
void SSE_SQRTSS_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x510f, 0);
}

// SQRTSS xmm, xmm -- scalar register-to-register form.
void SSE_SQRTSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x510f);
}
558 
559 ////////////////////////////////////////////////////////////////////////////////////////
560 //**********************************************************************************/
561 //MAXPS: Return Packed Single-Precision FP Maximum                                 *
562 //**********************************************************************************
// MAXPS xmm, m128 -- the second operand is read from the address `from`.
void SSE_MAXPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x5f0f, 0);
}

// MAXPS xmm, xmm -- register-to-register form.
void SSE_MAXPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x5f0f);
}
565 
566 /////////////////////////////////////////////////////////////////////////////////////////
567 //**********************************************************************************/
568 //MAXSS: Return Scalar Single-Precision FP Maximum                                 *
569 //**********************************************************************************
// MAXSS xmm, m32 -- scalar form; SSE_SS_MtoR adds the 0xF3 prefix byte.
void SSE_MAXSS_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x5f0f, 0);
}

// MAXSS xmm, xmm -- scalar register-to-register form.
void SSE_MAXSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x5f0f);
}
572 
573 #ifndef __x86_64__
574 /////////////////////////////////////////////////////////////////////////////////////////
575 //**********************************************************************************/
576 //CVTPI2PS: Packed Signed INT32 to Packed Single  FP Conversion                    *
577 //**********************************************************************************
// CVTPI2PS xmm, m64 -- packed int32 source read from the address `from`.
void SSE_CVTPI2PS_M64_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x2a0f, 0);
}

// CVTPI2PS xmm, mm -- packed int32 source taken from an MMX register.
void SSE_CVTPI2PS_MM_to_XMM(x86SSERegType to, x86MMXRegType from)
{
	SSERtoR(0x2a0f);
}
580 
581 ///////////////////////////////////////////////////////////////////////////////////////////
582 //**********************************************************************************/
583 //CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion                      *
584 //**********************************************************************************
// CVTPS2PI mm, m64 -- packed single source read from the address `from`.
void SSE_CVTPS2PI_M64_to_MM(x86MMXRegType to, uptr from)
{
	SSEMtoR(0x2d0f, 0);
}

// CVTPS2PI mm, xmm -- packed single source taken from an XMM register.
void SSE_CVTPS2PI_XMM_to_MM(x86MMXRegType to, x86SSERegType from)
{
	SSERtoR(0x2d0f);
}
587 #endif
588 
// CVTTSS2SI r32, m32 -- scalar single at address `from` converted into
// the general-purpose register `to`.
void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from)
{
	SSE_SS_MtoR(0x2c0f, 0);
}

// CVTTSS2SI r32, xmm -- encoded by hand rather than through SSE_SS_RtoR,
// so no capability or register-range asserts run here.
void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	write8(0xf3);		// scalar-single prefix byte
	RexRB(0, to, from);	// REX byte for the two register numbers
	write16(0x2c0f);
	ModRM(3, to, from);	// mod=3: register-direct operand
}
597 
// CVTSI2SS xmm, m32 -- integer at address `from` converted into the low
// single of `to`.
void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x2a0f, 0);
}

// CVTSI2SS xmm, r32 -- encoded by hand rather than through SSE_SS_RtoR,
// so no capability or register-range asserts run here.
void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	write8(0xf3);		// scalar-single prefix byte
	RexRB(0, to, from);	// REX byte for the two register numbers
	write16(0x2a0f);
	ModRM(3, to, from);	// mod=3: register-direct operand
}
606 
607 ///////////////////////////////////////////////////////////////////////////////////////////
608 //**********************************************************************************/
609 //CVTDQ2PS: Packed Signed INT32  to Packed Single Precision FP  Conversion         *
610 //**********************************************************************************
// CVTDQ2PS xmm, m128 -- packed int32 source read from the address `from`.
void SSE2_CVTDQ2PS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x5b0f, 0);
}

// CVTDQ2PS xmm, xmm -- register-to-register form.
void SSE2_CVTDQ2PS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x5b0f);
}
613 
614 //**********************************************************************************/
615 //CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion           *
616 //**********************************************************************************
// CVTPS2DQ xmm, m128 -- same opcode word as CVTDQ2PS, distinguished by the
// 0x66 prefix that SSEMtoR66 folds in.
void SSE2_CVTPS2DQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x5b0f);
}

// CVTPS2DQ xmm, xmm -- register-to-register form with the 0x66 prefix.
void SSE2_CVTPS2DQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x5b0f);
}
619 
// CVTTPS2DQ xmm, xmm -- the 0xF3 prefix emitted by SSE_SS_RtoR selects the
// truncating conversion on opcode word 0x5b0f.
void SSE2_CVTTPS2DQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x5b0f);
}
621 /////////////////////////////////////////////////////////////////////////////////////
622 //**********************************************************************************/
623 //MINPS: Return Packed Single-Precision FP Minimum                                 *
624 //**********************************************************************************
// MINPS xmm, m128 -- the second operand is read from the address `from`.
void SSE_MINPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x5d0f, 0);
}

// MINPS xmm, xmm -- register-to-register form.
void SSE_MINPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x5d0f);
}
627 
628 //////////////////////////////////////////////////////////////////////////////////////////
629 //**********************************************************************************/
630 //MINSS: Return Scalar Single-Precision FP Minimum                                 *
631 //**********************************************************************************
// MINSS xmm, m32 -- scalar form; SSE_SS_MtoR adds the 0xF3 prefix byte.
void SSE_MINSS_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x5d0f, 0);
}

// MINSS xmm, xmm -- scalar register-to-register form.
void SSE_MINSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x5d0f);
}
634 
635 #ifndef __x86_64__
636 ///////////////////////////////////////////////////////////////////////////////////////////
637 //**********************************************************************************/
638 //PMAXSW: Packed Signed Integer Word Maximum                                        *
639 //**********************************************************************************
// Still missing:
//		SSE_PMAXSW_M64_to_MM
// (SSE2_PMAXSW_XMM_to_XMM and SSE2_PMAXSW_M128_to_XMM are defined further down in this file.)
// PMAXSW mm, mm -- MMX-register form (no 0x66 prefix).
void SSE_PMAXSW_MM_to_MM(x86MMXRegType to, x86MMXRegType from)
{
	SSERtoR(0xEE0F);
}
645 
646 ///////////////////////////////////////////////////////////////////////////////////////
647 //**********************************************************************************/
648 //PMINSW: Packed Signed Integer Word Minimum                                        *
649 //**********************************************************************************
// Still missing:
//		SSE_PMINSW_M64_to_MM
// (SSE2_PMINSW_XMM_to_XMM and SSE2_PMINSW_M128_to_XMM are defined further down in this file.)
// PMINSW mm, mm -- MMX-register form (no 0x66 prefix).
void SSE_PMINSW_MM_to_MM(x86MMXRegType to, x86MMXRegType from)
{
	SSERtoR(0xEA0F);
}
655 #endif
656 
657 //////////////////////////////////////////////////////////////////////////////////////
658 //**********************************************************************************/
659 //SHUFPS: Shuffle Packed Single-Precision FP Values                                *
660 //**********************************************************************************
// SHUFPS xmm, xmm, imm8 -- the selector byte follows the ModRM byte.
void SSE_SHUFPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
	SSERtoR(0xC60F);
	write8(imm8);
}

// SHUFPS xmm, m128, imm8 -- passes overb=1 because one more byte (imm8)
// is emitted after the memory operand.
void SSE_SHUFPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
{
	SSEMtoR(0xC60F, 1);
	write8(imm8);
}
663 
// SHUFPS xmm, [from + offset], imm8 -- memory operand addressed through
// the general-purpose register `from` plus a displacement.
void SSE_SHUFPS_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset, u8 imm8)
{
	RexRB(0, to, from);
	write16(0xc60f);
	WriteRmOffsetFrom(to, from, offset);	// ModRM (+ displacement) for [from + offset]
	write8(imm8);				// shuffle selector comes last
}
671 
672 ////////////////////////////////////////////////////////////////////////////////////
673 //**********************************************************************************/
674 //PSHUFD: Shuffle Packed DoubleWords                                               *
675 //**********************************************************************************
// PSHUFD xmm, xmm, imm8.
// Hosts without SSE2 are routed to the SSE2EMU_ replacement instead of
// emitting the native instruction.
void SSE2_PSHUFD_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_PSHUFD_XMM_to_XMM(to, from, imm8);
		return;
	}

	SSERtoR66(0x700F);
	write8(imm8);
}
// PSHUFD xmm, m128, imm8 -- 0x66 prefix folded into the 3-byte opcode value;
// overb=1 accounts for the trailing imm8. Unlike the register form, there
// is no SSE2-emulation fallback here.
void SSE2_PSHUFD_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
{
	SSEMtoRv(3, 0x700F66, 1);
	write8(imm8);
}
687 
// PSHUFLW xmm, xmm, imm8 -- 0xF2 prefix written by hand ahead of SSERtoR.
void SSE2_PSHUFLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
	write8(0xF2);
	SSERtoR(0x700F);
	write8(imm8);
}

// PSHUFLW xmm, m128, imm8 -- 0xF2 prefix folded into the 3-byte opcode value.
void SSE2_PSHUFLW_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
{
	SSEMtoRv(3, 0x700FF2, 1);
	write8(imm8);
}

// PSHUFHW xmm, xmm, imm8 -- the 0xF3 prefix comes from SSE_SS_RtoR.
void SSE2_PSHUFHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8)
{
	SSE_SS_RtoR(0x700F);
	write8(imm8);
}

// PSHUFHW xmm, m128, imm8 -- the 0xF3 prefix comes from SSE_SS_MtoR;
// overb=1 accounts for the trailing imm8.
void SSE2_PSHUFHW_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8)
{
	SSE_SS_MtoR(0x700F, 1);
	write8(imm8);
}
692 
693 ///////////////////////////////////////////////////////////////////////////////////
694 //**********************************************************************************/
695 //UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data              *
696 //**********************************************************************************
// UNPCKLPS xmm, m128 -- the second operand is read from the address `from`.
void SSE_UNPCKLPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x140f, 0);
}

// UNPCKLPS xmm, xmm -- register-to-register form.
void SSE_UNPCKLPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x140F);
}
699 
700 ////////////////////////////////////////////////////////////////////////////////////////
701 //**********************************************************************************/
702 //UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data              *
703 //**********************************************************************************
// UNPCKHPS xmm, m128 -- the second operand is read from the address `from`.
void SSE_UNPCKHPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x150f, 0);
}

// UNPCKHPS xmm, xmm -- register-to-register form.
void SSE_UNPCKHPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x150F);
}
706 
707 ////////////////////////////////////////////////////////////////////////////////////////
708 //**********************************************************************************/
709 //DIVPS : Packed Single-Precision FP Divide                                       *
710 //**********************************************************************************
// DIVPS xmm, m128 -- the divisor is read from the address `from`.
void SSE_DIVPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR(0x5e0F, 0);
}

// DIVPS xmm, xmm -- register-to-register form.
void SSE_DIVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR(0x5e0F);
}
713 
714 //////////////////////////////////////////////////////////////////////////////////////
715 //**********************************************************************************/
716 //DIVSS : Scalar  Single-Precision FP Divide                                       *
717 //**********************************************************************************
// DIVSS xmm, m32 -- scalar form; SSE_SS_MtoR adds the 0xF3 prefix byte.
void SSE_DIVSS_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x5e0F, 0);
}

// DIVSS xmm, xmm -- scalar register-to-register form.
void SSE_DIVSS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x5e0F);
}
720 
721 /////////////////////////////////////////////////////////////////////////////////////////
722 //**********************************************************************************/
723 //STMXCSR : Store Streaming SIMD Extension Control/Status                         *
724 //**********************************************************************************
// STMXCSR m32 -- opcode 0F AE with 3 passed as the register-field value.
// Despite the parameter name, `from` is the address MXCSR is stored to.
void SSE_STMXCSR(uptr from)
{
	MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 3, from, 0);
}
728 
729 /////////////////////////////////////////////////////////////////////////////////////
730 //**********************************************************************************/
731 //LDMXCSR : Load Streaming SIMD Extension Control/Status                         *
732 //**********************************************************************************
// LDMXCSR m32 -- opcode 0F AE with 2 passed as the register-field value;
// MXCSR is loaded from the address `from`.
void SSE_LDMXCSR(uptr from)
{
	MEMADDR_OP(0, VAROP2(0x0F, 0xAE), false, 2, from, 0);
}
736 
737 /////////////////////////////////////////////////////////////////////////////////////
738 //**********************************************************************************/
739 //PADDB,PADDW,PADDD : Add Packed Integers                                          *
740 //**********************************************************************************
// PADDB xmm, xmm -- packed byte add.
void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFC0F);
}

// PADDB xmm, m128
void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFC0F);
}

// PADDW xmm, xmm -- packed word add.
void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFD0F);
}

// PADDW xmm, m128
void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFD0F);
}

// PADDD xmm, xmm -- packed doubleword add.
void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFE0F);
}

// PADDD xmm, m128
void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFE0F);
}
747 
// PADDQ xmm, xmm -- packed quadword add.
void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD40F);
}

// PADDQ xmm, m128
void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD40F);
}
750 
751 ///////////////////////////////////////////////////////////////////////////////////
752 //**********************************************************************************/
753 //PCMPxx: Compare Packed Integers                                                  *
754 //**********************************************************************************
// PCMPGTB xmm, xmm -- signed greater-than on packed bytes.
void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x640F);
}

// PCMPGTB xmm, m128
void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x640F);
}

// PCMPGTW xmm, xmm -- signed greater-than on packed words.
void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x650F);
}

// PCMPGTW xmm, m128
void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x650F);
}

// PCMPGTD xmm, xmm -- signed greater-than on packed doublewords.
void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x660F);
}

// PCMPGTD xmm, m128
void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x660F);
}

// PCMPEQB xmm, xmm -- equality on packed bytes.
void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x740F);
}

// PCMPEQB xmm, m128
void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x740F);
}

// PCMPEQW xmm, xmm -- equality on packed words.
void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x750F);
}

// PCMPEQW xmm, m128
void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x750F);
}
// PCMPEQD xmm, xmm.
// Without SSE2 the emitter substitutes SSE_CMPEQPS_XMM_to_XMM, i.e. a
// floating-point compare stands in for the integer one.
void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE_CMPEQPS_XMM_to_XMM(to, from);
		return;
	}

	SSERtoR66(0x760F);
}
774 
// PCMPEQD xmm, m128.
// Without SSE2 the emitter substitutes SSE_CMPEQPS_M128_to_XMM, i.e. a
// floating-point compare stands in for the integer one.
void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE_CMPEQPS_M128_to_XMM(to, from);
		return;
	}

	SSEMtoR66(0x760F);
}
784 
785 ////////////////////////////////////////////////////////////////////////////////////////////
786 //**********************************************************************************/
787 //PEXTRW,PINSRW: Packed Extract/Insert Word                                        *
788 //**********************************************************************************
// PEXTRW r32, xmm, imm8 -- imm8 picks the word to extract.
void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8)
{
	SSERtoR66(0xC50F);
	write8(imm8);
}

// PINSRW xmm, r32, imm8 -- imm8 picks the word slot to overwrite.
void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8)
{
	SSERtoR66(0xC40F);
	write8(imm8);
}
791 
792 ////////////////////////////////////////////////////////////////////////////////////////////
793 //**********************************************************************************/
794 //PSUBx: Subtract Packed Integers                                                  *
795 //**********************************************************************************
// PSUBB xmm, xmm -- packed byte subtract.
void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF80F);
}

// PSUBB xmm, m128
void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF80F);
}

// PSUBW xmm, xmm -- packed word subtract.
void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF90F);
}

// PSUBW xmm, m128
void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF90F);
}

// PSUBD xmm, xmm -- packed doubleword subtract.
void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFA0F);
}

// PSUBD xmm, m128
void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFA0F);
}

// PSUBQ xmm, xmm -- packed quadword subtract.
void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xFB0F);
}

// PSUBQ xmm, m128
void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xFB0F);
}
804 
805 ///////////////////////////////////////////////////////////////////////////////////////
806 //**********************************************************************************/
807 //MOVD: Move Dword(32bit) to /from XMM reg                                         *
808 //**********************************************************************************
// MOVD xmm, m32 -- loads the dword at address `from`. Unlike the
// register-source variant, there is no SSE2-emulation fallback here.
void SSE2_MOVD_M32_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x6E0F);
}
// MOVD xmm, r32.
// Hosts without SSE2 are routed to the SSE2EMU_ replacement.
void SSE2_MOVD_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_MOVD_R_to_XMM(to, from);
		return;
	}

	SSERtoR66(0x6E0F);
}
819 
// MOVD xmm, [from] -- register-indirect load with no displacement.
// NOTE(review): mod=0 is emitted unconditionally; base registers that need
// a SIB byte or a displacement in that mode (ESP/EBP-class encodings) look
// unsupported here -- confirm callers never pass them.
void SSE2_MOVD_Rm_to_XMM(x86SSERegType to, x86IntRegType from)
{
	write8(0x66);
	RexRB(0, to, from);
	write16(0x6e0f);
	ModRM(0, to, from);
}
827 
// MOVD xmm, [from + offset] -- register-indirect load with displacement.
void SSE2_MOVD_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset)
{
	write8(0x66);
	RexRB(0, to, from);
	write16(0x6e0f);
	WriteRmOffsetFrom(to, from, offset);	// ModRM (+ displacement) for [from + offset]
}
835 
// MOVD m32, xmm -- stores the low dword of `from` at address `to`.
// NOTE(review): `to` is declared u32 while the other memory-destination
// emitters in this file take uptr; on x86-64 that would truncate addresses
// above 4 GiB -- confirm against the prototype and callers.
void SSE2_MOVD_XMM_to_M32(u32 to, x86SSERegType from)
{
	SSERtoM66(0x7E0F);
}
// MOVD r32, xmm.
// Uses _SSERtoR66, which places `from` in the ModRM reg field and `to` in
// the r/m field (the reverse of SSERtoR66). Hosts without SSE2 are routed
// to the SSE2EMU_ replacement.
void SSE2_MOVD_XMM_to_R(x86IntRegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_MOVD_XMM_to_R(to, from);
		return;
	}

	_SSERtoR66(0x7E0F);
}
845 
// MOVD [to], xmm -- register-indirect store with no displacement.
// The XMM source goes in the ModRM reg field, the base register in r/m.
// NOTE(review): mod=0 is emitted unconditionally; base registers that need
// a SIB byte or a displacement in that mode look unsupported -- confirm.
void SSE2_MOVD_XMM_to_Rm(x86IntRegType to, x86SSERegType from)
{
	write8(0x66);
	RexRB(0, from, to);
	write16(0x7e0f);
	ModRM(0, from, to);
}
853 
// MOVD [to + offset], xmm -- register-indirect store with displacement.
// Hosts without SSE2 are routed to the SSE2EMU_ replacement.
void SSE2_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions) {
		SSE2EMU_MOVD_XMM_to_RmOffset(to, from, offset);
		return;
	}

	write8(0x66);
	RexRB(0, from, to);			// XMM source in reg, base register in r/m
	write16(0x7e0f);
	WriteRmOffsetFrom(from, to, offset);	// ModRM (+ displacement) for [to + offset]
}
866 
867 #ifdef __x86_64__
// MOVQ r64, xmm -- same opcode word as the MOVD store form, but RexRB is
// called with 1 as its first argument (0 everywhere else in this group),
// presumably REX.W to select the 64-bit operand size.
void SSE2_MOVQ_XMM_to_R(x86IntRegType to, x86SSERegType from)
{
	assert(from < XMMREGS);

	write8(0x66);
	RexRB(1, from, to);	// XMM source in reg, integer destination in r/m
	write16(0x7e0f);
	ModRM(3, from, to);	// mod=3: register-direct
}
876 
// MOVQ xmm, r64 -- same opcode word as the MOVD load form, but RexRB is
// called with 1 as its first argument (0 everywhere else in this group),
// presumably REX.W to select the 64-bit operand size.
void SSE2_MOVQ_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	assert(to < XMMREGS);

	write8(0x66);
	RexRB(1, to, from);	// XMM destination in reg, integer source in r/m
	write16(0x6e0f);
	ModRM(3, to, from);	// mod=3: register-direct
}
885 
886 #endif
887 
888 ////////////////////////////////////////////////////////////////////////////////////
889 //**********************************************************************************/
890 //POR : SSE Bitwise OR                                                             *
891 //**********************************************************************************
// POR xmm, xmm -- to |= from
void SSE2_POR_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xEB0F);
}

// POR xmm, m128
void SSE2_POR_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xEB0F);
}
894 
895 // logical and to &= from
// PAND xmm, xmm -- to &= from
void SSE2_PAND_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xDB0F);
}

// PAND xmm, m128
void SSE2_PAND_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xDB0F);
}
898 
899 // to = (~to) & from
// PANDN xmm, xmm -- to = (~to) & from
void SSE2_PANDN_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xDF0F);
}

// PANDN xmm, m128
void SSE2_PANDN_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xDF0F);
}
902 
903 /////////////////////////////////////////////////////////////////////////////////////
904 //**********************************************************************************/
905 //PXOR : SSE Bitwise XOR                                                             *
906 //**********************************************************************************
// PXOR xmm, xmm -- to ^= from
void SSE2_PXOR_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xEF0F);
}

// PXOR xmm, m128
void SSE2_PXOR_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xEF0F);
}
909 ///////////////////////////////////////////////////////////////////////////////////////
910 
// MOVDQA xmm, m128 -- aligned 128-bit load from address `from`.
void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0x6F0F);
}

// MOVDQA m128, xmm -- aligned 128-bit store to address `to`.
void SSE2_MOVDQA_XMM_to_M128(uptr to, x86SSERegType from)
{
	SSERtoM66(0x7F0F);
}

// MOVDQA xmm, xmm -- register copy.
void SSE2_MOVDQA_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0x6F0F);
}
914 
// MOVDQU xmm, m128 -- unaligned 128-bit load; same opcode words as MOVDQA
// but emitted through the 0xF3-prefixed SSE_SS_* macros.
void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR(0x6F0F, 0);
}

// MOVDQU m128, xmm -- unaligned 128-bit store to address `to`.
void SSE2_MOVDQU_XMM_to_M128(uptr to, x86SSERegType from)
{
	SSE_SS_RtoM(0x7F0F, 0);
}

// MOVDQU xmm, xmm -- register copy.
void SSE2_MOVDQU_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR(0x6F0F);
}
918 
919 // shift right logical
920 
// PSRLW xmm, xmm -- shift count supplied by the `from` register.
void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD10F);
}

// PSRLW xmm, m128 -- shift count read from the address `from`.
void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD10F);
}

// PSRLW xmm, imm8 -- immediate-count form; `to` is shifted in place.
void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x710F);
	ModRM(3, 2, to);	// reg field 2 selects PSRLW within opcode 0F 71
	write8(imm8);
}
931 
// PSRLD xmm, xmm -- shift count supplied by the `from` register.
void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD20F);
}

// PSRLD xmm, m128 -- shift count read from the address `from`.
void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD20F);
}

// PSRLD xmm, imm8 -- immediate-count form; `to` is shifted in place.
void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x720F);
	ModRM(3, 2, to);	// reg field 2 selects PSRLD within opcode 0F 72
	write8(imm8);
}
942 
// PSRLQ xmm, xmm -- shift count supplied by the `from` register.
void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xD30F);
}

// PSRLQ xmm, m128 -- shift count read from the address `from`.
void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xD30F);
}

// PSRLQ xmm, imm8 -- immediate-count form; `to` is shifted in place.
void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x730F);
	ModRM(3, 2, to);	// reg field 2 selects PSRLQ within opcode 0F 73
	write8(imm8);
}
953 
// PSRLDQ xmm, imm8 -- whole-register right shift by imm8 bytes.
// Shares opcode 0F 73 with PSRLQ; only the reg-field value differs.
void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x730F);
	ModRM(3, 3, to);	// reg field 3 selects PSRLDQ
	write8(imm8);
}
962 
963 // shift right arithmetic
964 
// PSRAW xmm, xmm -- shift count supplied by the `from` register.
void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xE10F);
}

// PSRAW xmm, m128 -- shift count read from the address `from`.
void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xE10F);
}

// PSRAW xmm, imm8 -- immediate-count form; `to` is shifted in place.
void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x710F);
	ModRM(3, 4, to);	// reg field 4 selects PSRAW within opcode 0F 71
	write8(imm8);
}
975 
// PSRAD xmm, xmm -- shift count supplied by the `from` register.
void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xE20F);
}

// PSRAD xmm, m128 -- shift count read from the address `from`.
void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xE20F);
}

// PSRAD xmm, imm8 -- immediate-count form; `to` is shifted in place.
void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x720F);
	ModRM(3, 4, to);	// reg field 4 selects PSRAD within opcode 0F 72
	write8(imm8);
}
986 
987 // shift left logical
988 
// PSLLW xmm, xmm -- shift count supplied by the `from` register.
void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF10F);
}

// PSLLW xmm, m128 -- shift count read from the address `from`.
void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF10F);
}

// PSLLW xmm, imm8 -- immediate-count form; `to` is shifted in place.
void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x710F);
	ModRM(3, 6, to);	// reg field 6 selects PSLLW within opcode 0F 71
	write8(imm8);
}
999 
// PSLLD xmm, xmm -- shift count supplied by the `from` register.
void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF20F);
}

// PSLLD xmm, m128 -- shift count read from the address `from`.
void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF20F);
}

// PSLLD xmm, imm8 -- immediate-count form; `to` is shifted in place.
void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x720F);
	ModRM(3, 6, to);	// reg field 6 selects PSLLD within opcode 0F 72
	write8(imm8);
}
1010 
// PSLLQ xmm, xmm -- shift count supplied by the `from` register.
void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xF30F);
}

// PSLLQ xmm, m128 -- shift count read from the address `from`.
void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xF30F);
}

// PSLLQ xmm, imm8 -- immediate-count form; `to` is shifted in place.
void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x730F);
	ModRM(3, 6, to);	// reg field 6 selects PSLLQ within opcode 0F 73
	write8(imm8);
}
1021 
// PSLLDQ xmm, imm8 -- whole-register left shift by imm8 bytes.
// Shares opcode 0F 73 with PSLLQ; only the reg-field value differs.
void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8)
{
	write8(0x66);
	RexB(0, to);
	write16(0x730F);
	ModRM(3, 7, to);	// reg field 7 selects PSLLDQ
	write8(imm8);
}
1030 
1031 
// PMAXSW xmm, xmm -- maximum of packed signed words.
void SSE2_PMAXSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xEE0F);
}

// PMAXSW xmm, m128
void SSE2_PMAXSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xEE0F);
}

// PMAXUB xmm, xmm -- maximum of packed unsigned bytes.
void SSE2_PMAXUB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66(0xDE0F);
}

// PMAXUB xmm, m128
void SSE2_PMAXUB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66(0xDE0F);
}
1037 
SSE2_PMINSW_XMM_to_XMM(x86SSERegType to,x86SSERegType from)1038 void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xEA0F ); }
SSE2_PMINSW_M128_to_XMM(x86SSERegType to,uptr from)1039 void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ){ SSEMtoR66( 0xEA0F ); }
1040 
SSE2_PMINUB_XMM_to_XMM(x86SSERegType to,x86SSERegType from)1041 void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xDA0F ); }
// PMINUB xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xDA0F.
void SSE2_PMINUB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xDA0F );
}
1043 
1044 //
1045 
// PADDSB xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xEC0F.
void SSE2_PADDSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xEC0F );
}
// PADDSB xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xEC0F.
void SSE2_PADDSB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xEC0F );
}
1048 
// PADDSW xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xED0F.
void SSE2_PADDSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xED0F );
}
// PADDSW xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xED0F.
void SSE2_PADDSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xED0F );
}
1051 
// PSUBSB xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xE80F.
void SSE2_PSUBSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xE80F );
}
// PSUBSB xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xE80F.
void SSE2_PSUBSB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xE80F );
}
1054 
// PSUBSW xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xE90F.
void SSE2_PSUBSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xE90F );
}
// PSUBSW xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xE90F.
void SSE2_PSUBSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xE90F );
}
1057 
// PSUBUSB xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xD80F.
void SSE2_PSUBUSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xD80F );
}
// PSUBUSB xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xD80F.
void SSE2_PSUBUSB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xD80F );
}
// PSUBUSW xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xD90F.
void SSE2_PSUBUSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xD90F );
}
// PSUBUSW xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xD90F.
void SSE2_PSUBUSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xD90F );
}
1062 
// PADDUSB xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xDC0F.
void SSE2_PADDUSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xDC0F );
}
// PADDUSB xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xDC0F.
void SSE2_PADDUSB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xDC0F );
}
// PADDUSW xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xDD0F.
void SSE2_PADDUSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xDD0F );
}
// PADDUSW xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xDD0F.
void SSE2_PADDUSW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xDD0F );
}
1067 
1068 //**********************************************************************************/
1069 //PACKSSWB,PACKSSDW: Pack Saturate Signed Word
1070 //**********************************************************************************
// PACKSSWB xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x630F.
void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x630F );
}
// PACKSSWB xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x630F.
void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x630F );
}
// PACKSSDW xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x6B0F.
void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x6B0F );
}
// PACKSSDW xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x6B0F.
void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x6B0F );
}
1075 
// PACKUSWB xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x670F.
void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x670F );
}
// PACKUSWB xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x670F.
void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x670F );
}
1078 
1079 //**********************************************************************************/
//PUNPCKLxx, PUNPCKHxx: Unpack and interleave low/high bytes, words, dwords and qwords
1081 //**********************************************************************************
// PUNPCKLBW xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x600F.
void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x600F );
}
// PUNPCKLBW xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x600F.
void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x600F );
}
1084 
// PUNPCKHBW xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x680F.
void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x680F );
}
// PUNPCKHBW xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x680F.
void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x680F );
}
1087 
// PUNPCKLWD xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x610F.
void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x610F );
}
// PUNPCKLWD xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x610F.
void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x610F );
}
// PUNPCKHWD xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x690F.
void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x690F );
}
// PUNPCKHWD xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x690F.
void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x690F );
}
1092 
// PUNPCKLDQ xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x620F.
void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x620F );
}
// PUNPCKLDQ xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x620F.
void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x620F );
}
// PUNPCKHDQ xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x6A0F.
void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x6A0F );
}
// PUNPCKHDQ xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x6A0F.
void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x6A0F );
}
1097 
// PUNPCKLQDQ xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x6C0F.
void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x6C0F );
}
// PUNPCKLQDQ xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x6C0F.
void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x6C0F );
}
1100 
// PUNPCKHQDQ xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0x6D0F.
void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0x6D0F );
}
// PUNPCKHQDQ xmm, m128: 0x66-prefixed memory-operand form, opcode word 0x6D0F.
void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0x6D0F );
}
1103 
// PMULLW xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xD50F.
void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xD50F );
}
// PMULLW xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xD50F.
void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xD50F );
}
// PMULHW xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xE50F.
void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xE50F );
}
// PMULHW xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xE50F.
void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xE50F );
}
1108 
// PMULUDQ xmm, xmm: 0x66-prefixed register-to-register form, opcode word 0xF40F.
void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSERtoR66( 0xF40F );
}
// PMULUDQ xmm, m128: 0x66-prefixed memory-operand form, opcode word 0xF40F.
void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoR66( 0xF40F );
}
1111 
// PMOVMSKB r32, xmm: 0x66-prefixed register form, opcode word 0xD70F.
// `to` is an integer register here, but the shared SSERtoR macro still
// asserts it against XMMREGS.
void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	SSERtoR66( 0xD70F );
}
1113 
// MOVMSKPS r32, xmm: unprefixed register form, opcode word 0x500F.
// `to` is an integer register here, but the shared SSERtoR macro still
// asserts it against XMMREGS.
void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	SSERtoR( 0x500F );
}
// MOVMSKPD r32, xmm: same opcode word as MOVMSKPS (0x500F) behind a 0x66 prefix.
void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from)
{
	SSERtoR66( 0x500F );
}
1116 
// HADDPS xmm, xmm: the 0xF2 prefix is written by hand, then the generic
// register-to-register macro emits opcode word 0x7C0F.
void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	write8( 0xf2 );
	SSERtoR( 0x7c0f );
}
// HADDPS xmm, m128: three-byte code 0x7C0FF2 (prefix folded into the low byte)
// passed to the memory-operand macro.
void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSEMtoRv( 3, 0x7c0fF2, 0 );
}
1119 
// MOVSLDUP xmm, xmm: hand-written sequence of the 0xF3 prefix, the REX byte
// chosen by RexRB, opcode word 0x120F and a register-direct ModRM.
// Unlike the macro-based emitters, no capability or register-range asserts
// are issued here.
void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	write8( 0xf3 );
	RexRB(0, to, from);
	write16( 0x120f );
	ModRM( 3, to, from );
}
1126 
// MOVSLDUP xmm, m128: 0xF3-prefixed memory-operand form, opcode word 0x120F.
void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR( 0x120f, 0 );
}
// MOVSHDUP xmm, xmm: 0xF3-prefixed register-to-register form, opcode word 0x160F.
void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	SSE_SS_RtoR( 0x160f );
}
// MOVSHDUP xmm, m128: 0xF3-prefixed memory-operand form, opcode word 0x160F.
void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from)
{
	SSE_SS_MtoR( 0x160f, 0 );
}
1130 
1131 // SSE-X
// Aligned 128-bit load into `to`. Uses MOVDQA when SSE2 is available and
// g_xmmtypes marks `to` as integer; otherwise falls back to MOVAPS.
void SSEX_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_MOVAPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_MOVDQA_M128_to_XMM(to, from);
}
1137 
// Aligned 128-bit store of `from`. Uses MOVDQA when SSE2 is available and
// g_xmmtypes marks `from` as integer; otherwise falls back to MOVAPS.
void SSEX_MOVDQA_XMM_to_M128(uptr to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVAPS_XMM_to_M128(to, from);
		return;
	}
	SSE2_MOVDQA_XMM_to_M128(to, from);
}
1143 
// Register-to-register 128-bit move. The choice between MOVDQA and MOVAPS is
// keyed on the tracked type of the source register `from`.
void SSEX_MOVDQA_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVAPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_MOVDQA_XMM_to_XMM(to, from);
}
1149 
// Aligned 128-bit load from [from + offset] into `to`. Uses the MOVDQA
// emitter when SSE2 is available and `to` is tracked as integer; otherwise
// the MOVAPS emitter.
void SSEX_MOVDQARmtoROffset(x86SSERegType to, x86IntRegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_MOVAPSRmtoROffset(to, from, offset);
		return;
	}
	SSE2_MOVDQARmtoROffset(to, from, offset);
}
1155 
// Aligned 128-bit store of `from` to [to + offset]. Uses the MOVDQA emitter
// when SSE2 is available and `from` is tracked as integer; otherwise the
// MOVAPS emitter.
void SSEX_MOVDQARtoRmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVAPSRtoRmOffset(to, from, offset);
		return;
	}
	SSE2_MOVDQARtoRmOffset(to, from, offset);
}
1161 
// 128-bit load into `to`. Uses MOVDQU when SSE2 is available and `to` is
// tracked as integer.
// NOTE(review): the fallback is the MOVAPS emitter although this wrapper is
// named after the unaligned MOVDQU; confirm that callers only pass 16-byte
// aligned addresses, or switch the fallback to a MOVUPS emitter.
void SSEX_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_MOVAPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_MOVDQU_M128_to_XMM(to, from);
}
1167 
// 128-bit store of `from`. Uses MOVDQU when SSE2 is available and `from` is
// tracked as integer.
// NOTE(review): the fallback is the MOVAPS emitter although this wrapper is
// named after the unaligned MOVDQU; confirm that callers only pass 16-byte
// aligned addresses, or switch the fallback to a MOVUPS emitter.
void SSEX_MOVDQU_XMM_to_M128(uptr to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVAPS_XMM_to_M128(to, from);
		return;
	}
	SSE2_MOVDQU_XMM_to_M128(to, from);
}
1173 
// Register-to-register 128-bit move. Uses MOVDQU when SSE2 is available and
// `from` is tracked as integer; otherwise MOVAPS.
void SSEX_MOVDQU_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVAPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_MOVDQU_XMM_to_XMM(to, from);
}
1179 
// 32-bit load into `to`. Uses MOVD when SSE2 is available and `to` is tracked
// as integer; otherwise MOVSS.
void SSEX_MOVD_M32_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_MOVSS_M32_to_XMM(to, from);
		return;
	}
	SSE2_MOVD_M32_to_XMM(to, from);
}
1185 
// 32-bit store of the low dword of `from`. Uses MOVD when SSE2 is available
// and `from` is tracked as integer; otherwise MOVSS.
// NOTE(review): `to` is declared u32 while the sibling memory wrappers take
// uptr; in this x86-64 build that cannot carry a full address. Confirm
// against the prototype in the header before widening.
void SSEX_MOVD_XMM_to_M32(u32 to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVSS_XMM_to_M32(to, from);
		return;
	}
	SSE2_MOVD_XMM_to_M32(to, from);
}
1191 
// 32-bit store of `from` through the address held in integer register `to`.
// Uses the MOVD emitter when SSE2 is available and `from` is tracked as
// integer; otherwise the MOVSS emitter.
void SSEX_MOVD_XMM_to_Rm(x86IntRegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVSS_XMM_to_Rm(to, from);
		return;
	}
	SSE2_MOVD_XMM_to_Rm(to, from);
}
1197 
// 32-bit load from [from + offset] into `to`. Uses the MOVD emitter when SSE2
// is available and `to` is tracked as integer; otherwise the MOVSS emitter.
void SSEX_MOVD_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_MOVSS_RmOffset_to_XMM(to, from, offset);
		return;
	}
	SSE2_MOVD_RmOffset_to_XMM(to, from, offset);
}
1203 
// 32-bit store of `from` to [to + offset]. Uses the MOVD emitter when SSE2 is
// available and `from` is tracked as integer; otherwise the MOVSS emitter.
void SSEX_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVSS_XMM_to_RmOffset(to, from, offset);
		return;
	}
	SSE2_MOVD_XMM_to_RmOffset(to, from, offset);
}
1209 
// Bitwise OR of a memory operand into `to`. Uses POR when SSE2 is available
// and `to` is tracked as integer; otherwise ORPS.
void SSEX_POR_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_ORPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_POR_M128_to_XMM(to, from);
}
1215 
// Bitwise OR of `from` into `to`. Uses POR when SSE2 is available and `from`
// is tracked as integer; otherwise ORPS.
void SSEX_POR_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_ORPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_POR_XMM_to_XMM(to, from);
}
1221 
// Bitwise XOR of a memory operand into `to`. Uses PXOR when SSE2 is available
// and `to` is tracked as integer; otherwise XORPS.
void SSEX_PXOR_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_XORPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PXOR_M128_to_XMM(to, from);
}
1227 
// Bitwise XOR of `from` into `to`. Uses PXOR when SSE2 is available and
// `from` is tracked as integer; otherwise XORPS.
void SSEX_PXOR_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_XORPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PXOR_XMM_to_XMM(to, from);
}
1233 
// Bitwise AND of a memory operand into `to`. Uses PAND when SSE2 is available
// and `to` is tracked as integer; otherwise ANDPS.
void SSEX_PAND_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_ANDPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PAND_M128_to_XMM(to, from);
}
1239 
// Bitwise AND of `from` into `to`. Uses PAND when SSE2 is available and
// `from` is tracked as integer; otherwise ANDPS.
void SSEX_PAND_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_ANDPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PAND_XMM_to_XMM(to, from);
}
1245 
// AND-NOT with a memory operand into `to`. Uses PANDN when SSE2 is available
// and `to` is tracked as integer; otherwise ANDNPS.
void SSEX_PANDN_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_ANDNPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PANDN_M128_to_XMM(to, from);
}
1251 
// AND-NOT of `from` into `to`. Uses PANDN when SSE2 is available and `from`
// is tracked as integer; otherwise ANDNPS.
void SSEX_PANDN_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_ANDNPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PANDN_XMM_to_XMM(to, from);
}
1257 
// Interleave low dwords of `to` with a memory operand. Uses PUNPCKLDQ when
// SSE2 is available and `to` is tracked as integer; otherwise UNPCKLPS.
void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_UNPCKLPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PUNPCKLDQ_M128_to_XMM(to, from);
}
1263 
// Interleave low dwords of `to` and `from`. Uses PUNPCKLDQ when SSE2 is
// available and `from` is tracked as integer; otherwise UNPCKLPS.
void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_UNPCKLPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PUNPCKLDQ_XMM_to_XMM(to, from);
}
1269 
// Interleave high dwords of `to` with a memory operand. Uses PUNPCKHDQ when
// SSE2 is available and `to` is tracked as integer; otherwise UNPCKHPS.
void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[to] != XMMT_INT) {
		SSE_UNPCKHPS_M128_to_XMM(to, from);
		return;
	}
	SSE2_PUNPCKHDQ_M128_to_XMM(to, from);
}
1275 
// Interleave high dwords of `to` and `from`. Uses PUNPCKHDQ when SSE2 is
// available and `from` is tracked as integer; otherwise UNPCKHPS.
void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_UNPCKHPS_XMM_to_XMM(to, from);
		return;
	}
	SSE2_PUNPCKHDQ_XMM_to_XMM(to, from);
}
1281 
// Copies the high qword of `from` into the low qword of `to`.
// The float path emits MOVHLPS directly. The integer path (SSE2 available and
// `from` tracked as integer) emits PUNPCKHQDQ, which leaves to = { to.hi,
// from.hi }, then swaps the two qwords with PSHUFD 0x4e so the old high half
// of `to` ends up back on top. When to == from both halves are already equal
// after the unpack, so the swap is skipped.
void SSEX_MOVHLPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	if (!cpucaps.hasStreamingSIMD2Extensions || g_xmmtypes[from] != XMMT_INT) {
		SSE_MOVHLPS_XMM_to_XMM(to, from);
		return;
	}

	SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from);
	if (to != from) {
		SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e);
	}
}
1292 
1293 // SSE2 emulation
// MOVSD xmm, xmm built from two SHUFPS with selector 0x4e.
// The first shuffle yields { to[2], to[3], from[0], from[1] }; the second
// swaps the halves of that result, giving { from[0], from[1], to[2], to[3] }:
// low qword taken from `from`, high qword of `to` preserved.
void SSE2EMU_MOVSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	const int swapHalves = 0x4e;

	SSE_SHUFPS_XMM_to_XMM(to, from, swapHalves);
	SSE_SHUFPS_XMM_to_XMM(to, to, swapHalves);
}
1299 
// MOVQ xmm, m64 without SSE2: clear the whole register with XORPS, then load
// the low 64 bits with MOVLPS.
void SSE2EMU_MOVQ_M64_to_XMM(x86SSERegType to, uptr from)
{
	SSE_XORPS_XMM_to_XMM(to, to);		// zero all 128 bits first
	SSE_MOVLPS_M64_to_XMM(to, from);	// then fill the low qword
}
1305 
// MOVQ xmm, xmm without SSE2: the result is the low qword of `from` with the
// high qword zeroed.
//
// The generic sequence clears `to` and then merges in the low qword of `from`.
// That only works when the registers differ: with to == from the XORPS would
// wipe the source before it is read and the result would be all zero. In that
// case the value is bounced through the 16-byte aligned scratch buffer `p`.
void SSE2EMU_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from)
{
	if( to == from ) {
		SSE_MOVAPS_XMM_to_M128((uptr)p, from);	// save the source before clearing it
		SSE_XORPS_XMM_to_XMM(to, to);
		SSE_MOVLPS_M64_to_XMM(to, (uptr)p);		// reload only the low qword
		return;
	}

	SSE_XORPS_XMM_to_XMM(to, to);
	SSE2EMU_MOVSD_XMM_to_XMM(to, from);
}
1311 
// MOVD xmm, [reg + offset] without SSE2: the dword is fetched into EAX,
// written to lane 0 of the scratch buffer `p` whose other three lanes are
// zeroed, and the whole buffer is then loaded into `to` with MOVAPS.
// The emitted code overwrites EAX.
void SSE2EMU_MOVD_RmOffset_to_XMM(x86SSERegType to, x86IntRegType from, int offset)
{
	const uptr scratch = (uptr)p;

	MOV32RmtoROffset(EAX, from, offset);
	MOV32ItoM(scratch+4, 0);
	MOV32ItoM(scratch+8, 0);
	MOV32RtoM(scratch, EAX);
	MOV32ItoM(scratch+12, 0);
	SSE_MOVAPS_M128_to_XMM(to, scratch);
}
1321 
// MOVD [reg + offset], xmm without SSE2: the low dword of `from` is stored to
// the scratch buffer `p` with MOVSS, reloaded into EAX and written to
// [to + offset]. The emitted code overwrites EAX.
// NOTE(review): if `to` is EAX the base register is overwritten before the
// final store; confirm that no caller passes EAX as `to`.
void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset)
{
	const uptr scratch = (uptr)p;

	SSE_MOVSS_XMM_to_M32(scratch, from);
	MOV32MtoR(EAX, scratch);
	MOV32RtoRmOffset(to, EAX, offset);
}
1328 
1329 #ifndef __x86_64__
1330 extern void SetMMXstate();
1331 
// MOVDQ2Q mm, xmm without SSE2: the low qword of `from` goes through the
// scratch buffer `p` into the MMX register, then SetMMXstate() is called.
// This sits under #ifndef __x86_64__ in a file that is itself wrapped in
// #ifdef __x86_64__, so it is not compiled in this build.
void SSE2EMU_MOVDQ2Q_XMM_to_MM(x86MMXRegType to, x86SSERegType from)
{
	SSE_MOVLPS_XMM_to_M64(p, from);		// spill low 64 bits
	MOVQMtoR(to, p);					// reload them into the MMX register
	SetMMXstate();
}
1338 
// MOVQ2DQ xmm, mm without SSE2: the MMX register is stored to the scratch
// buffer `p` and loaded into the low qword of `to` with MOVLPS, then
// SetMMXstate() is called. The high qword of `to` is not cleared by this
// sequence.
// This sits under #ifndef __x86_64__ in a file that is itself wrapped in
// #ifdef __x86_64__, so it is not compiled in this build.
void SSE2EMU_MOVQ2DQ_MM_to_XMM(x86SSERegType to, x86MMXRegType from)
{
	MOVQRtoM(p, from);					// spill the MMX register
	SSE_MOVLPS_M64_to_XMM(to, p);		// reload into the low 64 bits
	SetMMXstate();
}
1345 #endif
1346 
1347 /****************************************************************************/
1348 /*  SSE2 Emulated functions for SSE CPU's by kekko							*/
1349 /****************************************************************************/
// PSHUFD xmm, xmm, imm8 for CPUs without SSE2.
//
// The source register is spilled to the scratch buffer `p`; each destination
// dword is then copied from the source lane selected by its two bits of imm8
// (lane 0 uses bits 1:0, lane 3 uses bits 7:6) into `p2`, and `p2` is loaded
// into `to`.
//
// imm8 is known when the code is emitted, so the lane offsets are resolved
// here rather than by generated shift/mask/add instructions. This also avoids
// the previous 32-bit ADD/SUB on registers that held 64-bit buffer addresses,
// which truncated the pointers, and the generated code now overwrites only
// EAX instead of EAX, EBX and ECX.
//
// Both scratch buffers are 16-byte aligned, so the aligned MOVAPS forms are
// used for the spill and the reload.
void SSE2EMU_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 )
{
	int lane;

	SSE_MOVAPS_XMM_to_M128((uptr)p, from);

	for( lane = 0; lane < 4; lane++ ) {
		const uptr src = (uptr)p  + 4 * (uptr)((imm8 >> (2 * lane)) & 3);
		const uptr dst = (uptr)p2 + 4 * (uptr)lane;

		MOV32MtoR(EAX, src);
		MOV32RtoM(dst, EAX);
	}

	SSE_MOVAPS_M128_to_XMM(to, (uptr)p2);
}
1393 
// MOVD r32, xmm without SSE2: `to` first receives the address of the scratch
// buffer `p`, the whole of `from` is stored there with MOVUPS, and `to` is
// then overwritten with the first dword read back through itself.
// NOTE(review): the original carried an "XXX?" marker here; the intent of
// that marker is not visible from this file.
void SSE2EMU_MOVD_XMM_to_R(x86IntRegType to, x86SSERegType from)
{
	MOV64ItoR(to, (uptr)&p);		// to = &p
	SSE_MOVUPSRtoRm(to, from);		// [to] = from
	MOV32RmtoR(to, to);				// to = low dword of [to]
}
1400 
1401 #ifndef __x86_64__
1402 extern void SetFPUstate();
1403 extern void _freeMMXreg(int mmxreg);
1404 #endif
1405 
// CVTPS2DQ xmm, xmm without SSE2: `from` is spilled to the float scratch
// buffer `f`, each of the four lanes is pushed through an FLD32 / FISTP32
// pair into the integer scratch buffer `p2`, and `p2` is loaded into `to`.
// The SetFPUstate()/_freeMMXreg(7) preamble exists only for non-x86-64 builds.
void SSE2EMU_CVTPS2DQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from)
{
	int lane;

#ifndef __x86_64__
	SetFPUstate();
	_freeMMXreg(7);
#endif
	SSE_MOVAPS_XMM_to_M128((uptr)f, from);

	for (lane = 0; lane < 4; lane++) {
		FLD32((uptr)&f[lane]);
		FISTP32((uptr)&p2[lane]);
	}

	SSE_MOVAPS_M128_to_XMM(to, (uptr)p2);
}
1424 
// CVTDQ2PS xmm, m128 without SSE2: each of the four dwords at `from` is pushed
// through a FILD32 / FSTP32 pair into the float scratch buffer `f`, which is
// then loaded into `to` with MOVAPS.
// The SetFPUstate()/_freeMMXreg(7) preamble exists only for non-x86-64 builds.
void SSE2EMU_CVTDQ2PS_M128_to_XMM(x86SSERegType to, uptr from)
{
	int lane;

#ifndef __x86_64__
	SetFPUstate();
	_freeMMXreg(7);
#endif
	for (lane = 0; lane < 4; lane++) {
		FILD32(from + 4 * (uptr)lane);
		FSTP32((uptr)&f[lane]);
	}

	SSE_MOVAPS_M128_to_XMM(to, (uptr)f);
}
1441 
// MOVD m32, xmm without SSE2: EAX receives the address of the scratch buffer
// `p`, the whole of `from` is stored there with MOVUPS, the first dword is
// read back into EAX and finally written to `to`.
// The emitted code overwrites EAX.
// NOTE(review): the original carried an "XXX?" marker here; the intent of
// that marker is not visible from this file.
void SSE2EMU_MOVD_XMM_to_M32(uptr to, x86SSERegType from)
{
	MOV64ItoR(EAX, (uptr)&p);		// EAX = &p
	SSE_MOVUPSRtoRm(EAX, from);		// [EAX] = from
	MOV32RmtoR(EAX, EAX);			// EAX = low dword of [EAX]
	MOV32RtoM(to, EAX);
}
1449 
// MOVD xmm, r32 without SSE2: `from` is written to lane 0 of the scratch
// buffer `p`, the other three lanes are zeroed, and the buffer is loaded into
// `to` with MOVAPS.
void SSE2EMU_MOVD_R_to_XMM(x86SSERegType to, x86IntRegType from)
{
	const uptr scratch = (uptr)p;

	MOV32ItoM(scratch+4, 0);
	MOV32ItoM(scratch+8, 0);
	MOV32RtoM(scratch, from);
	MOV32ItoM(scratch+12, 0);
	SSE_MOVAPS_M128_to_XMM(to, scratch);
}
1457 
1458 #endif
1459 
1460 #endif
1461