1 /*
2  * compiler/codegen_x86.h - IA-32 and AMD64 code generator
3  *
4  * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS)
5  *
6  * Inspired by Christian Bauer's Basilisk II
7  *
8  * This file is part of the ARAnyM project which builds a new and powerful
9  * TOS/FreeMiNT compatible virtual machine running on almost any hardware.
10  *
11  * JIT compiler m68k -> IA-32 and AMD64
12  *
13  * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
14  * This file is derived from CCG, copyright 1999-2003 Ian Piumarta
15  * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne
16  * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
17  *
18  * This program is free software; you can redistribute it and/or modify
19  * it under the terms of the GNU General Public License as published by
20  * the Free Software Foundation; either version 2 of the License, or
21  * (at your option) any later version.
22  *
23  * This program is distributed in the hope that it will be useful,
24  * but WITHOUT ANY WARRANTY; without even the implied warranty of
25  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26  * GNU General Public License for more details.
27  *
28  * You should have received a copy of the GNU General Public License
29  * along with this program; if not, write to the Free Software
30  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
31  */
32 
33 #ifndef X86_RTASM_H
34 #define X86_RTASM_H
35 
36 /* NOTES
37  *
38  *	o Best viewed on a 1024x768 screen with fixed-6x10 font ;-)
39  *
40  * TODO
41  *
42  *	o Fix FIXMEs
43  *	o i387 FPU instructions
44  *	o SSE instructions
45  *	o Optimize for cases where register numbers are not integral constants
46  */
47 
48 /* --- Configuration ------------------------------------------------------- */
49 
/* Define to use a "flat" register set, i.e. a distinct register number
   for each size variant of the same register (see the X86_Reg*_Base
   enum below), enabling operand-size sanity checks.  */
#ifndef X86_FLAT_REGISTERS
#define X86_FLAT_REGISTERS	1
#endif

/* Define to generate x86-64 code.  */
#ifndef X86_TARGET_64BIT
#define X86_TARGET_64BIT	0
#endif

/* Define to optimize ALU instructions (use the shorter accumulator
   AL/AX/EAX/RAX immediate encodings when possible).  */
#ifndef X86_OPTIMIZE_ALU
#define X86_OPTIMIZE_ALU	1
#endif

/* Define to optimize rotate/shift instructions.  */
#ifndef X86_OPTIMIZE_ROTSHI
#define X86_OPTIMIZE_ROTSHI	1
#endif

/* Define to optimize absolute addresses for RIP relative addressing.  */
#ifndef X86_RIP_RELATIVE_ADDR
#define X86_RIP_RELATIVE_ADDR	1
#endif
75 
76 
77 /* --- Macros -------------------------------------------------------------- */
78 
79 /* Functions used to emit code.
80  *
81  *	x86_emit_byte(B)
82  *	x86_emit_word(W)
83  *	x86_emit_long(L)
84  */
85 
86 /* Get pointer to current code
87  *
88  *	x86_get_target()
89  */
90 
91 /* Abort assembler, fatal failure.
92  *
93  *	x86_emit_failure(MSG)
94  */
95 
/* Expression-friendly variant of x86_emit_failure(): reports the fatal
   error, then yields 0 so it can appear where an integer is expected
   (used as the failure arm of the checking macros below). */
#define x86_emit_failure0(MSG) (x86_emit_failure(MSG),0)
97 
98 
99 /* --- Register set -------------------------------------------------------- */
100 
/* Register identifier layout.
 *
 * With X86_FLAT_REGISTERS, each size class occupies its own 16-entry
 * range: the high nibble tags the class (8-bit low/high, 16, 32, 64,
 * MMX, XMM) and the low nibble is the hardware register number, so
 * e.g. AL, AX, EAX and RAX all get distinct values.  Without it, the
 * ranges overlap and no class checking is possible. */
enum {
  X86_RIP         = -2,
#if X86_FLAT_REGISTERS
  X86_NOREG       = 0,
  X86_Reg8L_Base  = 0x10,
  X86_Reg8H_Base  = 0x20,
  X86_Reg16_Base  = 0x30,
  X86_Reg32_Base  = 0x40,
  X86_Reg64_Base  = 0x50,
  X86_RegMMX_Base = 0x60,
  X86_RegXMM_Base = 0x70,
#else
  X86_NOREG       = -1,
  X86_Reg8L_Base  = 0,
  X86_Reg8H_Base  = 16,
  X86_Reg16_Base  = 0,
  X86_Reg32_Base  = 0,
  X86_Reg64_Base  = 0,
  X86_RegMMX_Base = 0,
  X86_RegXMM_Base = 0,
#endif
};
123 
/* 8-bit registers.  SPL..DIL and R8B..R15B need a REX prefix (x86-64
   only); AH..DH live in a separate class because they cannot be used
   together with a REX prefix. */
enum {
  X86_AL  = X86_Reg8L_Base,
  X86_CL,   X86_DL,   X86_BL,
  X86_SPL,  X86_BPL,  X86_SIL,  X86_DIL,
  X86_R8B,  X86_R9B,  X86_R10B, X86_R11B,
  X86_R12B, X86_R13B, X86_R14B, X86_R15B,
  X86_AH  = X86_Reg8H_Base + 4,
  X86_CH,   X86_DH,   X86_BH
};
133 
/* 16-bit registers (R8W..R15W are x86-64 only). */
enum {
  X86_AX  = X86_Reg16_Base,
  X86_CX,   X86_DX,   X86_BX,
  X86_SP,   X86_BP,   X86_SI,   X86_DI,
  X86_R8W,  X86_R9W,  X86_R10W, X86_R11W,
  X86_R12W, X86_R13W, X86_R14W, X86_R15W
};
141 
/* 32-bit registers (R8D..R15D are x86-64 only). */
enum {
  X86_EAX = X86_Reg32_Base,
  X86_ECX,  X86_EDX,  X86_EBX,
  X86_ESP,  X86_EBP,  X86_ESI,  X86_EDI,
  X86_R8D,  X86_R9D,  X86_R10D, X86_R11D,
  X86_R12D, X86_R13D, X86_R14D, X86_R15D
};
149 
/* 64-bit registers (x86-64 only). */
enum {
  X86_RAX = X86_Reg64_Base,
  X86_RCX,  X86_RDX,  X86_RBX,
  X86_RSP,  X86_RBP,  X86_RSI,  X86_RDI,
  X86_R8,   X86_R9,   X86_R10,  X86_R11,
  X86_R12,  X86_R13,  X86_R14,  X86_R15
};
157 
/* MMX registers. */
enum {
  X86_MM0 = X86_RegMMX_Base,
  X86_MM1,  X86_MM2,  X86_MM3,
  X86_MM4,  X86_MM5,  X86_MM6,  X86_MM7,
};
163 
/* SSE (XMM) registers; XMM8..XMM15 need a REX prefix (x86-64 only). */
enum {
  X86_XMM0 = X86_RegXMM_Base,
  X86_XMM1,  X86_XMM2,  X86_XMM3,
  X86_XMM4,  X86_XMM5,  X86_XMM6,  X86_XMM7,
  X86_XMM8,  X86_XMM9,  X86_XMM10, X86_XMM11,
  X86_XMM12, X86_XMM13, X86_XMM14, X86_XMM15
};
171 
172 /* Register control and access
173  *
174  *	_r0P(R)	Null register?
175  *	_rIP(R)	RIP register?
176  *	_rXP(R)	Extended register?
177  *
178  *	_rC(R)	Class of register (only valid if X86_FLAT_REGISTERS)
179  *	_rR(R)	Full register number
180  *	_rN(R)	Short register number for encoding
181  *
182  *	_r1(R)	8-bit register ID
183  *	_r2(R)	16-bit register ID
184  *	_r4(R)	32-bit register ID
185  *	_r8(R)	64-bit register ID
186  *	_rM(R)	MMX register ID
187  *	_rX(R)	XMM register ID
188  *	_rA(R)	Address register ID used for EA calculation
189  */
190 
/* Register predicates and field extractors (see the table above).
   NOTE: the non-flat `_rR' and `_r1e8lP' originally used `int(R)', a
   C++ function-style cast that does not compile as C; replaced with
   the equivalent C-style cast `(int)(R)', valid in both languages. */
#define _r0P(R)		((int)(R) == (int)X86_NOREG)
#define _rIP(R)		((int)(R) == (int)X86_RIP)

#if X86_FLAT_REGISTERS
#define _rC(R)		((R) & 0xf0)
#define _rR(R)		((R) & 0x0f)
#define _rN(R)		((R) & 0x07)
#define _rXP(R)		((R) > 0 && _rR(R) > 7)
#else
#define _rN(R)		((R) & 0x07)
#define _rR(R)		((int)(R))
#define _rXP(R)		(_rR(R) > 7 && _rR(R) < 16)
#endif

/* Size-checked register numbers: with _ASM_SAFETY and flat registers,
   verify the register belongs to the expected size class before
   encoding; otherwise just take the low 3 bits. */
#if !defined(_ASM_SAFETY) || ! X86_FLAT_REGISTERS
#define _r1(R)		_rN(R)
#define _r2(R)		_rN(R)
#define _r4(R)		_rN(R)
#define _r8(R)		_rN(R)
#define _rA(R)		_rN(R)
#define _rM(R)		_rN(R)
#define _rX(R)		_rN(R)
#else
#define _r1(R)		( ((_rC(R) & (X86_Reg8L_Base | X86_Reg8H_Base)) != 0)	? _rN(R) : x86_emit_failure0( "8-bit register required"))
#define _r2(R)		( (_rC(R) == X86_Reg16_Base)				? _rN(R) : x86_emit_failure0("16-bit register required"))
#define _r4(R)		( (_rC(R) == X86_Reg32_Base)				? _rN(R) : x86_emit_failure0("32-bit register required"))
#define _r8(R)		( (_rC(R) == X86_Reg64_Base)				? _rN(R) : x86_emit_failure0("64-bit register required"))
#define _rA(R)		( X86_TARGET_64BIT ? \
			( (_rC(R) == X86_Reg64_Base)				? _rN(R) : x86_emit_failure0("not a valid 64-bit base/index expression")) : \
			( (_rC(R) == X86_Reg32_Base)				? _rN(R) : x86_emit_failure0("not a valid 32-bit base/index expression")) )
#define _rM(R)		( (_rC(R) == X86_RegMMX_Base)				? _rN(R) : x86_emit_failure0("MMX register required"))
#define _rX(R)		( (_rC(R) == X86_RegXMM_Base)				? _rN(R) : x86_emit_failure0("SSE register required"))
#endif

/* Stack pointer id for the current target mode. */
#define _rSP()		(X86_TARGET_64BIT ? (int)X86_RSP : (int)X86_ESP)
/* True for the 8-bit registers (SPL..DIL) that require a REX prefix. */
#define _r1e8lP(R)	((int)(R) >= X86_SPL && (int)(R) <= X86_DIL)
#define _rbpP(R)	(_rR(R) == _rR(X86_RBP))
#define _rspP(R)	(_rR(R) == _rR(X86_RSP))
#define _rbp13P(R)	(_rN(R) == _rN(X86_RBP))
#define _rsp12P(R)	(_rN(R) == _rN(X86_RSP))
232 
233 /* ========================================================================= */
234 /* --- UTILITY ------------------------------------------------------------- */
235 /* ========================================================================= */
236 
/* Shorthand integer types used throughout the emitters. */
typedef signed char	_sc;
typedef unsigned char	_uc;
typedef signed short	_ss;
typedef unsigned short	_us;
typedef signed int	_sl;
typedef unsigned int	_ul;

/* Value casts; they go through uintptr_t so pointers (e.g. code
   addresses) can be passed as well as integers. */
#define _UC(X)		((_uc  )(uintptr_t)(X))
#define _US(X)		((_us  )(uintptr_t)(X))
#define _SL(X)		((_sl  )(uintptr_t)(X))
#define _UL(X)		((_ul  )(uintptr_t)(X))

/* Pointer casts, used to patch previously emitted code. */
#define _PUC(X)		((_uc *)(X))
#define _PUS(X)		((_us *)(X))
#define _PSL(X)		((_sl *)(X))
#define _PUL(X)		((_ul *)(X))
253 
#undef _B
#undef _W
#undef _L
#undef _Q

/* Emit one byte / 16-bit word / 32-bit long / 64-bit quad. */
#define _B(B)		x86_emit_byte((B))
#define _W(W)		x86_emit_word((W))
#define _L(L)		x86_emit_long((L))
#define _Q(Q)		x86_emit_quad((Q))

/* _MASK(N): the N low-order one bits.  The shift is performed on an
   unsigned operand so that N == 31 (used by _s31/_u31) does not shift
   into the sign bit of a signed int, which is undefined behavior. */
#define _MASK(N)	((((unsigned)1)<<(N))-1)
/* _siP/_uiP/_suiP: does I fit an N-bit signed / unsigned / either field? */
#define _siP(N,I)	(!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N)))
#define _uiP(N,I)	(!(((unsigned)(I))&~_MASK(N)))
#define _suiP(N,I)	(_siP(N,I) | _uiP(N,I))
268 
/* Checked immediates: with _ASM_SAFETY, verify that I fits a W-bit
   signed (_ck_s), unsigned (_ck_u), either (_ck_su) or displacement
   (_ck_d) field before masking; otherwise just mask. */
#ifndef _ASM_SAFETY
#define _ck_s(W,I)	(_UL(I) & _MASK(W))
#define _ck_u(W,I)    	(_UL(I) & _MASK(W))
#define _ck_su(W,I)    	(_UL(I) & _MASK(W))
#define _ck_d(W,I)    	(_UL(I) & _MASK(W))
#else
#define _ck_s(W,I)	(_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0(  "signed integer `"#I"' too large for "#W"-bit field"))
#define _ck_u(W,I)    	(_uiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0("unsigned integer `"#I"' too large for "#W"-bit field"))
#define _ck_su(W,I)    	(_suiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0(        "integer `"#I"' too large for "#W"-bit field"))
#define _ck_d(W,I)    	(_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0(    "displacement `"#I"' too large for "#W"-bit field"))
#endif
280 
/* Common fit tests (zero, signed/unsigned 8/16-bit) and the checked
   8/16-bit either-signedness immediates. */
#define _s0P(I)		((I)==0)
#define _s8P(I)		_siP(8,I)
#define _s16P(I)	_siP(16,I)
#define _u8P(I)		_uiP(8,I)
#define _u16P(I)	_uiP(16,I)

#define _su8(I)		_ck_su(8,I)
#define _su16(I)	_ck_su(16,I)
289 
/* Checked signed (_sN) and unsigned (_uN) immediates for every field
   width from 1 to 31 bits. */
#define _s1(I)          _ck_s( 1,I)
#define _s2(I)          _ck_s( 2,I)
#define _s3(I)          _ck_s( 3,I)
#define _s4(I)          _ck_s( 4,I)
#define _s5(I)          _ck_s( 5,I)
#define _s6(I)          _ck_s( 6,I)
#define _s7(I)          _ck_s( 7,I)
#define _s8(I)          _ck_s( 8,I)
#define _s9(I)          _ck_s( 9,I)
#define _s10(I)         _ck_s(10,I)
#define _s11(I)         _ck_s(11,I)
#define _s12(I)         _ck_s(12,I)
#define _s13(I)         _ck_s(13,I)
#define _s14(I)         _ck_s(14,I)
#define _s15(I)         _ck_s(15,I)
#define _s16(I)         _ck_s(16,I)
#define _s17(I)         _ck_s(17,I)
#define _s18(I)         _ck_s(18,I)
#define _s19(I)         _ck_s(19,I)
#define _s20(I)         _ck_s(20,I)
#define _s21(I)         _ck_s(21,I)
#define _s22(I)         _ck_s(22,I)
#define _s23(I)         _ck_s(23,I)
#define _s24(I)         _ck_s(24,I)
#define _s25(I)         _ck_s(25,I)
#define _s26(I)         _ck_s(26,I)
#define _s27(I)         _ck_s(27,I)
#define _s28(I)         _ck_s(28,I)
#define _s29(I)         _ck_s(29,I)
#define _s30(I)         _ck_s(30,I)
#define _s31(I)         _ck_s(31,I)
#define _u1(I)          _ck_u( 1,I)
#define _u2(I)          _ck_u( 2,I)
#define _u3(I)          _ck_u( 3,I)
#define _u4(I)          _ck_u( 4,I)
#define _u5(I)          _ck_u( 5,I)
#define _u6(I)          _ck_u( 6,I)
#define _u7(I)          _ck_u( 7,I)
#define _u8(I)          _ck_u( 8,I)
#define _u9(I)          _ck_u( 9,I)
#define _u10(I)         _ck_u(10,I)
#define _u11(I)         _ck_u(11,I)
#define _u12(I)         _ck_u(12,I)
#define _u13(I)         _ck_u(13,I)
#define _u14(I)         _ck_u(14,I)
#define _u15(I)         _ck_u(15,I)
#define _u16(I)         _ck_u(16,I)
#define _u17(I)         _ck_u(17,I)
#define _u18(I)         _ck_u(18,I)
#define _u19(I)         _ck_u(19,I)
#define _u20(I)         _ck_u(20,I)
#define _u21(I)         _ck_u(21,I)
#define _u22(I)         _ck_u(22,I)
#define _u23(I)         _ck_u(23,I)
#define _u24(I)         _ck_u(24,I)
#define _u25(I)         _ck_u(25,I)
#define _u26(I)         _ck_u(26,I)
#define _u27(I)         _ck_u(27,I)
#define _u28(I)         _ck_u(28,I)
#define _u29(I)         _ck_u(29,I)
#define _u30(I)         _ck_u(30,I)
#define _u31(I)         _ck_u(31,I)
352 
353 /* ========================================================================= */
354 /* --- ASSEMBLER ----------------------------------------------------------- */
355 /* ========================================================================= */
356 
/* Readable binary literals for 2-bit (mod/scale) and 3-bit (reg/rm)
   field values in ModR/M and SIB bytes. */
#define _b00		0
#define _b01		1
#define _b10		2
#define _b11		3

#define _b000		0
#define _b001		1
#define _b010		2
#define _b011		3
#define _b100		4
#define _b101		5
#define _b110		6
#define _b111		7
370 
/* PC-relative displacements.  _OFF4 is the distance from the current
   code pointer to D.  _D8/_D32 first emit a placeholder byte/long and
   then patch it in place, so by the time the offset is computed
   x86_get_target() already points past the displacement field, which
   is exactly what the CPU expects. */
#define _OFF4(D)	(_UL(D) - _UL(x86_get_target()))
#define _CKD8(D)	_ck_d(8, ((_uc) _OFF4(D)) )

#define _D8(D)		(_B(0), ((*(_PUC(x86_get_target())-1))= _CKD8(D)))
#define _D32(D)		(_L(0), ((*(_PUL(x86_get_target())-1))= _OFF4(D)))
376 
/* ModR/M and SIB byte assembly.  With _ASM_SAFETY each field is
   range-checked first (2-bit mod/scale, 3-bit reg/rm/index/base). */
#ifndef _ASM_SAFETY
# define _M(M)		(M)
# define _r(R)		(R)
# define _m(M)		(M)
# define _s(S)		(S)
# define _i(I)		(I)
# define _b(B)		(B)
#else
# define _M(M)		(((M)>3) ? x86_emit_failure0("internal error: mod = " #M) : (M))
# define _r(R)		(((R)>7) ? x86_emit_failure0("internal error: reg = " #R) : (R))
# define _m(M)		(((M)>7) ? x86_emit_failure0("internal error: r/m = " #M) : (M))
# define _s(S)		(((S)>3) ? x86_emit_failure0("internal error: memory scale = " #S) : (S))
# define _i(I)		(((I)>7) ? x86_emit_failure0("internal error: memory index = " #I) : (I))
# define _b(B)		(((B)>7) ? x86_emit_failure0("internal error: memory base = "  #B) : (B))
#endif

/* Emit a ModR/M byte (mod:2 reg:3 rm:3) / SIB byte (scale:2 index:3 base:3). */
#define _Mrm(Md,R,M)	_B((_M(Md)<<6)|(_r(R)<<3)|_m(M))
#define _SIB(Sc,I, B)	_B((_s(Sc)<<6)|(_i(I)<<3)|_b(B))

/* Map a byte-count scale factor (1/2/4/8) to the 2-bit SIB encoding. */
#define _SCL(S)		((((S)==1) ? _b00 : \
			 (((S)==2) ? _b01 : \
			 (((S)==4) ? _b10 : \
			 (((S)==8) ? _b11 : x86_emit_failure0("illegal scale: " #S))))))
400 
401 
402 /* --- Memory subformats - urgh! ------------------------------------------- */
403 
404 /* _r_D() is RIP addressing mode if X86_TARGET_64BIT, use _r_DSIB() instead */
/* Effective-address encoders.  Naming: _r_<disp><Base><Index,Scale>,
   e.g. _r_1BIS = 8-bit displacement + base + scaled index.  _r_DB and
   _r_DBIS pick the shortest displacement size (0/8/32 bits); a zero
   displacement still needs the 8-bit form when the base encodes as
   rBP/r13 (that rm value means "disp32, no base"). */
#define _r_D(	R, D	  )	(_Mrm(_b00,_rN(R),_b101 )		             ,_L((long)(D)))
#define _r_DSIB(R, D      )	(_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_L((long)(D)))
#define _r_0B(	R,   B    )	(_Mrm(_b00,_rN(R),_rA(B))			                   )
#define _r_0BIS(R,   B,I,S)	(_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B))              )
#define _r_1B(	R, D,B    )	(_Mrm(_b01,_rN(R),_rA(B))		             ,_B((long)(D)))
#define _r_1BIS(R, D,B,I,S)	(_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_B((long)(D)))
#define _r_4B(	R, D,B    )	(_Mrm(_b10,_rN(R),_rA(B))		             ,_L((long)(D)))
#define _r_4IS( R, D,I,S)	(_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_L((long)(D)))
#define _r_4BIS(R, D,B,I,S)	(_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_L((long)(D)))

#define _r_DB(  R, D,B    )	((_s0P(D) && (!_rbp13P(B)) ? _r_0B  (R,  B    ) : (_s8P(D) ? _r_1B(  R,D,B    ) : _r_4B(  R,D,B    ))))
#define _r_DBIS(R, D,B,I,S)	((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R,  B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S))))

/* Use RIP-addressing in 64-bit mode, if possible.  O is the number of
   immediate bytes that still follow the displacement field. */
#define _x86_RIP_addressing_possible(D,O)	(X86_RIP_RELATIVE_ADDR && \
						((uintptr)x86_get_target() + 4 + (O) - (D) <= 0xffffffff))

/* General dispatcher: encode (D,B,I,S) with whichever of the encoders
   above fits, falling back to RIP-relative or SIB-absolute forms in
   64-bit mode.  %esp can never be an index register. */
#define _r_X(   R, D,B,I,S,O)	(_r0P(I) ? (_r0P(B)    ? (!X86_TARGET_64BIT ? _r_D(R,D) : \
					                 (_x86_RIP_addressing_possible(D, O) ? \
				                          _r_D(R, (D) - ((uintptr)x86_get_target() + 4 + (O))) : \
				                          _r_DSIB(R,D))) : \
				           (_rIP(B)    ? _r_D   (R,D                )   : \
				           (_rsp12P(B) ? _r_DBIS(R,D,_rSP(),_rSP(),1)   : \
						         _r_DB  (R,D,     B       ))))  : \
				 (_r0P(B)	       ? _r_4IS (R,D,	         I,S)   : \
				 (!_rspP(I)            ? _r_DBIS(R,D,     B,     I,S)   : \
						         x86_emit_failure("illegal index register: %esp"))))
432 
433 
434 /* --- Instruction formats ------------------------------------------------- */
435 
/* Mode guards: _m32only/_m64only abort if the expansion X is emitted
   in the wrong target mode; _m64 silently drops X when not targeting
   x86-64 (used to make REX prefixes no-ops in 32-bit mode). */
#define _m32only(X)		(! X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 64-bit mode"))
#define _m64only(X)		(  X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 32-bit mode"))
#define _m64(X)			(  X86_TARGET_64BIT ? X : ((void)0)						)
439 
440 /*	 _format						     Opcd	  ModR/M dN(rB,rI,Sc)	  imm... */
441 
/* Opcode emitters.  _O = one-byte opcode, _OO = two-byte opcode,
   _Or/_OOr = opcode with the register number in its low 3 bits,
   _Os = opcode with the sign-extend bit (_b10) set when the immediate
   fits a signed byte.  Suffixes name the trailing fields emitted:
   _B/_W/_L/_Q immediates, _D8/_D32 pc-relative displacements, _Mrm a
   ModR/M byte, _r_X a full effective address.  _sW/_sL emit a short
   (8-bit) immediate when possible; _sWO/_sLO give that immediate's
   size in bytes (fed to _r_X so RIP offsets stay correct). */
#define	 _d16()					   (		  _B(0x66	)				  )
#define	  _O(	     OP				)  (		  _B(  OP	)				  )
#define	  _Or(	     OP,R			)  (		  _B( (OP)|_r(R))				  )
#define	 _OO(	     OP				)  ( _B((OP)>>8), _B( (uae_u8)(OP)	)			  )
#define	 _OOr(	     OP,R			)  ( _B((OP)>>8), _B( (OP)|_r(R))				  )
#define	  _Os(	     OP,B			)  (	_s8P(B) ? _B(((OP)|_b10)) : _B(OP)			  )
#define	    _sW(			     W	)  (				       _s8P(W) ? _B(W):_W(W)	  )
#define	    _sL(			     L	)  (				       _s8P(L) ? _B(L):_L(L)	  )
#define	    _sWO(			     W	)  (				       _s8P(W) ?    1 :   2	  )
#define	    _sLO(			     L	)  (				       _s8P(L) ?    1 :   4	  )
#define	  _O_B(	     OP			    ,B	)  (	    _O	    (  OP  )			      ,_B(B)	  )
#define	  _O_W(	     OP			    ,W	)  (	    _O	    (  OP  )			      ,_W(W)	  )
#define	  _O_L(	     OP			    ,L	)  (	    _O	    (  OP  )			      ,_L(L)	  )
#define	  _O_D8(     OP			    ,D	)  (	    _O	    (  OP  )			     ,_D8(D)	  )
#define	  _O_D32(     OP		    ,D	)  (	    _O	    (  OP  )			     ,_D32(D)	  )
#define	 _OO_D32(     OP		    ,D	)  (	   _OO	    (  OP  )			     ,_D32(D)	  )
#define	  _Os_sW(    OP			    ,W	)  (	    _Os	    (  OP,W)			     ,_sW(W)	  )
#define	  _Os_sL(    OP			    ,L	)  (	    _Os	    (  OP,L)			     ,_sL(L)	  )
#define	  _O_W_B(    OP			    ,W,B)  (	    _O	    (  OP  )			      ,_W(W),_B(B))
#define	  _Or_B(     OP,R		    ,B	)  (	    _Or	    (  OP,R)			      ,_B(B)	  )
#define	  _Or_W(     OP,R		    ,W	)  (	    _Or	    (  OP,R)			      ,_W(W)	  )
#define	  _Or_L(     OP,R		    ,L	)  (	    _Or	    (  OP,R)			      ,_L(L)	  )
#define	  _Or_Q(     OP,R		    ,Q	)  (	    _Or	    (  OP,R)			      ,_Q(Q)	  )
#define	  _O_Mrm(    OP	 ,MO,R,M		)  (	    _O	    (  OP  ),_Mrm(MO,R,M	    )		  )
#define	 _OO_Mrm(    OP	 ,MO,R,M		)  (	   _OO	    (  OP  ),_Mrm(MO,R,M	    )		  )
#define	  _O_Mrm_B(  OP	 ,MO,R,M	    ,B	)  (	    _O	    (  OP  ),_Mrm(MO,R,M	    ) ,_B(B)	  )
#define	  _O_Mrm_W(  OP	 ,MO,R,M	    ,W	)  (	    _O	    (  OP  ),_Mrm(MO,R,M	    ) ,_W(W)	  )
#define	  _O_Mrm_L(  OP	 ,MO,R,M	    ,L	)  (	    _O	    (  OP  ),_Mrm(MO,R,M	    ) ,_L(L)	  )
#define	 _OO_Mrm_B(  OP	 ,MO,R,M	    ,B	)  (	   _OO	    (  OP  ),_Mrm(MO,R,M	    ) ,_B(B)	  )
#define	  _Os_Mrm_sW(OP	 ,MO,R,M	    ,W	)  (	    _Os	    (  OP,W),_Mrm(MO,R,M	    ),_sW(W)	  )
#define	  _Os_Mrm_sL(OP	 ,MO,R,M	    ,L	)  (	    _Os	    (  OP,L),_Mrm(MO,R,M	    ),_sL(L)	  )
#define	  _O_r_X(    OP	    ,R	,MD,MB,MI,MS	)  (	    _O	    (  OP  ),_r_X(   R	,MD,MB,MI,MS,0)		  )
#define	 _OO_r_X(    OP	    ,R	,MD,MB,MI,MS	)  (	   _OO	    (  OP  ),_r_X(   R	,MD,MB,MI,MS,0)		  )
#define	  _O_r_X_B(  OP	    ,R	,MD,MB,MI,MS,B	)  (	    _O	    (  OP  ),_r_X(   R	,MD,MB,MI,MS,1) ,_B(B)	  )
#define	  _O_r_X_W(  OP	    ,R	,MD,MB,MI,MS,W	)  (	    _O	    (  OP  ),_r_X(   R	,MD,MB,MI,MS,2) ,_W(W)	  )
#define	  _O_r_X_L(  OP	    ,R	,MD,MB,MI,MS,L	)  (	    _O	    (  OP  ),_r_X(   R	,MD,MB,MI,MS,4) ,_L(L)	  )
#define	 _OO_r_X_B(  OP	    ,R	,MD,MB,MI,MS,B	)  (	   _OO	    (  OP  ),_r_X(   R	,MD,MB,MI,MS,1) ,_B(B)	  )
#define	  _Os_r_X_sW(OP	    ,R	,MD,MB,MI,MS,W	)  (	    _Os	    (  OP,W),_r_X(   R	,MD,MB,MI,MS,_sWO(W)),_sW(W))
#define	  _Os_r_X_sL(OP	    ,R	,MD,MB,MI,MS,L	)  (	    _Os	    (  OP,L),_r_X(   R	,MD,MB,MI,MS,_sLO(L)),_sL(L))
#define	  _O_X_B(    OP		,MD,MB,MI,MS,B	)  (	    _O_r_X_B(  OP	    ,0	,MD,MB,MI,MS	 ,B)	  )
#define	  _O_X_W(    OP		,MD,MB,MI,MS,W	)  (	    _O_r_X_W(  OP	    ,0	,MD,MB,MI,MS	 ,W)	  )
#define	  _O_X_L(    OP		,MD,MB,MI,MS,L	)  (	    _O_r_X_L(  OP	    ,0	,MD,MB,MI,MS	 ,L)	  )
484 
485 
486 /* --- REX prefixes -------------------------------------------------------- */
487 
#undef _VOID

/* REX prefix emission (x86-64 only; _m64 makes these no-ops in 32-bit
   mode, _m64only rejects the instruction outright).  _d64 builds the
   0x40 | W<<3 | R<<2 | X<<1 | B prefix byte; __REXwrxb emits it only
   when some bit is set, or when L is non-zero (the prefix is then
   required even with all bits clear, e.g. to reach SPL/BPL/SIL/DIL). */
#define _VOID()			((void)0)
#define _BIT(X)			(!!(X))
#define _d64(W,R,X,B)		(_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B)))

#define __REXwrxb(L,W,R,X,B)	((W|R|X|B) || (L) ? _d64(W,R,X,B) : _VOID())
#define __REXwrx_(L,W,R,X,MR)	(__REXwrxb(L,W,R,X,_BIT(_rIP(MR)?0:_rXP(MR))))
#define __REXw_x_(L,W,R,X,MR)	(__REXwrx_(L,W,_BIT(_rXP(R)),X,MR))
#define __REX_reg(RR)		(__REXwrxb(0,0,0,00,_BIT(_rXP(RR))))
#define __REX_mem(MB,MI)	(__REXwrxb(0,0,0,_BIT(_rXP(MI)),_BIT(_rXP(MB))))

/* Byte (B), 32-bit (L) and 64-bit (Q, REX.W) operand variants for
   register-register, memory-register and register-memory forms. */
// FIXME: can't mix new (SPL,BPL,SIL,DIL) with (AH,BH,CH,DH)
#define _REXBrr(RR,MR)		_m64(__REXw_x_(_r1e8lP(RR)||_r1e8lP(MR),0,RR,0,MR))
#define _REXBmr(MB,MI,RD)	_m64(__REXw_x_(_r1e8lP(RD)||_r1e8lP(MB),0,RD,_BIT(_rXP(MI)),MB))
#define _REXBrm(RS,MB,MI)	_REXBmr(MB,MI,RS)

#define _REXBLrr(RR,MR)		_m64(__REXw_x_(_r1e8lP(MR),0,RR,0,MR))
#define _REXLrr(RR,MR)		_m64(__REXw_x_(0,0,RR,0,MR))
#define _REXLmr(MB,MI,RD)	_m64(__REXw_x_(0,0,RD,_BIT(_rXP(MI)),MB))
#define _REXLrm(RS,MB,MI)	_REXLmr(MB,MI,RS)
#define _REXLr(RR)		_m64(__REX_reg(RR))
#define _REXLm(MB,MI)		_m64(__REX_mem(MB,MI))

#define _REXQrr(RR,MR)		_m64only(__REXw_x_(0,1,RR,0,MR))
#define _REXQmr(MB,MI,RD)	_m64only(__REXw_x_(0,1,RD,_BIT(_rXP(MI)),MB))
#define _REXQrm(RS,MB,MI)	_REXQmr(MB,MI,RS)
#define _REXQr(RR)		_m64only(__REX_reg(RR))
#define _REXQm(MB,MI)		_m64only(__REX_mem(MB,MI))
517 
518 
519 /* ========================================================================= */
520 /* --- Fully-qualified intrinsic instructions ------------------------------ */
521 /* ========================================================================= */
522 
523 /*	OPCODE	+ i	= immediate operand
524  *		+ r	= register operand
525  *		+ m	= memory operand (disp,base,index,scale)
526  *		+ sr/sm	= a star preceding a register or memory
527  *		+ 0	= top of stack register (for FPU instructions)
528  *
529  *	NOTE in x86-64 mode: a memory operand with only a valid
 *	displacement value will lead to the expected absolute mode. If
531  *	RIP addressing is necessary, X86_RIP shall be used as the base
532  *	register argument.
533  */
534 
535 /* --- ALU instructions ---------------------------------------------------- */
536 
/* ALU operation group numbers: these occupy bits 3..5 of the primary
   opcode byte in the register forms, and double as the /digit (reg
   field) extension in the 0x80/0x81/0x83 immediate forms. */
enum {
  X86_ADD = 0,
  X86_OR  = 1,
  X86_ADC = 2,
  X86_SBB = 3,
  X86_AND = 4,
  X86_SUB = 5,
  X86_XOR = 6,
  X86_CMP = 7,
};
547 
548 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
549 
/* 8-bit ALU forms (rr, mr, rm, ir, im).  OP is one of the X86_* group
   numbers above; the primary opcode byte is (OP << 3) plus the usual
   direction/size offsets (+2 for m->r, +4 for AL,imm). */
#define _ALUBrr(OP,RS, RD)		(_REXBrr(RS, RD),		_O_Mrm		(((OP) << 3)	,_b11,_r1(RS),_r1(RD)				))
#define _ALUBmr(OP, MD, MB, MI, MS, RD)	(_REXBmr(MB, MI, RD),		_O_r_X		(((OP) << 3) + 2,_r1(RD)		,MD,MB,MI,MS		))
/* Fixed: the original passed a stray empty argument (",    ,") between
   the opcode and _r1(RS), giving _O_r_X seven arguments instead of the
   six it declares — a hard compile error on any use of _ALUBrm. */
#define _ALUBrm(OP, RS, MD, MB, MI, MS)	(_REXBrm(RS, MB, MI),		_O_r_X		(((OP) << 3)	,_r1(RS)		,MD,MB,MI,MS		))
#define _ALUBir(OP, IM, RD)		(X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \
					(_REXBrr(0, RD),		_O_B		(((OP) << 3) + 4					,_su8(IM))) : \
					(_REXBrr(0, RD),		_O_Mrm_B	(0x80		,_b11,OP     ,_r1(RD)			,_su8(IM))) )
#define _ALUBim(OP, IM, MD, MB, MI, MS)	(_REXBrm(0, MB, MI),		_O_r_X_B	(0x80		     ,OP		,MD,MB,MI,MS	,_su8(IM)))
557 
/* 16-bit ALU forms: a 0x66 operand-size prefix (_d16) followed by the
   32-bit encodings with 16-bit register checks and immediates. */
#define _ALUWrr(OP, RS, RD)		(_d16(), _REXLrr(RS, RD),	_O_Mrm		(((OP) << 3) + 1,_b11,_r2(RS),_r2(RD)				))
#define _ALUWmr(OP, MD, MB, MI, MS, RD)	(_d16(), _REXLmr(MB, MI, RD),	_O_r_X		(((OP) << 3) + 3     ,_r2(RD)		,MD,MB,MI,MS		))
#define _ALUWrm(OP, RS, MD, MB, MI, MS)	(_d16(), _REXLrm(RS, MB, MI),	_O_r_X		(((OP) << 3) + 1     ,_r2(RS)		,MD,MB,MI,MS		))
#define _ALUWir(OP, IM, RD)		(X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \
					(_d16(), _REXLrr(0, RD),	_O_W		(((OP) << 3) + 5					,_su16(IM))) : \
					(_d16(), _REXLrr(0, RD),	_Os_Mrm_sW	(0x81		,_b11,OP     ,_r2(RD)			,_su16(IM))) )
#define _ALUWim(OP, IM, MD, MB, MI, MS)	(_d16(), _REXLrm(0, MB, MI),	_Os_r_X_sW	(0x81		     ,OP		,MD,MB,MI,MS	,_su16(IM)))
565 
/* 32-bit ALU forms. */
#define _ALULrr(OP, RS, RD)		(_REXLrr(RS, RD),		_O_Mrm		(((OP) << 3) + 1,_b11,_r4(RS),_r4(RD)				))
#define _ALULmr(OP, MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_O_r_X		(((OP) << 3) + 3     ,_r4(RD)		,MD,MB,MI,MS		))
#define _ALULrm(OP, RS, MD, MB, MI, MS)	(_REXLrm(RS, MB, MI),		_O_r_X		(((OP) << 3) + 1     ,_r4(RS)		,MD,MB,MI,MS		))
#define _ALULir(OP, IM, RD)		(X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \
					(_REXLrr(0, RD),		_O_L		(((OP) << 3) + 5					,IM	)) : \
					(_REXLrr(0, RD),		_Os_Mrm_sL	(0x81		,_b11,OP     ,_r4(RD)			,IM	)) )
#define _ALULim(OP, IM, MD, MB, MI, MS)	(_REXLrm(0, MB, MI),		_Os_r_X_sL	(0x81		     ,OP		,MD,MB,MI,MS	,IM	))
573 
/* 64-bit ALU forms (REX.W prefix; x86-64 only). */
#define _ALUQrr(OP, RS, RD)		(_REXQrr(RS, RD),		_O_Mrm		(((OP) << 3) + 1,_b11,_r8(RS),_r8(RD)				))
#define _ALUQmr(OP, MD, MB, MI, MS, RD)	(_REXQmr(MB, MI, RD),		_O_r_X		(((OP) << 3) + 3     ,_r8(RD)		,MD,MB,MI,MS		))
#define _ALUQrm(OP, RS, MD, MB, MI, MS)	(_REXQrm(RS, MB, MI),		_O_r_X		(((OP) << 3) + 1     ,_r8(RS)		,MD,MB,MI,MS		))
#define _ALUQir(OP, IM, RD)		(X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \
					(_REXQrr(0, RD),		_O_L		(((OP) << 3) + 5					,IM	)) : \
					(_REXQrr(0, RD),		_Os_Mrm_sL	(0x81		,_b11,OP     ,_r8(RD)			,IM	)) )
#define _ALUQim(OP, IM, MD, MB, MI, MS)	(_REXQrm(0, MB, MI),		_Os_r_X_sL	(0x81		     ,OP		,MD,MB,MI,MS	,IM	))
581 
/* ADC — add with carry, in byte/word/long/quad widths. */
#define ADCBrr(RS, RD)			_ALUBrr(X86_ADC, RS, RD)
#define ADCBmr(MD, MB, MI, MS, RD)	_ALUBmr(X86_ADC, MD, MB, MI, MS, RD)
#define ADCBrm(RS, MD, MB, MI, MS)	_ALUBrm(X86_ADC, RS, MD, MB, MI, MS)
#define ADCBir(IM, RD)			_ALUBir(X86_ADC, IM, RD)
#define ADCBim(IM, MD, MB, MI, MS)	_ALUBim(X86_ADC, IM, MD, MB, MI, MS)

#define ADCWrr(RS, RD)			_ALUWrr(X86_ADC, RS, RD)
#define ADCWmr(MD, MB, MI, MS, RD)	_ALUWmr(X86_ADC, MD, MB, MI, MS, RD)
#define ADCWrm(RS, MD, MB, MI, MS)	_ALUWrm(X86_ADC, RS, MD, MB, MI, MS)
#define ADCWir(IM, RD)			_ALUWir(X86_ADC, IM, RD)
#define ADCWim(IM, MD, MB, MI, MS)	_ALUWim(X86_ADC, IM, MD, MB, MI, MS)

#define ADCLrr(RS, RD)			_ALULrr(X86_ADC, RS, RD)
#define ADCLmr(MD, MB, MI, MS, RD)	_ALULmr(X86_ADC, MD, MB, MI, MS, RD)
#define ADCLrm(RS, MD, MB, MI, MS)	_ALULrm(X86_ADC, RS, MD, MB, MI, MS)
#define ADCLir(IM, RD)			_ALULir(X86_ADC, IM, RD)
#define ADCLim(IM, MD, MB, MI, MS)	_ALULim(X86_ADC, IM, MD, MB, MI, MS)

#define ADCQrr(RS, RD)			_ALUQrr(X86_ADC, RS, RD)
#define ADCQmr(MD, MB, MI, MS, RD)	_ALUQmr(X86_ADC, MD, MB, MI, MS, RD)
#define ADCQrm(RS, MD, MB, MI, MS)	_ALUQrm(X86_ADC, RS, MD, MB, MI, MS)
#define ADCQir(IM, RD)			_ALUQir(X86_ADC, IM, RD)
#define ADCQim(IM, MD, MB, MI, MS)	_ALUQim(X86_ADC, IM, MD, MB, MI, MS)
605 
/* ADD, in byte/word/long/quad widths. */
#define ADDBrr(RS, RD)			_ALUBrr(X86_ADD, RS, RD)
#define ADDBmr(MD, MB, MI, MS, RD)	_ALUBmr(X86_ADD, MD, MB, MI, MS, RD)
#define ADDBrm(RS, MD, MB, MI, MS)	_ALUBrm(X86_ADD, RS, MD, MB, MI, MS)
#define ADDBir(IM, RD)			_ALUBir(X86_ADD, IM, RD)
#define ADDBim(IM, MD, MB, MI, MS)	_ALUBim(X86_ADD, IM, MD, MB, MI, MS)

#define ADDWrr(RS, RD)			_ALUWrr(X86_ADD, RS, RD)
#define ADDWmr(MD, MB, MI, MS, RD)	_ALUWmr(X86_ADD, MD, MB, MI, MS, RD)
#define ADDWrm(RS, MD, MB, MI, MS)	_ALUWrm(X86_ADD, RS, MD, MB, MI, MS)
#define ADDWir(IM, RD)			_ALUWir(X86_ADD, IM, RD)
#define ADDWim(IM, MD, MB, MI, MS)	_ALUWim(X86_ADD, IM, MD, MB, MI, MS)

#define ADDLrr(RS, RD)			_ALULrr(X86_ADD, RS, RD)
#define ADDLmr(MD, MB, MI, MS, RD)	_ALULmr(X86_ADD, MD, MB, MI, MS, RD)
#define ADDLrm(RS, MD, MB, MI, MS)	_ALULrm(X86_ADD, RS, MD, MB, MI, MS)
#define ADDLir(IM, RD)			_ALULir(X86_ADD, IM, RD)
#define ADDLim(IM, MD, MB, MI, MS)	_ALULim(X86_ADD, IM, MD, MB, MI, MS)

#define ADDQrr(RS, RD)			_ALUQrr(X86_ADD, RS, RD)
#define ADDQmr(MD, MB, MI, MS, RD)	_ALUQmr(X86_ADD, MD, MB, MI, MS, RD)
#define ADDQrm(RS, MD, MB, MI, MS)	_ALUQrm(X86_ADD, RS, MD, MB, MI, MS)
#define ADDQir(IM, RD)			_ALUQir(X86_ADD, IM, RD)
#define ADDQim(IM, MD, MB, MI, MS)	_ALUQim(X86_ADD, IM, MD, MB, MI, MS)
629 
/* AND, in byte/word/long/quad widths. */
#define ANDBrr(RS, RD)			_ALUBrr(X86_AND, RS, RD)
#define ANDBmr(MD, MB, MI, MS, RD)	_ALUBmr(X86_AND, MD, MB, MI, MS, RD)
#define ANDBrm(RS, MD, MB, MI, MS)	_ALUBrm(X86_AND, RS, MD, MB, MI, MS)
#define ANDBir(IM, RD)			_ALUBir(X86_AND, IM, RD)
#define ANDBim(IM, MD, MB, MI, MS)	_ALUBim(X86_AND, IM, MD, MB, MI, MS)

#define ANDWrr(RS, RD)			_ALUWrr(X86_AND, RS, RD)
#define ANDWmr(MD, MB, MI, MS, RD)	_ALUWmr(X86_AND, MD, MB, MI, MS, RD)
#define ANDWrm(RS, MD, MB, MI, MS)	_ALUWrm(X86_AND, RS, MD, MB, MI, MS)
#define ANDWir(IM, RD)			_ALUWir(X86_AND, IM, RD)
#define ANDWim(IM, MD, MB, MI, MS)	_ALUWim(X86_AND, IM, MD, MB, MI, MS)

#define ANDLrr(RS, RD)			_ALULrr(X86_AND, RS, RD)
#define ANDLmr(MD, MB, MI, MS, RD)	_ALULmr(X86_AND, MD, MB, MI, MS, RD)
#define ANDLrm(RS, MD, MB, MI, MS)	_ALULrm(X86_AND, RS, MD, MB, MI, MS)
#define ANDLir(IM, RD)			_ALULir(X86_AND, IM, RD)
#define ANDLim(IM, MD, MB, MI, MS)	_ALULim(X86_AND, IM, MD, MB, MI, MS)

#define ANDQrr(RS, RD)			_ALUQrr(X86_AND, RS, RD)
#define ANDQmr(MD, MB, MI, MS, RD)	_ALUQmr(X86_AND, MD, MB, MI, MS, RD)
#define ANDQrm(RS, MD, MB, MI, MS)	_ALUQrm(X86_AND, RS, MD, MB, MI, MS)
#define ANDQir(IM, RD)			_ALUQir(X86_AND, IM, RD)
#define ANDQim(IM, MD, MB, MI, MS)	_ALUQim(X86_AND, IM, MD, MB, MI, MS)
653 
/* CMP — compare (subtract, setting flags only), byte/word/long/quad. */
#define CMPBrr(RS, RD)			_ALUBrr(X86_CMP, RS, RD)
#define CMPBmr(MD, MB, MI, MS, RD)	_ALUBmr(X86_CMP, MD, MB, MI, MS, RD)
#define CMPBrm(RS, MD, MB, MI, MS)	_ALUBrm(X86_CMP, RS, MD, MB, MI, MS)
#define CMPBir(IM, RD)			_ALUBir(X86_CMP, IM, RD)
#define CMPBim(IM, MD, MB, MI, MS)	_ALUBim(X86_CMP, IM, MD, MB, MI, MS)

#define CMPWrr(RS, RD)			_ALUWrr(X86_CMP, RS, RD)
#define CMPWmr(MD, MB, MI, MS, RD)	_ALUWmr(X86_CMP, MD, MB, MI, MS, RD)
#define CMPWrm(RS, MD, MB, MI, MS)	_ALUWrm(X86_CMP, RS, MD, MB, MI, MS)
#define CMPWir(IM, RD)			_ALUWir(X86_CMP, IM, RD)
#define CMPWim(IM, MD, MB, MI, MS)	_ALUWim(X86_CMP, IM, MD, MB, MI, MS)

#define CMPLrr(RS, RD)			_ALULrr(X86_CMP, RS, RD)
#define CMPLmr(MD, MB, MI, MS, RD)	_ALULmr(X86_CMP, MD, MB, MI, MS, RD)
#define CMPLrm(RS, MD, MB, MI, MS)	_ALULrm(X86_CMP, RS, MD, MB, MI, MS)
#define CMPLir(IM, RD)			_ALULir(X86_CMP, IM, RD)
#define CMPLim(IM, MD, MB, MI, MS)	_ALULim(X86_CMP, IM, MD, MB, MI, MS)

#define CMPQrr(RS, RD)			_ALUQrr(X86_CMP, RS, RD)
#define CMPQmr(MD, MB, MI, MS, RD)	_ALUQmr(X86_CMP, MD, MB, MI, MS, RD)
#define CMPQrm(RS, MD, MB, MI, MS)	_ALUQrm(X86_CMP, RS, MD, MB, MI, MS)
#define CMPQir(IM, RD)			_ALUQir(X86_CMP, IM, RD)
#define CMPQim(IM, MD, MB, MI, MS)	_ALUQim(X86_CMP, IM, MD, MB, MI, MS)
677 
/* OR, in byte/word/long/quad widths. */
#define ORBrr(RS, RD)			_ALUBrr(X86_OR, RS, RD)
#define ORBmr(MD, MB, MI, MS, RD)	_ALUBmr(X86_OR, MD, MB, MI, MS, RD)
#define ORBrm(RS, MD, MB, MI, MS)	_ALUBrm(X86_OR, RS, MD, MB, MI, MS)
#define ORBir(IM, RD)			_ALUBir(X86_OR, IM, RD)
#define ORBim(IM, MD, MB, MI, MS)	_ALUBim(X86_OR, IM, MD, MB, MI, MS)

#define ORWrr(RS, RD)			_ALUWrr(X86_OR, RS, RD)
#define ORWmr(MD, MB, MI, MS, RD)	_ALUWmr(X86_OR, MD, MB, MI, MS, RD)
#define ORWrm(RS, MD, MB, MI, MS)	_ALUWrm(X86_OR, RS, MD, MB, MI, MS)
#define ORWir(IM, RD)			_ALUWir(X86_OR, IM, RD)
#define ORWim(IM, MD, MB, MI, MS)	_ALUWim(X86_OR, IM, MD, MB, MI, MS)

#define ORLrr(RS, RD)			_ALULrr(X86_OR, RS, RD)
#define ORLmr(MD, MB, MI, MS, RD)	_ALULmr(X86_OR, MD, MB, MI, MS, RD)
#define ORLrm(RS, MD, MB, MI, MS)	_ALULrm(X86_OR, RS, MD, MB, MI, MS)
#define ORLir(IM, RD)			_ALULir(X86_OR, IM, RD)
#define ORLim(IM, MD, MB, MI, MS)	_ALULim(X86_OR, IM, MD, MB, MI, MS)

#define ORQrr(RS, RD)			_ALUQrr(X86_OR, RS, RD)
#define ORQmr(MD, MB, MI, MS, RD)	_ALUQmr(X86_OR, MD, MB, MI, MS, RD)
#define ORQrm(RS, MD, MB, MI, MS)	_ALUQrm(X86_OR, RS, MD, MB, MI, MS)
#define ORQir(IM, RD)			_ALUQir(X86_OR, IM, RD)
#define ORQim(IM, MD, MB, MI, MS)	_ALUQim(X86_OR, IM, MD, MB, MI, MS)
701 
702 #define SBBBrr(RS, RD)			_ALUBrr(X86_SBB, RS, RD)
703 #define SBBBmr(MD, MB, MI, MS, RD)	_ALUBmr(X86_SBB, MD, MB, MI, MS, RD)
704 #define SBBBrm(RS, MD, MB, MI, MS)	_ALUBrm(X86_SBB, RS, MD, MB, MI, MS)
705 #define SBBBir(IM, RD)			_ALUBir(X86_SBB, IM, RD)
706 #define SBBBim(IM, MD, MB, MI, MS)	_ALUBim(X86_SBB, IM, MD, MB, MI, MS)
707 
708 #define SBBWrr(RS, RD)			_ALUWrr(X86_SBB, RS, RD)
709 #define SBBWmr(MD, MB, MI, MS, RD)	_ALUWmr(X86_SBB, MD, MB, MI, MS, RD)
710 #define SBBWrm(RS, MD, MB, MI, MS)	_ALUWrm(X86_SBB, RS, MD, MB, MI, MS)
711 #define SBBWir(IM, RD)			_ALUWir(X86_SBB, IM, RD)
712 #define SBBWim(IM, MD, MB, MI, MS)	_ALUWim(X86_SBB, IM, MD, MB, MI, MS)
713 
714 #define SBBLrr(RS, RD)			_ALULrr(X86_SBB, RS, RD)
715 #define SBBLmr(MD, MB, MI, MS, RD)	_ALULmr(X86_SBB, MD, MB, MI, MS, RD)
716 #define SBBLrm(RS, MD, MB, MI, MS)	_ALULrm(X86_SBB, RS, MD, MB, MI, MS)
717 #define SBBLir(IM, RD)			_ALULir(X86_SBB, IM, RD)
718 #define SBBLim(IM, MD, MB, MI, MS)	_ALULim(X86_SBB, IM, MD, MB, MI, MS)
719 
720 #define SBBQrr(RS, RD)			_ALUQrr(X86_SBB, RS, RD)
721 #define SBBQmr(MD, MB, MI, MS, RD)	_ALUQmr(X86_SBB, MD, MB, MI, MS, RD)
722 #define SBBQrm(RS, MD, MB, MI, MS)	_ALUQrm(X86_SBB, RS, MD, MB, MI, MS)
723 #define SBBQir(IM, RD)			_ALUQir(X86_SBB, IM, RD)
724 #define SBBQim(IM, MD, MB, MI, MS)	_ALUQim(X86_SBB, IM, MD, MB, MI, MS)
725 
726 #define SUBBrr(RS, RD)			_ALUBrr(X86_SUB, RS, RD)
727 #define SUBBmr(MD, MB, MI, MS, RD)	_ALUBmr(X86_SUB, MD, MB, MI, MS, RD)
728 #define SUBBrm(RS, MD, MB, MI, MS)	_ALUBrm(X86_SUB, RS, MD, MB, MI, MS)
729 #define SUBBir(IM, RD)			_ALUBir(X86_SUB, IM, RD)
730 #define SUBBim(IM, MD, MB, MI, MS)	_ALUBim(X86_SUB, IM, MD, MB, MI, MS)
731 
732 #define SUBWrr(RS, RD)			_ALUWrr(X86_SUB, RS, RD)
733 #define SUBWmr(MD, MB, MI, MS, RD)	_ALUWmr(X86_SUB, MD, MB, MI, MS, RD)
734 #define SUBWrm(RS, MD, MB, MI, MS)	_ALUWrm(X86_SUB, RS, MD, MB, MI, MS)
735 #define SUBWir(IM, RD)			_ALUWir(X86_SUB, IM, RD)
736 #define SUBWim(IM, MD, MB, MI, MS)	_ALUWim(X86_SUB, IM, MD, MB, MI, MS)
737 
738 #define SUBLrr(RS, RD)			_ALULrr(X86_SUB, RS, RD)
739 #define SUBLmr(MD, MB, MI, MS, RD)	_ALULmr(X86_SUB, MD, MB, MI, MS, RD)
740 #define SUBLrm(RS, MD, MB, MI, MS)	_ALULrm(X86_SUB, RS, MD, MB, MI, MS)
741 #define SUBLir(IM, RD)			_ALULir(X86_SUB, IM, RD)
742 #define SUBLim(IM, MD, MB, MI, MS)	_ALULim(X86_SUB, IM, MD, MB, MI, MS)
743 
744 #define SUBQrr(RS, RD)			_ALUQrr(X86_SUB, RS, RD)
745 #define SUBQmr(MD, MB, MI, MS, RD)	_ALUQmr(X86_SUB, MD, MB, MI, MS, RD)
746 #define SUBQrm(RS, MD, MB, MI, MS)	_ALUQrm(X86_SUB, RS, MD, MB, MI, MS)
747 #define SUBQir(IM, RD)			_ALUQir(X86_SUB, IM, RD)
748 #define SUBQim(IM, MD, MB, MI, MS)	_ALUQim(X86_SUB, IM, MD, MB, MI, MS)
749 
750 #define XORBrr(RS, RD)			_ALUBrr(X86_XOR, RS, RD)
751 #define XORBmr(MD, MB, MI, MS, RD)	_ALUBmr(X86_XOR, MD, MB, MI, MS, RD)
752 #define XORBrm(RS, MD, MB, MI, MS)	_ALUBrm(X86_XOR, RS, MD, MB, MI, MS)
753 #define XORBir(IM, RD)			_ALUBir(X86_XOR, IM, RD)
754 #define XORBim(IM, MD, MB, MI, MS)	_ALUBim(X86_XOR, IM, MD, MB, MI, MS)
755 
756 #define XORWrr(RS, RD)			_ALUWrr(X86_XOR, RS, RD)
757 #define XORWmr(MD, MB, MI, MS, RD)	_ALUWmr(X86_XOR, MD, MB, MI, MS, RD)
758 #define XORWrm(RS, MD, MB, MI, MS)	_ALUWrm(X86_XOR, RS, MD, MB, MI, MS)
759 #define XORWir(IM, RD)			_ALUWir(X86_XOR, IM, RD)
760 #define XORWim(IM, MD, MB, MI, MS)	_ALUWim(X86_XOR, IM, MD, MB, MI, MS)
761 
762 #define XORLrr(RS, RD)			_ALULrr(X86_XOR, RS, RD)
763 #define XORLmr(MD, MB, MI, MS, RD)	_ALULmr(X86_XOR, MD, MB, MI, MS, RD)
764 #define XORLrm(RS, MD, MB, MI, MS)	_ALULrm(X86_XOR, RS, MD, MB, MI, MS)
765 #define XORLir(IM, RD)			_ALULir(X86_XOR, IM, RD)
766 #define XORLim(IM, MD, MB, MI, MS)	_ALULim(X86_XOR, IM, MD, MB, MI, MS)
767 
768 #define XORQrr(RS, RD)			_ALUQrr(X86_XOR, RS, RD)
769 #define XORQmr(MD, MB, MI, MS, RD)	_ALUQmr(X86_XOR, MD, MB, MI, MS, RD)
770 #define XORQrm(RS, MD, MB, MI, MS)	_ALUQrm(X86_XOR, RS, MD, MB, MI, MS)
771 #define XORQir(IM, RD)			_ALUQir(X86_XOR, IM, RD)
772 #define XORQim(IM, MD, MB, MI, MS)	_ALUQim(X86_XOR, IM, MD, MB, MI, MS)
773 
774 
775 /* --- Shift/Rotate instructions ------------------------------------------- */
776 
/* ModRM reg-field selectors (/digit) for the shift/rotate opcode group
   (0xC0/0xC1 imm8 forms, 0xD0-0xD3 by-1 and by-CL forms).
   Note the gap at 6: /6 is not used here (SAL is emitted as SHL, /4). */
enum {
  X86_ROL = 0,
  X86_ROR = 1,
  X86_RCL = 2,
  X86_RCR = 3,
  X86_SHL = 4,
  X86_SHR = 5,
  X86_SAR = 7,
};
786 
787 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
788 
789 #define _ROTSHIBir(OP,IM,RD)		(X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
790 					(_REXBrr(0, RD),		_O_Mrm		(0xd0		,_b11,OP,_r1(RD)				)) : \
791 					(_REXBrr(0, RD),		_O_Mrm_B	(0xc0		,_b11,OP,_r1(RD)			,_u8(IM))) )
792 #define _ROTSHIBim(OP,IM,MD,MB,MI,MS)	(X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
793 					(_REXBrm(0, MB, MI),		_O_r_X		(0xd0		     ,OP		,MD,MB,MI,MS		)) : \
794 					(_REXBrm(0, MB, MI),		_O_r_X_B	(0xc0		     ,OP		,MD,MB,MI,MS	,_u8(IM))) )
795 #define _ROTSHIBrr(OP,RS,RD)		(((RS) == X86_CL) ? \
796 					(_REXBrr(RS, RD),		_O_Mrm		(0xd2		,_b11,OP,_r1(RD)				)) : \
797 									x86_emit_failure("source register must be CL"					)  )
798 #define _ROTSHIBrm(OP,RS,MD,MB,MI,MS)	(((RS) == X86_CL) ? \
799 					(_REXBrm(RS, MB, MI),		_O_r_X		(0xd2		     ,OP		,MD,MB,MI,MS		)) : \
800 									x86_emit_failure("source register must be CL"					)  )
801 
802 #define _ROTSHIWir(OP,IM,RD)		(X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
803 					(_d16(), _REXLrr(0, RD),	_O_Mrm		(0xd1		,_b11,OP,_r2(RD)				)) : \
804 					(_d16(), _REXLrr(0, RD),	_O_Mrm_B	(0xc1		,_b11,OP,_r2(RD)			,_u8(IM))) )
805 #define _ROTSHIWim(OP,IM,MD,MB,MI,MS)	(X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
806 					(_d16(), _REXLrm(0, MB, MI),	_O_r_X		(0xd1		     ,OP		,MD,MB,MI,MS		)) : \
807 					(_d16(), _REXLrm(0, MB, MI),	_O_r_X_B	(0xc1		     ,OP		,MD,MB,MI,MS	,_u8(IM))) )
808 #define _ROTSHIWrr(OP,RS,RD)		(((RS) == X86_CL) ? \
809 					(_d16(), _REXLrr(RS, RD),	_O_Mrm		(0xd3		,_b11,OP,_r2(RD)				)) : \
810 									x86_emit_failure("source register must be CL"					)  )
811 #define _ROTSHIWrm(OP,RS,MD,MB,MI,MS)	(((RS) == X86_CL) ? \
812 					(_d16(), _REXLrm(RS, MB, MI),	_O_r_X		(0xd3		     ,OP		,MD,MB,MI,MS		)) : \
813 									x86_emit_failure("source register must be CL"					)  )
814 
815 #define _ROTSHILir(OP,IM,RD)		(X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
816 					(_REXLrr(0, RD),		_O_Mrm		(0xd1		,_b11,OP,_r4(RD)				)) : \
817 					(_REXLrr(0, RD),		_O_Mrm_B	(0xc1		,_b11,OP,_r4(RD)			,_u8(IM))) )
818 #define _ROTSHILim(OP,IM,MD,MB,MI,MS)	(X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
819 					(_REXLrm(0, MB, MI),		_O_r_X		(0xd1		     ,OP		,MD,MB,MI,MS		)) : \
820 					(_REXLrm(0, MB, MI),		_O_r_X_B	(0xc1		     ,OP		,MD,MB,MI,MS	,_u8(IM))) )
821 #define _ROTSHILrr(OP,RS,RD)		(((RS) == X86_CL) ? \
822 					(_REXLrr(RS, RD),		_O_Mrm		(0xd3		,_b11,OP,_r4(RD)				)) : \
823 									x86_emit_failure("source register must be CL"					)  )
824 #define _ROTSHILrm(OP,RS,MD,MB,MI,MS)	(((RS) == X86_CL) ? \
825 					(_REXLrm(RS, MB, MI),		_O_r_X		(0xd3		     ,OP		,MD,MB,MI,MS		)) : \
826 									x86_emit_failure("source register must be CL"					)  )
827 
828 #define _ROTSHIQir(OP,IM,RD)		(X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
829 					(_REXQrr(0, RD),		_O_Mrm		(0xd1		,_b11,OP,_r8(RD)				)) : \
830 					(_REXQrr(0, RD),		_O_Mrm_B	(0xc1		,_b11,OP,_r8(RD)			,_u8(IM))) )
831 #define _ROTSHIQim(OP,IM,MD,MB,MI,MS)	(X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
832 					(_REXQrm(0, MB, MI),		_O_r_X		(0xd1		     ,OP		,MD,MB,MI,MS		)) : \
833 					(_REXQrm(0, MB, MI),		_O_r_X_B	(0xc1		     ,OP		,MD,MB,MI,MS	,_u8(IM))) )
834 #define _ROTSHIQrr(OP,RS,RD)		(((RS) == X86_CL) ? \
835 					(_REXQrr(RS, RD),		_O_Mrm		(0xd3		,_b11,OP,_r8(RD)				)) : \
836 									x86_emit_failure("source register must be CL"					)  )
837 #define _ROTSHIQrm(OP,RS,MD,MB,MI,MS)	(((RS) == X86_CL) ? \
838 					(_REXQrm(RS, MB, MI),		_O_r_X		(0xd3		     ,OP		,MD,MB,MI,MS		)) : \
839 									x86_emit_failure("source register must be CL"					)  )
840 
/* Per-mnemonic shift/rotate wrappers over the generic _ROTSHI emitters.
   ir = shift reg by imm8, im = shift mem by imm8,
   rr = shift reg by CL,   rm = shift mem by CL. */

/* ROL - rotate left */
#define ROLBir(IM, RD)			_ROTSHIBir(X86_ROL, IM, RD)
#define ROLBim(IM, MD, MB, MI, MS)	_ROTSHIBim(X86_ROL, IM, MD, MB, MI, MS)
#define ROLBrr(RS, RD)			_ROTSHIBrr(X86_ROL, RS, RD)
#define ROLBrm(RS, MD, MB, MI, MS)	_ROTSHIBrm(X86_ROL, RS, MD, MB, MI, MS)

#define ROLWir(IM, RD)			_ROTSHIWir(X86_ROL, IM, RD)
#define ROLWim(IM, MD, MB, MI, MS)	_ROTSHIWim(X86_ROL, IM, MD, MB, MI, MS)
#define ROLWrr(RS, RD)			_ROTSHIWrr(X86_ROL, RS, RD)
#define ROLWrm(RS, MD, MB, MI, MS)	_ROTSHIWrm(X86_ROL, RS, MD, MB, MI, MS)

#define ROLLir(IM, RD)			_ROTSHILir(X86_ROL, IM, RD)
#define ROLLim(IM, MD, MB, MI, MS)	_ROTSHILim(X86_ROL, IM, MD, MB, MI, MS)
#define ROLLrr(RS, RD)			_ROTSHILrr(X86_ROL, RS, RD)
#define ROLLrm(RS, MD, MB, MI, MS)	_ROTSHILrm(X86_ROL, RS, MD, MB, MI, MS)

#define ROLQir(IM, RD)			_ROTSHIQir(X86_ROL, IM, RD)
#define ROLQim(IM, MD, MB, MI, MS)	_ROTSHIQim(X86_ROL, IM, MD, MB, MI, MS)
#define ROLQrr(RS, RD)			_ROTSHIQrr(X86_ROL, RS, RD)
#define ROLQrm(RS, MD, MB, MI, MS)	_ROTSHIQrm(X86_ROL, RS, MD, MB, MI, MS)

/* ROR - rotate right */
#define RORBir(IM, RD)			_ROTSHIBir(X86_ROR, IM, RD)
#define RORBim(IM, MD, MB, MI, MS)	_ROTSHIBim(X86_ROR, IM, MD, MB, MI, MS)
#define RORBrr(RS, RD)			_ROTSHIBrr(X86_ROR, RS, RD)
#define RORBrm(RS, MD, MB, MI, MS)	_ROTSHIBrm(X86_ROR, RS, MD, MB, MI, MS)

#define RORWir(IM, RD)			_ROTSHIWir(X86_ROR, IM, RD)
#define RORWim(IM, MD, MB, MI, MS)	_ROTSHIWim(X86_ROR, IM, MD, MB, MI, MS)
#define RORWrr(RS, RD)			_ROTSHIWrr(X86_ROR, RS, RD)
#define RORWrm(RS, MD, MB, MI, MS)	_ROTSHIWrm(X86_ROR, RS, MD, MB, MI, MS)

#define RORLir(IM, RD)			_ROTSHILir(X86_ROR, IM, RD)
#define RORLim(IM, MD, MB, MI, MS)	_ROTSHILim(X86_ROR, IM, MD, MB, MI, MS)
#define RORLrr(RS, RD)			_ROTSHILrr(X86_ROR, RS, RD)
#define RORLrm(RS, MD, MB, MI, MS)	_ROTSHILrm(X86_ROR, RS, MD, MB, MI, MS)

#define RORQir(IM, RD)			_ROTSHIQir(X86_ROR, IM, RD)
#define RORQim(IM, MD, MB, MI, MS)	_ROTSHIQim(X86_ROR, IM, MD, MB, MI, MS)
#define RORQrr(RS, RD)			_ROTSHIQrr(X86_ROR, RS, RD)
#define RORQrm(RS, MD, MB, MI, MS)	_ROTSHIQrm(X86_ROR, RS, MD, MB, MI, MS)

/* RCL - rotate left through carry */
#define RCLBir(IM, RD)			_ROTSHIBir(X86_RCL, IM, RD)
#define RCLBim(IM, MD, MB, MI, MS)	_ROTSHIBim(X86_RCL, IM, MD, MB, MI, MS)
#define RCLBrr(RS, RD)			_ROTSHIBrr(X86_RCL, RS, RD)
#define RCLBrm(RS, MD, MB, MI, MS)	_ROTSHIBrm(X86_RCL, RS, MD, MB, MI, MS)

#define RCLWir(IM, RD)			_ROTSHIWir(X86_RCL, IM, RD)
#define RCLWim(IM, MD, MB, MI, MS)	_ROTSHIWim(X86_RCL, IM, MD, MB, MI, MS)
#define RCLWrr(RS, RD)			_ROTSHIWrr(X86_RCL, RS, RD)
#define RCLWrm(RS, MD, MB, MI, MS)	_ROTSHIWrm(X86_RCL, RS, MD, MB, MI, MS)

#define RCLLir(IM, RD)			_ROTSHILir(X86_RCL, IM, RD)
#define RCLLim(IM, MD, MB, MI, MS)	_ROTSHILim(X86_RCL, IM, MD, MB, MI, MS)
#define RCLLrr(RS, RD)			_ROTSHILrr(X86_RCL, RS, RD)
#define RCLLrm(RS, MD, MB, MI, MS)	_ROTSHILrm(X86_RCL, RS, MD, MB, MI, MS)

#define RCLQir(IM, RD)			_ROTSHIQir(X86_RCL, IM, RD)
#define RCLQim(IM, MD, MB, MI, MS)	_ROTSHIQim(X86_RCL, IM, MD, MB, MI, MS)
#define RCLQrr(RS, RD)			_ROTSHIQrr(X86_RCL, RS, RD)
#define RCLQrm(RS, MD, MB, MI, MS)	_ROTSHIQrm(X86_RCL, RS, MD, MB, MI, MS)

/* RCR - rotate right through carry */
#define RCRBir(IM, RD)			_ROTSHIBir(X86_RCR, IM, RD)
#define RCRBim(IM, MD, MB, MI, MS)	_ROTSHIBim(X86_RCR, IM, MD, MB, MI, MS)
#define RCRBrr(RS, RD)			_ROTSHIBrr(X86_RCR, RS, RD)
#define RCRBrm(RS, MD, MB, MI, MS)	_ROTSHIBrm(X86_RCR, RS, MD, MB, MI, MS)

#define RCRWir(IM, RD)			_ROTSHIWir(X86_RCR, IM, RD)
#define RCRWim(IM, MD, MB, MI, MS)	_ROTSHIWim(X86_RCR, IM, MD, MB, MI, MS)
#define RCRWrr(RS, RD)			_ROTSHIWrr(X86_RCR, RS, RD)
#define RCRWrm(RS, MD, MB, MI, MS)	_ROTSHIWrm(X86_RCR, RS, MD, MB, MI, MS)

#define RCRLir(IM, RD)			_ROTSHILir(X86_RCR, IM, RD)
#define RCRLim(IM, MD, MB, MI, MS)	_ROTSHILim(X86_RCR, IM, MD, MB, MI, MS)
#define RCRLrr(RS, RD)			_ROTSHILrr(X86_RCR, RS, RD)
#define RCRLrm(RS, MD, MB, MI, MS)	_ROTSHILrm(X86_RCR, RS, MD, MB, MI, MS)

#define RCRQir(IM, RD)			_ROTSHIQir(X86_RCR, IM, RD)
#define RCRQim(IM, MD, MB, MI, MS)	_ROTSHIQim(X86_RCR, IM, MD, MB, MI, MS)
#define RCRQrr(RS, RD)			_ROTSHIQrr(X86_RCR, RS, RD)
#define RCRQrm(RS, MD, MB, MI, MS)	_ROTSHIQrm(X86_RCR, RS, MD, MB, MI, MS)

/* SHL - logical shift left */
#define SHLBir(IM, RD)			_ROTSHIBir(X86_SHL, IM, RD)
#define SHLBim(IM, MD, MB, MI, MS)	_ROTSHIBim(X86_SHL, IM, MD, MB, MI, MS)
#define SHLBrr(RS, RD)			_ROTSHIBrr(X86_SHL, RS, RD)
#define SHLBrm(RS, MD, MB, MI, MS)	_ROTSHIBrm(X86_SHL, RS, MD, MB, MI, MS)

#define SHLWir(IM, RD)			_ROTSHIWir(X86_SHL, IM, RD)
#define SHLWim(IM, MD, MB, MI, MS)	_ROTSHIWim(X86_SHL, IM, MD, MB, MI, MS)
#define SHLWrr(RS, RD)			_ROTSHIWrr(X86_SHL, RS, RD)
#define SHLWrm(RS, MD, MB, MI, MS)	_ROTSHIWrm(X86_SHL, RS, MD, MB, MI, MS)

#define SHLLir(IM, RD)			_ROTSHILir(X86_SHL, IM, RD)
#define SHLLim(IM, MD, MB, MI, MS)	_ROTSHILim(X86_SHL, IM, MD, MB, MI, MS)
#define SHLLrr(RS, RD)			_ROTSHILrr(X86_SHL, RS, RD)
#define SHLLrm(RS, MD, MB, MI, MS)	_ROTSHILrm(X86_SHL, RS, MD, MB, MI, MS)

#define SHLQir(IM, RD)			_ROTSHIQir(X86_SHL, IM, RD)
#define SHLQim(IM, MD, MB, MI, MS)	_ROTSHIQim(X86_SHL, IM, MD, MB, MI, MS)
#define SHLQrr(RS, RD)			_ROTSHIQrr(X86_SHL, RS, RD)
#define SHLQrm(RS, MD, MB, MI, MS)	_ROTSHIQrm(X86_SHL, RS, MD, MB, MI, MS)

/* SHR - logical shift right (zero fill) */
#define SHRBir(IM, RD)			_ROTSHIBir(X86_SHR, IM, RD)
#define SHRBim(IM, MD, MB, MI, MS)	_ROTSHIBim(X86_SHR, IM, MD, MB, MI, MS)
#define SHRBrr(RS, RD)			_ROTSHIBrr(X86_SHR, RS, RD)
#define SHRBrm(RS, MD, MB, MI, MS)	_ROTSHIBrm(X86_SHR, RS, MD, MB, MI, MS)

#define SHRWir(IM, RD)			_ROTSHIWir(X86_SHR, IM, RD)
#define SHRWim(IM, MD, MB, MI, MS)	_ROTSHIWim(X86_SHR, IM, MD, MB, MI, MS)
#define SHRWrr(RS, RD)			_ROTSHIWrr(X86_SHR, RS, RD)
#define SHRWrm(RS, MD, MB, MI, MS)	_ROTSHIWrm(X86_SHR, RS, MD, MB, MI, MS)

#define SHRLir(IM, RD)			_ROTSHILir(X86_SHR, IM, RD)
#define SHRLim(IM, MD, MB, MI, MS)	_ROTSHILim(X86_SHR, IM, MD, MB, MI, MS)
#define SHRLrr(RS, RD)			_ROTSHILrr(X86_SHR, RS, RD)
#define SHRLrm(RS, MD, MB, MI, MS)	_ROTSHILrm(X86_SHR, RS, MD, MB, MI, MS)

#define SHRQir(IM, RD)			_ROTSHIQir(X86_SHR, IM, RD)
#define SHRQim(IM, MD, MB, MI, MS)	_ROTSHIQim(X86_SHR, IM, MD, MB, MI, MS)
#define SHRQrr(RS, RD)			_ROTSHIQrr(X86_SHR, RS, RD)
#define SHRQrm(RS, MD, MB, MI, MS)	_ROTSHIQrm(X86_SHR, RS, MD, MB, MI, MS)

/* SAL - arithmetic shift left: identical to SHL, so plain aliases */
#define SALBir				SHLBir
#define SALBim				SHLBim
#define SALBrr				SHLBrr
#define SALBrm				SHLBrm

#define SALWir				SHLWir
#define SALWim				SHLWim
#define SALWrr				SHLWrr
#define SALWrm				SHLWrm

#define SALLir				SHLLir
#define SALLim				SHLLim
#define SALLrr				SHLLrr
#define SALLrm				SHLLrm

#define SALQir				SHLQir
#define SALQim				SHLQim
#define SALQrr				SHLQrr
#define SALQrm				SHLQrm

/* SAR - arithmetic shift right (sign fill) */
#define SARBir(IM, RD)			_ROTSHIBir(X86_SAR, IM, RD)
#define SARBim(IM, MD, MB, MI, MS)	_ROTSHIBim(X86_SAR, IM, MD, MB, MI, MS)
#define SARBrr(RS, RD)			_ROTSHIBrr(X86_SAR, RS, RD)
#define SARBrm(RS, MD, MB, MI, MS)	_ROTSHIBrm(X86_SAR, RS, MD, MB, MI, MS)

#define SARWir(IM, RD)			_ROTSHIWir(X86_SAR, IM, RD)
#define SARWim(IM, MD, MB, MI, MS)	_ROTSHIWim(X86_SAR, IM, MD, MB, MI, MS)
#define SARWrr(RS, RD)			_ROTSHIWrr(X86_SAR, RS, RD)
#define SARWrm(RS, MD, MB, MI, MS)	_ROTSHIWrm(X86_SAR, RS, MD, MB, MI, MS)

#define SARLir(IM, RD)			_ROTSHILir(X86_SAR, IM, RD)
#define SARLim(IM, MD, MB, MI, MS)	_ROTSHILim(X86_SAR, IM, MD, MB, MI, MS)
#define SARLrr(RS, RD)			_ROTSHILrr(X86_SAR, RS, RD)
#define SARLrm(RS, MD, MB, MI, MS)	_ROTSHILrm(X86_SAR, RS, MD, MB, MI, MS)

#define SARQir(IM, RD)			_ROTSHIQir(X86_SAR, IM, RD)
#define SARQim(IM, MD, MB, MI, MS)	_ROTSHIQim(X86_SAR, IM, MD, MB, MI, MS)
#define SARQrr(RS, RD)			_ROTSHIQrr(X86_SAR, RS, RD)
#define SARQrm(RS, MD, MB, MI, MS)	_ROTSHIQrm(X86_SAR, RS, MD, MB, MI, MS)
1000 
1001 
/* --- Bit test instructions ----------------------------------------------- */

/* ModRM reg-field selectors (/digit) for the 0x0F 0xBA immediate bit-test
   group; the same values select the register-form opcode via
   0x0f83|(op<<3): BT=0FA3, BTS=0FAB, BTR=0FB3, BTC=0FBB. */
enum {
  X86_BT  = 4,
  X86_BTS = 5,
  X86_BTR = 6,
  X86_BTC = 7,
};
1010 
1011 /*									_format		Opcd		 ,Mod ,r      ,m	,mem=dsp+sib	,imm... */
1012 
1013 #define _BTWir(OP, IM, RD)		(_d16(), _REXLrr(0, RD),	_OO_Mrm_B	(0x0fba		 ,_b11,OP     ,_r2(RD)			,_u8(IM)))
1014 #define _BTWim(OP, IM, MD, MB, MI, MS)	(_d16(), _REXLrm(0, MB, MI),	_OO_r_X_B	(0x0fba		      ,OP		,MD,MB,MI,MS	,_u8(IM)))
1015 #define _BTWrr(OP, RS, RD)		(_d16(), _REXLrr(RS, RD),	_OO_Mrm		(0x0f83|((OP)<<3),_b11,_r2(RS),_r2(RD)				))
1016 #define _BTWrm(OP, RS, MD, MB, MI, MS)	(_d16(), _REXLrm(RS, MB, MI),	_OO_r_X		(0x0f83|((OP)<<3)     ,_r2(RS)		,MD,MB,MI,MS		))
1017 
1018 #define _BTLir(OP, IM, RD)		(_REXLrr(0, RD),		_OO_Mrm_B	(0x0fba		 ,_b11,OP     ,_r4(RD)			,_u8(IM)))
1019 #define _BTLim(OP, IM, MD, MB, MI, MS)	(_REXLrm(0, MB, MI),		_OO_r_X_B	(0x0fba		      ,OP		,MD,MB,MI,MS	,_u8(IM)))
1020 #define _BTLrr(OP, RS, RD)		(_REXLrr(RS, RD),		_OO_Mrm		(0x0f83|((OP)<<3),_b11,_r4(RS),_r4(RD)				))
1021 #define _BTLrm(OP, RS, MD, MB, MI, MS)	(_REXLrm(RS, MB, MI),		_OO_r_X		(0x0f83|((OP)<<3)     ,_r4(RS)		,MD,MB,MI,MS		))
1022 
1023 #define _BTQir(OP, IM, RD)		(_REXQrr(0, RD),		_OO_Mrm_B	(0x0fba		 ,_b11,OP     ,_r8(RD)			,_u8(IM)))
1024 #define _BTQim(OP, IM, MD, MB, MI, MS)	(_REXQrm(0, MB, MI),		_OO_r_X_B	(0x0fba		      ,OP		,MD,MB,MI,MS	,_u8(IM)))
1025 #define _BTQrr(OP, RS, RD)		(_REXQrr(RS, RD),		_OO_Mrm		(0x0f83|((OP)<<3),_b11,_r8(RS),_r8(RD)				))
1026 #define _BTQrm(OP, RS, MD, MB, MI, MS)	(_REXQrm(RS, MB, MI),		_OO_r_X		(0x0f83|((OP)<<3)     ,_r8(RS)		,MD,MB,MI,MS		))
1027 
/* Per-mnemonic bit-test wrappers over the generic _BT emitters.
 *
 * BUG FIX: the 16-bit immediate-to-memory forms (BTWim, BTCWim, BTRWim,
 * BTSWim) forwarded _BTWim(op, IM, MD, MI, MS), dropping the MB base
 * register.  That shifted every following operand (MI became the base,
 * MS the index, and the scale was lost), so a wrong effective address
 * was encoded.  All sibling L/Q forms already passed MB correctly. */
#define BTWir(IM, RD)			_BTWir(X86_BT, IM, RD)
#define BTWim(IM, MD, MB, MI, MS)	_BTWim(X86_BT, IM, MD, MB, MI, MS)
#define BTWrr(RS, RD)			_BTWrr(X86_BT, RS, RD)
#define BTWrm(RS, MD, MB, MI, MS)	_BTWrm(X86_BT, RS, MD, MB, MI, MS)

#define BTLir(IM, RD)			_BTLir(X86_BT, IM, RD)
#define BTLim(IM, MD, MB, MI, MS)	_BTLim(X86_BT, IM, MD, MB, MI, MS)
#define BTLrr(RS, RD)			_BTLrr(X86_BT, RS, RD)
#define BTLrm(RS, MD, MB, MI, MS)	_BTLrm(X86_BT, RS, MD, MB, MI, MS)

#define BTQir(IM, RD)			_BTQir(X86_BT, IM, RD)
#define BTQim(IM, MD, MB, MI, MS)	_BTQim(X86_BT, IM, MD, MB, MI, MS)
#define BTQrr(RS, RD)			_BTQrr(X86_BT, RS, RD)
#define BTQrm(RS, MD, MB, MI, MS)	_BTQrm(X86_BT, RS, MD, MB, MI, MS)

/* BTC - bit test and complement */
#define BTCWir(IM, RD)			_BTWir(X86_BTC, IM, RD)
#define BTCWim(IM, MD, MB, MI, MS)	_BTWim(X86_BTC, IM, MD, MB, MI, MS)
#define BTCWrr(RS, RD)			_BTWrr(X86_BTC, RS, RD)
#define BTCWrm(RS, MD, MB, MI, MS)	_BTWrm(X86_BTC, RS, MD, MB, MI, MS)

#define BTCLir(IM, RD)			_BTLir(X86_BTC, IM, RD)
#define BTCLim(IM, MD, MB, MI, MS)	_BTLim(X86_BTC, IM, MD, MB, MI, MS)
#define BTCLrr(RS, RD)			_BTLrr(X86_BTC, RS, RD)
#define BTCLrm(RS, MD, MB, MI, MS)	_BTLrm(X86_BTC, RS, MD, MB, MI, MS)

#define BTCQir(IM, RD)			_BTQir(X86_BTC, IM, RD)
#define BTCQim(IM, MD, MB, MI, MS)	_BTQim(X86_BTC, IM, MD, MB, MI, MS)
#define BTCQrr(RS, RD)			_BTQrr(X86_BTC, RS, RD)
#define BTCQrm(RS, MD, MB, MI, MS)	_BTQrm(X86_BTC, RS, MD, MB, MI, MS)

/* BTR - bit test and reset */
#define BTRWir(IM, RD)			_BTWir(X86_BTR, IM, RD)
#define BTRWim(IM, MD, MB, MI, MS)	_BTWim(X86_BTR, IM, MD, MB, MI, MS)
#define BTRWrr(RS, RD)			_BTWrr(X86_BTR, RS, RD)
#define BTRWrm(RS, MD, MB, MI, MS)	_BTWrm(X86_BTR, RS, MD, MB, MI, MS)

#define BTRLir(IM, RD)			_BTLir(X86_BTR, IM, RD)
#define BTRLim(IM, MD, MB, MI, MS)	_BTLim(X86_BTR, IM, MD, MB, MI, MS)
#define BTRLrr(RS, RD)			_BTLrr(X86_BTR, RS, RD)
#define BTRLrm(RS, MD, MB, MI, MS)	_BTLrm(X86_BTR, RS, MD, MB, MI, MS)

#define BTRQir(IM, RD)			_BTQir(X86_BTR, IM, RD)
#define BTRQim(IM, MD, MB, MI, MS)	_BTQim(X86_BTR, IM, MD, MB, MI, MS)
#define BTRQrr(RS, RD)			_BTQrr(X86_BTR, RS, RD)
#define BTRQrm(RS, MD, MB, MI, MS)	_BTQrm(X86_BTR, RS, MD, MB, MI, MS)

/* BTS - bit test and set */
#define BTSWir(IM, RD)			_BTWir(X86_BTS, IM, RD)
#define BTSWim(IM, MD, MB, MI, MS)	_BTWim(X86_BTS, IM, MD, MB, MI, MS)
#define BTSWrr(RS, RD)			_BTWrr(X86_BTS, RS, RD)
#define BTSWrm(RS, MD, MB, MI, MS)	_BTWrm(X86_BTS, RS, MD, MB, MI, MS)

#define BTSLir(IM, RD)			_BTLir(X86_BTS, IM, RD)
#define BTSLim(IM, MD, MB, MI, MS)	_BTLim(X86_BTS, IM, MD, MB, MI, MS)
#define BTSLrr(RS, RD)			_BTLrr(X86_BTS, RS, RD)
#define BTSLrm(RS, MD, MB, MI, MS)	_BTLrm(X86_BTS, RS, MD, MB, MI, MS)

#define BTSQir(IM, RD)			_BTQir(X86_BTS, IM, RD)
#define BTSQim(IM, MD, MB, MI, MS)	_BTQim(X86_BTS, IM, MD, MB, MI, MS)
#define BTSQrr(RS, RD)			_BTQrr(X86_BTS, RS, RD)
#define BTSQrm(RS, MD, MB, MI, MS)	_BTQrm(X86_BTS, RS, MD, MB, MI, MS)
1087 
1088 
/* --- Move instructions --------------------------------------------------- */

/*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */

/* MOV emitters: 0x88/0x89 = store reg, 0x8A/0x8B = load into reg,
   0xB0+r / 0xB8+r = move immediate into reg, 0xC6/0xC7 = move immediate
   into memory.  NOTE(review): MOVQim reuses _O_X_L, matching the C7
   encoding where a 64-bit store takes only a sign-extended imm32 - a full
   64-bit immediate is only available in the MOVQir (B8+r) form. */
#define MOVBrr(RS, RD)			(_REXBrr(RS, RD),		_O_Mrm		(0x88		,_b11,_r1(RS),_r1(RD)				))
#define MOVBmr(MD, MB, MI, MS, RD)	(_REXBmr(MB, MI, RD),		_O_r_X		(0x8a		     ,_r1(RD)		,MD,MB,MI,MS		))
#define MOVBrm(RS, MD, MB, MI, MS)	(_REXBrm(RS, MB, MI),		_O_r_X		(0x88		     ,_r1(RS)		,MD,MB,MI,MS		))
#define MOVBir(IM,  R)			(_REXBrr(0, R),			_Or_B		(0xb0,_r1(R)						,_su8(IM)))
#define MOVBim(IM, MD, MB, MI, MS)	(_REXBrm(0, MB, MI),		_O_X_B		(0xc6					,MD,MB,MI,MS	,_su8(IM)))

#define MOVWrr(RS, RD)			(_d16(), _REXLrr(RS, RD),	_O_Mrm		(0x89		,_b11,_r2(RS),_r2(RD)				))
#define MOVWmr(MD, MB, MI, MS, RD)	(_d16(), _REXLmr(MB, MI, RD),	_O_r_X		(0x8b		     ,_r2(RD)		,MD,MB,MI,MS		))
#define MOVWrm(RS, MD, MB, MI, MS)	(_d16(), _REXLrm(RS, MB, MI),	_O_r_X		(0x89		     ,_r2(RS)		,MD,MB,MI,MS		))
#define MOVWir(IM,  R)			(_d16(), _REXLrr(0, R),		_Or_W		(0xb8,_r2(R)						,_su16(IM)))
#define MOVWim(IM, MD, MB, MI, MS)	(_d16(), _REXLrm(0, MB, MI),	_O_X_W		(0xc7					,MD,MB,MI,MS	,_su16(IM)))

#define MOVLrr(RS, RD)			(_REXLrr(RS, RD),		_O_Mrm		(0x89		,_b11,_r4(RS),_r4(RD)				))
#define MOVLmr(MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_O_r_X		(0x8b		     ,_r4(RD)		,MD,MB,MI,MS		))
#define MOVLrm(RS, MD, MB, MI, MS)	(_REXLrm(RS, MB, MI),		_O_r_X		(0x89		     ,_r4(RS)		,MD,MB,MI,MS		))
#define MOVLir(IM,  R)			(_REXLrr(0, R),			_Or_L		(0xb8,_r4(R)						,IM	))
#define MOVLim(IM, MD, MB, MI, MS)	(_REXLrm(0, MB, MI),		_O_X_L		(0xc7					,MD,MB,MI,MS	,IM	))

#define MOVQrr(RS, RD)			(_REXQrr(RS, RD),		_O_Mrm		(0x89		,_b11,_r8(RS),_r8(RD)				))
#define MOVQmr(MD, MB, MI, MS, RD)	(_REXQmr(MB, MI, RD),		_O_r_X		(0x8b		     ,_r8(RD)		,MD,MB,MI,MS		))
#define MOVQrm(RS, MD, MB, MI, MS)	(_REXQrm(RS, MB, MI),		_O_r_X		(0x89		     ,_r8(RS)		,MD,MB,MI,MS		))
#define MOVQir(IM,  R)			(_REXQrr(0, R),			_Or_Q		(0xb8,_r8(R)						,IM	))
#define MOVQim(IM, MD, MB, MI, MS)	(_REXQrm(0, MB, MI),		_O_X_L		(0xc7					,MD,MB,MI,MS	,IM	))
1116 
1117 
/* --- Unary and Multiply/Divide instructions ------------------------------ */

/* ModRM reg-field selectors (/digit) for opcode group 3 (0xF6/0xF7).
   /0 and /1 (TEST) are intentionally not represented here. */
enum {
  X86_NOT  = 2,
  X86_NEG  = 3,
  X86_MUL  = 4,
  X86_IMUL = 5,
  X86_DIV  = 6,
  X86_IDIV = 7,
};
1128 
1129 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1130 
1131 #define _UNARYBr(OP, RS)		(_REXBrr(0, RS),		_O_Mrm		(0xf6		,_b11,OP    ,_r1(RS)				))
1132 #define _UNARYBm(OP, MD, MB, MI, MS)	(_REXBrm(0, MB, MI),		_O_r_X		(0xf6		     ,OP		,MD,MB,MI,MS		))
1133 #define _UNARYWr(OP, RS)		(_d16(), _REXLrr(0, RS),	_O_Mrm		(0xf7		,_b11,OP    ,_r2(RS)				))
1134 #define _UNARYWm(OP, MD, MB, MI, MS)	(_d16(), _REXLmr(MB, MI, 0),	_O_r_X		(0xf7		     ,OP		,MD,MB,MI,MS		))
1135 #define _UNARYLr(OP, RS)		(_REXLrr(0, RS),		_O_Mrm		(0xf7		,_b11,OP    ,_r4(RS)				))
1136 #define _UNARYLm(OP, MD, MB, MI, MS)	(_REXLmr(MB, MI, 0),		_O_r_X		(0xf7		     ,OP		,MD,MB,MI,MS		))
1137 #define _UNARYQr(OP, RS)		(_REXQrr(0, RS),		_O_Mrm		(0xf7		,_b11,OP    ,_r8(RS)				))
1138 #define _UNARYQm(OP, MD, MB, MI, MS)	(_REXQmr(MB, MI, 0),		_O_r_X		(0xf7		     ,OP		,MD,MB,MI,MS		))
1139 
1140 #define NOTBr(RS)			_UNARYBr(X86_NOT, RS)
1141 #define NOTBm(MD, MB, MI, MS)		_UNARYBm(X86_NOT, MD, MB, MI, MS)
1142 #define NOTWr(RS)			_UNARYWr(X86_NOT, RS)
1143 #define NOTWm(MD, MB, MI, MS)		_UNARYWm(X86_NOT, MD, MB, MI, MS)
1144 #define NOTLr(RS)			_UNARYLr(X86_NOT, RS)
1145 #define NOTLm(MD, MB, MI, MS)		_UNARYLm(X86_NOT, MD, MB, MI, MS)
1146 #define NOTQr(RS)			_UNARYQr(X86_NOT, RS)
1147 #define NOTQm(MD, MB, MI, MS)		_UNARYQm(X86_NOT, MD, MB, MI, MS)
1148 
1149 #define NEGBr(RS)			_UNARYBr(X86_NEG, RS)
1150 #define NEGBm(MD, MB, MI, MS)		_UNARYBm(X86_NEG, MD, MB, MI, MS)
1151 #define NEGWr(RS)			_UNARYWr(X86_NEG, RS)
1152 #define NEGWm(MD, MB, MI, MS)		_UNARYWm(X86_NEG, MD, MB, MI, MS)
1153 #define NEGLr(RS)			_UNARYLr(X86_NEG, RS)
1154 #define NEGLm(MD, MB, MI, MS)		_UNARYLm(X86_NEG, MD, MB, MI, MS)
1155 #define NEGQr(RS)			_UNARYQr(X86_NEG, RS)
1156 #define NEGQm(MD, MB, MI, MS)		_UNARYQm(X86_NEG, MD, MB, MI, MS)
1157 
1158 #define MULBr(RS)			_UNARYBr(X86_MUL, RS)
1159 #define MULBm(MD, MB, MI, MS)		_UNARYBm(X86_MUL, MD, MB, MI, MS)
1160 #define MULWr(RS)			_UNARYWr(X86_MUL, RS)
1161 #define MULWm(MD, MB, MI, MS)		_UNARYWm(X86_MUL, MD, MB, MI, MS)
1162 #define MULLr(RS)			_UNARYLr(X86_MUL, RS)
1163 #define MULLm(MD, MB, MI, MS)		_UNARYLm(X86_MUL, MD, MB, MI, MS)
1164 #define MULQr(RS)			_UNARYQr(X86_MUL, RS)
1165 #define MULQm(MD, MB, MI, MS)		_UNARYQm(X86_MUL, MD, MB, MI, MS)
1166 
1167 #define IMULBr(RS)			_UNARYBr(X86_IMUL, RS)
1168 #define IMULBm(MD, MB, MI, MS)		_UNARYBm(X86_IMUL, MD, MB, MI, MS)
1169 #define IMULWr(RS)			_UNARYWr(X86_IMUL, RS)
1170 #define IMULWm(MD, MB, MI, MS)		_UNARYWm(X86_IMUL, MD, MB, MI, MS)
1171 #define IMULLr(RS)			_UNARYLr(X86_IMUL, RS)
1172 #define IMULLm(MD, MB, MI, MS)		_UNARYLm(X86_IMUL, MD, MB, MI, MS)
1173 #define IMULQr(RS)			_UNARYQr(X86_IMUL, RS)
1174 #define IMULQm(MD, MB, MI, MS)		_UNARYQm(X86_IMUL, MD, MB, MI, MS)
1175 
1176 #define DIVBr(RS)			_UNARYBr(X86_DIV, RS)
1177 #define DIVBm(MD, MB, MI, MS)		_UNARYBm(X86_DIV, MD, MB, MI, MS)
1178 #define DIVWr(RS)			_UNARYWr(X86_DIV, RS)
1179 #define DIVWm(MD, MB, MI, MS)		_UNARYWm(X86_DIV, MD, MB, MI, MS)
1180 #define DIVLr(RS)			_UNARYLr(X86_DIV, RS)
1181 #define DIVLm(MD, MB, MI, MS)		_UNARYLm(X86_DIV, MD, MB, MI, MS)
1182 #define DIVQr(RS)			_UNARYQr(X86_DIV, RS)
1183 #define DIVQm(MD, MB, MI, MS)		_UNARYQm(X86_DIV, MD, MB, MI, MS)
1184 
1185 #define IDIVBr(RS)			_UNARYBr(X86_IDIV, RS)
1186 #define IDIVBm(MD, MB, MI, MS)		_UNARYBm(X86_IDIV, MD, MB, MI, MS)
1187 #define IDIVWr(RS)			_UNARYWr(X86_IDIV, RS)
1188 #define IDIVWm(MD, MB, MI, MS)		_UNARYWm(X86_IDIV, MD, MB, MI, MS)
1189 #define IDIVLr(RS)			_UNARYLr(X86_IDIV, RS)
1190 #define IDIVLm(MD, MB, MI, MS)		_UNARYLm(X86_IDIV, MD, MB, MI, MS)
1191 #define IDIVQr(RS)			_UNARYQr(X86_IDIV, RS)
1192 #define IDIVQm(MD, MB, MI, MS)		_UNARYQm(X86_IDIV, MD, MB, MI, MS)
1193 
1194 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1195 
1196 #define IMULWrr(RS, RD)			(_d16(), _REXLrr(RD, RS),	_OO_Mrm		(0x0faf		,_b11,_r2(RD),_r2(RS)				))
1197 #define IMULWmr(MD, MB, MI, MS, RD)	(_d16(), _REXLmr(MB, MI, RD),	_OO_r_X		(0x0faf		     ,_r2(RD)		,MD,MB,MI,MS		))
1198 
1199 #define IMULWirr(IM,RS,RD)		(_d16(), _REXLrr(RS, RD),	_Os_Mrm_sW	(0x69		,_b11,_r2(RS),_r2(RD)			,_su16(IM)	))
1200 #define IMULWimr(IM,MD,MB,MI,MS,RD)	(_d16(), _REXLmr(MB, MI, RD),	_Os_r_X_sW	(0x69		     ,_r2(RD)		,MD,MB,MI,MS	,_su16(IM)	))
1201 
1202 #define IMULLir(IM, RD)			(_REXLrr(0, RD),		_Os_Mrm_sL	(0x69		,_b11,_r4(RD),_r4(RD)			,IM	))
1203 #define IMULLrr(RS, RD)			(_REXLrr(RD, RS),		_OO_Mrm		(0x0faf		,_b11,_r4(RD),_r4(RS)				))
1204 #define IMULLmr(MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_OO_r_X		(0x0faf		     ,_r4(RD)		,MD,MB,MI,MS		))
1205 
1206 #define IMULQir(IM, RD)			(_REXQrr(0, RD),		_Os_Mrm_sL	(0x69		,_b11,_r8(RD),_r8(RD)			,IM	))
1207 #define IMULQrr(RS, RD)			(_REXQrr(RD, RS),		_OO_Mrm		(0x0faf		,_b11,_r8(RD),_r8(RS)				))
1208 #define IMULQmr(MD, MB, MI, MS, RD)	(_REXQmr(MB, MI, RD),		_OO_r_X		(0x0faf		     ,_r8(RD)		,MD,MB,MI,MS		))
1209 
1210 #define IMULLirr(IM,RS,RD)		(_REXLrr(RS, RD),		_Os_Mrm_sL	(0x69		,_b11,_r4(RS),_r4(RD)			,IM	))
1211 #define IMULLimr(IM,MD,MB,MI,MS,RD)	(_REXLmr(MB, MI, RD),		_Os_r_X_sL	(0x69		     ,_r4(RD)		,MD,MB,MI,MS	,IM	))
1212 
1213 #define IMULQirr(IM,RS,RD)		(_REXQrr(RS, RD),		_Os_Mrm_sL	(0x69		,_b11,_r8(RS),_r8(RD)			,IM	))
1214 #define IMULQimr(IM,MD,MB,MI,MS,RD)	(_REXQmr(MB, MI, RD),		_Os_r_X_sL	(0x69		     ,_r8(RD)		,MD,MB,MI,MS	,IM	))
1215 
1216 
/* --- Control Flow related instructions ----------------------------------- */

/* x86 condition codes: the 4-bit value is the low nibble of the Jcc /
   SETcc / CMOVcc opcodes.  Several mnemonics alias the same encoding
   (e.g. B = C = NAE = 0x2). */
enum {
  X86_CC_O   = 0x0,
  X86_CC_NO  = 0x1,
  X86_CC_NAE = 0x2,
  X86_CC_B   = 0x2,
  X86_CC_C   = 0x2,
  X86_CC_AE  = 0x3,
  X86_CC_NB  = 0x3,
  X86_CC_NC  = 0x3,
  X86_CC_E   = 0x4,
  X86_CC_Z   = 0x4,
  X86_CC_NE  = 0x5,
  X86_CC_NZ  = 0x5,
  X86_CC_BE  = 0x6,
  X86_CC_NA  = 0x6,
  X86_CC_A   = 0x7,
  X86_CC_NBE = 0x7,
  X86_CC_S   = 0x8,
  X86_CC_NS  = 0x9,
  X86_CC_P   = 0xa,
  X86_CC_PE  = 0xa,
  X86_CC_NP  = 0xb,
  X86_CC_PO  = 0xb,
  X86_CC_L   = 0xc,
  X86_CC_NGE = 0xc,
  X86_CC_GE  = 0xd,
  X86_CC_NL  = 0xd,
  X86_CC_LE  = 0xe,
  X86_CC_NG  = 0xe,
  X86_CC_G   = 0xf,
  X86_CC_NLE = 0xf,
};
1251 
1252 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1253 
1254 // FIXME: no prefix is availble to encode a 32-bit operand size in 64-bit mode
1255 #define CALLm(M)							_O_D32		(0xe8					,(int)(M)		)
1256 #define _CALLLsr(R)			(_REXLrr(0, R),			_O_Mrm		(0xff		,_b11,_b010,_r4(R)				))
1257 #define _CALLQsr(R)			(_REXQrr(0, R),			_O_Mrm		(0xff		,_b11,_b010,_r8(R)				))
1258 #define CALLsr(R)			( X86_TARGET_64BIT ? _CALLQsr(R) : _CALLLsr(R))
1259 #define CALLsm(D,B,I,S)			(_REXLrm(0, B, I),		_O_r_X		(0xff		     ,_b010		,(int)(D),B,I,S		))
1260 
1261 // FIXME: no prefix is availble to encode a 32-bit operand size in 64-bit mode
1262 #define JMPSm(M)							_O_D8		(0xeb					,(int)(M)		)
1263 #define JMPm(M)								_O_D32		(0xe9					,(int)(M)		)
1264 #define _JMPLsr(R)			(_REXLrr(0, R),			_O_Mrm		(0xff		,_b11,_b100,_r4(R)				))
1265 #define _JMPQsr(R)			(_REXQrr(0, R),			_O_Mrm		(0xff		,_b11,_b100,_r8(R)				))
1266 #define JMPsr(R)			( X86_TARGET_64BIT ? _JMPQsr(R) : _JMPLsr(R))
1267 #define JMPsm(D,B,I,S)			(_REXLrm(0, B, I),		_O_r_X		(0xff		     ,_b100		,(int)(D),B,I,S		))
1268 
1269 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1270 #define JCCSii(CC, D)							_O_B		(0x70|(CC)				,(_sc)(int)(D)		)
1271 #define JCCSim(CC, D)							_O_D8		(0x70|(CC)				,(int)(D)		)
1272 #define JOSm(D)				JCCSim(0x0, D)
1273 #define JNOSm(D)			JCCSim(0x1, D)
1274 #define JBSm(D)				JCCSim(0x2, D)
1275 #define JNAESm(D)			JCCSim(0x2, D)
1276 #define JNBSm(D)			JCCSim(0x3, D)
1277 #define JAESm(D)			JCCSim(0x3, D)
1278 #define JESm(D)				JCCSim(0x4, D)
1279 #define JZSm(D)				JCCSim(0x4, D)
1280 #define JNESm(D)			JCCSim(0x5, D)
1281 #define JNZSm(D)			JCCSim(0x5, D)
1282 #define JBESm(D)			JCCSim(0x6, D)
1283 #define JNASm(D)			JCCSim(0x6, D)
1284 #define JNBESm(D)			JCCSim(0x7, D)
1285 #define JASm(D)				JCCSim(0x7, D)
1286 #define JSSm(D)				JCCSim(0x8, D)
1287 #define JNSSm(D)			JCCSim(0x9, D)
1288 #define JPSm(D)				JCCSim(0xa, D)
1289 #define JPESm(D)			JCCSim(0xa, D)
1290 #define JNPSm(D)			JCCSim(0xb, D)
1291 #define JPOSm(D)			JCCSim(0xb, D)
1292 #define JLSm(D)				JCCSim(0xc, D)
1293 #define JNGESm(D)			JCCSim(0xc, D)
1294 #define JNLSm(D)			JCCSim(0xd, D)
1295 #define JGESm(D)			JCCSim(0xd, D)
1296 #define JLESm(D)			JCCSim(0xe, D)
1297 #define JNGSm(D)			JCCSim(0xe, D)
1298 #define JNLESm(D)			JCCSim(0xf, D)
1299 #define JGSm(D)				JCCSim(0xf, D)
1300 
1301 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1302 #define JCCii(CC, D)							_OO_L		(0x0f80|(CC)				,(int)(D)		)
1303 #define JCCim(CC, D)							_OO_D32		(0x0f80|(CC)				,(int)(D)		)
1304 #define JOm(D)				JCCim(0x0, D)
1305 #define JNOm(D)				JCCim(0x1, D)
1306 #define JBm(D)				JCCim(0x2, D)
1307 #define JNAEm(D)			JCCim(0x2, D)
1308 #define JNBm(D)				JCCim(0x3, D)
1309 #define JAEm(D)				JCCim(0x3, D)
1310 #define JEm(D)				JCCim(0x4, D)
1311 #define JZm(D)				JCCim(0x4, D)
1312 #define JNEm(D)				JCCim(0x5, D)
1313 #define JNZm(D)				JCCim(0x5, D)
1314 #define JBEm(D)				JCCim(0x6, D)
1315 #define JNAm(D)				JCCim(0x6, D)
1316 #define JNBEm(D)			JCCim(0x7, D)
1317 #define JAm(D)				JCCim(0x7, D)
1318 #define JSm(D)				JCCim(0x8, D)
1319 #define JNSm(D)				JCCim(0x9, D)
1320 #define JPm(D)				JCCim(0xa, D)
1321 #define JPEm(D)				JCCim(0xa, D)
1322 #define JNPm(D)				JCCim(0xb, D)
1323 #define JPOm(D)				JCCim(0xb, D)
1324 #define JLm(D)				JCCim(0xc, D)
1325 #define JNGEm(D)			JCCim(0xc, D)
1326 #define JNLm(D)				JCCim(0xd, D)
1327 #define JGEm(D)				JCCim(0xd, D)
1328 #define JLEm(D)				JCCim(0xe, D)
1329 #define JNGm(D)				JCCim(0xe, D)
1330 #define JNLEm(D)			JCCim(0xf, D)
1331 #define JGm(D)				JCCim(0xf, D)
1332 
1333 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1334 #define SETCCir(CC, RD)			(_REXBrr(0, RD),		_OO_Mrm		(0x0f90|(CC)	,_b11,_b000,_r1(RD)				))
1335 #define SETOr(RD)			SETCCir(0x0,RD)
1336 #define SETNOr(RD)			SETCCir(0x1,RD)
1337 #define SETBr(RD)			SETCCir(0x2,RD)
1338 #define SETNAEr(RD)			SETCCir(0x2,RD)
1339 #define SETNBr(RD)			SETCCir(0x3,RD)
1340 #define SETAEr(RD)			SETCCir(0x3,RD)
1341 #define SETEr(RD)			SETCCir(0x4,RD)
1342 #define SETZr(RD)			SETCCir(0x4,RD)
1343 #define SETNEr(RD)			SETCCir(0x5,RD)
1344 #define SETNZr(RD)			SETCCir(0x5,RD)
1345 #define SETBEr(RD)			SETCCir(0x6,RD)
1346 #define SETNAr(RD)			SETCCir(0x6,RD)
1347 #define SETNBEr(RD)			SETCCir(0x7,RD)
1348 #define SETAr(RD)			SETCCir(0x7,RD)
1349 #define SETSr(RD)			SETCCir(0x8,RD)
1350 #define SETNSr(RD)			SETCCir(0x9,RD)
1351 #define SETPr(RD)			SETCCir(0xa,RD)
1352 #define SETPEr(RD)			SETCCir(0xa,RD)
1353 #define SETNPr(RD)			SETCCir(0xb,RD)
1354 #define SETPOr(RD)			SETCCir(0xb,RD)
1355 #define SETLr(RD)			SETCCir(0xc,RD)
1356 #define SETNGEr(RD)			SETCCir(0xc,RD)
1357 #define SETNLr(RD)			SETCCir(0xd,RD)
1358 #define SETGEr(RD)			SETCCir(0xd,RD)
1359 #define SETLEr(RD)			SETCCir(0xe,RD)
1360 #define SETNGr(RD)			SETCCir(0xe,RD)
1361 #define SETNLEr(RD)			SETCCir(0xf,RD)
1362 #define SETGr(RD)			SETCCir(0xf,RD)
1363 
1364 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1365 #define SETCCim(CC,MD,MB,MI,MS)		(_REXBrm(0, MB, MI),		_OO_r_X		(0x0f90|(CC)	     ,_b000		,MD,MB,MI,MS		))
1366 #define SETOm(D, B, I, S)		SETCCim(0x0, D, B, I, S)
1367 #define SETNOm(D, B, I, S)		SETCCim(0x1, D, B, I, S)
1368 #define SETBm(D, B, I, S)		SETCCim(0x2, D, B, I, S)
1369 #define SETNAEm(D, B, I, S)		SETCCim(0x2, D, B, I, S)
1370 #define SETNBm(D, B, I, S)		SETCCim(0x3, D, B, I, S)
1371 #define SETAEm(D, B, I, S)		SETCCim(0x3, D, B, I, S)
1372 #define SETEm(D, B, I, S)		SETCCim(0x4, D, B, I, S)
1373 #define SETZm(D, B, I, S)		SETCCim(0x4, D, B, I, S)
1374 #define SETNEm(D, B, I, S)		SETCCim(0x5, D, B, I, S)
1375 #define SETNZm(D, B, I, S)		SETCCim(0x5, D, B, I, S)
1376 #define SETBEm(D, B, I, S)		SETCCim(0x6, D, B, I, S)
1377 #define SETNAm(D, B, I, S)		SETCCim(0x6, D, B, I, S)
1378 #define SETNBEm(D, B, I, S)		SETCCim(0x7, D, B, I, S)
1379 #define SETAm(D, B, I, S)		SETCCim(0x7, D, B, I, S)
1380 #define SETSm(D, B, I, S)		SETCCim(0x8, D, B, I, S)
1381 #define SETNSm(D, B, I, S)		SETCCim(0x9, D, B, I, S)
1382 #define SETPm(D, B, I, S)		SETCCim(0xa, D, B, I, S)
1383 #define SETPEm(D, B, I, S)		SETCCim(0xa, D, B, I, S)
1384 #define SETNPm(D, B, I, S)		SETCCim(0xb, D, B, I, S)
1385 #define SETPOm(D, B, I, S)		SETCCim(0xb, D, B, I, S)
1386 #define SETLm(D, B, I, S)		SETCCim(0xc, D, B, I, S)
1387 #define SETNGEm(D, B, I, S)		SETCCim(0xc, D, B, I, S)
1388 #define SETNLm(D, B, I, S)		SETCCim(0xd, D, B, I, S)
1389 #define SETGEm(D, B, I, S)		SETCCim(0xd, D, B, I, S)
1390 #define SETLEm(D, B, I, S)		SETCCim(0xe, D, B, I, S)
1391 #define SETNGm(D, B, I, S)		SETCCim(0xe, D, B, I, S)
1392 #define SETNLEm(D, B, I, S)		SETCCim(0xf, D, B, I, S)
1393 #define SETGm(D, B, I, S)		SETCCim(0xf, D, B, I, S)
1394 
1395 /*									_format		Opcd		,Mod ,r	     ,m		,mem=dsp+sib	,imm... */
1396 #define CMOVWrr(CC,RS,RD)		(_d16(), _REXLrr(RD, RS),	_OO_Mrm		(0x0f40|(CC)	,_b11,_r2(RD),_r2(RS)				))
1397 #define CMOVWmr(CC,MD,MB,MI,MS,RD)	(_d16(), _REXLmr(MB, MI, RD),	_OO_r_X		(0x0f40|(CC)	     ,_r2(RD)		,MD,MB,MI,MS		))
1398 #define CMOVLrr(CC,RS,RD)		(_REXLrr(RD, RS),		_OO_Mrm		(0x0f40|(CC)	,_b11,_r4(RD),_r4(RS)				))
1399 #define CMOVLmr(CC,MD,MB,MI,MS,RD)	(_REXLmr(MB, MI, RD),		_OO_r_X		(0x0f40|(CC)	     ,_r4(RD)		,MD,MB,MI,MS		))
1400 #define CMOVQrr(CC,RS,RD)		(_REXQrr(RD, RS),		_OO_Mrm		(0x0f40|(CC)	,_b11,_r8(RD),_r8(RS)				))
1401 #define CMOVQmr(CC,MD,MB,MI,MS,RD)	(_REXQmr(MB, MI, RD),		_OO_r_X		(0x0f40|(CC)	     ,_r8(RD)		,MD,MB,MI,MS		))
1402 
1403 
/* --- Push/Pop instructions ----------------------------------------------- */

/*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */

/* Stack push/pop.  The W (16-bit) and L (32-bit) forms are restricted to
   32-bit mode (_m32only) and the Q (64-bit) forms to 64-bit mode
   (_m64only); the short 0x50+r / 0x58+r encodings implicitly use the
   native stack width in each mode. */

#define POPWr(RD)			_m32only((_d16(),		_Or		(0x58,_r2(RD)							)))
#define POPWm(MD, MB, MI, MS)		_m32only((_d16(),		_O_r_X		(0x8f		     ,_b000		,MD,MB,MI,MS		)))

#define POPLr(RD)			_m32only(			_Or		(0x58,_r4(RD)							))
#define POPLm(MD, MB, MI, MS)		_m32only(			_O_r_X		(0x8f		     ,_b000		,MD,MB,MI,MS		))

#define POPQr(RD)			_m64only((_REXQr(RD),		_Or		(0x58,_r8(RD)							)))
#define POPQm(MD, MB, MI, MS)		_m64only((_REXQm(MB, MI), 	_O_r_X		(0x8f		     ,_b000		,MD,MB,MI,MS		)))

#define PUSHWr(RS)			_m32only((_d16(),		_Or		(0x50,_r2(RS)							)))
/* Fixed: removed the stray comma after the 0xff opcode, which passed an
   empty argument to _O_r_X (compare PUSHLm/PUSHQm below: FF /6). */
#define PUSHWm(MD, MB, MI, MS)		_m32only((_d16(),		_O_r_X		(0xff		     ,_b110		,MD,MB,MI,MS		)))
#define PUSHWi(IM)			_m32only((_d16(),		_Os_sW		(0x68							,IM	)))

#define PUSHLr(RS)			_m32only(			_Or		(0x50,_r4(RS)							))
#define PUSHLm(MD, MB, MI, MS)		_m32only(			_O_r_X		(0xff		     ,_b110		,MD,MB,MI,MS		))
#define PUSHLi(IM)			_m32only(			_Os_sL		(0x68							,IM	))

#define PUSHQr(RS)			_m64only((_REXQr(RS),		_Or		(0x50,_r8(RS)							)))
#define PUSHQm(MD, MB, MI, MS)		_m64only((_REXQm(MB, MI),	_O_r_X		(0xff		     ,_b110		,MD,MB,MI,MS		)))
#define PUSHQi(IM)			_m64only(			_Os_sL		(0x68							,IM	))

/* NOTE(review): PUSHA/POPA (0x60/0x61) are invalid in 64-bit mode; these
   macros do not guard against that — callers must only use them when
   targeting IA-32. */
#define POPA()				(_d16(),			_O		(0x61								))
#define POPAD()								_O		(0x61								)

#define PUSHA()				(_d16(),			_O		(0x60								))
#define PUSHAD()							_O		(0x60								)

#define POPF()								_O		(0x9d								)
#define PUSHF()								_O		(0x9c								)
1438 
/* --- Test instructions --------------------------------------------------- */

/*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */

/* TEST (AND without writing the result; only flags are affected).  The
   ir (immediate-to-register) forms use the short accumulator encodings
   (A8/A9) when X86_OPTIMIZE_ALU is set and the destination is the
   accumulator (AL/AX/EAX/RAX); otherwise they fall back to the
   general F6/F7 /0 ModRM forms. */

#define TESTBrr(RS, RD)			(_REXBrr(RS, RD),		_O_Mrm		(0x84		,_b11,_r1(RS),_r1(RD)				))
#define TESTBrm(RS, MD, MB, MI, MS)	(_REXBrm(RS, MB, MI),		_O_r_X		(0x84		     ,_r1(RS)		,MD,MB,MI,MS		))
#define TESTBir(IM, RD)			(X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \
					(_REXBrr(0, RD),		_O_B		(0xa8							,_u8(IM))) : \
					(_REXBrr(0, RD),		_O_Mrm_B	(0xf6		,_b11,_b000  ,_r1(RD)			,_u8(IM))) )
#define TESTBim(IM, MD, MB, MI, MS)	(_REXBrm(0, MB, MI),		_O_r_X_B	(0xf6		     ,_b000		,MD,MB,MI,MS	,_u8(IM)))

#define TESTWrr(RS, RD)			(_d16(), _REXLrr(RS, RD),	_O_Mrm		(0x85		,_b11,_r2(RS),_r2(RD)				))
#define TESTWrm(RS, MD, MB, MI, MS)	(_d16(), _REXLrm(RS, MB, MI),	_O_r_X		(0x85		     ,_r2(RS)		,MD,MB,MI,MS		))
#define TESTWir(IM, RD)			(X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \
					(_d16(), _REXLrr(0, RD),	_O_W		(0xa9							,_u16(IM))) : \
					(_d16(), _REXLrr(0, RD),	_O_Mrm_W	(0xf7		,_b11,_b000  ,_r2(RD)			,_u16(IM))) )
#define TESTWim(IM, MD, MB, MI, MS)	(_d16(), _REXLrm(0, MB, MI),	_O_r_X_W	(0xf7		     ,_b000		,MD,MB,MI,MS	,_u16(IM)))

#define TESTLrr(RS, RD)			(_REXLrr(RS, RD),		_O_Mrm		(0x85		,_b11,_r4(RS),_r4(RD)				))
#define TESTLrm(RS, MD, MB, MI, MS)	(_REXLrm(RS, MB, MI),		_O_r_X		(0x85		     ,_r4(RS)		,MD,MB,MI,MS		))
#define TESTLir(IM, RD)			(X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \
					(_REXLrr(0, RD),		_O_L		(0xa9							,IM	)) : \
					(_REXLrr(0, RD),		_O_Mrm_L	(0xf7		,_b11,_b000  ,_r4(RD)			,IM	)) )
#define TESTLim(IM, MD, MB, MI, MS)	(_REXLrm(0, MB, MI),		_O_r_X_L	(0xf7		     ,_b000		,MD,MB,MI,MS	,IM	))

/* Note: the Q forms still emit a 32-bit immediate (_O_L / _O_Mrm_L);
   the CPU sign-extends it to 64 bits. */
#define TESTQrr(RS, RD)			(_REXQrr(RS, RD),		_O_Mrm		(0x85		,_b11,_r8(RS),_r8(RD)				))
#define TESTQrm(RS, MD, MB, MI, MS)	(_REXQrm(RS, MB, MI),		_O_r_X		(0x85		     ,_r8(RS)		,MD,MB,MI,MS		))
#define TESTQir(IM, RD)			(X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \
					(_REXQrr(0, RD),		_O_L		(0xa9							,IM	)) : \
					(_REXQrr(0, RD),		_O_Mrm_L	(0xf7		,_b11,_b000  ,_r8(RD)			,IM	)) )
#define TESTQim(IM, MD, MB, MI, MS)	(_REXQrm(0, MB, MI),		_O_r_X_L	(0xf7		     ,_b000		,MD,MB,MI,MS	,IM	))
1470 
1471 
/* --- Exchange instructions ----------------------------------------------- */

/*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */

/* CMPXCHG (0F B0/B1): compare-and-exchange with the accumulator.
   No LOCK prefix is emitted here; add one separately if atomicity
   against other CPUs is required. */
#define CMPXCHGBrr(RS, RD)		(_REXBrr(RS, RD),		_OO_Mrm		(0x0fb0		,_b11,_r1(RS),_r1(RD)				))
#define CMPXCHGBrm(RS, MD, MB, MI, MS)	(_REXBrm(RS, MB, MI),		_OO_r_X		(0x0fb0		     ,_r1(RS)		,MD,MB,MI,MS		))

#define CMPXCHGWrr(RS, RD)		(_d16(), _REXLrr(RS, RD),	_OO_Mrm		(0x0fb1		,_b11,_r2(RS),_r2(RD)				))
#define CMPXCHGWrm(RS, MD, MB, MI, MS)	(_d16(), _REXLrm(RS, MB, MI),	_OO_r_X		(0x0fb1		     ,_r2(RS)		,MD,MB,MI,MS		))

#define CMPXCHGLrr(RS, RD)		(_REXLrr(RS, RD),		_OO_Mrm		(0x0fb1		,_b11,_r4(RS),_r4(RD)				))
#define CMPXCHGLrm(RS, MD, MB, MI, MS)	(_REXLrm(RS, MB, MI),		_OO_r_X		(0x0fb1		     ,_r4(RS)		,MD,MB,MI,MS		))

#define CMPXCHGQrr(RS, RD)		(_REXQrr(RS, RD),		_OO_Mrm		(0x0fb1		,_b11,_r8(RS),_r8(RD)				))
#define CMPXCHGQrm(RS, MD, MB, MI, MS)	(_REXQrm(RS, MB, MI),		_OO_r_X		(0x0fb1		     ,_r8(RS)		,MD,MB,MI,MS		))

/* XADD (0F C0/C1): exchange and add. */
#define XADDBrr(RS, RD)			(_REXBrr(RS, RD),		_OO_Mrm		(0x0fc0		,_b11,_r1(RS),_r1(RD)				))
#define XADDBrm(RS, MD, MB, MI, MS)	(_REXBrm(RS, MB, MI),		_OO_r_X		(0x0fc0		     ,_r1(RS)		,MD,MB,MI,MS		))

#define XADDWrr(RS, RD)			(_d16(), _REXLrr(RS, RD),	_OO_Mrm		(0x0fc1		,_b11,_r2(RS),_r2(RD)				))
#define XADDWrm(RS, MD, MB, MI, MS)	(_d16(), _REXLrm(RS, MB, MI),	_OO_r_X		(0x0fc1		     ,_r2(RS)		,MD,MB,MI,MS		))

#define XADDLrr(RS, RD)			(_REXLrr(RS, RD),		_OO_Mrm		(0x0fc1		,_b11,_r4(RS),_r4(RD)				))
#define XADDLrm(RS, MD, MB, MI, MS)	(_REXLrm(RS, MB, MI),		_OO_r_X		(0x0fc1		     ,_r4(RS)		,MD,MB,MI,MS		))

#define XADDQrr(RS, RD)			(_REXQrr(RS, RD),		_OO_Mrm		(0x0fc1		,_b11,_r8(RS),_r8(RD)				))
#define XADDQrm(RS, MD, MB, MI, MS)	(_REXQrm(RS, MB, MI),		_OO_r_X		(0x0fc1		     ,_r8(RS)		,MD,MB,MI,MS		))

/* XCHG (86/87): register/register and register/memory exchange. */
#define XCHGBrr(RS, RD)			(_REXBrr(RS, RD),		_O_Mrm		(0x86		,_b11,_r1(RS),_r1(RD)				))
#define XCHGBrm(RS, MD, MB, MI, MS)	(_REXBrm(RS, MB, MI),		_O_r_X		(0x86		     ,_r1(RS)		,MD,MB,MI,MS		))

#define XCHGWrr(RS, RD)			(_d16(), _REXLrr(RS, RD),	_O_Mrm		(0x87		,_b11,_r2(RS),_r2(RD)				))
#define XCHGWrm(RS, MD, MB, MI, MS)	(_d16(), _REXLrm(RS, MB, MI),	_O_r_X		(0x87		     ,_r2(RS)		,MD,MB,MI,MS		))

#define XCHGLrr(RS, RD)			(_REXLrr(RS, RD),		_O_Mrm		(0x87		,_b11,_r4(RS),_r4(RD)				))
#define XCHGLrm(RS, MD, MB, MI, MS)	(_REXLrm(RS, MB, MI),		_O_r_X		(0x87		     ,_r4(RS)		,MD,MB,MI,MS		))

#define XCHGQrr(RS, RD)			(_REXQrr(RS, RD),		_O_Mrm		(0x87		,_b11,_r8(RS),_r8(RD)				))
#define XCHGQrm(RS, MD, MB, MI, MS)	(_REXQrm(RS, MB, MI),		_O_r_X		(0x87		     ,_r8(RS)		,MD,MB,MI,MS		))
1511 
1512 
/* --- Increment/Decrement instructions ------------------------------------ */

/*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */

/* INC/DEC.  The short single-byte register forms (0x40+r / 0x48+r) only
   exist in 32-bit mode: on x86-64 those byte values are REX prefixes, so
   the ModRM forms (FE/FF /0 and /1) are used there instead — hence the
   X86_TARGET_64BIT conditionals below. */

#define DECBm(MD, MB, MI, MS)		(_REXBrm(0, MB, MI),		_O_r_X		(0xfe		     ,_b001		,MD,MB,MI,MS		))
#define DECBr(RD)			(_REXBrr(0, RD),		_O_Mrm		(0xfe		,_b11,_b001  ,_r1(RD)				))

#define DECWm(MD, MB, MI, MS)		(_d16(), _REXLrm(0, MB, MI),	_O_r_X		(0xff		     ,_b001		,MD,MB,MI,MS		))
#define DECWr(RD)			(! X86_TARGET_64BIT ? (_d16(),	_Or		(0x48,_r2(RD)							)) : \
					(_d16(), _REXLrr(0, RD),	_O_Mrm		(0xff		,_b11,_b001  ,_r2(RD)				)))

#define DECLm(MD, MB, MI, MS)		(_REXLrm(0, MB, MI),		_O_r_X		(0xff		     ,_b001		,MD,MB,MI,MS		))
#define DECLr(RD)			(! X86_TARGET_64BIT ?	 	_Or		(0x48,_r4(RD)							) : \
					(_REXLrr(0, RD),		_O_Mrm		(0xff		,_b11,_b001  ,_r4(RD)				)))

#define DECQm(MD, MB, MI, MS)		(_REXQrm(0, MB, MI),		_O_r_X		(0xff		     ,_b001		,MD,MB,MI,MS		))
#define DECQr(RD)			(_REXQrr(0, RD),		_O_Mrm		(0xff		,_b11,_b001  ,_r8(RD)				))

#define INCBm(MD, MB, MI, MS)		(_REXBrm(0, MB, MI),		_O_r_X		(0xfe		     ,_b000		,MD,MB,MI,MS		))
#define INCBr(RD)			(_REXBrr(0, RD),		_O_Mrm		(0xfe		,_b11,_b000  ,_r1(RD)				))

#define INCWm(MD, MB, MI, MS)		(_d16(), _REXLrm(0, MB, MI),	_O_r_X		(0xff		     ,_b000		,MD,MB,MI,MS		))
#define INCWr(RD)			(! X86_TARGET_64BIT ? (_d16(),	_Or		(0x40,_r2(RD)							)) : \
					(_d16(), _REXLrr(0, RD),	_O_Mrm		(0xff		,_b11,_b000  ,_r2(RD)				)) )

#define INCLm(MD, MB, MI, MS)		(_REXLrm(0, MB, MI),		_O_r_X		(0xff		     ,_b000		,MD,MB,MI,MS		))
#define INCLr(RD)			(! X86_TARGET_64BIT ? 		_Or		(0x40,_r4(RD)							) : \
					(_REXLrr(0, RD),		_O_Mrm		(0xff		,_b11,_b000  ,_r4(RD)				)))

#define INCQm(MD, MB, MI, MS)		(_REXQrm(0, MB, MI),		_O_r_X		(0xff		     ,_b000		,MD,MB,MI,MS		))
#define INCQr(RD)			(_REXQrr(0, RD),		_O_Mrm		(0xff		,_b11,_b000  ,_r8(RD)				))
1544 
1545 
/* --- Misc instructions --------------------------------------------------- */

/*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */

/* Bit-scan forward (0F BC) and reverse (0F BD) in 16/32/64-bit widths. */
#define BSFWrr(RS, RD)			(_d16(), _REXLrr(RD, RS),	_OO_Mrm		(0x0fbc		,_b11,_r2(RD),_r2(RS)				))
#define BSFWmr(MD, MB, MI, MS, RD)	(_d16(), _REXLmr(MB, MI, RD),	_OO_r_X		(0x0fbc		     ,_r2(RD)		,MD,MB,MI,MS		))
#define BSRWrr(RS, RD)			(_d16(), _REXLrr(RD, RS),	_OO_Mrm		(0x0fbd		,_b11,_r2(RD),_r2(RS)				))
#define BSRWmr(MD, MB, MI, MS, RD)	(_d16(), _REXLmr(MB, MI, RD),	_OO_r_X		(0x0fbd		     ,_r2(RD)		,MD,MB,MI,MS		))

#define BSFLrr(RS, RD)			(_REXLrr(RD, RS),		_OO_Mrm		(0x0fbc		,_b11,_r4(RD),_r4(RS)				))
#define BSFLmr(MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_OO_r_X		(0x0fbc		     ,_r4(RD)		,MD,MB,MI,MS		))
#define BSRLrr(RS, RD)			(_REXLrr(RD, RS),		_OO_Mrm		(0x0fbd		,_b11,_r4(RD),_r4(RS)				))
#define BSRLmr(MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_OO_r_X		(0x0fbd		     ,_r4(RD)		,MD,MB,MI,MS		))

#define BSFQrr(RS, RD)			(_REXQrr(RD, RS),		_OO_Mrm		(0x0fbc		,_b11,_r8(RD),_r8(RS)				))
#define BSFQmr(MD, MB, MI, MS, RD)	(_REXQmr(MB, MI, RD),		_OO_r_X		(0x0fbc		     ,_r8(RD)		,MD,MB,MI,MS		))
#define BSRQrr(RS, RD)			(_REXQrr(RD, RS),		_OO_Mrm		(0x0fbd		,_b11,_r8(RD),_r8(RS)				))
#define BSRQmr(MD, MB, MI, MS, RD)	(_REXQmr(MB, MI, RD),		_OO_r_X		(0x0fbd		     ,_r8(RD)		,MD,MB,MI,MS		))
1564 
1565 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1566 
1567 #define MOVSBWrr(RS, RD)		(_d16(), _REXBLrr(RD, RS),	_OO_Mrm		(0x0fbe		,_b11,_r2(RD),_r1(RS)				))
1568 #define MOVSBWmr(MD, MB, MI, MS, RD)	(_d16(), _REXLmr(MB, MI, RD),	_OO_r_X		(0x0fbe		     ,_r2(RD)		,MD,MB,MI,MS		))
1569 #define MOVZBWrr(RS, RD)		(_d16(), _REXBLrr(RD, RS),	_OO_Mrm		(0x0fb6		,_b11,_r2(RD),_r1(RS)				))
1570 #define MOVZBWmr(MD, MB, MI, MS, RD)	(_d16(), _REXLmr(MB, MI, RD),	_OO_r_X		(0x0fb6		     ,_r2(RD)		,MD,MB,MI,MS		))
1571 
1572 #define MOVSBLrr(RS, RD)		(_REXBLrr(RD, RS),		_OO_Mrm		(0x0fbe		,_b11,_r4(RD),_r1(RS)				))
1573 #define MOVSBLmr(MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_OO_r_X		(0x0fbe		     ,_r4(RD)		,MD,MB,MI,MS		))
1574 #define MOVZBLrr(RS, RD)		(_REXBLrr(RD, RS),		_OO_Mrm		(0x0fb6		,_b11,_r4(RD),_r1(RS)				))
1575 #define MOVZBLmr(MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_OO_r_X		(0x0fb6		     ,_r4(RD)		,MD,MB,MI,MS		))
1576 
1577 #define MOVSBQrr(RS, RD)		(_REXQrr(RD, RS),		_OO_Mrm		(0x0fbe		,_b11,_r8(RD),_r1(RS)				))
1578 #define MOVSBQmr(MD, MB, MI, MS, RD)	(_REXQmr(MB, MI, RD),		_OO_r_X		(0x0fbe		     ,_r8(RD)		,MD,MB,MI,MS		))
1579 #define MOVZBQrr(RS, RD)		(_REXQrr(RD, RS),		_OO_Mrm		(0x0fb6		,_b11,_r8(RD),_r1(RS)				))
1580 #define MOVZBQmr(MD, MB, MI, MS, RD)	(_REXQmr(MB, MI, RD),		_OO_r_X		(0x0fb6		     ,_r8(RD)		,MD,MB,MI,MS		))
1581 
1582 #define MOVSWLrr(RS, RD)		(_REXLrr(RD, RS),		_OO_Mrm		(0x0fbf		,_b11,_r4(RD),_r2(RS)				))
1583 #define MOVSWLmr(MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_OO_r_X		(0x0fbf		     ,_r4(RD)		,MD,MB,MI,MS		))
1584 #define MOVZWLrr(RS, RD)		(_REXLrr(RD, RS),		_OO_Mrm		(0x0fb7		,_b11,_r4(RD),_r2(RS)				))
1585 #define MOVZWLmr(MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_OO_r_X		(0x0fb7		     ,_r4(RD)		,MD,MB,MI,MS		))
1586 
1587 #define MOVSWQrr(RS, RD)		(_REXQrr(RD, RS),		_OO_Mrm		(0x0fbf		,_b11,_r8(RD),_r2(RS)				))
1588 #define MOVSWQmr(MD, MB, MI, MS, RD)	(_REXQmr(MB, MI, RD),		_OO_r_X		(0x0fbf		     ,_r8(RD)		,MD,MB,MI,MS		))
1589 #define MOVZWQrr(RS, RD)		(_REXQrr(RD, RS),		_OO_Mrm		(0x0fb7		,_b11,_r8(RD),_r2(RS)				))
1590 #define MOVZWQmr(MD, MB, MI, MS, RD)	(_REXQmr(MB, MI, RD),		_OO_r_X		(0x0fb7		     ,_r8(RD)		,MD,MB,MI,MS		))
1591 
1592 #define MOVSLQrr(RS, RD)		_m64only((_REXQrr(RD, RS),	_O_Mrm		(0x63		,_b11,_r8(RD),_r4(RS)				)))
1593 #define MOVSLQmr(MD, MB, MI, MS, RD)	_m64only((_REXQmr(MB, MI, RD),	_O_r_X		(0x63		     ,_r8(RD)		,MD,MB,MI,MS		)))
1594 
1595 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1596 
1597 #define LEALmr(MD, MB, MI, MS, RD)	(_REXLmr(MB, MI, RD),		_O_r_X		(0x8d		     ,_r4(RD)		,MD,MB,MI,MS		))
1598 
1599 #define BSWAPLr(R)			(_REXLrr(0, R),			_OOr		(0x0fc8,_r4(R)							))
1600 #define BSWAPQr(R)			(_REXQrr(0, R),			_OOr		(0x0fc8,_r8(R)							))
1601 
1602 #define CLC()								_O		(0xf8								)
1603 #define STC()								_O		(0xf9								)
1604 
1605 #define CMC()								_O		(0xf5								)
1606 #define CLD()								_O		(0xfc								)
1607 #define STD()								_O		(0xfd								)
1608 
1609 #define CBTW()				(_d16(),			_O		(0x98								))
1610 #define CWTL()								_O		(0x98								)
1611 #define CLTQ()				_m64only(_REXQrr(0, 0),		_O		(0x98								))
1612 
1613 #define CBW				CBTW
1614 #define CWDE				CWTL
1615 #define CDQE				CLTQ
1616 
1617 #define CWTD()				(_d16(),			_O		(0x99								))
1618 #define CLTD()								_O		(0x99								)
1619 #define CQTO()				_m64only(_REXQrr(0, 0),		_O		(0x99								))
1620 
1621 #define CWD				CWTD
1622 #define CDQ				CLTD
1623 #define CQO				CQTO
1624 
1625 #define LAHF()								_O		(0x9f								)
1626 #define SAHF()								_O		(0x9e								)
1627 
1628 /*									_format		Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
1629 
1630 #define CPUID()								_OO		(0x0fa2								)
1631 #define RDTSC()								_OO		(0xff31								)
1632 
1633 #define ENTERii(W, B)							_O_W_B		(0xc8						  ,_su16(W),_su8(B))
1634 
1635 #define LEAVE()								_O		(0xc9								)
1636 #define RET()								_O		(0xc3								)
1637 #define RETi(IM)							_O_W		(0xc2							,_su16(IM))
1638 
1639 #define NOP()								_O		(0x90								)
1640 
1641 
/* --- Media 128-bit instructions ------------------------------------------ */

/* Second opcode byte (after 0F and any mandatory prefix) of the SSE/SSE2
   operations used below; the mandatory prefix (none/66/F3/F2) selects
   the PS/PD/SS/SD variant. */
enum {
  X86_SSE_CVTIS  = 0x2a,	/* integer -> scalar FP conversions (CVTSI2Sx etc.) */
  X86_SSE_CVTSI  = 0x2d,	/* scalar FP -> integer conversions (CVTSx2SI etc.) */
  X86_SSE_UCOMI  = 0x2e,	/* unordered compare, sets EFLAGS */
  X86_SSE_COMI   = 0x2f,	/* ordered compare, sets EFLAGS */
  X86_SSE_SQRT   = 0x51,
  X86_SSE_RSQRT  = 0x52,	/* reciprocal square root approximation */
  X86_SSE_RCP    = 0x53,	/* reciprocal approximation */
  X86_SSE_AND    = 0x54,
  X86_SSE_ANDN   = 0x55,
  X86_SSE_OR     = 0x56,
  X86_SSE_XOR    = 0x57,
  X86_SSE_ADD    = 0x58,
  X86_SSE_MUL    = 0x59,
  X86_SSE_CVTSD  = 0x5a,	/* FP precision conversions (CVTSS2SD etc.) */
  X86_SSE_CVTDT  = 0x5b,	/* packed int <-> packed FP conversions */
  X86_SSE_SUB    = 0x5c,
  X86_SSE_MIN    = 0x5d,
  X86_SSE_DIV    = 0x5e,
  X86_SSE_MAX    = 0x5f,
};
1665 
1666 /*									_format		Opcd		,Mod ,r	     ,m		,mem=dsp+sib	,imm... */
1667 
1668 #define __SSELrr(OP,RS,RSA,RD,RDA)	(_REXLrr(RD, RS),		_OO_Mrm		(0x0f00|(OP)	,_b11,RDA(RD),RSA(RS)				))
1669 #define __SSELmr(OP,MD,MB,MI,MS,RD,RDA)	(_REXLmr(MB, MI, RD),		_OO_r_X		(0x0f00|(OP)	     ,RDA(RD)		,MD,MB,MI,MS		))
1670 #define __SSELrm(OP,RS,RSA,MD,MB,MI,MS)	(_REXLrm(RS, MB, MI),		_OO_r_X		(0x0f00|(OP)	     ,RSA(RS)		,MD,MB,MI,MS		))
1671 
1672 #define __SSEQrr(OP,RS,RSA,RD,RDA)	(_REXQrr(RD, RS),		_OO_Mrm		(0x0f00|(OP)	,_b11,RDA(RD),RSA(RS)				))
1673 #define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA)	(_REXQmr(MB, MI, RD),		_OO_r_X		(0x0f00|(OP)	     ,RDA(RD)		,MD,MB,MI,MS		))
1674 #define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS)	(_REXQrm(RS, MB, MI),		_OO_r_X		(0x0f00|(OP)	     ,RSA(RS)		,MD,MB,MI,MS		))
1675 
1676 #define _SSELrr(PX,OP,RS,RSA,RD,RDA)					(_B(PX), __SSELrr(OP, RS, RSA, RD, RDA))
1677 #define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA)				(_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA))
1678 #define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS)				(_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS))
1679 
1680 #define _SSEQrr(PX,OP,RS,RSA,RD,RDA)					(_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA))
1681 #define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA)				(_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA))
1682 #define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS)				(_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS))
1683 
1684 #define _SSEPSrr(OP,RS,RD)		__SSELrr(      OP, RS,_rX, RD,_rX)
1685 #define _SSEPSmr(OP,MD,MB,MI,MS,RD)	__SSELmr(      OP, MD, MB, MI, MS, RD,_rX)
1686 #define _SSEPSrm(OP,RS,MD,MB,MI,MS)	__SSELrm(      OP, RS,_rX, MD, MB, MI, MS)
1687 
1688 #define _SSEPDrr(OP,RS,RD)		 _SSELrr(0x66, OP, RS,_rX, RD,_rX)
1689 #define _SSEPDmr(OP,MD,MB,MI,MS,RD)	 _SSELmr(0x66, OP, MD, MB, MI, MS, RD,_rX)
1690 #define _SSEPDrm(OP,RS,MD,MB,MI,MS)	 _SSELrm(0x66, OP, RS,_rX, MD, MB, MI, MS)
1691 
1692 #define _SSESSrr(OP,RS,RD)		 _SSELrr(0xf3, OP, RS,_rX, RD,_rX)
1693 #define _SSESSmr(OP,MD,MB,MI,MS,RD)	 _SSELmr(0xf3, OP, MD, MB, MI, MS, RD,_rX)
1694 #define _SSESSrm(OP,RS,MD,MB,MI,MS)	 _SSELrm(0xf3, OP, RS,_rX, MD, MB, MI, MS)
1695 
1696 #define _SSESDrr(OP,RS,RD)		 _SSELrr(0xf2, OP, RS,_rX, RD,_rX)
1697 #define _SSESDmr(OP,MD,MB,MI,MS,RD)	 _SSELmr(0xf2, OP, MD, MB, MI, MS, RD,_rX)
1698 #define _SSESDrm(OP,RS,MD,MB,MI,MS)	 _SSELrm(0xf2, OP, RS,_rX, MD, MB, MI, MS)
1699 
/* SSE/SSE2 arithmetic, logic and compare instructions, built from the
   opcode enum and variant selectors above.  rr = register to register,
   mr = memory to register. */
#define ADDPSrr(RS, RD)			_SSEPSrr(X86_SSE_ADD, RS, RD)
#define ADDPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
#define ADDPDrr(RS, RD)			_SSEPDrr(X86_SSE_ADD, RS, RD)
#define ADDPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)

#define ADDSSrr(RS, RD)			_SSESSrr(X86_SSE_ADD, RS, RD)
#define ADDSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
#define ADDSDrr(RS, RD)			_SSESDrr(X86_SSE_ADD, RS, RD)
#define ADDSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)

#define ANDNPSrr(RS, RD)		_SSEPSrr(X86_SSE_ANDN, RS, RD)
#define ANDNPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
#define ANDNPDrr(RS, RD)		_SSEPDrr(X86_SSE_ANDN, RS, RD)
#define ANDNPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)

#define ANDPSrr(RS, RD)			_SSEPSrr(X86_SSE_AND, RS, RD)
#define ANDPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
#define ANDPDrr(RS, RD)			_SSEPDrr(X86_SSE_AND, RS, RD)
#define ANDPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)

#define DIVPSrr(RS, RD)			_SSEPSrr(X86_SSE_DIV, RS, RD)
#define DIVPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
#define DIVPDrr(RS, RD)			_SSEPDrr(X86_SSE_DIV, RS, RD)
#define DIVPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)

#define DIVSSrr(RS, RD)			_SSESSrr(X86_SSE_DIV, RS, RD)
#define DIVSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
#define DIVSDrr(RS, RD)			_SSESDrr(X86_SSE_DIV, RS, RD)
#define DIVSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)

#define MAXPSrr(RS, RD)			_SSEPSrr(X86_SSE_MAX, RS, RD)
#define MAXPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
#define MAXPDrr(RS, RD)			_SSEPDrr(X86_SSE_MAX, RS, RD)
#define MAXPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)

#define MAXSSrr(RS, RD)			_SSESSrr(X86_SSE_MAX, RS, RD)
#define MAXSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
#define MAXSDrr(RS, RD)			_SSESDrr(X86_SSE_MAX, RS, RD)
#define MAXSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)

#define MINPSrr(RS, RD)			_SSEPSrr(X86_SSE_MIN, RS, RD)
#define MINPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
#define MINPDrr(RS, RD)			_SSEPDrr(X86_SSE_MIN, RS, RD)
#define MINPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)

#define MINSSrr(RS, RD)			_SSESSrr(X86_SSE_MIN, RS, RD)
#define MINSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
#define MINSDrr(RS, RD)			_SSESDrr(X86_SSE_MIN, RS, RD)
#define MINSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)

#define MULPSrr(RS, RD)			_SSEPSrr(X86_SSE_MUL, RS, RD)
#define MULPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
#define MULPDrr(RS, RD)			_SSEPDrr(X86_SSE_MUL, RS, RD)
#define MULPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)

#define MULSSrr(RS, RD)			_SSESSrr(X86_SSE_MUL, RS, RD)
#define MULSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
#define MULSDrr(RS, RD)			_SSESDrr(X86_SSE_MUL, RS, RD)
#define MULSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)

#define ORPSrr(RS, RD)			_SSEPSrr(X86_SSE_OR, RS, RD)
#define ORPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
#define ORPDrr(RS, RD)			_SSEPDrr(X86_SSE_OR, RS, RD)
#define ORPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)

#define RCPPSrr(RS, RD)			_SSEPSrr(X86_SSE_RCP, RS, RD)
#define RCPPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
#define RCPSSrr(RS, RD)			_SSESSrr(X86_SSE_RCP, RS, RD)
#define RCPSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)

#define RSQRTPSrr(RS, RD)		_SSEPSrr(X86_SSE_RSQRT, RS, RD)
#define RSQRTPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
#define RSQRTSSrr(RS, RD)		_SSESSrr(X86_SSE_RSQRT, RS, RD)
#define RSQRTSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)

#define SQRTPSrr(RS, RD)		_SSEPSrr(X86_SSE_SQRT, RS, RD)
#define SQRTPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
#define SQRTPDrr(RS, RD)		_SSEPDrr(X86_SSE_SQRT, RS, RD)
#define SQRTPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)

#define SQRTSSrr(RS, RD)		_SSESSrr(X86_SSE_SQRT, RS, RD)
#define SQRTSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
#define SQRTSDrr(RS, RD)		_SSESDrr(X86_SSE_SQRT, RS, RD)
#define SQRTSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)

#define SUBPSrr(RS, RD)			_SSEPSrr(X86_SSE_SUB, RS, RD)
#define SUBPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
#define SUBPDrr(RS, RD)			_SSEPDrr(X86_SSE_SUB, RS, RD)
#define SUBPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)

#define SUBSSrr(RS, RD)			_SSESSrr(X86_SSE_SUB, RS, RD)
#define SUBSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
#define SUBSDrr(RS, RD)			_SSESDrr(X86_SSE_SUB, RS, RD)
#define SUBSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)

#define XORPSrr(RS, RD)			_SSEPSrr(X86_SSE_XOR, RS, RD)
#define XORPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
#define XORPDrr(RS, RD)			_SSEPDrr(X86_SSE_XOR, RS, RD)
#define XORPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)

/* COMISx/UCOMISx compare scalars and set EFLAGS (scalar forms only). */
#define COMISSrr(RS, RD)		_SSESSrr(X86_SSE_COMI, RS, RD)
#define COMISSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
#define COMISDrr(RS, RD)		_SSESDrr(X86_SSE_COMI, RS, RD)
#define COMISDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)

#define UCOMISSrr(RS, RD)		_SSESSrr(X86_SSE_UCOMI, RS, RD)
#define UCOMISSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
#define UCOMISDrr(RS, RD)		_SSESDrr(X86_SSE_UCOMI, RS, RD)
#define UCOMISDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
1809 
1810 #define MOVAPSrr(RS, RD)		_SSEPSrr(0x28, RS, RD)
1811 #define MOVAPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(0x28, MD, MB, MI, MS, RD)
1812 #define MOVAPSrm(RS, MD, MB, MI, MS)	_SSEPSrm(0x29, RS, MD, MB, MI, MS)
1813 
1814 #define MOVAPDrr(RS, RD)		_SSEPDrr(0x28, RS, RD)
1815 #define MOVAPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(0x28, MD, MB, MI, MS, RD)
1816 #define MOVAPDrm(RS, MD, MB, MI, MS)	_SSEPDrm(0x29, RS, MD, MB, MI, MS)
1817 
/* SSE/SSE2 conversion instructions.  The mandatory prefix selects the
   data type: none (__SSEL*) = packed single, 0x66 = packed double,
   0xf3 = scalar single, 0xf2 = scalar double.  The register-class tags
   follow the usage throughout this group: _rX = XMM, _rM = MMX,
   _r4 = 32-bit GPR, _r8 = 64-bit GPR (_SSEQ* REX.W forms). */

/* Packed float <-> MMX integer. */
#define CVTPS2PIrr(RS, RD)		__SSELrr(      X86_SSE_CVTSI, RS,_rX, RD,_rM)
#define CVTPS2PImr(MD, MB, MI, MS, RD)	__SSELmr(      X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM)
#define CVTPD2PIrr(RS, RD)		 _SSELrr(0x66, X86_SSE_CVTSI, RS,_rX, RD,_rM)
#define CVTPD2PImr(MD, MB, MI, MS, RD)	 _SSELmr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM)

#define CVTPI2PSrr(RS, RD)		__SSELrr(      X86_SSE_CVTIS, RS,_rM, RD,_rX)
#define CVTPI2PSmr(MD, MB, MI, MS, RD)	__SSELmr(      X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
#define CVTPI2PDrr(RS, RD)		 _SSELrr(0x66, X86_SSE_CVTIS, RS,_rM, RD,_rX)
#define CVTPI2PDmr(MD, MB, MI, MS, RD)	 _SSELmr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)

/* Packed single <-> packed double. */
#define CVTPS2PDrr(RS, RD)		__SSELrr(      X86_SSE_CVTSD, RS,_rX, RD,_rX)
#define CVTPS2PDmr(MD, MB, MI, MS, RD)	__SSELmr(      X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
#define CVTPD2PSrr(RS, RD)		 _SSELrr(0x66, X86_SSE_CVTSD, RS,_rX, RD,_rX)
#define CVTPD2PSmr(MD, MB, MI, MS, RD)	 _SSELmr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)

/* Scalar single <-> scalar double. */
#define CVTSS2SDrr(RS, RD)		 _SSELrr(0xf3, X86_SSE_CVTSD, RS,_rX, RD,_rX)
#define CVTSS2SDmr(MD, MB, MI, MS, RD)	 _SSELmr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
#define CVTSD2SSrr(RS, RD)		 _SSELrr(0xf2, X86_SSE_CVTSD, RS,_rX, RD,_rX)
#define CVTSD2SSmr(MD, MB, MI, MS, RD)	 _SSELmr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)

/* Scalar float <-> 32-bit GPR (L suffix). */
#define CVTSS2SILrr(RS, RD)		 _SSELrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r4)
#define CVTSS2SILmr(MD, MB, MI, MS, RD)	 _SSELmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4)
#define CVTSD2SILrr(RS, RD)		 _SSELrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r4)
#define CVTSD2SILmr(MD, MB, MI, MS, RD)	 _SSELmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4)

#define CVTSI2SSLrr(RS, RD)		 _SSELrr(0xf3, X86_SSE_CVTIS, RS,_r4, RD,_rX)
#define CVTSI2SSLmr(MD, MB, MI, MS, RD)	 _SSELmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
#define CVTSI2SDLrr(RS, RD)		 _SSELrr(0xf2, X86_SSE_CVTIS, RS,_r4, RD,_rX)
#define CVTSI2SDLmr(MD, MB, MI, MS, RD)	 _SSELmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)

/* Scalar float <-> 64-bit GPR (Q suffix, AMD64 REX.W forms). */
#define CVTSS2SIQrr(RS, RD)		 _SSEQrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r8)
#define CVTSS2SIQmr(MD, MB, MI, MS, RD)	 _SSEQmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8)
#define CVTSD2SIQrr(RS, RD)		 _SSEQrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r8)
#define CVTSD2SIQmr(MD, MB, MI, MS, RD)	 _SSEQmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8)

#define CVTSI2SSQrr(RS, RD)		 _SSEQrr(0xf3, X86_SSE_CVTIS, RS,_r8, RD,_rX)
#define CVTSI2SSQmr(MD, MB, MI, MS, RD)	 _SSEQmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
#define CVTSI2SDQrr(RS, RD)		 _SSEQrr(0xf2, X86_SSE_CVTIS, RS,_r8, RD,_rX)
#define CVTSI2SDQmr(MD, MB, MI, MS, RD)	 _SSEQmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
1857 
/* MOVD/MOVQ between GPR and XMM (0f 6e = load into vector reg,
   0f 7e = store from vector reg).  Naming: ...LX = GPR32 -> XMM,
   ...QX = GPR64 -> XMM (REX.W), ...XL / ...XQ the reverse. */
#define MOVDLXrr(RS, RD)		 _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX)
#define MOVDLXmr(MD, MB, MI, MS, RD)	 _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
#define MOVDQXrr(RS, RD)		 _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX)
#define MOVDQXmr(MD, MB, MI, MS, RD)	 _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)

#define MOVDXLrr(RS, RD)		 _SSELrr(0x66, 0x7e, RS,_rX, RD,_r4)
#define MOVDXLrm(RS, MD, MB, MI, MS)	 _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
#define MOVDXQrr(RS, RD)		 _SSEQrr(0x66, 0x7e, RS,_rX, RD,_r8)
#define MOVDXQrm(RS, MD, MB, MI, MS)	 _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)

/* Same, but for the MMX register file (no 0x66 prefix): ...M = MMX. */
#define MOVDLMrr(RS, RD)		__SSELrr(      0x6e, RS,_r4, RD,_rM)
#define MOVDLMmr(MD, MB, MI, MS, RD)	__SSELmr(      0x6e, MD, MB, MI, MS, RD,_rM)
#define MOVDQMrr(RS, RD)		__SSEQrr(      0x6e, RS,_r8, RD,_rM)
#define MOVDQMmr(MD, MB, MI, MS, RD)	__SSEQmr(      0x6e, MD, MB, MI, MS, RD,_rM)

#define MOVDMLrr(RS, RD)		__SSELrr(      0x7e, RS,_rM, RD,_r4)
#define MOVDMLrm(RS, MD, MB, MI, MS)	__SSELrm(      0x7e, RS,_rM, MD, MB, MI, MS)
#define MOVDMQrr(RS, RD)		__SSEQrr(      0x7e, RS,_rM, RD,_r8)
#define MOVDMQrm(RS, MD, MB, MI, MS)	__SSEQrm(      0x7e, RS,_rM, MD, MB, MI, MS)

/* MOVDQ2Q (xmm low qword -> mmx) and the SSE shuffle-free half moves. */
#define MOVDQ2Qrr(RS, RD)		 _SSELrr(0xf2, 0xd6, RS,_rX, RD,_rM)
#define MOVHLPSrr(RS, RD)		__SSELrr(      0x12, RS,_rX, RD,_rX)
#define MOVLHPSrr(RS, RD)		__SSELrr(      0x16, RS,_rX, RD,_rX)

/* 128-bit integer moves: MOVDQA aligned, MOVDQU unaligned
   (0x6f load, 0x7f store). */
#define MOVDQArr(RS, RD)		 _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX)
#define MOVDQAmr(MD, MB, MI, MS, RD)	 _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX)
#define MOVDQArm(RS, MD, MB, MI, MS)	 _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS)

#define MOVDQUrr(RS, RD)		 _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX)
#define MOVDQUmr(MD, MB, MI, MS, RD)	 _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX)
#define MOVDQUrm(RS, MD, MB, MI, MS)	 _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS)

/* MOVHPx/MOVLPx — move the high (0x16/0x17) or low (0x12/0x13) 64-bit
   half of an XMM register to/from memory. */
#define MOVHPDmr(MD, MB, MI, MS, RD)	 _SSELmr(0x66, 0x16, MD, MB, MI, MS, RD,_rX)
#define MOVHPDrm(RS, MD, MB, MI, MS)	 _SSELrm(0x66, 0x17, RS,_rX, MD, MB, MI, MS)
#define MOVHPSmr(MD, MB, MI, MS, RD)	__SSELmr(      0x16, MD, MB, MI, MS, RD,_rX)
#define MOVHPSrm(RS, MD, MB, MI, MS)	__SSELrm(      0x17, RS,_rX, MD, MB, MI, MS)

#define MOVLPDmr(MD, MB, MI, MS, RD)	 _SSELmr(0x66, 0x12, MD, MB, MI, MS, RD,_rX)
#define MOVLPDrm(RS, MD, MB, MI, MS)	 _SSELrm(0x66, 0x13, RS,_rX, MD, MB, MI, MS)
#define MOVLPSmr(MD, MB, MI, MS, RD)	__SSELmr(      0x12, MD, MB, MI, MS, RD,_rX)
#define MOVLPSrm(RS, MD, MB, MI, MS)	__SSELrm(      0x13, RS,_rX, MD, MB, MI, MS)
1899 
1900 
/* --- Floating-point instructions ----------------------------------------- */
1902 
1903 #define _ESCmi(D,B,I,S,OP)	(_REXLrm(0,B,I), _O_r_X(0xd8|(OP & 7), (OP >> 3), D,B,I,S))
1904 
/* x87 load/store.  Register forms take a stack index R; memory forms
   take the usual disp/base/index/scale quadruple. */
#define FLDr(R)			_OOr(0xd9c0,_rN(R))		/* FLD st(R) */
#define FLDLm(D,B,I,S)		_ESCmi(D,B,I,S,005)		/* FLD m64 (dd /0) */
#define FLDSm(D,B,I,S)		_ESCmi(D,B,I,S,001)		/* FLD m32 (d9 /0) */
#define FLDTm(D,B,I,S)		_ESCmi(D,B,I,S,053)		/* FLD m80 (db /5) */

#define FSTr(R)			_OOr(0xddd0,_rN(R))		/* FST st(R) */
#define FSTSm(D,B,I,S)		_ESCmi(D,B,I,S,021)		/* FST m32 (d9 /2) */
#define FSTLm(D,B,I,S)		_ESCmi(D,B,I,S,025)		/* FST m64 (dd /2) */

#define FSTPr(R)		_OOr(0xddd8,_rN(R))		/* FSTP st(R) */
#define FSTPSm(D,B,I,S)		_ESCmi(D,B,I,S,031)		/* FSTP m32 (d9 /3) */
#define FSTPLm(D,B,I,S)		_ESCmi(D,B,I,S,035)		/* FSTP m64 (dd /3) */
#define FSTPTm(D,B,I,S)		_ESCmi(D,B,I,S,073)		/* FSTP m80 (db /7) */
1918 
/* x87 arithmetic.  Naming: Fopr0 = st(0) OP= st(R) (d8 escape),
   Fop0r = st(R) OP= st(0) (dc escape), FopP0r = same with pop (de).
   Note the dc-escape forms of FSUB/FDIV use the e8/f8 (not e0/f0)
   opcodes — this matches Intel's reversed mnemonic assignment in the
   dc group and is intentional, not a typo. */
#define FADDr0(R)		_OOr(0xd8c0,_rN(R))
#define FADD0r(R)		_OOr(0xdcc0,_rN(R))
#define FADDP0r(R)		_OOr(0xdec0,_rN(R))
#define FADDSm(D,B,I,S)		_ESCmi(D,B,I,S,000)		/* FADD m32 (d8 /0) */
#define FADDLm(D,B,I,S)		_ESCmi(D,B,I,S,004)		/* FADD m64 (dc /0) */

#define FSUBSm(D,B,I,S)		_ESCmi(D,B,I,S,040)		/* FSUB m32 (d8 /4) */
#define FSUBLm(D,B,I,S)		_ESCmi(D,B,I,S,044)		/* FSUB m64 (dc /4) */
#define FSUBr0(R)		_OOr(0xd8e0,_rN(R))
#define FSUB0r(R)		_OOr(0xdce8,_rN(R))
#define FSUBP0r(R)		_OOr(0xdee8,_rN(R))

#define FSUBRr0(R)		_OOr(0xd8e8,_rN(R))
#define FSUBR0r(R)		_OOr(0xdce0,_rN(R))
#define FSUBRP0r(R)		_OOr(0xdee0,_rN(R))
#define FSUBRSm(D,B,I,S)	_ESCmi(D,B,I,S,050)		/* FSUBR m32 (d8 /5) */
#define FSUBRLm(D,B,I,S)	_ESCmi(D,B,I,S,054)		/* FSUBR m64 (dc /5) */

#define FMULr0(R)		_OOr(0xd8c8,_rN(R))
#define FMUL0r(R)		_OOr(0xdcc8,_rN(R))
#define FMULP0r(R)		_OOr(0xdec8,_rN(R))
#define FMULSm(D,B,I,S)		_ESCmi(D,B,I,S,010)		/* FMUL m32 (d8 /1) */
#define FMULLm(D,B,I,S)		_ESCmi(D,B,I,S,014)		/* FMUL m64 (dc /1) */

#define FDIVr0(R)		_OOr(0xd8f0,_rN(R))
#define FDIV0r(R)		_OOr(0xdcf8,_rN(R))
#define FDIVP0r(R)		_OOr(0xdef8,_rN(R))
#define FDIVSm(D,B,I,S)		_ESCmi(D,B,I,S,060)		/* FDIV m32 (d8 /6) */
#define FDIVLm(D,B,I,S)		_ESCmi(D,B,I,S,064)		/* FDIV m64 (dc /6) */

#define FDIVRr0(R)		_OOr(0xd8f8,_rN(R))
#define FDIVR0r(R)		_OOr(0xdcf0,_rN(R))
#define FDIVRP0r(R)		_OOr(0xdef0,_rN(R))
#define FDIVRSm(D,B,I,S)	_ESCmi(D,B,I,S,070)		/* FDIVR m32 (d8 /7) */
#define FDIVRLm(D,B,I,S)	_ESCmi(D,B,I,S,074)		/* FDIVR m64 (dc /7) */
1954 
/* x87 conditional moves (P6+): FCMOVcc st(0), st(R). */
#define FCMOVBr0(R)		_OOr(0xdac0,_rN(R))
#define FCMOVBEr0(R)		_OOr(0xdad0,_rN(R))
#define FCMOVEr0(R)		_OOr(0xdac8,_rN(R))
#define FCMOVNBr0(R)		_OOr(0xdbc0,_rN(R))
#define FCMOVNBEr0(R)		_OOr(0xdbd0,_rN(R))
#define FCMOVNEr0(R)		_OOr(0xdbc8,_rN(R))
#define FCMOVNUr0(R)		_OOr(0xdbd8,_rN(R))
#define FCMOVUr0(R)		_OOr(0xdad8,_rN(R))
/* FCOMI/FCOMIP — compare st(0) with st(R), setting EFLAGS directly. */
#define FCOMIr0(R)		_OOr(0xdbf0,_rN(R))
#define FCOMIPr0(R)		_OOr(0xdff0,_rN(R))

/* Classic compares that set the x87 condition codes. */
#define FCOMr(R)		_OOr(0xd8d0,_rN(R))
#define FCOMSm(D,B,I,S)		_ESCmi(D,B,I,S,020)		/* FCOM m32 (d8 /2) */
#define FCOMLm(D,B,I,S)		_ESCmi(D,B,I,S,024)		/* FCOM m64 (dc /2) */

#define FCOMPr(R)		_OOr(0xd8d8,_rN(R))
#define FCOMPSm(D,B,I,S)	_ESCmi(D,B,I,S,030)		/* FCOMP m32 (d8 /3) */
#define FCOMPLm(D,B,I,S)	_ESCmi(D,B,I,S,034)		/* FCOMP m64 (dc /3) */

/* Unordered compares (no invalid-op exception on QNaN). */
#define FUCOMIr0(R)		_OOr(0xdbe8,_rN(R))
#define FUCOMIPr0(R)		_OOr(0xdfe8,_rN(R))
#define FUCOMPr(R)		_OOr(0xdde8,_rN(R))
#define FUCOMr(R)		_OOr(0xdde0,_rN(R))
1978 
/* x87 integer-operand instructions.  The L forms take m32int via the
   da/db escapes; FILDQm/FISTPQm are the m64int df forms. */
#define FIADDLm(D,B,I,S)	_ESCmi(D,B,I,S,002)		/* FIADD m32 (da /0) */
#define FICOMLm(D,B,I,S)	_ESCmi(D,B,I,S,022)		/* FICOM m32 (da /2) */
#define FICOMPLm(D,B,I,S)	_ESCmi(D,B,I,S,032)		/* FICOMP m32 (da /3) */
#define FIDIVLm(D,B,I,S)	_ESCmi(D,B,I,S,062)		/* FIDIV m32 (da /6) */
#define FIDIVRLm(D,B,I,S)	_ESCmi(D,B,I,S,072)		/* FIDIVR m32 (da /7) */
#define FILDLm(D,B,I,S)		_ESCmi(D,B,I,S,003)		/* FILD m32 (db /0) */
#define FILDQm(D,B,I,S)		_ESCmi(D,B,I,S,057)		/* FILD m64 (df /5) */
#define FIMULLm(D,B,I,S)	_ESCmi(D,B,I,S,012)		/* FIMUL m32 (da /1) */
#define FISTLm(D,B,I,S)		_ESCmi(D,B,I,S,023)		/* FIST m32 (db /2) */
#define FISTPLm(D,B,I,S)	_ESCmi(D,B,I,S,033)		/* FISTP m32 (db /3) */
#define FISTPQm(D,B,I,S)	_ESCmi(D,B,I,S,077)		/* FISTP m64 (df /7) */
#define FISUBLm(D,B,I,S)	_ESCmi(D,B,I,S,042)		/* FISUB m32 (da /4) */
#define FISUBRLm(D,B,I,S)	_ESCmi(D,B,I,S,052)		/* FISUBR m32 (da /5) */
1992 
#define FREEr(R)		_OOr(0xddc0,_rN(R))		/* FFREE st(R) */
#define FXCHr(R)		_OOr(0xd9c8,_rN(R))		/* FXCH st(R) */
1995 
1996 #endif /* X86_RTASM_H */
1997