1 /******************************** -*- C -*- ****************************
2  *
3  *	Run-time assembler for the i386
4  *
5  ***********************************************************************/
6 
7 
8 /***********************************************************************
9  *
10  * Copyright 1999, 2000, 2001, 2002 Ian Piumarta
11  *
12  * This file is part of GNU lightning.
13  *
14  * GNU lightning is free software; you can redistribute it and/or modify it
15  * under the terms of the GNU Lesser General Public License as published
16  * by the Free Software Foundation; either version 2.1, or (at your option)
17  * any later version.
18  *
19  * GNU lightning is distributed in the hope that it will be useful, but
20  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21  * or  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
22  * License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with GNU lightning; see the file COPYING.LESSER; if not, write to the
26  * Free Software Foundation, 59 Temple Place - Suite 330, Boston,
27  * MA 02111-1307, USA.
28  *
29  ***********************************************************************/
30 
31 
32 
33 
34 #ifndef __lightning_asm_h
35 #define __lightning_asm_h
36 
37 /*	OPCODE	+ i		= immediate operand
38  *		+ r		= register operand
39  *		+ m		= memory operand (disp,base,index,scale)
40  *		+ sr/sm		= a star preceding a register or memory
41  */
42 
43 
/* jit_insn is an alias for _uc, which is declared outside this section;
   presumably the unit type of the generated code buffer -- TODO confirm. */
44 typedef _uc		jit_insn;
45 
46 #ifndef LIGHTNING_DEBUG
/* Numeric values for the binary-spelled field constants: the digits after
   "_b" are the value written in base 2.  The 2-bit names are passed as the
   mod argument of _Mrm/_qMrm; the 3-bit names fill reg, r/m and SIB fields
   in the macros further down. */
47 #define _b00		0
48 #define _b01		1
49 #define _b10		2
50 #define _b11		3
51 
52 #define _b000		0
53 #define _b001		1
54 #define _b010		2
55 #define _b011		3
56 #define _b100		4
57 #define _b101		5
58 #define _b110		6
59 #define _b111		7
60 
61 /*** REGISTERS ***/	/* [size,,number] */
62 
63 
/* Register descriptors.  The bits from bit 4 upwards hold the operand size
   in bytes (extracted by _rS); the low bits hold the register number
   (extracted by _rN / _qrN). */

/* Size field 1: 8-bit registers, numbers 0-7. */
64 #define _AL		0x10
65 #define _CL		0x11
66 #define _DL		0x12
67 #define _BL		0x13
68 #define _AH		0x14
69 #define _CH		0x15
70 #define _DH		0x16
71 #define _BH		0x17
72 
/* Size field 2: 16-bit registers, numbers 0-7. */
73 #define _AX		0x20
74 #define _CX		0x21
75 #define _DX		0x22
76 #define _BX		0x23
77 #define _SP		0x24
78 #define _BP		0x25
79 #define _SI		0x26
80 #define _DI		0x27
81 
/* Size field 4: 32-bit registers, numbers 0-7.  The 64-bit (Q) instruction
   macros in this file take these same descriptors through _r8. */
82 #define _EAX		0x40
83 #define _ECX		0x41
84 #define _EDX		0x42
85 #define _EBX		0x43
86 #define _ESP		0x44
87 #define _EBP		0x45
88 #define _ESI		0x46
89 #define _EDI		0x47
90 
/* Size field 4 with register numbers above 7 (0xC, 0xD, 0x8, 0xB); bit 3
   of the number is what the _REX_ macro moves into the REX prefix.
   JIT_REXTMP is the scratch register loaded by CALLm and JMPm on
   JIT_X86_64.  JIT_CALLTMPSTART is not used in this section. */
91 #define _R12            0x4C
92 #define _R13            0x4D
93 #define JIT_CALLTMPSTART 0x48
94 #define JIT_REXTMP       0x4B
95 
/* _ST0.._ST7 are plain numbers 0-7 with no size field; presumably the x87
   stack slots -- they are not used in this section. */
96 #define _ST0		0
97 #define _ST1		1
98 #define _ST2		2
99 #define _ST3		3
100 #define _ST4		4
101 #define _ST5		5
102 #define _ST6		6
103 #define _ST7		7
104 
/* Register descriptor accessors.
 *
 *   _rS(R)   size field of the descriptor (bits 4 and up): 1, 2 or 4
 *   _rN(R)   low 3 bits: the number that fits a ModR/M or SIB field
 *   _qrN(R)  low 4 bits: the number including the bit carried by REX
 *   _r0P(R)  true when R is 0, which the memory-operand macros treat as
 *            "no register"
 */
#define _rS(R)		((R)>>4)
#define _rN(R)		((R)&0x7)
#define _qrN(R)		((R)&0xF)
#define _r0P(R)		((R)==0)

/* _r1/_r2/_r4/_r8 turn a descriptor into the register number for an
 * 8/16/32/64-bit operand.  With _ASM_SAFETY the size field is checked and
 * JITFAIL is invoked on a mismatch.
 *
 * _r8 must yield the 4-bit number in BOTH configurations: the q* format
 * macros read bit 3 of it to build the REX prefix, so returning only
 * _rN(R) in the checked build would silently encode %r12 as %rsp, %r13 as
 * %rbp and so on.  64-bit operands use the size-4 descriptors, hence the
 * ==4 test.
 */
#ifndef _ASM_SAFETY
#define _r1(R)		_rN(R)
#define _r2(R)		_rN(R)
#define _r4(R)		_rN(R)
#define _r8(R)		_qrN(R)
#else
#define _r1(R)		((_rS(R)==1) ? _rN(R) : JITFAIL( "8-bit register required"))
#define _r2(R)		((_rS(R)==2) ? _rN(R) : JITFAIL("16-bit register required"))
#define _r4(R)		((_rS(R)==4) ? _rN(R) : JITFAIL("32-bit register required"))
#define _r8(R)		((_rS(R)==4) ? _qrN(R) : JITFAIL("64-bit register required"))
#endif
121 
122 /*** ASSEMBLER ***/
123 
/* _OFF4(D): D minus the current _jit.x.pc, both converted with _jit_UL.
   _CKD8(D): that difference cast to _sc and handed to _ck_d(8, ...), which
   is defined elsewhere (presumably a range check for an 8-bit
   displacement -- TODO confirm). */
124 #define _OFF4(D)        (_jit_UL(D) - _jit_UL(_jit.x.pc))
125 #define _CKD8(D)        _ck_d(8, ((_sc) _OFF4(D)) )
126 
/* _D8/_D32: emit a zero placeholder (one byte via _jit_B, one 32-bit word
   via _jit_I), then overwrite the element just before _jit.x.pc with the
   offset to D.  The offset is computed after the placeholder has been
   emitted, so -- assuming _jit_B/_jit_I advance _jit.x.pc -- it is relative
   to the end of the displacement field. */
127 #define _D8(D)          (_jit_B(0), ((*(_PUC(_jit.x.pc)-1))= _CKD8(D)))
128 #define _D32(D)         (_jit_I(0), ((*(_PUI(_jit.x.pc)-1))= _OFF4(D)))
129 
/* Field validators used while building ModR/M and SIB bytes.
   _M = mod, _r = reg, _m = r/m, _s = scale, _i = index, _b = base.
   Without _ASM_SAFETY each one is the identity and _noESP(I,OK) is just OK.
   With _ASM_SAFETY a value above the field's maximum (3 for the 2-bit
   fields, 7 for the 3-bit ones) calls JITFAIL with a message that ends in
   the stringified argument text, and _noESP fails when I equals _ESP. */
130 #ifndef _ASM_SAFETY
131 # define _M(M)		(M)
132 # define _r(R)		(R)
133 # define _m(M)		(M)
134 # define _s(S)		(S)
135 # define _i(I)		(I)
136 # define _b(B)		(B)
137 # define _noESP(I,OK)	(OK)
138 #else
139 # define _M(M)		(((M)>3) ? JITFAIL("internal error: mod = " #M) : (M))
140 # define _r(R)		(((R)>7) ? JITFAIL("internal error: reg = " #R) : (R))
141 # define _m(M)		(((M)>7) ? JITFAIL("internal error: r/m = " #M) : (M))
142 # define _s(S)		(((S)>3) ? JITFAIL("internal error: memory scale = " #S) : (S))
143 # define _i(I)		(((I)>7) ? JITFAIL("internal error: memory index = " #I) : (I))
144 # define _b(B)		(((B)>7) ? JITFAIL("internal error: memory base = "  #B) : (B))
145 # define _noESP(I,OK)	(((I)==_ESP) ? JITFAIL("illegal index register: %esp") : (OK))
146 #endif
147 
/* _Mrm(Md,R,M): emit one ModR/M byte, mod in bits 7-6, reg in bits 5-3 and
 * r/m in bits 2-0, each passed through its validator (_M, _r, _m).
 *
 * _qMrm: same byte for the REX-prefixed (q*) formats.  On JIT_X86_64 the
 * reg and r/m arguments may be 4-bit register numbers; only the low three
 * bits belong in this byte (bit 3 travels in the REX prefix), so they are
 * masked here.  The arguments are parenthesized before masking so that an
 * expression argument such as (c ? a : b) is masked as a whole.
 */
#define _Mrm(Md,R,M)	_jit_B((_M(Md)<<6)|(_r(R)<<3)|_m(M))
#ifdef JIT_X86_64
# define _qMrm(Md,R,M)	_jit_B((_M(Md)<<6)|(_r(((R) & 0x7))<<3)|_m(((M) & 0x7)))
#else
# define _qMrm(Md,R,M)  _Mrm(Md,R,M)
#endif
154 
/* _SIB(Sc,I,B): emit one SIB byte, scale in bits 7-6, index in bits 5-3 and
   base in bits 2-0, each passed through its validator (_s, _i, _b). */
155 #define _SIB(Sc,I, B)	_jit_B((_s(Sc)<<6)|(_i(I)<<3)|_b(B))
156 
/* _SCL(S): map a scale factor of 1, 2, 4 or 8 to the 2-bit scale field
   (_b00.._b11); any other value calls JITFAIL with the stringified
   argument appended to the message. */
157 #define _SCL(S)		((((S)==1) ? _b00 : \
158 			 (((S)==2) ? _b01 : \
159 			 (((S)==4) ? _b10 : \
160 			 (((S)==8) ? _b11 : JITFAIL("illegal scale: " #S))))))
161 
/* memory subformats - urgh!
 *
 * Each _r_* macro emits the ModR/M byte, the optional SIB byte and the
 * optional displacement for one addressing shape.  R is the value for the
 * reg field, D the displacement, B and I the base and index register
 * descriptors (0 = absent) and S the scale (1, 2, 4 or 8, see _SCL).
 *
 *   _r_D / _r_Q	no base, no index: 32-bit displacement only
 *   _r_0B, _r_0BIS	base [+ index*scale], no displacement	(mod 00)
 *   _r_1B, _r_1BIS	same + 8-bit displacement		(mod 01)
 *   _r_4B, _r_4BIS	same + 32-bit displacement		(mod 10)
 *   _r_4IS		index*scale + 32-bit displacement, no base
 *   _r_8B/8IS/8BIS	as the _r_4 forms, built with _qMrm and _r8
 *   _r_DB, _r_DBIS	pick the 0/1/4 form from the size of D
 *   _r_QB, _r_QBIS	pick the 0/1/8 form from the size of D
 *   _r_X, _qr_X	pick the shape from which of B and I are present
 *
 * Encoding rules the selectors have to respect:
 *   - r/m == 100 means "a SIB byte follows", so a base whose low three
 *     bits are 100 (%esp, %r12) has to go through a SIB byte with index
 *     100 (= no index);
 *   - with mod == 00, r/m == 101 or SIB base == 101 means "no base,
 *     32-bit displacement", so a base whose low three bits are 101
 *     (%ebp, %r13) always needs an explicit displacement, with or without
 *     an index register;
 *   - the SIB base field is three bits wide; bit 3 of a 4-bit register
 *     number is carried by the REX prefix and must not leak into the byte.
 * The tests therefore compare _rN() of the base rather than the whole
 * descriptor.
 */

#ifdef JIT_X86_64
# define _r_D(	R, D	  )	(_Mrm(_b00,_rN(R),_b100 ),_SIB(0,_b100,_b101)	     ,_jit_I((intptr_t)(D)))
# define _r_Q(	R, D	  )	(_qMrm(_b00,_rN(R),_b100 ),_SIB(0,_b100,_b101)        ,_jit_I((intptr_t)(D)))
#else
# define _r_D(	R, D	  )	(_Mrm(_b00,_rN(R),_b101 )		             ,_jit_I((intptr_t)(D)))
# define _r_Q(R, D) _r_D(R, D)
#endif
#define _r_0B(	R,   B    )	(_Mrm(_b00,_rN(R),_r4(B))			           )
#define _r_0BIS(R,   B,I,S)	(_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B))      )
#define _r_1B(	R, D,B    )	(_Mrm(_b01,_rN(R),_r4(B))		             ,_jit_B((intptr_t)(D)))
#define _r_1BIS(R, D,B,I,S)	(_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_B((intptr_t)(D)))
#define _r_4B(	R, D,B    )	(_Mrm(_b10,_rN(R),_r4(B))		             ,_jit_I((intptr_t)(D)))
#define _r_4IS( R, D,I,S)	(_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_b101 ),_jit_I((intptr_t)(D)))
#define _r_4BIS(R, D,B,I,S)	(_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_r4(I),_r4(B)),_jit_I((intptr_t)(D)))
#define _r_8B(	R, D,B    )	(_qMrm(_b10,_rN(R),_r8(B))		             ,_jit_I((intptr_t)(D)))
#define _r_8IS( R, D,I,S)	(_qMrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_r8(I),_b101 ),_jit_I((intptr_t)(D)))
/* The base is reduced to three bits after _r8 has had its say. */
#define _r_8BIS(R, D,B,I,S)	(_qMrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_r8(I),_rN(_r8(B))),_jit_I((intptr_t)(D)))

/* The displacement-free form is only usable when the base does not encode as 101. */
#define _r_DB(  R, D,B    )	(((_s0P(D) && (_rN(B) != _rN(_EBP))) ? _r_0B  (R,  B    ) : (_s8P(D) ? _r_1B(  R,D,B    ) : _r_4B(  R,D,B    ))))
#define _r_DBIS(R, D,B,I,S)	(((_s0P(D) && (_rN(B) != _rN(_EBP))) ? _r_0BIS(R,  B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S))))
#define _r_QB(  R, D,B    )	(((_s0P(D) && (_rN(B) != _rN(_EBP))) ? _r_0B  (R,  B    ) : (_s8P(D) ? _r_1B(  R,D,B    ) : _r_8B(  R,D,B    ))))
#define _r_QBIS(R, D,B,I,S)	(((_s0P(D) && (_rN(B) != _rN(_EBP))) ? _r_0BIS(R,  B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_8BIS(R,D,B,I,S))))

/* A base that encodes as 100 is routed through a SIB byte with index _ESP
   (= none) and scale 1.  %esp itself is rejected as a real index. */
#define _r_X(   R, D,B,I,S)	(_r0P(I) ? (_r0P(B)   ? _r_D   (R,D            )   : \
				           ((_rN(B)==_rN(_ESP)) ? _r_DBIS(R,D,B,_ESP,1)   : \
						        _r_DB  (R,D,   B       ))) : \
				 (_r0P(B)	      ? _r_4IS (R,D,	    I,S)   : \
				 (((I)!=_ESP)         ? _r_DBIS(R,D,   B,   I,S)   : \
						        JITFAIL("illegal index register: %esp"))))
#define _qr_X(   R, D,B,I,S)	(_r0P(I) ? (_r0P(B)   ? _r_Q   (R,D            )   : \
				           ((_rN(B)==_rN(_ESP)) ? _r_QBIS(R,D,B,_ESP,1)   : \
						        _r_QB  (R,D,   B       ))) : \
				 (_r0P(B)	      ? _r_8IS (R,D,	    I,S)   : \
				 (((I)!=_ESP)         ? _r_QBIS(R,D,   B,   I,S)   : \
						        JITFAIL("illegal index register: %esp"))))
199 
200 
201 /* instruction formats */
202 
203 /*	 _format						     Opcd	  ModR/M dN(rB,rI,Sc)	  imm... */
204 
/* _d16(): emit the byte 0x66 (used as a prefix by the _w* formats below).
   _O(OP): emit the single opcode byte OP. */
205 #define	 _d16()					   (		  _jit_B(0x66	)				  )
206 #define	  _O(	     OP				)  (		  _jit_B(  OP	)				  )
/* REX prefix helpers (JIT_X86_64 only).
 *
 *   _REX_(P,R,X,B)  emit P with bit 3 of R, X and B moved into bits 2, 1
 *                   and 0 respectively
 *   _REX(R,X,B)     _REX_ with P = 0x48
 *   _REXd(R,X,B)    _REX_ with P = 0x40, emitted only when bit 3 of B is
 *                   set; otherwise nothing is emitted and the value is 0.
 *                   Only B is tested, R and X do not trigger the prefix.
 *   _qO / _qOd      prefix (via _REX / _REXd) followed by opcode byte OP
 *
 * Every argument is parenthesized before it is masked, so an expression
 * argument such as (c ? a : b) is tested as a whole.
 *
 * Without JIT_X86_64, _qO and _qOd ignore R, X and B and reduce to _O.
 */
#ifdef JIT_X86_64
# define  _REX_(P,R,X,B)                          ( _jit_B((P)|(((R)&0x8)>>1)|(((X)&0x8)>>2)|(((B)&0x8)>>3)) )
# define  _REX(R,X,B)                              _REX_(0x48,R,X,B)
# define  _REXd(R,X,B)                             ((((B)&0x8)) ? _REX_(0x40,R,X,B) : 0)
# define  _qO(	     OP, R,X,B			)  ( _REX(R,X,B), _jit_B(  OP	) )
# define  _qOd(	     OP, R,X,B			)  ( _REXd(R,X,B), _jit_B(  OP	) )
#else
# define  _qO(	     OP, R,X,B  		)  _O(OP)
# define  _qOd(	     OP, R,X,B  		)  _O(OP)
#endif
/* _Or(OP,R): one opcode byte with register number R OR-ed into it. */
217 #define	  _Or(	     OP,R			)  (		  _jit_B( (OP)|_r(R))				  )
/* _qOr / _qOdr: on JIT_X86_64, a prefix from _REX / _REXd with R in the B
   slot, then the opcode byte with the low three bits of R OR-ed in;
   otherwise identical to _Or. */
218 #ifdef JIT_X86_64
219 # define  _qOr(	     OP,R			)  ( _REX(0,0,R), _jit_B( (OP)|_r(R&0x7))				  )
220 # define  _qOdr(     OP,R			)  ( _REXd(0,0,R), _jit_B( (OP)|_r(R&0x7))				  )
221 #else
222 # define _qOr(       OP,R                       ) _Or(OP,R)
223 # define _qOdr(       OP,R                       ) _Or(OP,R)
224 #endif
/* _OO(OP): two opcode bytes, high byte of OP first, then OP itself handed
   to _jit_B (presumably truncated to its low byte there -- _jit_B is
   defined elsewhere). */
225 #define	 _OO(	     OP				)  ( _jit_B((OP)>>8), _jit_B( (OP)	)				  )
/* _qOO: on JIT_X86_64, _REX(0,0,0) followed by _OO; no register bit is
   placed in the prefix. */
226 #ifdef JIT_X86_64
227 # define _qOO(OP)  ( _REX(0,0,0), _OO(OP))
228 #else
229 # define _qOO(OP) _OO(OP)
230 #endif
/* _OOr: as _OO with register number R OR-ed into the second byte. */
231 #define	 _OOr(	     OP,R			)  ( _jit_B((OP)>>8), _jit_B( (OP)|_r(R))				  )
/* _Os(OP,B): emit OP|_b10 when _s8P(B) holds (B is the immediate that will
   follow), plain OP otherwise.  Pairs with _sW/_sL, which apply the same
   _s8P test to choose the immediate's width. */
232 #define	  _Os(	     OP,B			)  (	_s8P(B) ? _jit_B(((OP)|_b10)) : _jit_B(OP)			  )
/* _qOs(OP,B,R,M): REX-prefixed variant of _Os.  B is the immediate that
 * _Os inspects; R and M are the values that the caller also hands to
 * _qMrm as the reg and r/m fields (see _qOs_Mrm_sL).
 *
 * _REX takes its arguments in (R, X, B) order: the first extends the
 * ModR/M reg field and the third extends the r/m field.  R and M are
 * therefore passed in the first and third positions; there is no index
 * register, so the X slot is 0.  (Passing M in the X slot would send the
 * high bit of the r/m register to a prefix bit that is not applied to
 * r/m, selecting the wrong register for %r8-%r15.)
 *
 * Without JIT_X86_64 no prefix is emitted and this is just _Os.
 */
#ifdef JIT_X86_64
# define  _qOs(	     OP, B, R, M	       	)  ( _REX(R, 0, M), _Os(OP, B) )
#else
# define  _qOs(	     OP, B, R, M	       	)  _Os(OP, B)
#endif
/* Instruction formats, first part.  Each name lists the pieces emitted, in
   order:
     O / OO / Or / Os	opcode via _O, _OO, _Or or _Os
     Mrm		ModR/M byte with an explicit mod (MO), reg (R), r/m (M)
     r_X		reg field R plus a memory operand MD,MB,MI,MS, encoded
			by _r_X (or _qr_X in the q forms)
     D8 / D32		pc-relative displacement via _D8 / _D32
     B / W / L / Q	trailing immediate via _jit_B / _jit_W / _jit_I / _jit_L
     sW / sL		trailing immediate of one byte when _s8P holds, else
			16 bits (_jit_W) or 32 bits (_jit_I)
   A leading q selects the _qO/_qOd/_qOO/_qOs opcode helper and, for Mrm
   forms, _qMrm. */
238 #define	    _sW(			     W	)  (				       _s8P(W) ? _jit_B(W):_jit_W(W)	  )
239 #define	    _sL(			     L	)  (				       _s8P(L) ? _jit_B(L):_jit_I(L)	  )
240 #define	  _O_W(	     OP			    ,W	)  (	    _O	    (  OP  )			      ,_jit_W(W)	  )
241 #define	  _O_D8(     OP			    ,D	)  (	    _O	    (  OP  )			     ,_D8(D)	  )
242 #define	  _O_D32(     OP		    ,D	)  (	    _O	    (  OP  )			     ,_D32(D)	  )
243 #define	 _OO_D32(     OP		    ,D	)  (	   _OO	    (  OP  )			     ,_D32(D)	  )
244 #define	  _Os_sW(    OP			    ,W	)  (	    _Os	    (  OP,W)			     ,_sW(W)	  )
245 #define	  _Os_sL(    OP			    ,L	)  (	    _Os	    (  OP,L)			     ,_sL(L)	  )
246 #define	  _O_W_B(    OP			    ,W,B)  (	    _O	    (  OP  )			      ,_jit_W(W),_jit_B(B))
247 #define	  _Or_B(     OP,R		    ,B	)  (	    _Or	    (  OP,R)			      ,_jit_B(B)	  )
248 #define	  _Or_W(     OP,R		    ,W	)  (	    _Or	    (  OP,R)			      ,_jit_W(W)	  )
249 #define	  _Or_L(     OP,R		    ,L	)  (	    _Or	    (  OP,R)			      ,_jit_I(L)	  )
250 #define	  _qOr_Q(     OP,R		    ,Q	)  (	   _qOr	    (  OP,R)			      ,_jit_L(Q)	  )
251 #define	  _O_Mrm(    OP	 ,MO,R,M		)  (	    _O	    (  OP  ),_Mrm(MO,R,M	    )		  )
252 #define	 _qO_Mrm(    OP	 ,MO,R,M		)  (	    _qO	    (  OP,R,0,M),_qMrm(MO,R,M	    )		  )
253 #define	 _qOd_Mrm(   OP	 ,MO,R,M		)  (	    _qOd    (  OP,R,0,M),_qMrm(MO,R,M	    )		  )
254 #define	 _OO_Mrm(    OP	 ,MO,R,M		)  (	   _OO	    (  OP  ),_Mrm(MO,R,M	    )		  )
255 #define	 _qOO_Mrm(   OP	 ,MO,R,M		)  (	   _qOO	    (  OP  ),_Mrm(MO,R,M	    )		  )
256 #define	  _O_Mrm_B(  OP	 ,MO,R,M	    ,B	)  (	    _O	    (  OP  ),_Mrm(MO,R,M	    ) ,_jit_B(B)	  )
257 #define	 _qO_Mrm_B(  OP	 ,MO,R,M	    ,B	)  (	    _qO	    (  OP,R,0,M),_qMrm(MO,R,M	    ) ,_jit_B(B)	  )
258 #define	  _O_Mrm_W(  OP	 ,MO,R,M	    ,W	)  (	    _O	    (  OP  ),_Mrm(MO,R,M	    ) ,_jit_W(W)	  )
259 #define	  _O_Mrm_L(  OP	 ,MO,R,M	    ,L	)  (	    _O	    (  OP  ),_Mrm(MO,R,M	    ) ,_jit_I(L)	  )
260 #define	 _qO_Mrm_L(  OP	 ,MO,R,M	    ,L	)  (	   _qO	    (  OP,R,0,M),_qMrm(MO,R,M	    ) ,_jit_I(L)	  )
/* NOTE(review): _qO_Mrm_Q hands (0,0,R) to _qO, i.e. R lands in the B slot
   and M is not passed at all, whereas _qO_Mrm_B and _qO_Mrm_L pass
   (R,0,M).  No user of _qO_Mrm_Q is visible in this section -- confirm
   which argument order is intended before relying on it. */
261 #define	 _qO_Mrm_Q(  OP	 ,MO,R,M	    ,Q	)  (	   _qO	    (  OP,0,0,R),_qMrm(MO,R,M	    ) ,_jit_L(Q)	  )
262 #define	 _OO_Mrm_B(  OP	 ,MO,R,M	    ,B	)  (	   _OO	    (  OP  ),_Mrm(MO,R,M	    ) ,_jit_B(B)	  )
263 #define	  _Os_Mrm_sW(OP	 ,MO,R,M	    ,W	)  (	    _Os	    (  OP,W),_Mrm(MO,R,M	    ),_sW(W)	  )
264 #define	  _Os_Mrm_sL(OP	 ,MO,R,M	    ,L	)  (	    _Os	    (  OP,L),_Mrm(MO,R,M	    ),_sL(L)	  )
265 #define	 _qOs_Mrm_sL(OP	 ,MO,R,M	    ,L	)  (	   _qOs	    (  OP,L,R,M),_qMrm(MO,R,M	    ),_sL(L)	  )
266 #define	  _O_r_X(    OP	    ,R	,MD,MB,MI,MS	)  (	    _O	    (  OP  ),_r_X(   R	,MD,MB,MI,MS)		  )
/* _qO_r_X: REX-prefixed opcode followed by a reg + memory operand encoded
   by _qr_X.  The third register slot of _qO feeds the prefix bit that
   extends the base register, so it receives the base MB.  The scale MS is
   a factor (1, 2, 4, 8), not a register: passing it there would set that
   prefix bit for every scale-8 access and drop it for an extended base.
   The index slot stays 0, so an index register numbered 8 or above is
   not supported. */
#define	 _qO_r_X(    OP	    ,R	,MD,MB,MI,MS	)  (	   _qO	    (  OP,R,0,MB),_qr_X(R,MD,MB,MI,MS)		  )
/* Instruction formats, second part: reg + memory-operand forms with an
   optional trailing immediate, then the same set again with a w prefix.
   _qO_r_XB and _qOd_r_X pass the base MB to _qO/_qOd as the B slot.
   _qOO_r_X and _qO_r_X_L encode the operand with _r_X rather than _qr_X. */
268 #define	 _qO_r_XB(   OP	    ,R	,MD,MB,MI,MS	)  (	   _qO	    (  OP,R,0,MB),_qr_X(R,MD,MB,MI,MS)		  )
269 #define	 _qOd_r_X(   OP	    ,R	,MD,MB,MI,MS	)  (	   _qOd	    (  OP,R,0,MB),_qr_X(R,MD,MB,MI,MS)		  )
270 #define	 _OO_r_X(    OP	    ,R	,MD,MB,MI,MS	)  (	   _OO	    (  OP  ),_r_X(   R	,MD,MB,MI,MS)		  )
271 #define	 _qOO_r_X(    OP    ,R	,MD,MB,MI,MS	)  (	   _qOO	    (  OP  ),_r_X(   R	,MD,MB,MI,MS)		  )
272 #define	  _O_r_X_B(  OP	    ,R	,MD,MB,MI,MS,B	)  (	    _O	    (  OP  ),_r_X(   R	,MD,MB,MI,MS) ,_jit_B(B)	  )
273 #define	  _O_r_X_W(  OP	    ,R	,MD,MB,MI,MS,W	)  (	    _O	    (  OP  ),_r_X(   R	,MD,MB,MI,MS) ,_jit_W(W)	  )
274 #define	  _O_r_X_L(  OP	    ,R	,MD,MB,MI,MS,L	)  (	    _O	    (  OP  ),_r_X(   R	,MD,MB,MI,MS) ,_jit_I(L)	  )
275 #define	  _qO_r_X_L( OP	    ,R	,MD,MB,MI,MS,L	)  (	    _qO	    (  OP,R,0,MB),_r_X(   R	,MD,MB,MI,MS) ,_jit_I(L)	  )
276 #define	 _OO_r_X_B(  OP	    ,R	,MD,MB,MI,MS,B	)  (	   _OO	    (  OP  ),_r_X(   R	,MD,MB,MI,MS) ,_jit_B(B)	  )
277 #define	  _Os_r_X_sW(OP	    ,R	,MD,MB,MI,MS,W	)  (	    _Os	    (  OP,W),_r_X(   R	,MD,MB,MI,MS),_sW(W)	  )
278 #define	  _Os_r_X_sL(OP	    ,R	,MD,MB,MI,MS,L	)  (	    _Os	    (  OP,L),_r_X(   R	,MD,MB,MI,MS),_sL(L)	  )
/* _O_X_*: memory operand plus immediate with the reg field fixed to 0. */
279 #define	  _O_X_B(    OP		,MD,MB,MI,MS,B	)  (	    _O_r_X_B(  OP	    ,0	,MD,MB,MI,MS	 ,B)	  )
280 #define	  _O_X_W(    OP		,MD,MB,MI,MS,W	)  (	    _O_r_X_W(  OP	    ,0	,MD,MB,MI,MS	 ,W)	  )
281 #define	  _O_X_L(    OP		,MD,MB,MI,MS,L	)  (	    _O_r_X_L(  OP	    ,0	,MD,MB,MI,MS	 ,L)	  )
282 #define	  _qO_X_L(   OP		,MD,MB,MI,MS,L	)  (	    _qO_r_X_L(  OP	    ,0	,MD,MB,MI,MS	 ,L)	  )
/* _w*: emit the 0x66 byte through _d16() first, then the format of the
   same name without the w. */
283 #define	 _wO(	     OP				)  (_d16(), _O(	       OP				   )	  )
284 #define	 _wOr(	     OP,R			)  (_d16(), _Or(       OP,R				   )	  )
285 #define	 _wOr_W(     OP,R		    ,W	)  (_d16(), _Or_W(     OP,R				 ,W)	  )
286 #define	 _wOs_sW(    OP			    ,W	)  (_d16(), _Os_sW(    OP				 ,W)	  )
287 #define	 _wO_Mrm(    OP	 ,MO,R,M		)  (_d16(), _O_Mrm(    OP	 ,MO,R,M		   )	  )
288 #define _wOO_Mrm(    OP	 ,MO,R,M		)  (_d16(),_OO_Mrm(    OP	 ,MO,R,M		   )	  )
289 #define	 _wO_Mrm_B(  OP	 ,MO,R,M	    ,B	)  (_d16(), _O_Mrm_B(  OP	 ,MO,R,M		 ,B)	  )
290 #define _wOO_Mrm_B(  OP	 ,MO,R,M	    ,B	)  (_d16(),_OO_Mrm_B(  OP	 ,MO,R,M		 ,B)	  )
291 #define	 _wO_Mrm_W(  OP	 ,MO,R,M	    ,W	)  (_d16(), _O_Mrm_W(  OP	 ,MO,R,M		 ,W)	  )
292 #define	 _wOs_Mrm_sW(OP	 ,MO,R,M	    ,W	)  (_d16(), _Os_Mrm_sW(OP	 ,MO,R,M		 ,W)	  )
293 #define	 _wO_X_W(    OP		,MD,MB,MI,MS,W	)  (_d16(), _O_X_W(    OP		,MD,MB,MI,MS	 ,W)	  )
294 #define	 _wO_r_X(    OP	    ,R	,MD,MB,MI,MS	)  (_d16(), _O_r_X(    OP	    ,R	,MD,MB,MI,MS	   )	  )
295 #define _wOO_r_X(    OP	    ,R	,MD,MB,MI,MS	)  (_d16(),_OO_r_X(    OP	    ,R	,MD,MB,MI,MS	   )	  )
296 #define	 _wO_r_X_B(  OP	    ,R	,MD,MB,MI,MS,B	)  (_d16(), _O_r_X_B(  OP	    ,R	,MD,MB,MI,MS	 ,B)	  )
297 #define _wOO_r_X_B(  OP	    ,R	,MD,MB,MI,MS,B	)  (_d16(),_OO_r_X_B(  OP	    ,R	,MD,MB,MI,MS	 ,B)	  )
298 #define	 _wO_r_X_W(  OP	    ,R	,MD,MB,MI,MS,W	)  (_d16(), _O_r_X_W(  OP	    ,R	,MD,MB,MI,MS	 ,W)	  )
299 #define	 _wOs_r_X_sW(OP	    ,R	,MD,MB,MI,MS,W	)  (_d16(), _Os_r_X_sW(OP	    ,R	,MD,MB,MI,MS	 ,W)	  )
300 
301 /* +++ fully-qualified intrinsic instructions */
302 
303 /*					_format		 Opcd		,Mod ,r	    ,m		,mem=dsp+sib	,imm... */
304 
/* ADC.  Naming used by every instruction macro in this file: mnemonic,
   operand size (B, W, L, Q), then one letter per operand in source,
   destination order -- i = immediate, r = register, m = memory given as
   MD,MB,MI,MS (displacement, base, index, scale).
   B forms validate registers with _r1, W forms with _r2 and go through the
   _w* formats (0x66 prefix), L forms use _r4.
   Register/memory forms use opcodes 0x10-0x13; immediate forms use
   0x80/0x81 with _b010 in the reg field.  Byte and word immediates are
   checked with _su8/_su16 (defined elsewhere). */
305 #define ADCBrr(RS, RD)			_O_Mrm		(0x10		,_b11,_r1(RS),_r1(RD)				)
306 #define ADCBmr(MD, MB, MI, MS, RD)	_O_r_X		(0x12		     ,_r1(RD)		,MD,MB,MI,MS		)
307 #define ADCBrm(RS, MD, MB, MI, MS)	_O_r_X		(0x10		     ,_r1(RS)		,MD,MB,MI,MS		)
308 #define ADCBir(IM, RD)			_O_Mrm_B	(0x80		,_b11,_b010  ,_r1(RD)			,_su8(IM))
309 #define ADCBim(IM, MD, MB, MI, MS)	_O_r_X_B	(0x80		     ,_b010		,MD,MB,MI,MS	,_su8(IM))
310 
311 #define ADCWrr(RS, RD)			_wO_Mrm		(0x11		,_b11,_r2(RS),_r2(RD)				)
312 #define ADCWmr(MD, MB, MI, MS, RD)	_wO_r_X		(0x13		     ,_r2(RD)		,MD,MB,MI,MS		)
313 #define ADCWrm(RS, MD, MB, MI, MS)	_wO_r_X		(0x11		     ,_r2(RS)		,MD,MB,MI,MS		)
314 #define ADCWir(IM, RD)			_wOs_Mrm_sW	(0x81		,_b11,_b010  ,_r2(RD)			,_su16(IM))
315 #define ADCWim(IM, MD, MB, MI, MS)	_wOs_r_X_sW	(0x81		     ,_b010		,MD,MB,MI,MS	,_su16(IM))
316 
317 #define ADCLrr(RS, RD)			_O_Mrm		(0x11		,_b11,_r4(RS),_r4(RD)				)
318 #define ADCLmr(MD, MB, MI, MS, RD)	_O_r_X		(0x13		     ,_r4(RD)		,MD,MB,MI,MS		)
319 #define ADCLrm(RS, MD, MB, MI, MS)	_O_r_X		(0x11		     ,_r4(RS)		,MD,MB,MI,MS		)
320 #define ADCLir(IM, RD)			_Os_Mrm_sL	(0x81		,_b11,_b010  ,_r4(RD)			,IM	)
321 #define ADCLim(IM, MD, MB, MI, MS)	_Os_r_X_sL	(0x81		     ,_b010		,MD,MB,MI,MS	,IM	)
322 
323 
/* ADD.  Register/memory forms use opcodes 0x00-0x03; immediate forms use
   0x80/0x81 with _b000 in the reg field.  The L and Q immediate forms go
   through _Os/_sL, so an immediate for which _s8P holds is emitted as one
   byte with opcode 0x81|_b10.  Q forms take their registers through _r8
   and use the q* formats (REX prefix on JIT_X86_64); only register-
   register and immediate-register Q forms are defined here. */
324 #define ADDBrr(RS, RD)			_O_Mrm		(0x00		,_b11,_r1(RS),_r1(RD)				)
325 #define ADDBmr(MD, MB, MI, MS, RD)	_O_r_X		(0x02		     ,_r1(RD)		,MD,MB,MI,MS		)
326 #define ADDBrm(RS, MD, MB, MI, MS)	_O_r_X		(0x00		     ,_r1(RS)		,MD,MB,MI,MS		)
327 #define ADDBir(IM, RD)			_O_Mrm_B	(0x80		,_b11,_b000  ,_r1(RD)			,_su8(IM))
328 #define ADDBim(IM, MD, MB, MI, MS)	_O_r_X_B	(0x80		     ,_b000		,MD,MB,MI,MS	,_su8(IM))
329 
330 #define ADDWrr(RS, RD)			_wO_Mrm		(0x01		,_b11,_r2(RS),_r2(RD)				)
331 #define ADDWmr(MD, MB, MI, MS, RD)	_wO_r_X		(0x03		     ,_r2(RD)		,MD,MB,MI,MS		)
332 #define ADDWrm(RS, MD, MB, MI, MS)	_wO_r_X		(0x01		     ,_r2(RS)		,MD,MB,MI,MS		)
333 #define ADDWir(IM, RD)			_wOs_Mrm_sW	(0x81		,_b11,_b000  ,_r2(RD)			,_su16(IM))
334 #define ADDWim(IM, MD, MB, MI, MS)	_wOs_r_X_sW	(0x81		     ,_b000		,MD,MB,MI,MS	,_su16(IM))
335 
336 #define ADDLrr(RS, RD)			_O_Mrm		(0x01		,_b11,_r4(RS),_r4(RD)				)
337 #define ADDLmr(MD, MB, MI, MS, RD)	_O_r_X		(0x03		     ,_r4(RD)		,MD,MB,MI,MS		)
338 #define ADDLrm(RS, MD, MB, MI, MS)	_O_r_X		(0x01		     ,_r4(RS)		,MD,MB,MI,MS		)
339 #define ADDLir(IM, RD)			_Os_Mrm_sL	(0x81		,_b11,_b000  ,_r4(RD)			,IM	)
340 #define ADDLim(IM, MD, MB, MI, MS)	_Os_r_X_sL	(0x81		     ,_b000		,MD,MB,MI,MS	,IM	)
341 
342 #define ADDQrr(RS, RD)			_qO_Mrm		(0x01		,_b11,_r8(RS),_r8(RD)				)
343 #define ADDQir(IM, RD)			_qOs_Mrm_sL	(0x81		,_b11,_b000  ,_r8(RD)			,IM	)
344 
/* ADDQiBr(IM, RD): 64-bit ADD of a byte immediate (opcode 0x83, _b000 in
   the reg field, immediate checked with _su8) to register RD.
   RD is a 64-bit destination, so it is taken through _r8 like the other Q
   forms: _r8 keeps bit 3 of the register number, which _qO_Mrm_B needs in
   order to build the REX prefix, and under _ASM_SAFETY it accepts the
   size-4 descriptors instead of demanding an 8-bit register. */
#define ADDQiBr(IM, RD)			_qO_Mrm_B	(0x83		,_b11,_b000  ,_r8(RD)			,_su8(IM))
346 
/* AND.  Register/memory forms use opcodes 0x20-0x23; immediate forms use
   0x80/0x81 with _b100 in the reg field.  Operand letters are in source,
   destination order (i = immediate, r = register, m = memory as
   MD,MB,MI,MS).  Q forms take registers through _r8 and use the q*
   formats. */
347 #define ANDBrr(RS, RD)			_O_Mrm		(0x20		,_b11,_r1(RS),_r1(RD)				)
348 #define ANDBmr(MD, MB, MI, MS, RD)	_O_r_X		(0x22		     ,_r1(RD)		,MD,MB,MI,MS		)
349 #define ANDBrm(RS, MD, MB, MI, MS)	_O_r_X		(0x20		     ,_r1(RS)		,MD,MB,MI,MS		)
350 #define ANDBir(IM, RD)			_O_Mrm_B	(0x80		,_b11,_b100  ,_r1(RD)			,_su8(IM))
351 #define ANDBim(IM, MD, MB, MI, MS)	_O_r_X_B	(0x80		     ,_b100		,MD,MB,MI,MS	,_su8(IM))
352 
353 #define ANDWrr(RS, RD)			_wO_Mrm		(0x21		,_b11,_r2(RS),_r2(RD)				)
354 #define ANDWmr(MD, MB, MI, MS, RD)	_wO_r_X		(0x23		     ,_r2(RD)		,MD,MB,MI,MS		)
355 #define ANDWrm(RS, MD, MB, MI, MS)	_wO_r_X		(0x21		     ,_r2(RS)		,MD,MB,MI,MS		)
356 #define ANDWir(IM, RD)			_wOs_Mrm_sW	(0x81		,_b11,_b100  ,_r2(RD)			,_su16(IM))
357 #define ANDWim(IM, MD, MB, MI, MS)	_wOs_r_X_sW	(0x81		     ,_b100		,MD,MB,MI,MS	,_su16(IM))
358 
359 #define ANDLrr(RS, RD)			_O_Mrm		(0x21		,_b11,_r4(RS),_r4(RD)				)
360 #define ANDLmr(MD, MB, MI, MS, RD)	_O_r_X		(0x23		     ,_r4(RD)		,MD,MB,MI,MS		)
361 #define ANDLrm(RS, MD, MB, MI, MS)	_O_r_X		(0x21		     ,_r4(RS)		,MD,MB,MI,MS		)
362 #define ANDLir(IM, RD)			_Os_Mrm_sL	(0x81		,_b11,_b100  ,_r4(RD)			,IM	)
363 #define ANDLim(IM, MD, MB, MI, MS)	_Os_r_X_sL	(0x81		     ,_b100		,MD,MB,MI,MS	,IM	)
364 
365 #define ANDQrr(RS, RD)			_qO_Mrm		(0x21		,_b11,_r8(RS),_r8(RD)				)
366 #define ANDQir(IM, RD)			_qOs_Mrm_sL	(0x81		,_b11,_b100  ,_r8(RD)			,IM	)
367 
/* BSWAPLr: two-byte opcode 0x0fc8 with the 32-bit register number OR-ed
   into the second byte. */
368 #define BSWAPLr(R)			_OOr		(0x0fc8,_r4(R)							)
369 
370 
/* BT, BTC, BTR, BTS in W (0x66-prefixed, _r2) and L (_r4) sizes.
   Immediate forms share opcode 0x0fba and differ in the reg field:
   _b100 for BT, _b111 for BTC, _b110 for BTR, _b101 for BTS; the
   immediate is checked with _u8 (defined elsewhere).
   Register forms use 0x0fa3 (BT), 0x0fbb (BTC), 0x0fb3 (BTR) and
   0x0fab (BTS) with RS in the reg field and RD or the memory operand in
   r/m. */
371 #define BTWir(IM,RD)			_wOO_Mrm_B	(0x0fba		,_b11,_b100  ,_r2(RD)			,_u8(IM))
372 #define BTWim(IM,MD,MB,MI,MS)		_wOO_r_X_B	(0x0fba		     ,_b100		,MD,MB,MI,MS	,_u8(IM))
373 #define BTWrr(RS,RD)			_wOO_Mrm	(0x0fa3		,_b11,_r2(RS),_r2(RD)				)
374 #define BTWrm(RS,MD,MB,MI,MS)		_wOO_r_X	(0x0fa3		     ,_r2(RS)		,MD,MB,MI,MS		)
375 
376 #define BTLir(IM,RD)			_OO_Mrm_B	(0x0fba		,_b11,_b100  ,_r4(RD)			,_u8(IM))
377 #define BTLim(IM,MD,MB,MI,MS)		_OO_r_X_B	(0x0fba		     ,_b100		,MD,MB,MI,MS	,_u8(IM))
378 #define BTLrr(RS,RD)			_OO_Mrm		(0x0fa3		,_b11,_r4(RS),_r4(RD)				)
379 #define BTLrm(RS,MD,MB,MI,MS)		_OO_r_X		(0x0fa3		     ,_r4(RS)		,MD,MB,MI,MS		)
380 
381 
382 #define BTCWir(IM,RD)			_wOO_Mrm_B	(0x0fba		,_b11,_b111  ,_r2(RD)			,_u8(IM))
383 #define BTCWim(IM,MD,MB,MI,MS)		_wOO_r_X_B	(0x0fba		     ,_b111		,MD,MB,MI,MS	,_u8(IM))
384 #define BTCWrr(RS,RD)			_wOO_Mrm	(0x0fbb		,_b11,_r2(RS),_r2(RD)				)
385 #define BTCWrm(RS,MD,MB,MI,MS)		_wOO_r_X	(0x0fbb		     ,_r2(RS)		,MD,MB,MI,MS		)
386 
387 #define BTCLir(IM,RD)			_OO_Mrm_B	(0x0fba		,_b11,_b111  ,_r4(RD)			,_u8(IM))
388 #define BTCLim(IM,MD,MB,MI,MS)		_OO_r_X_B	(0x0fba		     ,_b111		,MD,MB,MI,MS	,_u8(IM))
389 #define BTCLrr(RS,RD)			_OO_Mrm		(0x0fbb		,_b11,_r4(RS),_r4(RD)				)
390 #define BTCLrm(RS,MD,MB,MI,MS)		_OO_r_X		(0x0fbb		     ,_r4(RS)		,MD,MB,MI,MS		)
391 
392 
393 #define BTRWir(IM,RD)			_wOO_Mrm_B	(0x0fba		,_b11,_b110  ,_r2(RD)			,_u8(IM))
394 #define BTRWim(IM,MD,MB,MI,MS)		_wOO_r_X_B	(0x0fba		     ,_b110		,MD,MB,MI,MS	,_u8(IM))
395 #define BTRWrr(RS,RD)			_wOO_Mrm	(0x0fb3		,_b11,_r2(RS),_r2(RD)				)
396 #define BTRWrm(RS,MD,MB,MI,MS)		_wOO_r_X	(0x0fb3		     ,_r2(RS)		,MD,MB,MI,MS		)
397 
398 #define BTRLir(IM,RD)			_OO_Mrm_B	(0x0fba		,_b11,_b110  ,_r4(RD)			,_u8(IM))
399 #define BTRLim(IM,MD,MB,MI,MS)		_OO_r_X_B	(0x0fba		     ,_b110		,MD,MB,MI,MS	,_u8(IM))
400 #define BTRLrr(RS,RD)			_OO_Mrm		(0x0fb3		,_b11,_r4(RS),_r4(RD)				)
401 #define BTRLrm(RS,MD,MB,MI,MS)		_OO_r_X		(0x0fb3		     ,_r4(RS)		,MD,MB,MI,MS		)
402 
403 
404 #define BTSWir(IM,RD)			_wOO_Mrm_B	(0x0fba		,_b11,_b101  ,_r2(RD)			,_u8(IM))
405 #define BTSWim(IM,MD,MB,MI,MS)		_wOO_r_X_B	(0x0fba		     ,_b101		,MD,MB,MI,MS	,_u8(IM))
406 #define BTSWrr(RS,RD)			_wOO_Mrm	(0x0fab		,_b11,_r2(RS),_r2(RD)				)
407 #define BTSWrm(RS,MD,MB,MI,MS)		_wOO_r_X	(0x0fab		     ,_r2(RS)		,MD,MB,MI,MS		)
408 
409 #define BTSLir(IM,RD)			_OO_Mrm_B	(0x0fba		,_b11,_b101  ,_r4(RD)			,_u8(IM))
410 #define BTSLim(IM,MD,MB,MI,MS)		_OO_r_X_B	(0x0fba		     ,_b101		,MD,MB,MI,MS	,_u8(IM))
411 #define BTSLrr(RS,RD)			_OO_Mrm		(0x0fab		,_b11,_r4(RS),_r4(RD)				)
412 #define BTSLrm(RS,MD,MB,MI,MS)		_OO_r_X		(0x0fab		     ,_r4(RS)		,MD,MB,MI,MS		)
413 
/* CALLmL: opcode 0xe8 followed by a 32-bit pc-relative displacement to D
   (see _D32).  With _ASM_SAFETY, a non-zero base or index calls JITFAIL;
   without it B, I and S are ignored. */
414 #ifdef _ASM_SAFETY
415 # define CALLmL(D,B,I,S)		((_r0P(B) && _r0P(I)) ? _O_D32	(0xe8			,(intptr_t)(D)		) : \
416 								JITFAIL("illegal mode in direct jump"))
417 #else
418 # define CALLmL(D,B,I,S)		_O_D32	(0xe8			,(intptr_t)(D)		)
419 #endif
420 
/* CALLm: on JIT_X86_64, load D into JIT_REXTMP with MOVQir (defined
   elsewhere) and call through that register; B, I and S are ignored.
   Otherwise it is CALLmL. */
421 #ifdef JIT_X86_64
422 # define CALLm(D,B,I,S)	                (MOVQir((D), JIT_REXTMP), CALQsr(JIT_REXTMP))
423 #else
424 # define CALLm(D,B,I,S)			CALLmL(D,B,I,S)
425 #endif
426 
/* Indirect calls: opcode 0xff with _b010 in the reg field.  CALLsr takes a
   32-bit register, CALQsr a register through _r8 with the _qOd prefix
   rule, CALLsm a memory operand. */
427 #define CALLsr(R)			_O_Mrm	(0xff	,_b11,_b010,_r4(R)			)
428 #define CALQsr(R)                       _qOd_Mrm(0xff	,_b11,_b010,_r8(R))
429 
430 #define CALLsm(D,B,I,S)			_O_r_X	(0xff	     ,_b010	,(intptr_t)(D),B,I,S		)
431 
/* Single-byte instructions without operands. */
432 #define CBW_()				_O		(0x98								)
433 #define CLC_()				_O		(0xf8								)
434 #define CLTD_()				_O		(0x99								)
435 #define CMC_()				_O		(0xf5								)
436 
437 
/* CMP.  Register/memory forms use opcodes 0x38-0x3b; immediate forms use
   0x80/0x81 with _b111 in the reg field.  Operand letters are in source,
   destination order (i = immediate, r = register, m = memory as
   MD,MB,MI,MS).  Unlike ADDLir/ANDLir, the L and Q immediate forms here go
   through _O_Mrm_L / _O_r_X_L / _qO_Mrm_L and therefore always emit a
   full 32-bit immediate. */
438 #define CMPBrr(RS, RD)			_O_Mrm		(0x38		,_b11,_r1(RS),_r1(RD)				)
439 #define CMPBmr(MD, MB, MI, MS, RD)	_O_r_X		(0x3a		     ,_r1(RD)		,MD,MB,MI,MS		)
440 #define CMPBrm(RS, MD, MB, MI, MS)	_O_r_X		(0x38		     ,_r1(RS)		,MD,MB,MI,MS		)
441 #define CMPBir(IM, RD)			_O_Mrm_B	(0x80		,_b11,_b111  ,_r1(RD)			,_su8(IM))
442 #define CMPBim(IM, MD, MB, MI, MS)	_O_r_X_B	(0x80		     ,_b111		,MD,MB,MI,MS	,_su8(IM))
443 
444 #define CMPWrr(RS, RD)			_wO_Mrm		(0x39		,_b11,_r2(RS),_r2(RD)				)
445 #define CMPWmr(MD, MB, MI, MS, RD)	_wO_r_X		(0x3b		     ,_r2(RD)		,MD,MB,MI,MS		)
446 #define CMPWrm(RS, MD, MB, MI, MS)	_wO_r_X		(0x39		     ,_r2(RS)		,MD,MB,MI,MS		)
447 #define CMPWir(IM, RD)			_wOs_Mrm_sW	(0x81		,_b11,_b111  ,_r2(RD)			,_su16(IM))
448 #define CMPWim(IM, MD, MB, MI, MS)	_wOs_r_X_sW	(0x81		     ,_b111		,MD,MB,MI,MS	,_su16(IM))
449 
450 #define CMPLrr(RS, RD)			_O_Mrm		(0x39		,_b11,_r4(RS),_r4(RD)				)
451 #define CMPLmr(MD, MB, MI, MS, RD)	_O_r_X		(0x3b		     ,_r4(RD)		,MD,MB,MI,MS		)
452 #define CMPLrm(RS, MD, MB, MI, MS)	_O_r_X		(0x39		     ,_r4(RS)		,MD,MB,MI,MS		)
453 #define CMPLir(IM, RD)			_O_Mrm_L	(0x81		,_b11,_b111  ,_r4(RD)			,IM	)
454 #define CMPLim(IM, MD, MB, MI, MS)	_O_r_X_L	(0x81		     ,_b111		,MD,MB,MI,MS	,IM	)
455 
456 #define CMPQrr(RS, RD)			_qO_Mrm		(0x39		,_b11,_r8(RS),_r8(RD)				)
457 #define CMPQir(IM, RD)			_qO_Mrm_L	(0x81		,_b11,_b111  ,_r8(RD)			,IM	)
458 
/* CWD_ emits the same byte (0x99) as CLTD_ above. */
459 #define CWD_()				_O		(0x99								)
460 
461 
/* CMPXCHG, table-style forms: two-byte opcode 0x0fb0 (byte) or 0x0fb1
   (word with 0x66 prefix, long), RS in the reg field, RD or the memory
   operand in r/m. */
462 #define CMPXCHGBrr(RS,RD)		_OO_Mrm		(0x0fb0		,_b11,_r1(RS),_r1(RD)				)
463 #define CMPXCHGBrm(RS,MD,MB,MI,MS)	_OO_r_X		(0x0fb0		     ,_r1(RS)		,MD,MB,MI,MS		)
464 
465 #define CMPXCHGWrr(RS,RD)		_wOO_Mrm	(0x0fb1		,_b11,_r2(RS),_r2(RD)				)
466 #define CMPXCHGWrm(RS,MD,MB,MI,MS)	_wOO_r_X	(0x0fb1		     ,_r2(RS)		,MD,MB,MI,MS		)
467 
468 #define CMPXCHGLrr(RS,RD)		_OO_Mrm		(0x0fb1		,_b11,_r4(RS),_r4(RD)				)
469 #define CMPXCHGLrm(RS,MD,MB,MI,MS)	_OO_r_X		(0x0fb1		     ,_r4(RS)		,MD,MB,MI,MS		)
470 
471 /* Above variants don't seem to work */
/* Hand-assembled replacements: the byte 0xF, then opcode 0xb1 with RD in
   the reg field and a memory operand whose base is RS (displacement,
   index and scale all 0).  Note the roles: here RS names the register
   holding the address and RD the register placed in the reg field.
   CMPXCHGQr adds _REX(0,0,0) and CMPXCHGWr the 0x66 prefix.
   NOTE(review): all three pass RD through _r4 and CMPXCHGQr puts no
   register bit in its prefix, so registers numbered 8 and above cannot be
   encoded -- confirm callers never pass them. */
472 #define CMPXCHGr(RS, RD)          	(_jit_B(0xF), _O_r_X(0xb1 	     ,_r4(RD)		,0,RS,0,0		))
473 #define CMPXCHGQr(RS, RD)          	(_REX(0, 0, 0), _jit_B(0xF), _O_r_X(0xb1 ,_r4(RD)	,0,RS,0,0		))
474 #define CMPXCHGWr(RS, RD)          	(_d16(), _jit_B(0xF), _O_r_X(0xb1    ,_r4(RD)		,0,RS,0,0		))
475 
/* LOCK_PREFIX(i): emit the byte 0xf0, then evaluate the instruction
   expression i. */
476 #define LOCK_PREFIX(i) (_jit_B(0xf0), i)
477 
/* DEC.  Byte and memory forms use opcode 0xfe/0xff with _b001 in the reg
   field.  The W and L register forms use the one-byte encoding 0x48 with
   the register number OR-ed in.
   NOTE(review): 0x48 is also the base value _REX emits as a prefix on
   JIT_X86_64 -- confirm DECWr/DECLr are not used when generating 64-bit
   code. */
478 #define DECBr(RD)			_O_Mrm		(0xfe		,_b11,_b001  ,_r1(RD)				)
479 #define DECBm(MD,MB,MI,MS)		_O_r_X		(0xfe		     ,_b001		,MD,MB,MI,MS		)
480 
481 #define DECWr(RD)			_wOr		(0x48,_r2(RD)							)
482 #define DECWm(MD,MB,MI,MS)		_wO_r_X		(0xff		     ,_b001		,MD,MB,MI,MS		)
483 
484 #define DECLr(RD)			_Or		(0x48,_r4(RD)							)
485 #define DECLm(MD,MB,MI,MS)		_O_r_X		(0xff		     ,_b001		,MD,MB,MI,MS		)
486 
487 
/* DIV: opcode 0xf6 (byte) or 0xf7 (word/long/quad) with _b110 in the reg
   field; the single operand RS or the memory operand goes in r/m.  DIVQr
   uses the q* format and _r8. */
488 #define DIVBr(RS)			_O_Mrm		(0xf6		,_b11,_b110  ,_r1(RS)				)
489 #define DIVBm(MD,MB,MI,MS)		_O_r_X		(0xf6		     ,_b110		,MD,MB,MI,MS		)
490 
491 #define DIVWr(RS)			_wO_Mrm		(0xf7		,_b11,_b110  ,_r2(RS)				)
492 #define DIVWm(MD,MB,MI,MS)		_wO_r_X		(0xf7		     ,_b110		,MD,MB,MI,MS		)
493 
494 #define DIVLr(RS)			_O_Mrm		(0xf7		,_b11,_b110  ,_r4(RS)				)
495 #define DIVLm(MD,MB,MI,MS)		_O_r_X		(0xf7		     ,_b110		,MD,MB,MI,MS		)
496 
497 #define DIVQr(RS)			_qO_Mrm		(0xf7		,_b11,_b110  ,_r8(RS)				)
498 
/* ENTERii: opcode 0xc8, a 16-bit immediate (checked with _su16) and an
   8-bit immediate (checked with _su8).  HLT_: single byte 0xf4. */
499 #define ENTERii(W, B)			_O_W_B		(0xc8						  ,_su16(W),_su8(B))
500 #define HLT_()				_O		(0xf4								)
501 
502 
/* IDIV: opcode 0xf6/0xf7 with _b111 in the reg field; operand in r/m. */
503 #define IDIVBr(RS)			_O_Mrm		(0xf6		,_b11,_b111  ,_r1(RS)				)
504 #define IDIVBm(MD,MB,MI,MS)		_O_r_X		(0xf6		     ,_b111		,MD,MB,MI,MS		)
505 
506 #define IDIVWr(RS)			_wO_Mrm 	(0xf7		,_b11,_b111  ,_r2(RS)				)
507 #define IDIVWm(MD,MB,MI,MS)		_wO_r_X 	(0xf7		     ,_b111		,MD,MB,MI,MS		)
508 
509 #define IDIVLr(RS)			_O_Mrm		(0xf7		,_b11,_b111  ,_r4(RS)				)
510 #define IDIVLm(MD,MB,MI,MS)		_O_r_X		(0xf7		     ,_b111		,MD,MB,MI,MS		)
511 
512 #define IDIVQr(RS)			_qO_Mrm		(0xf7		,_b11,_b111  ,_r8(RS)				)
513 
/* IMUL, one-operand forms: opcode 0xf6/0xf7 with _b101 in the reg field. */
514 #define IMULBr(RS)			_O_Mrm		(0xf6		,_b11,_b101  ,_r1(RS)				)
515 #define IMULBm(MD,MB,MI,MS)		_O_r_X		(0xf6		     ,_b101		,MD,MB,MI,MS		)
516 
517 #define IMULWr(RS)			_wO_Mrm 	(0xf7		,_b11,_b101  ,_r2(RS)				)
518 #define IMULWm(MD,MB,MI,MS)		_wO_r_X 	(0xf7		     ,_b101		,MD,MB,MI,MS		)
519 
520 #define IMULLr(RS)			_O_Mrm		(0xf7		,_b11,_b101  ,_r4(RS)				)
521 #define IMULLm(MD,MB,MI,MS)		_O_r_X		(0xf7		     ,_b101		,MD,MB,MI,MS		)
522 
523 
/* IMUL, two- and three-operand forms: 0x0faf (register/memory source) and
   0x69 (immediate, emitted short via _Os/_sW/_sL when _s8P holds).
   NOTE(review): the field assignment is not uniform.  IMULLrr, IMULQrr
   and the *mr forms put RD in the reg field; IMULWrr, IMULWirr and
   IMULLirr put RS in the reg field and RD in r/m.  Confirm which order
   is intended for the latter three before relying on them.
   NOTE(review): IMULQrr takes its registers through _r4 and _qOO emits
   _REX(0,0,0), so registers numbered 8 and above cannot be encoded. */
524 #define IMULWrr(RS,RD)			_wOO_Mrm	(0x0faf		,_b11,_r2(RS),_r2(RD)				)
525 #define IMULWmr(MD,MB,MI,MS,RD)		_wOO_r_X	(0x0faf		     ,_r2(RD)		,MD,MB,MI,MS		)
526 #define IMULWirr(IM,RS,RD)		_wOs_Mrm_sW	(0x69		,_b11,_r2(RS),_r2(RD)			,_su16(IM)	)
527 #define IMULWimr(IM,MD,MB,MI,MS,RD)	_wOs_r_X_sW	(0x69		     ,_r2(RD)		,MD,MB,MI,MS	,_su16(IM)	)
528 
529 #define IMULLir(IM,RD)			_Os_Mrm_sL	(0x69		,_b11,_r4(RD),_r4(RD)			,IM	)
530 #define IMULLrr(RS,RD)			_OO_Mrm		(0x0faf		,_b11,_r4(RD),_r4(RS)				)
531 #define IMULLmr(MD,MB,MI,MS,RD)		_OO_r_X		(0x0faf		     ,_r4(RD)		,MD,MB,MI,MS		)
532 #define IMULLirr(IM,RS,RD)		_Os_Mrm_sL	(0x69		,_b11,_r4(RS),_r4(RD)			,IM	)
533 #define IMULLimr(IM,MD,MB,MI,MS,RD)	_Os_r_X_sL	(0x69		     ,_r4(RD)		,MD,MB,MI,MS	,IM	)
534 
535 #define IMULQrr(RS,RD)			_qOO_Mrm	(0x0faf		,_b11,_r4(RD),_r4(RS)				)
536 
/* INC.  Byte and memory forms use opcode 0xfe/0xff with _b000 in the reg
   field.  The W and L register forms use the one-byte encoding 0x40 with
   the register number OR-ed in.
   NOTE(review): 0x40 is also the base value _REXd emits as a prefix on
   JIT_X86_64 -- confirm INCWr/INCLr are not used when generating 64-bit
   code. */
537 #define INCBr(RD)			_O_Mrm		(0xfe		,_b11,_b000  ,_r1(RD)				)
538 #define INCBm(MD,MB,MI,MS)		_O_r_X		(0xfe		     ,_b000		,MD,MB,MI,MS		)
539 
540 #define INCWr(RD)			_wOr		(0x40,_r2(RD)							)
541 #define INCWm(MD,MB,MI,MS)		_wO_r_X		(0xff		     ,_b000		,MD,MB,MI,MS		)
542 
543 #define INCLr(RD)			_Or		(0x40,_r4(RD)							)
544 #define INCLm(MD,MB,MI,MS)		_O_r_X		(0xff		     ,_b000		,MD,MB,MI,MS		)
545 
546 
/* INVD_: two-byte opcode 0x0f08.  INVLPGm: two-byte opcode 0x0f01 with
   _b111 in the reg field and a memory operand. */
547 #define INVD_()				_OO		(0x0f08								)
548 #define INVLPGm(MD, MB, MI, MS)		_OO_r_X		(0x0f01		     ,_b111		,MD,MB,MI,MS		)
549 
550 
/* JCCSim: short conditional jump.  Emits opcode 0x70|CC followed by an
   8-bit pc-relative displacement to D (see _D8).  A non-zero base or index
   calls JITFAIL; S is ignored. */
551 #define JCCSim(CC,D,B,I,S)		((_r0P(B) && _r0P(I)) ? _O_D8	(0x70|(CC)		,(intptr_t)(D)		) : \
552 								JITFAIL("illegal mode in conditional jump"))
553 
/* Named short jumps: each supplies the condition code CC.  Several names
   share one code (for example JBSm and JNAESm both use 0x2). */
554 #define JOSm(D,B,I,S)			JCCSim(0x0,D,B,I,S)
555 #define JNOSm(D,B,I,S)			JCCSim(0x1,D,B,I,S)
556 #define JBSm(D,B,I,S)			JCCSim(0x2,D,B,I,S)
557 #define JNAESm(D,B,I,S)			JCCSim(0x2,D,B,I,S)
558 #define JNBSm(D,B,I,S)			JCCSim(0x3,D,B,I,S)
559 #define JAESm(D,B,I,S)			JCCSim(0x3,D,B,I,S)
560 #define JESm(D,B,I,S)			JCCSim(0x4,D,B,I,S)
561 #define JZSm(D,B,I,S)			JCCSim(0x4,D,B,I,S)
562 #define JNESm(D,B,I,S)			JCCSim(0x5,D,B,I,S)
563 #define JNZSm(D,B,I,S)			JCCSim(0x5,D,B,I,S)
564 #define JBESm(D,B,I,S)			JCCSim(0x6,D,B,I,S)
565 #define JNASm(D,B,I,S)			JCCSim(0x6,D,B,I,S)
566 #define JNBESm(D,B,I,S)			JCCSim(0x7,D,B,I,S)
567 #define JASm(D,B,I,S)			JCCSim(0x7,D,B,I,S)
568 #define JSSm(D,B,I,S)			JCCSim(0x8,D,B,I,S)
569 #define JNSSm(D,B,I,S)			JCCSim(0x9,D,B,I,S)
570 #define JPSm(D,B,I,S)			JCCSim(0xa,D,B,I,S)
571 #define JPESm(D,B,I,S)			JCCSim(0xa,D,B,I,S)
572 #define JNPSm(D,B,I,S)			JCCSim(0xb,D,B,I,S)
573 #define JPOSm(D,B,I,S)			JCCSim(0xb,D,B,I,S)
574 #define JLSm(D,B,I,S)			JCCSim(0xc,D,B,I,S)
575 #define JNGESm(D,B,I,S)			JCCSim(0xc,D,B,I,S)
576 #define JNLSm(D,B,I,S)			JCCSim(0xd,D,B,I,S)
577 #define JGESm(D,B,I,S)			JCCSim(0xd,D,B,I,S)
578 #define JLESm(D,B,I,S)			JCCSim(0xe,D,B,I,S)
579 #define JNGSm(D,B,I,S)			JCCSim(0xe,D,B,I,S)
580 #define JNLESm(D,B,I,S)			JCCSim(0xf,D,B,I,S)
581 #define JGSm(D,B,I,S)			JCCSim(0xf,D,B,I,S)
582 
/* SUPPORT_TINY_JUMPS is switched on here only when JIT_X86_64 is not
   defined. */
583 #ifndef JIT_X86_64
584 # define SUPPORT_TINY_JUMPS
585 #endif
586 
/* JCCim_base(CC,nCC,D,B,I,S): conditional jump to D; nCC is not used here.
   With SUPPORT_TINY_JUMPS: a non-zero base or index calls JITFAIL;
   otherwise the run-time flag _jitl.tiny_jumps selects the short form
   (0x70|CC + 8-bit displacement) or the long form (two-byte opcode
   0x0f80|CC + 32-bit displacement).
   Without SUPPORT_TINY_JUMPS: always the long form, and B, I, S are
   ignored. */
587 #ifdef SUPPORT_TINY_JUMPS
588 # define JCCim_base(CC,nCC,D,B,I,S) ((_r0P(B) && _r0P(I)) ? (_jitl.tiny_jumps \
589                                                                  ? _O_D8(0x70|(CC), D) \
590                                                                  : _OO_D32	(0x0f80|(CC)		,(intptr_t)(D)		)) : \
591 								JITFAIL("illegal mode in conditional jump"))
592 #else
593 # define JCCim_base(CC,nCC,D,B,I,S) (_OO_D32	(0x0f80|(CC)		,(intptr_t)(D)		))
594 #endif
595 
/* JCCim: on JIT_X86_64 with the run-time flag _jitl.long_jumps set, emit a
   short jump on the inverse condition nCC whose target is 13 bytes past
   the current pc, followed by JMPm to D; the 13 is presumably the byte
   length of the long JMPm sequence being skipped -- TODO confirm against
   MOVQir.  In every other case this is JCCim_base. */
596 #ifdef JIT_X86_64
597 # define JCCim(CC,nCC,D,B,I,S) (!_jitl.long_jumps \
598                                 ? JCCim_base(CC,nCC,D,B,I,S)            \
599                                 : (_O_D8(0x70|(nCC), _jit_UL(_jit.x.pc) + 13), JMPm((intptr_t)D, 0, 0, 0)))
600 #else
601 # define JCCim(CC,nCC,D,B,I,S)	JCCim_base(CC,nCC,D,B,I,S)
602 #endif
603 
/* Named conditional jumps: each passes JCCim its condition code and the
   inverse code (the same value with the low bit flipped), which JCCim
   needs for its long-jump sequence.  Several names share one code. */
604 #define JOm(D,B,I,S)			JCCim(0x0,0x1,D,B,I,S)
605 #define JNOm(D,B,I,S)			JCCim(0x1,0x0,D,B,I,S)
606 #define JBm(D,B,I,S)			JCCim(0x2,0x3,D,B,I,S)
607 #define JNAEm(D,B,I,S)			JCCim(0x2,0x3,D,B,I,S)
608 #define JNBm(D,B,I,S)			JCCim(0x3,0x2,D,B,I,S)
609 #define JAEm(D,B,I,S)			JCCim(0x3,0x2,D,B,I,S)
610 #define JEm(D,B,I,S)			JCCim(0x4,0x5,D,B,I,S)
611 #define JZm(D,B,I,S)			JCCim(0x4,0x5,D,B,I,S)
612 #define JNEm(D,B,I,S)			JCCim(0x5,0x4,D,B,I,S)
613 #define JNZm(D,B,I,S)			JCCim(0x5,0x4,D,B,I,S)
614 #define JBEm(D,B,I,S)			JCCim(0x6,0x7,D,B,I,S)
615 #define JNAm(D,B,I,S)			JCCim(0x6,0x7,D,B,I,S)
616 #define JNBEm(D,B,I,S)			JCCim(0x7,0x6,D,B,I,S)
617 #define JAm(D,B,I,S)			JCCim(0x7,0x6,D,B,I,S)
618 #define JSm(D,B,I,S)			JCCim(0x8,0x9,D,B,I,S)
619 #define JNSm(D,B,I,S)			JCCim(0x9,0x8,D,B,I,S)
620 #define JPm(D,B,I,S)			JCCim(0xa,0xb,D,B,I,S)
621 #define JPEm(D,B,I,S)			JCCim(0xa,0xb,D,B,I,S)
622 #define JNPm(D,B,I,S)			JCCim(0xb,0xa,D,B,I,S)
623 #define JPOm(D,B,I,S)			JCCim(0xb,0xa,D,B,I,S)
624 #define JLm(D,B,I,S)			JCCim(0xc,0xd,D,B,I,S)
625 #define JNGEm(D,B,I,S)			JCCim(0xc,0xd,D,B,I,S)
626 #define JNLm(D,B,I,S)			JCCim(0xd,0xc,D,B,I,S)
627 #define JGEm(D,B,I,S)			JCCim(0xd,0xc,D,B,I,S)
628 #define JLEm(D,B,I,S)			JCCim(0xe,0xf,D,B,I,S)
629 #define JNGm(D,B,I,S)			JCCim(0xe,0xf,D,B,I,S)
630 #define JNLEm(D,B,I,S)			JCCim(0xf,0xe,D,B,I,S)
631 #define JGm(D,B,I,S)			JCCim(0xf,0xe,D,B,I,S)
632 
/* JCm and JNCm are additional names for JBm and JNBm. */
633 #define JCm(D,B,I,S) JBm(D,B,I,S)
634 #define JNCm(D,B,I,S) JNBm(D,B,I,S)
635 
/* JMPSm(D,B,I,S): jump to D using opcode 0xeb emitted through _O_D8.
 * B and I must both satisfy _r0P; any other addressing mode expands to
 * JITFAIL.  S is ignored. */
#define JMPSm(D,B,I,S) \
	(!(_r0P(B) && _r0P(I)) \
	 ? JITFAIL("illegal mode in short jump") \
	 : _O_D8(0xeb, (intptr_t)(D)))
638 
/* JMPm_base(D,B,I,S): unconditional direct jump to D.
 * With SUPPORT_TINY_JUMPS, B and I must both satisfy _r0P (otherwise the
 * macro expands to JITFAIL) and _jitl.tiny_jumps selects between opcode
 * 0xeb emitted through _O_D8 and opcode 0xe9 emitted through _O_D32.
 * Without it only the 0xe9 form is emitted and B, I, S are ignored. */
#if defined(SUPPORT_TINY_JUMPS)
# define JMPm_base(D,B,I,S) \
	((_r0P(B) && _r0P(I)) \
	 ? (_jitl.tiny_jumps \
	    ? _O_D8(0xeb, D) \
	    : _O_D32(0xe9, (intptr_t)(D))) \
	 : JITFAIL("illegal mode in direct jump"))
#else
# define JMPm_base(D,B,I,S) (_O_D32(0xe9, (intptr_t)(D)))
#endif
647 
/* JMPm(D,B,I,S): unconditional jump to D.
 * On JIT_X86_64 with _jitl.long_jumps set, D is loaded into JIT_REXTMP
 * with MOVQir and an indirect jump through that register is emitted
 * (opcode 0xff, ModRM reg field _b100).  In every other case the macro
 * forwards to JMPm_base. */
#if defined(JIT_X86_64)
# define JMPm(D,B,I,S) \
	(_jitl.long_jumps \
	 ? (MOVQir((D), JIT_REXTMP), _qO_Mrm(0xff, _b11, _b100, _r8(JIT_REXTMP))) \
	 : JMPm_base(D,B,I,S))
#else
# define JMPm(D,B,I,S)	JMPm_base(D,B,I,S)
#endif
655 
/* Indirect jumps: opcode 0xff with ModRM reg field _b100, through a
 * register (JMPsr) or through a memory operand (JMPsm). */
#define JMPsr(R)		_O_Mrm(0xff, _b11, _b100, _r4(R))
#define JMPsm(D, B, I, S)	_O_r_X(0xff, _b100, (intptr_t)(D), B, I, S)
659 
660 
/* LAHF, LEAVE: single-opcode instructions.
 * LEA: opcode 0x8d, memory operand (MD,MB,MI,MS), destination register RD;
 * the L/Q/QmQ variants differ only in the emitter and register accessor. */
#define LAHF_()				_O(0x9f)
#define LEALmr(MD, MB, MI, MS, RD)	_O_r_X(0x8d, _r4(RD), MD, MB, MI, MS)
#define LEAQmr(MD, MB, MI, MS, RD)	_qO_r_X(0x8d, _r8(RD), MD, MB, MI, MS)
#define LEAQmQr(MD, MB, MI, MS, RD)	_qO_r_XB(0x8d, _r8(RD), MD, MB, MI, MS)
#define LEAVE_()			_O(0xc9)
666 
667 
/* LMSW: two-byte opcode 0x0f01 with ModRM reg field _b110, taking a
 * register (LMSWr) or a memory operand (LMSWm). */
#define LMSWr(RS)		_OO_Mrm(0x0f01, _b11, _b110, _r4(RS))
#define LMSWm(MD, MB, MI, MS)	_OO_r_X(0x0f01, _b110, MD, MB, MI, MS)
670 
/* LOOP family: opcode plus displacement MD emitted through _O_D8.
 * MB and MI must both satisfy _r0P; any other addressing mode expands to
 * JITFAIL.  MS is ignored.  LOOPE/LOOPZ share opcode 0xe1 and
 * LOOPNE/LOOPNZ share opcode 0xe0. */
#define LOOPm(MD, MB, MI, MS) \
	(!(_r0P(MB) && _r0P(MI)) ? JITFAIL("illegal mode in loop") \
				 : _O_D8(0xe2, MD))

#define LOOPEm(MD, MB, MI, MS) \
	(!(_r0P(MB) && _r0P(MI)) ? JITFAIL("illegal mode in loope") \
				 : _O_D8(0xe1, MD))

#define LOOPZm(MD, MB, MI, MS) \
	(!(_r0P(MB) && _r0P(MI)) ? JITFAIL("illegal mode in loopz") \
				 : _O_D8(0xe1, MD))

#define LOOPNEm(MD, MB, MI, MS) \
	(!(_r0P(MB) && _r0P(MI)) ? JITFAIL("illegal mode in loopne") \
				 : _O_D8(0xe0, MD))

#define LOOPNZm(MD, MB, MI, MS) \
	(!(_r0P(MB) && _r0P(MI)) ? JITFAIL("illegal mode in loopnz") \
				 : _O_D8(0xe0, MD))
685 
686 
/* MOV, byte operands.
 *   MOVBrr  register -> register   opcode 0x88 (same store opcode as MOVBrm)
 *   MOVBmr  memory   -> register   opcode 0x8a
 *   MOVBrm  register -> memory     opcode 0x88
 *   MOVBir  imm8     -> register   opcode 0xb0 + register
 *   MOVBim  imm8     -> memory     opcode 0xc6
 * MOVBrr previously used 0x80, which is the group-1 immediate opcode
 * (the one ORBir/SBBBir/SUBBir/XORBir use), not a MOV. */
#define MOVBrr(RS, RD)			_O_Mrm		(0x88		,_b11,_r1(RS),_r1(RD)				)
#define MOVBmr(MD, MB, MI, MS, RD)	_O_r_X		(0x8a		     ,_r1(RD)		,MD,MB,MI,MS		)
#define MOVBrm(RS, MD, MB, MI, MS)	_O_r_X		(0x88		     ,_r1(RS)		,MD,MB,MI,MS		)
#define MOVBir(IM,  R)			_Or_B		(0xb0,_r1(R)						,_su8(IM))
#define MOVBim(IM, MD, MB, MI, MS)	_O_X_B		(0xc6					,MD,MB,MI,MS	,_su8(IM))
692 
/* MOV, word operands (the _w emitters are used throughout).
 * rr/rm use opcode 0x89, mr uses 0x8b, ir uses 0xb8 + register and
 * im uses 0xc7; immediates go through _su16. */
#define MOVWrr(RS, RD)			_wO_Mrm(0x89, _b11, _r2(RS), _r2(RD))
#define MOVWmr(MD, MB, MI, MS, RD)	_wO_r_X(0x8b, _r2(RD), MD, MB, MI, MS)
#define MOVWrm(RS, MD, MB, MI, MS)	_wO_r_X(0x89, _r2(RS), MD, MB, MI, MS)
#define MOVWir(IM, R)			_wOr_W(0xb8, _r2(R), _su16(IM))
#define MOVWim(IM, MD, MB, MI, MS)	_wO_X_W(0xc7, MD, MB, MI, MS, _su16(IM))
698 
/* MOV, long operands.
 * rr/rm use opcode 0x89, mr uses 0x8b, ir uses 0xb8 + register and
 * im uses 0xc7.  The Q-suffixed operand variants (MOVLmQr, MOVLQrm) and
 * MOVLim go through the _qOd_ emitters with _r8 register accessors;
 * MOVLiQr emits _REXd(0,0,R) and then MOVLir. */
#define MOVLrr(RS, RD)			_O_Mrm(0x89, _b11, _r4(RS), _r4(RD))
#define MOVLmr(MD, MB, MI, MS, RD)	_O_r_X(0x8b, _r4(RD), MD, MB, MI, MS)
#define MOVLmQr(MD, MB, MI, MS, RD)	_qOd_r_X(0x8b, _r8(RD), MD, MB, MI, MS)
#define MOVLrm(RS, MD, MB, MI, MS)	_O_r_X(0x89, _r4(RS), MD, MB, MI, MS)
#define MOVLQrm(RS, MD, MB, MI, MS)	_qOd_r_X(0x89, _r8(RS), MD, MB, MI, MS)
#define MOVLir(IM, R)			_Or_L(0xb8, _r4(R), IM)
#define MOVLiQr(IM, R)			(_REXd(0, 0, R), MOVLir(IM, R))
#define MOVLim(IM, MD, MB, MI, MS)	_qOd_X_L(0xc7, MD, MB, MI, MS, IM)
707 
/* MOV, quad operands.
 * Loads use opcode 0x8b, stores and rr use 0x89, ir uses 0xb8 + register
 * through _qOr_Q, im uses 0xc7 through _qO_X_L.
 * NOTE(review): MOVQrm goes through _qOd_r_X, the emitter MOVLQrm also
 * uses, whereas MOVQmr/MOVQmQr/MOVQrQm use the _qO_ emitters -- confirm
 * this asymmetry is intended. */
#define MOVQmr(MD, MB, MI, MS, RD)	_qO_r_X(0x8b, _r8(RD), MD, MB, MI, MS)
#define MOVQmQr(MD, MB, MI, MS, RD)	_qO_r_XB(0x8b, _r8(RD), MD, MB, MI, MS)
#define MOVQrm(RS, MD, MB, MI, MS)	_qOd_r_X(0x89, _r8(RS), MD, MB, MI, MS)
#define MOVQrQm(RS, MD, MB, MI, MS)	_qO_r_XB(0x89, _r8(RS), MD, MB, MI, MS)
#define MOVQir(IM, R)			_qOr_Q(0xb8, _r8(R), IM)
#define MOVQim(IM, MD, MB, MI, MS)	_qO_X_L(0xc7, MD, MB, MI, MS, IM)

#define MOVQrr(RS, RD)			_qO_Mrm(0x89, _b11, _r8(RS), _r8(RD))
716 
/* MOVZX (0x0fb6 byte source, 0x0fb7 word source) and MOVSX (0x0fbe byte
 * source, 0x0fbf word source).  In the rr forms the destination RD is
 * placed in the ModRM reg field and the source RS in the r/m field. */
#define MOVZBLrr(RS, RD)		_OO_Mrm(0x0fb6, _b11, _r1(RD), _r1(RS))
#define MOVZBLmr(MD, MB, MI, MS, RD)	_OO_r_X(0x0fb6, _r1(RD), MD, MB, MI, MS)
#define MOVZBWrr(RS, RD)		_wOO_Mrm(0x0fb6, _b11, _r2(RD), _r2(RS))
#define MOVZBWmr(MD, MB, MI, MS, RD)	_wOO_r_X(0x0fb6, _r2(RD), MD, MB, MI, MS)
#define MOVZWLrr(RS, RD)		_OO_Mrm(0x0fb7, _b11, _r1(RD), _r1(RS))
#define MOVZWLmr(MD, MB, MI, MS, RD)	_OO_r_X(0x0fb7, _r1(RD), MD, MB, MI, MS)

#define MOVSBLrr(RS, RD)		_OO_Mrm(0x0fbe, _b11, _r1(RD), _r1(RS))
#define MOVSBLmr(MD, MB, MI, MS, RD)	_OO_r_X(0x0fbe, _r1(RD), MD, MB, MI, MS)
#define MOVSBWrr(RS, RD)		_wOO_Mrm(0x0fbe, _b11, _r2(RD), _r2(RS))
#define MOVSBWmr(MD, MB, MI, MS, RD)	_wOO_r_X(0x0fbe, _r2(RD), MD, MB, MI, MS)
#define MOVSWLrr(RS, RD)		_OO_Mrm(0x0fbf, _b11, _r1(RD), _r1(RS))
#define MOVSWLmr(MD, MB, MI, MS, RD)	_OO_r_X(0x0fbf, _r1(RD), MD, MB, MI, MS)

#define MOVSWQmr(MD, MB, MI, MS, RD)	_qOO_r_X(0x0fbf, _r1(RD), MD, MB, MI, MS)
732 
/* MUL: opcode 0xf6 (byte) or 0xf7 (word/long) with ModRM reg field _b100,
 * taking a register (xxxr) or a memory operand (xxxm). */
#define MULBr(RS)		_O_Mrm(0xf6, _b11, _b100, _r1(RS))
#define MULBm(MD, MB, MI, MS)	_O_r_X(0xf6, _b100, MD, MB, MI, MS)

#define MULWr(RS)		_wO_Mrm(0xf7, _b11, _b100, _r2(RS))
#define MULWm(MD, MB, MI, MS)	_wO_r_X(0xf7, _b100, MD, MB, MI, MS)

#define MULLr(RS)		_O_Mrm(0xf7, _b11, _b100, _r4(RS))
#define MULLm(MD, MB, MI, MS)	_O_r_X(0xf7, _b100, MD, MB, MI, MS)
741 
742 
/* NEG: opcode 0xf6 (byte) or 0xf7 (word/long/quad) with ModRM reg field
 * _b011, on a register (xxxr) or a memory operand (xxxm). */
#define NEGBr(RD)		_O_Mrm(0xf6, _b11, _b011, _r1(RD))
#define NEGBm(MD, MB, MI, MS)	_O_r_X(0xf6, _b011, MD, MB, MI, MS)

#define NEGWr(RD)		_wO_Mrm(0xf7, _b11, _b011, _r2(RD))
#define NEGWm(MD, MB, MI, MS)	_wO_r_X(0xf7, _b011, MD, MB, MI, MS)

#define NEGLr(RD)		_O_Mrm(0xf7, _b11, _b011, _r4(RD))
#define NEGLm(MD, MB, MI, MS)	_O_r_X(0xf7, _b011, MD, MB, MI, MS)

#define NEGQr(RD)		_qO_Mrm(0xf7, _b11, _b011, _r8(RD))
753 
/* NOP: single opcode byte 0x90. */
#define NOP_()	_O(0x90)
755 
756 
/* NOT: opcode 0xf6 (byte) or 0xf7 (word/long) with ModRM reg field _b010,
 * on a register (xxxr) or a memory operand (xxxm). */
#define NOTBr(RD)		_O_Mrm(0xf6, _b11, _b010, _r1(RD))
#define NOTBm(MD, MB, MI, MS)	_O_r_X(0xf6, _b010, MD, MB, MI, MS)

#define NOTWr(RD)		_wO_Mrm(0xf7, _b11, _b010, _r2(RD))
#define NOTWm(MD, MB, MI, MS)	_wO_r_X(0xf7, _b010, MD, MB, MI, MS)

#define NOTLr(RD)		_O_Mrm(0xf7, _b11, _b010, _r4(RD))
#define NOTLm(MD, MB, MI, MS)	_O_r_X(0xf7, _b010, MD, MB, MI, MS)
765 
766 
/* OR.
 *   rr / rm   opcode 0x08 (byte) or 0x09 (word/long/quad)
 *   mr        opcode 0x0a (byte) or 0x0b (word/long)
 *   ir / im   opcode 0x80 (byte) or 0x81 (word/long/quad) with ModRM reg
 *             field _b001 and the immediate appended */
#define ORBrr(RS, RD)			_O_Mrm(0x08, _b11, _r1(RS), _r1(RD))
#define ORBmr(MD, MB, MI, MS, RD)	_O_r_X(0x0a, _r1(RD), MD, MB, MI, MS)
#define ORBrm(RS, MD, MB, MI, MS)	_O_r_X(0x08, _r1(RS), MD, MB, MI, MS)
#define ORBir(IM, RD)			_O_Mrm_B(0x80, _b11, _b001, _r1(RD), _su8(IM))
#define ORBim(IM, MD, MB, MI, MS)	_O_r_X_B(0x80, _b001, MD, MB, MI, MS, _su8(IM))

#define ORWrr(RS, RD)			_wO_Mrm(0x09, _b11, _r2(RS), _r2(RD))
#define ORWmr(MD, MB, MI, MS, RD)	_wO_r_X(0x0b, _r2(RD), MD, MB, MI, MS)
#define ORWrm(RS, MD, MB, MI, MS)	_wO_r_X(0x09, _r2(RS), MD, MB, MI, MS)
#define ORWir(IM, RD)			_wOs_Mrm_sW(0x81, _b11, _b001, _r2(RD), _su16(IM))
#define ORWim(IM, MD, MB, MI, MS)	_wOs_r_X_sW(0x81, _b001, MD, MB, MI, MS, _su16(IM))

#define ORLrr(RS, RD)			_O_Mrm(0x09, _b11, _r4(RS), _r4(RD))
#define ORLmr(MD, MB, MI, MS, RD)	_O_r_X(0x0b, _r4(RD), MD, MB, MI, MS)
#define ORLrm(RS, MD, MB, MI, MS)	_O_r_X(0x09, _r4(RS), MD, MB, MI, MS)
#define ORLir(IM, RD)			_Os_Mrm_sL(0x81, _b11, _b001, _r4(RD), IM)
#define ORLim(IM, MD, MB, MI, MS)	_Os_r_X_sL(0x81, _b001, MD, MB, MI, MS, IM)

#define ORQrr(RS, RD)			_qO_Mrm(0x09, _b11, _r8(RS), _r8(RD))
#define ORQir(IM, RD)			_qOs_Mrm_sL(0x81, _b11, _b001, _r8(RD), IM)
787 
/* POP.
 *   register forms   opcode 0x58 + register
 *   memory forms     opcode 0x8f with ModRM reg field _b000
 * POPA/POPAD share opcode 0x61 and POPF/POPFD share opcode 0x9d; the
 * un-suffixed name of each pair uses the _wO emitter, the D-suffixed
 * name uses plain _O. */
#define POPWr(RD)		_wOr(0x58, _r2(RD))
#define POPWm(MD, MB, MI, MS)	_wO_r_X(0x8f, _b000, MD, MB, MI, MS)

#define POPLr(RD)		_Or(0x58, _r4(RD))
#define POPLm(MD, MB, MI, MS)	_O_r_X(0x8f, _b000, MD, MB, MI, MS)

#define POPQr(RD)		_qOdr(0x58, _r8(RD))


#define POPA_()			_wO(0x61)
#define POPAD_()		_O(0x61)

#define POPF_()			_wO(0x9d)
#define POPFD_()		_O(0x9d)
802 
803 
/* PUSH, word operands.
 *   PUSHWr  register   opcode 0x50 + register
 *   PUSHWm  memory     opcode 0xff with ModRM reg field _b110
 *   PUSHWi  immediate  opcode 0x68
 * PUSHWm previously had a stray comma after the opcode, which passed an
 * empty extra argument to _wO_r_X and made every use fail to compile. */
#define PUSHWr(R)			_wOr		(0x50,_r2(R)							)
#define PUSHWm(MD,MB,MI,MS)		_wO_r_X		(0xff		     ,_b110		,MD,MB,MI,MS		)
#define PUSHWi(IM)			_wOs_sW		(0x68							,IM	)
807 
/* PUSH, long and quad operands, plus PUSHA/PUSHAD.
 *   register forms   opcode 0x50 + register
 *   PUSHLm           opcode 0xff with ModRM reg field _b110
 *   PUSHLi           opcode 0x68
 * PUSHA/PUSHAD share opcode 0x60; PUSHA_ uses the _wO emitter. */
#define PUSHLr(R)		_Or(0x50, _r4(R))
#define PUSHLm(MD, MB, MI, MS)	_O_r_X(0xff, _b110, MD, MB, MI, MS)
#define PUSHLi(IM)		_Os_sL(0x68, IM)

#define PUSHQr(R)		_qOdr(0x50, _r8(R))

#define PUSHA_()		_wO(0x60)
#define PUSHAD_()		_O(0x60)
816 
/* PUSHF / PUSHFD share opcode 0x9c.  As with POPF_/POPFD_ and
 * PUSHA_/PUSHAD_, the un-suffixed (16-bit) name uses the _wO emitter and
 * the D-suffixed (32-bit) name uses plain _O.  The two emitters were
 * previously swapped between these two macros. */
#define PUSHF_()			_wO		(0x9c								)
#define PUSHFD_()			_O		(0x9c								)
819 
/* RET: plain return (0xc3), or return with a 16-bit immediate passed
 * through _su16 (0xc2). */
#define RET_()		_O(0xc3)
#define RETi(IM)	_O_W(0xc2, _su16(IM))
822 
823 
/* ROL -- rotate left (ModRM reg field _b000).
 *   xxxir(IM,RD)           immediate count, register operand
 *   xxxim(IM,MD,MB,MI,MS)  immediate count, memory operand
 *   xxxrr(RS,RD)           count in RS, register operand
 *   xxxrm(RS,MD,MB,MI,MS)  count in RS, memory operand
 * A count of exactly 1 selects opcode 0xd0 (byte) / 0xd1 (word, long);
 * any other count uses 0xc0 / 0xc1 followed by _u8(IM).  The
 * register-count forms (0xd2 / 0xd3) accept only RS == _CL and expand to
 * JITFAIL otherwise.
 * Memory operands are taken in (disp,base,index,scale) order, matching the
 * other memory-operand macros in this file; the im/rm forms previously
 * declared scale before index, which swapped the two for such callers. */
#define ROLBir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd0	,_b11,_b000,_r1(RD)				) : \
						_O_Mrm_B	(0xc0	,_b11,_b000,_r1(RD)			,_u8(IM) ) )
#define ROLBim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd0	     ,_b000		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc0	     ,_b000		,MD,MB,MI,MS	,_u8(IM) ) )
#define ROLBrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd2	,_b11,_b000,_r1(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define ROLBrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd2	     ,_b000		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define ROLWir(IM,RD)		(((IM)==1) ?	_wO_Mrm		(0xd1	,_b11,_b000,_r2(RD)				) : \
						_wO_Mrm_B	(0xc1	,_b11,_b000,_r2(RD)			,_u8(IM) ) )
#define ROLWim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_wO_r_X		(0xd1	     ,_b000		,MD,MB,MI,MS		) : \
						_wO_r_X_B	(0xc1	     ,_b000		,MD,MB,MI,MS	,_u8(IM) ) )
#define ROLWrr(RS,RD)		(((RS)==_CL) ?	_wO_Mrm		(0xd3	,_b11,_b000,_r2(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define ROLWrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_wO_r_X		(0xd3	     ,_b000		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define ROLLir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd1	,_b11,_b000,_r4(RD)				) : \
						_O_Mrm_B	(0xc1	,_b11,_b000,_r4(RD)			,_u8(IM) ) )
#define ROLLim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd1	     ,_b000		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc1	     ,_b000		,MD,MB,MI,MS	,_u8(IM) ) )
#define ROLLrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd3	,_b11,_b000,_r4(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define ROLLrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd3	     ,_b000		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )
850 
851 
/* ROR -- rotate right (ModRM reg field _b001).
 *   xxxir(IM,RD)           immediate count, register operand
 *   xxxim(IM,MD,MB,MI,MS)  immediate count, memory operand
 *   xxxrr(RS,RD)           count in RS, register operand
 *   xxxrm(RS,MD,MB,MI,MS)  count in RS, memory operand
 * A count of exactly 1 selects opcode 0xd0 (byte) / 0xd1 (word, long);
 * any other count uses 0xc0 / 0xc1 followed by _u8(IM).  The
 * register-count forms (0xd2 / 0xd3) accept only RS == _CL and expand to
 * JITFAIL otherwise.
 * Memory operands are taken in (disp,base,index,scale) order, matching the
 * other memory-operand macros in this file; the im/rm forms previously
 * declared scale before index, which swapped the two for such callers. */
#define RORBir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd0	,_b11,_b001,_r1(RD)				) : \
						_O_Mrm_B	(0xc0	,_b11,_b001,_r1(RD)			,_u8(IM) ) )
#define RORBim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd0	     ,_b001		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc0	     ,_b001		,MD,MB,MI,MS	,_u8(IM) ) )
#define RORBrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd2	,_b11,_b001,_r1(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define RORBrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd2	     ,_b001		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define RORWir(IM,RD)		(((IM)==1) ?	_wO_Mrm		(0xd1	,_b11,_b001,_r2(RD)				) : \
						_wO_Mrm_B	(0xc1	,_b11,_b001,_r2(RD)			,_u8(IM) ) )
#define RORWim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_wO_r_X		(0xd1	     ,_b001		,MD,MB,MI,MS		) : \
						_wO_r_X_B	(0xc1	     ,_b001		,MD,MB,MI,MS	,_u8(IM) ) )
#define RORWrr(RS,RD)		(((RS)==_CL) ?	_wO_Mrm		(0xd3	,_b11,_b001,_r2(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define RORWrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_wO_r_X		(0xd3	     ,_b001		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define RORLir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd1	,_b11,_b001,_r4(RD)				) : \
						_O_Mrm_B	(0xc1	,_b11,_b001,_r4(RD)			,_u8(IM) ) )
#define RORLim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd1	     ,_b001		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc1	     ,_b001		,MD,MB,MI,MS	,_u8(IM) ) )
#define RORLrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd3	,_b11,_b001,_r4(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define RORLrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd3	     ,_b001		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )
878 
879 
/* SAHF: single opcode byte 0x9e. */
#define SAHF_()		_O(0x9e)
881 
882 
/* SAL is provided purely as another spelling of SHL: every SALxyy name
 * expands to the matching SHLxyy name.
 * NOTE(review): no SHLQim / SHLQrm definition appears next to SHLQir and
 * SHLQrr -- confirm they exist before relying on SALQim / SALQrm. */

/* byte */
#define SALBir  SHLBir
#define SALBim  SHLBim
#define SALBrr  SHLBrr
#define SALBrm  SHLBrm
/* word */
#define SALWir  SHLWir
#define SALWim  SHLWim
#define SALWrr  SHLWrr
#define SALWrm  SHLWrm
/* long */
#define SALLir  SHLLir
#define SALLim  SHLLim
#define SALLrr  SHLLrr
#define SALLrm  SHLLrm
/* quad */
#define SALQir  SHLQir
#define SALQim  SHLQim
#define SALQrr  SHLQrr
#define SALQrm  SHLQrm
899 
900 
/* SAR -- arithmetic shift right (ModRM reg field _b111).
 *   xxxir(IM,RD)           immediate count, register operand
 *   xxxim(IM,MD,MB,MI,MS)  immediate count, memory operand
 *   xxxrr(RS,RD)           count in RS, register operand
 *   xxxrm(RS,MD,MB,MI,MS)  count in RS, memory operand
 * A count of exactly 1 selects opcode 0xd0 (byte) / 0xd1 (word, long,
 * quad); any other count uses 0xc0 / 0xc1 followed by _u8(IM).  The
 * register-count forms (0xd2 / 0xd3) accept only RS == _CL and expand to
 * JITFAIL otherwise.
 * Memory operands are taken in (disp,base,index,scale) order, matching the
 * other memory-operand macros in this file; the im/rm forms previously
 * declared scale before index, which swapped the two for such callers.
 * SARQir now uses _r8(RD) in both arms (the imm8 arm used _r4), in line
 * with SHLQir and SHRQir. */
#define SARBir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd0	,_b11,_b111,_r1(RD)				) : \
						_O_Mrm_B	(0xc0	,_b11,_b111,_r1(RD)			,_u8(IM) ) )
#define SARBim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd0	     ,_b111		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc0	     ,_b111		,MD,MB,MI,MS	,_u8(IM) ) )
#define SARBrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd2	,_b11,_b111,_r1(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define SARBrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd2	     ,_b111		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define SARWir(IM,RD)		(((IM)==1) ?	_wO_Mrm		(0xd1	,_b11,_b111,_r2(RD)				) : \
						_wO_Mrm_B	(0xc1	,_b11,_b111,_r2(RD)			,_u8(IM) ) )
#define SARWim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_wO_r_X		(0xd1	     ,_b111		,MD,MB,MI,MS		) : \
						_wO_r_X_B	(0xc1	     ,_b111		,MD,MB,MI,MS	,_u8(IM) ) )
#define SARWrr(RS,RD)		(((RS)==_CL) ?	_wO_Mrm		(0xd3	,_b11,_b111,_r2(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define SARWrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_wO_r_X		(0xd3	     ,_b111		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define SARLir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd1	,_b11,_b111,_r4(RD)				) : \
						_O_Mrm_B	(0xc1	,_b11,_b111,_r4(RD)			,_u8(IM) ) )
#define SARLim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd1	     ,_b111		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc1	     ,_b111		,MD,MB,MI,MS	,_u8(IM) ) )
#define SARLrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd3	,_b11,_b111,_r4(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define SARLrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd3	     ,_b111		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define SARQir(IM,RD)		(((IM)==1) ?	_qO_Mrm		(0xd1	,_b11,_b111,_r8(RD)				) : \
						_qO_Mrm_B	(0xc1	,_b11,_b111,_r8(RD)			,_u8(IM) ) )
#define SARQrr(RS,RD)		(((RS)==_CL) ?	_qO_Mrm		(0xd3	,_b11,_b111,_r8(RD)				) : \
						JITFAIL		("source register must be CL"				) )
932 
933 
/* SBB.
 *   rr / rm   opcode 0x18 (byte) or 0x19 (word/long)
 *   mr        opcode 0x1a (byte) or 0x1b (word/long)
 *   ir / im   opcode 0x80 (byte) or 0x81 (word/long) with ModRM reg field
 *             _b011 and the immediate appended */
#define SBBBrr(RS, RD)			_O_Mrm(0x18, _b11, _r1(RS), _r1(RD))
#define SBBBmr(MD, MB, MI, MS, RD)	_O_r_X(0x1a, _r1(RD), MD, MB, MI, MS)
#define SBBBrm(RS, MD, MB, MI, MS)	_O_r_X(0x18, _r1(RS), MD, MB, MI, MS)
#define SBBBir(IM, RD)			_O_Mrm_B(0x80, _b11, _b011, _r1(RD), _su8(IM))
#define SBBBim(IM, MD, MB, MI, MS)	_O_r_X_B(0x80, _b011, MD, MB, MI, MS, _su8(IM))

#define SBBWrr(RS, RD)			_wO_Mrm(0x19, _b11, _r2(RS), _r2(RD))
#define SBBWmr(MD, MB, MI, MS, RD)	_wO_r_X(0x1b, _r2(RD), MD, MB, MI, MS)
#define SBBWrm(RS, MD, MB, MI, MS)	_wO_r_X(0x19, _r2(RS), MD, MB, MI, MS)
#define SBBWir(IM, RD)			_wOs_Mrm_sW(0x81, _b11, _b011, _r2(RD), _su16(IM))
#define SBBWim(IM, MD, MB, MI, MS)	_wOs_r_X_sW(0x81, _b011, MD, MB, MI, MS, _su16(IM))

#define SBBLrr(RS, RD)			_O_Mrm(0x19, _b11, _r4(RS), _r4(RD))
#define SBBLmr(MD, MB, MI, MS, RD)	_O_r_X(0x1b, _r4(RD), MD, MB, MI, MS)
#define SBBLrm(RS, MD, MB, MI, MS)	_O_r_X(0x19, _r4(RS), MD, MB, MI, MS)
#define SBBLir(IM, RD)			_Os_Mrm_sL(0x81, _b11, _b011, _r4(RD), IM)
#define SBBLim(IM, MD, MB, MI, MS)	_Os_r_X_sL(0x81, _b011, MD, MB, MI, MS, IM)
951 
952 
/* SETcc into a byte register: two-byte opcode 0x0f90|CC with ModRM reg
 * field _b000.  The aliases below only supply the condition code;
 * synonymous mnemonics share one code. */
#define SETCCir(CC, RD)		_OO_Mrm(0x0f90|(CC), _b11, _b000, _r1(RD))

#define SETOr(RD)		SETCCir(0x0, RD)
#define SETNOr(RD)		SETCCir(0x1, RD)
#define SETBr(RD)		SETCCir(0x2, RD)
#define SETNAEr(RD)		SETCCir(0x2, RD)
#define SETNBr(RD)		SETCCir(0x3, RD)
#define SETAEr(RD)		SETCCir(0x3, RD)
#define SETEr(RD)		SETCCir(0x4, RD)
#define SETZr(RD)		SETCCir(0x4, RD)
#define SETNEr(RD)		SETCCir(0x5, RD)
#define SETNZr(RD)		SETCCir(0x5, RD)
#define SETBEr(RD)		SETCCir(0x6, RD)
#define SETNAr(RD)		SETCCir(0x6, RD)
#define SETNBEr(RD)		SETCCir(0x7, RD)
#define SETAr(RD)		SETCCir(0x7, RD)
#define SETSr(RD)		SETCCir(0x8, RD)
#define SETNSr(RD)		SETCCir(0x9, RD)
#define SETPr(RD)		SETCCir(0xa, RD)
#define SETPEr(RD)		SETCCir(0xa, RD)
#define SETNPr(RD)		SETCCir(0xb, RD)
#define SETPOr(RD)		SETCCir(0xb, RD)
#define SETLr(RD)		SETCCir(0xc, RD)
#define SETNGEr(RD)		SETCCir(0xc, RD)
#define SETNLr(RD)		SETCCir(0xd, RD)
#define SETGEr(RD)		SETCCir(0xd, RD)
#define SETLEr(RD)		SETCCir(0xe, RD)
#define SETNGr(RD)		SETCCir(0xe, RD)
#define SETNLEr(RD)		SETCCir(0xf, RD)
#define SETGr(RD)		SETCCir(0xf, RD)
983 
/* SETcc into memory: two-byte opcode 0x0f90|CC with ModRM reg field _b000
 * and memory operand (MD,MB,MI,MS).  The aliases below only supply the
 * condition code; synonymous mnemonics share one code. */
#define SETCCim(CC, MD, MB, MI, MS)	_OO_r_X(0x0f90|(CC), _b000, MD, MB, MI, MS)

#define SETOm(D, B, I, S)	SETCCim(0x0, D, B, I, S)
#define SETNOm(D, B, I, S)	SETCCim(0x1, D, B, I, S)
#define SETBm(D, B, I, S)	SETCCim(0x2, D, B, I, S)
#define SETNAEm(D, B, I, S)	SETCCim(0x2, D, B, I, S)
#define SETNBm(D, B, I, S)	SETCCim(0x3, D, B, I, S)
#define SETAEm(D, B, I, S)	SETCCim(0x3, D, B, I, S)
#define SETEm(D, B, I, S)	SETCCim(0x4, D, B, I, S)
#define SETZm(D, B, I, S)	SETCCim(0x4, D, B, I, S)
#define SETNEm(D, B, I, S)	SETCCim(0x5, D, B, I, S)
#define SETNZm(D, B, I, S)	SETCCim(0x5, D, B, I, S)
#define SETBEm(D, B, I, S)	SETCCim(0x6, D, B, I, S)
#define SETNAm(D, B, I, S)	SETCCim(0x6, D, B, I, S)
#define SETNBEm(D, B, I, S)	SETCCim(0x7, D, B, I, S)
#define SETAm(D, B, I, S)	SETCCim(0x7, D, B, I, S)
#define SETSm(D, B, I, S)	SETCCim(0x8, D, B, I, S)
#define SETNSm(D, B, I, S)	SETCCim(0x9, D, B, I, S)
#define SETPm(D, B, I, S)	SETCCim(0xa, D, B, I, S)
#define SETPEm(D, B, I, S)	SETCCim(0xa, D, B, I, S)
#define SETNPm(D, B, I, S)	SETCCim(0xb, D, B, I, S)
#define SETPOm(D, B, I, S)	SETCCim(0xb, D, B, I, S)
#define SETLm(D, B, I, S)	SETCCim(0xc, D, B, I, S)
#define SETNGEm(D, B, I, S)	SETCCim(0xc, D, B, I, S)
#define SETNLm(D, B, I, S)	SETCCim(0xd, D, B, I, S)
#define SETGEm(D, B, I, S)	SETCCim(0xd, D, B, I, S)
#define SETLEm(D, B, I, S)	SETCCim(0xe, D, B, I, S)
#define SETNGm(D, B, I, S)	SETCCim(0xe, D, B, I, S)
#define SETNLEm(D, B, I, S)	SETCCim(0xf, D, B, I, S)
#define SETGm(D, B, I, S)	SETCCim(0xf, D, B, I, S)
1014 
1015 
/* SHL -- shift left (ModRM reg field _b100).
 *   xxxir(IM,RD)           immediate count, register operand
 *   xxxim(IM,MD,MB,MI,MS)  immediate count, memory operand
 *   xxxrr(RS,RD)           count in RS, register operand
 *   xxxrm(RS,MD,MB,MI,MS)  count in RS, memory operand
 * A count of exactly 1 selects opcode 0xd0 (byte) / 0xd1 (word, long,
 * quad); any other count uses 0xc0 / 0xc1 followed by _u8(IM).  The
 * register-count forms (0xd2 / 0xd3) accept only RS == _CL and expand to
 * JITFAIL otherwise.
 * Memory operands are taken in (disp,base,index,scale) order, matching the
 * other memory-operand macros in this file; the im/rm forms previously
 * declared scale before index, which swapped the two for such callers. */
#define SHLBir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd0	,_b11,_b100,_r1(RD)				) : \
						_O_Mrm_B	(0xc0	,_b11,_b100,_r1(RD)			,_u8(IM) ) )
#define SHLBim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd0	     ,_b100		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc0	     ,_b100		,MD,MB,MI,MS	,_u8(IM) ) )
#define SHLBrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd2	,_b11,_b100,_r1(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define SHLBrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd2	     ,_b100		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define SHLWir(IM,RD)		(((IM)==1) ?	_wO_Mrm		(0xd1	,_b11,_b100,_r2(RD)				) : \
						_wO_Mrm_B	(0xc1	,_b11,_b100,_r2(RD)			,_u8(IM) ) )
#define SHLWim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_wO_r_X		(0xd1	     ,_b100		,MD,MB,MI,MS		) : \
						_wO_r_X_B	(0xc1	     ,_b100		,MD,MB,MI,MS	,_u8(IM) ) )
#define SHLWrr(RS,RD)		(((RS)==_CL) ?	_wO_Mrm		(0xd3	,_b11,_b100,_r2(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define SHLWrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_wO_r_X		(0xd3	     ,_b100		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define SHLLir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd1	,_b11,_b100,_r4(RD)				) : \
						_O_Mrm_B	(0xc1	,_b11,_b100,_r4(RD)			,_u8(IM) ) )
#define SHLLim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd1	     ,_b100		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc1	     ,_b100		,MD,MB,MI,MS	,_u8(IM) ) )
#define SHLLrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd3	,_b11,_b100,_r4(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define SHLLrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd3	     ,_b100		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define SHLQir(IM,RD)		(((IM)==1) ?	_qO_Mrm		(0xd1	,_b11,_b100,_r8(RD)				) : \
						_qO_Mrm_B	(0xc1	,_b11,_b100,_r8(RD)			,_u8(IM) ) )
#define SHLQrr(RS,RD)		(((RS)==_CL) ?	_qO_Mrm		(0xd3	,_b11,_b100,_r8(RD)				) : \
						JITFAIL		("source register must be CL"				) )
1047 
1048 
/* SHR -- logical shift right (ModRM reg field _b101).
 *   xxxir(IM,RD)           immediate count, register operand
 *   xxxim(IM,MD,MB,MI,MS)  immediate count, memory operand
 *   xxxrr(RS,RD)           count in RS, register operand
 *   xxxrm(RS,MD,MB,MI,MS)  count in RS, memory operand
 * A count of exactly 1 selects opcode 0xd0 (byte) / 0xd1 (word, long,
 * quad); any other count uses 0xc0 / 0xc1 followed by _u8(IM).  The
 * register-count forms (0xd2 / 0xd3) accept only RS == _CL and expand to
 * JITFAIL otherwise.
 * Memory operands are taken in (disp,base,index,scale) order, matching the
 * other memory-operand macros in this file; the im/rm forms previously
 * declared scale before index, which swapped the two for such callers. */
#define SHRBir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd0	,_b11,_b101,_r1(RD)				) : \
						_O_Mrm_B	(0xc0	,_b11,_b101,_r1(RD)			,_u8(IM) ) )
#define SHRBim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd0	     ,_b101		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc0	     ,_b101		,MD,MB,MI,MS	,_u8(IM) ) )
#define SHRBrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd2	,_b11,_b101,_r1(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define SHRBrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd2	     ,_b101		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define SHRWir(IM,RD)		(((IM)==1) ?	_wO_Mrm		(0xd1	,_b11,_b101,_r2(RD)				) : \
						_wO_Mrm_B	(0xc1	,_b11,_b101,_r2(RD)			,_u8(IM) ) )
#define SHRWim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_wO_r_X		(0xd1	     ,_b101		,MD,MB,MI,MS		) : \
						_wO_r_X_B	(0xc1	     ,_b101		,MD,MB,MI,MS	,_u8(IM) ) )
#define SHRWrr(RS,RD)		(((RS)==_CL) ?	_wO_Mrm		(0xd3	,_b11,_b101,_r2(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define SHRWrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_wO_r_X		(0xd3	     ,_b101		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define SHRLir(IM,RD)		(((IM)==1) ?	_O_Mrm		(0xd1	,_b11,_b101,_r4(RD)				) : \
						_O_Mrm_B	(0xc1	,_b11,_b101,_r4(RD)			,_u8(IM) ) )
#define SHRLim(IM,MD,MB,MI,MS)	(((IM)==1) ?	_O_r_X		(0xd1	     ,_b101		,MD,MB,MI,MS		) : \
						_O_r_X_B	(0xc1	     ,_b101		,MD,MB,MI,MS	,_u8(IM) ) )
#define SHRLrr(RS,RD)		(((RS)==_CL) ?	_O_Mrm		(0xd3	,_b11,_b101,_r4(RD)				) : \
						JITFAIL		("source register must be CL"				) )
#define SHRLrm(RS,MD,MB,MI,MS)	(((RS)==_CL) ?	_O_r_X		(0xd3	     ,_b101		,MD,MB,MI,MS		) : \
						JITFAIL		("source register must be CL"				) )

#define SHRQir(IM,RD)		(((IM)==1) ?	_qO_Mrm		(0xd1	,_b11,_b101,_r8(RD)				) : \
						_qO_Mrm_B	(0xc1	,_b11,_b101,_r8(RD)			,_u8(IM) ) )
#define SHRQrr(RS,RD)		(((RS)==_CL) ?	_qO_Mrm		(0xd3	,_b11,_b101,_r8(RD)				) : \
						JITFAIL		("source register must be CL"				) )
1080 
1081 
/* STC: single opcode byte 0xf9. */
#define STC_()		_O(0xf9)
1083 
1084 
/* SUB.
 *   rr / rm   opcode 0x28 (byte) or 0x29 (word/long/quad)
 *   mr        opcode 0x2a (byte) or 0x2b (word/long)
 *   ir / im   opcode 0x80 (byte) or 0x81 (word/long/quad) with ModRM reg
 *             field _b101 and the immediate appended */
#define SUBBrr(RS, RD)			_O_Mrm(0x28, _b11, _r1(RS), _r1(RD))
#define SUBBmr(MD, MB, MI, MS, RD)	_O_r_X(0x2a, _r1(RD), MD, MB, MI, MS)
#define SUBBrm(RS, MD, MB, MI, MS)	_O_r_X(0x28, _r1(RS), MD, MB, MI, MS)
#define SUBBir(IM, RD)			_O_Mrm_B(0x80, _b11, _b101, _r1(RD), _su8(IM))
#define SUBBim(IM, MD, MB, MI, MS)	_O_r_X_B(0x80, _b101, MD, MB, MI, MS, _su8(IM))

#define SUBWrr(RS, RD)			_wO_Mrm(0x29, _b11, _r2(RS), _r2(RD))
#define SUBWmr(MD, MB, MI, MS, RD)	_wO_r_X(0x2b, _r2(RD), MD, MB, MI, MS)
#define SUBWrm(RS, MD, MB, MI, MS)	_wO_r_X(0x29, _r2(RS), MD, MB, MI, MS)
#define SUBWir(IM, RD)			_wOs_Mrm_sW(0x81, _b11, _b101, _r2(RD), _su16(IM))
#define SUBWim(IM, MD, MB, MI, MS)	_wOs_r_X_sW(0x81, _b101, MD, MB, MI, MS, _su16(IM))

#define SUBLrr(RS, RD)			_O_Mrm(0x29, _b11, _r4(RS), _r4(RD))
#define SUBLmr(MD, MB, MI, MS, RD)	_O_r_X(0x2b, _r4(RD), MD, MB, MI, MS)
#define SUBLrm(RS, MD, MB, MI, MS)	_O_r_X(0x29, _r4(RS), MD, MB, MI, MS)
#define SUBLir(IM, RD)			_Os_Mrm_sL(0x81, _b11, _b101, _r4(RD), IM)
#define SUBLim(IM, MD, MB, MI, MS)	_Os_r_X_sL(0x81, _b101, MD, MB, MI, MS, IM)

#define SUBQrr(RS, RD)			_qO_Mrm(0x29, _b11, _r8(RS), _r8(RD))
#define SUBQir(IM, RD)			_qOs_Mrm_sL(0x81, _b11, _b101, _r8(RD), IM)
1105 
/* TEST.
 *   rr / rm   opcode 0x84 (byte) or 0x85 (word/long/quad)
 *   ir / im   opcode 0xf6 (byte) or 0xf7 (word/long/quad) with ModRM reg
 *             field _b000 and the immediate appended (through _u8 / _u16
 *             for the byte and word forms) */
#define TESTBrr(RS, RD)			_O_Mrm(0x84, _b11, _r1(RS), _r1(RD))
#define TESTBrm(RS, MD, MB, MI, MS)	_O_r_X(0x84, _r1(RS), MD, MB, MI, MS)
#define TESTBir(IM, RD)			_O_Mrm_B(0xf6, _b11, _b000, _r1(RD), _u8(IM))
#define TESTBim(IM, MD, MB, MI, MS)	_O_r_X_B(0xf6, _b000, MD, MB, MI, MS, _u8(IM))

#define TESTWrr(RS, RD)			_wO_Mrm(0x85, _b11, _r2(RS), _r2(RD))
#define TESTWrm(RS, MD, MB, MI, MS)	_wO_r_X(0x85, _r2(RS), MD, MB, MI, MS)
#define TESTWir(IM, RD)			_wO_Mrm_W(0xf7, _b11, _b000, _r2(RD), _u16(IM))
#define TESTWim(IM, MD, MB, MI, MS)	_wO_r_X_W(0xf7, _b000, MD, MB, MI, MS, _u16(IM))

#define TESTLrr(RS, RD)			_O_Mrm(0x85, _b11, _r4(RS), _r4(RD))
#define TESTLrm(RS, MD, MB, MI, MS)	_O_r_X(0x85, _r4(RS), MD, MB, MI, MS)
#define TESTLir(IM, RD)			_O_Mrm_L(0xf7, _b11, _b000, _r4(RD), IM)
#define TESTLim(IM, MD, MB, MI, MS)	_O_r_X_L(0xf7, _b000, MD, MB, MI, MS, IM)

#define TESTQrr(RS, RD)			_qO_Mrm(0x85, _b11, _r8(RS), _r8(RD))
#define TESTQir(IM, RD)			_qO_Mrm_L(0xf7, _b11, _b000, _r8(RD), IM)
1123 
1124 
/* XADD: two-byte opcode 0x0fc0 (byte) or 0x0fc1 (word/long), with the
 * source register RS in the ModRM reg field and a register (rr) or
 * memory (rm) destination. */
#define XADDBrr(RS, RD)			_OO_Mrm(0x0fc0, _b11, _r1(RS), _r1(RD))
#define XADDBrm(RS, MD, MB, MI, MS)	_OO_r_X(0x0fc0, _r1(RS), MD, MB, MI, MS)

#define XADDWrr(RS, RD)			_wOO_Mrm(0x0fc1, _b11, _r2(RS), _r2(RD))
#define XADDWrm(RS, MD, MB, MI, MS)	_wOO_r_X(0x0fc1, _r2(RS), MD, MB, MI, MS)

#define XADDLrr(RS, RD)			_OO_Mrm(0x0fc1, _b11, _r4(RS), _r4(RD))
#define XADDLrm(RS, MD, MB, MI, MS)	_OO_r_X(0x0fc1, _r4(RS), MD, MB, MI, MS)
1133 
1134 
/* XCHG: opcode 0x86 (byte) or 0x87 (word/long), with register RS in the
 * ModRM reg field and a register (rr) or memory (rm) second operand. */
#define XCHGBrr(RS, RD)			_O_Mrm(0x86, _b11, _r1(RS), _r1(RD))
#define XCHGBrm(RS, MD, MB, MI, MS)	_O_r_X(0x86, _r1(RS), MD, MB, MI, MS)

#define XCHGWrr(RS, RD)			_wO_Mrm(0x87, _b11, _r2(RS), _r2(RD))
#define XCHGWrm(RS, MD, MB, MI, MS)	_wO_r_X(0x87, _r2(RS), MD, MB, MI, MS)

#define XCHGLrr(RS, RD)			_O_Mrm(0x87, _b11, _r4(RS), _r4(RD))
#define XCHGLrm(RS, MD, MB, MI, MS)	_O_r_X(0x87, _r4(RS), MD, MB, MI, MS)
1143 
1144 
/* XOR.
 *   rr / rm   opcode 0x30 (byte) or 0x31 (word/long/quad)
 *   mr        opcode 0x32 (byte) or 0x33 (word/long)
 *   ir / im   opcode 0x80 (byte) or 0x81 (word/long/quad) with ModRM reg
 *             field _b110 and the immediate appended */
#define XORBrr(RS, RD)			_O_Mrm(0x30, _b11, _r1(RS), _r1(RD))
#define XORBmr(MD, MB, MI, MS, RD)	_O_r_X(0x32, _r1(RD), MD, MB, MI, MS)
#define XORBrm(RS, MD, MB, MI, MS)	_O_r_X(0x30, _r1(RS), MD, MB, MI, MS)
#define XORBir(IM, RD)			_O_Mrm_B(0x80, _b11, _b110, _r1(RD), _su8(IM))
#define XORBim(IM, MD, MB, MI, MS)	_O_r_X_B(0x80, _b110, MD, MB, MI, MS, _su8(IM))

#define XORWrr(RS, RD)			_wO_Mrm(0x31, _b11, _r2(RS), _r2(RD))
#define XORWmr(MD, MB, MI, MS, RD)	_wO_r_X(0x33, _r2(RD), MD, MB, MI, MS)
#define XORWrm(RS, MD, MB, MI, MS)	_wO_r_X(0x31, _r2(RS), MD, MB, MI, MS)
#define XORWir(IM, RD)			_wOs_Mrm_sW(0x81, _b11, _b110, _r2(RD), _su16(IM))
#define XORWim(IM, MD, MB, MI, MS)	_wOs_r_X_sW(0x81, _b110, MD, MB, MI, MS, _su16(IM))

#define XORLrr(RS, RD)			_O_Mrm(0x31, _b11, _r4(RS), _r4(RD))
#define XORLmr(MD, MB, MI, MS, RD)	_O_r_X(0x33, _r4(RD), MD, MB, MI, MS)
#define XORLrm(RS, MD, MB, MI, MS)	_O_r_X(0x31, _r4(RS), MD, MB, MI, MS)
#define XORLir(IM, RD)			_Os_Mrm_sL(0x81, _b11, _b110, _r4(RD), IM)
#define XORLim(IM, MD, MB, MI, MS)	_Os_r_X_sL(0x81, _b110, MD, MB, MI, MS, IM)

#define XORQrr(RS, RD)			_qO_Mrm(0x31, _b11, _r8(RS), _r8(RD))
#define XORQir(IM, RD)			_qOs_Mrm_sL(0x81, _b11, _b110, _r8(RD), IM)
1165 
/* x87 instructions -- yay, we found a use for octal constants :-) */
/* OP is an operation number (written in octal by the users below): its
 * bits above the low three are OR-ed into the base opcode 0xd8, and its
 * low three bits become the ModRM reg field.
 *   ESCmi   memory operand (D,B,I,S)
 *   ESCri   register operand RD (mod = _b11)
 *   ESCrri  two registers, one of which must be _ST0: if RS is _ST0 the
 *           operation is emitted on RD with bit 040 set in OP; if RD is
 *           _ST0 it is emitted on RS with OP unchanged; otherwise JITFAIL.
 * OP is parenthesised at every use so that an expression argument is
 * shifted and masked as a whole. */
#ifdef JIT_X86_64
#define ESCmi(D,B,I,S,OP)	_qOd_r_X(0xd8|((OP) >> 3), ((OP) & 7), D,B,I,S)
#else
#define ESCmi(D,B,I,S,OP)	_O_r_X(0xd8|((OP) >> 3), ((OP) & 7), D,B,I,S)
#endif
#define ESCri(RD,OP)		_O_Mrm(0xd8|((OP) >> 3), _b11, ((OP) & 7), RD)

#define ESCrri(RS,RD,OP)	((RS) == _ST0 ? ESCri(RD,((OP)|040))			\
				 : (RD) == _ST0 ? ESCri(RS,OP)				\
				 : JITFAIL ("coprocessor instruction without st0"))
1177 
/* x87 loads and stores with a memory operand.  Each macro forwards to
 * ESCmi with an octal operation number. */

/* loads */
#define FLDSm(D, B, I, S)	ESCmi(D, B, I, S, 010)	/* fld m32real  */
#define FILDLm(D, B, I, S)	ESCmi(D, B, I, S, 030)	/* fild m32int  */
#define FLDLm(D, B, I, S)	ESCmi(D, B, I, S, 050)	/* fld m64real  */
#define FILDWm(D, B, I, S)	ESCmi(D, B, I, S, 070)	/* fild m16int  */
/* stores */
#define FSTSm(D, B, I, S)	ESCmi(D, B, I, S, 012)	/* fst m32real  */
#define FISTLm(D, B, I, S)	ESCmi(D, B, I, S, 032)	/* fist m32int  */
#define FSTLm(D, B, I, S)	ESCmi(D, B, I, S, 052)	/* fst m64real  */
#define FISTWm(D, B, I, S)	ESCmi(D, B, I, S, 072)	/* fist m16int  */
/* stores that also pop */
#define FSTPSm(D, B, I, S)	ESCmi(D, B, I, S, 013)	/* fstp m32real */
#define FISTPLm(D, B, I, S)	ESCmi(D, B, I, S, 033)	/* fistp m32int */
#define FISTTPLm(D, B, I, S)	ESCmi(D, B, I, S, 031)	/* fisttp m32int */
#define FSTPLm(D, B, I, S)	ESCmi(D, B, I, S, 053)	/* fstp m64real */
#define FISTPWm(D, B, I, S)	ESCmi(D, B, I, S, 073)	/* fistp m16int */
/* 80-bit real and 64-bit integer */
#define FLDTm(D, B, I, S)	ESCmi(D, B, I, S, 035)	/* fld m80real  */
#define FILDQm(D, B, I, S)	ESCmi(D, B, I, S, 075)	/* fild m64int  */
#define FSTPTm(D, B, I, S)	ESCmi(D, B, I, S, 037)	/* fstp m80real */
/* Outside JIT_X86_64 the Q store names fall back to the 32-bit L forms. */
#if defined(JIT_X86_64)
# define FISTPQm(D, B, I, S)	ESCmi(D, B, I, S, 077)	/* fistp m64int */
# define FISTTPQm(D, B, I, S)	ESCmi(D, B, I, S, 051)	/* fisttp m64int */
#else
# define FISTPQm(D, B, I, S)	FISTPLm(D, B, I, S)
# define FISTTPQm(D, B, I, S)	FISTTPLm(D, B, I, S)
#endif
1201 
/* x87 arithmetic between two stack registers.  Each macro forwards to ESCrri
 * with an octal operation selector as the last argument. */
#define FADDrr(RS,RD)		ESCrri(RS, RD, 000)
#define FMULrr(RS,RD)		ESCrri(RS, RD, 001)
#define FSUBrr(RS,RD)		ESCrri(RS, RD, 004)
#define FSUBRrr(RS,RD)		ESCrri(RS, RD, 005)
#define FDIVrr(RS,RD)		ESCrri(RS, RD, 006)
#define FDIVRrr(RS,RD)		ESCrri(RS, RD, 007)
1208 
/* x87 instructions taking a single stack register RD.  Each macro forwards
 * to ESCri with an octal operation selector. */

/* stack manipulation */
#define FLDr(RD)		ESCri(RD, 010)
#define FXCHr(RD)		ESCri(RD, 011)
#define FFREEr(RD)		ESCri(RD, 050)
#define FSTr(RD)		ESCri(RD, 052)
#define FSTPr(RD)		ESCri(RD, 053)

/* comparisons */
#define FCOMr(RD)		ESCri(RD, 002)
#define FCOMPr(RD)		ESCri(RD, 003)
#define FCOMPPr(RD)		ESCri(RD, 073)
#define FCOMIr(RD)		ESCri(RD, 036)
#define FCOMIPr(RD)		ESCri(RD, 076)
#define FUCOMr(RD)		ESCri(RD, 054)
#define FUCOMPr(RD)		ESCri(RD, 055)
#define FUCOMPPr(RD)		ESCri(RD, 025)
#define FUCOMIr(RD)		ESCri(RD, 035)
#define FUCOMIPr(RD)		ESCri(RD, 075)

/* arithmetic, "P" variants */
#define FADDPr(RD)		ESCri(RD, 060)
#define FMULPr(RD)		ESCri(RD, 061)
#define FSUBPr(RD)		ESCri(RD, 064)
#define FSUBRPr(RD)		ESCri(RD, 065)
#define FDIVPr(RD)		ESCri(RD, 066)
#define FDIVRPr(RD)		ESCri(RD, 067)
1230 
/* Store the x87 status word in AX (emits the two bytes 0xdf 0xe0 via _OO).
 * RD must be _AX or _EAX; anything else goes to JITFAIL.  RD is
 * parenthesized so that an expression argument is compared as a whole. */
#define FNSTSWr(RD)		(((RD) == _AX || (RD) == _EAX) ? _OO (0xdfe0)		\
				 : JITFAIL ("AX or EAX expected"))
1233 
/* x87 control word load/store with a memory operand (disp,base,index,scale):
 * opcode byte 0xd9 with selector 5 (FLDCW) or 7 (FNSTCW).
 * NOTE(review): these always go through _O_r_X, whereas ESCmi has a second
 * variant under a preprocessor condition -- confirm this is intended. */
#define FLDCWm(D, B, I, S)	_O_r_X(0xd9, 5, D, B, I, S)
#define FNSTCWm(D, B, I, S)	_O_r_X(0xd9, 7, D, B, I, S)
1236 
/* N byte NOPs.
 *
 * The expansion is a comma expression made of two parts:
 *   - when N >= 8, a first filler (the 0x8d 0xb4 0x26 form with a zero
 *     immediate, followed by a 0x90 byte) is emitted;
 *   - then the sequence selected by (N & 7) is emitted, 0 meaning nothing.
 * Only the "N >= 8" test and the low three bits of N are examined, so the
 * first filler is emitted at most once whatever the value of N.  Since
 * (N & 7) is always in 0..7, the final JITFAIL arm cannot be selected.
 * NOTE(review): the intended byte counts assume _jit_B emits one byte and
 * _jit_I a 4-byte immediate -- confirm against their definitions.
 * N appears several times in the expansion; avoid arguments with side
 * effects.
 */
#define NOPi(N)		(((  (N)    >= 8) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_I(0x00),_jit_B(0x90)) : (void) 0), \
			 (( ((N)&7) == 7) ? (_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_I(0x00)) : \
			  ( ((N)&7) == 6) ? (_jit_B(0x8d),_jit_B(0xb6),_jit_I(0x00)) : \
			  ( ((N)&7) == 5) ? (_jit_B(0x90),_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \
/* leal 0(,%esi), %esi */ ( ((N)&7) == 4) ? (_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \
/* leal (,%esi), %esi */  ( ((N)&7) == 3) ? (_jit_B(0x8d),_jit_B(0x76),_jit_B(0x00)) : \
/* movl %esi, %esi */	  ( ((N)&7) == 2) ? (_jit_B(0x89),_jit_B(0xf6)) : \
			  ( ((N)&7) == 1) ? (_jit_B(0x90)) : \
			  ( ((N)&7) == 0) ? 0 : \
			  JITFAIL(".align argument too large")))
1248 
1249 /* --- Media 128-bit instructions ------------------------------------------ */
1250 
/* Opcode bytes emitted after the 0x0f escape by the SSE encoders in this
 * file.  The trailing notes list the instruction macros that use each one. */
typedef enum {
    X86_SSE_MOV		= 0x10,		/* MOVSS, MOVSD */
    X86_SSE_MOVLP	= 0x12,		/* MOVLPS, MOVLPD, MOVHLPS */
    X86_SSE_MOVHP	= 0x16,		/* MOVHPS, MOVHPD, MOVLHPS */
    X86_SSE_MOVA	= 0x28,		/* MOVAPS, MOVAPD */
    X86_SSE_CVTIS	= 0x2a,		/* CVTPI2PS/PD, CVTSI2SS/SD */
    X86_SSE_CVTTSI	= 0x2c,		/* CVTTSS2SI, CVTTSD2SI */
    X86_SSE_CVTSI	= 0x2d,		/* CVTPS2PI, CVTPD2PI, CVTSS2SI, CVTSD2SI */
    X86_SSE_UCOMI	= 0x2e,		/* UCOMISS, UCOMISD */
    X86_SSE_COMI	= 0x2f,		/* COMISS, COMISD */
    X86_SSE_ROUND	= 0x3a,		/* ROUNDSS, ROUNDSD (second escape byte) */
    X86_SSE_SQRT	= 0x51,
    X86_SSE_RSQRT	= 0x52,
    X86_SSE_RCP		= 0x53,
    X86_SSE_AND		= 0x54,
    X86_SSE_ANDN	= 0x55,
    X86_SSE_OR		= 0x56,
    X86_SSE_XOR		= 0x57,
    X86_SSE_ADD		= 0x58,
    X86_SSE_MUL		= 0x59,
    X86_SSE_CVTSD	= 0x5a,		/* CVTPS2PD, CVTPD2PS, CVTSS2SD, CVTSD2SS */
    X86_SSE_CVTDT	= 0x5b,
    X86_SSE_SUB		= 0x5c,
    X86_SSE_MIN		= 0x5d,
    X86_SSE_DIV		= 0x5e,
    X86_SSE_MAX		= 0x5f,
    X86_SSE_X2G		= 0x6e,		/* MOVDLX, MOVDLM, MOVDQX */
    X86_SSE_EQB		= 0x74,		/* PCMPEQB */
    X86_SSE_EQW		= 0x75,		/* PCMPEQW */
    X86_SSE_EQD		= 0x76,		/* PCMPEQL */
    X86_SSE_G2X		= 0x7e,		/* MOVDXL, MOVDML */
    X86_SSE_MOV2	= 0xd6		/* MOVDQ2Q, MOVQ2DQ */
} x86_sse_t;
1284 
1285 
/* Small helpers shared by the SSE encoders. */

/* 1 when X is non-zero, 0 otherwise. */
#define _BIT(X)			((X) != 0)
/* Low nibble of a register code. */
#define _rR(R)			(0x0f & (R))
#define _rX(R)			_rN(R)
/* True for a positive register code whose low nibble is above 7. */
#define _rXP(R)			(0 < (R) && 7 < _rR(R))
#define _SCL1			_b00

#define _rA(R)			_r4(R)

#define _RSP			0x54

/* Memory-operand emitters: _i_X takes the reg-field value directly, _f_X
 * takes a register code and reduces it with _rX first. */
#define _i_X(op, md, rb, ri, ms)	_r_X(op, md, rb, ri, ms)

#define _f_X(rd, md, rb, ri, ms)	_r_X((int)_rX(rd), md, rb, ri, ms)
1299 
/* REX prefix emission.
 * Without JIT_X86_64 the macro is a void no-op.  With it, the w/r/x/b flags
 * are packed into bits 3..0 and the byte 0x40|bits is emitted through
 * _jit_B when either l is non-zero or at least one flag is set. */
#ifndef JIT_X86_64
# define x86_REXwrxb(l, w, r, x, b) (void)0
#else
# define x86_REXbits(w, r, x, b)  \
  (((int)(w) << 3) | (((int)(r)) << 2) | (((int)(x)) << 1) | ((int)(b)))
# define x86_REXwrxb(l, w, r, x, b)  \
  (((l) || x86_REXbits(w, r, x, b)) \
   ? _jit_B(0x40 | x86_REXbits(w, r, x, b)) \
   : (void)0)
#endif
1308 
/* REX helpers that derive prefix flags from register codes with
 * _BIT(_rXP(reg)).  A trailing '_' in the name marks a slot that is still a
 * register code rather than a ready-made flag. */
#define x86_REXwrx_(l, w, r, x, mr)	x86_REXwrxb(l, w, r, x, _BIT(_rXP(mr)))
#define x86_REXw_x_(l, w, r, x, mr)	\
    x86_REXwrxb(l, w, _BIT(_rXP(r)), x, _BIT(_rXP(mr)))

/* 32-bit operand size (w = 0): register/register and memory forms. */
#define x86_rex_l_rr(rr, mr)		x86_REXw_x_(0, 0, rr, 0, mr)
#define x86_rex_l_mr(rb, ri, rd)	x86_REXw_x_(0, 0, rd, _BIT(_rXP(ri)), rb)
#define x86_rex_l_rm(rs, rb, ri)	x86_REXw_x_(0, 0, rs, _BIT(_rXP(ri)), rb)

/* Names used by the SSE encoders; all resolve to the helpers above. */
#define _rex_ff_rr(rr, mr)		x86_rex_l_rr(rr, mr)
#define _rex_if_rr(rr, mr)		x86_rex_l_rr(rr, mr)
#define _rex_fi_rr(rr, mr)		x86_rex_l_rr(rr, mr)
#define _rex_if_mr(rb, ri, rd)		x86_rex_l_mr(rb, ri, rd)
#define _rex_fi_rm(rs, rb, ri)		x86_rex_l_rm(rs, rb, ri)
1321 
/* Unprefixed SSE encoders: optional REX, 0x0f, the opcode byte, then the
 * operand bytes.  In the names, f/d stand for a register reduced with _rX
 * and i for one reduced with _rA; the "d" spellings are plain aliases. */

/* register, register */
#define __sse_ff_rr(op, rs, rd)						\
    (_rex_ff_rr(rd, rs),						\
     _O(0x0f),								\
     _O(op),								\
     _Mrm(_b11, _rX(rd), _rX(rs)))
#define __sse_if_rr(op, rs, rd)						\
    (_rex_if_rr(rd, rs),						\
     _O(0x0f),								\
     _O(op),								\
     _Mrm(_b11, _rA(rd), _rX(rs)))
#define __sse_fi_rr(op, rs, rd)						\
    (_rex_fi_rr(rd, rs),						\
     _O(0x0f),								\
     _O(op),								\
     _Mrm(_b11, _rX(rd), _rA(rs)))
#define __sse_id_rr(op, rs, rd)		__sse_if_rr(op, rs, rd)
#define __sse_di_rr(op, rs, rd)		__sse_fi_rr(op, rs, rd)

/* memory to register */
#define __sse_if_mr(op, md, rb, ri, ms, rd)				\
    (_rex_if_mr(rb, ri, rd),						\
     _O(0x0f),								\
     _O(op),								\
     _f_X(rd, md, rb, ri, ms))
#define __sse_id_mr(op, md, rb, ri, ms, rd)	__sse_if_mr(op, md, rb, ri, ms, rd)

/* register to memory */
#define __sse_fi_rm(op, rs, md, rb, ri, ms)				\
    (_rex_fi_rm(rs, rb, ri),						\
     _O(0x0f),								\
     _O(op),								\
     _f_X(rs, md, rb, ri, ms))
#define __sse_di_rm(op, rs, md, rb, ri, ms)	__sse_fi_rm(op, rs, md, rb, ri, ms)

/* register to memory, with 0x01 OR-ed into the opcode byte */
#define __sse1_fi_rm(op, rs, md, rb, ri, ms)				\
    (_rex_fi_rm(rs, rb, ri),						\
     _O(0x0f),								\
     _O(0x01 | op),							\
     _f_X(rs, md, rb, ri, ms))
#define __sse1_di_rm(op, rs, md, rb, ri, ms)	__sse1_fi_rm(op, rs, md, rb, ri, ms)
1338 
/* Prefixed SSE encoders: emit the prefix byte px with _jit_B, then the
 * matching unprefixed __sse_* sequence.  The "d" spellings expand to the
 * same sequence as the "f" ones. */

/* register, register */
#define _sse_ff_rr(px, op, rs, rd)	(_jit_B(px), __sse_ff_rr(op, rs, rd))
#define _sse_if_rr(px, op, rs, rd)	(_jit_B(px), __sse_if_rr(op, rs, rd))
#define _sse_id_rr(px, op, rs, rd)	(_jit_B(px), __sse_if_rr(op, rs, rd))
#define _sse_fi_rr(px, op, rs, rd)	(_jit_B(px), __sse_fi_rr(op, rs, rd))
#define _sse_di_rr(px, op, rs, rd)	(_jit_B(px), __sse_fi_rr(op, rs, rd))

/* memory to register */
#define _sse_if_mr(px, op, md, rb, ri, ms, rd)	\
    (_jit_B(px), __sse_if_mr(op, md, rb, ri, ms, rd))
#define _sse_id_mr(px, op, md, rb, ri, ms, rd)	\
    (_jit_B(px), __sse_if_mr(op, md, rb, ri, ms, rd))

/* register to memory */
#define _sse_fi_rm(px, op, rs, md, rb, ri, ms)	\
    (_jit_B(px), __sse_fi_rm(op, rs, md, rb, ri, ms))
#define _sse_di_rm(px, op, rs, md, rb, ri, ms)	\
    (_jit_B(px), __sse_fi_rm(op, rs, md, rb, ri, ms))

/* register to memory, opcode | 0x01 */
#define _sse1_fi_rm(px, op, rs, md, rb, ri, ms)	\
    (_jit_B(px), __sse1_fi_rm(op, rs, md, rb, ri, ms))
#define _sse1_di_rm(px, op, rs, md, rb, ri, ms)	\
    (_jit_B(px), __sse1_fi_rm(op, rs, md, rb, ri, ms))
1355 
/* Encoder families by data type: PS has no prefix, PD uses 0x66, SS uses
 * 0xf3 and SD uses 0xf2.  rr = register/register, mr = memory to register,
 * rm = register to memory, 1rm = register to memory with opcode | 0x01. */

/* register, register */
#define _SSEPSrr(OP,RS,RD)		__sse_ff_rr (      OP, RS, RD)
#define _SSEPDrr(OP,RS,RD)		 _sse_ff_rr (0x66, OP, RS, RD)
#define _SSESSrr(OP,RS,RD)		 _sse_ff_rr (0xf3, OP, RS, RD)
#define _SSESDrr(OP,RS,RD)		 _sse_ff_rr (0xf2, OP, RS, RD)

/* memory to register */
#define _SSEPSmr(OP,MD,MB,MI,MS,RD)	__sse_if_mr (      OP, MD, MB, MI, MS, RD)
#define _SSEPDmr(OP,MD,MB,MI,MS,RD)	 _sse_if_mr (0x66, OP, MD, MB, MI, MS, RD)
#define _SSESSmr(OP,MD,MB,MI,MS,RD)	 _sse_if_mr (0xf3, OP, MD, MB, MI, MS, RD)
#define _SSESDmr(OP,MD,MB,MI,MS,RD)	 _sse_if_mr (0xf2, OP, MD, MB, MI, MS, RD)

/* register to memory */
#define _SSEPSrm(OP,RS,MD,MB,MI,MS)	__sse_fi_rm (      OP, RS, MD, MB, MI, MS)
#define _SSEPDrm(OP,RS,MD,MB,MI,MS)	 _sse_fi_rm (0x66, OP, RS, MD, MB, MI, MS)
#define _SSESSrm(OP,RS,MD,MB,MI,MS)	 _sse_fi_rm (0xf3, OP, RS, MD, MB, MI, MS)
#define _SSESDrm(OP,RS,MD,MB,MI,MS)	 _sse_fi_rm (0xf2, OP, RS, MD, MB, MI, MS)

/* register to memory, opcode | 0x01 */
#define _SSEPS1rm(OP,RS,MD,MB,MI,MS)	__sse1_fi_rm(      OP, RS, MD, MB, MI, MS)
#define _SSEPD1rm(OP,RS,MD,MB,MI,MS)	 _sse1_fi_rm(0x66, OP, RS, MD, MB, MI, MS)
#define _SSESS1rm(OP,RS,MD,MB,MI,MS)	 _sse1_fi_rm(0xf3, OP, RS, MD, MB, MI, MS)
#define _SSESD1rm(OP,RS,MD,MB,MI,MS)	 _sse1_fi_rm(0xf2, OP, RS, MD, MB, MI, MS)
1375 
/* Placeholder register code for instructions without a register operand. */
#define _NOREG 0

/* SSE: load / store the MXCSR register from / to memory.
 * Both emit 0x0f 0xae followed by a memory operand whose reg field is _b10
 * (load) or _b11 (store).
 * NOTE(review): STMXCSRrm passes (MI, MB) to _REXLrm while LDMXCSRmr passes
 * (MB, MI) to _REXLmr; check the order against _REXLrm's parameter list. */
#define LDMXCSRmr(MD, MB, MI, MS)					\
    (_REXLmr(MB, MI, _NOREG), _O(0x0f), _O(0xae),			\
     _i_X(_b10, MD, MB, MI, MS))
#define STMXCSRrm(MD, MB, MI, MS)					\
    (_REXLrm(_NOREG, MI, MB), _O(0x0f), _O(0xae),			\
     _i_X(_b11, MD, MB, MI, MS))
1389 
/* ADD (opcode X86_SSE_ADD).  PS and SS forms are SSE, PD and SD are SSE2. */

/* register source */
#define ADDPSrr(RS, RD)			_SSEPSrr(X86_SSE_ADD, RS, RD)
#define ADDPDrr(RS, RD)			_SSEPDrr(X86_SSE_ADD, RS, RD)
#define ADDSSrr(RS, RD)			_SSESSrr(X86_SSE_ADD, RS, RD)
#define ADDSDrr(RS, RD)			_SSESDrr(X86_SSE_ADD, RS, RD)

/* memory source */
#define ADDPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
#define ADDPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
#define ADDSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
#define ADDSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
1403 
/* ANDN (opcode X86_SSE_ANDN).  The PS forms are SSE, the PD forms SSE2. */

/* register source */
#define ANDNPSrr(RS, RD)		_SSEPSrr(X86_SSE_ANDN, RS, RD)
#define ANDNPDrr(RS, RD)		_SSEPDrr(X86_SSE_ANDN, RS, RD)

/* memory source */
#define ANDNPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
#define ANDNPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
1411 
/* Scalar ANDN names are aliases of the packed encodings.  Each alias
 * forwards to the packed macro with the same operand form, so the "mr"
 * aliases take the five (MD, MB, MI, MS, RD) arguments. */

/* SSE */
#define ANDNSSrr			ANDNPSrr
#define ANDNSSmr			ANDNPSmr

/* SSE2 */
#define ANDNSDrr			ANDNPDrr
#define ANDNSDmr			ANDNPDmr
1419 
/* AND (opcode X86_SSE_AND).  The PS forms are SSE, the PD forms SSE2. */

/* register source */
#define ANDPSrr(RS, RD)			_SSEPSrr(X86_SSE_AND, RS, RD)
#define ANDPDrr(RS, RD)			_SSEPDrr(X86_SSE_AND, RS, RD)

/* memory source */
#define ANDPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
#define ANDPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)
1427 
/* Scalar AND names are aliases of the packed encodings.  Each alias
 * forwards to the packed macro with the same operand form, so the "mr"
 * aliases take the five (MD, MB, MI, MS, RD) arguments. */

/* SSE */
#define ANDSSrr				ANDPSrr
#define ANDSSmr				ANDPSmr

/* SSE2 */
#define ANDSDrr				ANDPDrr
#define ANDSDmr				ANDPDmr
1435 
/* DIV (opcode X86_SSE_DIV).  PS and SS forms are SSE, PD and SD are SSE2. */

/* register source */
#define DIVPSrr(RS, RD)			_SSEPSrr(X86_SSE_DIV, RS, RD)
#define DIVPDrr(RS, RD)			_SSEPDrr(X86_SSE_DIV, RS, RD)
#define DIVSSrr(RS, RD)			_SSESSrr(X86_SSE_DIV, RS, RD)
#define DIVSDrr(RS, RD)			_SSESDrr(X86_SSE_DIV, RS, RD)

/* memory source */
#define DIVPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
#define DIVPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
#define DIVSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
#define DIVSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
1451 
/* MAX (opcode X86_SSE_MAX).  PS and SS forms are SSE, PD and SD are SSE2. */

/* register source */
#define MAXPSrr(RS, RD)			_SSEPSrr(X86_SSE_MAX, RS, RD)
#define MAXPDrr(RS, RD)			_SSEPDrr(X86_SSE_MAX, RS, RD)
#define MAXSSrr(RS, RD)			_SSESSrr(X86_SSE_MAX, RS, RD)
#define MAXSDrr(RS, RD)			_SSESDrr(X86_SSE_MAX, RS, RD)

/* memory source */
#define MAXPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
#define MAXPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
#define MAXSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
#define MAXSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
1467 
/* MIN (opcode X86_SSE_MIN).  PS and SS forms are SSE, PD and SD are SSE2. */

/* register source */
#define MINPSrr(RS, RD)			_SSEPSrr(X86_SSE_MIN, RS, RD)
#define MINPDrr(RS, RD)			_SSEPDrr(X86_SSE_MIN, RS, RD)
#define MINSSrr(RS, RD)			_SSESSrr(X86_SSE_MIN, RS, RD)
#define MINSDrr(RS, RD)			_SSESDrr(X86_SSE_MIN, RS, RD)

/* memory source */
#define MINPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
#define MINPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
#define MINSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
#define MINSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
1483 
/* MUL (opcode X86_SSE_MUL).  PS and SS forms are SSE, PD and SD are SSE2. */

/* register source */
#define MULPSrr(RS, RD)			_SSEPSrr(X86_SSE_MUL, RS, RD)
#define MULPDrr(RS, RD)			_SSEPDrr(X86_SSE_MUL, RS, RD)
#define MULSSrr(RS, RD)			_SSESSrr(X86_SSE_MUL, RS, RD)
#define MULSDrr(RS, RD)			_SSESDrr(X86_SSE_MUL, RS, RD)

/* memory source */
#define MULPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
#define MULPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
#define MULSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
#define MULSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
1499 
/* OR (opcode X86_SSE_OR).  The PS forms are SSE, the PD forms SSE2. */

/* register source */
#define ORPSrr(RS, RD)			_SSEPSrr(X86_SSE_OR, RS, RD)
#define ORPDrr(RS, RD)			_SSEPDrr(X86_SSE_OR, RS, RD)

/* memory source */
#define ORPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
#define ORPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
1507 
/* Scalar OR names are aliases of the packed encodings.  Each alias forwards
 * to the packed macro with the same operand form, so the "mr" aliases take
 * the five (MD, MB, MI, MS, RD) arguments. */

/* SSE */
#define ORSSrr				ORPSrr
#define ORSSmr				ORPSmr

/* SSE2 */
#define ORSDrr				ORPDrr
#define ORSDmr				ORPDmr
1515 
/* Reciprocal (X86_SSE_RCP) and reciprocal square root (X86_SSE_RSQRT).
 * Only PS and SS forms are defined; all are SSE. */

/* register source */
#define RCPPSrr(RS, RD)			_SSEPSrr(X86_SSE_RCP, RS, RD)
#define RCPSSrr(RS, RD)			_SSESSrr(X86_SSE_RCP, RS, RD)
#define RSQRTPSrr(RS, RD)		_SSEPSrr(X86_SSE_RSQRT, RS, RD)
#define RSQRTSSrr(RS, RD)		_SSESSrr(X86_SSE_RSQRT, RS, RD)

/* memory source */
#define RCPPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
#define RCPSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
#define RSQRTPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
#define RSQRTSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
1527 
/* SQRT (opcode X86_SSE_SQRT).  PS and SS forms are SSE, PD and SD are SSE2. */

/* register source */
#define SQRTPSrr(RS, RD)		_SSEPSrr(X86_SSE_SQRT, RS, RD)
#define SQRTPDrr(RS, RD)		_SSEPDrr(X86_SSE_SQRT, RS, RD)
#define SQRTSSrr(RS, RD)		_SSESSrr(X86_SSE_SQRT, RS, RD)
#define SQRTSDrr(RS, RD)		_SSESDrr(X86_SSE_SQRT, RS, RD)

/* memory source */
#define SQRTPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
#define SQRTPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
#define SQRTSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
#define SQRTSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
1543 
/* SUB (opcode X86_SSE_SUB).  PS and SS forms are SSE, PD and SD are SSE2. */

/* register source */
#define SUBPSrr(RS, RD)			_SSEPSrr(X86_SSE_SUB, RS, RD)
#define SUBPDrr(RS, RD)			_SSEPDrr(X86_SSE_SUB, RS, RD)
#define SUBSSrr(RS, RD)			_SSESSrr(X86_SSE_SUB, RS, RD)
#define SUBSDrr(RS, RD)			_SSESDrr(X86_SSE_SUB, RS, RD)

/* memory source */
#define SUBPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
#define SUBPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
#define SUBSSmr(MD, MB, MI, MS, RD)	_SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
#define SUBSDmr(MD, MB, MI, MS, RD)	_SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
1559 
/* XOR (opcode X86_SSE_XOR).  The PS forms are SSE, the PD forms SSE2. */

/* register source */
#define XORPSrr(RS, RD)			_SSEPSrr(X86_SSE_XOR, RS, RD)
#define XORPDrr(RS, RD)			_SSEPDrr(X86_SSE_XOR, RS, RD)

/* memory source */
#define XORPSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
#define XORPDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
1567 
/* Scalar XOR names are aliases of the packed encodings.  Each alias
 * forwards to the packed macro with the same operand form, so the "mr"
 * aliases take the five (MD, MB, MI, MS, RD) arguments. */

/* SSE */
#define XORSSrr				XORPSrr
#define XORSSmr				XORPSmr

/* SSE2 */
#define XORSDrr				XORPDrr
#define XORSDmr				XORPDmr
1575 
/* Ordered (COMI) and unordered (UCOMI) scalar compares.
 * The SS forms (SSE) use the unprefixed PS encoders; the SD forms (SSE2) use
 * the PD encoders, i.e. the same opcode behind a 0x66 prefix. */

/* register source */
#define COMISSrr(RS, RD)		_SSEPSrr(X86_SSE_COMI, RS, RD)
#define COMISDrr(RS, RD)		_SSEPDrr(X86_SSE_COMI, RS, RD)
#define UCOMISSrr(RS, RD)		_SSEPSrr(X86_SSE_UCOMI, RS, RD)
#define UCOMISDrr(RS, RD)		_SSEPDrr(X86_SSE_UCOMI, RS, RD)

/* memory source */
#define COMISSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
#define COMISDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
#define UCOMISSmr(MD, MB, MI, MS, RD)	_SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
#define UCOMISDmr(MD, MB, MI, MS, RD)	_SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
1593 
/* Scalar moves (X86_SSE_MOV) and aligned packed moves (X86_SSE_MOVA).
 * MOVSS/MOVAPS are SSE, MOVSD/MOVAPD are SSE2.  The rm (store) forms go
 * through the *1rm encoders, which OR 0x01 into the opcode byte. */

/* register to register */
#define MOVSSrr(RS, RD)			_SSESSrr (X86_SSE_MOV, RS, RD)
#define MOVSDrr(RS, RD)			_SSESDrr (X86_SSE_MOV, RS, RD)
#define MOVAPSrr(RS, RD)		_SSEPSrr (X86_SSE_MOVA, RS, RD)
#define MOVAPDrr(RS, RD)		_SSEPDrr (X86_SSE_MOVA, RS, RD)

/* memory to register */
#define MOVSSmr(MD, MB, MI, MS, RD)	_SSESSmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
#define MOVSDmr(MD, MB, MI, MS, RD)	_SSESDmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
#define MOVAPSmr(MD, MB, MI, MS, RD)	_SSEPSmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
#define MOVAPDmr(MD, MB, MI, MS, RD)	_SSEPDmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)

/* register to memory */
#define MOVSSrm(RS, MD, MB, MI, MS)	_SSESS1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
#define MOVSDrm(RS, MD, MB, MI, MS)	_SSESD1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
#define MOVAPSrm(RS, MD, MB, MI, MS)	_SSEPS1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
#define MOVAPDrm(RS, MD, MB, MI, MS)	_SSEPD1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
1613 
/* Conversions between floating-point formats and packed integers.
 * Unprefixed forms (CVTPS2PI, CVTPI2PS) are SSE; every other macro in this
 * group is SSE2. */

/* packed single/double -> packed integer (X86_SSE_CVTSI) */
#define CVTPS2PIrr(RS, RD)		__sse_ff_rr(      X86_SSE_CVTSI, RS, RD)
#define CVTPS2PImr(MD, MB, MI, MS, RD)	__sse_if_mr(      X86_SSE_CVTSI, MD, MB, MI, MS, RD)
#define CVTPD2PIrr(RS, RD)		 _sse_ff_rr(0x66, X86_SSE_CVTSI, RS, RD)
#define CVTPD2PImr(MD, MB, MI, MS, RD)	 _sse_id_mr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD)

/* packed integer -> packed single/double (X86_SSE_CVTIS) */
#define CVTPI2PSrr(RS, RD)		__sse_ff_rr(      X86_SSE_CVTIS, RS, RD)
#define CVTPI2PSmr(MD, MB, MI, MS, RD)	__sse_if_mr(      X86_SSE_CVTIS, MD, MB, MI, MS, RD)
#define CVTPI2PDrr(RS, RD)		 _sse_ff_rr(0x66, X86_SSE_CVTIS, RS, RD)
#define CVTPI2PDmr(MD, MB, MI, MS, RD)	 _sse_id_mr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD)

/* single <-> double, packed (X86_SSE_CVTSD) */
#define CVTPS2PDrr(RS, RD)		__sse_ff_rr(      X86_SSE_CVTSD, RS, RD)
#define CVTPS2PDmr(MD, MB, MI, MS, RD)	__sse_if_mr(      X86_SSE_CVTSD, MD, MB, MI, MS, RD)
#define CVTPD2PSrr(RS, RD)		 _sse_ff_rr(0x66, X86_SSE_CVTSD, RS, RD)
#define CVTPD2PSmr(MD, MB, MI, MS, RD)	 _sse_id_mr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD)

/* single <-> double, scalar (X86_SSE_CVTSD) */
#define CVTSS2SDrr(RS, RD)		 _sse_ff_rr(0xf3, X86_SSE_CVTSD, RS, RD)
#define CVTSS2SDmr(MD, MB, MI, MS, RD)	 _sse_id_mr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD)
#define CVTSD2SSrr(RS, RD)		 _sse_ff_rr(0xf2, X86_SSE_CVTSD, RS, RD)
#define CVTSD2SSmr(MD, MB, MI, MS, RD)	 _sse_id_mr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD)
1641 
/* Conversions between scalar floating point and 32-bit integers.
 * The SS forms (prefix 0xf3) are SSE, the SD forms (prefix 0xf2) are SSE2. */

/* float -> integer with truncation (X86_SSE_CVTTSI) */
#define CVTTSS2SILrr(RS, RD)		 _sse_id_rr(0xf3, X86_SSE_CVTTSI, RS, RD)
#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf3, X86_SSE_CVTTSI, MD, MB, MI, MS, RD)
#define CVTTSD2SILrr(RS, RD)		 _sse_id_rr(0xf2, X86_SSE_CVTTSI, RS, RD)
#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf2, X86_SSE_CVTTSI, MD, MB, MI, MS, RD)

/* float -> integer (X86_SSE_CVTSI) */
#define CVTSS2SILrr(RS, RD)		 _sse_if_rr(0xf3, X86_SSE_CVTSI, RS, RD)
#define CVTSS2SILmr(MD, MB, MI, MS, RD)	 _sse_if_mr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD)
#define CVTSD2SILrr(RS, RD)		 _sse_id_rr(0xf2, X86_SSE_CVTSI, RS, RD)
#define CVTSD2SILmr(MD, MB, MI, MS, RD)	 _sse_id_mr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD)

/* integer -> float (X86_SSE_CVTIS) */
#define CVTSI2SSLrr(RS, RD)		 _sse_fi_rr(0xf3, X86_SSE_CVTIS, RS, RD)
#define CVTSI2SSLmr(MD, MB, MI, MS, RD)	 _sse_if_mr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD)
#define CVTSI2SDLrr(RS, RD)		 _sse_di_rr(0xf2, X86_SSE_CVTIS, RS, RD)
#define CVTSI2SDLmr(MD, MB, MI, MS, RD)	 _sse_id_mr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD)
1665 
/* 32-bit moves built on X86_SSE_X2G / X86_SSE_G2X, plus MOVDQ2Q/MOVQ2DQ.
 * The forms carrying a 0x66 prefix are SSE2; the unprefixed "M" forms use
 * the same opcodes without a prefix.
 * NOTE(review): MOVDXLrr and MOVDMLrr go through the *_ff_rr encoders, which
 * appear to place RD in the ModRM reg field, while the corresponding rm
 * forms place RS there -- confirm the operand order against the callers. */

/* opcode X86_SSE_X2G */
#define MOVDLXrr(RS, RD)		 _sse_di_rr(0x66, X86_SSE_X2G, RS, RD)
#define MOVDLXmr(MD, MB, MI, MS, RD)	 _sse_id_mr(0x66, X86_SSE_X2G, MD, MB, MI, MS, RD)
#define MOVDLMrr(RS, RD)		__sse_ff_rr(      X86_SSE_X2G, RS, RD)
#define MOVDLMmr(MD, MB, MI, MS, RD)	__sse_id_mr(      X86_SSE_X2G, MD, MB, MI, MS, RD)

/* opcode X86_SSE_G2X */
#define MOVDXLrr(RS, RD)		 _sse_ff_rr(0x66, X86_SSE_G2X, RS, RD)
#define MOVDXLrm(RS, MD, MB, MI, MS)	 _sse_di_rm(0x66, X86_SSE_G2X, RS, MD, MB, MI, MS)
#define MOVDMLrr(RS, RD)		__sse_ff_rr(      X86_SSE_G2X, RS, RD)
#define MOVDMLrm(RS, MD, MB, MI, MS)	__sse_fi_rm(      X86_SSE_G2X, RS, MD, MB, MI, MS)

/* SSE2: opcode X86_SSE_MOV2 */
#define MOVDQ2Qrr(RS, RD)		 _sse_ff_rr(0xf2, X86_SSE_MOV2, RS, RD)
#define MOVQ2DQrr(RS, RD)		 _sse_ff_rr(0xf3, X86_SSE_MOV2, RS, RD)
1685 
/* SSE: register-to-register half moves.  These reuse the unprefixed
 * X86_SSE_MOVLP / X86_SSE_MOVHP opcodes with a register operand. */
#define MOVHLPSrr(RS, RD)		__sse_ff_rr(X86_SSE_MOVLP, RS, RD)
#define MOVLHPSrr(RS, RD)		__sse_ff_rr(X86_SSE_MOVHP, RS, RD)
1689 
/* SSE2: 128-bit integer moves.  MOVDQA uses the 0x66 prefix, MOVDQU the
 * 0xf3 prefix; loads use opcode 0x6f and stores opcode 0x7f. */

/* register to register */
#define MOVDQArr(RS, RD)		 _sse_ff_rr(0x66, 0x6f, RS, RD)
#define MOVDQUrr(RS, RD)		 _sse_ff_rr(0xf3, 0x6f, RS, RD)

/* memory to register */
#define MOVDQAmr(MD, MB, MI, MS, RD)	 _sse_id_mr(0x66, 0x6f, MD, MB, MI, MS, RD)
#define MOVDQUmr(MD, MB, MI, MS, RD)	 _sse_id_mr(0xf3, 0x6f, MD, MB, MI, MS, RD)

/* register to memory */
#define MOVDQArm(RS, MD, MB, MI, MS)	 _sse_di_rm(0x66, 0x7f, RS, MD, MB, MI, MS)
#define MOVDQUrm(RS, MD, MB, MI, MS)	 _sse_di_rm(0xf3, 0x7f, RS, MD, MB, MI, MS)
1699 
/* High/low half moves between a register and memory (X86_SSE_MOVHP and
 * X86_SSE_MOVLP).  The PD forms (prefix 0x66) are SSE2, the PS forms are
 * SSE.  Stores use the *1* encoders, which OR 0x01 into the opcode byte. */

/* memory to register */
#define MOVHPDmr(MD, MB, MI, MS, RD)	 _sse_id_mr (0x66, X86_SSE_MOVHP, MD, MB, MI, MS, RD)
#define MOVHPSmr(MD, MB, MI, MS, RD)	__sse_if_mr (      X86_SSE_MOVHP, MD, MB, MI, MS, RD)
#define MOVLPDmr(MD, MB, MI, MS, RD)	 _sse_id_mr (0x66, X86_SSE_MOVLP, MD, MB, MI, MS, RD)
#define MOVLPSmr(MD, MB, MI, MS, RD)	__sse_if_mr (      X86_SSE_MOVLP, MD, MB, MI, MS, RD)

/* register to memory */
#define MOVHPDrm(RS, MD, MB, MI, MS)	 _sse1_di_rm(0x66, X86_SSE_MOVHP, RS, MD, MB, MI, MS)
#define MOVHPSrm(RS, MD, MB, MI, MS)	__sse1_fi_rm(      X86_SSE_MOVHP, RS, MD, MB, MI, MS)
#define MOVLPDrm(RS, MD, MB, MI, MS)	 _sse1_di_rm(0x66, X86_SSE_MOVLP, RS, MD, MB, MI, MS)
#define MOVLPSrm(RS, MD, MB, MI, MS)	__sse1_fi_rm(      X86_SSE_MOVLP, RS, MD, MB, MI, MS)
1715 
/* FIXME 0x66 prefix actually required to modify 128 bits register */
/* SSE or SSE2 with 0x66 prefix */
/* Packed compare for equality on bytes (B), words (W) and doublewords (L).
 * In the "rm" forms the register operand is the first parameter, RS, and
 * (MD, MB, MI, MS) describe the memory operand; RS is what gets handed to
 * the encoder as its register argument. */
#define PCMPEQBrr(RS, RD)						\
    _sse_ff_rr(0x66, X86_SSE_EQB, RS, RD)
#define PCMPEQBrm(RS, MD, MB, MI, MS)					\
    _sse_if_mr(0x66, X86_SSE_EQB, MD, MB, MI, MS, RS)
#define PCMPEQWrr(RS, RD)						\
    _sse_ff_rr(0x66, X86_SSE_EQW, RS, RD)
#define PCMPEQWrm(RS, MD, MB, MI, MS)					\
    _sse_if_mr(0x66, X86_SSE_EQW, MD, MB, MI, MS, RS)
#define PCMPEQLrr(RS, RD)						\
    _sse_ff_rr(0x66, X86_SSE_EQD, RS, RD)
#define PCMPEQLrm(RS, MD, MB, MI, MS)					\
    _sse_if_mr(0x66, X86_SSE_EQD, MD, MB, MI, MS, RS)
1730 
/* SSE2 with 0x66 prefix, SSE otherwise */
/* Logical right shift of packed words.
 *   rr: shift count taken from register RS;
 *   rm: register operand RS with a memory operand (MD, MB, MI, MS); RS is
 *       what gets handed to the encoder as its register argument;
 *   ir: immediate count IM, opcode 0x0f 0x71 with reg field _b10.
 * NOTE(review): the ir form uses _REXLrr where PSRLLir/PSRLQir use
 * _rex_if_rr -- confirm both produce the same prefix. */
#define PSRLWrr(RS, RD)							\
    _sse_ff_rr(0x66, 0xd1, RS, RD)
#define PSRLWrm(RS, MD, MB, MI, MS)					\
    _sse_if_mr(0x66, 0xd1, MD, MB, MI, MS, RS)
#define PSRLWir(IM, RD)							\
    (_O(0x66),								\
     _REXLrr(_NOREG, RD),						\
     _O(0x0f),								\
     _O(0x71),								\
     _Mrm(_b11, _b10, _rX(RD)),						\
     _O(IM))
1743 
/* SSE2 with 0x66 prefix, SSE otherwise */
/* Logical right shift of packed doublewords.
 *   rr: shift count taken from register RS;
 *   rm: register operand RS with a memory operand (MD, MB, MI, MS); RS is
 *       what gets handed to the encoder as its register argument;
 *   ir: immediate count IM, opcode 0x0f 0x72 with reg field _b10. */
#define PSRLLrr(RS, RD)							\
    _sse_ff_rr(0x66, 0xd2, RS, RD)
#define PSRLLrm(RS, MD, MB, MI, MS)					\
    _sse_id_mr(0x66, 0xd2, MD, MB, MI, MS, RS)
#define PSRLLir(IM, RD)							\
    (_O(0x66),								\
     _rex_if_rr(_NOREG, RD),						\
     _O(0x0f),								\
     _O(0x72),								\
     _Mrm(_b11, _b10, _rX(RD)),						\
     _O(IM))
1756 
/* SSE2 */
/* Logical right shift of packed quadwords.
 *   rr: shift count taken from register RS;
 *   rm: register operand RS with a memory operand (MD, MB, MI, MS); RS is
 *       what gets handed to the encoder as its register argument;
 *   ir: immediate count IM, opcode 0x0f 0x73 with reg field _b10. */
#define PSRLQrr(RS, RD)							\
    _sse_ff_rr(0x66, 0xd3, RS, RD)
#define PSRLQrm(RS, MD, MB, MI, MS)					\
    _sse_id_mr(0x66, 0xd3, MD, MB, MI, MS, RS)
#define PSRLQir(IM, RD)							\
    (_O(0x66),								\
     _rex_if_rr(_NOREG, RD),						\
     _O(0x0f),								\
     _O(0x73),								\
     _Mrm(_b11, _b10, _rX(RD)),						\
     _O(IM))
1769 
/* SSE4.1 */
/* Each macro emits the 0x66 prefix, an optional REX prefix, a two-byte
 * escape through _OO, one opcode byte and a register/register ModRM byte;
 * the ROUND forms append the immediate IM. */
#define ROUNDSSrri(RS, RD, IM)						\
    (_O(0x66),								\
     _rex_ff_rr(RD, RS),						\
     _OO(0xf00|X86_SSE_ROUND),						\
     _O(0x0a),								\
     _Mrm(_b11, _rX(RD), _rX(RS)),					\
     _O(IM))
#define ROUNDSDrri(RS, RD, IM)						\
    (_O(0x66),								\
     _rex_ff_rr(RD, RS),						\
     _OO(0xf00|X86_SSE_ROUND),						\
     _O(0x0b),								\
     _Mrm(_b11, _rX(RD), _rX(RS)),					\
     _O(IM))
#define PCMPEQQrr(RS, RD)						\
    (_O(0x66),								\
     _rex_ff_rr(RD, RS),						\
     _OO(0x0f38),							\
     _O(0x29),								\
     _Mrm(_b11, _rX(RD), _rX(RS)))
1780 
1781 
#ifdef JIT_X86_64

/* 64-bit operand size variants: same layout as the 32-bit encoders but with
 * the w flag of the REX prefix set.  In the names, l stands for a register
 * reduced with _rA and f/d for one reduced with _rX. */

/* REX prefix with w = 1 */
#define _rex_q_rr(rr, mr)		x86_REXw_x_(0, 1, rr, 0, mr)
#define _rex_dl_rr(rr, mr)		_rex_q_rr(rr, mr)
#define _rex_ld_rr(rr, mr)		_rex_q_rr(rr, mr)

/* unprefixed encoders */
#define __sse_ld_rr(op, rs, rd)						\
    (_rex_ld_rr(rd, rs),						\
     _O(0x0f),								\
     _O(op),								\
     _Mrm(_b11, _rA(rd), _rX(rs)))
#define __sse_dl_rr(op, rs, rd)						\
    (_rex_dl_rr(rd, rs),						\
     _O(0x0f),								\
     _O(op),								\
     _Mrm(_b11, _rX(rd), _rA(rs)))
#define __sse_lf_rr(op, rs, rd)		__sse_ld_rr(op, rs, rd)
#define __sse_fl_rr(op, rs, rd)		__sse_dl_rr(op, rs, rd)

/* prefixed encoders */
#define _sse_ld_rr(px, op, rs, rd)	(_jit_B(px), __sse_ld_rr(op, rs, rd))
#define _sse_dl_rr(px, op, rs, rd)	(_jit_B(px), __sse_dl_rr(op, rs, rd))
#define _sse_lf_rr(px, op, rs, rd)	_sse_ld_rr(px, op, rs, rd)
#define _sse_fl_rr(px, op, rs, rd)	_sse_dl_rr(px, op, rs, rd)

/* instructions */
#define CVTTSD2SIQrr(RS, RD)		 _sse_lf_rr(0xf2, X86_SSE_CVTTSI, RS, RD)
#define CVTSI2SDQrr(RS, RD)		 _sse_dl_rr(0xf2, X86_SSE_CVTIS, RS, RD)
#define MOVDQXrr(RS, RD)		 _sse_dl_rr(0x66, X86_SSE_X2G, RS, RD)

#endif
1805 
1806 /*** References:										*/
1807 /*												*/
1808 /* [1] "Intel Architecture Software Developer's Manual Volume 1: Basic Architecture",		*/
1809 /*     Intel Corporation 1997.									*/
1810 /*												*/
1811 /* [2] "Intel Architecture Software Developer's Manual Volume 2: Instruction Set Reference",	*/
1812 /*     Intel Corporation 1997.									*/
1813 
1814 #endif
1815 #endif /* __lightning_asm_h */
1816 
1817