1 // [AsmJit]
2 // Complete JIT Assembler for C++ Language.
3 //
4 // [License]
5 // Zlib - See COPYING file in this package.
6 
7 // [Guard]
8 #ifndef _ASMJIT_X86_X86ASSEMBLER_H
9 #define _ASMJIT_X86_X86ASSEMBLER_H
10 
11 // [Dependencies - AsmJit]
12 #include "../core/assembler.h"
13 
14 #include "../x86/x86defs.h"
15 #include "../x86/x86operand.h"
16 #include "../x86/x86util.h"
17 
18 // [Api-Begin]
19 #include "../core/apibegin.h"
20 
21 namespace AsmJit {
22 
23 //! @addtogroup AsmJit_X86
24 //! @{
25 
26 // ============================================================================
27 // [AsmJit::X86Assembler]
28 // ============================================================================
29 
30 //! @brief X86Assembler - low level x86/x64 code generation.
31 //!
32 //! @ref X86Assembler is the main class in AsmJit for generating low level
33 //! x86/x64 binary stream. It creates internal buffer where opcodes are stored
34 //! and contains methods that mimics x86/x64 assembler instructions. Code
35 //! generation should be safe, because basic type-checks are done by the C++
//! compiler. It's nearly impossible to create invalid instruction (for example
//! <code>mov [eax], [eax]</code>) that will not be detected at compile time by
//! the C++ compiler.
39 //!
40 //! Each call to assembler intrinsics directly emits instruction to internal
41 //! binary stream. Instruction emitting also contains runtime checks so it
42 //! should be impossible to create an instruction that is not valid (except
43 //! there is bug in AsmJit).
44 //!
45 //! @ref X86Assembler contains internal buffer where all emitted instructions
46 //! are stored. Look at @ref Buffer for an implementation. To generate and
47 //! allocate memory for function use @ref X86Assembler::make() method that will
48 //! allocate memory using the provided memory manager (see @ref MemoryManager)
49 //! and relocates the output code to the provided address. If you want to create
50 //! your function manually, you should look at @ref VirtualMemory interface and
51 //! use @ref X86Assembler::relocCode() method to relocate emitted code into
52 //! provided memory location. You can also take the emitted buffer by @ref
53 //! X86Assembler::take() to do something else with it. If you take buffer, you
54 //! must free it manually by using @ref ASMJIT_FREE() macro.
55 //!
56 //! @section AsmJit_Assembler_CodeGeneration Code Generation
57 //!
//! To generate code you only need to create an instance of @c AsmJit::Assembler
//! and use its intrinsics. The following example shows how to do that:
60 //!
61 //! @code
62 //! // Use AsmJit namespace.
63 //! using namespace AsmJit;
64 //!
65 //! // Create Assembler instance.
66 //! Assembler a;
67 //!
68 //! // Prolog.
69 //! a.push(ebp);
70 //! a.mov(ebp, esp);
71 //!
72 //! // Mov 1024 to EAX, EAX is also return value.
73 //! a.mov(eax, imm(1024));
74 //!
75 //! // Epilog.
76 //! a.mov(esp, ebp);
77 //! a.pop(ebp);
78 //!
79 //! // Return.
80 //! a.ret();
81 //! @endcode
82 //!
83 //! You can see that syntax is very close to Intel one. Only difference is that
84 //! you are calling functions that emits the binary code for you. All registers
85 //! are in @c AsmJit namespace, so it's very comfortable to use it (look at
86 //! first line). There is also used method @c AsmJit::imm() to create an
87 //! immediate value. Use @c AsmJit::uimm() to create unsigned immediate value.
88 //!
89 //! There is also possibility to use memory addresses and immediates. To build
90 //! memory address use @c ptr(), @c byte_ptr(), @c word_ptr(), @c dword_ptr()
91 //! or other friend methods. In most cases you needs only @c ptr() method, but
92 //! there are instructions where you must specify address size,
93 //!
94 //! for example (a is @c AsmJit::Assembler instance):
95 //!
96 //! @code
97 //! a.mov(ptr(eax), imm(0));                   // mov ptr [eax], 0
98 //! a.mov(ptr(eax), edx);                      // mov ptr [eax], edx
99 //! @endcode
100 //!
101 //! But it's also possible to create complex addresses:
102 //!
103 //! @code
104 //! // eax + ecx*x addresses
105 //! a.mov(ptr(eax, ecx, kScaleNone), imm(0));     // mov ptr [eax + ecx], 0
106 //! a.mov(ptr(eax, ecx, kScale2Times), imm(0));     // mov ptr [eax + ecx * 2], 0
107 //! a.mov(ptr(eax, ecx, kScale4Times), imm(0));     // mov ptr [eax + ecx * 4], 0
108 //! a.mov(ptr(eax, ecx, kScale8Times), imm(0));     // mov ptr [eax + ecx * 8], 0
109 //! // eax + ecx*x + disp addresses
110 //! a.mov(ptr(eax, ecx, kScaleNone,  4), imm(0)); // mov ptr [eax + ecx     +  4], 0
111 //! a.mov(ptr(eax, ecx, kScale2Times,  8), imm(0)); // mov ptr [eax + ecx * 2 +  8], 0
112 //! a.mov(ptr(eax, ecx, kScale4Times, 12), imm(0)); // mov ptr [eax + ecx * 4 + 12], 0
113 //! a.mov(ptr(eax, ecx, kScale8Times, 16), imm(0)); // mov ptr [eax + ecx * 8 + 16], 0
114 //! @endcode
115 //!
//! All addresses shown are using @c AsmJit::ptr() to make memory operand.
//! Some assembler instructions (single operand ones) need the memory operand
//! size to be specified. For example calling <code>a.inc(ptr(eax))</code> can't
//! be used. @c AsmJit::Assembler::inc(), @c AsmJit::Assembler::dec() and similar
//! instructions can't be serialized without specifying how many bytes they are
//! operating on. The next example shows how the assembler handles this:
122 //!
123 //! @code
124 //! // [byte] address
125 //! a.inc(byte_ptr(eax));                      // inc byte ptr [eax]
126 //! a.dec(byte_ptr(eax));                      // dec byte ptr [eax]
127 //! // [word] address
128 //! a.inc(word_ptr(eax));                      // inc word ptr [eax]
129 //! a.dec(word_ptr(eax));                      // dec word ptr [eax]
130 //! // [dword] address
131 //! a.inc(dword_ptr(eax));                     // inc dword ptr [eax]
132 //! a.dec(dword_ptr(eax));                     // dec dword ptr [eax]
133 //! @endcode
134 //!
135 //! @section AsmJit_Assembler_CallingJitCode Calling JIT Code
136 //!
//! Once you are done emitting instructions, you can make your function
//! using the @c AsmJit::Assembler::make() method. This method will use a memory
//! manager to allocate virtual memory and relocate the generated code to it. By
//! default the global memory manager is used and the memory is freeable, but of
//! course this default behavior can be overridden by specifying your own memory
//! manager and allocation type. If you want to do something else with the code
//! you can always override the make() method and do what you want.
144 //!
145 //! You can get size of generated code by @c getCodeSize() or @c getOffset()
146 //! methods. These methods returns you code size (or more precisely current code
147 //! offset) in bytes. Use takeCode() to take internal buffer (all pointers in
148 //! @c AsmJit::Assembler instance will be zeroed and current buffer returned)
149 //! to use it. If you don't take it,  @c AsmJit::Assembler destructor will
150 //! free it automatically. To alloc and run code manually don't use
151 //! @c malloc()'ed memory, but instead use @c AsmJit::VirtualMemory::alloc()
152 //! to get memory for executing (specify @c canExecute to @c true) or
153 //! @c AsmJit::MemoryManager that provides more effective and comfortable way
154 //! to allocate virtual memory.
155 //!
156 //! See next example how to allocate memory where you can execute code created
157 //! by @c AsmJit::Assembler:
158 //!
159 //! @code
160 //! using namespace AsmJit;
161 //!
162 //! Assembler a;
163 //!
164 //! // ... your code generation
165 //!
166 //! // your function prototype
167 //! typedef void (*MyFn)();
168 //!
169 //! // make your function
170 //! MyFn fn = asmjit_cast<MyFn>(a.make());
171 //!
172 //! // call your function
173 //! fn();
174 //!
175 //! // If you don't need your function again, free it.
176 //! MemoryManager::getGlobal()->free(fn);
177 //! @endcode
178 //!
179 //! There is also low level alternative how to allocate virtual memory and
180 //! relocate code to it:
181 //!
182 //! @code
183 //! using namespace AsmJit;
184 //!
185 //! Assembler a;
186 //! // Your code generation ...
187 //!
188 //! // Your function prototype.
189 //! typedef void (*MyFn)();
190 //!
191 //! // Alloc memory for your function.
//! MyFn fn = asmjit_cast<MyFn>(
//!   MemoryManager::getGlobal()->alloc(a.getCodeSize()));
194 //!
195 //! // Relocate the code (will make the function).
196 //! a.relocCode(fn);
197 //!
198 //! // Call the generated function.
199 //! fn();
200 //!
201 //! // If you don't need your function anymore, it should be freed.
202 //! MemoryManager::getGlobal()->free(fn);
203 //! @endcode
204 //!
//! @note This was a very primitive example of how to call generated code.
//! In real production code you will never alloc and free code for one run,
//! you will usually use generated code many times.
208 //!
209 //! @section AsmJit_Assembler_Labels Labels
210 //!
211 //! While generating assembler code, you will usually need to create complex
212 //! code with labels. Labels are fully supported and you can call @c jmp or
213 //! @c je (and similar) instructions to initialized or yet uninitialized label.
214 //! Each label expects to be bound into offset. To bind label to specific
215 //! offset, use @c bind() method.
216 //!
217 //! See next example that contains complete code that creates simple memory
218 //! copy function (in DWORD entities).
219 //!
220 //! @code
221 //! // Example: Usage of Label (32-bit code).
222 //! //
223 //! // Create simple DWORD memory copy function:
224 //! // ASMJIT_STDCALL void copy32(uint32_t* dst, const uint32_t* src, size_t count);
225 //! using namespace AsmJit;
226 //!
227 //! // Assembler instance.
228 //! Assembler a;
229 //!
230 //! // Constants.
231 //! const int arg_offset = 8; // Arguments offset (STDCALL EBP).
232 //! const int arg_size = 12;  // Arguments size.
233 //!
234 //! // Labels.
235 //! Label L_Loop = a.newLabel();
236 //!
237 //! // Prolog.
238 //! a.push(ebp);
239 //! a.mov(ebp, esp);
240 //! a.push(esi);
241 //! a.push(edi);
242 //!
243 //! // Fetch arguments
//! a.mov(edi, dword_ptr(ebp, arg_offset + 0)); // Get dst.
//! a.mov(esi, dword_ptr(ebp, arg_offset + 4)); // Get src.
246 //! a.mov(ecx, dword_ptr(ebp, arg_offset + 8)); // Get count.
247 //!
248 //! // Bind L_Loop label to here.
249 //! a.bind(L_Loop);
250 //!
//! // Copy 4 bytes.
252 //! a.mov(eax, dword_ptr(esi));
253 //! a.mov(dword_ptr(edi), eax);
254 //!
255 //! // Increment pointers.
256 //! a.add(esi, 4);
257 //! a.add(edi, 4);
258 //!
//! // Repeat loop while (--ecx != 0).
//! a.dec(ecx);
//! a.jnz(L_Loop);
262 //!
263 //! // Epilog.
264 //! a.pop(edi);
265 //! a.pop(esi);
266 //! a.mov(esp, ebp);
267 //! a.pop(ebp);
268 //!
269 //! // Return: STDCALL convention is to pop stack in called function.
270 //! a.ret(arg_size);
271 //! @endcode
272 //!
273 //! If you need more abstraction for generating assembler code and you want
274 //! to hide calling conventions between 32-bit and 64-bit operating systems,
275 //! look at @c Compiler class that is designed for higher level code
276 //! generation.
277 //!
278 //! @section AsmJit_Assembler_AdvancedCodeGeneration Advanced Code Generation
279 //!
280 //! This section describes some advanced generation features of @c Assembler
281 //! class which can be simply overlooked. The first thing that is very likely
282 //! needed is generic register support. In previous example the named registers
283 //! were used. AsmJit contains functions which can convert register index into
284 //! operand and back.
285 //!
286 //! Let's define function which can be used to generate some abstract code:
287 //!
288 //! @code
289 //! // Simple function that generates dword copy.
290 //! void genCopyDWord(
291 //!   Assembler& a,
292 //!   const GpReg& dst, const GpReg& src, const GpReg& tmp)
293 //! {
294 //!   a.mov(tmp, dword_ptr(src));
295 //!   a.mov(dword_ptr(dst), tmp);
296 //! }
297 //! @endcode
298 //!
299 //! This function can be called like <code>genCopyDWord(a, edi, esi, ebx)</code>
300 //! or by using existing @ref GpReg instances. This abstraction allows to join
301 //! more code sections together without rewriting each to use specific registers.
302 //! You need to take care only about implicit registers which may be used by
303 //! several instructions (like mul, imul, div, idiv, shifting, etc...).
304 //!
305 //! Next, more advanced, but often needed technique is that you can build your
306 //! own registers allocator. X86 architecture contains 8 general purpose registers,
307 //! 8 MMX (MM) registers and 8 SSE (XMM) registers. The X64 (AMD64) architecture
308 //! extends count of general purpose registers and SSE2 registers to 16. Use the
309 //! @c kX86RegNumBase constant to get count of GP or XMM registers or @c kX86RegNumGp,
310 //! @c kX86RegNumMm and @c kX86RegNumXmm constants individually.
311 //!
312 //! To build register from index (value from 0 inclusive to kRegNumXXX
313 //! exclusive) use @ref gpd(), @ref gpq() or @ref gpz() functions. To create
314 //! a 8 or 16-bit register use @ref gpw(), @ref gpb_lo() or @ref gpb_hi().
315 //! To create other registers there are similar methods like @ref mm(), @ref xmm()
316 //! and @ref st().
317 //!
318 //! So our function call to genCopyDWord can be also used like this:
319 //!
320 //! @code
321 //! genCopyDWord(a, gpd(kX86RegIndexEdi), gpd(kX86RegIndexEsi), gpd(kX86RegIndexEbx));
322 //! @endcode
323 //!
324 //! kX86RegIndexXXX are constants defined by @ref kX86RegIndex enum. You can use your
325 //! own register allocator (or register slot manager) to alloc / free registers
326 //! so kX86RegIndexXXX values can be replaced by your variables (0 to kRegNumXXX-1).
327 //!
328 //! @sa @ref X86Compiler.
329 struct X86Assembler : public Assembler
330 {
331   // --------------------------------------------------------------------------
332   // [Construction / Destruction]
333   // --------------------------------------------------------------------------
334 
335   ASMJIT_API X86Assembler(Context* context = JitContext::getGlobal());
336   ASMJIT_API virtual ~X86Assembler();
337 
338   // --------------------------------------------------------------------------
339   // [Buffer - Setters (X86-Extensions)]
340   // --------------------------------------------------------------------------
341 
342   //! @brief Set custom variable @a i at position @a pos.
343   //!
344   //! @note This function is used to patch existing code.
345   ASMJIT_API void setVarAt(size_t pos, sysint_t i, uint8_t isUnsigned, uint32_t size);
346 
347   // --------------------------------------------------------------------------
348   // [Emit]
349   //
350   // These functions are not protected against buffer overrun. Each place of
351   // code which calls these functions ensures that there is some space using
352   // canEmit() method. Emitters are internally protected in AsmJit::Buffer,
353   // but only in debug builds.
354   // --------------------------------------------------------------------------
355 
356   //! @brief Emit single @a opCode without operands.
_emitOpCodeX86Assembler357   inline void _emitOpCode(uint32_t opCode)
358   {
359     // Instruction prefix.
360     if (opCode & 0xFF000000) _emitByte(static_cast<uint8_t>((opCode >> 24) & 0xFF));
361 
362     // Instruction opcodes.
363     if (opCode & 0x00FF0000) _emitByte(static_cast<uint8_t>((opCode >> 16) & 0xFF));
364     if (opCode & 0x0000FF00) _emitByte(static_cast<uint8_t>((opCode >>  8) & 0xFF));
365 
366     // Last opcode is always emitted (can be also 0x00).
367     _emitByte(static_cast<uint8_t>(opCode & 0xFF));
368   }
369 
370   //! @brief Emit MODR/M byte.
_emitModX86Assembler371   inline void _emitMod(uint8_t m, uint8_t o, uint8_t r)
372   { _emitByte(((m & 0x03) << 6) | ((o & 0x07) << 3) | (r & 0x07)); }
373 
374   //! @brief Emit SIB byte.
_emitSibX86Assembler375   inline void _emitSib(uint8_t s, uint8_t i, uint8_t b)
376   { _emitByte(((s & 0x03) << 6) | ((i & 0x07) << 3) | (b & 0x07)); }
377 
378   //! @brief Emit REX prefix (64-bit mode only).
_emitRexRX86Assembler379   inline void _emitRexR(uint8_t w, uint8_t opReg, uint8_t regCode, bool forceRexPrefix)
380   {
381 #if defined(ASMJIT_X64)
382     uint32_t rex;
383 
384     // w - Default operand size(0=Default, 1=64-bit).
385     // r - Register field (1=high bit extension of the ModR/M REG field).
386     // x - Index field not used in RexR
387     // b - Base field (1=high bit extension of the ModR/M or SIB Base field).
388     rex  = (static_cast<uint32_t>(forceRexPrefix != 0)) << 6; // Rex prefix code.
389     rex += (static_cast<uint32_t>(w      )            ) << 3; // Rex.W (w << 3).
390     rex += (static_cast<uint32_t>(opReg  ) & 0x08     ) >> 1; // Rex.R (r << 2).
391     rex += (static_cast<uint32_t>(regCode) & 0x08     ) >> 3; // Rex.B (b << 0).
392 
393     if (rex) _emitByte(static_cast<uint8_t>(rex | 0x40));
394 #else
395     ASMJIT_UNUSED(w);
396     ASMJIT_UNUSED(opReg);
397     ASMJIT_UNUSED(regCode);
398     ASMJIT_UNUSED(forceRexPrefix);
399 #endif // ASMJIT_X64
400   }
401 
402   //! @brief Emit REX prefix (64-bit mode only).
_emitRexRMX86Assembler403   inline void _emitRexRM(uint8_t w, uint8_t opReg, const Operand& rm, bool forceRexPrefix)
404   {
405 #if defined(ASMJIT_X64)
406     uint32_t rex;
407 
408     // w - Default operand size(0=Default, 1=64-bit).
409     // r - Register field (1=high bit extension of the ModR/M REG field).
410     // x - Index field (1=high bit extension of the SIB Index field).
411     // b - Base field (1=high bit extension of the ModR/M or SIB Base field).
412 
413     rex  = (static_cast<uint32_t>(forceRexPrefix != 0)) << 6; // Rex prefix code.
414     rex += (static_cast<uint32_t>(w      )            ) << 3; // Rex.W (w << 3).
415     rex += (static_cast<uint32_t>(opReg  ) & 0x08     ) >> 1; // Rex.R (r << 2).
416 
417     uint32_t b = 0;
418     uint32_t x = 0;
419 
420     if (rm.isReg())
421     {
422       b = (static_cast<const Reg&>(rm).getRegCode() & 0x08) != 0;
423     }
424     else if (rm.isMem())
425     {
426       b = ((static_cast<const Mem&>(rm).getBase()  & 0x8) != 0) & (static_cast<const Mem&>(rm).getBase()  != kInvalidValue);
427       x = ((static_cast<const Mem&>(rm).getIndex() & 0x8) != 0) & (static_cast<const Mem&>(rm).getIndex() != kInvalidValue);
428     }
429 
430     rex += static_cast<uint32_t>(x) << 1; // Rex.R (x << 1).
431     rex += static_cast<uint32_t>(b)     ; // Rex.B (b << 0).
432 
433     if (rex) _emitByte(static_cast<uint8_t>(rex | 0x40));
434 #else
435     ASMJIT_UNUSED(w);
436     ASMJIT_UNUSED(opReg);
437     ASMJIT_UNUSED(rm);
438 #endif // ASMJIT_X64
439   }
440 
441   //! @brief Emit Register / Register - calls _emitMod(3, opReg, r)
_emitModRX86Assembler442   inline void _emitModR(uint8_t opReg, uint8_t r)
443   { _emitMod(3, opReg, r); }
444 
445   //! @brief Emit Register / Register - calls _emitMod(3, opReg, r.code())
_emitModRX86Assembler446   inline void _emitModR(uint8_t opReg, const Reg& r)
447   { _emitMod(3, opReg, r.getRegCode()); }
448 
449   //! @brief Emit register / memory address combination to buffer.
450   //!
  //! This method can handle addresses from simple to complex ones with
  //! index and displacement.
453   ASMJIT_API void _emitModM(uint8_t opReg, const Mem& mem, sysint_t immSize);
454 
455   //! @brief Emit Reg<-Reg or Reg<-Reg|Mem ModRM (can be followed by SIB
456   //! and displacement) to buffer.
457   //!
458   //! This function internally calls @c _emitModM() or _emitModR() that depends
459   //! to @a op type.
460   //!
461   //! @note @a opReg is usually real register ID (see @c R) but some instructions
462   //! have specific format and in that cases @a opReg is part of opcode.
463   ASMJIT_API void _emitModRM(uint8_t opReg, const Operand& op, sysint_t immSize);
464 
  //! @brief Emit CS (code segment) prefix.
466   //!
467   //! Behavior of this function is to emit code prefix only if memory operand
468   //! address uses code segment. Code segment is used through memory operand
469   //! with attached @c AsmJit::Label.
470   ASMJIT_API void _emitSegmentPrefix(const Operand& rm);
471 
472   //! @brief Emit instruction where register is inlined to opcode.
473   ASMJIT_API void _emitX86Inl(uint32_t opCode, uint8_t i16bit, uint8_t rexw, uint8_t reg, bool forceRexPrefix);
474 
475   //! @brief Emit instruction with reg/memory operand.
476   ASMJIT_API void _emitX86RM(uint32_t opCode, uint8_t i16bit, uint8_t rexw, uint8_t o,
477     const Operand& op, sysint_t immSize, bool forceRexPrefix);
478 
479   //! @brief Emit FPU instruction with no operands.
480   ASMJIT_API void _emitFpu(uint32_t opCode);
481 
482   //! @brief Emit FPU instruction with one operand @a sti (index of FPU register).
483   ASMJIT_API void _emitFpuSTI(uint32_t opCode, uint32_t sti);
484 
485   //! @brief Emit FPU instruction with one operand @a opReg and memory operand @a mem.
486   ASMJIT_API void _emitFpuMEM(uint32_t opCode, uint8_t opReg, const Mem& mem);
487 
488   //! @brief Emit MMX/SSE instruction.
489   ASMJIT_API void _emitMmu(uint32_t opCode, uint8_t rexw, uint8_t opReg, const Operand& src, sysint_t immSize);
490 
491   //! @brief Emit displacement.
492   ASMJIT_API LabelLink* _emitDisplacement(LabelData& l_data, sysint_t inlinedDisplacement, int size);
493 
494   //! @brief Emit relative relocation to absolute pointer @a target. It's needed
495   //! to add what instruction is emitting this, because in x64 mode the relative
496   //! displacement can be impossible to calculate and in this case the trampoline
497   //! is used.
498   ASMJIT_API void _emitJmpOrCallReloc(uint32_t instruction, void* target);
499 
500   // Helpers to decrease binary code size. These four emit methods are just
501   // helpers thats used by assembler. They call emitX86() adding NULLs
502   // to first, second and third operand, if needed.
503 
504   //! @brief Emit X86/FPU or MM/XMM instruction.
505   ASMJIT_API void _emitInstruction(uint32_t code);
506 
507   //! @brief Emit X86/FPU or MM/XMM instruction.
508   ASMJIT_API void _emitInstruction(uint32_t code, const Operand* o0);
509 
510   //! @brief Emit X86/FPU or MM/XMM instruction.
511   ASMJIT_API void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1);
512 
513   //! @brief Emit X86/FPU or MM/XMM instruction.
514   //!
515   //! Operands @a o1, @a o2 or @a o3 can be @c NULL if they are not used.
516   //!
517   //! Hint: Use @c emitX86() helpers to emit instructions.
518   ASMJIT_API void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2);
519 
520   //! @brief Private method for emitting jcc.
521   ASMJIT_API void _emitJcc(uint32_t code, const Label* label, uint32_t hint);
522 
523   //! @brief Private method for emitting short jcc.
_emitShortJccX86Assembler524   inline void _emitShortJcc(uint32_t code, const Label* label, uint32_t hint)
525   {
526     _emitOptions |= kX86EmitOptionShortJump;
527     _emitJcc(code, label, hint);
528   }
529 
530   // --------------------------------------------------------------------------
531   // [EmbedLabel]
532   // --------------------------------------------------------------------------
533 
534   //! @brief Embed absolute label pointer (4 or 8 bytes).
535   ASMJIT_API void embedLabel(const Label& label);
536 
537   // --------------------------------------------------------------------------
538   // [Align]
539   // --------------------------------------------------------------------------
540 
541   //! @brief Align target buffer to @a m bytes.
542   //!
543   //! Typical usage of this is to align labels at start of the inner loops.
544   //!
545   //! Inserts @c nop() instructions or CPU optimized NOPs.
546   ASMJIT_API void align(uint32_t m);
547 
548   // --------------------------------------------------------------------------
549   // [Label]
550   // --------------------------------------------------------------------------
551 
552   //! @brief Create and return new label.
553   ASMJIT_API Label newLabel();
554 
555   //! @brief Register labels (used by @c Compiler).
556   ASMJIT_API void registerLabels(size_t count);
557 
558   //! @brief Bind label to the current offset.
559   //!
560   //! @note Label can be bound only once!
561   ASMJIT_API void bind(const Label& label);
562 
563   // --------------------------------------------------------------------------
564   // [Reloc]
565   // --------------------------------------------------------------------------
566 
567   ASMJIT_API virtual size_t relocCode(void* dst, sysuint_t addressBase) const;
568 
569   // --------------------------------------------------------------------------
570   // [Make]
571   // --------------------------------------------------------------------------
572 
573   ASMJIT_API virtual void* make();
574 
575   // --------------------------------------------------------------------------
576   // [Embed]
577   // --------------------------------------------------------------------------
578 
579   //! @brief Add 8-bit integer data to the instuction stream.
dbX86Assembler580   inline void db(uint8_t  x) { embed(&x, 1); }
581   //! @brief Add 16-bit integer data to the instuction stream.
dwX86Assembler582   inline void dw(uint16_t x) { embed(&x, 2); }
583   //! @brief Add 32-bit integer data to the instuction stream.
ddX86Assembler584   inline void dd(uint32_t x) { embed(&x, 4); }
585   //! @brief Add 64-bit integer data to the instuction stream.
dqX86Assembler586   inline void dq(uint64_t x) { embed(&x, 8); }
587 
588   //! @brief Add 8-bit integer data to the instuction stream.
dint8X86Assembler589   inline void dint8(int8_t x) { embed(&x, sizeof(int8_t)); }
590   //! @brief Add 8-bit integer data to the instuction stream.
duint8X86Assembler591   inline void duint8(uint8_t x) { embed(&x, sizeof(uint8_t)); }
592 
593   //! @brief Add 16-bit integer data to the instuction stream.
dint16X86Assembler594   inline void dint16(int16_t x) { embed(&x, sizeof(int16_t)); }
595   //! @brief Add 16-bit integer data to the instuction stream.
duint16X86Assembler596   inline void duint16(uint16_t x) { embed(&x, sizeof(uint16_t)); }
597 
598   //! @brief Add 32-bit integer data to the instuction stream.
dint32X86Assembler599   inline void dint32(int32_t x) { embed(&x, sizeof(int32_t)); }
600   //! @brief Add 32-bit integer data to the instuction stream.
duint32X86Assembler601   inline void duint32(uint32_t x) { embed(&x, sizeof(uint32_t)); }
602 
603   //! @brief Add 64-bit integer data to the instuction stream.
dint64X86Assembler604   inline void dint64(int64_t x) { embed(&x, sizeof(int64_t)); }
605   //! @brief Add 64-bit integer data to the instuction stream.
duint64X86Assembler606   inline void duint64(uint64_t x) { embed(&x, sizeof(uint64_t)); }
607 
608   //! @brief Add system-integer data to the instuction stream.
dintptrX86Assembler609   inline void dintptr(intptr_t x) { embed(&x, sizeof(intptr_t)); }
610   //! @brief Add system-integer data to the instuction stream.
duintptrX86Assembler611   inline void duintptr(uintptr_t x) { embed(&x, sizeof(uintptr_t)); }
612 
613   //! @brief Add float data to the instuction stream.
dfloatX86Assembler614   inline void dfloat(float x) { embed(&x, sizeof(float)); }
615   //! @brief Add double data to the instuction stream.
ddoubleX86Assembler616   inline void ddouble(double x) { embed(&x, sizeof(double)); }
617 
618   //! @brief Add pointer data to the instuction stream.
dptrX86Assembler619   inline void dptr(void* x) { embed(&x, sizeof(void*)); }
620 
621   //! @brief Add MM data to the instuction stream.
dmmX86Assembler622   inline void dmm(const MmData& x) { embed(&x, sizeof(MmData)); }
623   //! @brief Add XMM data to the instuction stream.
dxmmX86Assembler624   inline void dxmm(const XmmData& x) { embed(&x, sizeof(XmmData)); }
625 
626   //! @brief Add data to the instuction stream.
dataX86Assembler627   inline void data(const void* data, size_t size) { embed(data, size); }
628 
629   //! @brief Add data in a given structure instance to the instuction stream.
630   template<typename T>
dstructX86Assembler631   inline void dstruct(const T& x) { embed(&x, sizeof(T)); }
632 
633   // --------------------------------------------------------------------------
634   // [X86 Instructions]
635   // --------------------------------------------------------------------------
636 
637   //! @brief Add with Carry.
adcX86Assembler638   inline void adc(const GpReg& dst, const GpReg& src)
639   { _emitInstruction(kX86InstAdc, &dst, &src); }
640   //! @brief Add with Carry.
adcX86Assembler641   inline void adc(const GpReg& dst, const Mem& src)
642   { _emitInstruction(kX86InstAdc, &dst, &src); }
643   //! @brief Add with Carry.
adcX86Assembler644   inline void adc(const GpReg& dst, const Imm& src)
645   { _emitInstruction(kX86InstAdc, &dst, &src); }
646   //! @brief Add with Carry.
adcX86Assembler647   inline void adc(const Mem& dst, const GpReg& src)
648   { _emitInstruction(kX86InstAdc, &dst, &src); }
649   //! @brief Add with Carry.
adcX86Assembler650   inline void adc(const Mem& dst, const Imm& src)
651   { _emitInstruction(kX86InstAdc, &dst, &src); }
652 
653   //! @brief Add.
addX86Assembler654   inline void add(const GpReg& dst, const GpReg& src)
655   { _emitInstruction(kX86InstAdd, &dst, &src); }
656   //! @brief Add.
addX86Assembler657   inline void add(const GpReg& dst, const Mem& src)
658   { _emitInstruction(kX86InstAdd, &dst, &src); }
659   //! @brief Add.
addX86Assembler660   inline void add(const GpReg& dst, const Imm& src)
661   { _emitInstruction(kX86InstAdd, &dst, &src); }
662   //! @brief Add.
addX86Assembler663   inline void add(const Mem& dst, const GpReg& src)
664   { _emitInstruction(kX86InstAdd, &dst, &src); }
665   //! @brief Add.
addX86Assembler666   inline void add(const Mem& dst, const Imm& src)
667   { _emitInstruction(kX86InstAdd, &dst, &src); }
668 
669   //! @brief Logical And.
and_X86Assembler670   inline void and_(const GpReg& dst, const GpReg& src)
671   { _emitInstruction(kX86InstAnd, &dst, &src); }
672   //! @brief Logical And.
and_X86Assembler673   inline void and_(const GpReg& dst, const Mem& src)
674   { _emitInstruction(kX86InstAnd, &dst, &src); }
675   //! @brief Logical And.
and_X86Assembler676   inline void and_(const GpReg& dst, const Imm& src)
677   { _emitInstruction(kX86InstAnd, &dst, &src); }
678   //! @brief Logical And.
and_X86Assembler679   inline void and_(const Mem& dst, const GpReg& src)
680   { _emitInstruction(kX86InstAnd, &dst, &src); }
681   //! @brief Logical And.
and_X86Assembler682   inline void and_(const Mem& dst, const Imm& src)
683   { _emitInstruction(kX86InstAnd, &dst, &src); }
684 
685   //! @brief Bit Scan Forward.
bsfX86Assembler686   inline void bsf(const GpReg& dst, const GpReg& src)
687   {
688     ASMJIT_ASSERT(!dst.isGpb());
689     _emitInstruction(kX86InstBsf, &dst, &src);
690   }
691   //! @brief Bit Scan Forward.
bsfX86Assembler692   inline void bsf(const GpReg& dst, const Mem& src)
693   {
694     ASMJIT_ASSERT(!dst.isGpb());
695     _emitInstruction(kX86InstBsf, &dst, &src);
696   }
697 
698   //! @brief Bit Scan Reverse.
bsrX86Assembler699   inline void bsr(const GpReg& dst, const GpReg& src)
700   {
701     ASMJIT_ASSERT(!dst.isGpb());
702     _emitInstruction(kX86InstBsr, &dst, &src);
703   }
704   //! @brief Bit Scan Reverse.
bsrX86Assembler705   inline void bsr(const GpReg& dst, const Mem& src)
706   {
707     ASMJIT_ASSERT(!dst.isGpb());
708     _emitInstruction(kX86InstBsr, &dst, &src);
709   }
710 
711   //! @brief Byte swap (32-bit or 64-bit registers only) (i486).
bswapX86Assembler712   inline void bswap(const GpReg& dst)
713   {
714     ASMJIT_ASSERT(dst.getRegType() == kX86RegTypeGpd || dst.getRegType() == kX86RegTypeGpq);
715     _emitInstruction(kX86InstBSwap, &dst);
716   }
717 
718   //! @brief Bit test.
btX86Assembler719   inline void bt(const GpReg& dst, const GpReg& src)
720   { _emitInstruction(kX86InstBt, &dst, &src); }
721   //! @brief Bit test.
btX86Assembler722   inline void bt(const GpReg& dst, const Imm& src)
723   { _emitInstruction(kX86InstBt, &dst, &src); }
724   //! @brief Bit test.
btX86Assembler725   inline void bt(const Mem& dst, const GpReg& src)
726   { _emitInstruction(kX86InstBt, &dst, &src); }
727   //! @brief Bit test.
btX86Assembler728   inline void bt(const Mem& dst, const Imm& src)
729   { _emitInstruction(kX86InstBt, &dst, &src); }
730 
731   //! @brief Bit test and complement.
btcX86Assembler732   inline void btc(const GpReg& dst, const GpReg& src)
733   { _emitInstruction(kX86InstBtc, &dst, &src); }
734   //! @brief Bit test and complement.
btcX86Assembler735   inline void btc(const GpReg& dst, const Imm& src)
736   { _emitInstruction(kX86InstBtc, &dst, &src); }
737   //! @brief Bit test and complement.
btcX86Assembler738   inline void btc(const Mem& dst, const GpReg& src)
739   { _emitInstruction(kX86InstBtc, &dst, &src); }
740   //! @brief Bit test and complement.
btcX86Assembler741   inline void btc(const Mem& dst, const Imm& src)
742   { _emitInstruction(kX86InstBtc, &dst, &src); }
743 
744   //! @brief Bit test and reset.
btrX86Assembler745   inline void btr(const GpReg& dst, const GpReg& src)
746   { _emitInstruction(kX86InstBtr, &dst, &src); }
747   //! @brief Bit test and reset.
btrX86Assembler748   inline void btr(const GpReg& dst, const Imm& src)
749   { _emitInstruction(kX86InstBtr, &dst, &src); }
750   //! @brief Bit test and reset.
btrX86Assembler751   inline void btr(const Mem& dst, const GpReg& src)
752   { _emitInstruction(kX86InstBtr, &dst, &src); }
753   //! @brief Bit test and reset.
btrX86Assembler754   inline void btr(const Mem& dst, const Imm& src)
755   { _emitInstruction(kX86InstBtr, &dst, &src); }
756 
757   //! @brief Bit test and set.
btsX86Assembler758   inline void bts(const GpReg& dst, const GpReg& src)
759   { _emitInstruction(kX86InstBts, &dst, &src); }
760   //! @brief Bit test and set.
btsX86Assembler761   inline void bts(const GpReg& dst, const Imm& src)
762   { _emitInstruction(kX86InstBts, &dst, &src); }
763   //! @brief Bit test and set.
btsX86Assembler764   inline void bts(const Mem& dst, const GpReg& src)
765   { _emitInstruction(kX86InstBts, &dst, &src); }
766   //! @brief Bit test and set.
btsX86Assembler767   inline void bts(const Mem& dst, const Imm& src)
768   { _emitInstruction(kX86InstBts, &dst, &src); }
769 
770   //! @brief Call Procedure.
callX86Assembler771   inline void call(const GpReg& dst)
772   {
773     ASMJIT_ASSERT(dst.isRegType(kX86RegTypeGpz));
774     _emitInstruction(kX86InstCall, &dst);
775   }
776   //! @brief Call Procedure.
callX86Assembler777   inline void call(const Mem& dst)
778   { _emitInstruction(kX86InstCall, &dst); }
779   //! @brief Call Procedure.
callX86Assembler780   inline void call(const Imm& dst)
781   { _emitInstruction(kX86InstCall, &dst); }
782   //! @brief Call Procedure.
783   //! @overload
callX86Assembler784   inline void call(void* dst)
785   {
786     Imm imm((sysint_t)dst);
787     _emitInstruction(kX86InstCall, &imm);
788   }
789 
790   //! @brief Call Procedure.
callX86Assembler791   inline void call(const Label& label)
792   { _emitInstruction(kX86InstCall, &label); }
793 
794   //! @brief Convert Byte to Word (Sign Extend).
795   //!
796   //! AX <- Sign Extend AL
cbwX86Assembler797   inline void cbw()
798   { _emitInstruction(kX86InstCbw); }
799 
800   //! @brief Convert Word to DWord (Sign Extend).
801   //!
802   //! DX:AX <- Sign Extend AX
cwdX86Assembler803   inline void cwd()
804   { _emitInstruction(kX86InstCwd); }
805 
806   //! @brief Convert Word to DWord (Sign Extend).
807   //!
808   //! EAX <- Sign Extend AX
cwdeX86Assembler809   inline void cwde()
810   { _emitInstruction(kX86InstCwde); }
811 
812   //! @brief Convert DWord to QWord (Sign Extend).
813   //!
814   //! EDX:EAX <- Sign Extend EAX
cdqX86Assembler815   inline void cdq()
816   { _emitInstruction(kX86InstCdq); }
817 
818 #if defined(ASMJIT_X64)
819   //! @brief Convert DWord to QWord (Sign Extend).
820   //!
821   //! RAX <- Sign Extend EAX
cdqeX86Assembler822   inline void cdqe()
823   { _emitInstruction(kX86InstCdqe); }
824 #endif // ASMJIT_X64
825 
826   //! @brief Clear Carry flag
827   //!
828   //! This instruction clears the CF flag in the EFLAGS register.
clcX86Assembler829   inline void clc()
830   { _emitInstruction(kX86InstClc); }
831 
832   //! @brief Clear Direction flag
833   //!
834   //! This instruction clears the DF flag in the EFLAGS register.
cldX86Assembler835   inline void cld()
836   { _emitInstruction(kX86InstCld); }
837 
838   //! @brief Complement Carry Flag.
839   //!
840   //! This instruction complements the CF flag in the EFLAGS register.
841   //! (CF = NOT CF)
cmcX86Assembler842   inline void cmc()
843   { _emitInstruction(kX86InstCmc); }
844 
845   //! @brief Conditional Move.
cmovX86Assembler846   inline void cmov(kX86Cond cc, const GpReg& dst, const GpReg& src)
847   { _emitInstruction(X86Util::getCMovccInstFromCond(cc), &dst, &src); }
848 
849   //! @brief Conditional Move.
cmovX86Assembler850   inline void cmov(kX86Cond cc, const GpReg& dst, const Mem& src)
851   { _emitInstruction(X86Util::getCMovccInstFromCond(cc), &dst, &src); }
852 
853   //! @brief Conditional Move.
cmovaX86Assembler854   inline void cmova  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovA  , &dst, &src); }
855   //! @brief Conditional Move.
cmovaX86Assembler856   inline void cmova  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovA  , &dst, &src); }
857   //! @brief Conditional Move.
cmovaeX86Assembler858   inline void cmovae (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovAE , &dst, &src); }
859   //! @brief Conditional Move.
cmovaeX86Assembler860   inline void cmovae (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovAE , &dst, &src); }
861   //! @brief Conditional Move.
cmovbX86Assembler862   inline void cmovb  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovB  , &dst, &src); }
863   //! @brief Conditional Move.
cmovbX86Assembler864   inline void cmovb  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovB  , &dst, &src); }
865   //! @brief Conditional Move.
cmovbeX86Assembler866   inline void cmovbe (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovBE , &dst, &src); }
867   //! @brief Conditional Move.
cmovbeX86Assembler868   inline void cmovbe (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovBE , &dst, &src); }
869   //! @brief Conditional Move.
cmovcX86Assembler870   inline void cmovc  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovC  , &dst, &src); }
871   //! @brief Conditional Move.
cmovcX86Assembler872   inline void cmovc  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovC  , &dst, &src); }
873   //! @brief Conditional Move.
cmoveX86Assembler874   inline void cmove  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovE  , &dst, &src); }
875   //! @brief Conditional Move.
cmoveX86Assembler876   inline void cmove  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovE  , &dst, &src); }
877   //! @brief Conditional Move.
cmovgX86Assembler878   inline void cmovg  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovG  , &dst, &src); }
879   //! @brief Conditional Move.
cmovgX86Assembler880   inline void cmovg  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovG  , &dst, &src); }
881   //! @brief Conditional Move.
cmovgeX86Assembler882   inline void cmovge (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovGE , &dst, &src); }
883   //! @brief Conditional Move.
cmovgeX86Assembler884   inline void cmovge (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovGE , &dst, &src); }
885   //! @brief Conditional Move.
cmovlX86Assembler886   inline void cmovl  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovL  , &dst, &src); }
887   //! @brief Conditional Move.
cmovlX86Assembler888   inline void cmovl  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovL  , &dst, &src); }
889   //! @brief Conditional Move.
cmovleX86Assembler890   inline void cmovle (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovLE , &dst, &src); }
891   //! @brief Conditional Move.
cmovleX86Assembler892   inline void cmovle (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovLE , &dst, &src); }
893   //! @brief Conditional Move.
cmovnaX86Assembler894   inline void cmovna (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNA , &dst, &src); }
895   //! @brief Conditional Move.
cmovnaX86Assembler896   inline void cmovna (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNA , &dst, &src); }
897   //! @brief Conditional Move.
cmovnaeX86Assembler898   inline void cmovnae(const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNAE, &dst, &src); }
899   //! @brief Conditional Move.
cmovnaeX86Assembler900   inline void cmovnae(const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNAE, &dst, &src); }
901   //! @brief Conditional Move.
cmovnbX86Assembler902   inline void cmovnb (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNB , &dst, &src); }
903   //! @brief Conditional Move.
cmovnbX86Assembler904   inline void cmovnb (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNB , &dst, &src); }
905   //! @brief Conditional Move.
cmovnbeX86Assembler906   inline void cmovnbe(const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNBE, &dst, &src); }
907   //! @brief Conditional Move.
cmovnbeX86Assembler908   inline void cmovnbe(const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNBE, &dst, &src); }
909   //! @brief Conditional Move.
cmovncX86Assembler910   inline void cmovnc (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNC , &dst, &src); }
911   //! @brief Conditional Move.
cmovncX86Assembler912   inline void cmovnc (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNC , &dst, &src); }
913   //! @brief Conditional Move.
cmovneX86Assembler914   inline void cmovne (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNE , &dst, &src); }
915   //! @brief Conditional Move.
cmovneX86Assembler916   inline void cmovne (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNE , &dst, &src); }
917   //! @brief Conditional Move.
cmovngX86Assembler918   inline void cmovng (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNG , &dst, &src); }
919   //! @brief Conditional Move.
cmovngX86Assembler920   inline void cmovng (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNG , &dst, &src); }
921   //! @brief Conditional Move.
cmovngeX86Assembler922   inline void cmovnge(const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNGE, &dst, &src); }
923   //! @brief Conditional Move.
cmovngeX86Assembler924   inline void cmovnge(const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNGE, &dst, &src); }
925   //! @brief Conditional Move.
cmovnlX86Assembler926   inline void cmovnl (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNL , &dst, &src); }
927   //! @brief Conditional Move.
cmovnlX86Assembler928   inline void cmovnl (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNL , &dst, &src); }
929   //! @brief Conditional Move.
cmovnleX86Assembler930   inline void cmovnle(const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNLE, &dst, &src); }
931   //! @brief Conditional Move.
cmovnleX86Assembler932   inline void cmovnle(const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNLE, &dst, &src); }
933   //! @brief Conditional Move.
cmovnoX86Assembler934   inline void cmovno (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNO , &dst, &src); }
935   //! @brief Conditional Move.
cmovnoX86Assembler936   inline void cmovno (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNO , &dst, &src); }
937   //! @brief Conditional Move.
cmovnpX86Assembler938   inline void cmovnp (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNP , &dst, &src); }
939   //! @brief Conditional Move.
cmovnpX86Assembler940   inline void cmovnp (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNP , &dst, &src); }
941   //! @brief Conditional Move.
cmovnsX86Assembler942   inline void cmovns (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNS , &dst, &src); }
943   //! @brief Conditional Move.
cmovnsX86Assembler944   inline void cmovns (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNS , &dst, &src); }
945   //! @brief Conditional Move.
cmovnzX86Assembler946   inline void cmovnz (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovNZ , &dst, &src); }
947   //! @brief Conditional Move.
cmovnzX86Assembler948   inline void cmovnz (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNZ , &dst, &src); }
949   //! @brief Conditional Move.
cmovoX86Assembler950   inline void cmovo  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovO  , &dst, &src); }
951   //! @brief Conditional Move.
cmovoX86Assembler952   inline void cmovo  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovO  , &dst, &src); }
953   //! @brief Conditional Move.
cmovpX86Assembler954   inline void cmovp  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovP  , &dst, &src); }
955   //! @brief Conditional Move.
cmovpX86Assembler956   inline void cmovp  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovP  , &dst, &src); }
957   //! @brief Conditional Move.
cmovpeX86Assembler958   inline void cmovpe (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovPE , &dst, &src); }
959   //! @brief Conditional Move.
cmovpeX86Assembler960   inline void cmovpe (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovPE , &dst, &src); }
961   //! @brief Conditional Move.
cmovpoX86Assembler962   inline void cmovpo (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovPO , &dst, &src); }
963   //! @brief Conditional Move.
cmovpoX86Assembler964   inline void cmovpo (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovPO , &dst, &src); }
965   //! @brief Conditional Move.
cmovsX86Assembler966   inline void cmovs  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovS  , &dst, &src); }
967   //! @brief Conditional Move.
cmovsX86Assembler968   inline void cmovs  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovS  , &dst, &src); }
969   //! @brief Conditional Move.
cmovzX86Assembler970   inline void cmovz  (const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstCMovZ  , &dst, &src); }
971   //! @brief Conditional Move.
cmovzX86Assembler972   inline void cmovz  (const GpReg& dst, const Mem& src)   { _emitInstruction(kX86InstCMovZ  , &dst, &src); }
973 
974   //! @brief Compare Two Operands.
cmpX86Assembler975   inline void cmp(const GpReg& dst, const GpReg& src)
976   { _emitInstruction(kX86InstCmp, &dst, &src); }
977   //! @brief Compare Two Operands.
cmpX86Assembler978   inline void cmp(const GpReg& dst, const Mem& src)
979   { _emitInstruction(kX86InstCmp, &dst, &src); }
980   //! @brief Compare Two Operands.
cmpX86Assembler981   inline void cmp(const GpReg& dst, const Imm& src)
982   { _emitInstruction(kX86InstCmp, &dst, &src); }
983   //! @brief Compare Two Operands.
cmpX86Assembler984   inline void cmp(const Mem& dst, const GpReg& src)
985   { _emitInstruction(kX86InstCmp, &dst, &src); }
986   //! @brief Compare Two Operands.
cmpX86Assembler987   inline void cmp(const Mem& dst, const Imm& src)
988   { _emitInstruction(kX86InstCmp, &dst, &src); }
989 
990   //! @brief Compare and Exchange (i486).
cmpxchgX86Assembler991   inline void cmpxchg(const GpReg& dst, const GpReg& src)
992   { _emitInstruction(kX86InstCmpXCHG, &dst, &src); }
993   //! @brief Compare and Exchange (i486).
cmpxchgX86Assembler994   inline void cmpxchg(const Mem& dst, const GpReg& src)
995   { _emitInstruction(kX86InstCmpXCHG, &dst, &src); }
996 
997   //! @brief Compares the 64-bit value in EDX:EAX with the memory operand (Pentium).
998   //!
999   //! If the values are equal, then this instruction stores the 64-bit value
1000   //! in ECX:EBX into the memory operand and sets the zero flag. Otherwise,
1001   //! this instruction copies the 64-bit memory operand into the EDX:EAX
1002   //! registers and clears the zero flag.
cmpxchg8bX86Assembler1003   inline void cmpxchg8b(const Mem& dst)
1004   { _emitInstruction(kX86InstCmpXCHG8B, &dst); }
1005 
1006 #if defined(ASMJIT_X64)
1007   //! @brief Compares the 128-bit value in RDX:RAX with the memory operand (X64).
1008   //!
1009   //! If the values are equal, then this instruction stores the 128-bit value
1010   //! in RCX:RBX into the memory operand and sets the zero flag. Otherwise,
1011   //! this instruction copies the 128-bit memory operand into the RDX:RAX
1012   //! registers and clears the zero flag.
cmpxchg16bX86Assembler1013   inline void cmpxchg16b(const Mem& dst)
1014   { _emitInstruction(kX86InstCmpXCHG16B, &dst); }
1015 #endif // ASMJIT_X64
1016 
1017   //! @brief CPU Identification (i486).
cpuidX86Assembler1018   inline void cpuid()
1019   { _emitInstruction(kX86InstCpuId); }
1020 
1021 #if defined(ASMJIT_X64)
1022   //! @brief Convert QWord to DQWord (Sign Extend).
1023   //!
1024   //! RDX:RAX <- Sign Extend RAX
cqoX86Assembler1025   inline void cqo()
1026   { _emitInstruction(kX86InstCqo); }
1027 #endif // ASMJIT_X64
1028 
1029 #if defined(ASMJIT_X86)
1030   //! @brief Decimal adjust AL after addition
1031   //!
1032   //! This instruction adjusts the sum of two packed BCD values to create
1033   //! a packed BCD result.
1034   //!
1035   //! @note This instruction is only available in 32-bit mode.
daaX86Assembler1036   inline void daa()
1037   { _emitInstruction(kX86InstDaa); }
1038 #endif // ASMJIT_X86
1039 
1040 #if defined(ASMJIT_X86)
1041   //! @brief Decimal adjust AL after subtraction
1042   //!
1043   //! This instruction adjusts the result of the subtraction of two packed
1044   //! BCD values to create a packed BCD result.
1045   //!
1046   //! @note This instruction is only available in 32-bit mode.
dasX86Assembler1047   inline void das()
1048   { _emitInstruction(kX86InstDas); }
1049 #endif // ASMJIT_X86
1050 
1051   //! @brief Decrement by 1.
1052   //! @note This instruction can be slower than sub(dst, 1)
decX86Assembler1053   inline void dec(const GpReg& dst)
1054   { _emitInstruction(kX86InstDec, &dst); }
1055   //! @brief Decrement by 1.
1056   //! @note This instruction can be slower than sub(dst, 1)
decX86Assembler1057   inline void dec(const Mem& dst)
1058   { _emitInstruction(kX86InstDec, &dst); }
1059 
1060   //! @brief Unsigned divide.
1061   //!
1062   //! This instruction divides (unsigned) the value in the AL, AX, or EAX
1063   //! register by the source operand and stores the result in the AX,
1064   //! DX:AX, or EDX:EAX registers.
divX86Assembler1065   inline void div(const GpReg& src)
1066   { _emitInstruction(kX86InstDiv, &src); }
1067   //! @brief Unsigned divide.
1068   //! @overload
divX86Assembler1069   inline void div(const Mem& src)
1070   { _emitInstruction(kX86InstDiv, &src); }
1071 
1072   //! @brief Make Stack Frame for Procedure Parameters.
enterX86Assembler1073   inline void enter(const Imm& imm16, const Imm& imm8)
1074   { _emitInstruction(kX86InstEnter, &imm16, &imm8); }
1075 
1076   //! @brief Signed divide.
1077   //!
1078   //! This instruction divides (signed) the value in the AL, AX, or EAX
1079   //! register by the source operand and stores the result in the AX,
1080   //! DX:AX, or EDX:EAX registers.
idivX86Assembler1081   inline void idiv(const GpReg& src)
1082   { _emitInstruction(kX86InstIDiv, &src); }
1083   //! @brief Signed divide.
1084   //! @overload
idivX86Assembler1085   inline void idiv(const Mem& src)
1086   { _emitInstruction(kX86InstIDiv, &src); }
1087 
1088   //! @brief Signed multiply.
1089   //!
1090   //! Source operand (in a general-purpose register or memory location)
1091   //! is multiplied by the value in the AL, AX, or EAX register (depending
1092   //! on the operand size) and the product is stored in the AX, DX:AX, or
1093   //! EDX:EAX registers, respectively.
imulX86Assembler1094   inline void imul(const GpReg& src)
1095   { _emitInstruction(kX86InstIMul, &src); }
1096   //! @overload
imulX86Assembler1097   inline void imul(const Mem& src)
1098   { _emitInstruction(kX86InstIMul, &src); }
1099 
1100   //! @brief Signed multiply.
1101   //!
1102   //! Destination operand (the first operand) is multiplied by the source
1103   //! operand (second operand). The destination operand is a general-purpose
1104   //! register and the source operand is an immediate value, a general-purpose
1105   //! register, or a memory location. The product is then stored in the
1106   //! destination operand location.
imulX86Assembler1107   inline void imul(const GpReg& dst, const GpReg& src)
1108   { _emitInstruction(kX86InstIMul, &dst, &src); }
1109   //! @brief Signed multiply.
1110   //! @overload
imulX86Assembler1111   inline void imul(const GpReg& dst, const Mem& src)
1112   { _emitInstruction(kX86InstIMul, &dst, &src); }
1113   //! @brief Signed multiply.
1114   //! @overload
imulX86Assembler1115   inline void imul(const GpReg& dst, const Imm& src)
1116   { _emitInstruction(kX86InstIMul, &dst, &src); }
1117 
1118   //! @brief Signed multiply.
1119   //!
1120   //! source operand (which can be a general-purpose register or a memory
1121   //! location) is multiplied by the second source operand (an immediate
1122   //! value). The product is then stored in the destination operand
1123   //! (a general-purpose register).
imulX86Assembler1124   inline void imul(const GpReg& dst, const GpReg& src, const Imm& imm)
1125   { _emitInstruction(kX86InstIMul, &dst, &src, &imm); }
1126   //! @overload
imulX86Assembler1127   inline void imul(const GpReg& dst, const Mem& src, const Imm& imm)
1128   { _emitInstruction(kX86InstIMul, &dst, &src, &imm); }
1129 
1130   //! @brief Increment by 1.
1131   //! @note This instruction can be slower than add(dst, 1)
incX86Assembler1132   inline void inc(const GpReg& dst)
1133   { _emitInstruction(kX86InstInc, &dst); }
1134   //! @brief Increment by 1.
1135   //! @note This instruction can be slower than add(dst, 1)
incX86Assembler1136   inline void inc(const Mem& dst)
1137   { _emitInstruction(kX86InstInc, &dst); }
1138 
1139   //! @brief Interrupt 3 - trap to debugger.
int3X86Assembler1140   inline void int3()
1141   { _emitInstruction(kX86InstInt3); }
1142 
1143   //! @brief Jump to label @a label if condition @a cc is met.
1144   //!
1145   //! This instruction checks the state of one or more of the status flags in
1146   //! the EFLAGS register (CF, OF, PF, SF, and ZF) and, if the flags are in the
1147   //! specified state (condition), performs a jump to the target instruction
1148   //! specified by the destination operand. A condition code (cc) is associated
1149   //! with each instruction to indicate the condition being tested for. If the
1150   //! condition is not satisfied, the jump is not performed and execution
1151   //! continues with the instruction following the Jcc instruction.
1152   inline void j(kX86Cond cc, const Label& label, uint32_t hint = kCondHintNone)
1153   {
1154     _emitJcc(X86Util::getJccInstFromCond(cc), &label, hint);
1155   }
1156 
1157   //! @brief Jump to label @a label if condition is met.
1158   inline void ja  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJA  , &label, hint); }
1159   //! @brief Jump to label @a label if condition is met.
1160   inline void jae (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJAE , &label, hint); }
1161   //! @brief Jump to label @a label if condition is met.
1162   inline void jb  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJB  , &label, hint); }
1163   //! @brief Jump to label @a label if condition is met.
1164   inline void jbe (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJBE , &label, hint); }
1165   //! @brief Jump to label @a label if condition is met.
1166   inline void jc  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJC  , &label, hint); }
1167   //! @brief Jump to label @a label if condition is met.
1168   inline void je  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJE  , &label, hint); }
1169   //! @brief Jump to label @a label if condition is met.
1170   inline void jg  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJG  , &label, hint); }
1171   //! @brief Jump to label @a label if condition is met.
1172   inline void jge (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJGE , &label, hint); }
1173   //! @brief Jump to label @a label if condition is met.
1174   inline void jl  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJL  , &label, hint); }
1175   //! @brief Jump to label @a label if condition is met.
1176   inline void jle (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJLE , &label, hint); }
1177   //! @brief Jump to label @a label if condition is met.
1178   inline void jna (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNA , &label, hint); }
1179   //! @brief Jump to label @a label if condition is met.
1180   inline void jnae(const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNAE, &label, hint); }
1181   //! @brief Jump to label @a label if condition is met.
1182   inline void jnb (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNB , &label, hint); }
1183   //! @brief Jump to label @a label if condition is met.
1184   inline void jnbe(const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNBE, &label, hint); }
1185   //! @brief Jump to label @a label if condition is met.
1186   inline void jnc (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNC , &label, hint); }
1187   //! @brief Jump to label @a label if condition is met.
1188   inline void jne (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNE , &label, hint); }
1189   //! @brief Jump to label @a label if condition is met.
1190   inline void jng (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNG , &label, hint); }
1191   //! @brief Jump to label @a label if condition is met.
1192   inline void jnge(const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNGE, &label, hint); }
1193   //! @brief Jump to label @a label if condition is met.
1194   inline void jnl (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNL , &label, hint); }
1195   //! @brief Jump to label @a label if condition is met.
1196   inline void jnle(const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNLE, &label, hint); }
1197   //! @brief Jump to label @a label if condition is met.
1198   inline void jno (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNO , &label, hint); }
1199   //! @brief Jump to label @a label if condition is met.
1200   inline void jnp (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNP , &label, hint); }
1201   //! @brief Jump to label @a label if condition is met.
1202   inline void jns (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNS , &label, hint); }
1203   //! @brief Jump to label @a label if condition is met.
1204   inline void jnz (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNZ , &label, hint); }
1205   //! @brief Jump to label @a label if condition is met.
1206   inline void jo  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJO  , &label, hint); }
1207   //! @brief Jump to label @a label if condition is met.
1208   inline void jp  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJP  , &label, hint); }
1209   //! @brief Jump to label @a label if condition is met.
1210   inline void jpe (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJPE , &label, hint); }
1211   //! @brief Jump to label @a label if condition is met.
1212   inline void jpo (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJPO , &label, hint); }
1213   //! @brief Jump to label @a label if condition is met.
1214   inline void js  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJS  , &label, hint); }
1215   //! @brief Jump to label @a label if condition is met.
1216   inline void jz  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJZ  , &label, hint); }
1217 
1218   //! @brief Short jump to label @a label if condition @a cc is met.
1219   //! @sa j()
1220   inline void short_j(kX86Cond cc, const Label& label, uint32_t hint = kCondHintNone)
1221   {
1222     _emitOptions |= kX86EmitOptionShortJump;
1223     j(cc, label, hint);
1224   }
1225 
1226   //! @brief Short jump to label @a label if condition is met.
1227   inline void short_ja  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJA  , &label, hint); }
1228   //! @brief Short jump to label @a label if condition is met.
1229   inline void short_jae (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJAE , &label, hint); }
1230   //! @brief Short jump to label @a label if condition is met.
1231   inline void short_jb  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJB  , &label, hint); }
1232   //! @brief Short jump to label @a label if condition is met.
1233   inline void short_jbe (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJBE , &label, hint); }
1234   //! @brief Short jump to label @a label if condition is met.
1235   inline void short_jc  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJC  , &label, hint); }
1236   //! @brief Short jump to label @a label if condition is met.
1237   inline void short_je  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJE  , &label, hint); }
1238   //! @brief Short jump to label @a label if condition is met.
1239   inline void short_jg  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJG  , &label, hint); }
1240   //! @brief Short jump to label @a label if condition is met.
1241   inline void short_jge (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJGE , &label, hint); }
1242   //! @brief Short jump to label @a label if condition is met.
1243   inline void short_jl  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJL  , &label, hint); }
1244   //! @brief Short jump to label @a label if condition is met.
1245   inline void short_jle (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJLE , &label, hint); }
1246   //! @brief Short jump to label @a label if condition is met.
1247   inline void short_jna (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNA , &label, hint); }
1248   //! @brief Short jump to label @a label if condition is met.
1249   inline void short_jnae(const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNAE, &label, hint); }
1250   //! @brief Short jump to label @a label if condition is met.
1251   inline void short_jnb (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNB , &label, hint); }
1252   //! @brief Short jump to label @a label if condition is met.
1253   inline void short_jnbe(const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNBE, &label, hint); }
1254   //! @brief Short jump to label @a label if condition is met.
1255   inline void short_jnc (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNC , &label, hint); }
1256   //! @brief Short jump to label @a label if condition is met.
1257   inline void short_jne (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNE , &label, hint); }
1258   //! @brief Short jump to label @a label if condition is met.
1259   inline void short_jng (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNG , &label, hint); }
1260   //! @brief Short jump to label @a label if condition is met.
1261   inline void short_jnge(const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNGE, &label, hint); }
1262   //! @brief Short jump to label @a label if condition is met.
1263   inline void short_jnl (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNL , &label, hint); }
1264   //! @brief Short jump to label @a label if condition is met.
1265   inline void short_jnle(const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNLE, &label, hint); }
1266   //! @brief Short jump to label @a label if condition is met.
1267   inline void short_jno (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNO , &label, hint); }
1268   //! @brief Short jump to label @a label if condition is met.
1269   inline void short_jnp (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNP , &label, hint); }
1270   //! @brief Short jump to label @a label if condition is met.
1271   inline void short_jns (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNS , &label, hint); }
1272   //! @brief Short jump to label @a label if condition is met.
1273   inline void short_jnz (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJNZ , &label, hint); }
1274   //! @brief Short jump to label @a label if condition is met.
1275   inline void short_jo  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJO  , &label, hint); }
1276   //! @brief Short jump to label @a label if condition is met.
1277   inline void short_jp  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJP  , &label, hint); }
1278   //! @brief Short jump to label @a label if condition is met.
1279   inline void short_jpe (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJPE , &label, hint); }
1280   //! @brief Short jump to label @a label if condition is met.
1281   inline void short_jpo (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJPO , &label, hint); }
1282   //! @brief Short jump to label @a label if condition is met.
1283   inline void short_js  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJS  , &label, hint); }
1284   //! @brief Short jump to label @a label if condition is met.
1285   inline void short_jz  (const Label& label, uint32_t hint = kCondHintNone) { _emitShortJcc(kX86InstJZ  , &label, hint); }
1286 
1287   //! @brief Jump.
1288   //! @overload
jmpX86Assembler1289   inline void jmp(const GpReg& dst)
1290   { _emitInstruction(kX86InstJmp, &dst); }
1291   //! @brief Jump.
1292   //! @overload
jmpX86Assembler1293   inline void jmp(const Mem& dst)
1294   { _emitInstruction(kX86InstJmp, &dst); }
1295   //! @brief Jump.
1296   //! @overload
jmpX86Assembler1297   inline void jmp(const Imm& dst)
1298   { _emitInstruction(kX86InstJmp, &dst); }
1299 
1300   //! @brief Jump.
1301   //! @overload
jmpX86Assembler1302   inline void jmp(void* dst)
1303   {
1304     Imm imm((sysint_t)dst);
1305     _emitInstruction(kX86InstJmp, &imm);
1306   }
1307 
1308   //! @brief Jump.
1309   //!
1310   //! This instruction transfers program control to a different point
1311   //! in the instruction stream without recording return information.
1312   //! The destination (target) operand specifies the label of the
1313   //! instruction being jumped to.
jmpX86Assembler1314   inline void jmp(const Label& label)
1315   { _emitInstruction(kX86InstJmp, &label); }
1316 
1317   //! @brief Short jump.
1318   //! @sa jmp()
short_jmpX86Assembler1319   inline void short_jmp(const Label& label)
1320   {
1321     _emitOptions |= kX86EmitOptionShortJump;
1322     _emitInstruction(kX86InstJmp, &label);
1323   }
1324 
1325   //! @brief Load Effective Address
1326   //!
1327   //! This instruction computes the effective address of the second
1328   //! operand (the source operand) and stores it in the first operand
1329   //! (destination operand). The source operand is a memory address
1330   //! (offset part) specified with one of the processors addressing modes.
1331   //! The destination operand is a general-purpose register.
leaX86Assembler1332   inline void lea(const GpReg& dst, const Mem& src)
1333   { _emitInstruction(kX86InstLea, &dst, &src); }
1334 
1335   //! @brief High Level Procedure Exit.
leaveX86Assembler1336   inline void leave()
1337   { _emitInstruction(kX86InstLeave); }
1338 
1339   //! @brief Move.
1340   //!
1341   //! This instruction copies the second operand (source operand) to the first
1342   //! operand (destination operand). The source operand can be an immediate
1343   //! value, general-purpose register, segment register, or memory location.
1344   //! The destination register can be a general-purpose register, segment
1345   //! register, or memory location. Both operands must be the same size, which
1346   //! can be a byte, a word, or a DWORD.
1347   //!
1348   //! @note To move MMX or SSE registers to/from GP registers or memory, use
1349   //! corresponding functions: @c movd(), @c movq(), etc. Passing MMX or SSE
1350   //! registers to @c mov() is illegal.
movX86Assembler1351   inline void mov(const GpReg& dst, const GpReg& src)
1352   { _emitInstruction(kX86InstMov, &dst, &src); }
1353   //! @brief Move.
1354   //! @overload
movX86Assembler1355   inline void mov(const GpReg& dst, const Mem& src)
1356   { _emitInstruction(kX86InstMov, &dst, &src); }
1357   //! @brief Move.
1358   //! @overload
movX86Assembler1359   inline void mov(const GpReg& dst, const Imm& src)
1360   { _emitInstruction(kX86InstMov, &dst, &src); }
1361   //! @brief Move.
1362   //! @overload
movX86Assembler1363   inline void mov(const Mem& dst, const GpReg& src)
1364   { _emitInstruction(kX86InstMov, &dst, &src); }
1365   //! @brief Move.
1366   //! @overload
movX86Assembler1367   inline void mov(const Mem& dst, const Imm& src)
1368   { _emitInstruction(kX86InstMov, &dst, &src); }
1369 
1370   //! @brief Move from segment register.
1371   //! @overload.
movX86Assembler1372   inline void mov(const GpReg& dst, const SegmentReg& src)
1373   { _emitInstruction(kX86InstMov, &dst, &src); }
1374 
1375   //! @brief Move from segment register.
1376   //! @overload.
movX86Assembler1377   inline void mov(const Mem& dst, const SegmentReg& src)
1378   { _emitInstruction(kX86InstMov, &dst, &src); }
1379 
1380   //! @brief Move to segment register.
1381   //! @overload.
movX86Assembler1382   inline void mov(const SegmentReg& dst, const GpReg& src)
1383   { _emitInstruction(kX86InstMov, &dst, &src); }
1384 
1385   //! @brief Move to segment register.
1386   //! @overload.
movX86Assembler1387   inline void mov(const SegmentReg& dst, const Mem& src)
1388   { _emitInstruction(kX86InstMov, &dst, &src); }
1389 
1390   //! @brief Move byte, word, dword or qword from absolute address @a src to
1391   //! AL, AX, EAX or RAX register.
mov_ptrX86Assembler1392   inline void mov_ptr(const GpReg& dst, void* src)
1393   {
1394     ASMJIT_ASSERT(dst.getRegIndex() == 0);
1395     Imm imm((sysint_t)src);
1396     _emitInstruction(kX86InstMovPtr, &dst, &imm);
1397   }
1398 
1399   //! @brief Move byte, word, dword or qword from AL, AX, EAX or RAX register
1400   //! to absolute address @a dst.
mov_ptrX86Assembler1401   inline void mov_ptr(void* dst, const GpReg& src)
1402   {
1403     ASMJIT_ASSERT(src.getRegIndex() == 0);
1404     Imm imm((sysint_t)dst);
1405     _emitInstruction(kX86InstMovPtr, &imm, &src);
1406   }
1407 
1408   //! @brief Move with Sign-Extension.
1409   //!
1410   //! This instruction copies the contents of the source operand (register
1411   //! or memory location) to the destination operand (register) and sign
1412   //! extends the value to 16, 32 or 64-bits.
1413   //!
1414   //! @sa movsxd().
movsxX86Assembler1415   void movsx(const GpReg& dst, const GpReg& src)
1416   { _emitInstruction(kX86InstMovSX, &dst, &src); }
1417   //! @brief Move with Sign-Extension.
1418   //! @overload
movsxX86Assembler1419   void movsx(const GpReg& dst, const Mem& src)
1420   { _emitInstruction(kX86InstMovSX, &dst, &src); }
1421 
#if defined(ASMJIT_X64)
  //! @brief Sign-extend a DWord register @a src into the QWord register @a dst.
  //!
  //! Only available when @c ASMJIT_X64 is defined.
  inline void movsxd(const GpReg& dst, const GpReg& src) { _emitInstruction(kX86InstMovSXD, &dst, &src); }
  //! @brief Sign-extend a DWord memory operand @a src into the QWord register @a dst.
  //! @overload
  inline void movsxd(const GpReg& dst, const Mem& src) { _emitInstruction(kX86InstMovSXD, &dst, &src); }
#endif // ASMJIT_X64
1431 
1432   //! @brief Move with Zero-Extend.
1433   //!
1434   //! This instruction copies the contents of the source operand (register
1435   //! or memory location) to the destination operand (register) and zero
1436   //! extends the value to 16 or 32-bits. The size of the converted value
1437   //! depends on the operand-size attribute.
movzxX86Assembler1438   inline void movzx(const GpReg& dst, const GpReg& src)
1439   { _emitInstruction(kX86InstMovZX, &dst, &src); }
1440   //! @brief Move with Zero-Extend.
1441   //! @brief Overload
movzxX86Assembler1442   inline void movzx(const GpReg& dst, const Mem& src)
1443   { _emitInstruction(kX86InstMovZX, &dst, &src); }
1444 
1445   //! @brief Unsigned multiply.
1446   //!
1447   //! Source operand (in a general-purpose register or memory location)
1448   //! is multiplied by the value in the AL, AX, or EAX register (depending
1449   //! on the operand size) and the product is stored in the AX, DX:AX, or
1450   //! EDX:EAX registers, respectively.
mulX86Assembler1451   inline void mul(const GpReg& src)
1452   { _emitInstruction(kX86InstMul, &src); }
1453   //! @brief Unsigned multiply.
1454   //! @overload
mulX86Assembler1455   inline void mul(const Mem& src)
1456   { _emitInstruction(kX86InstMul, &src); }
1457 
1458   //! @brief Two's Complement Negation.
negX86Assembler1459   inline void neg(const GpReg& dst)
1460   { _emitInstruction(kX86InstNeg, &dst); }
1461   //! @brief Two's Complement Negation.
negX86Assembler1462   inline void neg(const Mem& dst)
1463   { _emitInstruction(kX86InstNeg, &dst); }
1464 
1465   //! @brief No Operation.
1466   //!
1467   //! This instruction performs no operation. This instruction is a one-byte
1468   //! instruction that takes up space in the instruction stream but does not
1469   //! affect the machine context, except the EIP register. The NOP instruction
1470   //! is an alias mnemonic for the XCHG (E)AX, (E)AX instruction.
nopX86Assembler1471   inline void nop()
1472   { _emitInstruction(kX86InstNop); }
1473 
1474   //! @brief One's Complement Negation.
not_X86Assembler1475   inline void not_(const GpReg& dst)
1476   { _emitInstruction(kX86InstNot, &dst); }
1477   //! @brief One's Complement Negation.
not_X86Assembler1478   inline void not_(const Mem& dst)
1479   { _emitInstruction(kX86InstNot, &dst); }
1480 
1481   //! @brief Logical Inclusive OR.
or_X86Assembler1482   inline void or_(const GpReg& dst, const GpReg& src)
1483   { _emitInstruction(kX86InstOr, &dst, &src); }
1484   //! @brief Logical Inclusive OR.
or_X86Assembler1485   inline void or_(const GpReg& dst, const Mem& src)
1486   { _emitInstruction(kX86InstOr, &dst, &src); }
1487   //! @brief Logical Inclusive OR.
or_X86Assembler1488   inline void or_(const GpReg& dst, const Imm& src)
1489   { _emitInstruction(kX86InstOr, &dst, &src); }
1490   //! @brief Logical Inclusive OR.
or_X86Assembler1491   inline void or_(const Mem& dst, const GpReg& src)
1492   { _emitInstruction(kX86InstOr, &dst, &src); }
1493   //! @brief Logical Inclusive OR.
or_X86Assembler1494   inline void or_(const Mem& dst, const Imm& src)
1495   { _emitInstruction(kX86InstOr, &dst, &src); }
1496 
1497   //! @brief Pop a Value from the Stack.
1498   //!
1499   //! This instruction loads the value from the top of the stack to the location
1500   //! specified with the destination operand and then increments the stack pointer.
1501   //! The destination operand can be a general purpose register, memory location,
1502   //! or segment register.
popX86Assembler1503   inline void pop(const GpReg& dst)
1504   {
1505     ASMJIT_ASSERT(dst.isRegType(kX86RegTypeGpw) || dst.isRegType(kX86RegTypeGpz));
1506     _emitInstruction(kX86InstPop, &dst);
1507   }
1508   //! @brief Pop a Segment Register from the Stack.
1509   //!
1510   //! @note There is no instruction to pop a cs segment register.
popX86Assembler1511   inline void pop(const SegmentReg& dst)
1512   {
1513     ASMJIT_ASSERT(dst.getRegIndex() != kX86SegCs);
1514     _emitInstruction(kX86InstPop, &dst);
1515   }
1516 
popX86Assembler1517   inline void pop(const Mem& dst)
1518   {
1519     ASMJIT_ASSERT(dst.getSize() == 2 || dst.getSize() == sizeof(sysint_t));
1520     _emitInstruction(kX86InstPop, &dst);
1521   }
1522 
#if defined(ASMJIT_X86)
  //! @brief Pop All General-Purpose Registers.
  //!
  //! Pops EDI, ESI, EBP, EBX, EDX, ECX, and EAX. Only available when
  //! @c ASMJIT_X86 is defined.
  inline void popad() { _emitInstruction(kX86InstPopAD); }
#endif // ASMJIT_X86
1530 
1531   //! @brief Pop Stack into EFLAGS Register (32-bit or 64-bit).
popfX86Assembler1532   inline void popf()
1533   {
1534 #if defined(ASMJIT_X86)
1535     popfd();
1536 #else
1537     popfq();
1538 #endif
1539   }
1540 
1541 #if defined(ASMJIT_X86)
1542   //! @brief Pop Stack into EFLAGS Register (32-bit).
popfdX86Assembler1543   inline void popfd() { _emitInstruction(kX86InstPopFD); }
1544 #else
1545   //! @brief Pop Stack into EFLAGS Register (64-bit).
popfqX86Assembler1546   inline void popfq() { _emitInstruction(kX86InstPopFQ); }
1547 #endif
1548 
1549   //! @brief Push WORD/DWORD/QWORD Onto the Stack.
1550   //!
1551   //! @note 32-bit architecture pushed DWORD while 64-bit
1552   //! pushes QWORD. 64-bit mode not provides instruction to
1553   //! push 32-bit register/memory.
pushX86Assembler1554   inline void push(const GpReg& src)
1555   {
1556     ASMJIT_ASSERT(src.isRegType(kX86RegTypeGpw) || src.isRegType(kX86RegTypeGpz));
1557     _emitInstruction(kX86InstPush, &src);
1558   }
1559   //! @brief Push Segment Register Onto the Stack.
pushX86Assembler1560   inline void push(const SegmentReg& src)
1561   { _emitInstruction(kX86InstPush, &src); }
1562   //! @brief Push WORD/DWORD/QWORD Onto the Stack.
pushX86Assembler1563   inline void push(const Mem& src)
1564   {
1565     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == sizeof(sysint_t));
1566     _emitInstruction(kX86InstPush, &src);
1567   }
1568   //! @brief Push WORD/DWORD/QWORD Onto the Stack.
pushX86Assembler1569   inline void push(const Imm& src)
1570   { _emitInstruction(kX86InstPush, &src); }
1571 
#if defined(ASMJIT_X86)
  //! @brief Push All General-Purpose Registers.
  //!
  //! Pushes EAX, ECX, EDX, EBX, original ESP, EBP, ESI, and EDI. Only
  //! available when @c ASMJIT_X86 is defined.
  inline void pushad() { _emitInstruction(kX86InstPushAD); }
#endif // ASMJIT_X86
1579 
1580   //! @brief Push EFLAGS Register (32-bit or 64-bit) onto the Stack.
pushfX86Assembler1581   inline void pushf()
1582   {
1583 #if defined(ASMJIT_X86)
1584     pushfd();
1585 #else
1586     pushfq();
1587 #endif
1588   }
1589 
1590 #if defined(ASMJIT_X86)
1591   //! @brief Push EFLAGS Register (32-bit) onto the Stack.
pushfdX86Assembler1592   inline void pushfd() { _emitInstruction(kX86InstPushFD); }
1593 #else
1594   //! @brief Push EFLAGS Register (64-bit) onto the Stack.
pushfqX86Assembler1595   inline void pushfq() { _emitInstruction(kX86InstPushFQ); }
1596 #endif // ASMJIT_X86
1597 
1598   //! @brief Rotate Bits Left.
1599   //! @note @a src register can be only @c cl.
rclX86Assembler1600   inline void rcl(const GpReg& dst, const GpReg& src)
1601   { _emitInstruction(kX86InstRcl, &dst, &src); }
1602   //! @brief Rotate Bits Left.
rclX86Assembler1603   inline void rcl(const GpReg& dst, const Imm& src)
1604   { _emitInstruction(kX86InstRcl, &dst, &src); }
1605   //! @brief Rotate Bits Left.
1606   //! @note @a src register can be only @c cl.
rclX86Assembler1607   inline void rcl(const Mem& dst, const GpReg& src)
1608   { _emitInstruction(kX86InstRcl, &dst, &src); }
1609   //! @brief Rotate Bits Left.
rclX86Assembler1610   inline void rcl(const Mem& dst, const Imm& src)
1611   { _emitInstruction(kX86InstRcl, &dst, &src); }
1612 
1613   //! @brief Rotate Bits Right.
1614   //! @note @a src register can be only @c cl.
rcrX86Assembler1615   inline void rcr(const GpReg& dst, const GpReg& src)
1616   { _emitInstruction(kX86InstRcr, &dst, &src); }
1617   //! @brief Rotate Bits Right.
rcrX86Assembler1618   inline void rcr(const GpReg& dst, const Imm& src)
1619   { _emitInstruction(kX86InstRcr, &dst, &src); }
1620   //! @brief Rotate Bits Right.
1621   //! @note @a src register can be only @c cl.
rcrX86Assembler1622   inline void rcr(const Mem& dst, const GpReg& src)
1623   { _emitInstruction(kX86InstRcr, &dst, &src); }
1624   //! @brief Rotate Bits Right.
rcrX86Assembler1625   inline void rcr(const Mem& dst, const Imm& src)
1626   { _emitInstruction(kX86InstRcr, &dst, &src); }
1627 
1628   //! @brief Read Time-Stamp Counter (Pentium).
rdtscX86Assembler1629   inline void rdtsc()
1630   { _emitInstruction(kX86InstRdtsc); }
1631 
1632   //! @brief Read Time-Stamp Counter and Processor ID (New).
rdtscpX86Assembler1633   inline void rdtscp()
1634   { _emitInstruction(kX86InstRdtscP); }
1635 
1636   //! @brief Load ECX/RCX BYTEs from DS:[ESI/RSI] to AL.
rep_lodsbX86Assembler1637   inline void rep_lodsb()
1638   { _emitInstruction(kX86InstRepLodSB); }
1639 
1640   //! @brief Load ECX/RCX DWORDs from DS:[ESI/RSI] to EAX.
rep_lodsdX86Assembler1641   inline void rep_lodsd()
1642   { _emitInstruction(kX86InstRepLodSD); }
1643 
1644 #if defined(ASMJIT_X64)
1645   //! @brief Load ECX/RCX QWORDs from DS:[ESI/RSI] to RAX.
rep_lodsqX86Assembler1646   inline void rep_lodsq()
1647   { _emitInstruction(kX86InstRepLodSQ); }
1648 #endif // ASMJIT_X64
1649 
1650   //! @brief Load ECX/RCX WORDs from DS:[ESI/RSI] to AX.
rep_lodswX86Assembler1651   inline void rep_lodsw()
1652   { _emitInstruction(kX86InstRepLodSW); }
1653 
1654   //! @brief Move ECX/RCX BYTEs from DS:[ESI/RSI] to ES:[EDI/RDI].
rep_movsbX86Assembler1655   inline void rep_movsb()
1656   { _emitInstruction(kX86InstRepMovSB); }
1657 
1658   //! @brief Move ECX/RCX DWORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
rep_movsdX86Assembler1659   inline void rep_movsd()
1660   { _emitInstruction(kX86InstRepMovSD); }
1661 
1662 #if defined(ASMJIT_X64)
1663   //! @brief Move ECX/RCX QWORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
rep_movsqX86Assembler1664   inline void rep_movsq()
1665   { _emitInstruction(kX86InstRepMovSQ); }
1666 #endif // ASMJIT_X64
1667 
1668   //! @brief Move ECX/RCX WORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
rep_movswX86Assembler1669   inline void rep_movsw()
1670   { _emitInstruction(kX86InstRepMovSW); }
1671 
1672   //! @brief Fill ECX/RCX BYTEs at ES:[EDI/RDI] with AL.
rep_stosbX86Assembler1673   inline void rep_stosb()
1674   { _emitInstruction(kX86InstRepStoSB); }
1675 
1676   //! @brief Fill ECX/RCX DWORDs at ES:[EDI/RDI] with EAX.
rep_stosdX86Assembler1677   inline void rep_stosd()
1678   { _emitInstruction(kX86InstRepStoSD); }
1679 
1680 #if defined(ASMJIT_X64)
1681   //! @brief Fill ECX/RCX QWORDs at ES:[EDI/RDI] with RAX.
rep_stosqX86Assembler1682   inline void rep_stosq()
1683   { _emitInstruction(kX86InstRepStoSQ); }
1684 #endif // ASMJIT_X64
1685 
1686   //! @brief Fill ECX/RCX WORDs at ES:[EDI/RDI] with AX.
rep_stoswX86Assembler1687   inline void rep_stosw()
1688   { _emitInstruction(kX86InstRepStoSW); }
1689 
1690   //! @brief Repeated find nonmatching BYTEs in ES:[EDI/RDI] and DS:[ESI/RDI].
repe_cmpsbX86Assembler1691   inline void repe_cmpsb()
1692   { _emitInstruction(kX86InstRepECmpSB); }
1693 
1694   //! @brief Repeated find nonmatching DWORDs in ES:[EDI/RDI] and DS:[ESI/RDI].
repe_cmpsdX86Assembler1695   inline void repe_cmpsd()
1696   { _emitInstruction(kX86InstRepECmpSD); }
1697 
1698 #if defined(ASMJIT_X64)
1699   //! @brief Repeated find nonmatching QWORDs in ES:[EDI/RDI] and DS:[ESI/RDI].
repe_cmpsqX86Assembler1700   inline void repe_cmpsq()
1701   { _emitInstruction(kX86InstRepECmpSQ); }
1702 #endif // ASMJIT_X64
1703 
1704   //! @brief Repeated find nonmatching WORDs in ES:[EDI/RDI] and DS:[ESI/RDI].
repe_cmpswX86Assembler1705   inline void repe_cmpsw()
1706   { _emitInstruction(kX86InstRepECmpSW); }
1707 
1708   //! @brief Find non-AL BYTE starting at ES:[EDI/RDI].
repe_scasbX86Assembler1709   inline void repe_scasb()
1710   { _emitInstruction(kX86InstRepEScaSB); }
1711 
1712   //! @brief Find non-EAX DWORD starting at ES:[EDI/RDI].
repe_scasdX86Assembler1713   inline void repe_scasd()
1714   { _emitInstruction(kX86InstRepEScaSD); }
1715 
1716 #if defined(ASMJIT_X64)
1717   //! @brief Find non-RAX QWORD starting at ES:[EDI/RDI].
repe_scasqX86Assembler1718   inline void repe_scasq()
1719   { _emitInstruction(kX86InstRepEScaSQ); }
1720 #endif // ASMJIT_X64
1721 
1722   //! @brief Find non-AX WORD starting at ES:[EDI/RDI].
repe_scaswX86Assembler1723   inline void repe_scasw()
1724   { _emitInstruction(kX86InstRepEScaSW); }
1725 
1726   //! @brief Repeated find nonmatching BYTEs in ES:[EDI/RDI] and DS:[ESI/RDI].
repne_cmpsbX86Assembler1727   inline void repne_cmpsb()
1728   { _emitInstruction(kX86InstRepNECmpSB); }
1729 
1730   //! @brief Repeated find nonmatching DWORDs in ES:[EDI/RDI] and DS:[ESI/RDI].
repne_cmpsdX86Assembler1731   inline void repne_cmpsd()
1732   { _emitInstruction(kX86InstRepNECmpSD); }
1733 
1734 #if defined(ASMJIT_X64)
1735   //! @brief Repeated find nonmatching QWORDs in ES:[EDI/RDI] and DS:[ESI/RDI].
repne_cmpsqX86Assembler1736   inline void repne_cmpsq()
1737   { _emitInstruction(kX86InstRepNECmpSQ); }
1738 #endif // ASMJIT_X64
1739 
1740   //! @brief Repeated find nonmatching WORDs in ES:[EDI/RDI] and DS:[ESI/RDI].
repne_cmpswX86Assembler1741   inline void repne_cmpsw()
1742   { _emitInstruction(kX86InstRepNECmpSW); }
1743 
1744   //! @brief Find AL, starting at ES:[EDI/RDI].
repne_scasbX86Assembler1745   inline void repne_scasb()
1746   { _emitInstruction(kX86InstRepNEScaSB); }
1747 
1748   //! @brief Find EAX, starting at ES:[EDI/RDI].
repne_scasdX86Assembler1749   inline void repne_scasd()
1750   { _emitInstruction(kX86InstRepNEScaSD); }
1751 
1752 #if defined(ASMJIT_X64)
1753   //! @brief Find RAX, starting at ES:[EDI/RDI].
repne_scasqX86Assembler1754   inline void repne_scasq()
1755   { _emitInstruction(kX86InstRepNEScaSQ); }
1756 #endif // ASMJIT_X64
1757 
1758   //! @brief Find AX, starting at ES:[EDI/RDI].
repne_scaswX86Assembler1759   inline void repne_scasw()
1760   { _emitInstruction(kX86InstRepNEScaSW); }
1761 
1762   //! @brief Return from Procedure.
retX86Assembler1763   inline void ret()
1764   { _emitInstruction(kX86InstRet); }
1765 
1766   //! @brief Return from Procedure.
retX86Assembler1767   inline void ret(const Imm& imm16)
1768   { _emitInstruction(kX86InstRet, &imm16); }
1769 
1770   //! @brief Rotate Bits Left.
1771   //! @note @a src register can be only @c cl.
rolX86Assembler1772   inline void rol(const GpReg& dst, const GpReg& src)
1773   { _emitInstruction(kX86InstRol, &dst, &src); }
1774   //! @brief Rotate Bits Left.
rolX86Assembler1775   inline void rol(const GpReg& dst, const Imm& src)
1776   { _emitInstruction(kX86InstRol, &dst, &src); }
1777   //! @brief Rotate Bits Left.
1778   //! @note @a src register can be only @c cl.
rolX86Assembler1779   inline void rol(const Mem& dst, const GpReg& src)
1780   { _emitInstruction(kX86InstRol, &dst, &src); }
1781   //! @brief Rotate Bits Left.
rolX86Assembler1782   inline void rol(const Mem& dst, const Imm& src)
1783   { _emitInstruction(kX86InstRol, &dst, &src); }
1784 
1785   //! @brief Rotate Bits Right.
1786   //! @note @a src register can be only @c cl.
rorX86Assembler1787   inline void ror(const GpReg& dst, const GpReg& src)
1788   { _emitInstruction(kX86InstRor, &dst, &src); }
1789   //! @brief Rotate Bits Right.
rorX86Assembler1790   inline void ror(const GpReg& dst, const Imm& src)
1791   { _emitInstruction(kX86InstRor, &dst, &src); }
1792   //! @brief Rotate Bits Right.
1793   //! @note @a src register can be only @c cl.
rorX86Assembler1794   inline void ror(const Mem& dst, const GpReg& src)
1795   { _emitInstruction(kX86InstRor, &dst, &src); }
1796   //! @brief Rotate Bits Right.
rorX86Assembler1797   inline void ror(const Mem& dst, const Imm& src)
1798   { _emitInstruction(kX86InstRor, &dst, &src); }
1799 
#if defined(ASMJIT_X86)
  //! @brief Store AH into Flags.
  //!
  //! Only available when @c ASMJIT_X86 is defined.
  inline void sahf() { _emitInstruction(kX86InstSahf); }
#endif // ASMJIT_X86
1805 
1806   //! @brief Integer subtraction with borrow.
sbbX86Assembler1807   inline void sbb(const GpReg& dst, const GpReg& src)
1808   { _emitInstruction(kX86InstSbb, &dst, &src); }
1809   //! @brief Integer subtraction with borrow.
sbbX86Assembler1810   inline void sbb(const GpReg& dst, const Mem& src)
1811   { _emitInstruction(kX86InstSbb, &dst, &src); }
1812   //! @brief Integer subtraction with borrow.
sbbX86Assembler1813   inline void sbb(const GpReg& dst, const Imm& src)
1814   { _emitInstruction(kX86InstSbb, &dst, &src); }
1815   //! @brief Integer subtraction with borrow.
sbbX86Assembler1816   inline void sbb(const Mem& dst, const GpReg& src)
1817   { _emitInstruction(kX86InstSbb, &dst, &src); }
1818   //! @brief Integer subtraction with borrow.
sbbX86Assembler1819   inline void sbb(const Mem& dst, const Imm& src)
1820   { _emitInstruction(kX86InstSbb, &dst, &src); }
1821 
1822   //! @brief Shift Bits Left.
1823   //! @note @a src register can be only @c cl.
salX86Assembler1824   inline void sal(const GpReg& dst, const GpReg& src)
1825   { _emitInstruction(kX86InstSal, &dst, &src); }
1826   //! @brief Shift Bits Left.
salX86Assembler1827   inline void sal(const GpReg& dst, const Imm& src)
1828   { _emitInstruction(kX86InstSal, &dst, &src); }
1829   //! @brief Shift Bits Left.
1830   //! @note @a src register can be only @c cl.
salX86Assembler1831   inline void sal(const Mem& dst, const GpReg& src)
1832   { _emitInstruction(kX86InstSal, &dst, &src); }
1833   //! @brief Shift Bits Left.
salX86Assembler1834   inline void sal(const Mem& dst, const Imm& src)
1835   { _emitInstruction(kX86InstSal, &dst, &src); }
1836 
1837   //! @brief Shift Bits Right.
1838   //! @note @a src register can be only @c cl.
sarX86Assembler1839   inline void sar(const GpReg& dst, const GpReg& src)
1840   { _emitInstruction(kX86InstSar, &dst, &src); }
1841   //! @brief Shift Bits Right.
sarX86Assembler1842   inline void sar(const GpReg& dst, const Imm& src)
1843   { _emitInstruction(kX86InstSar, &dst, &src); }
1844   //! @brief Shift Bits Right.
1845   //! @note @a src register can be only @c cl.
sarX86Assembler1846   inline void sar(const Mem& dst, const GpReg& src)
1847   { _emitInstruction(kX86InstSar, &dst, &src); }
1848   //! @brief Shift Bits Right.
sarX86Assembler1849   inline void sar(const Mem& dst, const Imm& src)
1850   { _emitInstruction(kX86InstSar, &dst, &src); }
1851 
1852   //! @brief Set Byte on Condition.
setX86Assembler1853   inline void set(kX86Cond cc, const GpReg& dst)
1854   {
1855     ASMJIT_ASSERT(dst.getSize() == 1);
1856     _emitInstruction(X86Util::getSetccInstFromCond(cc), &dst);
1857   }
1858 
1859   //! @brief Set Byte on Condition.
setX86Assembler1860   inline void set(kX86Cond cc, const Mem& dst)
1861   {
1862     ASMJIT_ASSERT(dst.getSize() <= 1);
1863     _emitInstruction(X86Util::getSetccInstFromCond(cc), &dst);
1864   }
1865 
1866   //! @brief Set Byte on Condition.
setaX86Assembler1867   inline void seta  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetA  , &dst); }
1868   //! @brief Set Byte on Condition.
setaX86Assembler1869   inline void seta  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetA  , &dst); }
1870   //! @brief Set Byte on Condition.
setaeX86Assembler1871   inline void setae (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetAE , &dst); }
1872   //! @brief Set Byte on Condition.
setaeX86Assembler1873   inline void setae (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetAE , &dst); }
1874   //! @brief Set Byte on Condition.
setbX86Assembler1875   inline void setb  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetB  , &dst); }
1876   //! @brief Set Byte on Condition.
setbX86Assembler1877   inline void setb  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetB  , &dst); }
1878   //! @brief Set Byte on Condition.
setbeX86Assembler1879   inline void setbe (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetBE , &dst); }
1880   //! @brief Set Byte on Condition.
setbeX86Assembler1881   inline void setbe (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetBE , &dst); }
1882   //! @brief Set Byte on Condition.
setcX86Assembler1883   inline void setc  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetC  , &dst); }
1884   //! @brief Set Byte on Condition.
setcX86Assembler1885   inline void setc  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetC  , &dst); }
1886   //! @brief Set Byte on Condition.
seteX86Assembler1887   inline void sete  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetE  , &dst); }
1888   //! @brief Set Byte on Condition.
seteX86Assembler1889   inline void sete  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetE  , &dst); }
1890   //! @brief Set Byte on Condition.
setgX86Assembler1891   inline void setg  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetG  , &dst); }
1892   //! @brief Set Byte on Condition.
setgX86Assembler1893   inline void setg  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetG  , &dst); }
1894   //! @brief Set Byte on Condition.
setgeX86Assembler1895   inline void setge (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetGE , &dst); }
1896   //! @brief Set Byte on Condition.
setgeX86Assembler1897   inline void setge (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetGE , &dst); }
1898   //! @brief Set Byte on Condition.
setlX86Assembler1899   inline void setl  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetL  , &dst); }
1900   //! @brief Set Byte on Condition.
setlX86Assembler1901   inline void setl  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetL  , &dst); }
1902   //! @brief Set Byte on Condition.
setleX86Assembler1903   inline void setle (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetLE , &dst); }
1904   //! @brief Set Byte on Condition.
setleX86Assembler1905   inline void setle (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetLE , &dst); }
1906   //! @brief Set Byte on Condition.
setnaX86Assembler1907   inline void setna (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNA , &dst); }
1908   //! @brief Set Byte on Condition.
setnaX86Assembler1909   inline void setna (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNA , &dst); }
1910   //! @brief Set Byte on Condition.
setnaeX86Assembler1911   inline void setnae(const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNAE, &dst); }
1912   //! @brief Set Byte on Condition.
setnaeX86Assembler1913   inline void setnae(const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNAE, &dst); }
1914   //! @brief Set Byte on Condition.
setnbX86Assembler1915   inline void setnb (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNB , &dst); }
1916   //! @brief Set Byte on Condition.
setnbX86Assembler1917   inline void setnb (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNB , &dst); }
1918   //! @brief Set Byte on Condition.
setnbeX86Assembler1919   inline void setnbe(const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNBE, &dst); }
1920   //! @brief Set Byte on Condition.
setnbeX86Assembler1921   inline void setnbe(const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNBE, &dst); }
1922   //! @brief Set Byte on Condition.
setncX86Assembler1923   inline void setnc (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNC , &dst); }
1924   //! @brief Set Byte on Condition.
setncX86Assembler1925   inline void setnc (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNC , &dst); }
1926   //! @brief Set Byte on Condition.
setneX86Assembler1927   inline void setne (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNE , &dst); }
1928   //! @brief Set Byte on Condition.
setneX86Assembler1929   inline void setne (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNE , &dst); }
1930   //! @brief Set Byte on Condition.
setngX86Assembler1931   inline void setng (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNG , &dst); }
1932   //! @brief Set Byte on Condition.
setngX86Assembler1933   inline void setng (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNG , &dst); }
1934   //! @brief Set Byte on Condition.
setngeX86Assembler1935   inline void setnge(const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNGE, &dst); }
1936   //! @brief Set Byte on Condition.
setngeX86Assembler1937   inline void setnge(const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNGE, &dst); }
1938   //! @brief Set Byte on Condition.
setnlX86Assembler1939   inline void setnl (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNL , &dst); }
1940   //! @brief Set Byte on Condition.
setnlX86Assembler1941   inline void setnl (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNL , &dst); }
1942   //! @brief Set Byte on Condition.
setnleX86Assembler1943   inline void setnle(const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNLE, &dst); }
1944   //! @brief Set Byte on Condition.
setnleX86Assembler1945   inline void setnle(const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNLE, &dst); }
1946   //! @brief Set Byte on Condition.
setnoX86Assembler1947   inline void setno (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNO , &dst); }
1948   //! @brief Set Byte on Condition.
setnoX86Assembler1949   inline void setno (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNO , &dst); }
1950   //! @brief Set Byte on Condition.
setnpX86Assembler1951   inline void setnp (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNP , &dst); }
1952   //! @brief Set Byte on Condition.
setnpX86Assembler1953   inline void setnp (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNP , &dst); }
1954   //! @brief Set Byte on Condition.
setnsX86Assembler1955   inline void setns (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNS , &dst); }
1956   //! @brief Set Byte on Condition.
setnsX86Assembler1957   inline void setns (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNS , &dst); }
1958   //! @brief Set Byte on Condition.
setnzX86Assembler1959   inline void setnz (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNZ , &dst); }
1960   //! @brief Set Byte on Condition.
setnzX86Assembler1961   inline void setnz (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNZ , &dst); }
1962   //! @brief Set Byte on Condition.
setoX86Assembler1963   inline void seto  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetO  , &dst); }
1964   //! @brief Set Byte on Condition.
setoX86Assembler1965   inline void seto  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetO  , &dst); }
1966   //! @brief Set Byte on Condition.
setpX86Assembler1967   inline void setp  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetP  , &dst); }
1968   //! @brief Set Byte on Condition.
setpX86Assembler1969   inline void setp  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetP  , &dst); }
1970   //! @brief Set Byte on Condition.
setpeX86Assembler1971   inline void setpe (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetPE , &dst); }
1972   //! @brief Set Byte on Condition.
setpeX86Assembler1973   inline void setpe (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetPE , &dst); }
1974   //! @brief Set Byte on Condition.
setpoX86Assembler1975   inline void setpo (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetPO , &dst); }
1976   //! @brief Set Byte on Condition.
setpoX86Assembler1977   inline void setpo (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetPO , &dst); }
1978   //! @brief Set Byte on Condition.
setsX86Assembler1979   inline void sets  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetS  , &dst); }
1980   //! @brief Set Byte on Condition.
setsX86Assembler1981   inline void sets  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetS  , &dst); }
1982   //! @brief Set Byte on Condition.
setzX86Assembler1983   inline void setz  (const GpReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetZ  , &dst); }
1984   //! @brief Set Byte on Condition.
setzX86Assembler1985   inline void setz  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetZ  , &dst); }
1986 
1987   //! @brief Shift Bits Left.
1988   //! @note @a src register can be only @c cl.
shlX86Assembler1989   inline void shl(const GpReg& dst, const GpReg& src)
1990   { _emitInstruction(kX86InstShl, &dst, &src); }
1991   //! @brief Shift Bits Left.
shlX86Assembler1992   inline void shl(const GpReg& dst, const Imm& src)
1993   { _emitInstruction(kX86InstShl, &dst, &src); }
1994   //! @brief Shift Bits Left.
1995   //! @note @a src register can be only @c cl.
shlX86Assembler1996   inline void shl(const Mem& dst, const GpReg& src)
1997   { _emitInstruction(kX86InstShl, &dst, &src); }
1998   //! @brief Shift Bits Left.
shlX86Assembler1999   inline void shl(const Mem& dst, const Imm& src)
2000   { _emitInstruction(kX86InstShl, &dst, &src); }
2001 
2002   //! @brief Shift Bits Right.
2003   //! @note @a src register can be only @c cl.
shrX86Assembler2004   inline void shr(const GpReg& dst, const GpReg& src)
2005   { _emitInstruction(kX86InstShr, &dst, &src); }
2006   //! @brief Shift Bits Right.
shrX86Assembler2007   inline void shr(const GpReg& dst, const Imm& src)
2008   { _emitInstruction(kX86InstShr, &dst, &src); }
2009   //! @brief Shift Bits Right.
2010   //! @note @a src register can be only @c cl.
shrX86Assembler2011   inline void shr(const Mem& dst, const GpReg& src)
2012   { _emitInstruction(kX86InstShr, &dst, &src); }
2013   //! @brief Shift Bits Right.
shrX86Assembler2014   inline void shr(const Mem& dst, const Imm& src)
2015   { _emitInstruction(kX86InstShr, &dst, &src); }
2016 
2017   //! @brief Double Precision Shift Left.
2018   //! @note src2 register can be only @c cl register.
shldX86Assembler2019   inline void shld(const GpReg& dst, const GpReg& src1, const GpReg& src2)
2020   { _emitInstruction(kX86InstShld, &dst, &src1, &src2); }
2021   //! @brief Double Precision Shift Left.
shldX86Assembler2022   inline void shld(const GpReg& dst, const GpReg& src1, const Imm& src2)
2023   { _emitInstruction(kX86InstShld, &dst, &src1, &src2); }
2024   //! @brief Double Precision Shift Left.
2025   //! @note src2 register can be only @c cl register.
shldX86Assembler2026   inline void shld(const Mem& dst, const GpReg& src1, const GpReg& src2)
2027   { _emitInstruction(kX86InstShld, &dst, &src1, &src2); }
2028   //! @brief Double Precision Shift Left.
shldX86Assembler2029   inline void shld(const Mem& dst, const GpReg& src1, const Imm& src2)
2030   { _emitInstruction(kX86InstShld, &dst, &src1, &src2); }
2031 
2032   //! @brief Double Precision Shift Right.
2033   //! @note src2 register can be only @c cl register.
shrdX86Assembler2034   inline void shrd(const GpReg& dst, const GpReg& src1, const GpReg& src2)
2035   { _emitInstruction(kX86InstShrd, &dst, &src1, &src2); }
2036   //! @brief Double Precision Shift Right.
shrdX86Assembler2037   inline void shrd(const GpReg& dst, const GpReg& src1, const Imm& src2)
2038   { _emitInstruction(kX86InstShrd, &dst, &src1, &src2); }
2039   //! @brief Double Precision Shift Right.
2040   //! @note src2 register can be only @c cl register.
shrdX86Assembler2041   inline void shrd(const Mem& dst, const GpReg& src1, const GpReg& src2)
2042   { _emitInstruction(kX86InstShrd, &dst, &src1, &src2); }
2043   //! @brief Double Precision Shift Right.
shrdX86Assembler2044   inline void shrd(const Mem& dst, const GpReg& src1, const Imm& src2)
2045   { _emitInstruction(kX86InstShrd, &dst, &src1, &src2); }
2046 
2047   //! @brief Set Carry Flag to 1.
stcX86Assembler2048   inline void stc()
2049   { _emitInstruction(kX86InstStc); }
2050 
2051   //! @brief Set Direction Flag to 1.
stdX86Assembler2052   inline void std()
2053   { _emitInstruction(kX86InstStd); }
2054 
2055   //! @brief Subtract.
subX86Assembler2056   inline void sub(const GpReg& dst, const GpReg& src)
2057   { _emitInstruction(kX86InstSub, &dst, &src); }
2058   //! @brief Subtract.
subX86Assembler2059   inline void sub(const GpReg& dst, const Mem& src)
2060   { _emitInstruction(kX86InstSub, &dst, &src); }
2061   //! @brief Subtract.
subX86Assembler2062   inline void sub(const GpReg& dst, const Imm& src)
2063   { _emitInstruction(kX86InstSub, &dst, &src); }
2064   //! @brief Subtract.
subX86Assembler2065   inline void sub(const Mem& dst, const GpReg& src)
2066   { _emitInstruction(kX86InstSub, &dst, &src); }
2067   //! @brief Subtract.
subX86Assembler2068   inline void sub(const Mem& dst, const Imm& src)
2069   { _emitInstruction(kX86InstSub, &dst, &src); }
2070 
2071   //! @brief Logical Compare.
testX86Assembler2072   inline void test(const GpReg& op1, const GpReg& op2)
2073   { _emitInstruction(kX86InstTest, &op1, &op2); }
2074   //! @brief Logical Compare.
testX86Assembler2075   inline void test(const GpReg& op1, const Imm& op2)
2076   { _emitInstruction(kX86InstTest, &op1, &op2); }
2077   //! @brief Logical Compare.
testX86Assembler2078   inline void test(const Mem& op1, const GpReg& op2)
2079   { _emitInstruction(kX86InstTest, &op1, &op2); }
2080   //! @brief Logical Compare.
testX86Assembler2081   inline void test(const Mem& op1, const Imm& op2)
2082   { _emitInstruction(kX86InstTest, &op1, &op2); }
2083 
2084   //! @brief Undefined instruction - Raise invalid opcode exception.
ud2X86Assembler2085   inline void ud2()
2086   { _emitInstruction(kX86InstUd2); }
2087 
2088   //! @brief Exchange and Add.
xaddX86Assembler2089   inline void xadd(const GpReg& dst, const GpReg& src)
2090   { _emitInstruction(kX86InstXadd, &dst, &src); }
2091   //! @brief Exchange and Add.
xaddX86Assembler2092   inline void xadd(const Mem& dst, const GpReg& src)
2093   { _emitInstruction(kX86InstXadd, &dst, &src); }
2094 
2095   //! @brief Exchange Register/Memory with Register.
xchgX86Assembler2096   inline void xchg(const GpReg& dst, const GpReg& src)
2097   { _emitInstruction(kX86InstXchg, &dst, &src); }
2098   //! @brief Exchange Register/Memory with Register.
xchgX86Assembler2099   inline void xchg(const Mem& dst, const GpReg& src)
2100   { _emitInstruction(kX86InstXchg, &dst, &src); }
2101   //! @brief Exchange Register/Memory with Register.
xchgX86Assembler2102   inline void xchg(const GpReg& dst, const Mem& src)
2103   { _emitInstruction(kX86InstXchg, &src, &dst); }
2104 
2105   //! @brief Exchange Register/Memory with Register.
xor_X86Assembler2106   inline void xor_(const GpReg& dst, const GpReg& src)
2107   { _emitInstruction(kX86InstXor, &dst, &src); }
2108   //! @brief Exchange Register/Memory with Register.
xor_X86Assembler2109   inline void xor_(const GpReg& dst, const Mem& src)
2110   { _emitInstruction(kX86InstXor, &dst, &src); }
2111   //! @brief Exchange Register/Memory with Register.
xor_X86Assembler2112   inline void xor_(const GpReg& dst, const Imm& src)
2113   { _emitInstruction(kX86InstXor, &dst, &src); }
2114   //! @brief Exchange Register/Memory with Register.
xor_X86Assembler2115   inline void xor_(const Mem& dst, const GpReg& src)
2116   { _emitInstruction(kX86InstXor, &dst, &src); }
2117   //! @brief Exchange Register/Memory with Register.
xor_X86Assembler2118   inline void xor_(const Mem& dst, const Imm& src)
2119   { _emitInstruction(kX86InstXor, &dst, &src); }
2120 
2121   // --------------------------------------------------------------------------
2122   // [X87 Instructions (FPU)]
2123   // --------------------------------------------------------------------------
2124 
2125   //! @brief Compute 2^x - 1 (FPU).
f2xm1X86Assembler2126   inline void f2xm1()
2127   { _emitInstruction(kX86InstF2XM1); }
2128 
2129   //! @brief Absolute Value of st(0) (FPU).
fabsX86Assembler2130   inline void fabs()
2131   { _emitInstruction(kX86InstFAbs); }
2132 
2133   //! @brief Add @a src to @a dst and store result in @a dst (FPU).
2134   //!
2135   //! @note One of dst or src must be st(0).
faddX86Assembler2136   inline void fadd(const X87Reg& dst, const X87Reg& src)
2137   {
2138     ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0);
2139     _emitInstruction(kX86InstFAdd, &dst, &src);
2140   }
2141 
2142   //! @brief Add @a src to st(0) and store result in st(0) (FPU).
2143   //!
2144   //! @note SP-FP or DP-FP determined by @a adr size.
faddX86Assembler2145   inline void fadd(const Mem& src)
2146   { _emitInstruction(kX86InstFAdd, &src); }
2147 
2148   //! @brief Add st(0) to @a dst and POP register stack (FPU).
2149   inline void faddp(const X87Reg& dst = st(1))
2150   { _emitInstruction(kX86InstFAddP, &dst); }
2151 
2152   //! @brief Load Binary Coded Decimal (FPU).
fbldX86Assembler2153   inline void fbld(const Mem& src)
2154   { _emitInstruction(kX86InstFBLd, &src); }
2155 
2156   //! @brief Store BCD Integer and Pop (FPU).
fbstpX86Assembler2157   inline void fbstp(const Mem& dst)
2158   { _emitInstruction(kX86InstFBStP, &dst); }
2159 
2160   //! @brief Change st(0) Sign (FPU).
fchsX86Assembler2161   inline void fchs()
2162   { _emitInstruction(kX86InstFCHS); }
2163 
2164   //! @brief Clear Exceptions (FPU).
2165   //!
2166   //! Clear floating-point exception flags after checking for pending unmasked
2167   //! floating-point exceptions.
2168   //!
2169   //! Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE),
2170   //! the exception summary status flag (ES), the stack fault flag (SF), and
2171   //! the busy flag (B) in the FPU status word. The FCLEX instruction checks
2172   //! for and handles any pending unmasked floating-point exceptions before
2173   //! clearing the exception flags.
fclexX86Assembler2174   inline void fclex()
2175   { _emitInstruction(kX86InstFClex); }
2176 
2177   //! @brief FP Conditional Move (FPU).
fcmovbX86Assembler2178   inline void fcmovb(const X87Reg& src)
2179   { _emitInstruction(kX86InstFCMovB, &src); }
2180   //! @brief FP Conditional Move (FPU).
fcmovbeX86Assembler2181   inline void fcmovbe(const X87Reg& src)
2182   { _emitInstruction(kX86InstFCMovBE, &src); }
2183   //! @brief FP Conditional Move (FPU).
fcmoveX86Assembler2184   inline void fcmove(const X87Reg& src)
2185   { _emitInstruction(kX86InstFCMovE, &src); }
2186   //! @brief FP Conditional Move (FPU).
fcmovnbX86Assembler2187   inline void fcmovnb(const X87Reg& src)
2188   { _emitInstruction(kX86InstFCMovNB, &src); }
2189   //! @brief FP Conditional Move (FPU).
fcmovnbeX86Assembler2190   inline void fcmovnbe(const X87Reg& src)
2191   { _emitInstruction(kX86InstFCMovNBE, &src); }
2192   //! @brief FP Conditional Move (FPU).
fcmovneX86Assembler2193   inline void fcmovne(const X87Reg& src)
2194   { _emitInstruction(kX86InstFCMovNE, &src); }
2195   //! @brief FP Conditional Move (FPU).
fcmovnuX86Assembler2196   inline void fcmovnu(const X87Reg& src)
2197   { _emitInstruction(kX86InstFCMovNU, &src); }
2198   //! @brief FP Conditional Move (FPU).
fcmovuX86Assembler2199   inline void fcmovu(const X87Reg& src)
2200   { _emitInstruction(kX86InstFCMovU, &src); }
2201 
2202   //! @brief Compare st(0) with @a reg (FPU).
2203   inline void fcom(const X87Reg& reg = st(1))
2204   { _emitInstruction(kX86InstFCom, &reg); }
2205   //! @brief Compare st(0) with 4-byte or 8-byte FP at @a src (FPU).
fcomX86Assembler2206   inline void fcom(const Mem& src)
2207   { _emitInstruction(kX86InstFCom, &src); }
2208 
2209   //! @brief Compare st(0) with @a reg and pop the stack (FPU).
2210   inline void fcomp(const X87Reg& reg = st(1))
2211   { _emitInstruction(kX86InstFComP, &reg); }
2212   //! @brief Compare st(0) with 4-byte or 8-byte FP at @a adr and pop the
2213   //! stack (FPU).
fcompX86Assembler2214   inline void fcomp(const Mem& mem)
2215   { _emitInstruction(kX86InstFComP, &mem); }
2216 
2217   //! @brief Compare st(0) with st(1) and pop register stack twice (FPU).
fcomppX86Assembler2218   inline void fcompp()
2219   { _emitInstruction(kX86InstFComPP); }
2220 
2221   //! @brief Compare st(0) and @a reg and Set EFLAGS (FPU).
fcomiX86Assembler2222   inline void fcomi(const X87Reg& reg)
2223   { _emitInstruction(kX86InstFComI, &reg); }
2224 
2225   //! @brief Compare st(0) and @a reg and Set EFLAGS and pop the stack (FPU).
fcomipX86Assembler2226   inline void fcomip(const X87Reg& reg)
2227   { _emitInstruction(kX86InstFComIP, &reg); }
2228 
2229   //! @brief Cosine (FPU).
2230   //!
2231   //! This instruction calculates the cosine of the source operand in
2232   //! register st(0) and stores the result in st(0).
fcosX86Assembler2233   inline void fcos()
2234   { _emitInstruction(kX86InstFCos); }
2235 
2236   //! @brief Decrement Stack-Top Pointer (FPU).
2237   //!
2238   //! Subtracts one from the TOP field of the FPU status word (decrements
2239   //! the top-ofstack pointer). If the TOP field contains a 0, it is set
2240   //! to 7. The effect of this instruction is to rotate the stack by one
2241   //! position. The contents of the FPU data registers and tag register
2242   //! are not affected.
fdecstpX86Assembler2243   inline void fdecstp()
2244   { _emitInstruction(kX86InstFDecStP); }
2245 
2246   //! @brief Divide @a dst by @a src (FPU).
2247   //!
2248   //! @note One of @a dst or @a src register must be st(0).
fdivX86Assembler2249   inline void fdiv(const X87Reg& dst, const X87Reg& src)
2250   {
2251     ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0);
2252     _emitInstruction(kX86InstFDiv, &dst, &src);
2253   }
2254   //! @brief Divide st(0) by 32-bit or 64-bit FP value (FPU).
fdivX86Assembler2255   inline void fdiv(const Mem& src)
2256   { _emitInstruction(kX86InstFDiv, &src); }
2257 
2258   //! @brief Divide @a reg by st(0) (FPU).
2259   inline void fdivp(const X87Reg& reg = st(1))
2260   { _emitInstruction(kX86InstFDivP, &reg); }
2261 
2262   //! @brief Reverse Divide @a dst by @a src (FPU).
2263   //!
2264   //! @note One of @a dst or @a src register must be st(0).
fdivrX86Assembler2265   inline void fdivr(const X87Reg& dst, const X87Reg& src)
2266   {
2267     ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0);
2268     _emitInstruction(kX86InstFDivR, &dst, &src);
2269   }
2270   //! @brief Reverse Divide st(0) by 32-bit or 64-bit FP value (FPU).
fdivrX86Assembler2271   inline void fdivr(const Mem& src)
2272   { _emitInstruction(kX86InstFDivR, &src); }
2273 
2274   //! @brief Reverse Divide @a reg by st(0) (FPU).
2275   inline void fdivrp(const X87Reg& reg = st(1))
2276   { _emitInstruction(kX86InstFDivRP, &reg); }
2277 
2278   //! @brief Free Floating-Point Register (FPU).
2279   //!
2280   //! Sets the tag in the FPU tag register associated with register @a reg
2281   //! to empty (11B). The contents of @a reg and the FPU stack-top pointer
2282   //! (TOP) are not affected.
ffreeX86Assembler2283   inline void ffree(const X87Reg& reg)
2284   { _emitInstruction(kX86InstFFree, &reg); }
2285 
2286   //! @brief Add 16-bit or 32-bit integer to st(0) (FPU).
fiaddX86Assembler2287   inline void fiadd(const Mem& src)
2288   {
2289     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4);
2290     _emitInstruction(kX86InstFIAdd, &src);
2291   }
2292 
2293   //! @brief Compare st(0) with 16-bit or 32-bit Integer (FPU).
ficomX86Assembler2294   inline void ficom(const Mem& src)
2295   {
2296     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4);
2297     _emitInstruction(kX86InstFICom, &src);
2298   }
2299 
2300   //! @brief Compare st(0) with 16-bit or 32-bit Integer and pop the stack (FPU).
ficompX86Assembler2301   inline void ficomp(const Mem& src)
2302   {
2303     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4);
2304     _emitInstruction(kX86InstFIComP, &src);
2305   }
2306 
2307   //! @brief Divide st(0) by 32-bit or 16-bit integer (@a src) (FPU).
fidivX86Assembler2308   inline void fidiv(const Mem& src)
2309   {
2310     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4);
2311     _emitInstruction(kX86InstFIDiv, &src);
2312   }
2313 
2314   //! @brief Reverse Divide st(0) by 32-bit or 16-bit integer (@a src) (FPU).
fidivrX86Assembler2315   inline void fidivr(const Mem& src)
2316   {
2317     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4);
2318     _emitInstruction(kX86InstFIDivR, &src);
2319   }
2320 
2321   //! @brief Load 16-bit, 32-bit or 64-bit Integer and push it to the stack (FPU).
2322   //!
2323   //! Converts the signed-integer source operand into double extended-precision
2324   //! floating point format and pushes the value onto the FPU register stack.
2325   //! The source operand can be a word, doubleword, or quadword integer. It is
2326   //! loaded without rounding errors. The sign of the source operand is
2327   //! preserved.
fildX86Assembler2328   inline void fild(const Mem& src)
2329   {
2330     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4 || src.getSize() == 8);
2331     _emitInstruction(kX86InstFILd, &src);
2332   }
2333 
2334   //! @brief Multiply st(0) by 16-bit or 32-bit integer and store it
2335   //! to st(0) (FPU).
fimulX86Assembler2336   inline void fimul(const Mem& src)
2337   {
2338     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4);
2339     _emitInstruction(kX86InstFIMul, &src);
2340   }
2341 
2342   //! @brief Increment Stack-Top Pointer (FPU).
2343   //!
2344   //! Adds one to the TOP field of the FPU status word (increments the
2345   //! top-of-stack pointer). If the TOP field contains a 7, it is set to 0.
2346   //! The effect of this instruction is to rotate the stack by one position.
2347   //! The contents of the FPU data registers and tag register are not affected.
2348   //! This operation is not equivalent to popping the stack, because the tag
2349   //! for the previous top-of-stack register is not marked empty.
fincstpX86Assembler2350   inline void fincstp()
2351   { _emitInstruction(kX86InstFIncStP); }
2352 
2353   //! @brief Initialize Floating-Point Unit (FPU).
2354   //!
2355   //! Initialize FPU after checking for pending unmasked floating-point
2356   //! exceptions.
finitX86Assembler2357   inline void finit()
2358   { _emitInstruction(kX86InstFInit); }
2359 
2360   //! @brief Subtract 16-bit or 32-bit integer from st(0) and store result to
2361   //! st(0) (FPU).
fisubX86Assembler2362   inline void fisub(const Mem& src)
2363   {
2364     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4);
2365     _emitInstruction(kX86InstFISub, &src);
2366   }
2367 
2368   //! @brief Reverse Subtract 16-bit or 32-bit integer from st(0) and
2369   //! store result to  st(0) (FPU).
fisubrX86Assembler2370   inline void fisubr(const Mem& src)
2371   {
2372     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4);
2373     _emitInstruction(kX86InstFISubR, &src);
2374   }
2375 
2376   //! @brief Initialize Floating-Point Unit (FPU).
2377   //!
2378   //! Initialize FPU without checking for pending unmasked floating-point
2379   //! exceptions.
fninitX86Assembler2380   inline void fninit()
2381   { _emitInstruction(kX86InstFNInit); }
2382 
2383   //! @brief Store st(0) as 16-bit or 32-bit Integer to @a dst (FPU).
fistX86Assembler2384   inline void fist(const Mem& dst)
2385   {
2386     ASMJIT_ASSERT(dst.getSize() == 2 || dst.getSize() == 4);
2387     _emitInstruction(kX86InstFISt, &dst);
2388   }
2389 
2390   //! @brief Store st(0) as 16-bit, 32-bit or 64-bit Integer to @a dst and pop
2391   //! stack (FPU).
fistpX86Assembler2392   inline void fistp(const Mem& dst)
2393   {
2394     ASMJIT_ASSERT(dst.getSize() == 2 || dst.getSize() == 4 || dst.getSize() == 8);
2395     _emitInstruction(kX86InstFIStP, &dst);
2396   }
2397 
2398   //! @brief Push 32-bit, 64-bit or 80-bit Floating Point Value onto the FPU
2399   //! register stack (FPU).
fldX86Assembler2400   inline void fld(const Mem& src)
2401   {
2402     ASMJIT_ASSERT(src.getSize() == 4 || src.getSize() == 8 || src.getSize() == 10);
2403     _emitInstruction(kX86InstFLd, &src);
2404   }
2405 
2406   //! @brief Push @a reg onto the FPU register stack (FPU).
fldX86Assembler2407   inline void fld(const X87Reg& reg)
2408   { _emitInstruction(kX86InstFLd, &reg); }
2409 
2410   //! @brief Push +1.0 onto the FPU register stack (FPU).
fld1X86Assembler2411   inline void fld1()
2412   { _emitInstruction(kX86InstFLd1); }
2413 
2414   //! @brief Push log2(10) onto the FPU register stack (FPU).
fldl2tX86Assembler2415   inline void fldl2t()
2416   { _emitInstruction(kX86InstFLdL2T); }
2417 
2418   //! @brief Push log2(e) onto the FPU register stack (FPU).
fldl2eX86Assembler2419   inline void fldl2e()
2420   { _emitInstruction(kX86InstFLdL2E); }
2421 
2422   //! @brief Push pi onto the FPU register stack (FPU).
fldpiX86Assembler2423   inline void fldpi()
2424   { _emitInstruction(kX86InstFLdPi); }
2425 
2426   //! @brief Push log10(2) onto the FPU register stack (FPU).
fldlg2X86Assembler2427   inline void fldlg2()
2428   { _emitInstruction(kX86InstFLdLg2); }
2429 
2430   //! @brief Push ln(2) onto the FPU register stack (FPU).
fldln2X86Assembler2431   inline void fldln2()
2432   { _emitInstruction(kX86InstFLdLn2); }
2433 
2434   //! @brief Push +0.0 onto the FPU register stack (FPU).
fldzX86Assembler2435   inline void fldz()
2436   { _emitInstruction(kX86InstFLdZ); }
2437 
2438   //! @brief Load x87 FPU Control Word (2 bytes) (FPU).
fldcwX86Assembler2439   inline void fldcw(const Mem& src)
2440   { _emitInstruction(kX86InstFLdCw, &src); }
2441 
2442   //! @brief Load x87 FPU Environment (14 or 28 bytes) (FPU).
fldenvX86Assembler2443   inline void fldenv(const Mem& src)
2444   { _emitInstruction(kX86InstFLdEnv, &src); }
2445 
2446   //! @brief Multiply @a dst by @a src and store result in @a dst (FPU).
2447   //!
2448   //! @note One of dst or src must be st(0).
fmulX86Assembler2449   inline void fmul(const X87Reg& dst, const X87Reg& src)
2450   {
2451     ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0);
2452     _emitInstruction(kX86InstFMul, &dst, &src);
2453   }
2454   //! @brief Multiply st(0) by @a src and store result in st(0) (FPU).
2455   //!
2456   //! @note SP-FP or DP-FP determined by @a adr size.
fmulX86Assembler2457   inline void fmul(const Mem& src)
2458   { _emitInstruction(kX86InstFMul, &src); }
2459 
2460   //! @brief Multiply st(0) by @a dst and POP register stack (FPU).
2461   inline void fmulp(const X87Reg& dst = st(1))
2462   { _emitInstruction(kX86InstFMulP, &dst); }
2463 
2464   //! @brief Clear Exceptions (FPU).
2465   //!
2466   //! Clear floating-point exception flags without checking for pending
2467   //! unmasked floating-point exceptions.
2468   //!
2469   //! Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE),
2470   //! the exception summary status flag (ES), the stack fault flag (SF), and
2471   //! the busy flag (B) in the FPU status word. The FCLEX instruction does
2472   //! not checks for and handles any pending unmasked floating-point exceptions
2473   //! before clearing the exception flags.
fnclexX86Assembler2474   inline void fnclex()
2475   { _emitInstruction(kX86InstFNClex); }
2476 
2477   //! @brief No Operation (FPU).
fnopX86Assembler2478   inline void fnop()
2479   { _emitInstruction(kX86InstFNop); }
2480 
2481   //! @brief Save FPU State (FPU).
2482   //!
2483   //! Store FPU environment to m94byte or m108byte without
2484   //! checking for pending unmasked FP exceptions.
2485   //! Then re-initialize the FPU.
fnsaveX86Assembler2486   inline void fnsave(const Mem& dst)
2487   { _emitInstruction(kX86InstFNSave, &dst); }
2488 
2489   //! @brief Store x87 FPU Environment (FPU).
2490   //!
2491   //! Store FPU environment to @a dst (14 or 28 Bytes) without checking for
2492   //! pending unmasked floating-point exceptions. Then mask all floating
2493   //! point exceptions.
fnstenvX86Assembler2494   inline void fnstenv(const Mem& dst)
2495   { _emitInstruction(kX86InstFNStEnv, &dst); }
2496 
2497   //! @brief Store x87 FPU Control Word (FPU).
2498   //!
2499   //! Store FPU control word to @a dst (2 Bytes) without checking for pending
2500   //! unmasked floating-point exceptions.
fnstcwX86Assembler2501   inline void fnstcw(const Mem& dst)
2502   { _emitInstruction(kX86InstFNStCw, &dst); }
2503 
2504   //! @brief Store x87 FPU Status Word (2 Bytes) (FPU).
fnstswX86Assembler2505   inline void fnstsw(const GpReg& dst)
2506   {
2507     ASMJIT_ASSERT(dst.isRegCode(kX86RegAx));
2508     _emitInstruction(kX86InstFNStSw, &dst);
2509   }
2510   //! @brief Store x87 FPU Status Word (2 Bytes) (FPU).
fnstswX86Assembler2511   inline void fnstsw(const Mem& dst)
2512   { _emitInstruction(kX86InstFNStSw, &dst); }
2513 
2514   //! @brief Partial Arctangent (FPU).
2515   //!
2516   //! Replace st(1) with arctan(st(1)/st(0)) and pop the register stack.
fpatanX86Assembler2517   inline void fpatan()
2518   { _emitInstruction(kX86InstFPAtan); }
2519 
2520   //! @brief Partial Remainder (FPU).
2521   //!
2522   //! Replace st(0) with the remainder obtained from dividing st(0) by st(1).
fpremX86Assembler2523   inline void fprem()
2524   { _emitInstruction(kX86InstFPRem); }
2525 
2526   //! @brief Partial Remainder (FPU).
2527   //!
2528   //! Replace st(0) with the IEEE remainder obtained from dividing st(0) by
2529   //! st(1).
fprem1X86Assembler2530   inline void fprem1()
2531   { _emitInstruction(kX86InstFPRem1); }
2532 
2533   //! @brief Partial Tangent (FPU).
2534   //!
2535   //! Replace st(0) with its tangent and push 1 onto the FPU stack.
fptanX86Assembler2536   inline void fptan()
2537   { _emitInstruction(kX86InstFPTan); }
2538 
2539   //! @brief Round to Integer (FPU).
2540   //!
2541   //! Rount st(0) to an Integer.
frndintX86Assembler2542   inline void frndint()
2543   { _emitInstruction(kX86InstFRndInt); }
2544 
2545   //! @brief Restore FPU State (FPU).
2546   //!
2547   //! Load FPU state from src (94 or 108 bytes).
frstorX86Assembler2548   inline void frstor(const Mem& src)
2549   { _emitInstruction(kX86InstFRstor, &src); }
2550 
2551   //! @brief Save FPU State (FPU).
2552   //!
2553   //! Store FPU state to 94 or 108-bytes after checking for
2554   //! pending unmasked FP exceptions. Then reinitialize
2555   //! the FPU.
fsaveX86Assembler2556   inline void fsave(const Mem& dst)
2557   { _emitInstruction(kX86InstFSave, &dst); }
2558 
2559   //! @brief Scale (FPU).
2560   //!
2561   //! Scale st(0) by st(1).
fscaleX86Assembler2562   inline void fscale()
2563   { _emitInstruction(kX86InstFScale); }
2564 
2565   //! @brief Sine (FPU).
2566   //!
2567   //! This instruction calculates the sine of the source operand in
2568   //! register st(0) and stores the result in st(0).
fsinX86Assembler2569   inline void fsin()
2570   { _emitInstruction(kX86InstFSin); }
2571 
2572   //! @brief Sine and Cosine (FPU).
2573   //!
2574   //! Compute the sine and cosine of st(0); replace st(0) with
2575   //! the sine, and push the cosine onto the register stack.
fsincosX86Assembler2576   inline void fsincos()
2577   { _emitInstruction(kX86InstFSinCos); }
2578 
2579   //! @brief Square Root (FPU).
2580   //!
2581   //! Calculates square root of st(0) and stores the result in st(0).
fsqrtX86Assembler2582   inline void fsqrt()
2583   { _emitInstruction(kX86InstFSqrt); }
2584 
2585   //! @brief Store Floating Point Value (FPU).
2586   //!
2587   //! Store st(0) as 32-bit or 64-bit floating point value to @a dst.
fstX86Assembler2588   inline void fst(const Mem& dst)
2589   {
2590     ASMJIT_ASSERT(dst.getSize() == 4 || dst.getSize() == 8);
2591     _emitInstruction(kX86InstFSt, &dst);
2592   }
2593 
2594   //! @brief Store Floating Point Value (FPU).
2595   //!
2596   //! Store st(0) to @a reg.
fstX86Assembler2597   inline void fst(const X87Reg& reg)
2598   { _emitInstruction(kX86InstFSt, &reg); }
2599 
2600   //! @brief Store Floating Point Value and Pop Register Stack (FPU).
2601   //!
2602   //! Store st(0) as 32-bit or 64-bit floating point value to @a dst
2603   //! and pop register stack.
fstpX86Assembler2604   inline void fstp(const Mem& dst)
2605   {
2606     ASMJIT_ASSERT(dst.getSize() == 4 || dst.getSize() == 8 || dst.getSize() == 10);
2607     _emitInstruction(kX86InstFStP, &dst);
2608   }
2609 
2610   //! @brief Store Floating Point Value and Pop Register Stack (FPU).
2611   //!
2612   //! Store st(0) to @a reg and pop register stack.
fstpX86Assembler2613   inline void fstp(const X87Reg& reg)
2614   { _emitInstruction(kX86InstFStP, &reg); }
2615 
2616   //! @brief Store x87 FPU Control Word (FPU).
2617   //!
2618   //! Store FPU control word to @a dst (2 Bytes) after checking for pending
2619   //! unmasked floating-point exceptions.
fstcwX86Assembler2620   inline void fstcw(const Mem& dst)
2621   { _emitInstruction(kX86InstFStCw, &dst); }
2622 
2623   //! @brief Store x87 FPU Environment (FPU).
2624   //!
2625   //! Store FPU environment to @a dst (14 or 28 Bytes) after checking for
2626   //! pending unmasked floating-point exceptions. Then mask all floating
2627   //! point exceptions.
fstenvX86Assembler2628   inline void fstenv(const Mem& dst)
2629   { _emitInstruction(kX86InstFStEnv, &dst); }
2630 
2631   //! @brief Store x87 FPU Status Word (2 Bytes) (FPU).
fstswX86Assembler2632   inline void fstsw(const GpReg& dst)
2633   {
2634     ASMJIT_ASSERT(dst.isRegCode(kX86RegAx));
2635     _emitInstruction(kX86InstFStSw, &dst);
2636   }
2637   //! @brief Store x87 FPU Status Word (2 Bytes) (FPU).
fstswX86Assembler2638   inline void fstsw(const Mem& dst)
2639   { _emitInstruction(kX86InstFStSw, &dst); }
2640 
2641   //! @brief Subtract @a src from @a dst and store result in @a dst (FPU).
2642   //!
2643   //! @note One of dst or src must be st(0).
fsubX86Assembler2644   inline void fsub(const X87Reg& dst, const X87Reg& src)
2645   {
2646     ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0);
2647     _emitInstruction(kX86InstFSub, &dst, &src);
2648   }
2649   //! @brief Subtract @a src from st(0) and store result in st(0) (FPU).
2650   //!
2651   //! @note SP-FP or DP-FP determined by @a adr size.
fsubX86Assembler2652   inline void fsub(const Mem& src)
2653   {
2654     ASMJIT_ASSERT(src.getSize() == 4 || src.getSize() == 8);
2655     _emitInstruction(kX86InstFSub, &src);
2656   }
2657 
2658   //! @brief Subtract st(0) from @a dst and POP register stack (FPU).
2659   inline void fsubp(const X87Reg& dst = st(1))
2660   { _emitInstruction(kX86InstFSubP, &dst); }
2661 
2662   //! @brief Reverse Subtract @a src from @a dst and store result in @a dst (FPU).
2663   //!
2664   //! @note One of dst or src must be st(0).
fsubrX86Assembler2665   inline void fsubr(const X87Reg& dst, const X87Reg& src)
2666   {
2667     ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0);
2668     _emitInstruction(kX86InstFSubR, &dst, &src);
2669   }
2670 
2671   //! @brief Reverse Subtract @a src from st(0) and store result in st(0) (FPU).
2672   //!
2673   //! @note SP-FP or DP-FP determined by @a adr size.
fsubrX86Assembler2674   inline void fsubr(const Mem& src)
2675   {
2676     ASMJIT_ASSERT(src.getSize() == 4 || src.getSize() == 8);
2677     _emitInstruction(kX86InstFSubR, &src);
2678   }
2679 
2680   //! @brief Reverse Subtract st(0) from @a dst and POP register stack (FPU).
2681   inline void fsubrp(const X87Reg& dst = st(1))
2682   { _emitInstruction(kX86InstFSubRP, &dst); }
2683 
2684   //! @brief Floating point test - Compare st(0) with 0.0. (FPU).
ftstX86Assembler2685   inline void ftst()
2686   { _emitInstruction(kX86InstFTst); }
2687 
2688   //! @brief Unordered Compare st(0) with @a reg (FPU).
2689   inline void fucom(const X87Reg& reg = st(1))
2690   { _emitInstruction(kX86InstFUCom, &reg); }
2691 
2692   //! @brief Unordered Compare st(0) and @a reg, check for ordered values
2693   //! and Set EFLAGS (FPU).
fucomiX86Assembler2694   inline void fucomi(const X87Reg& reg)
2695   { _emitInstruction(kX86InstFUComI, &reg); }
2696 
2697   //! @brief UnorderedCompare st(0) and @a reg, Check for ordered values
2698   //! and Set EFLAGS and pop the stack (FPU).
2699   inline void fucomip(const X87Reg& reg = st(1))
2700   { _emitInstruction(kX86InstFUComIP, &reg); }
2701 
2702   //! @brief Unordered Compare st(0) with @a reg and pop register stack (FPU).
2703   inline void fucomp(const X87Reg& reg = st(1))
2704   { _emitInstruction(kX86InstFUComP, &reg); }
2705 
2706   //! @brief Unordered compare st(0) with st(1) and pop register stack twice
2707   //! (FPU).
fucomppX86Assembler2708   inline void fucompp()
2709   { _emitInstruction(kX86InstFUComPP); }
2710 
fwaitX86Assembler2711   inline void fwait()
2712   { _emitInstruction(kX86InstFWait); }
2713 
2714   //! @brief Examine st(0) (FPU).
2715   //!
2716   //! Examines the contents of the ST(0) register and sets the condition code
2717   //! flags C0, C2, and C3 in the FPU status word to indicate the class of
2718   //! value or number in the register.
fxamX86Assembler2719   inline void fxam()
2720   { _emitInstruction(kX86InstFXam); }
2721 
2722   //! @brief Exchange Register Contents (FPU).
2723   //!
2724   //! Exchange content of st(0) with @a reg.
2725   inline void fxch(const X87Reg& reg = st(1))
2726   { _emitInstruction(kX86InstFXch, &reg); }
2727 
2728   //! @brief Restore FP And MMX(tm) State And Streaming SIMD Extension State
2729   //! (FPU, MMX, SSE).
2730   //!
2731   //! Load FP and MMX(tm) technology and Streaming SIMD Extension state from
2732   //! src (512 bytes).
fxrstorX86Assembler2733   inline void fxrstor(const Mem& src)
2734   { _emitInstruction(kX86InstFXRstor, &src); }
2735 
2736   //! @brief Store FP and MMX(tm) State and Streaming SIMD Extension State
2737   //! (FPU, MMX, SSE).
2738   //!
2739   //! Store FP and MMX(tm) technology state and Streaming SIMD Extension state
2740   //! to dst (512 bytes).
fxsaveX86Assembler2741   inline void fxsave(const Mem& dst)
2742   { _emitInstruction(kX86InstFXSave, &dst); }
2743 
2744   //! @brief Extract Exponent and Significand (FPU).
2745   //!
2746   //! Separate value in st(0) into exponent and significand, store exponent
2747   //! in st(0), and push the significand onto the register stack.
fxtractX86Assembler2748   inline void fxtract()
2749   { _emitInstruction(kX86InstFXtract); }
2750 
2751   //! @brief Compute y * log2(x).
2752   //!
2753   //! Replace st(1) with (st(1) * log2st(0)) and pop the register stack.
fyl2xX86Assembler2754   inline void fyl2x()
2755   { _emitInstruction(kX86InstFYL2X); }
2756 
2757   //! @brief Compute y * log_2(x+1).
2758   //!
2759   //! Replace st(1) with (st(1) * (log2st(0) + 1.0)) and pop the register stack.
fyl2xp1X86Assembler2760   inline void fyl2xp1()
2761   { _emitInstruction(kX86InstFYL2XP1); }
2762 
2763   // --------------------------------------------------------------------------
2764   // [MMX]
2765   // --------------------------------------------------------------------------
2766 
2767   //! @brief Empty MMX state.
emmsX86Assembler2768   inline void emms()
2769   { _emitInstruction(kX86InstEmms); }
2770 
2771   //! @brief Move DWord (MMX).
movdX86Assembler2772   inline void movd(const Mem& dst, const MmReg& src)
2773   { _emitInstruction(kX86InstMovD, &dst, &src); }
2774   //! @brief Move DWord (MMX).
movdX86Assembler2775   inline void movd(const GpReg& dst, const MmReg& src)
2776   { _emitInstruction(kX86InstMovD, &dst, &src); }
2777   //! @brief Move DWord (MMX).
movdX86Assembler2778   inline void movd(const MmReg& dst, const Mem& src)
2779   { _emitInstruction(kX86InstMovD, &dst, &src); }
2780   //! @brief Move DWord (MMX).
movdX86Assembler2781   inline void movd(const MmReg& dst, const GpReg& src)
2782   { _emitInstruction(kX86InstMovD, &dst, &src); }
2783 
2784   //! @brief Move QWord (MMX).
movqX86Assembler2785   inline void movq(const MmReg& dst, const MmReg& src)
2786   { _emitInstruction(kX86InstMovQ, &dst, &src); }
2787   //! @brief Move QWord (MMX).
movqX86Assembler2788   inline void movq(const Mem& dst, const MmReg& src)
2789   { _emitInstruction(kX86InstMovQ, &dst, &src); }
2790 #if defined(ASMJIT_X64)
2791   //! @brief Move QWord (MMX).
movqX86Assembler2792   inline void movq(const GpReg& dst, const MmReg& src)
2793   { _emitInstruction(kX86InstMovQ, &dst, &src); }
2794 #endif
2795   //! @brief Move QWord (MMX).
movqX86Assembler2796   inline void movq(const MmReg& dst, const Mem& src)
2797   { _emitInstruction(kX86InstMovQ, &dst, &src); }
2798 #if defined(ASMJIT_X64)
2799   //! @brief Move QWord (MMX).
movqX86Assembler2800   inline void movq(const MmReg& dst, const GpReg& src)
2801   { _emitInstruction(kX86InstMovQ, &dst, &src); }
2802 #endif
2803 
2804   //! @brief Pack with Signed Saturation (MMX).
packsswbX86Assembler2805   inline void packsswb(const MmReg& dst, const MmReg& src)
2806   { _emitInstruction(kX86InstPackSSWB, &dst, &src); }
2807   //! @brief Pack with Signed Saturation (MMX).
packsswbX86Assembler2808   inline void packsswb(const MmReg& dst, const Mem& src)
2809   { _emitInstruction(kX86InstPackSSWB, &dst, &src); }
2810 
2811   //! @brief Pack with Signed Saturation (MMX).
packssdwX86Assembler2812   inline void packssdw(const MmReg& dst, const MmReg& src)
2813   { _emitInstruction(kX86InstPackSSDW, &dst, &src); }
2814   //! @brief Pack with Signed Saturation (MMX).
packssdwX86Assembler2815   inline void packssdw(const MmReg& dst, const Mem& src)
2816   { _emitInstruction(kX86InstPackSSDW, &dst, &src); }
2817 
2818   //! @brief Pack with Unsigned Saturation (MMX).
packuswbX86Assembler2819   inline void packuswb(const MmReg& dst, const MmReg& src)
2820   { _emitInstruction(kX86InstPackUSWB, &dst, &src); }
2821   //! @brief Pack with Unsigned Saturation (MMX).
packuswbX86Assembler2822   inline void packuswb(const MmReg& dst, const Mem& src)
2823   { _emitInstruction(kX86InstPackUSWB, &dst, &src); }
2824 
2825   //! @brief Packed BYTE Add (MMX).
paddbX86Assembler2826   inline void paddb(const MmReg& dst, const MmReg& src)
2827   { _emitInstruction(kX86InstPAddB, &dst, &src); }
2828   //! @brief Packed BYTE Add (MMX).
paddbX86Assembler2829   inline void paddb(const MmReg& dst, const Mem& src)
2830   { _emitInstruction(kX86InstPAddB, &dst, &src); }
2831 
2832   //! @brief Packed WORD Add (MMX).
paddwX86Assembler2833   inline void paddw(const MmReg& dst, const MmReg& src)
2834   { _emitInstruction(kX86InstPAddW, &dst, &src); }
2835   //! @brief Packed WORD Add (MMX).
paddwX86Assembler2836   inline void paddw(const MmReg& dst, const Mem& src)
2837   { _emitInstruction(kX86InstPAddW, &dst, &src); }
2838 
2839   //! @brief Packed DWORD Add (MMX).
padddX86Assembler2840   inline void paddd(const MmReg& dst, const MmReg& src)
2841   { _emitInstruction(kX86InstPAddD, &dst, &src); }
2842   //! @brief Packed DWORD Add (MMX).
padddX86Assembler2843   inline void paddd(const MmReg& dst, const Mem& src)
2844   { _emitInstruction(kX86InstPAddD, &dst, &src); }
2845 
2846   //! @brief Packed Add with Saturation (MMX).
paddsbX86Assembler2847   inline void paddsb(const MmReg& dst, const MmReg& src)
2848   { _emitInstruction(kX86InstPAddSB, &dst, &src); }
2849   //! @brief Packed Add with Saturation (MMX).
paddsbX86Assembler2850   inline void paddsb(const MmReg& dst, const Mem& src)
2851   { _emitInstruction(kX86InstPAddSB, &dst, &src); }
2852 
2853   //! @brief Packed Add with Saturation (MMX).
paddswX86Assembler2854   inline void paddsw(const MmReg& dst, const MmReg& src)
2855   { _emitInstruction(kX86InstPAddSW, &dst, &src); }
2856   //! @brief Packed Add with Saturation (MMX).
paddswX86Assembler2857   inline void paddsw(const MmReg& dst, const Mem& src)
2858   { _emitInstruction(kX86InstPAddSW, &dst, &src); }
2859 
2860   //! @brief Packed Add Unsigned with Saturation (MMX).
paddusbX86Assembler2861   inline void paddusb(const MmReg& dst, const MmReg& src)
2862   { _emitInstruction(kX86InstPAddUSB, &dst, &src); }
2863   //! @brief Packed Add Unsigned with Saturation (MMX).
paddusbX86Assembler2864   inline void paddusb(const MmReg& dst, const Mem& src)
2865   { _emitInstruction(kX86InstPAddUSB, &dst, &src); }
2866 
2867   //! @brief Packed Add Unsigned with Saturation (MMX).
padduswX86Assembler2868   inline void paddusw(const MmReg& dst, const MmReg& src)
2869   { _emitInstruction(kX86InstPAddUSW, &dst, &src); }
2870   //! @brief Packed Add Unsigned with Saturation (MMX).
padduswX86Assembler2871   inline void paddusw(const MmReg& dst, const Mem& src)
2872   { _emitInstruction(kX86InstPAddUSW, &dst, &src); }
2873 
2874   //! @brief Logical AND (MMX).
pandX86Assembler2875   inline void pand(const MmReg& dst, const MmReg& src)
2876   { _emitInstruction(kX86InstPAnd, &dst, &src); }
2877   //! @brief Logical AND (MMX).
pandX86Assembler2878   inline void pand(const MmReg& dst, const Mem& src)
2879   { _emitInstruction(kX86InstPAnd, &dst, &src); }
2880 
2881   //! @brief Logical AND Not (MMX).
pandnX86Assembler2882   inline void pandn(const MmReg& dst, const MmReg& src)
2883   { _emitInstruction(kX86InstPAndN, &dst, &src); }
2884   //! @brief Logical AND Not (MMX).
pandnX86Assembler2885   inline void pandn(const MmReg& dst, const Mem& src)
2886   { _emitInstruction(kX86InstPAndN, &dst, &src); }
2887 
2888   //! @brief Packed Compare for Equal (BYTES) (MMX).
pcmpeqbX86Assembler2889   inline void pcmpeqb(const MmReg& dst, const MmReg& src)
2890   { _emitInstruction(kX86InstPCmpEqB, &dst, &src); }
2891   //! @brief Packed Compare for Equal (BYTES) (MMX).
pcmpeqbX86Assembler2892   inline void pcmpeqb(const MmReg& dst, const Mem& src)
2893   { _emitInstruction(kX86InstPCmpEqB, &dst, &src); }
2894 
2895   //! @brief Packed Compare for Equal (WORDS) (MMX).
pcmpeqwX86Assembler2896   inline void pcmpeqw(const MmReg& dst, const MmReg& src)
2897   { _emitInstruction(kX86InstPCmpEqW, &dst, &src); }
2898   //! @brief Packed Compare for Equal (WORDS) (MMX).
pcmpeqwX86Assembler2899   inline void pcmpeqw(const MmReg& dst, const Mem& src)
2900   { _emitInstruction(kX86InstPCmpEqW, &dst, &src); }
2901 
2902   //! @brief Packed Compare for Equal (DWORDS) (MMX).
pcmpeqdX86Assembler2903   inline void pcmpeqd(const MmReg& dst, const MmReg& src)
2904   { _emitInstruction(kX86InstPCmpEqD, &dst, &src); }
2905   //! @brief Packed Compare for Equal (DWORDS) (MMX).
pcmpeqdX86Assembler2906   inline void pcmpeqd(const MmReg& dst, const Mem& src)
2907   { _emitInstruction(kX86InstPCmpEqD, &dst, &src); }
2908 
2909   //! @brief Packed Compare for Greater Than (BYTES) (MMX).
pcmpgtbX86Assembler2910   inline void pcmpgtb(const MmReg& dst, const MmReg& src)
2911   { _emitInstruction(kX86InstPCmpGtB, &dst, &src); }
2912   //! @brief Packed Compare for Greater Than (BYTES) (MMX).
pcmpgtbX86Assembler2913   inline void pcmpgtb(const MmReg& dst, const Mem& src)
2914   { _emitInstruction(kX86InstPCmpGtB, &dst, &src); }
2915 
2916   //! @brief Packed Compare for Greater Than (WORDS) (MMX).
pcmpgtwX86Assembler2917   inline void pcmpgtw(const MmReg& dst, const MmReg& src)
2918   { _emitInstruction(kX86InstPCmpGtW, &dst, &src); }
2919   //! @brief Packed Compare for Greater Than (WORDS) (MMX).
pcmpgtwX86Assembler2920   inline void pcmpgtw(const MmReg& dst, const Mem& src)
2921   { _emitInstruction(kX86InstPCmpGtW, &dst, &src); }
2922 
2923   //! @brief Packed Compare for Greater Than (DWORDS) (MMX).
pcmpgtdX86Assembler2924   inline void pcmpgtd(const MmReg& dst, const MmReg& src)
2925   { _emitInstruction(kX86InstPCmpGtD, &dst, &src); }
2926   //! @brief Packed Compare for Greater Than (DWORDS) (MMX).
pcmpgtdX86Assembler2927   inline void pcmpgtd(const MmReg& dst, const Mem& src)
2928   { _emitInstruction(kX86InstPCmpGtD, &dst, &src); }
2929 
2930   //! @brief Packed Multiply High (MMX).
pmulhwX86Assembler2931   inline void pmulhw(const MmReg& dst, const MmReg& src)
2932   { _emitInstruction(kX86InstPMulHW, &dst, &src); }
2933   //! @brief Packed Multiply High (MMX).
pmulhwX86Assembler2934   inline void pmulhw(const MmReg& dst, const Mem& src)
2935   { _emitInstruction(kX86InstPMulHW, &dst, &src); }
2936 
2937   //! @brief Packed Multiply Low (MMX).
pmullwX86Assembler2938   inline void pmullw(const MmReg& dst, const MmReg& src)
2939   { _emitInstruction(kX86InstPMulLW, &dst, &src); }
2940   //! @brief Packed Multiply Low (MMX).
pmullwX86Assembler2941   inline void pmullw(const MmReg& dst, const Mem& src)
2942   { _emitInstruction(kX86InstPMulLW, &dst, &src); }
2943 
2944   //! @brief Bitwise Logical OR (MMX).
porX86Assembler2945   inline void por(const MmReg& dst, const MmReg& src)
2946   { _emitInstruction(kX86InstPOr, &dst, &src); }
2947   //! @brief Bitwise Logical OR (MMX).
porX86Assembler2948   inline void por(const MmReg& dst, const Mem& src)
2949   { _emitInstruction(kX86InstPOr, &dst, &src); }
2950 
2951   //! @brief Packed Multiply and Add (MMX).
pmaddwdX86Assembler2952   inline void pmaddwd(const MmReg& dst, const MmReg& src)
2953   { _emitInstruction(kX86InstPMAddWD, &dst, &src); }
2954   //! @brief Packed Multiply and Add (MMX).
pmaddwdX86Assembler2955   inline void pmaddwd(const MmReg& dst, const Mem& src)
2956   { _emitInstruction(kX86InstPMAddWD, &dst, &src); }
2957 
2958   //! @brief Packed Shift Left Logical (MMX).
pslldX86Assembler2959   inline void pslld(const MmReg& dst, const MmReg& src)
2960   { _emitInstruction(kX86InstPSllD, &dst, &src); }
2961   //! @brief Packed Shift Left Logical (MMX).
pslldX86Assembler2962   inline void pslld(const MmReg& dst, const Mem& src)
2963   { _emitInstruction(kX86InstPSllD, &dst, &src); }
2964   //! @brief Packed Shift Left Logical (MMX).
pslldX86Assembler2965   inline void pslld(const MmReg& dst, const Imm& src)
2966   { _emitInstruction(kX86InstPSllD, &dst, &src); }
2967 
2968   //! @brief Packed Shift Left Logical (MMX).
psllqX86Assembler2969   inline void psllq(const MmReg& dst, const MmReg& src)
2970   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
2971   //! @brief Packed Shift Left Logical (MMX).
psllqX86Assembler2972   inline void psllq(const MmReg& dst, const Mem& src)
2973   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
2974   //! @brief Packed Shift Left Logical (MMX).
psllqX86Assembler2975   inline void psllq(const MmReg& dst, const Imm& src)
2976   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
2977 
2978   //! @brief Packed Shift Left Logical (MMX).
psllwX86Assembler2979   inline void psllw(const MmReg& dst, const MmReg& src)
2980   { _emitInstruction(kX86InstPSllW, &dst, &src); }
2981   //! @brief Packed Shift Left Logical (MMX).
psllwX86Assembler2982   inline void psllw(const MmReg& dst, const Mem& src)
2983   { _emitInstruction(kX86InstPSllW, &dst, &src); }
2984   //! @brief Packed Shift Left Logical (MMX).
psllwX86Assembler2985   inline void psllw(const MmReg& dst, const Imm& src)
2986   { _emitInstruction(kX86InstPSllW, &dst, &src); }
2987 
2988   //! @brief Packed Shift Right Arithmetic (MMX).
psradX86Assembler2989   inline void psrad(const MmReg& dst, const MmReg& src)
2990   { _emitInstruction(kX86InstPSraD, &dst, &src); }
2991   //! @brief Packed Shift Right Arithmetic (MMX).
psradX86Assembler2992   inline void psrad(const MmReg& dst, const Mem& src)
2993   { _emitInstruction(kX86InstPSraD, &dst, &src); }
2994   //! @brief Packed Shift Right Arithmetic (MMX).
psradX86Assembler2995   inline void psrad(const MmReg& dst, const Imm& src)
2996   { _emitInstruction(kX86InstPSraD, &dst, &src); }
2997 
2998   //! @brief Packed Shift Right Arithmetic (MMX).
psrawX86Assembler2999   inline void psraw(const MmReg& dst, const MmReg& src)
3000   { _emitInstruction(kX86InstPSraW, &dst, &src); }
3001   //! @brief Packed Shift Right Arithmetic (MMX).
psrawX86Assembler3002   inline void psraw(const MmReg& dst, const Mem& src)
3003   { _emitInstruction(kX86InstPSraW, &dst, &src); }
3004   //! @brief Packed Shift Right Arithmetic (MMX).
psrawX86Assembler3005   inline void psraw(const MmReg& dst, const Imm& src)
3006   { _emitInstruction(kX86InstPSraW, &dst, &src); }
3007 
3008   //! @brief Packed Shift Right Logical (MMX).
psrldX86Assembler3009   inline void psrld(const MmReg& dst, const MmReg& src)
3010   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
3011   //! @brief Packed Shift Right Logical (MMX).
psrldX86Assembler3012   inline void psrld(const MmReg& dst, const Mem& src)
3013   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
3014   //! @brief Packed Shift Right Logical (MMX).
psrldX86Assembler3015   inline void psrld(const MmReg& dst, const Imm& src)
3016   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
3017 
3018   //! @brief Packed Shift Right Logical (MMX).
psrlqX86Assembler3019   inline void psrlq(const MmReg& dst, const MmReg& src)
3020   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
3021   //! @brief Packed Shift Right Logical (MMX).
psrlqX86Assembler3022   inline void psrlq(const MmReg& dst, const Mem& src)
3023   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
3024   //! @brief Packed Shift Right Logical (MMX).
psrlqX86Assembler3025   inline void psrlq(const MmReg& dst, const Imm& src)
3026   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
3027 
3028   //! @brief Packed Shift Right Logical (MMX).
psrlwX86Assembler3029   inline void psrlw(const MmReg& dst, const MmReg& src)
3030   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
3031   //! @brief Packed Shift Right Logical (MMX).
psrlwX86Assembler3032   inline void psrlw(const MmReg& dst, const Mem& src)
3033   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
3034   //! @brief Packed Shift Right Logical (MMX).
psrlwX86Assembler3035   inline void psrlw(const MmReg& dst, const Imm& src)
3036   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
3037 
3038   //! @brief Packed Subtract (MMX).
psubbX86Assembler3039   inline void psubb(const MmReg& dst, const MmReg& src)
3040   { _emitInstruction(kX86InstPSubB, &dst, &src); }
3041   //! @brief Packed Subtract (MMX).
psubbX86Assembler3042   inline void psubb(const MmReg& dst, const Mem& src)
3043   { _emitInstruction(kX86InstPSubB, &dst, &src); }
3044 
3045   //! @brief Packed Subtract (MMX).
psubwX86Assembler3046   inline void psubw(const MmReg& dst, const MmReg& src)
3047   { _emitInstruction(kX86InstPSubW, &dst, &src); }
3048   //! @brief Packed Subtract (MMX).
psubwX86Assembler3049   inline void psubw(const MmReg& dst, const Mem& src)
3050   { _emitInstruction(kX86InstPSubW, &dst, &src); }
3051 
3052   //! @brief Packed Subtract (MMX).
psubdX86Assembler3053   inline void psubd(const MmReg& dst, const MmReg& src)
3054   { _emitInstruction(kX86InstPSubD, &dst, &src); }
3055   //! @brief Packed Subtract (MMX).
psubdX86Assembler3056   inline void psubd(const MmReg& dst, const Mem& src)
3057   { _emitInstruction(kX86InstPSubD, &dst, &src); }
3058 
3059   //! @brief Packed Subtract with Saturation (MMX).
psubsbX86Assembler3060   inline void psubsb(const MmReg& dst, const MmReg& src)
3061   { _emitInstruction(kX86InstPSubSB, &dst, &src); }
3062   //! @brief Packed Subtract with Saturation (MMX).
psubsbX86Assembler3063   inline void psubsb(const MmReg& dst, const Mem& src)
3064   { _emitInstruction(kX86InstPSubSB, &dst, &src); }
3065 
3066   //! @brief Packed Subtract with Saturation (MMX).
psubswX86Assembler3067   inline void psubsw(const MmReg& dst, const MmReg& src)
3068   { _emitInstruction(kX86InstPSubSW, &dst, &src); }
3069   //! @brief Packed Subtract with Saturation (MMX).
psubswX86Assembler3070   inline void psubsw(const MmReg& dst, const Mem& src)
3071   { _emitInstruction(kX86InstPSubSW, &dst, &src); }
3072 
3073   //! @brief Packed Subtract with Unsigned Saturation (MMX).
psubusbX86Assembler3074   inline void psubusb(const MmReg& dst, const MmReg& src)
3075   { _emitInstruction(kX86InstPSubUSB, &dst, &src); }
3076   //! @brief Packed Subtract with Unsigned Saturation (MMX).
psubusbX86Assembler3077   inline void psubusb(const MmReg& dst, const Mem& src)
3078   { _emitInstruction(kX86InstPSubUSB, &dst, &src); }
3079 
3080   //! @brief Packed Subtract with Unsigned Saturation (MMX).
psubuswX86Assembler3081   inline void psubusw(const MmReg& dst, const MmReg& src)
3082   { _emitInstruction(kX86InstPSubUSW, &dst, &src); }
3083   //! @brief Packed Subtract with Unsigned Saturation (MMX).
psubuswX86Assembler3084   inline void psubusw(const MmReg& dst, const Mem& src)
3085   { _emitInstruction(kX86InstPSubUSW, &dst, &src); }
3086 
3087   //! @brief Unpack High Packed Data (MMX).
punpckhbwX86Assembler3088   inline void punpckhbw(const MmReg& dst, const MmReg& src)
3089   { _emitInstruction(kX86InstPunpckHBW, &dst, &src); }
3090   //! @brief Unpack High Packed Data (MMX).
punpckhbwX86Assembler3091   inline void punpckhbw(const MmReg& dst, const Mem& src)
3092   { _emitInstruction(kX86InstPunpckHBW, &dst, &src); }
3093 
3094   //! @brief Unpack High Packed Data (MMX).
punpckhwdX86Assembler3095   inline void punpckhwd(const MmReg& dst, const MmReg& src)
3096   { _emitInstruction(kX86InstPunpckHWD, &dst, &src); }
3097   //! @brief Unpack High Packed Data (MMX).
punpckhwdX86Assembler3098   inline void punpckhwd(const MmReg& dst, const Mem& src)
3099   { _emitInstruction(kX86InstPunpckHWD, &dst, &src); }
3100 
3101   //! @brief Unpack High Packed Data (MMX).
punpckhdqX86Assembler3102   inline void punpckhdq(const MmReg& dst, const MmReg& src)
3103   { _emitInstruction(kX86InstPunpckHDQ, &dst, &src); }
3104   //! @brief Unpack High Packed Data (MMX).
punpckhdqX86Assembler3105   inline void punpckhdq(const MmReg& dst, const Mem& src)
3106   { _emitInstruction(kX86InstPunpckHDQ, &dst, &src); }
3107 
3108   //! @brief Unpack High Packed Data (MMX).
punpcklbwX86Assembler3109   inline void punpcklbw(const MmReg& dst, const MmReg& src)
3110   { _emitInstruction(kX86InstPunpckLBW, &dst, &src); }
3111   //! @brief Unpack High Packed Data (MMX).
punpcklbwX86Assembler3112   inline void punpcklbw(const MmReg& dst, const Mem& src)
3113   { _emitInstruction(kX86InstPunpckLBW, &dst, &src); }
3114 
3115   //! @brief Unpack High Packed Data (MMX).
punpcklwdX86Assembler3116   inline void punpcklwd(const MmReg& dst, const MmReg& src)
3117   { _emitInstruction(kX86InstPunpckLWD, &dst, &src); }
3118   //! @brief Unpack High Packed Data (MMX).
punpcklwdX86Assembler3119   inline void punpcklwd(const MmReg& dst, const Mem& src)
3120   { _emitInstruction(kX86InstPunpckLWD, &dst, &src); }
3121 
3122   //! @brief Unpack High Packed Data (MMX).
punpckldqX86Assembler3123   inline void punpckldq(const MmReg& dst, const MmReg& src)
3124   { _emitInstruction(kX86InstPunpckLDQ, &dst, &src); }
3125   //! @brief Unpack High Packed Data (MMX).
punpckldqX86Assembler3126   inline void punpckldq(const MmReg& dst, const Mem& src)
3127   { _emitInstruction(kX86InstPunpckLDQ, &dst, &src); }
3128 
3129   //! @brief Bitwise Exclusive OR (MMX).
pxorX86Assembler3130   inline void pxor(const MmReg& dst, const MmReg& src)
3131   { _emitInstruction(kX86InstPXor, &dst, &src); }
3132   //! @brief Bitwise Exclusive OR (MMX).
pxorX86Assembler3133   inline void pxor(const MmReg& dst, const Mem& src)
3134   { _emitInstruction(kX86InstPXor, &dst, &src); }
3135 
3136   // -------------------------------------------------------------------------
3137   // [3dNow]
3138   // -------------------------------------------------------------------------
3139 
3140   //! @brief Faster EMMS (3dNow!).
3141   //!
3142   //! @note Use only for early AMD processors where is only 3dNow! or SSE. If
3143   //! CPU contains SSE2, it's better to use @c emms() ( @c femms() is mapped
3144   //! to @c emms() ).
femmsX86Assembler3145   inline void femms()
3146   { _emitInstruction(kX86InstFEmms); }
3147 
3148   //! @brief Packed SP-FP to Integer Convert (3dNow!).
pf2idX86Assembler3149   inline void pf2id(const MmReg& dst, const MmReg& src)
3150   { _emitInstruction(kX86InstPF2ID, &dst, &src); }
3151   //! @brief Packed SP-FP to Integer Convert (3dNow!).
pf2idX86Assembler3152   inline void pf2id(const MmReg& dst, const Mem& src)
3153   { _emitInstruction(kX86InstPF2ID, &dst, &src); }
3154 
3155   //! @brief  Packed SP-FP to Integer Word Convert (3dNow!).
pf2iwX86Assembler3156   inline void pf2iw(const MmReg& dst, const MmReg& src)
3157   { _emitInstruction(kX86InstPF2IW, &dst, &src); }
3158   //! @brief  Packed SP-FP to Integer Word Convert (3dNow!).
pf2iwX86Assembler3159   inline void pf2iw(const MmReg& dst, const Mem& src)
3160   { _emitInstruction(kX86InstPF2IW, &dst, &src); }
3161 
3162   //! @brief Packed SP-FP Accumulate (3dNow!).
pfaccX86Assembler3163   inline void pfacc(const MmReg& dst, const MmReg& src)
3164   { _emitInstruction(kX86InstPFAcc, &dst, &src); }
3165   //! @brief Packed SP-FP Accumulate (3dNow!).
pfaccX86Assembler3166   inline void pfacc(const MmReg& dst, const Mem& src)
3167   { _emitInstruction(kX86InstPFAcc, &dst, &src); }
3168 
3169   //! @brief Packed SP-FP Addition (3dNow!).
pfaddX86Assembler3170   inline void pfadd(const MmReg& dst, const MmReg& src)
3171   { _emitInstruction(kX86InstPFAdd, &dst, &src); }
3172   //! @brief Packed SP-FP Addition (3dNow!).
pfaddX86Assembler3173   inline void pfadd(const MmReg& dst, const Mem& src)
3174   { _emitInstruction(kX86InstPFAdd, &dst, &src); }
3175 
3176   //! @brief Packed SP-FP Compare - dst == src (3dNow!).
pfcmpeqX86Assembler3177   inline void pfcmpeq(const MmReg& dst, const MmReg& src)
3178   { _emitInstruction(kX86InstPFCmpEQ, &dst, &src); }
3179   //! @brief Packed SP-FP Compare - dst == src (3dNow!).
pfcmpeqX86Assembler3180   inline void pfcmpeq(const MmReg& dst, const Mem& src)
3181   { _emitInstruction(kX86InstPFCmpEQ, &dst, &src); }
3182 
3183   //! @brief Packed SP-FP Compare - dst >= src (3dNow!).
pfcmpgeX86Assembler3184   inline void pfcmpge(const MmReg& dst, const MmReg& src)
3185   { _emitInstruction(kX86InstPFCmpGE, &dst, &src); }
3186   //! @brief Packed SP-FP Compare - dst >= src (3dNow!).
pfcmpgeX86Assembler3187   inline void pfcmpge(const MmReg& dst, const Mem& src)
3188   { _emitInstruction(kX86InstPFCmpGE, &dst, &src); }
3189 
3190   //! @brief Packed SP-FP Compare - dst > src (3dNow!).
pfcmpgtX86Assembler3191   inline void pfcmpgt(const MmReg& dst, const MmReg& src)
3192   { _emitInstruction(kX86InstPFCmpGT, &dst, &src); }
3193   //! @brief Packed SP-FP Compare - dst > src (3dNow!).
pfcmpgtX86Assembler3194   inline void pfcmpgt(const MmReg& dst, const Mem& src)
3195   { _emitInstruction(kX86InstPFCmpGT, &dst, &src); }
3196 
3197   //! @brief Packed SP-FP Maximum (3dNow!).
pfmaxX86Assembler3198   inline void pfmax(const MmReg& dst, const MmReg& src)
3199   { _emitInstruction(kX86InstPFMax, &dst, &src); }
3200   //! @brief Packed SP-FP Maximum (3dNow!).
pfmaxX86Assembler3201   inline void pfmax(const MmReg& dst, const Mem& src)
3202   { _emitInstruction(kX86InstPFMax, &dst, &src); }
3203 
3204   //! @brief Packed SP-FP Minimum (3dNow!).
pfminX86Assembler3205   inline void pfmin(const MmReg& dst, const MmReg& src)
3206   { _emitInstruction(kX86InstPFMin, &dst, &src); }
3207   //! @brief Packed SP-FP Minimum (3dNow!).
pfminX86Assembler3208   inline void pfmin(const MmReg& dst, const Mem& src)
3209   { _emitInstruction(kX86InstPFMin, &dst, &src); }
3210 
3211   //! @brief Packed SP-FP Multiply (3dNow!).
pfmulX86Assembler3212   inline void pfmul(const MmReg& dst, const MmReg& src)
3213   { _emitInstruction(kX86InstPFMul, &dst, &src); }
3214   //! @brief Packed SP-FP Multiply (3dNow!).
pfmulX86Assembler3215   inline void pfmul(const MmReg& dst, const Mem& src)
3216   { _emitInstruction(kX86InstPFMul, &dst, &src); }
3217 
3218   //! @brief Packed SP-FP Negative Accumulate (3dNow!).
pfnaccX86Assembler3219   inline void pfnacc(const MmReg& dst, const MmReg& src)
3220   { _emitInstruction(kX86InstPFNAcc, &dst, &src); }
3221   //! @brief Packed SP-FP Negative Accumulate (3dNow!).
pfnaccX86Assembler3222   inline void pfnacc(const MmReg& dst, const Mem& src)
3223   { _emitInstruction(kX86InstPFNAcc, &dst, &src); }
3224 
3225   //! @brief Packed SP-FP Mixed Accumulate (3dNow!).
pfpnaccX86Assembler3226   inline void pfpnacc(const MmReg& dst, const MmReg& src)
3227   { _emitInstruction(kX86InstPFPNAcc, &dst, &src); }
3228   //! @brief Packed SP-FP Mixed Accumulate (3dNow!).
pfpnaccX86Assembler3229   inline void pfpnacc(const MmReg& dst, const Mem& src)
3230   { _emitInstruction(kX86InstPFPNAcc, &dst, &src); }
3231 
3232   //! @brief Packed SP-FP Reciprocal Approximation (3dNow!).
pfrcpX86Assembler3233   inline void pfrcp(const MmReg& dst, const MmReg& src)
3234   { _emitInstruction(kX86InstPFRcp, &dst, &src); }
3235   //! @brief Packed SP-FP Reciprocal Approximation (3dNow!).
pfrcpX86Assembler3236   inline void pfrcp(const MmReg& dst, const Mem& src)
3237   { _emitInstruction(kX86InstPFRcp, &dst, &src); }
3238 
3239   //! @brief Packed SP-FP Reciprocal, First Iteration Step (3dNow!).
pfrcpit1X86Assembler3240   inline void pfrcpit1(const MmReg& dst, const MmReg& src)
3241   { _emitInstruction(kX86InstPFRcpIt1, &dst, &src); }
3242   //! @brief Packed SP-FP Reciprocal, First Iteration Step (3dNow!).
pfrcpit1X86Assembler3243   inline void pfrcpit1(const MmReg& dst, const Mem& src)
3244   { _emitInstruction(kX86InstPFRcpIt1, &dst, &src); }
3245 
3246   //! @brief Packed SP-FP Reciprocal, Second Iteration Step (3dNow!).
pfrcpit2X86Assembler3247   inline void pfrcpit2(const MmReg& dst, const MmReg& src)
3248   { _emitInstruction(kX86InstPFRcpIt2, &dst, &src); }
3249   //! @brief Packed SP-FP Reciprocal, Second Iteration Step (3dNow!).
pfrcpit2X86Assembler3250   inline void pfrcpit2(const MmReg& dst, const Mem& src)
3251   { _emitInstruction(kX86InstPFRcpIt2, &dst, &src); }
3252 
3253   //! @brief Packed SP-FP Reciprocal Square Root, First Iteration Step (3dNow!).
pfrsqit1X86Assembler3254   inline void pfrsqit1(const MmReg& dst, const MmReg& src)
3255   { _emitInstruction(kX86InstPFRSqIt1, &dst, &src); }
3256   //! @brief Packed SP-FP Reciprocal Square Root, First Iteration Step (3dNow!).
pfrsqit1X86Assembler3257   inline void pfrsqit1(const MmReg& dst, const Mem& src)
3258   { _emitInstruction(kX86InstPFRSqIt1, &dst, &src); }
3259 
3260   //! @brief Packed SP-FP Reciprocal Square Root Approximation (3dNow!).
pfrsqrtX86Assembler3261   inline void pfrsqrt(const MmReg& dst, const MmReg& src)
3262   { _emitInstruction(kX86InstPFRSqrt, &dst, &src); }
3263   //! @brief Packed SP-FP Reciprocal Square Root Approximation (3dNow!).
pfrsqrtX86Assembler3264   inline void pfrsqrt(const MmReg& dst, const Mem& src)
3265   { _emitInstruction(kX86InstPFRSqrt, &dst, &src); }
3266 
3267   //! @brief Packed SP-FP Subtract (3dNow!).
pfsubX86Assembler3268   inline void pfsub(const MmReg& dst, const MmReg& src)
3269   { _emitInstruction(kX86InstPFSub, &dst, &src); }
3270   //! @brief Packed SP-FP Subtract (3dNow!).
pfsubX86Assembler3271   inline void pfsub(const MmReg& dst, const Mem& src)
3272   { _emitInstruction(kX86InstPFSub, &dst, &src); }
3273 
3274   //! @brief Packed SP-FP Reverse Subtract (3dNow!).
pfsubrX86Assembler3275   inline void pfsubr(const MmReg& dst, const MmReg& src)
3276   { _emitInstruction(kX86InstPFSubR, &dst, &src); }
3277   //! @brief Packed SP-FP Reverse Subtract (3dNow!).
pfsubrX86Assembler3278   inline void pfsubr(const MmReg& dst, const Mem& src)
3279   { _emitInstruction(kX86InstPFSubR, &dst, &src); }
3280 
3281   //! @brief Packed DWords to SP-FP (3dNow!).
pi2fdX86Assembler3282   inline void pi2fd(const MmReg& dst, const MmReg& src)
3283   { _emitInstruction(kX86InstPI2FD, &dst, &src); }
3284   //! @brief Packed DWords to SP-FP (3dNow!).
pi2fdX86Assembler3285   inline void pi2fd(const MmReg& dst, const Mem& src)
3286   { _emitInstruction(kX86InstPI2FD, &dst, &src); }
3287 
3288   //! @brief Packed Words to SP-FP (3dNow!).
pi2fwX86Assembler3289   inline void pi2fw(const MmReg& dst, const MmReg& src)
3290   { _emitInstruction(kX86InstPI2FW, &dst, &src); }
3291   //! @brief Packed Words to SP-FP (3dNow!).
pi2fwX86Assembler3292   inline void pi2fw(const MmReg& dst, const Mem& src)
3293   { _emitInstruction(kX86InstPI2FW, &dst, &src); }
3294 
3295   //! @brief Packed swap DWord (3dNow!)
pswapdX86Assembler3296   inline void pswapd(const MmReg& dst, const MmReg& src)
3297   { _emitInstruction(kX86InstPSwapD, &dst, &src); }
3298   //! @brief Packed swap DWord (3dNow!)
pswapdX86Assembler3299   inline void pswapd(const MmReg& dst, const Mem& src)
3300   { _emitInstruction(kX86InstPSwapD, &dst, &src); }
3301 
3302   // --------------------------------------------------------------------------
3303   // [SSE]
3304   // --------------------------------------------------------------------------
3305 
3306   //! @brief Packed SP-FP Add (SSE).
addpsX86Assembler3307   inline void addps(const XmmReg& dst, const XmmReg& src)
3308   { _emitInstruction(kX86InstAddPS, &dst, &src); }
3309   //! @brief Packed SP-FP Add (SSE).
addpsX86Assembler3310   inline void addps(const XmmReg& dst, const Mem& src)
3311   { _emitInstruction(kX86InstAddPS, &dst, &src); }
3312 
3313   //! @brief Scalar SP-FP Add (SSE).
addssX86Assembler3314   inline void addss(const XmmReg& dst, const XmmReg& src)
3315   { _emitInstruction(kX86InstAddSS, &dst, &src); }
3316   //! @brief Scalar SP-FP Add (SSE).
addssX86Assembler3317   inline void addss(const XmmReg& dst, const Mem& src)
3318   { _emitInstruction(kX86InstAddSS, &dst, &src); }
3319 
3320   //! @brief Bit-wise Logical And Not For SP-FP (SSE).
andnpsX86Assembler3321   inline void andnps(const XmmReg& dst, const XmmReg& src)
3322   { _emitInstruction(kX86InstAndnPS, &dst, &src); }
3323   //! @brief Bit-wise Logical And Not For SP-FP (SSE).
andnpsX86Assembler3324   inline void andnps(const XmmReg& dst, const Mem& src)
3325   { _emitInstruction(kX86InstAndnPS, &dst, &src); }
3326 
3327   //! @brief Bit-wise Logical And For SP-FP (SSE).
andpsX86Assembler3328   inline void andps(const XmmReg& dst, const XmmReg& src)
3329   { _emitInstruction(kX86InstAndPS, &dst, &src); }
3330   //! @brief Bit-wise Logical And For SP-FP (SSE).
andpsX86Assembler3331   inline void andps(const XmmReg& dst, const Mem& src)
3332   { _emitInstruction(kX86InstAndPS, &dst, &src); }
3333 
3334   //! @brief Packed SP-FP Compare (SSE).
cmppsX86Assembler3335   inline void cmpps(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
3336   { _emitInstruction(kX86InstCmpPS, &dst, &src, &imm8); }
3337   //! @brief Packed SP-FP Compare (SSE).
cmppsX86Assembler3338   inline void cmpps(const XmmReg& dst, const Mem& src, const Imm& imm8)
3339   { _emitInstruction(kX86InstCmpPS, &dst, &src, &imm8); }
3340 
3341   //! @brief Compare Scalar SP-FP Values (SSE).
cmpssX86Assembler3342   inline void cmpss(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
3343   { _emitInstruction(kX86InstCmpSS, &dst, &src, &imm8); }
3344   //! @brief Compare Scalar SP-FP Values (SSE).
cmpssX86Assembler3345   inline void cmpss(const XmmReg& dst, const Mem& src, const Imm& imm8)
3346   { _emitInstruction(kX86InstCmpSS, &dst, &src, &imm8); }
3347 
3348   //! @brief Scalar Ordered SP-FP Compare and Set EFLAGS (SSE).
comissX86Assembler3349   inline void comiss(const XmmReg& dst, const XmmReg& src)
3350   { _emitInstruction(kX86InstComISS, &dst, &src); }
3351   //! @brief Scalar Ordered SP-FP Compare and Set EFLAGS (SSE).
comissX86Assembler3352   inline void comiss(const XmmReg& dst, const Mem& src)
3353   { _emitInstruction(kX86InstComISS, &dst, &src); }
3354 
3355   //! @brief Packed Signed INT32 to Packed SP-FP Conversion (SSE).
cvtpi2psX86Assembler3356   inline void cvtpi2ps(const XmmReg& dst, const MmReg& src)
3357   { _emitInstruction(kX86InstCvtPI2PS, &dst, &src); }
3358   //! @brief Packed Signed INT32 to Packed SP-FP Conversion (SSE).
cvtpi2psX86Assembler3359   inline void cvtpi2ps(const XmmReg& dst, const Mem& src)
3360   { _emitInstruction(kX86InstCvtPI2PS, &dst, &src); }
3361 
3362   //! @brief Packed SP-FP to Packed INT32 Conversion (SSE).
cvtps2piX86Assembler3363   inline void cvtps2pi(const MmReg& dst, const XmmReg& src)
3364   { _emitInstruction(kX86InstCvtPS2PI, &dst, &src); }
3365   //! @brief Packed SP-FP to Packed INT32 Conversion (SSE).
cvtps2piX86Assembler3366   inline void cvtps2pi(const MmReg& dst, const Mem& src)
3367   { _emitInstruction(kX86InstCvtPS2PI, &dst, &src); }
3368 
3369   //! @brief Scalar Signed INT32 to SP-FP Conversion (SSE).
cvtsi2ssX86Assembler3370   inline void cvtsi2ss(const XmmReg& dst, const GpReg& src)
3371   { _emitInstruction(kX86InstCvtSI2SS, &dst, &src); }
3372   //! @brief Scalar Signed INT32 to SP-FP Conversion (SSE).
cvtsi2ssX86Assembler3373   inline void cvtsi2ss(const XmmReg& dst, const Mem& src)
3374   { _emitInstruction(kX86InstCvtSI2SS, &dst, &src); }
3375 
3376   //! @brief Scalar SP-FP to Signed INT32 Conversion (SSE).
cvtss2siX86Assembler3377   inline void cvtss2si(const GpReg& dst, const XmmReg& src)
3378   { _emitInstruction(kX86InstCvtSS2SI, &dst, &src); }
3379   //! @brief Scalar SP-FP to Signed INT32 Conversion (SSE).
cvtss2siX86Assembler3380   inline void cvtss2si(const GpReg& dst, const Mem& src)
3381   { _emitInstruction(kX86InstCvtSS2SI, &dst, &src); }
3382 
3383   //! @brief Packed SP-FP to Packed INT32 Conversion (truncate) (SSE).
cvttps2piX86Assembler3384   inline void cvttps2pi(const MmReg& dst, const XmmReg& src)
3385   { _emitInstruction(kX86InstCvttPS2PI, &dst, &src); }
3386   //! @brief Packed SP-FP to Packed INT32 Conversion (truncate) (SSE).
cvttps2piX86Assembler3387   inline void cvttps2pi(const MmReg& dst, const Mem& src)
3388   { _emitInstruction(kX86InstCvttPS2PI, &dst, &src); }
3389 
3390   //! @brief Scalar SP-FP to Signed INT32 Conversion (truncate) (SSE).
cvttss2siX86Assembler3391   inline void cvttss2si(const GpReg& dst, const XmmReg& src)
3392   { _emitInstruction(kX86InstCvttSS2SI, &dst, &src); }
3393   //! @brief Scalar SP-FP to Signed INT32 Conversion (truncate) (SSE).
cvttss2siX86Assembler3394   inline void cvttss2si(const GpReg& dst, const Mem& src)
3395   { _emitInstruction(kX86InstCvttSS2SI, &dst, &src); }
3396 
3397   //! @brief Packed SP-FP Divide (SSE).
divpsX86Assembler3398   inline void divps(const XmmReg& dst, const XmmReg& src)
3399   { _emitInstruction(kX86InstDivPS, &dst, &src); }
3400   //! @brief Packed SP-FP Divide (SSE).
divpsX86Assembler3401   inline void divps(const XmmReg& dst, const Mem& src)
3402   { _emitInstruction(kX86InstDivPS, &dst, &src); }
3403 
3404   //! @brief Scalar SP-FP Divide (SSE).
divssX86Assembler3405   inline void divss(const XmmReg& dst, const XmmReg& src)
3406   { _emitInstruction(kX86InstDivSS, &dst, &src); }
3407   //! @brief Scalar SP-FP Divide (SSE).
divssX86Assembler3408   inline void divss(const XmmReg& dst, const Mem& src)
3409   { _emitInstruction(kX86InstDivSS, &dst, &src); }
3410 
3411   //! @brief Load Streaming SIMD Extension Control/Status (SSE).
ldmxcsrX86Assembler3412   inline void ldmxcsr(const Mem& src)
3413   { _emitInstruction(kX86InstLdMXCSR, &src); }
3414 
3415   //! @brief Byte Mask Write (SSE).
3416   //!
3417   //! @note The default memory location is specified by DS:EDI.
maskmovqX86Assembler3418   inline void maskmovq(const MmReg& data, const MmReg& mask)
3419   { _emitInstruction(kX86InstMaskMovQ, &data, &mask); }
3420 
3421   //! @brief Packed SP-FP Maximum (SSE).
maxpsX86Assembler3422   inline void maxps(const XmmReg& dst, const XmmReg& src)
3423   { _emitInstruction(kX86InstMaxPS, &dst, &src); }
3424   //! @brief Packed SP-FP Maximum (SSE).
maxpsX86Assembler3425   inline void maxps(const XmmReg& dst, const Mem& src)
3426   { _emitInstruction(kX86InstMaxPS, &dst, &src); }
3427 
3428   //! @brief Scalar SP-FP Maximum (SSE).
maxssX86Assembler3429   inline void maxss(const XmmReg& dst, const XmmReg& src)
3430   { _emitInstruction(kX86InstMaxSS, &dst, &src); }
3431   //! @brief Scalar SP-FP Maximum (SSE).
maxssX86Assembler3432   inline void maxss(const XmmReg& dst, const Mem& src)
3433   { _emitInstruction(kX86InstMaxSS, &dst, &src); }
3434 
3435   //! @brief Packed SP-FP Minimum (SSE).
minpsX86Assembler3436   inline void minps(const XmmReg& dst, const XmmReg& src)
3437   { _emitInstruction(kX86InstMinPS, &dst, &src); }
3438   //! @brief Packed SP-FP Minimum (SSE).
minpsX86Assembler3439   inline void minps(const XmmReg& dst, const Mem& src)
3440   { _emitInstruction(kX86InstMinPS, &dst, &src); }
3441 
3442   //! @brief Scalar SP-FP Minimum (SSE).
minssX86Assembler3443   inline void minss(const XmmReg& dst, const XmmReg& src)
3444   { _emitInstruction(kX86InstMinSS, &dst, &src); }
3445   //! @brief Scalar SP-FP Minimum (SSE).
minssX86Assembler3446   inline void minss(const XmmReg& dst, const Mem& src)
3447   { _emitInstruction(kX86InstMinSS, &dst, &src); }
3448 
3449   //! @brief Move Aligned Packed SP-FP Values (SSE).
movapsX86Assembler3450   inline void movaps(const XmmReg& dst, const XmmReg& src)
3451   { _emitInstruction(kX86InstMovAPS, &dst, &src); }
3452   //! @brief Move Aligned Packed SP-FP Values (SSE).
movapsX86Assembler3453   inline void movaps(const XmmReg& dst, const Mem& src)
3454   { _emitInstruction(kX86InstMovAPS, &dst, &src); }
3455 
3456   //! @brief Move Aligned Packed SP-FP Values (SSE).
movapsX86Assembler3457   inline void movaps(const Mem& dst, const XmmReg& src)
3458   { _emitInstruction(kX86InstMovAPS, &dst, &src); }
3459 
3460   //! @brief Move DWord.
movdX86Assembler3461   inline void movd(const Mem& dst, const XmmReg& src)
3462   { _emitInstruction(kX86InstMovD, &dst, &src); }
3463   //! @brief Move DWord.
movdX86Assembler3464   inline void movd(const GpReg& dst, const XmmReg& src)
3465   { _emitInstruction(kX86InstMovD, &dst, &src); }
3466   //! @brief Move DWord.
movdX86Assembler3467   inline void movd(const XmmReg& dst, const Mem& src)
3468   { _emitInstruction(kX86InstMovD, &dst, &src); }
3469   //! @brief Move DWord.
movdX86Assembler3470   inline void movd(const XmmReg& dst, const GpReg& src)
3471   { _emitInstruction(kX86InstMovD, &dst, &src); }
3472 
3473   //! @brief Move QWord (SSE).
movqX86Assembler3474   inline void movq(const XmmReg& dst, const XmmReg& src)
3475   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3476   //! @brief Move QWord (SSE).
movqX86Assembler3477   inline void movq(const Mem& dst, const XmmReg& src)
3478   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3479 #if defined(ASMJIT_X64)
3480   //! @brief Move QWord (SSE).
movqX86Assembler3481   inline void movq(const GpReg& dst, const XmmReg& src)
3482   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3483 #endif // ASMJIT_X64
3484   //! @brief Move QWord (SSE).
movqX86Assembler3485   inline void movq(const XmmReg& dst, const Mem& src)
3486   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3487 #if defined(ASMJIT_X64)
3488   //! @brief Move QWord (SSE).
movqX86Assembler3489   inline void movq(const XmmReg& dst, const GpReg& src)
3490   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3491 #endif // ASMJIT_X64
3492 
3493   //! @brief Move 64 Bits Non Temporal (SSE).
movntqX86Assembler3494   inline void movntq(const Mem& dst, const MmReg& src)
3495   { _emitInstruction(kX86InstMovNTQ, &dst, &src); }
3496 
3497   //! @brief High to Low Packed SP-FP (SSE).
movhlpsX86Assembler3498   inline void movhlps(const XmmReg& dst, const XmmReg& src)
3499   { _emitInstruction(kX86InstMovHLPS, &dst, &src); }
3500 
3501   //! @brief Move High Packed SP-FP (SSE).
movhpsX86Assembler3502   inline void movhps(const XmmReg& dst, const Mem& src)
3503   { _emitInstruction(kX86InstMovHPS, &dst, &src); }
3504 
3505   //! @brief Move High Packed SP-FP (SSE).
movhpsX86Assembler3506   inline void movhps(const Mem& dst, const XmmReg& src)
3507   { _emitInstruction(kX86InstMovHPS, &dst, &src); }
3508 
3509   //! @brief Move Low to High Packed SP-FP (SSE).
movlhpsX86Assembler3510   inline void movlhps(const XmmReg& dst, const XmmReg& src)
3511   { _emitInstruction(kX86InstMovLHPS, &dst, &src); }
3512 
3513   //! @brief Move Low Packed SP-FP (SSE).
movlpsX86Assembler3514   inline void movlps(const XmmReg& dst, const Mem& src)
3515   { _emitInstruction(kX86InstMovLPS, &dst, &src); }
3516 
3517   //! @brief Move Low Packed SP-FP (SSE).
movlpsX86Assembler3518   inline void movlps(const Mem& dst, const XmmReg& src)
3519   { _emitInstruction(kX86InstMovLPS, &dst, &src); }
3520 
3521   //! @brief Move Aligned Four Packed SP-FP Non Temporal (SSE).
movntpsX86Assembler3522   inline void movntps(const Mem& dst, const XmmReg& src)
3523   { _emitInstruction(kX86InstMovNTPS, &dst, &src); }
3524 
3525   //! @brief Move Scalar SP-FP (SSE).
movssX86Assembler3526   inline void movss(const XmmReg& dst, const XmmReg& src)
3527   { _emitInstruction(kX86InstMovSS, &dst, &src); }
3528 
3529   //! @brief Move Scalar SP-FP (SSE).
movssX86Assembler3530   inline void movss(const XmmReg& dst, const Mem& src)
3531   { _emitInstruction(kX86InstMovSS, &dst, &src); }
3532 
3533   //! @brief Move Scalar SP-FP (SSE).
movssX86Assembler3534   inline void movss(const Mem& dst, const XmmReg& src)
3535   { _emitInstruction(kX86InstMovSS, &dst, &src); }
3536 
3537   //! @brief Move Unaligned Packed SP-FP Values (SSE).
movupsX86Assembler3538   inline void movups(const XmmReg& dst, const XmmReg& src)
3539   { _emitInstruction(kX86InstMovUPS, &dst, &src); }
3540   //! @brief Move Unaligned Packed SP-FP Values (SSE).
movupsX86Assembler3541   inline void movups(const XmmReg& dst, const Mem& src)
3542   { _emitInstruction(kX86InstMovUPS, &dst, &src); }
3543 
3544   //! @brief Move Unaligned Packed SP-FP Values (SSE).
movupsX86Assembler3545   inline void movups(const Mem& dst, const XmmReg& src)
3546   { _emitInstruction(kX86InstMovUPS, &dst, &src); }
3547 
3548   //! @brief Packed SP-FP Multiply (SSE).
mulpsX86Assembler3549   inline void mulps(const XmmReg& dst, const XmmReg& src)
3550   { _emitInstruction(kX86InstMulPS, &dst, &src); }
3551   //! @brief Packed SP-FP Multiply (SSE).
mulpsX86Assembler3552   inline void mulps(const XmmReg& dst, const Mem& src)
3553   { _emitInstruction(kX86InstMulPS, &dst, &src); }
3554 
3555   //! @brief Scalar SP-FP Multiply (SSE).
mulssX86Assembler3556   inline void mulss(const XmmReg& dst, const XmmReg& src)
3557   { _emitInstruction(kX86InstMulSS, &dst, &src); }
3558   //! @brief Scalar SP-FP Multiply (SSE).
mulssX86Assembler3559   inline void mulss(const XmmReg& dst, const Mem& src)
3560   { _emitInstruction(kX86InstMulSS, &dst, &src); }
3561 
3562   //! @brief Bit-wise Logical OR for SP-FP Data (SSE).
orpsX86Assembler3563   inline void orps(const XmmReg& dst, const XmmReg& src)
3564   { _emitInstruction(kX86InstOrPS, &dst, &src); }
3565   //! @brief Bit-wise Logical OR for SP-FP Data (SSE).
orpsX86Assembler3566   inline void orps(const XmmReg& dst, const Mem& src)
3567   { _emitInstruction(kX86InstOrPS, &dst, &src); }
3568 
3569   //! @brief Packed Average (SSE).
pavgbX86Assembler3570   inline void pavgb(const MmReg& dst, const MmReg& src)
3571   { _emitInstruction(kX86InstPAvgB, &dst, &src); }
3572   //! @brief Packed Average (SSE).
pavgbX86Assembler3573   inline void pavgb(const MmReg& dst, const Mem& src)
3574   { _emitInstruction(kX86InstPAvgB, &dst, &src); }
3575 
3576   //! @brief Packed Average (SSE).
pavgwX86Assembler3577   inline void pavgw(const MmReg& dst, const MmReg& src)
3578   { _emitInstruction(kX86InstPAvgW, &dst, &src); }
3579   //! @brief Packed Average (SSE).
pavgwX86Assembler3580   inline void pavgw(const MmReg& dst, const Mem& src)
3581   { _emitInstruction(kX86InstPAvgW, &dst, &src); }
3582 
3583   //! @brief Extract Word (SSE).
pextrwX86Assembler3584   inline void pextrw(const GpReg& dst, const MmReg& src, const Imm& imm8)
3585   { _emitInstruction(kX86InstPExtrW, &dst, &src, &imm8); }
3586 
3587   //! @brief Insert Word (SSE).
pinsrwX86Assembler3588   inline void pinsrw(const MmReg& dst, const GpReg& src, const Imm& imm8)
3589   { _emitInstruction(kX86InstPInsRW, &dst, &src, &imm8); }
3590   //! @brief Insert Word (SSE).
pinsrwX86Assembler3591   inline void pinsrw(const MmReg& dst, const Mem& src, const Imm& imm8)
3592   { _emitInstruction(kX86InstPInsRW, &dst, &src, &imm8); }
3593 
3594   //! @brief Packed Signed Integer Word Maximum (SSE).
pmaxswX86Assembler3595   inline void pmaxsw(const MmReg& dst, const MmReg& src)
3596   { _emitInstruction(kX86InstPMaxSW, &dst, &src); }
3597   //! @brief Packed Signed Integer Word Maximum (SSE).
pmaxswX86Assembler3598   inline void pmaxsw(const MmReg& dst, const Mem& src)
3599   { _emitInstruction(kX86InstPMaxSW, &dst, &src); }
3600 
3601   //! @brief Packed Unsigned Integer Byte Maximum (SSE).
pmaxubX86Assembler3602   inline void pmaxub(const MmReg& dst, const MmReg& src)
3603   { _emitInstruction(kX86InstPMaxUB, &dst, &src); }
3604   //! @brief Packed Unsigned Integer Byte Maximum (SSE).
pmaxubX86Assembler3605   inline void pmaxub(const MmReg& dst, const Mem& src)
3606   { _emitInstruction(kX86InstPMaxUB, &dst, &src); }
3607 
3608   //! @brief Packed Signed Integer Word Minimum (SSE).
pminswX86Assembler3609   inline void pminsw(const MmReg& dst, const MmReg& src)
3610   { _emitInstruction(kX86InstPMinSW, &dst, &src); }
3611   //! @brief Packed Signed Integer Word Minimum (SSE).
pminswX86Assembler3612   inline void pminsw(const MmReg& dst, const Mem& src)
3613   { _emitInstruction(kX86InstPMinSW, &dst, &src); }
3614 
3615   //! @brief Packed Unsigned Integer Byte Minimum (SSE).
pminubX86Assembler3616   inline void pminub(const MmReg& dst, const MmReg& src)
3617   { _emitInstruction(kX86InstPMinUB, &dst, &src); }
3618   //! @brief Packed Unsigned Integer Byte Minimum (SSE).
pminubX86Assembler3619   inline void pminub(const MmReg& dst, const Mem& src)
3620   { _emitInstruction(kX86InstPMinUB, &dst, &src); }
3621 
3622   //! @brief Move Byte Mask To Integer (SSE).
pmovmskbX86Assembler3623   inline void pmovmskb(const GpReg& dst, const MmReg& src)
3624   { _emitInstruction(kX86InstPMovMskB, &dst, &src); }
3625 
3626   //! @brief Packed Multiply High Unsigned (SSE).
pmulhuwX86Assembler3627   inline void pmulhuw(const MmReg& dst, const MmReg& src)
3628   { _emitInstruction(kX86InstPMulHUW, &dst, &src); }
3629   //! @brief Packed Multiply High Unsigned (SSE).
pmulhuwX86Assembler3630   inline void pmulhuw(const MmReg& dst, const Mem& src)
3631   { _emitInstruction(kX86InstPMulHUW, &dst, &src); }
3632 
3633   //! @brief Packed Sum of Absolute Differences (SSE).
psadbwX86Assembler3634   inline void psadbw(const MmReg& dst, const MmReg& src)
3635   { _emitInstruction(kX86InstPSADBW, &dst, &src); }
3636   //! @brief Packed Sum of Absolute Differences (SSE).
psadbwX86Assembler3637   inline void psadbw(const MmReg& dst, const Mem& src)
3638   { _emitInstruction(kX86InstPSADBW, &dst, &src); }
3639 
3640   //! @brief Packed Shuffle word (SSE).
pshufwX86Assembler3641   inline void pshufw(const MmReg& dst, const MmReg& src, const Imm& imm8)
3642   { _emitInstruction(kX86InstPShufW, &dst, &src, &imm8); }
3643   //! @brief Packed Shuffle word (SSE).
pshufwX86Assembler3644   inline void pshufw(const MmReg& dst, const Mem& src, const Imm& imm8)
3645   { _emitInstruction(kX86InstPShufW, &dst, &src, &imm8); }
3646 
3647   //! @brief Packed SP-FP Reciprocal (SSE).
rcppsX86Assembler3648   inline void rcpps(const XmmReg& dst, const XmmReg& src)
3649   { _emitInstruction(kX86InstRcpPS, &dst, &src); }
3650   //! @brief Packed SP-FP Reciprocal (SSE).
rcppsX86Assembler3651   inline void rcpps(const XmmReg& dst, const Mem& src)
3652   { _emitInstruction(kX86InstRcpPS, &dst, &src); }
3653 
3654   //! @brief Scalar SP-FP Reciprocal (SSE).
rcpssX86Assembler3655   inline void rcpss(const XmmReg& dst, const XmmReg& src)
3656   { _emitInstruction(kX86InstRcpSS, &dst, &src); }
3657   //! @brief Scalar SP-FP Reciprocal (SSE).
rcpssX86Assembler3658   inline void rcpss(const XmmReg& dst, const Mem& src)
3659   { _emitInstruction(kX86InstRcpSS, &dst, &src); }
3660 
3661   //! @brief Prefetch (SSE).
prefetchX86Assembler3662   inline void prefetch(const Mem& mem, const Imm& hint)
3663   { _emitInstruction(kX86InstPrefetch, &mem, &hint); }
3664 
3665   //! @brief Compute Sum of Absolute Differences (SSE).
psadbwX86Assembler3666   inline void psadbw(const XmmReg& dst, const XmmReg& src)
3667   { _emitInstruction(kX86InstPSADBW, &dst, &src); }
3668   //! @brief Compute Sum of Absolute Differences (SSE).
psadbwX86Assembler3669   inline void psadbw(const XmmReg& dst, const Mem& src)
3670   { _emitInstruction(kX86InstPSADBW, &dst, &src); }
3671 
3672   //! @brief Packed SP-FP Square Root Reciprocal (SSE).
rsqrtpsX86Assembler3673   inline void rsqrtps(const XmmReg& dst, const XmmReg& src)
3674   { _emitInstruction(kX86InstSqrtPS, &dst, &src); }
3675   //! @brief Packed SP-FP Square Root Reciprocal (SSE).
rsqrtpsX86Assembler3676   inline void rsqrtps(const XmmReg& dst, const Mem& src)
3677   { _emitInstruction(kX86InstSqrtPS, &dst, &src); }
3678 
3679   //! @brief Scalar SP-FP Square Root Reciprocal (SSE).
rsqrtssX86Assembler3680   inline void rsqrtss(const XmmReg& dst, const XmmReg& src)
3681   { _emitInstruction(kX86InstSqrtSS, &dst, &src); }
3682   //! @brief Scalar SP-FP Square Root Reciprocal (SSE).
rsqrtssX86Assembler3683   inline void rsqrtss(const XmmReg& dst, const Mem& src)
3684   { _emitInstruction(kX86InstSqrtSS, &dst, &src); }
3685 
3686   //! @brief Store fence (SSE).
sfenceX86Assembler3687   inline void sfence()
3688   { _emitInstruction(kX86InstSFence); }
3689 
3690   //! @brief Shuffle SP-FP (SSE).
shufpsX86Assembler3691   inline void shufps(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
3692   { _emitInstruction(kX86InstShufPS, &dst, &src, &imm8); }
3693   //! @brief Shuffle SP-FP (SSE).
shufpsX86Assembler3694   inline void shufps(const XmmReg& dst, const Mem& src, const Imm& imm8)
3695   { _emitInstruction(kX86InstShufPS, &dst, &src, &imm8); }
3696 
3697   //! @brief Packed SP-FP Square Root (SSE).
sqrtpsX86Assembler3698   inline void sqrtps(const XmmReg& dst, const XmmReg& src)
3699   { _emitInstruction(kX86InstSqrtPS, &dst, &src); }
3700   //! @brief Packed SP-FP Square Root (SSE).
sqrtpsX86Assembler3701   inline void sqrtps(const XmmReg& dst, const Mem& src)
3702   { _emitInstruction(kX86InstSqrtPS, &dst, &src); }
3703 
3704   //! @brief Scalar SP-FP Square Root (SSE).
sqrtssX86Assembler3705   inline void sqrtss(const XmmReg& dst, const XmmReg& src)
3706   { _emitInstruction(kX86InstSqrtSS, &dst, &src); }
3707   //! @brief Scalar SP-FP Square Root (SSE).
sqrtssX86Assembler3708   inline void sqrtss(const XmmReg& dst, const Mem& src)
3709   { _emitInstruction(kX86InstSqrtSS, &dst, &src); }
3710 
3711   //! @brief Store Streaming SIMD Extension Control/Status (SSE).
stmxcsrX86Assembler3712   inline void stmxcsr(const Mem& dst)
3713   { _emitInstruction(kX86InstStMXCSR, &dst); }
3714 
3715   //! @brief Packed SP-FP Subtract (SSE).
subpsX86Assembler3716   inline void subps(const XmmReg& dst, const XmmReg& src)
3717   { _emitInstruction(kX86InstSubPS, &dst, &src); }
3718   //! @brief Packed SP-FP Subtract (SSE).
subpsX86Assembler3719   inline void subps(const XmmReg& dst, const Mem& src)
3720   { _emitInstruction(kX86InstSubPS, &dst, &src); }
3721 
3722   //! @brief Scalar SP-FP Subtract (SSE).
subssX86Assembler3723   inline void subss(const XmmReg& dst, const XmmReg& src)
3724   { _emitInstruction(kX86InstSubSS, &dst, &src); }
3725   //! @brief Scalar SP-FP Subtract (SSE).
subssX86Assembler3726   inline void subss(const XmmReg& dst, const Mem& src)
3727   { _emitInstruction(kX86InstSubSS, &dst, &src); }
3728 
3729   //! @brief Unordered Scalar SP-FP compare and set EFLAGS (SSE).
ucomissX86Assembler3730   inline void ucomiss(const XmmReg& dst, const XmmReg& src)
3731   { _emitInstruction(kX86InstUComISS, &dst, &src); }
3732   //! @brief Unordered Scalar SP-FP compare and set EFLAGS (SSE).
ucomissX86Assembler3733   inline void ucomiss(const XmmReg& dst, const Mem& src)
3734   { _emitInstruction(kX86InstUComISS, &dst, &src); }
3735 
3736   //! @brief Unpack High Packed SP-FP Data (SSE).
unpckhpsX86Assembler3737   inline void unpckhps(const XmmReg& dst, const XmmReg& src)
3738   { _emitInstruction(kX86InstUnpckHPS, &dst, &src); }
3739   //! @brief Unpack High Packed SP-FP Data (SSE).
unpckhpsX86Assembler3740   inline void unpckhps(const XmmReg& dst, const Mem& src)
3741   { _emitInstruction(kX86InstUnpckHPS, &dst, &src); }
3742 
3743   //! @brief Unpack Low Packed SP-FP Data (SSE).
unpcklpsX86Assembler3744   inline void unpcklps(const XmmReg& dst, const XmmReg& src)
3745   { _emitInstruction(kX86InstUnpckLPS, &dst, &src); }
3746   //! @brief Unpack Low Packed SP-FP Data (SSE).
unpcklpsX86Assembler3747   inline void unpcklps(const XmmReg& dst, const Mem& src)
3748   { _emitInstruction(kX86InstUnpckLPS, &dst, &src); }
3749 
3750   //! @brief Bit-wise Logical Xor for SP-FP Data (SSE).
xorpsX86Assembler3751   inline void xorps(const XmmReg& dst, const XmmReg& src)
3752   { _emitInstruction(kX86InstXorPS, &dst, &src); }
3753   //! @brief Bit-wise Logical Xor for SP-FP Data (SSE).
xorpsX86Assembler3754   inline void xorps(const XmmReg& dst, const Mem& src)
3755   { _emitInstruction(kX86InstXorPS, &dst, &src); }
3756 
3757   // --------------------------------------------------------------------------
3758   // [SSE2]
3759   // --------------------------------------------------------------------------
3760 
3761   //! @brief Packed DP-FP Add (SSE2).
addpdX86Assembler3762   inline void addpd(const XmmReg& dst, const XmmReg& src)
3763   { _emitInstruction(kX86InstAddPD, &dst, &src); }
3764   //! @brief Packed DP-FP Add (SSE2).
addpdX86Assembler3765   inline void addpd(const XmmReg& dst, const Mem& src)
3766   { _emitInstruction(kX86InstAddPD, &dst, &src); }
3767 
3768   //! @brief Scalar DP-FP Add (SSE2).
addsdX86Assembler3769   inline void addsd(const XmmReg& dst, const XmmReg& src)
3770   { _emitInstruction(kX86InstAddSD, &dst, &src); }
3771   //! @brief Scalar DP-FP Add (SSE2).
addsdX86Assembler3772   inline void addsd(const XmmReg& dst, const Mem& src)
3773   { _emitInstruction(kX86InstAddSD, &dst, &src); }
3774 
3775   //! @brief Bit-wise Logical And Not For DP-FP (SSE2).
andnpdX86Assembler3776   inline void andnpd(const XmmReg& dst, const XmmReg& src)
3777   { _emitInstruction(kX86InstAndnPD, &dst, &src); }
3778   //! @brief Bit-wise Logical And Not For DP-FP (SSE2).
andnpdX86Assembler3779   inline void andnpd(const XmmReg& dst, const Mem& src)
3780   { _emitInstruction(kX86InstAndnPD, &dst, &src); }
3781 
3782   //! @brief Bit-wise Logical And For DP-FP (SSE2).
andpdX86Assembler3783   inline void andpd(const XmmReg& dst, const XmmReg& src)
3784   { _emitInstruction(kX86InstAndPD, &dst, &src); }
3785   //! @brief Bit-wise Logical And For DP-FP (SSE2).
andpdX86Assembler3786   inline void andpd(const XmmReg& dst, const Mem& src)
3787   { _emitInstruction(kX86InstAndPD, &dst, &src); }
3788 
3789   //! @brief Flush Cache Line (SSE2).
clflushX86Assembler3790   inline void clflush(const Mem& mem)
3791   { _emitInstruction(kX86InstClFlush, &mem); }
3792 
3793   //! @brief Packed DP-FP Compare (SSE2).
cmppdX86Assembler3794   inline void cmppd(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
3795   { _emitInstruction(kX86InstCmpPD, &dst, &src, &imm8); }
3796   //! @brief Packed DP-FP Compare (SSE2).
cmppdX86Assembler3797   inline void cmppd(const XmmReg& dst, const Mem& src, const Imm& imm8)
3798   { _emitInstruction(kX86InstCmpPD, &dst, &src, &imm8); }
3799 
3800   //! @brief Compare Scalar SP-FP Values (SSE2).
cmpsdX86Assembler3801   inline void cmpsd(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
3802   { _emitInstruction(kX86InstCmpSD, &dst, &src, &imm8); }
3803   //! @brief Compare Scalar SP-FP Values (SSE2).
cmpsdX86Assembler3804   inline void cmpsd(const XmmReg& dst, const Mem& src, const Imm& imm8)
3805   { _emitInstruction(kX86InstCmpSD, &dst, &src, &imm8); }
3806 
3807   //! @brief Scalar Ordered DP-FP Compare and Set EFLAGS (SSE2).
comisdX86Assembler3808   inline void comisd(const XmmReg& dst, const XmmReg& src)
3809   { _emitInstruction(kX86InstComISD, &dst, &src); }
3810   //! @brief Scalar Ordered DP-FP Compare and Set EFLAGS (SSE2).
comisdX86Assembler3811   inline void comisd(const XmmReg& dst, const Mem& src)
3812   { _emitInstruction(kX86InstComISD, &dst, &src); }
3813 
3814   //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
cvtdq2pdX86Assembler3815   inline void cvtdq2pd(const XmmReg& dst, const XmmReg& src)
3816   { _emitInstruction(kX86InstCvtDQ2PD, &dst, &src); }
3817   //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
cvtdq2pdX86Assembler3818   inline void cvtdq2pd(const XmmReg& dst, const Mem& src)
3819   { _emitInstruction(kX86InstCvtDQ2PD, &dst, &src); }
3820 
3821   //! @brief Convert Packed Dword Integers to Packed SP-FP Values (SSE2).
cvtdq2psX86Assembler3822   inline void cvtdq2ps(const XmmReg& dst, const XmmReg& src)
3823   { _emitInstruction(kX86InstCvtDQ2PS, &dst, &src); }
3824   //! @brief Convert Packed Dword Integers to Packed SP-FP Values (SSE2).
cvtdq2psX86Assembler3825   inline void cvtdq2ps(const XmmReg& dst, const Mem& src)
3826   { _emitInstruction(kX86InstCvtDQ2PS, &dst, &src); }
3827 
3828   //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
cvtpd2dqX86Assembler3829   inline void cvtpd2dq(const XmmReg& dst, const XmmReg& src)
3830   { _emitInstruction(kX86InstCvtPD2DQ, &dst, &src); }
3831   //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
cvtpd2dqX86Assembler3832   inline void cvtpd2dq(const XmmReg& dst, const Mem& src)
3833   { _emitInstruction(kX86InstCvtPD2DQ, &dst, &src); }
3834 
3835   //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
cvtpd2piX86Assembler3836   inline void cvtpd2pi(const MmReg& dst, const XmmReg& src)
3837   { _emitInstruction(kX86InstCvtPD2PI, &dst, &src); }
3838   //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
cvtpd2piX86Assembler3839   inline void cvtpd2pi(const MmReg& dst, const Mem& src)
3840   { _emitInstruction(kX86InstCvtPD2PI, &dst, &src); }
3841 
3842   //! @brief Convert Packed DP-FP Values to Packed SP-FP Values (SSE2).
cvtpd2psX86Assembler3843   inline void cvtpd2ps(const XmmReg& dst, const XmmReg& src)
3844   { _emitInstruction(kX86InstCvtPD2PS, &dst, &src); }
3845   //! @brief Convert Packed DP-FP Values to Packed SP-FP Values (SSE2).
cvtpd2psX86Assembler3846   inline void cvtpd2ps(const XmmReg& dst, const Mem& src)
3847   { _emitInstruction(kX86InstCvtPD2PS, &dst, &src); }
3848 
3849   //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
cvtpi2pdX86Assembler3850   inline void cvtpi2pd(const XmmReg& dst, const MmReg& src)
3851   { _emitInstruction(kX86InstCvtPI2PD, &dst, &src); }
3852   //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
cvtpi2pdX86Assembler3853   inline void cvtpi2pd(const XmmReg& dst, const Mem& src)
3854   { _emitInstruction(kX86InstCvtPI2PD, &dst, &src); }
3855 
3856   //! @brief Convert Packed SP-FP Values to Packed Dword Integers (SSE2).
cvtps2dqX86Assembler3857   inline void cvtps2dq(const XmmReg& dst, const XmmReg& src)
3858   { _emitInstruction(kX86InstCvtPS2DQ, &dst, &src); }
3859   //! @brief Convert Packed SP-FP Values to Packed Dword Integers (SSE2).
cvtps2dqX86Assembler3860   inline void cvtps2dq(const XmmReg& dst, const Mem& src)
3861   { _emitInstruction(kX86InstCvtPS2DQ, &dst, &src); }
3862 
3863   //! @brief Convert Packed SP-FP Values to Packed DP-FP Values (SSE2).
cvtps2pdX86Assembler3864   inline void cvtps2pd(const XmmReg& dst, const XmmReg& src)
3865   { _emitInstruction(kX86InstCvtPS2PD, &dst, &src); }
3866   //! @brief Convert Packed SP-FP Values to Packed DP-FP Values (SSE2).
cvtps2pdX86Assembler3867   inline void cvtps2pd(const XmmReg& dst, const Mem& src)
3868   { _emitInstruction(kX86InstCvtPS2PD, &dst, &src); }
3869 
3870   //! @brief Convert Scalar DP-FP Value to Dword Integer (SSE2).
cvtsd2siX86Assembler3871   inline void cvtsd2si(const GpReg& dst, const XmmReg& src)
3872   { _emitInstruction(kX86InstCvtSD2SI, &dst, &src); }
3873   //! @brief Convert Scalar DP-FP Value to Dword Integer (SSE2).
cvtsd2siX86Assembler3874   inline void cvtsd2si(const GpReg& dst, const Mem& src)
3875   { _emitInstruction(kX86InstCvtSD2SI, &dst, &src); }
3876 
3877   //! @brief Convert Scalar DP-FP Value to Scalar SP-FP Value (SSE2).
cvtsd2ssX86Assembler3878   inline void cvtsd2ss(const XmmReg& dst, const XmmReg& src)
3879   { _emitInstruction(kX86InstCvtSD2SS, &dst, &src); }
3880   //! @brief Convert Scalar DP-FP Value to Scalar SP-FP Value (SSE2).
cvtsd2ssX86Assembler3881   inline void cvtsd2ss(const XmmReg& dst, const Mem& src)
3882   { _emitInstruction(kX86InstCvtSD2SS, &dst, &src); }
3883 
3884   //! @brief Convert Dword Integer to Scalar DP-FP Value (SSE2).
cvtsi2sdX86Assembler3885   inline void cvtsi2sd(const XmmReg& dst, const GpReg& src)
3886   { _emitInstruction(kX86InstCvtSI2SD, &dst, &src); }
3887   //! @brief Convert Dword Integer to Scalar DP-FP Value (SSE2).
cvtsi2sdX86Assembler3888   inline void cvtsi2sd(const XmmReg& dst, const Mem& src)
3889   { _emitInstruction(kX86InstCvtSI2SD, &dst, &src); }
3890 
3891   //! @brief Convert Scalar SP-FP Value to Scalar DP-FP Value (SSE2).
cvtss2sdX86Assembler3892   inline void cvtss2sd(const XmmReg& dst, const XmmReg& src)
3893   { _emitInstruction(kX86InstCvtSS2SD, &dst, &src); }
3894   //! @brief Convert Scalar SP-FP Value to Scalar DP-FP Value (SSE2).
cvtss2sdX86Assembler3895   inline void cvtss2sd(const XmmReg& dst, const Mem& src)
3896   { _emitInstruction(kX86InstCvtSS2SD, &dst, &src); }
3897 
3898   //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
cvttpd2piX86Assembler3899   inline void cvttpd2pi(const MmReg& dst, const XmmReg& src)
3900   { _emitInstruction(kX86InstCvttPD2PI, &dst, &src); }
3901   //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
cvttpd2piX86Assembler3902   inline void cvttpd2pi(const MmReg& dst, const Mem& src)
3903   { _emitInstruction(kX86InstCvttPD2PI, &dst, &src); }
3904 
3905   //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
cvttpd2dqX86Assembler3906   inline void cvttpd2dq(const XmmReg& dst, const XmmReg& src)
3907   { _emitInstruction(kX86InstCvttPD2DQ, &dst, &src); }
3908   //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
cvttpd2dqX86Assembler3909   inline void cvttpd2dq(const XmmReg& dst, const Mem& src)
3910   { _emitInstruction(kX86InstCvttPD2DQ, &dst, &src); }
3911 
3912   //! @brief Convert with Truncation Packed SP-FP Values to Packed Dword Integers (SSE2).
cvttps2dqX86Assembler3913   inline void cvttps2dq(const XmmReg& dst, const XmmReg& src)
3914   { _emitInstruction(kX86InstCvttPS2DQ, &dst, &src); }
3915   //! @brief Convert with Truncation Packed SP-FP Values to Packed Dword Integers (SSE2).
cvttps2dqX86Assembler3916   inline void cvttps2dq(const XmmReg& dst, const Mem& src)
3917   { _emitInstruction(kX86InstCvttPS2DQ, &dst, &src); }
3918 
3919   //! @brief Convert with Truncation Scalar DP-FP Value to Signed Dword Integer (SSE2).
cvttsd2siX86Assembler3920   inline void cvttsd2si(const GpReg& dst, const XmmReg& src)
3921   { _emitInstruction(kX86InstCvttSD2SI, &dst, &src); }
3922   //! @brief Convert with Truncation Scalar DP-FP Value to Signed Dword Integer (SSE2).
cvttsd2siX86Assembler3923   inline void cvttsd2si(const GpReg& dst, const Mem& src)
3924   { _emitInstruction(kX86InstCvttSD2SI, &dst, &src); }
3925 
3926   //! @brief Packed DP-FP Divide (SSE2).
divpdX86Assembler3927   inline void divpd(const XmmReg& dst, const XmmReg& src)
3928   { _emitInstruction(kX86InstDivPD, &dst, &src); }
3929   //! @brief Packed DP-FP Divide (SSE2).
divpdX86Assembler3930   inline void divpd(const XmmReg& dst, const Mem& src)
3931   { _emitInstruction(kX86InstDivPD, &dst, &src); }
3932 
3933   //! @brief Scalar DP-FP Divide (SSE2).
divsdX86Assembler3934   inline void divsd(const XmmReg& dst, const XmmReg& src)
3935   { _emitInstruction(kX86InstDivSD, &dst, &src); }
3936   //! @brief Scalar DP-FP Divide (SSE2).
divsdX86Assembler3937   inline void divsd(const XmmReg& dst, const Mem& src)
3938   { _emitInstruction(kX86InstDivSD, &dst, &src); }
3939 
3940   //! @brief Load Fence (SSE2).
lfenceX86Assembler3941   inline void lfence()
3942   { _emitInstruction(kX86InstLFence); }
3943 
3944   //! @brief Store Selected Bytes of Double Quadword (SSE2).
3945   //!
3946   //! @note Target is DS:EDI.
maskmovdquX86Assembler3947   inline void maskmovdqu(const XmmReg& src, const XmmReg& mask)
3948   { _emitInstruction(kX86InstMaskMovDQU, &src, &mask); }
3949 
3950   //! @brief Return Maximum Packed Double-Precision FP Values (SSE2).
maxpdX86Assembler3951   inline void maxpd(const XmmReg& dst, const XmmReg& src)
3952   { _emitInstruction(kX86InstMaxPD, &dst, &src); }
3953   //! @brief Return Maximum Packed Double-Precision FP Values (SSE2).
maxpdX86Assembler3954   inline void maxpd(const XmmReg& dst, const Mem& src)
3955   { _emitInstruction(kX86InstMaxPD, &dst, &src); }
3956 
3957   //! @brief Return Maximum Scalar Double-Precision FP Value (SSE2).
maxsdX86Assembler3958   inline void maxsd(const XmmReg& dst, const XmmReg& src)
3959   { _emitInstruction(kX86InstMaxSD, &dst, &src); }
3960   //! @brief Return Maximum Scalar Double-Precision FP Value (SSE2).
maxsdX86Assembler3961   inline void maxsd(const XmmReg& dst, const Mem& src)
3962   { _emitInstruction(kX86InstMaxSD, &dst, &src); }
3963 
3964   //! @brief Memory Fence (SSE2).
mfenceX86Assembler3965   inline void mfence()
3966   { _emitInstruction(kX86InstMFence); }
3967 
3968   //! @brief Return Minimum Packed DP-FP Values (SSE2).
minpdX86Assembler3969   inline void minpd(const XmmReg& dst, const XmmReg& src)
3970   { _emitInstruction(kX86InstMinPD, &dst, &src); }
3971   //! @brief Return Minimum Packed DP-FP Values (SSE2).
minpdX86Assembler3972   inline void minpd(const XmmReg& dst, const Mem& src)
3973   { _emitInstruction(kX86InstMinPD, &dst, &src); }
3974 
3975   //! @brief Return Minimum Scalar DP-FP Value (SSE2).
minsdX86Assembler3976   inline void minsd(const XmmReg& dst, const XmmReg& src)
3977   { _emitInstruction(kX86InstMinSD, &dst, &src); }
3978   //! @brief Return Minimum Scalar DP-FP Value (SSE2).
minsdX86Assembler3979   inline void minsd(const XmmReg& dst, const Mem& src)
3980   { _emitInstruction(kX86InstMinSD, &dst, &src); }
3981 
3982   //! @brief Move Aligned DQWord (SSE2).
movdqaX86Assembler3983   inline void movdqa(const XmmReg& dst, const XmmReg& src)
3984   { _emitInstruction(kX86InstMovDQA, &dst, &src); }
3985   //! @brief Move Aligned DQWord (SSE2).
movdqaX86Assembler3986   inline void movdqa(const XmmReg& dst, const Mem& src)
3987   { _emitInstruction(kX86InstMovDQA, &dst, &src); }
3988 
3989   //! @brief Move Aligned DQWord (SSE2).
movdqaX86Assembler3990   inline void movdqa(const Mem& dst, const XmmReg& src)
3991   { _emitInstruction(kX86InstMovDQA, &dst, &src); }
3992 
3993   //! @brief Move Unaligned Double Quadword (SSE2).
movdquX86Assembler3994   inline void movdqu(const XmmReg& dst, const XmmReg& src)
3995   { _emitInstruction(kX86InstMovDQU, &dst, &src); }
3996   //! @brief Move Unaligned Double Quadword (SSE2).
movdquX86Assembler3997   inline void movdqu(const XmmReg& dst, const Mem& src)
3998   { _emitInstruction(kX86InstMovDQU, &dst, &src); }
3999 
4000   //! @brief Move Unaligned Double Quadword (SSE2).
movdquX86Assembler4001   inline void movdqu(const Mem& dst, const XmmReg& src)
4002   { _emitInstruction(kX86InstMovDQU, &dst, &src); }
4003 
4004   //! @brief Extract Packed SP-FP Sign Mask (SSE2).
movmskpsX86Assembler4005   inline void movmskps(const GpReg& dst, const XmmReg& src)
4006   { _emitInstruction(kX86InstMovMskPS, &dst, &src); }
4007 
4008   //! @brief Extract Packed DP-FP Sign Mask (SSE2).
movmskpdX86Assembler4009   inline void movmskpd(const GpReg& dst, const XmmReg& src)
4010   { _emitInstruction(kX86InstMovMskPD, &dst, &src); }
4011 
4012   //! @brief Move Scalar Double-Precision FP Value (SSE2).
movsdX86Assembler4013   inline void movsd(const XmmReg& dst, const XmmReg& src)
4014   { _emitInstruction(kX86InstMovSD, &dst, &src); }
4015   //! @brief Move Scalar Double-Precision FP Value (SSE2).
movsdX86Assembler4016   inline void movsd(const XmmReg& dst, const Mem& src)
4017   { _emitInstruction(kX86InstMovSD, &dst, &src); }
4018 
4019   //! @brief Move Scalar Double-Precision FP Value (SSE2).
movsdX86Assembler4020   inline void movsd(const Mem& dst, const XmmReg& src)
4021   { _emitInstruction(kX86InstMovSD, &dst, &src); }
4022 
4023   //! @brief Move Aligned Packed Double-Precision FP Values (SSE2).
movapdX86Assembler4024   inline void movapd(const XmmReg& dst, const XmmReg& src)
4025   { _emitInstruction(kX86InstMovAPD, &dst, &src); }
4026 
4027   //! @brief Move Aligned Packed Double-Precision FP Values (SSE2).
movapdX86Assembler4028   inline void movapd(const XmmReg& dst, const Mem& src)
4029   { _emitInstruction(kX86InstMovAPD, &dst, &src); }
4030 
4031   //! @brief Move Aligned Packed Double-Precision FP Values (SSE2).
movapdX86Assembler4032   inline void movapd(const Mem& dst, const XmmReg& src)
4033   { _emitInstruction(kX86InstMovAPD, &dst, &src); }
4034 
4035   //! @brief Move Quadword from XMM to MMX Technology Register (SSE2).
movdq2qX86Assembler4036   inline void movdq2q(const MmReg& dst, const XmmReg& src)
4037   { _emitInstruction(kX86InstMovDQ2Q, &dst, &src); }
4038 
4039   //! @brief Move Quadword from MMX Technology to XMM Register (SSE2).
movq2dqX86Assembler4040   inline void movq2dq(const XmmReg& dst, const MmReg& src)
4041   { _emitInstruction(kX86InstMovQ2DQ, &dst, &src); }
4042 
4043   //! @brief Move High Packed Double-Precision FP Value (SSE2).
movhpdX86Assembler4044   inline void movhpd(const XmmReg& dst, const Mem& src)
4045   { _emitInstruction(kX86InstMovHPD, &dst, &src); }
4046 
4047   //! @brief Move High Packed Double-Precision FP Value (SSE2).
movhpdX86Assembler4048   inline void movhpd(const Mem& dst, const XmmReg& src)
4049   { _emitInstruction(kX86InstMovHPD, &dst, &src); }
4050 
4051   //! @brief Move Low Packed Double-Precision FP Value (SSE2).
movlpdX86Assembler4052   inline void movlpd(const XmmReg& dst, const Mem& src)
4053   { _emitInstruction(kX86InstMovLPD, &dst, &src); }
4054 
4055   //! @brief Move Low Packed Double-Precision FP Value (SSE2).
movlpdX86Assembler4056   inline void movlpd(const Mem& dst, const XmmReg& src)
4057   { _emitInstruction(kX86InstMovLPD, &dst, &src); }
4058 
4059   //! @brief Store Double Quadword Using Non-Temporal Hint (SSE2).
movntdqX86Assembler4060   inline void movntdq(const Mem& dst, const XmmReg& src)
4061   { _emitInstruction(kX86InstMovNTDQ, &dst, &src); }
4062 
4063   //! @brief Store Store DWORD Using Non-Temporal Hint (SSE2).
movntiX86Assembler4064   inline void movnti(const Mem& dst, const GpReg& src)
4065   { _emitInstruction(kX86InstMovNTI, &dst, &src); }
4066 
4067   //! @brief Store Packed Double-Precision FP Values Using Non-Temporal Hint (SSE2).
movntpdX86Assembler4068   inline void movntpd(const Mem& dst, const XmmReg& src)
4069   { _emitInstruction(kX86InstMovNTPD, &dst, &src); }
4070 
4071   //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2).
movupdX86Assembler4072   inline void movupd(const XmmReg& dst, const XmmReg& src)
4073   { _emitInstruction(kX86InstMovUPD, &dst, &src); }
4074 
4075   //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2).
movupdX86Assembler4076   inline void movupd(const XmmReg& dst, const Mem& src)
4077   { _emitInstruction(kX86InstMovUPD, &dst, &src); }
4078 
4079   //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2).
movupdX86Assembler4080   inline void movupd(const Mem& dst, const XmmReg& src)
4081   { _emitInstruction(kX86InstMovUPD, &dst, &src); }
4082 
4083   //! @brief Packed DP-FP Multiply (SSE2).
mulpdX86Assembler4084   inline void mulpd(const XmmReg& dst, const XmmReg& src)
4085   { _emitInstruction(kX86InstMulPD, &dst, &src); }
4086   //! @brief Packed DP-FP Multiply (SSE2).
mulpdX86Assembler4087   inline void mulpd(const XmmReg& dst, const Mem& src)
4088   { _emitInstruction(kX86InstMulPD, &dst, &src); }
4089 
4090   //! @brief Scalar DP-FP Multiply (SSE2).
mulsdX86Assembler4091   inline void mulsd(const XmmReg& dst, const XmmReg& src)
4092   { _emitInstruction(kX86InstMulSD, &dst, &src); }
4093   //! @brief Scalar DP-FP Multiply (SSE2).
mulsdX86Assembler4094   inline void mulsd(const XmmReg& dst, const Mem& src)
4095   { _emitInstruction(kX86InstMulSD, &dst, &src); }
4096 
4097   //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
orpdX86Assembler4098   inline void orpd(const XmmReg& dst, const XmmReg& src)
4099   { _emitInstruction(kX86InstOrPD, &dst, &src); }
4100   //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
orpdX86Assembler4101   inline void orpd(const XmmReg& dst, const Mem& src)
4102   { _emitInstruction(kX86InstOrPD, &dst, &src); }
4103 
4104   //! @brief Pack with Signed Saturation (SSE2).
packsswbX86Assembler4105   inline void packsswb(const XmmReg& dst, const XmmReg& src)
4106   { _emitInstruction(kX86InstPackSSWB, &dst, &src); }
4107   //! @brief Pack with Signed Saturation (SSE2).
packsswbX86Assembler4108   inline void packsswb(const XmmReg& dst, const Mem& src)
4109   { _emitInstruction(kX86InstPackSSWB, &dst, &src); }
4110 
4111   //! @brief Pack with Signed Saturation (SSE2).
packssdwX86Assembler4112   inline void packssdw(const XmmReg& dst, const XmmReg& src)
4113   { _emitInstruction(kX86InstPackSSDW, &dst, &src); }
4114   //! @brief Pack with Signed Saturation (SSE2).
packssdwX86Assembler4115   inline void packssdw(const XmmReg& dst, const Mem& src)
4116   { _emitInstruction(kX86InstPackSSDW, &dst, &src); }
4117 
4118   //! @brief Pack with Unsigned Saturation (SSE2).
packuswbX86Assembler4119   inline void packuswb(const XmmReg& dst, const XmmReg& src)
4120   { _emitInstruction(kX86InstPackUSWB, &dst, &src); }
4121   //! @brief Pack with Unsigned Saturation (SSE2).
packuswbX86Assembler4122   inline void packuswb(const XmmReg& dst, const Mem& src)
4123   { _emitInstruction(kX86InstPackUSWB, &dst, &src); }
4124 
4125   //! @brief Packed BYTE Add (SSE2).
paddbX86Assembler4126   inline void paddb(const XmmReg& dst, const XmmReg& src)
4127   { _emitInstruction(kX86InstPAddB, &dst, &src); }
4128   //! @brief Packed BYTE Add (SSE2).
paddbX86Assembler4129   inline void paddb(const XmmReg& dst, const Mem& src)
4130   { _emitInstruction(kX86InstPAddB, &dst, &src); }
4131 
4132   //! @brief Packed WORD Add (SSE2).
paddwX86Assembler4133   inline void paddw(const XmmReg& dst, const XmmReg& src)
4134   { _emitInstruction(kX86InstPAddW, &dst, &src); }
4135   //! @brief Packed WORD Add (SSE2).
paddwX86Assembler4136   inline void paddw(const XmmReg& dst, const Mem& src)
4137   { _emitInstruction(kX86InstPAddW, &dst, &src); }
4138 
4139   //! @brief Packed DWORD Add (SSE2).
padddX86Assembler4140   inline void paddd(const XmmReg& dst, const XmmReg& src)
4141   { _emitInstruction(kX86InstPAddD, &dst, &src); }
4142   //! @brief Packed DWORD Add (SSE2).
padddX86Assembler4143   inline void paddd(const XmmReg& dst, const Mem& src)
4144   { _emitInstruction(kX86InstPAddD, &dst, &src); }
4145 
4146   //! @brief Packed QWORD Add (SSE2).
paddqX86Assembler4147   inline void paddq(const MmReg& dst, const MmReg& src)
4148   { _emitInstruction(kX86InstPAddQ, &dst, &src); }
4149   //! @brief Packed QWORD Add (SSE2).
paddqX86Assembler4150   inline void paddq(const MmReg& dst, const Mem& src)
4151   { _emitInstruction(kX86InstPAddQ, &dst, &src); }
4152 
4153   //! @brief Packed QWORD Add (SSE2).
paddqX86Assembler4154   inline void paddq(const XmmReg& dst, const XmmReg& src)
4155   { _emitInstruction(kX86InstPAddQ, &dst, &src); }
4156   //! @brief Packed QWORD Add (SSE2).
paddqX86Assembler4157   inline void paddq(const XmmReg& dst, const Mem& src)
4158   { _emitInstruction(kX86InstPAddQ, &dst, &src); }
4159 
4160   //! @brief Packed Add with Saturation (SSE2).
paddsbX86Assembler4161   inline void paddsb(const XmmReg& dst, const XmmReg& src)
4162   { _emitInstruction(kX86InstPAddSB, &dst, &src); }
4163   //! @brief Packed Add with Saturation (SSE2).
paddsbX86Assembler4164   inline void paddsb(const XmmReg& dst, const Mem& src)
4165   { _emitInstruction(kX86InstPAddSB, &dst, &src); }
4166 
4167   //! @brief Packed Add with Saturation (SSE2).
paddswX86Assembler4168   inline void paddsw(const XmmReg& dst, const XmmReg& src)
4169   { _emitInstruction(kX86InstPAddSW, &dst, &src); }
4170   //! @brief Packed Add with Saturation (SSE2).
paddswX86Assembler4171   inline void paddsw(const XmmReg& dst, const Mem& src)
4172   { _emitInstruction(kX86InstPAddSW, &dst, &src); }
4173 
4174   //! @brief Packed Add Unsigned with Saturation (SSE2).
paddusbX86Assembler4175   inline void paddusb(const XmmReg& dst, const XmmReg& src)
4176   { _emitInstruction(kX86InstPAddUSB, &dst, &src); }
4177   //! @brief Packed Add Unsigned with Saturation (SSE2).
paddusbX86Assembler4178   inline void paddusb(const XmmReg& dst, const Mem& src)
4179   { _emitInstruction(kX86InstPAddUSB, &dst, &src); }
4180 
4181   //! @brief Packed Add Unsigned with Saturation (SSE2).
padduswX86Assembler4182   inline void paddusw(const XmmReg& dst, const XmmReg& src)
4183   { _emitInstruction(kX86InstPAddUSW, &dst, &src); }
4184   //! @brief Packed Add Unsigned with Saturation (SSE2).
padduswX86Assembler4185   inline void paddusw(const XmmReg& dst, const Mem& src)
4186   { _emitInstruction(kX86InstPAddUSW, &dst, &src); }
4187 
4188   //! @brief Logical AND (SSE2).
pandX86Assembler4189   inline void pand(const XmmReg& dst, const XmmReg& src)
4190   { _emitInstruction(kX86InstPAnd, &dst, &src); }
4191   //! @brief Logical AND (SSE2).
pandX86Assembler4192   inline void pand(const XmmReg& dst, const Mem& src)
4193   { _emitInstruction(kX86InstPAnd, &dst, &src); }
4194 
4195   //! @brief Logical AND Not (SSE2).
pandnX86Assembler4196   inline void pandn(const XmmReg& dst, const XmmReg& src)
4197   { _emitInstruction(kX86InstPAndN, &dst, &src); }
4198   //! @brief Logical AND Not (SSE2).
pandnX86Assembler4199   inline void pandn(const XmmReg& dst, const Mem& src)
4200   { _emitInstruction(kX86InstPAndN, &dst, &src); }
4201 
4202   //! @brief Spin Loop Hint (SSE2).
pauseX86Assembler4203   inline void pause()
4204   { _emitInstruction(kX86InstPause); }
4205 
4206   //! @brief Packed Average (SSE2).
pavgbX86Assembler4207   inline void pavgb(const XmmReg& dst, const XmmReg& src)
4208   { _emitInstruction(kX86InstPAvgB, &dst, &src); }
4209   //! @brief Packed Average (SSE2).
pavgbX86Assembler4210   inline void pavgb(const XmmReg& dst, const Mem& src)
4211   { _emitInstruction(kX86InstPAvgB, &dst, &src); }
4212 
4213   //! @brief Packed Average (SSE2).
pavgwX86Assembler4214   inline void pavgw(const XmmReg& dst, const XmmReg& src)
4215   { _emitInstruction(kX86InstPAvgW, &dst, &src); }
4216   //! @brief Packed Average (SSE2).
pavgwX86Assembler4217   inline void pavgw(const XmmReg& dst, const Mem& src)
4218   { _emitInstruction(kX86InstPAvgW, &dst, &src); }
4219 
4220   //! @brief Packed Compare for Equal (BYTES) (SSE2).
pcmpeqbX86Assembler4221   inline void pcmpeqb(const XmmReg& dst, const XmmReg& src)
4222   { _emitInstruction(kX86InstPCmpEqB, &dst, &src); }
4223   //! @brief Packed Compare for Equal (BYTES) (SSE2).
pcmpeqbX86Assembler4224   inline void pcmpeqb(const XmmReg& dst, const Mem& src)
4225   { _emitInstruction(kX86InstPCmpEqB, &dst, &src); }
4226 
4227   //! @brief Packed Compare for Equal (WORDS) (SSE2).
pcmpeqwX86Assembler4228   inline void pcmpeqw(const XmmReg& dst, const XmmReg& src)
4229   { _emitInstruction(kX86InstPCmpEqW, &dst, &src); }
4230   //! @brief Packed Compare for Equal (WORDS) (SSE2).
pcmpeqwX86Assembler4231   inline void pcmpeqw(const XmmReg& dst, const Mem& src)
4232   { _emitInstruction(kX86InstPCmpEqW, &dst, &src); }
4233 
4234   //! @brief Packed Compare for Equal (DWORDS) (SSE2).
pcmpeqdX86Assembler4235   inline void pcmpeqd(const XmmReg& dst, const XmmReg& src)
4236   { _emitInstruction(kX86InstPCmpEqD, &dst, &src); }
4237   //! @brief Packed Compare for Equal (DWORDS) (SSE2).
pcmpeqdX86Assembler4238   inline void pcmpeqd(const XmmReg& dst, const Mem& src)
4239   { _emitInstruction(kX86InstPCmpEqD, &dst, &src); }
4240 
4241   //! @brief Packed Compare for Greater Than (BYTES) (SSE2).
pcmpgtbX86Assembler4242   inline void pcmpgtb(const XmmReg& dst, const XmmReg& src)
4243   { _emitInstruction(kX86InstPCmpGtB, &dst, &src); }
4244   //! @brief Packed Compare for Greater Than (BYTES) (SSE2).
pcmpgtbX86Assembler4245   inline void pcmpgtb(const XmmReg& dst, const Mem& src)
4246   { _emitInstruction(kX86InstPCmpGtB, &dst, &src); }
4247 
4248   //! @brief Packed Compare for Greater Than (WORDS) (SSE2).
pcmpgtwX86Assembler4249   inline void pcmpgtw(const XmmReg& dst, const XmmReg& src)
4250   { _emitInstruction(kX86InstPCmpGtW, &dst, &src); }
4251   //! @brief Packed Compare for Greater Than (WORDS) (SSE2).
pcmpgtwX86Assembler4252   inline void pcmpgtw(const XmmReg& dst, const Mem& src)
4253   { _emitInstruction(kX86InstPCmpGtW, &dst, &src); }
4254 
4255   //! @brief Packed Compare for Greater Than (DWORDS) (SSE2).
pcmpgtdX86Assembler4256   inline void pcmpgtd(const XmmReg& dst, const XmmReg& src)
4257   { _emitInstruction(kX86InstPCmpGtD, &dst, &src); }
4258   //! @brief Packed Compare for Greater Than (DWORDS) (SSE2).
pcmpgtdX86Assembler4259   inline void pcmpgtd(const XmmReg& dst, const Mem& src)
4260   { _emitInstruction(kX86InstPCmpGtD, &dst, &src); }
4261 
4262   //! @brief Packed Signed Integer Word Maximum (SSE2).
pmaxswX86Assembler4263   inline void pmaxsw(const XmmReg& dst, const XmmReg& src)
4264   { _emitInstruction(kX86InstPMaxSW, &dst, &src); }
4265   //! @brief Packed Signed Integer Word Maximum (SSE2).
pmaxswX86Assembler4266   inline void pmaxsw(const XmmReg& dst, const Mem& src)
4267   { _emitInstruction(kX86InstPMaxSW, &dst, &src); }
4268 
4269   //! @brief Packed Unsigned Integer Byte Maximum (SSE2).
pmaxubX86Assembler4270   inline void pmaxub(const XmmReg& dst, const XmmReg& src)
4271   { _emitInstruction(kX86InstPMaxUB, &dst, &src); }
4272   //! @brief Packed Unsigned Integer Byte Maximum (SSE2).
pmaxubX86Assembler4273   inline void pmaxub(const XmmReg& dst, const Mem& src)
4274   { _emitInstruction(kX86InstPMaxUB, &dst, &src); }
4275 
4276   //! @brief Packed Signed Integer Word Minimum (SSE2).
pminswX86Assembler4277   inline void pminsw(const XmmReg& dst, const XmmReg& src)
4278   { _emitInstruction(kX86InstPMinSW, &dst, &src); }
4279   //! @brief Packed Signed Integer Word Minimum (SSE2).
pminswX86Assembler4280   inline void pminsw(const XmmReg& dst, const Mem& src)
4281   { _emitInstruction(kX86InstPMinSW, &dst, &src); }
4282 
4283   //! @brief Packed Unsigned Integer Byte Minimum (SSE2).
pminubX86Assembler4284   inline void pminub(const XmmReg& dst, const XmmReg& src)
4285   { _emitInstruction(kX86InstPMinUB, &dst, &src); }
4286   //! @brief Packed Unsigned Integer Byte Minimum (SSE2).
pminubX86Assembler4287   inline void pminub(const XmmReg& dst, const Mem& src)
4288   { _emitInstruction(kX86InstPMinUB, &dst, &src); }
4289 
4290   //! @brief Move Byte Mask (SSE2).
pmovmskbX86Assembler4291   inline void pmovmskb(const GpReg& dst, const XmmReg& src)
4292   { _emitInstruction(kX86InstPMovMskB, &dst, &src); }
4293 
4294   //! @brief Packed Multiply High (SSE2).
pmulhwX86Assembler4295   inline void pmulhw(const XmmReg& dst, const XmmReg& src)
4296   { _emitInstruction(kX86InstPMulHW, &dst, &src); }
4297   //! @brief Packed Multiply High (SSE2).
pmulhwX86Assembler4298   inline void pmulhw(const XmmReg& dst, const Mem& src)
4299   { _emitInstruction(kX86InstPMulHW, &dst, &src); }
4300 
4301   //! @brief Packed Multiply High Unsigned (SSE2).
pmulhuwX86Assembler4302   inline void pmulhuw(const XmmReg& dst, const XmmReg& src)
4303   { _emitInstruction(kX86InstPMulHUW, &dst, &src); }
4304   //! @brief Packed Multiply High Unsigned (SSE2).
pmulhuwX86Assembler4305   inline void pmulhuw(const XmmReg& dst, const Mem& src)
4306   { _emitInstruction(kX86InstPMulHUW, &dst, &src); }
4307 
4308   //! @brief Packed Multiply Low (SSE2).
pmullwX86Assembler4309   inline void pmullw(const XmmReg& dst, const XmmReg& src)
4310   { _emitInstruction(kX86InstPMulLW, &dst, &src); }
4311   //! @brief Packed Multiply Low (SSE2).
pmullwX86Assembler4312   inline void pmullw(const XmmReg& dst, const Mem& src)
4313   { _emitInstruction(kX86InstPMulLW, &dst, &src); }
4314 
4315   //! @brief Packed Multiply to QWORD (SSE2).
pmuludqX86Assembler4316   inline void pmuludq(const MmReg& dst, const MmReg& src)
4317   { _emitInstruction(kX86InstPMulUDQ, &dst, &src); }
4318   //! @brief Packed Multiply to QWORD (SSE2).
pmuludqX86Assembler4319   inline void pmuludq(const MmReg& dst, const Mem& src)
4320   { _emitInstruction(kX86InstPMulUDQ, &dst, &src); }
4321 
4322   //! @brief Packed Multiply to QWORD (SSE2).
pmuludqX86Assembler4323   inline void pmuludq(const XmmReg& dst, const XmmReg& src)
4324   { _emitInstruction(kX86InstPMulUDQ, &dst, &src); }
4325   //! @brief Packed Multiply to QWORD (SSE2).
pmuludqX86Assembler4326   inline void pmuludq(const XmmReg& dst, const Mem& src)
4327   { _emitInstruction(kX86InstPMulUDQ, &dst, &src); }
4328 
4329   //! @brief Bitwise Logical OR (SSE2).
porX86Assembler4330   inline void por(const XmmReg& dst, const XmmReg& src)
4331   { _emitInstruction(kX86InstPOr, &dst, &src); }
4332   //! @brief Bitwise Logical OR (SSE2).
porX86Assembler4333   inline void por(const XmmReg& dst, const Mem& src)
4334   { _emitInstruction(kX86InstPOr, &dst, &src); }
4335 
4336   //! @brief Packed Shift Left Logical (SSE2).
pslldX86Assembler4337   inline void pslld(const XmmReg& dst, const XmmReg& src)
4338   { _emitInstruction(kX86InstPSllD, &dst, &src); }
4339   //! @brief Packed Shift Left Logical (SSE2).
pslldX86Assembler4340   inline void pslld(const XmmReg& dst, const Mem& src)
4341   { _emitInstruction(kX86InstPSllD, &dst, &src); }
4342   //! @brief Packed Shift Left Logical (SSE2).
pslldX86Assembler4343   inline void pslld(const XmmReg& dst, const Imm& src)
4344   { _emitInstruction(kX86InstPSllD, &dst, &src); }
4345 
4346   //! @brief Packed Shift Left Logical (SSE2).
psllqX86Assembler4347   inline void psllq(const XmmReg& dst, const XmmReg& src)
4348   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
4349   //! @brief Packed Shift Left Logical (SSE2).
psllqX86Assembler4350   inline void psllq(const XmmReg& dst, const Mem& src)
4351   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
4352   //! @brief Packed Shift Left Logical (SSE2).
psllqX86Assembler4353   inline void psllq(const XmmReg& dst, const Imm& src)
4354   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
4355 
4356   //! @brief Packed Shift Left Logical (SSE2).
psllwX86Assembler4357   inline void psllw(const XmmReg& dst, const XmmReg& src)
4358   { _emitInstruction(kX86InstPSllW, &dst, &src); }
4359   //! @brief Packed Shift Left Logical (SSE2).
psllwX86Assembler4360   inline void psllw(const XmmReg& dst, const Mem& src)
4361   { _emitInstruction(kX86InstPSllW, &dst, &src); }
4362   //! @brief Packed Shift Left Logical (SSE2).
psllwX86Assembler4363   inline void psllw(const XmmReg& dst, const Imm& src)
4364   { _emitInstruction(kX86InstPSllW, &dst, &src); }
4365 
4366   //! @brief Packed Shift Left Logical (SSE2).
pslldqX86Assembler4367   inline void pslldq(const XmmReg& dst, const Imm& src)
4368   { _emitInstruction(kX86InstPSllDQ, &dst, &src); }
4369 
4370   //! @brief Packed Shift Right Arithmetic (SSE2).
psradX86Assembler4371   inline void psrad(const XmmReg& dst, const XmmReg& src)
4372   { _emitInstruction(kX86InstPSraD, &dst, &src); }
4373   //! @brief Packed Shift Right Arithmetic (SSE2).
psradX86Assembler4374   inline void psrad(const XmmReg& dst, const Mem& src)
4375   { _emitInstruction(kX86InstPSraD, &dst, &src); }
4376   //! @brief Packed Shift Right Arithmetic (SSE2).
psradX86Assembler4377   inline void psrad(const XmmReg& dst, const Imm& src)
4378   { _emitInstruction(kX86InstPSraD, &dst, &src); }
4379 
4380   //! @brief Packed Shift Right Arithmetic (SSE2).
psrawX86Assembler4381   inline void psraw(const XmmReg& dst, const XmmReg& src)
4382   { _emitInstruction(kX86InstPSraW, &dst, &src); }
4383   //! @brief Packed Shift Right Arithmetic (SSE2).
psrawX86Assembler4384   inline void psraw(const XmmReg& dst, const Mem& src)
4385   { _emitInstruction(kX86InstPSraW, &dst, &src); }
4386   //! @brief Packed Shift Right Arithmetic (SSE2).
psrawX86Assembler4387   inline void psraw(const XmmReg& dst, const Imm& src)
4388   { _emitInstruction(kX86InstPSraW, &dst, &src); }
4389 
4390   //! @brief Packed Subtract (SSE2).
psubbX86Assembler4391   inline void psubb(const XmmReg& dst, const XmmReg& src)
4392   { _emitInstruction(kX86InstPSubB, &dst, &src); }
4393   //! @brief Packed Subtract (SSE2).
psubbX86Assembler4394   inline void psubb(const XmmReg& dst, const Mem& src)
4395   { _emitInstruction(kX86InstPSubB, &dst, &src); }
4396 
4397   //! @brief Packed Subtract (SSE2).
psubwX86Assembler4398   inline void psubw(const XmmReg& dst, const XmmReg& src)
4399   { _emitInstruction(kX86InstPSubW, &dst, &src); }
4400   //! @brief Packed Subtract (SSE2).
psubwX86Assembler4401   inline void psubw(const XmmReg& dst, const Mem& src)
4402   { _emitInstruction(kX86InstPSubW, &dst, &src); }
4403 
4404   //! @brief Packed Subtract (SSE2).
psubdX86Assembler4405   inline void psubd(const XmmReg& dst, const XmmReg& src)
4406   { _emitInstruction(kX86InstPSubD, &dst, &src); }
4407   //! @brief Packed Subtract (SSE2).
psubdX86Assembler4408   inline void psubd(const XmmReg& dst, const Mem& src)
4409   { _emitInstruction(kX86InstPSubD, &dst, &src); }
4410 
4411   //! @brief Packed Subtract (SSE2).
psubqX86Assembler4412   inline void psubq(const MmReg& dst, const MmReg& src)
4413   { _emitInstruction(kX86InstPSubQ, &dst, &src); }
4414   //! @brief Packed Subtract (SSE2).
psubqX86Assembler4415   inline void psubq(const MmReg& dst, const Mem& src)
4416   { _emitInstruction(kX86InstPSubQ, &dst, &src); }
4417 
4418   //! @brief Packed Subtract (SSE2).
psubqX86Assembler4419   inline void psubq(const XmmReg& dst, const XmmReg& src)
4420   { _emitInstruction(kX86InstPSubQ, &dst, &src); }
4421   //! @brief Packed Subtract (SSE2).
psubqX86Assembler4422   inline void psubq(const XmmReg& dst, const Mem& src)
4423   { _emitInstruction(kX86InstPSubQ, &dst, &src); }
4424 
4425   //! @brief Packed Multiply and Add (SSE2).
pmaddwdX86Assembler4426   inline void pmaddwd(const XmmReg& dst, const XmmReg& src)
4427   { _emitInstruction(kX86InstPMAddWD, &dst, &src); }
4428   //! @brief Packed Multiply and Add (SSE2).
pmaddwdX86Assembler4429   inline void pmaddwd(const XmmReg& dst, const Mem& src)
4430   { _emitInstruction(kX86InstPMAddWD, &dst, &src); }
4431 
4432   //! @brief Shuffle Packed DWORDs (SSE2).
pshufdX86Assembler4433   inline void pshufd(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
4434   { _emitInstruction(kX86InstPShufD, &dst, &src, &imm8); }
4435   //! @brief Shuffle Packed DWORDs (SSE2).
pshufdX86Assembler4436   inline void pshufd(const XmmReg& dst, const Mem& src, const Imm& imm8)
4437   { _emitInstruction(kX86InstPShufD, &dst, &src, &imm8); }
4438 
4439   //! @brief Shuffle Packed High Words (SSE2).
pshufhwX86Assembler4440   inline void pshufhw(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
4441   { _emitInstruction(kX86InstPShufHW, &dst, &src, &imm8); }
4442   //! @brief Shuffle Packed High Words (SSE2).
pshufhwX86Assembler4443   inline void pshufhw(const XmmReg& dst, const Mem& src, const Imm& imm8)
4444   { _emitInstruction(kX86InstPShufHW, &dst, &src, &imm8); }
4445 
4446   //! @brief Shuffle Packed Low Words (SSE2).
pshuflwX86Assembler4447   inline void pshuflw(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
4448   { _emitInstruction(kX86InstPShufLW, &dst, &src, &imm8); }
4449   //! @brief Shuffle Packed Low Words (SSE2).
pshuflwX86Assembler4450   inline void pshuflw(const XmmReg& dst, const Mem& src, const Imm& imm8)
4451   { _emitInstruction(kX86InstPShufLW, &dst, &src, &imm8); }
4452 
4453   //! @brief Packed Shift Right Logical (SSE2).
psrldX86Assembler4454   inline void psrld(const XmmReg& dst, const XmmReg& src)
4455   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
4456   //! @brief Packed Shift Right Logical (SSE2).
psrldX86Assembler4457   inline void psrld(const XmmReg& dst, const Mem& src)
4458   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
4459   //! @brief Packed Shift Right Logical (SSE2).
psrldX86Assembler4460   inline void psrld(const XmmReg& dst, const Imm& src)
4461   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
4462 
4463   //! @brief Packed Shift Right Logical (SSE2).
psrlqX86Assembler4464   inline void psrlq(const XmmReg& dst, const XmmReg& src)
4465   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
4466   //! @brief Packed Shift Right Logical (SSE2).
psrlqX86Assembler4467   inline void psrlq(const XmmReg& dst, const Mem& src)
4468   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
4469   //! @brief Packed Shift Right Logical (SSE2).
psrlqX86Assembler4470   inline void psrlq(const XmmReg& dst, const Imm& src)
4471   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
4472 
4473   //! @brief DQWord Shift Right Logical (MMX).
psrldqX86Assembler4474   inline void psrldq(const XmmReg& dst, const Imm& src)
4475   { _emitInstruction(kX86InstPSrlDQ, &dst, &src); }
4476 
4477   //! @brief Packed Shift Right Logical (SSE2).
psrlwX86Assembler4478   inline void psrlw(const XmmReg& dst, const XmmReg& src)
4479   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
4480   //! @brief Packed Shift Right Logical (SSE2).
psrlwX86Assembler4481   inline void psrlw(const XmmReg& dst, const Mem& src)
4482   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
4483   //! @brief Packed Shift Right Logical (SSE2).
psrlwX86Assembler4484   inline void psrlw(const XmmReg& dst, const Imm& src)
4485   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
4486 
4487   //! @brief Packed Subtract with Saturation (SSE2).
psubsbX86Assembler4488   inline void psubsb(const XmmReg& dst, const XmmReg& src)
4489   { _emitInstruction(kX86InstPSubSB, &dst, &src); }
4490   //! @brief Packed Subtract with Saturation (SSE2).
psubsbX86Assembler4491   inline void psubsb(const XmmReg& dst, const Mem& src)
4492   { _emitInstruction(kX86InstPSubSB, &dst, &src); }
4493 
4494   //! @brief Packed Subtract with Saturation (SSE2).
psubswX86Assembler4495   inline void psubsw(const XmmReg& dst, const XmmReg& src)
4496   { _emitInstruction(kX86InstPSubSW, &dst, &src); }
4497   //! @brief Packed Subtract with Saturation (SSE2).
psubswX86Assembler4498   inline void psubsw(const XmmReg& dst, const Mem& src)
4499   { _emitInstruction(kX86InstPSubSW, &dst, &src); }
4500 
4501   //! @brief Packed Subtract with Unsigned Saturation (SSE2).
psubusbX86Assembler4502   inline void psubusb(const XmmReg& dst, const XmmReg& src)
4503   { _emitInstruction(kX86InstPSubUSB, &dst, &src); }
4504   //! @brief Packed Subtract with Unsigned Saturation (SSE2).
psubusbX86Assembler4505   inline void psubusb(const XmmReg& dst, const Mem& src)
4506   { _emitInstruction(kX86InstPSubUSB, &dst, &src); }
4507 
4508   //! @brief Packed Subtract with Unsigned Saturation (SSE2).
psubuswX86Assembler4509   inline void psubusw(const XmmReg& dst, const XmmReg& src)
4510   { _emitInstruction(kX86InstPSubUSW, &dst, &src); }
4511   //! @brief Packed Subtract with Unsigned Saturation (SSE2).
psubuswX86Assembler4512   inline void psubusw(const XmmReg& dst, const Mem& src)
4513   { _emitInstruction(kX86InstPSubUSW, &dst, &src); }
4514 
4515   //! @brief Unpack High Data (SSE2).
punpckhbwX86Assembler4516   inline void punpckhbw(const XmmReg& dst, const XmmReg& src)
4517   { _emitInstruction(kX86InstPunpckHBW, &dst, &src); }
4518   //! @brief Unpack High Data (SSE2).
punpckhbwX86Assembler4519   inline void punpckhbw(const XmmReg& dst, const Mem& src)
4520   { _emitInstruction(kX86InstPunpckHBW, &dst, &src); }
4521 
4522   //! @brief Unpack High Data (SSE2).
punpckhwdX86Assembler4523   inline void punpckhwd(const XmmReg& dst, const XmmReg& src)
4524   { _emitInstruction(kX86InstPunpckHWD, &dst, &src); }
4525   //! @brief Unpack High Data (SSE2).
punpckhwdX86Assembler4526   inline void punpckhwd(const XmmReg& dst, const Mem& src)
4527   { _emitInstruction(kX86InstPunpckHWD, &dst, &src); }
4528 
4529   //! @brief Unpack High Data (SSE2).
punpckhdqX86Assembler4530   inline void punpckhdq(const XmmReg& dst, const XmmReg& src)
4531   { _emitInstruction(kX86InstPunpckHDQ, &dst, &src); }
4532   //! @brief Unpack High Data (SSE2).
punpckhdqX86Assembler4533   inline void punpckhdq(const XmmReg& dst, const Mem& src)
4534   { _emitInstruction(kX86InstPunpckHDQ, &dst, &src); }
4535 
4536   //! @brief Unpack High Data (SSE2).
punpckhqdqX86Assembler4537   inline void punpckhqdq(const XmmReg& dst, const XmmReg& src)
4538   { _emitInstruction(kX86InstPunpckHQDQ, &dst, &src); }
4539   //! @brief Unpack High Data (SSE2).
punpckhqdqX86Assembler4540   inline void punpckhqdq(const XmmReg& dst, const Mem& src)
4541   { _emitInstruction(kX86InstPunpckHQDQ, &dst, &src); }
4542 
4543   //! @brief Unpack Low Data (SSE2).
punpcklbwX86Assembler4544   inline void punpcklbw(const XmmReg& dst, const XmmReg& src)
4545   { _emitInstruction(kX86InstPunpckLBW, &dst, &src); }
4546   //! @brief Unpack Low Data (SSE2).
punpcklbwX86Assembler4547   inline void punpcklbw(const XmmReg& dst, const Mem& src)
4548   { _emitInstruction(kX86InstPunpckLBW, &dst, &src); }
4549 
4550   //! @brief Unpack Low Data (SSE2).
punpcklwdX86Assembler4551   inline void punpcklwd(const XmmReg& dst, const XmmReg& src)
4552   { _emitInstruction(kX86InstPunpckLWD, &dst, &src); }
4553   //! @brief Unpack Low Data (SSE2).
punpcklwdX86Assembler4554   inline void punpcklwd(const XmmReg& dst, const Mem& src)
4555   { _emitInstruction(kX86InstPunpckLWD, &dst, &src); }
4556 
4557   //! @brief Unpack Low Data (SSE2).
punpckldqX86Assembler4558   inline void punpckldq(const XmmReg& dst, const XmmReg& src)
4559   { _emitInstruction(kX86InstPunpckLDQ, &dst, &src); }
4560   //! @brief Unpack Low Data (SSE2).
punpckldqX86Assembler4561   inline void punpckldq(const XmmReg& dst, const Mem& src)
4562   { _emitInstruction(kX86InstPunpckLDQ, &dst, &src); }
4563 
4564   //! @brief Unpack Low Data (SSE2).
punpcklqdqX86Assembler4565   inline void punpcklqdq(const XmmReg& dst, const XmmReg& src)
4566   { _emitInstruction(kX86InstPunpckLQDQ, &dst, &src); }
4567   //! @brief Unpack Low Data (SSE2).
punpcklqdqX86Assembler4568   inline void punpcklqdq(const XmmReg& dst, const Mem& src)
4569   { _emitInstruction(kX86InstPunpckLQDQ, &dst, &src); }
4570 
4571   //! @brief Bitwise Exclusive OR (SSE2).
pxorX86Assembler4572   inline void pxor(const XmmReg& dst, const XmmReg& src)
4573   { _emitInstruction(kX86InstPXor, &dst, &src); }
4574   //! @brief Bitwise Exclusive OR (SSE2).
pxorX86Assembler4575   inline void pxor(const XmmReg& dst, const Mem& src)
4576   { _emitInstruction(kX86InstPXor, &dst, &src); }
4577 
4578   //! @brief Shuffle DP-FP (SSE2).
shufpdX86Assembler4579   inline void shufpd(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
4580   { _emitInstruction(kX86InstShufPD, &dst, &src, &imm8); }
4581   //! @brief Shuffle DP-FP (SSE2).
shufpdX86Assembler4582   inline void shufpd(const XmmReg& dst, const Mem& src, const Imm& imm8)
4583   { _emitInstruction(kX86InstShufPD, &dst, &src, &imm8); }
4584 
4585   //! @brief Compute Square Roots of Packed DP-FP Values (SSE2).
sqrtpdX86Assembler4586   inline void sqrtpd(const XmmReg& dst, const XmmReg& src)
4587   { _emitInstruction(kX86InstSqrtPD, &dst, &src); }
4588   //! @brief Compute Square Roots of Packed DP-FP Values (SSE2).
sqrtpdX86Assembler4589   inline void sqrtpd(const XmmReg& dst, const Mem& src)
4590   { _emitInstruction(kX86InstSqrtPD, &dst, &src); }
4591 
4592   //! @brief Compute Square Root of Scalar DP-FP Value (SSE2).
sqrtsdX86Assembler4593   inline void sqrtsd(const XmmReg& dst, const XmmReg& src)
4594   { _emitInstruction(kX86InstSqrtSD, &dst, &src); }
4595   //! @brief Compute Square Root of Scalar DP-FP Value (SSE2).
sqrtsdX86Assembler4596   inline void sqrtsd(const XmmReg& dst, const Mem& src)
4597   { _emitInstruction(kX86InstSqrtSD, &dst, &src); }
4598 
4599   //! @brief Packed DP-FP Subtract (SSE2).
subpdX86Assembler4600   inline void subpd(const XmmReg& dst, const XmmReg& src)
4601   { _emitInstruction(kX86InstSubPD, &dst, &src); }
4602   //! @brief Packed DP-FP Subtract (SSE2).
subpdX86Assembler4603   inline void subpd(const XmmReg& dst, const Mem& src)
4604   { _emitInstruction(kX86InstSubPD, &dst, &src); }
4605 
4606   //! @brief Scalar DP-FP Subtract (SSE2).
subsdX86Assembler4607   inline void subsd(const XmmReg& dst, const XmmReg& src)
4608   { _emitInstruction(kX86InstSubSD, &dst, &src); }
4609   //! @brief Scalar DP-FP Subtract (SSE2).
subsdX86Assembler4610   inline void subsd(const XmmReg& dst, const Mem& src)
4611   { _emitInstruction(kX86InstSubSD, &dst, &src); }
4612 
4613   //! @brief Scalar Unordered DP-FP Compare and Set EFLAGS (SSE2).
ucomisdX86Assembler4614   inline void ucomisd(const XmmReg& dst, const XmmReg& src)
4615   { _emitInstruction(kX86InstUComISD, &dst, &src); }
4616   //! @brief Scalar Unordered DP-FP Compare and Set EFLAGS (SSE2).
ucomisdX86Assembler4617   inline void ucomisd(const XmmReg& dst, const Mem& src)
4618   { _emitInstruction(kX86InstUComISD, &dst, &src); }
4619 
4620   //! @brief Unpack and Interleave High Packed Double-Precision FP Values (SSE2).
unpckhpdX86Assembler4621   inline void unpckhpd(const XmmReg& dst, const XmmReg& src)
4622   { _emitInstruction(kX86InstUnpckHPD, &dst, &src); }
4623   //! @brief Unpack and Interleave High Packed Double-Precision FP Values (SSE2).
unpckhpdX86Assembler4624   inline void unpckhpd(const XmmReg& dst, const Mem& src)
4625   { _emitInstruction(kX86InstUnpckHPD, &dst, &src); }
4626 
4627   //! @brief Unpack and Interleave Low Packed Double-Precision FP Values (SSE2).
unpcklpdX86Assembler4628   inline void unpcklpd(const XmmReg& dst, const XmmReg& src)
4629   { _emitInstruction(kX86InstUnpckLPD, &dst, &src); }
4630   //! @brief Unpack and Interleave Low Packed Double-Precision FP Values (SSE2).
unpcklpdX86Assembler4631   inline void unpcklpd(const XmmReg& dst, const Mem& src)
4632   { _emitInstruction(kX86InstUnpckLPD, &dst, &src); }
4633 
4634   //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
xorpdX86Assembler4635   inline void xorpd(const XmmReg& dst, const XmmReg& src)
4636   { _emitInstruction(kX86InstXorPD, &dst, &src); }
4637   //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
xorpdX86Assembler4638   inline void xorpd(const XmmReg& dst, const Mem& src)
4639   { _emitInstruction(kX86InstXorPD, &dst, &src); }
4640 
4641   // --------------------------------------------------------------------------
4642   // [SSE3]
4643   // --------------------------------------------------------------------------
4644 
4645   //! @brief Packed DP-FP Add/Subtract (SSE3).
addsubpdX86Assembler4646   inline void addsubpd(const XmmReg& dst, const XmmReg& src)
4647   { _emitInstruction(kX86InstAddSubPD, &dst, &src); }
4648   //! @brief Packed DP-FP Add/Subtract (SSE3).
addsubpdX86Assembler4649   inline void addsubpd(const XmmReg& dst, const Mem& src)
4650   { _emitInstruction(kX86InstAddSubPD, &dst, &src); }
4651 
4652   //! @brief Packed SP-FP Add/Subtract (SSE3).
addsubpsX86Assembler4653   inline void addsubps(const XmmReg& dst, const XmmReg& src)
4654   { _emitInstruction(kX86InstAddSubPS, &dst, &src); }
4655   //! @brief Packed SP-FP Add/Subtract (SSE3).
addsubpsX86Assembler4656   inline void addsubps(const XmmReg& dst, const Mem& src)
4657   { _emitInstruction(kX86InstAddSubPS, &dst, &src); }
4658 
4659   //! @brief Store Integer with Truncation (SSE3).
fisttpX86Assembler4660   inline void fisttp(const Mem& dst)
4661   { _emitInstruction(kX86InstFISttP, &dst); }
4662 
4663   //! @brief Packed DP-FP Horizontal Add (SSE3).
haddpdX86Assembler4664   inline void haddpd(const XmmReg& dst, const XmmReg& src)
4665   { _emitInstruction(kX86InstHAddPD, &dst, &src); }
4666   //! @brief Packed DP-FP Horizontal Add (SSE3).
haddpdX86Assembler4667   inline void haddpd(const XmmReg& dst, const Mem& src)
4668   { _emitInstruction(kX86InstHAddPD, &dst, &src); }
4669 
4670   //! @brief Packed SP-FP Horizontal Add (SSE3).
haddpsX86Assembler4671   inline void haddps(const XmmReg& dst, const XmmReg& src)
4672   { _emitInstruction(kX86InstHAddPS, &dst, &src); }
4673   //! @brief Packed SP-FP Horizontal Add (SSE3).
haddpsX86Assembler4674   inline void haddps(const XmmReg& dst, const Mem& src)
4675   { _emitInstruction(kX86InstHAddPS, &dst, &src); }
4676 
4677   //! @brief Packed DP-FP Horizontal Subtract (SSE3).
hsubpdX86Assembler4678   inline void hsubpd(const XmmReg& dst, const XmmReg& src)
4679   { _emitInstruction(kX86InstHSubPD, &dst, &src); }
4680   //! @brief Packed DP-FP Horizontal Subtract (SSE3).
hsubpdX86Assembler4681   inline void hsubpd(const XmmReg& dst, const Mem& src)
4682   { _emitInstruction(kX86InstHSubPD, &dst, &src); }
4683 
4684   //! @brief Packed SP-FP Horizontal Subtract (SSE3).
hsubpsX86Assembler4685   inline void hsubps(const XmmReg& dst, const XmmReg& src)
4686   { _emitInstruction(kX86InstHSubPS, &dst, &src); }
4687   //! @brief Packed SP-FP Horizontal Subtract (SSE3).
hsubpsX86Assembler4688   inline void hsubps(const XmmReg& dst, const Mem& src)
4689   { _emitInstruction(kX86InstHSubPS, &dst, &src); }
4690 
4691   //! @brief Load Unaligned Integer 128 Bits (SSE3).
lddquX86Assembler4692   inline void lddqu(const XmmReg& dst, const Mem& src)
4693   { _emitInstruction(kX86InstLdDQU, &dst, &src); }
4694 
4695   //! @brief Set Up Monitor Address (SSE3).
monitorX86Assembler4696   inline void monitor()
4697   { _emitInstruction(kX86InstMonitor); }
4698 
4699   //! @brief Move One DP-FP and Duplicate (SSE3).
movddupX86Assembler4700   inline void movddup(const XmmReg& dst, const XmmReg& src)
4701   { _emitInstruction(kX86InstMovDDup, &dst, &src); }
4702   //! @brief Move One DP-FP and Duplicate (SSE3).
movddupX86Assembler4703   inline void movddup(const XmmReg& dst, const Mem& src)
4704   { _emitInstruction(kX86InstMovDDup, &dst, &src); }
4705 
4706   //! @brief Move Packed SP-FP High and Duplicate (SSE3).
movshdupX86Assembler4707   inline void movshdup(const XmmReg& dst, const XmmReg& src)
4708   { _emitInstruction(kX86InstMovSHDup, &dst, &src); }
4709   //! @brief Move Packed SP-FP High and Duplicate (SSE3).
movshdupX86Assembler4710   inline void movshdup(const XmmReg& dst, const Mem& src)
4711   { _emitInstruction(kX86InstMovSHDup, &dst, &src); }
4712 
4713   //! @brief Move Packed SP-FP Low and Duplicate (SSE3).
movsldupX86Assembler4714   inline void movsldup(const XmmReg& dst, const XmmReg& src)
4715   { _emitInstruction(kX86InstMovSLDup, &dst, &src); }
4716   //! @brief Move Packed SP-FP Low and Duplicate (SSE3).
movsldupX86Assembler4717   inline void movsldup(const XmmReg& dst, const Mem& src)
4718   { _emitInstruction(kX86InstMovSLDup, &dst, &src); }
4719 
4720   //! @brief Monitor Wait (SSE3).
mwaitX86Assembler4721   inline void mwait()
4722   { _emitInstruction(kX86InstMWait); }
4723 
4724   // --------------------------------------------------------------------------
4725   // [SSSE3]
4726   // --------------------------------------------------------------------------
4727 
4728   //! @brief Packed SIGN (SSSE3).
psignbX86Assembler4729   inline void psignb(const MmReg& dst, const MmReg& src)
4730   { _emitInstruction(kX86InstPSignB, &dst, &src); }
4731   //! @brief Packed SIGN (SSSE3).
psignbX86Assembler4732   inline void psignb(const MmReg& dst, const Mem& src)
4733   { _emitInstruction(kX86InstPSignB, &dst, &src); }
4734 
4735   //! @brief Packed SIGN (SSSE3).
psignbX86Assembler4736   inline void psignb(const XmmReg& dst, const XmmReg& src)
4737   { _emitInstruction(kX86InstPSignB, &dst, &src); }
4738   //! @brief Packed SIGN (SSSE3).
psignbX86Assembler4739   inline void psignb(const XmmReg& dst, const Mem& src)
4740   { _emitInstruction(kX86InstPSignB, &dst, &src); }
4741 
4742   //! @brief Packed SIGN (SSSE3).
psignwX86Assembler4743   inline void psignw(const MmReg& dst, const MmReg& src)
4744   { _emitInstruction(kX86InstPSignW, &dst, &src); }
4745   //! @brief Packed SIGN (SSSE3).
psignwX86Assembler4746   inline void psignw(const MmReg& dst, const Mem& src)
4747   { _emitInstruction(kX86InstPSignW, &dst, &src); }
4748 
4749   //! @brief Packed SIGN (SSSE3).
psignwX86Assembler4750   inline void psignw(const XmmReg& dst, const XmmReg& src)
4751   { _emitInstruction(kX86InstPSignW, &dst, &src); }
4752   //! @brief Packed SIGN (SSSE3).
psignwX86Assembler4753   inline void psignw(const XmmReg& dst, const Mem& src)
4754   { _emitInstruction(kX86InstPSignW, &dst, &src); }
4755 
4756   //! @brief Packed SIGN (SSSE3).
psigndX86Assembler4757   inline void psignd(const MmReg& dst, const MmReg& src)
4758   { _emitInstruction(kX86InstPSignD, &dst, &src); }
4759   //! @brief Packed SIGN (SSSE3).
psigndX86Assembler4760   inline void psignd(const MmReg& dst, const Mem& src)
4761   { _emitInstruction(kX86InstPSignD, &dst, &src); }
4762 
4763   //! @brief Packed SIGN (SSSE3).
psigndX86Assembler4764   inline void psignd(const XmmReg& dst, const XmmReg& src)
4765   { _emitInstruction(kX86InstPSignD, &dst, &src); }
4766   //! @brief Packed SIGN (SSSE3).
psigndX86Assembler4767   inline void psignd(const XmmReg& dst, const Mem& src)
4768   { _emitInstruction(kX86InstPSignD, &dst, &src); }
4769 
4770   //! @brief Packed Horizontal Add (SSSE3).
phaddwX86Assembler4771   inline void phaddw(const MmReg& dst, const MmReg& src)
4772   { _emitInstruction(kX86InstPHAddW, &dst, &src); }
4773   //! @brief Packed Horizontal Add (SSSE3).
phaddwX86Assembler4774   inline void phaddw(const MmReg& dst, const Mem& src)
4775   { _emitInstruction(kX86InstPHAddW, &dst, &src); }
4776 
4777   //! @brief Packed Horizontal Add (SSSE3).
phaddwX86Assembler4778   inline void phaddw(const XmmReg& dst, const XmmReg& src)
4779   { _emitInstruction(kX86InstPHAddW, &dst, &src); }
4780   //! @brief Packed Horizontal Add (SSSE3).
phaddwX86Assembler4781   inline void phaddw(const XmmReg& dst, const Mem& src)
4782   { _emitInstruction(kX86InstPHAddW, &dst, &src); }
4783 
4784   //! @brief Packed Horizontal Add (SSSE3).
phadddX86Assembler4785   inline void phaddd(const MmReg& dst, const MmReg& src)
4786   { _emitInstruction(kX86InstPHAddD, &dst, &src); }
4787   //! @brief Packed Horizontal Add (SSSE3).
phadddX86Assembler4788   inline void phaddd(const MmReg& dst, const Mem& src)
4789   { _emitInstruction(kX86InstPHAddD, &dst, &src); }
4790 
4791   //! @brief Packed Horizontal Add (SSSE3).
phadddX86Assembler4792   inline void phaddd(const XmmReg& dst, const XmmReg& src)
4793   { _emitInstruction(kX86InstPHAddD, &dst, &src); }
4794   //! @brief Packed Horizontal Add (SSSE3).
phadddX86Assembler4795   inline void phaddd(const XmmReg& dst, const Mem& src)
4796   { _emitInstruction(kX86InstPHAddD, &dst, &src); }
4797 
4798   //! @brief Packed Horizontal Add and Saturate (SSSE3).
phaddswX86Assembler4799   inline void phaddsw(const MmReg& dst, const MmReg& src)
4800   { _emitInstruction(kX86InstPHAddSW, &dst, &src); }
4801   //! @brief Packed Horizontal Add and Saturate (SSSE3).
phaddswX86Assembler4802   inline void phaddsw(const MmReg& dst, const Mem& src)
4803   { _emitInstruction(kX86InstPHAddSW, &dst, &src); }
4804 
4805   //! @brief Packed Horizontal Add and Saturate (SSSE3).
phaddswX86Assembler4806   inline void phaddsw(const XmmReg& dst, const XmmReg& src)
4807   { _emitInstruction(kX86InstPHAddSW, &dst, &src); }
4808   //! @brief Packed Horizontal Add and Saturate (SSSE3).
phaddswX86Assembler4809   inline void phaddsw(const XmmReg& dst, const Mem& src)
4810   { _emitInstruction(kX86InstPHAddSW, &dst, &src); }
4811 
4812   //! @brief Packed Horizontal Subtract (SSSE3).
phsubwX86Assembler4813   inline void phsubw(const MmReg& dst, const MmReg& src)
4814   { _emitInstruction(kX86InstPHSubW, &dst, &src); }
4815   //! @brief Packed Horizontal Subtract (SSSE3).
phsubwX86Assembler4816   inline void phsubw(const MmReg& dst, const Mem& src)
4817   { _emitInstruction(kX86InstPHSubW, &dst, &src); }
4818 
4819   //! @brief Packed Horizontal Subtract (SSSE3).
phsubwX86Assembler4820   inline void phsubw(const XmmReg& dst, const XmmReg& src)
4821   { _emitInstruction(kX86InstPHSubW, &dst, &src); }
4822   //! @brief Packed Horizontal Subtract (SSSE3).
phsubwX86Assembler4823   inline void phsubw(const XmmReg& dst, const Mem& src)
4824   { _emitInstruction(kX86InstPHSubW, &dst, &src); }
4825 
4826   //! @brief Packed Horizontal Subtract (SSSE3).
phsubdX86Assembler4827   inline void phsubd(const MmReg& dst, const MmReg& src)
4828   { _emitInstruction(kX86InstPHSubD, &dst, &src); }
4829   //! @brief Packed Horizontal Subtract (SSSE3).
phsubdX86Assembler4830   inline void phsubd(const MmReg& dst, const Mem& src)
4831   { _emitInstruction(kX86InstPHSubD, &dst, &src); }
4832 
4833   //! @brief Packed Horizontal Subtract (SSSE3).
phsubdX86Assembler4834   inline void phsubd(const XmmReg& dst, const XmmReg& src)
4835   { _emitInstruction(kX86InstPHSubD, &dst, &src); }
4836   //! @brief Packed Horizontal Subtract (SSSE3).
phsubdX86Assembler4837   inline void phsubd(const XmmReg& dst, const Mem& src)
4838   { _emitInstruction(kX86InstPHSubD, &dst, &src); }
4839 
4840   //! @brief Packed Horizontal Subtract and Saturate (SSSE3).
phsubswX86Assembler4841   inline void phsubsw(const MmReg& dst, const MmReg& src)
4842   { _emitInstruction(kX86InstPHSubSW, &dst, &src); }
4843   //! @brief Packed Horizontal Subtract and Saturate (SSSE3).
phsubswX86Assembler4844   inline void phsubsw(const MmReg& dst, const Mem& src)
4845   { _emitInstruction(kX86InstPHSubSW, &dst, &src); }
4846 
4847   //! @brief Packed Horizontal Subtract and Saturate (SSSE3).
phsubswX86Assembler4848   inline void phsubsw(const XmmReg& dst, const XmmReg& src)
4849   { _emitInstruction(kX86InstPHSubSW, &dst, &src); }
4850   //! @brief Packed Horizontal Subtract and Saturate (SSSE3).
phsubswX86Assembler4851   inline void phsubsw(const XmmReg& dst, const Mem& src)
4852   { _emitInstruction(kX86InstPHSubSW, &dst, &src); }
4853 
4854   //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3).
pmaddubswX86Assembler4855   inline void pmaddubsw(const MmReg& dst, const MmReg& src)
4856   { _emitInstruction(kX86InstPMAddUBSW, &dst, &src); }
4857   //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3).
pmaddubswX86Assembler4858   inline void pmaddubsw(const MmReg& dst, const Mem& src)
4859   { _emitInstruction(kX86InstPMAddUBSW, &dst, &src); }
4860 
4861   //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3).
pmaddubswX86Assembler4862   inline void pmaddubsw(const XmmReg& dst, const XmmReg& src)
4863   { _emitInstruction(kX86InstPMAddUBSW, &dst, &src); }
4864   //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3).
pmaddubswX86Assembler4865   inline void pmaddubsw(const XmmReg& dst, const Mem& src)
4866   { _emitInstruction(kX86InstPMAddUBSW, &dst, &src); }
4867 
4868   //! @brief Packed Absolute Value (SSSE3).
pabsbX86Assembler4869   inline void pabsb(const MmReg& dst, const MmReg& src)
4870   { _emitInstruction(kX86InstPAbsB, &dst, &src); }
4871   //! @brief Packed Absolute Value (SSSE3).
pabsbX86Assembler4872   inline void pabsb(const MmReg& dst, const Mem& src)
4873   { _emitInstruction(kX86InstPAbsB, &dst, &src); }
4874 
4875   //! @brief Packed Absolute Value (SSSE3).
pabsbX86Assembler4876   inline void pabsb(const XmmReg& dst, const XmmReg& src)
4877   { _emitInstruction(kX86InstPAbsB, &dst, &src); }
4878   //! @brief Packed Absolute Value (SSSE3).
pabsbX86Assembler4879   inline void pabsb(const XmmReg& dst, const Mem& src)
4880   { _emitInstruction(kX86InstPAbsB, &dst, &src); }
4881 
4882   //! @brief Packed Absolute Value (SSSE3).
pabswX86Assembler4883   inline void pabsw(const MmReg& dst, const MmReg& src)
4884   { _emitInstruction(kX86InstPAbsW, &dst, &src); }
4885   //! @brief Packed Absolute Value (SSSE3).
pabswX86Assembler4886   inline void pabsw(const MmReg& dst, const Mem& src)
4887   { _emitInstruction(kX86InstPAbsW, &dst, &src); }
4888 
4889   //! @brief Packed Absolute Value (SSSE3).
pabswX86Assembler4890   inline void pabsw(const XmmReg& dst, const XmmReg& src)
4891   { _emitInstruction(kX86InstPAbsW, &dst, &src); }
4892   //! @brief Packed Absolute Value (SSSE3).
pabswX86Assembler4893   inline void pabsw(const XmmReg& dst, const Mem& src)
4894   { _emitInstruction(kX86InstPAbsW, &dst, &src); }
4895 
4896   //! @brief Packed Absolute Value (SSSE3).
pabsdX86Assembler4897   inline void pabsd(const MmReg& dst, const MmReg& src)
4898   { _emitInstruction(kX86InstPAbsD, &dst, &src); }
4899   //! @brief Packed Absolute Value (SSSE3).
pabsdX86Assembler4900   inline void pabsd(const MmReg& dst, const Mem& src)
4901   { _emitInstruction(kX86InstPAbsD, &dst, &src); }
4902 
4903   //! @brief Packed Absolute Value (SSSE3).
pabsdX86Assembler4904   inline void pabsd(const XmmReg& dst, const XmmReg& src)
4905   { _emitInstruction(kX86InstPAbsD, &dst, &src); }
4906   //! @brief Packed Absolute Value (SSSE3).
pabsdX86Assembler4907   inline void pabsd(const XmmReg& dst, const Mem& src)
4908   { _emitInstruction(kX86InstPAbsD, &dst, &src); }
4909 
4910   //! @brief Packed Multiply High with Round and Scale (SSSE3).
pmulhrswX86Assembler4911   inline void pmulhrsw(const MmReg& dst, const MmReg& src)
4912   { _emitInstruction(kX86InstPMulHRSW, &dst, &src); }
4913   //! @brief Packed Multiply High with Round and Scale (SSSE3).
pmulhrswX86Assembler4914   inline void pmulhrsw(const MmReg& dst, const Mem& src)
4915   { _emitInstruction(kX86InstPMulHRSW, &dst, &src); }
4916 
4917   //! @brief Packed Multiply High with Round and Scale (SSSE3).
pmulhrswX86Assembler4918   inline void pmulhrsw(const XmmReg& dst, const XmmReg& src)
4919   { _emitInstruction(kX86InstPMulHRSW, &dst, &src); }
4920   //! @brief Packed Multiply High with Round and Scale (SSSE3).
pmulhrswX86Assembler4921   inline void pmulhrsw(const XmmReg& dst, const Mem& src)
4922   { _emitInstruction(kX86InstPMulHRSW, &dst, &src); }
4923 
4924   //! @brief Packed Shuffle Bytes (SSSE3).
pshufbX86Assembler4925   inline void pshufb(const MmReg& dst, const MmReg& src)
4926   { _emitInstruction(kX86InstPShufB, &dst, &src); }
4927   //! @brief Packed Shuffle Bytes (SSSE3).
pshufbX86Assembler4928   inline void pshufb(const MmReg& dst, const Mem& src)
4929   { _emitInstruction(kX86InstPShufB, &dst, &src); }
4930 
4931   //! @brief Packed Shuffle Bytes (SSSE3).
pshufbX86Assembler4932   inline void pshufb(const XmmReg& dst, const XmmReg& src)
4933   { _emitInstruction(kX86InstPShufB, &dst, &src); }
4934   //! @brief Packed Shuffle Bytes (SSSE3).
pshufbX86Assembler4935   inline void pshufb(const XmmReg& dst, const Mem& src)
4936   { _emitInstruction(kX86InstPShufB, &dst, &src); }
4937 
4938   //! @brief Packed Shuffle Bytes (SSSE3).
palignrX86Assembler4939   inline void palignr(const MmReg& dst, const MmReg& src, const Imm& imm8)
4940   { _emitInstruction(kX86InstPAlignR, &dst, &src, &imm8); }
4941   //! @brief Packed Shuffle Bytes (SSSE3).
palignrX86Assembler4942   inline void palignr(const MmReg& dst, const Mem& src, const Imm& imm8)
4943   { _emitInstruction(kX86InstPAlignR, &dst, &src, &imm8); }
4944 
4945   //! @brief Packed Shuffle Bytes (SSSE3).
palignrX86Assembler4946   inline void palignr(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
4947   { _emitInstruction(kX86InstPAlignR, &dst, &src, &imm8); }
4948   //! @brief Packed Shuffle Bytes (SSSE3).
palignrX86Assembler4949   inline void palignr(const XmmReg& dst, const Mem& src, const Imm& imm8)
4950   { _emitInstruction(kX86InstPAlignR, &dst, &src, &imm8); }
4951 
4952   // --------------------------------------------------------------------------
4953   // [SSE4.1]
4954   // --------------------------------------------------------------------------
4955 
4956   //! @brief Blend Packed DP-FP Values (SSE4.1).
blendpdX86Assembler4957   inline void blendpd(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
4958   { _emitInstruction(kX86InstBlendPD, &dst, &src, &imm8); }
4959   //! @brief Blend Packed DP-FP Values (SSE4.1).
blendpdX86Assembler4960   inline void blendpd(const XmmReg& dst, const Mem& src, const Imm& imm8)
4961   { _emitInstruction(kX86InstBlendPD, &dst, &src, &imm8); }
4962 
4963   //! @brief Blend Packed SP-FP Values (SSE4.1).
blendpsX86Assembler4964   inline void blendps(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
4965   { _emitInstruction(kX86InstBlendPS, &dst, &src, &imm8); }
4966   //! @brief Blend Packed SP-FP Values (SSE4.1).
blendpsX86Assembler4967   inline void blendps(const XmmReg& dst, const Mem& src, const Imm& imm8)
4968   { _emitInstruction(kX86InstBlendPS, &dst, &src, &imm8); }
4969 
4970   //! @brief Variable Blend Packed DP-FP Values (SSE4.1).
blendvpdX86Assembler4971   inline void blendvpd(const XmmReg& dst, const XmmReg& src)
4972   { _emitInstruction(kX86InstBlendVPD, &dst, &src); }
4973   //! @brief Variable Blend Packed DP-FP Values (SSE4.1).
blendvpdX86Assembler4974   inline void blendvpd(const XmmReg& dst, const Mem& src)
4975   { _emitInstruction(kX86InstBlendVPD, &dst, &src); }
4976 
4977   //! @brief Variable Blend Packed SP-FP Values (SSE4.1).
blendvpsX86Assembler4978   inline void blendvps(const XmmReg& dst, const XmmReg& src)
4979   { _emitInstruction(kX86InstBlendVPS, &dst, &src); }
4980   //! @brief Variable Blend Packed SP-FP Values (SSE4.1).
blendvpsX86Assembler4981   inline void blendvps(const XmmReg& dst, const Mem& src)
4982   { _emitInstruction(kX86InstBlendVPS, &dst, &src); }
4983 
4984   //! @brief Dot Product of Packed DP-FP Values (SSE4.1).
dppdX86Assembler4985   inline void dppd(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
4986   { _emitInstruction(kX86InstDpPD, &dst, &src, &imm8); }
4987   //! @brief Dot Product of Packed DP-FP Values (SSE4.1).
dppdX86Assembler4988   inline void dppd(const XmmReg& dst, const Mem& src, const Imm& imm8)
4989   { _emitInstruction(kX86InstDpPD, &dst, &src, &imm8); }
4990 
4991   //! @brief Dot Product of Packed SP-FP Values (SSE4.1).
dppsX86Assembler4992   inline void dpps(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
4993   { _emitInstruction(kX86InstDpPS, &dst, &src, &imm8); }
4994   //! @brief Dot Product of Packed SP-FP Values (SSE4.1).
dppsX86Assembler4995   inline void dpps(const XmmReg& dst, const Mem& src, const Imm& imm8)
4996   { _emitInstruction(kX86InstDpPS, &dst, &src, &imm8); }
4997 
4998   //! @brief Extract Packed SP-FP Value (SSE4.1).
extractpsX86Assembler4999   inline void extractps(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5000   { _emitInstruction(kX86InstExtractPS, &dst, &src, &imm8); }
5001   //! @brief Extract Packed SP-FP Value (SSE4.1).
extractpsX86Assembler5002   inline void extractps(const XmmReg& dst, const Mem& src, const Imm& imm8)
5003   { _emitInstruction(kX86InstExtractPS, &dst, &src, &imm8); }
5004 
5005   //! @brief Load Double Quadword Non-Temporal Aligned Hint (SSE4.1).
movntdqaX86Assembler5006   inline void movntdqa(const XmmReg& dst, const Mem& src)
5007   { _emitInstruction(kX86InstMovNTDQA, &dst, &src); }
5008 
5009   //! @brief Compute Multiple Packed Sums of Absolute Difference (SSE4.1).
mpsadbwX86Assembler5010   inline void mpsadbw(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5011   { _emitInstruction(kX86InstMPSADBW, &dst, &src, &imm8); }
5012   //! @brief Compute Multiple Packed Sums of Absolute Difference (SSE4.1).
mpsadbwX86Assembler5013   inline void mpsadbw(const XmmReg& dst, const Mem& src, const Imm& imm8)
5014   { _emitInstruction(kX86InstMPSADBW, &dst, &src, &imm8); }
5015 
5016   //! @brief Pack with Unsigned Saturation (SSE4.1).
packusdwX86Assembler5017   inline void packusdw(const XmmReg& dst, const XmmReg& src)
5018   { _emitInstruction(kX86InstPackUSDW, &dst, &src); }
5019   //! @brief Pack with Unsigned Saturation (SSE4.1).
packusdwX86Assembler5020   inline void packusdw(const XmmReg& dst, const Mem& src)
5021   { _emitInstruction(kX86InstPackUSDW, &dst, &src); }
5022 
5023   //! @brief Variable Blend Packed Bytes (SSE4.1).
pblendvbX86Assembler5024   inline void pblendvb(const XmmReg& dst, const XmmReg& src)
5025   { _emitInstruction(kX86InstPBlendVB, &dst, &src); }
5026   //! @brief Variable Blend Packed Bytes (SSE4.1).
pblendvbX86Assembler5027   inline void pblendvb(const XmmReg& dst, const Mem& src)
5028   { _emitInstruction(kX86InstPBlendVB, &dst, &src); }
5029 
5030   //! @brief Blend Packed Words (SSE4.1).
pblendwX86Assembler5031   inline void pblendw(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5032   { _emitInstruction(kX86InstPBlendW, &dst, &src, &imm8); }
5033   //! @brief Blend Packed Words (SSE4.1).
pblendwX86Assembler5034   inline void pblendw(const XmmReg& dst, const Mem& src, const Imm& imm8)
5035   { _emitInstruction(kX86InstPBlendW, &dst, &src, &imm8); }
5036 
5037   //! @brief Compare Packed Qword Data for Equal (SSE4.1).
pcmpeqqX86Assembler5038   inline void pcmpeqq(const XmmReg& dst, const XmmReg& src)
5039   { _emitInstruction(kX86InstPCmpEqQ, &dst, &src); }
5040   //! @brief Compare Packed Qword Data for Equal (SSE4.1).
pcmpeqqX86Assembler5041   inline void pcmpeqq(const XmmReg& dst, const Mem& src)
5042   { _emitInstruction(kX86InstPCmpEqQ, &dst, &src); }
5043 
5044   //! @brief Extract Byte (SSE4.1).
pextrbX86Assembler5045   inline void pextrb(const GpReg& dst, const XmmReg& src, const Imm& imm8)
5046   { _emitInstruction(kX86InstPExtrB, &dst, &src, &imm8); }
5047   //! @brief Extract Byte (SSE4.1).
pextrbX86Assembler5048   inline void pextrb(const Mem& dst, const XmmReg& src, const Imm& imm8)
5049   { _emitInstruction(kX86InstPExtrB, &dst, &src, &imm8); }
5050 
5051   //! @brief Extract Dword (SSE4.1).
pextrdX86Assembler5052   inline void pextrd(const GpReg& dst, const XmmReg& src, const Imm& imm8)
5053   { _emitInstruction(kX86InstPExtrD, &dst, &src, &imm8); }
5054   //! @brief Extract Dword (SSE4.1).
pextrdX86Assembler5055   inline void pextrd(const Mem& dst, const XmmReg& src, const Imm& imm8)
5056   { _emitInstruction(kX86InstPExtrD, &dst, &src, &imm8); }
5057 
5058   //! @brief Extract Dword (SSE4.1).
pextrqX86Assembler5059   inline void pextrq(const GpReg& dst, const XmmReg& src, const Imm& imm8)
5060   { _emitInstruction(kX86InstPExtrQ, &dst, &src, &imm8); }
5061   //! @brief Extract Dword (SSE4.1).
pextrqX86Assembler5062   inline void pextrq(const Mem& dst, const XmmReg& src, const Imm& imm8)
5063   { _emitInstruction(kX86InstPExtrQ, &dst, &src, &imm8); }
5064 
5065   //! @brief Extract Word (SSE4.1).
pextrwX86Assembler5066   inline void pextrw(const GpReg& dst, const XmmReg& src, const Imm& imm8)
5067   { _emitInstruction(kX86InstPExtrW, &dst, &src, &imm8); }
5068   //! @brief Extract Word (SSE4.1).
pextrwX86Assembler5069   inline void pextrw(const Mem& dst, const XmmReg& src, const Imm& imm8)
5070   { _emitInstruction(kX86InstPExtrW, &dst, &src, &imm8); }
5071 
5072   //! @brief Packed Horizontal Word Minimum (SSE4.1).
phminposuwX86Assembler5073   inline void phminposuw(const XmmReg& dst, const XmmReg& src)
5074   { _emitInstruction(kX86InstPHMinPOSUW, &dst, &src); }
5075   //! @brief Packed Horizontal Word Minimum (SSE4.1).
phminposuwX86Assembler5076   inline void phminposuw(const XmmReg& dst, const Mem& src)
5077   { _emitInstruction(kX86InstPHMinPOSUW, &dst, &src); }
5078 
5079   //! @brief Insert Byte (SSE4.1).
pinsrbX86Assembler5080   inline void pinsrb(const XmmReg& dst, const GpReg& src, const Imm& imm8)
5081   { _emitInstruction(kX86InstPInsRB, &dst, &src, &imm8); }
5082   //! @brief Insert Byte (SSE4.1).
pinsrbX86Assembler5083   inline void pinsrb(const XmmReg& dst, const Mem& src, const Imm& imm8)
5084   { _emitInstruction(kX86InstPInsRB, &dst, &src, &imm8); }
5085 
5086   //! @brief Insert Dword (SSE4.1).
pinsrdX86Assembler5087   inline void pinsrd(const XmmReg& dst, const GpReg& src, const Imm& imm8)
5088   { _emitInstruction(kX86InstPInsRD, &dst, &src, &imm8); }
5089   //! @brief Insert Dword (SSE4.1).
pinsrdX86Assembler5090   inline void pinsrd(const XmmReg& dst, const Mem& src, const Imm& imm8)
5091   { _emitInstruction(kX86InstPInsRD, &dst, &src, &imm8); }
5092 
5093   //! @brief Insert Dword (SSE4.1).
pinsrqX86Assembler5094   inline void pinsrq(const XmmReg& dst, const GpReg& src, const Imm& imm8)
5095   { _emitInstruction(kX86InstPInsRQ, &dst, &src, &imm8); }
5096   //! @brief Insert Dword (SSE4.1).
pinsrqX86Assembler5097   inline void pinsrq(const XmmReg& dst, const Mem& src, const Imm& imm8)
5098   { _emitInstruction(kX86InstPInsRQ, &dst, &src, &imm8); }
5099 
5100   //! @brief Insert Word (SSE2).
pinsrwX86Assembler5101   inline void pinsrw(const XmmReg& dst, const GpReg& src, const Imm& imm8)
5102   { _emitInstruction(kX86InstPInsRW, &dst, &src, &imm8); }
5103   //! @brief Insert Word (SSE2).
pinsrwX86Assembler5104   inline void pinsrw(const XmmReg& dst, const Mem& src, const Imm& imm8)
5105   { _emitInstruction(kX86InstPInsRW, &dst, &src, &imm8); }
5106 
5107   //! @brief Maximum of Packed Word Integers (SSE4.1).
pmaxuwX86Assembler5108   inline void pmaxuw(const XmmReg& dst, const XmmReg& src)
5109   { _emitInstruction(kX86InstPMaxUW, &dst, &src); }
5110   //! @brief Maximum of Packed Word Integers (SSE4.1).
pmaxuwX86Assembler5111   inline void pmaxuw(const XmmReg& dst, const Mem& src)
5112   { _emitInstruction(kX86InstPMaxUW, &dst, &src); }
5113 
5114   //! @brief Maximum of Packed Signed Byte Integers (SSE4.1).
pmaxsbX86Assembler5115   inline void pmaxsb(const XmmReg& dst, const XmmReg& src)
5116   { _emitInstruction(kX86InstPMaxSB, &dst, &src); }
5117   //! @brief Maximum of Packed Signed Byte Integers (SSE4.1).
pmaxsbX86Assembler5118   inline void pmaxsb(const XmmReg& dst, const Mem& src)
5119   { _emitInstruction(kX86InstPMaxSB, &dst, &src); }
5120 
5121   //! @brief Maximum of Packed Signed Dword Integers (SSE4.1).
pmaxsdX86Assembler5122   inline void pmaxsd(const XmmReg& dst, const XmmReg& src)
5123   { _emitInstruction(kX86InstPMaxSD, &dst, &src); }
5124   //! @brief Maximum of Packed Signed Dword Integers (SSE4.1).
pmaxsdX86Assembler5125   inline void pmaxsd(const XmmReg& dst, const Mem& src)
5126   { _emitInstruction(kX86InstPMaxSD, &dst, &src); }
5127 
5128   //! @brief Maximum of Packed Unsigned Dword Integers (SSE4.1).
pmaxudX86Assembler5129   inline void pmaxud(const XmmReg& dst, const XmmReg& src)
5130   { _emitInstruction(kX86InstPMaxUD, &dst, &src); }
5131   //! @brief Maximum of Packed Unsigned Dword Integers (SSE4.1).
pmaxudX86Assembler5132   inline void pmaxud(const XmmReg& dst, const Mem& src)
5133   { _emitInstruction(kX86InstPMaxUD, &dst, &src); }
5134 
5135   //! @brief Minimum of Packed Signed Byte Integers (SSE4.1).
pminsbX86Assembler5136   inline void pminsb(const XmmReg& dst, const XmmReg& src)
5137   { _emitInstruction(kX86InstPMinSB, &dst, &src); }
5138   //! @brief Minimum of Packed Signed Byte Integers (SSE4.1).
pminsbX86Assembler5139   inline void pminsb(const XmmReg& dst, const Mem& src)
5140   { _emitInstruction(kX86InstPMinSB, &dst, &src); }
5141 
5142   //! @brief Minimum of Packed Word Integers (SSE4.1).
pminuwX86Assembler5143   inline void pminuw(const XmmReg& dst, const XmmReg& src)
5144   { _emitInstruction(kX86InstPMinUW, &dst, &src); }
5145   //! @brief Minimum of Packed Word Integers (SSE4.1).
pminuwX86Assembler5146   inline void pminuw(const XmmReg& dst, const Mem& src)
5147   { _emitInstruction(kX86InstPMinUW, &dst, &src); }
5148 
5149   //! @brief Minimum of Packed Dword Integers (SSE4.1).
pminudX86Assembler5150   inline void pminud(const XmmReg& dst, const XmmReg& src)
5151   { _emitInstruction(kX86InstPMinUD, &dst, &src); }
5152   //! @brief Minimum of Packed Dword Integers (SSE4.1).
pminudX86Assembler5153   inline void pminud(const XmmReg& dst, const Mem& src)
5154   { _emitInstruction(kX86InstPMinUD, &dst, &src); }
5155 
5156   //! @brief Minimum of Packed Dword Integers (SSE4.1).
pminsdX86Assembler5157   inline void pminsd(const XmmReg& dst, const XmmReg& src)
5158   { _emitInstruction(kX86InstPMinSD, &dst, &src); }
5159   //! @brief Minimum of Packed Dword Integers (SSE4.1).
pminsdX86Assembler5160   inline void pminsd(const XmmReg& dst, const Mem& src)
5161   { _emitInstruction(kX86InstPMinSD, &dst, &src); }
5162 
5163   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbwX86Assembler5164   inline void pmovsxbw(const XmmReg& dst, const XmmReg& src)
5165   { _emitInstruction(kX86InstPMovSXBW, &dst, &src); }
5166   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbwX86Assembler5167   inline void pmovsxbw(const XmmReg& dst, const Mem& src)
5168   { _emitInstruction(kX86InstPMovSXBW, &dst, &src); }
5169 
5170   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbdX86Assembler5171   inline void pmovsxbd(const XmmReg& dst, const XmmReg& src)
5172   { _emitInstruction(kX86InstPMovSXBD, &dst, &src); }
5173   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbdX86Assembler5174   inline void pmovsxbd(const XmmReg& dst, const Mem& src)
5175   { _emitInstruction(kX86InstPMovSXBD, &dst, &src); }
5176 
5177   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbqX86Assembler5178   inline void pmovsxbq(const XmmReg& dst, const XmmReg& src)
5179   { _emitInstruction(kX86InstPMovSXBQ, &dst, &src); }
5180   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbqX86Assembler5181   inline void pmovsxbq(const XmmReg& dst, const Mem& src)
5182   { _emitInstruction(kX86InstPMovSXBQ, &dst, &src); }
5183 
5184   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxwdX86Assembler5185   inline void pmovsxwd(const XmmReg& dst, const XmmReg& src)
5186   { _emitInstruction(kX86InstPMovSXWD, &dst, &src); }
5187   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxwdX86Assembler5188   inline void pmovsxwd(const XmmReg& dst, const Mem& src)
5189   { _emitInstruction(kX86InstPMovSXWD, &dst, &src); }
5190 
5191   //! @brief (SSE4.1).
pmovsxwqX86Assembler5192   inline void pmovsxwq(const XmmReg& dst, const XmmReg& src)
5193   { _emitInstruction(kX86InstPMovSXWQ, &dst, &src); }
5194   //! @brief (SSE4.1).
pmovsxwqX86Assembler5195   inline void pmovsxwq(const XmmReg& dst, const Mem& src)
5196   { _emitInstruction(kX86InstPMovSXWQ, &dst, &src); }
5197 
5198   //! @brief (SSE4.1).
pmovsxdqX86Assembler5199   inline void pmovsxdq(const XmmReg& dst, const XmmReg& src)
5200   { _emitInstruction(kX86InstPMovSXDQ, &dst, &src); }
5201   //! @brief (SSE4.1).
pmovsxdqX86Assembler5202   inline void pmovsxdq(const XmmReg& dst, const Mem& src)
5203   { _emitInstruction(kX86InstPMovSXDQ, &dst, &src); }
5204 
5205   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbwX86Assembler5206   inline void pmovzxbw(const XmmReg& dst, const XmmReg& src)
5207   { _emitInstruction(kX86InstPMovZXBW, &dst, &src); }
5208   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbwX86Assembler5209   inline void pmovzxbw(const XmmReg& dst, const Mem& src)
5210   { _emitInstruction(kX86InstPMovZXBW, &dst, &src); }
5211 
5212   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbdX86Assembler5213   inline void pmovzxbd(const XmmReg& dst, const XmmReg& src)
5214   { _emitInstruction(kX86InstPMovZXBD, &dst, &src); }
5215   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbdX86Assembler5216   inline void pmovzxbd(const XmmReg& dst, const Mem& src)
5217   { _emitInstruction(kX86InstPMovZXBD, &dst, &src); }
5218 
5219   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbqX86Assembler5220   inline void pmovzxbq(const XmmReg& dst, const XmmReg& src)
5221   { _emitInstruction(kX86InstPMovZXBQ, &dst, &src); }
5222   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbqX86Assembler5223   inline void pmovzxbq(const XmmReg& dst, const Mem& src)
5224   { _emitInstruction(kX86InstPMovZXBQ, &dst, &src); }
5225 
5226   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxwdX86Assembler5227   inline void pmovzxwd(const XmmReg& dst, const XmmReg& src)
5228   { _emitInstruction(kX86InstPMovZXWD, &dst, &src); }
5229   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxwdX86Assembler5230   inline void pmovzxwd(const XmmReg& dst, const Mem& src)
5231   { _emitInstruction(kX86InstPMovZXWD, &dst, &src); }
5232 
5233   //! @brief (SSE4.1).
pmovzxwqX86Assembler5234   inline void pmovzxwq(const XmmReg& dst, const XmmReg& src)
5235   { _emitInstruction(kX86InstPMovZXWQ, &dst, &src); }
5236   //! @brief (SSE4.1).
pmovzxwqX86Assembler5237   inline void pmovzxwq(const XmmReg& dst, const Mem& src)
5238   { _emitInstruction(kX86InstPMovZXWQ, &dst, &src); }
5239 
5240   //! @brief (SSE4.1).
pmovzxdqX86Assembler5241   inline void pmovzxdq(const XmmReg& dst, const XmmReg& src)
5242   { _emitInstruction(kX86InstPMovZXDQ, &dst, &src); }
5243   //! @brief (SSE4.1).
pmovzxdqX86Assembler5244   inline void pmovzxdq(const XmmReg& dst, const Mem& src)
5245   { _emitInstruction(kX86InstPMovZXDQ, &dst, &src); }
5246 
5247   //! @brief Multiply Packed Signed Dword Integers (SSE4.1).
pmuldqX86Assembler5248   inline void pmuldq(const XmmReg& dst, const XmmReg& src)
5249   { _emitInstruction(kX86InstPMulDQ, &dst, &src); }
5250   //! @brief Multiply Packed Signed Dword Integers (SSE4.1).
pmuldqX86Assembler5251   inline void pmuldq(const XmmReg& dst, const Mem& src)
5252   { _emitInstruction(kX86InstPMulDQ, &dst, &src); }
5253 
5254   //! @brief Multiply Packed Signed Integers and Store Low Result (SSE4.1).
pmulldX86Assembler5255   inline void pmulld(const XmmReg& dst, const XmmReg& src)
5256   { _emitInstruction(kX86InstPMulLD, &dst, &src); }
5257   //! @brief Multiply Packed Signed Integers and Store Low Result (SSE4.1).
pmulldX86Assembler5258   inline void pmulld(const XmmReg& dst, const Mem& src)
5259   { _emitInstruction(kX86InstPMulLD, &dst, &src); }
5260 
5261   //! @brief Logical Compare (SSE4.1).
ptestX86Assembler5262   inline void ptest(const XmmReg& op1, const XmmReg& op2)
5263   { _emitInstruction(kX86InstPTest, &op1, &op2); }
5264   //! @brief Logical Compare (SSE4.1).
ptestX86Assembler5265   inline void ptest(const XmmReg& op1, const Mem& op2)
5266   { _emitInstruction(kX86InstPTest, &op1, &op2); }
5267 
5268   //! Round Packed SP-FP Values @brief (SSE4.1).
roundpsX86Assembler5269   inline void roundps(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5270   { _emitInstruction(kX86InstRoundPS, &dst, &src, &imm8); }
5271   //! Round Packed SP-FP Values @brief (SSE4.1).
roundpsX86Assembler5272   inline void roundps(const XmmReg& dst, const Mem& src, const Imm& imm8)
5273   { _emitInstruction(kX86InstRoundPS, &dst, &src, &imm8); }
5274 
5275   //! @brief Round Scalar SP-FP Values (SSE4.1).
roundssX86Assembler5276   inline void roundss(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5277   { _emitInstruction(kX86InstRoundSS, &dst, &src, &imm8); }
5278   //! @brief Round Scalar SP-FP Values (SSE4.1).
roundssX86Assembler5279   inline void roundss(const XmmReg& dst, const Mem& src, const Imm& imm8)
5280   { _emitInstruction(kX86InstRoundSS, &dst, &src, &imm8); }
5281 
5282   //! @brief Round Packed DP-FP Values (SSE4.1).
roundpdX86Assembler5283   inline void roundpd(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5284   { _emitInstruction(kX86InstRoundPD, &dst, &src, &imm8); }
5285   //! @brief Round Packed DP-FP Values (SSE4.1).
roundpdX86Assembler5286   inline void roundpd(const XmmReg& dst, const Mem& src, const Imm& imm8)
5287   { _emitInstruction(kX86InstRoundPD, &dst, &src, &imm8); }
5288 
5289   //! @brief Round Scalar DP-FP Values (SSE4.1).
roundsdX86Assembler5290   inline void roundsd(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5291   { _emitInstruction(kX86InstRoundSD, &dst, &src, &imm8); }
5292   //! @brief Round Scalar DP-FP Values (SSE4.1).
roundsdX86Assembler5293   inline void roundsd(const XmmReg& dst, const Mem& src, const Imm& imm8)
5294   { _emitInstruction(kX86InstRoundSD, &dst, &src, &imm8); }
5295 
5296   // --------------------------------------------------------------------------
5297   // [SSE4.2]
5298   // --------------------------------------------------------------------------
5299 
5300   //! @brief Accumulate CRC32 Value (polynomial 0x11EDC6F41) (SSE4.2).
crc32X86Assembler5301   inline void crc32(const GpReg& dst, const GpReg& src)
5302   {
5303     ASMJIT_ASSERT(dst.isRegType(kX86RegTypeGpd) || dst.isRegType(kX86RegTypeGpq));
5304     _emitInstruction(kX86InstCrc32, &dst, &src);
5305   }
5306   //! @brief Accumulate CRC32 Value (polynomial 0x11EDC6F41) (SSE4.2).
crc32X86Assembler5307   inline void crc32(const GpReg& dst, const Mem& src)
5308   {
5309     ASMJIT_ASSERT(dst.isRegType(kX86RegTypeGpd) || dst.isRegType(kX86RegTypeGpq));
5310     _emitInstruction(kX86InstCrc32, &dst, &src);
5311   }
5312 
5313   //! @brief Packed Compare Explicit Length Strings, Return Index (SSE4.2).
pcmpestriX86Assembler5314   inline void pcmpestri(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5315   { _emitInstruction(kX86InstPCmpEStrI, &dst, &src, &imm8); }
5316   //! @brief Packed Compare Explicit Length Strings, Return Index (SSE4.2).
pcmpestriX86Assembler5317   inline void pcmpestri(const XmmReg& dst, const Mem& src, const Imm& imm8)
5318   { _emitInstruction(kX86InstPCmpEStrI, &dst, &src, &imm8); }
5319 
5320   //! @brief Packed Compare Explicit Length Strings, Return Mask (SSE4.2).
pcmpestrmX86Assembler5321   inline void pcmpestrm(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5322   { _emitInstruction(kX86InstPCmpEStrM, &dst, &src, &imm8); }
5323   //! @brief Packed Compare Explicit Length Strings, Return Mask (SSE4.2).
pcmpestrmX86Assembler5324   inline void pcmpestrm(const XmmReg& dst, const Mem& src, const Imm& imm8)
5325   { _emitInstruction(kX86InstPCmpEStrM, &dst, &src, &imm8); }
5326 
5327   //! @brief Packed Compare Implicit Length Strings, Return Index (SSE4.2).
pcmpistriX86Assembler5328   inline void pcmpistri(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5329   { _emitInstruction(kX86InstPCmpIStrI, &dst, &src, &imm8); }
5330   //! @brief Packed Compare Implicit Length Strings, Return Index (SSE4.2).
pcmpistriX86Assembler5331   inline void pcmpistri(const XmmReg& dst, const Mem& src, const Imm& imm8)
5332   { _emitInstruction(kX86InstPCmpIStrI, &dst, &src, &imm8); }
5333 
5334   //! @brief Packed Compare Implicit Length Strings, Return Mask (SSE4.2).
pcmpistrmX86Assembler5335   inline void pcmpistrm(const XmmReg& dst, const XmmReg& src, const Imm& imm8)
5336   { _emitInstruction(kX86InstPCmpIStrM, &dst, &src, &imm8); }
5337   //! @brief Packed Compare Implicit Length Strings, Return Mask (SSE4.2).
pcmpistrmX86Assembler5338   inline void pcmpistrm(const XmmReg& dst, const Mem& src, const Imm& imm8)
5339   { _emitInstruction(kX86InstPCmpIStrM, &dst, &src, &imm8); }
5340 
5341   //! @brief Compare Packed Data for Greater Than (SSE4.2).
pcmpgtqX86Assembler5342   inline void pcmpgtq(const XmmReg& dst, const XmmReg& src)
5343   { _emitInstruction(kX86InstPCmpGtQ, &dst, &src); }
5344   //! @brief Compare Packed Data for Greater Than (SSE4.2).
pcmpgtqX86Assembler5345   inline void pcmpgtq(const XmmReg& dst, const Mem& src)
5346   { _emitInstruction(kX86InstPCmpGtQ, &dst, &src); }
5347 
5348   //! @brief Return the Count of Number of Bits Set to 1 (SSE4.2).
popcntX86Assembler5349   inline void popcnt(const GpReg& dst, const GpReg& src)
5350   {
5351     ASMJIT_ASSERT(!dst.isGpb());
5352     ASMJIT_ASSERT(src.getRegType() == dst.getRegType());
5353     _emitInstruction(kX86InstPopCnt, &dst, &src);
5354   }
5355   //! @brief Return the Count of Number of Bits Set to 1 (SSE4.2).
popcntX86Assembler5356   inline void popcnt(const GpReg& dst, const Mem& src)
5357   {
5358     ASMJIT_ASSERT(!dst.isGpb());
5359     _emitInstruction(kX86InstPopCnt, &dst, &src);
5360   }
5361 
5362   // -------------------------------------------------------------------------
5363   // [AMD only]
5364   // -------------------------------------------------------------------------
5365 
5366   //! @brief Prefetch (3dNow - Amd).
5367   //!
5368   //! Loads the entire 64-byte aligned memory sequence containing the
5369   //! specified memory address into the L1 data cache. The position of
5370   //! the specified memory address within the 64-byte cache line is
5371   //! irrelevant. If a cache hit occurs, or if a memory fault is detected,
5372   //! no bus cycle is initiated and the instruction is treated as a NOP.
amd_prefetchX86Assembler5373   inline void amd_prefetch(const Mem& mem)
5374   { _emitInstruction(kX86InstAmdPrefetch, &mem); }
5375 
5376   //! @brief Prefetch and set cache to modified (3dNow - Amd).
5377   //!
5378   //! The PREFETCHW instruction loads the prefetched line and sets the
5379   //! cache-line state to Modified, in anticipation of subsequent data
5380   //! writes to the line. The PREFETCH instruction, by contrast, typically
5381   //! sets the cache-line state to Exclusive (depending on the hardware
5382   //! implementation).
amd_prefetchwX86Assembler5383   inline void amd_prefetchw(const Mem& mem)
5384   { _emitInstruction(kX86InstAmdPrefetchW, &mem); }
5385 
5386   // -------------------------------------------------------------------------
5387   // [Intel only]
5388   // -------------------------------------------------------------------------
5389 
5390   //! @brief Move Data After Swapping Bytes (SSE3 - Intel Atom).
movbeX86Assembler5391   inline void movbe(const GpReg& dst, const Mem& src)
5392   {
5393     ASMJIT_ASSERT(!dst.isGpb());
5394     _emitInstruction(kX86InstMovBE, &dst, &src);
5395   }
5396 
5397   //! @brief Move Data After Swapping Bytes (SSE3 - Intel Atom).
movbeX86Assembler5398   inline void movbe(const Mem& dst, const GpReg& src)
5399   {
5400     ASMJIT_ASSERT(!src.isGpb());
5401     _emitInstruction(kX86InstMovBE, &dst, &src);
5402   }
5403 
5404   // -------------------------------------------------------------------------
5405   // [Emit Options]
5406   // -------------------------------------------------------------------------
5407 
5408   //! @brief Assert LOCK# Signal Prefix.
5409   //!
5410   //! This instruction causes the processor's LOCK# signal to be asserted
5411   //! during execution of the accompanying instruction (turns the
5412   //! instruction into an atomic instruction). In a multiprocessor environment,
5413   //! the LOCK# signal insures that the processor has exclusive use of any shared
5414   //! memory while the signal is asserted.
5415   //!
5416   //! The LOCK prefix can be prepended only to the following instructions and
5417   //! to those forms of the instructions that use a memory operand: ADD, ADC,
5418   //! AND, BTC, BTR, BTS, CMPXCHG, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD,
5419   //! and XCHG. An undefined opcode exception will be generated if the LOCK
5420   //! prefix is used with any other instruction. The XCHG instruction always
5421   //! asserts the LOCK# signal regardless of the presence or absence of the LOCK
5422   //! prefix.
5423   //!
5424   //! @sa @c kX86EmitOptionLock.
lockX86Assembler5425   inline void lock()
5426   { _emitOptions |= kX86EmitOptionLock; }
5427 
5428   //! @brief Force REX prefix to be emitted.
5429   //!
5430   //! This option should be used carefully, because there are unencodable
5431   //! combinations. If you want to access ah, bh, ch or dh registers then you
5432   //! can't emit REX prefix and it will cause an illegal instruction error.
5433   //!
5434   //! @note REX prefix is only valid for X64/AMD64 platform.
5435   //!
5436   //! @sa @c kX86EmitOptionRex.
rexX86Assembler5437   inline void rex()
5438   { _emitOptions |= kX86EmitOptionRex; }
5439 };
5440 
5441 //! @}
5442 
5443 } // AsmJit namespace
5444 
5445 // [Api-End]
5446 #include "../core/apiend.h"
5447 
5448 // [Guard]
5449 #endif // _ASMJIT_X86_X86ASSEMBLER_H
5450