1 // [AsmJit]
2 // Complete JIT Assembler for C++ Language.
3 //
4 // [License]
5 // Zlib - See COPYING file in this package.
6 
7 // [Guard]
8 #ifndef _ASMJIT_X86_X86COMPILER_H
9 #define _ASMJIT_X86_X86COMPILER_H
10 
11 // [Dependencies - AsmJit]
12 #include "../core/build.h"
13 #include "../core/compiler.h"
14 #include "../core/compilercontext.h"
15 #include "../core/compilerfunc.h"
16 #include "../core/compileritem.h"
17 
18 #include "../x86/x86assembler.h"
19 #include "../x86/x86defs.h"
20 #include "../x86/x86func.h"
21 #include "../x86/x86util.h"
22 
23 // [Api-Begin]
24 #include "../core/apibegin.h"
25 
26 //! @internal
27 //!
28 //! @brief Mark methods not supported by @ref Compiler. These methods are
29 //! usually used only in function prologs/epilogs or to manage stack.
30 #define ASMJIT_NOT_SUPPORTED_BY_COMPILER 0
31 
32 namespace AsmJit {
33 
34 //! @addtogroup AsmJit_X86
35 //! @{
36 
37 // ============================================================================
38 // [Forward Declarations]
39 // ============================================================================
40 
41 struct X86Compiler;
42 struct X86CompilerAlign;
43 struct X86CompilerContext;
44 struct X86CompilerFuncCall;
45 struct X86CompilerFuncDecl;
46 struct X86CompilerFuncEnd;
47 struct X86CompilerInst;
48 struct X86CompilerJmpInst;
49 struct X86CompilerState;
50 struct X86CompilerTarget;
51 struct X86CompilerVar;
52 
53 // ============================================================================
54 // [AsmJit::X86CompilerVar]
55 // ============================================================================
56 
57 //! @brief @ref X86Compiler variable.
58 struct X86CompilerVar : public CompilerVar
59 {
60   // --------------------------------------------------------------------------
61   // [AsVar]
62   // --------------------------------------------------------------------------
63 
asGpVarX86CompilerVar64   inline GpVar asGpVar() const
65   {
66     GpVar var;
67     var._var.id = _id;
68     var._var.size = _size;
69     var._var.regCode = x86VarInfo[_type].getCode();
70     var._var.varType = _type;
71     return var;
72   }
73 
asMmVarX86CompilerVar74   inline MmVar asMmVar() const
75   {
76     MmVar var;
77     var._var.id = _id;
78     var._var.size = _size;
79     var._var.regCode = x86VarInfo[_type].getCode();
80     var._var.varType = _type;
81     return var;
82   }
83 
asXmmVarX86CompilerVar84   inline XmmVar asXmmVar() const
85   {
86     XmmVar var;
87     var._var.id = _id;
88     var._var.size = _size;
89     var._var.regCode = x86VarInfo[_type].getCode();
90     var._var.varType = _type;
91     return var;
92   }
93 
94   // --------------------------------------------------------------------------
95   // [Members - Scope]
96   // --------------------------------------------------------------------------
97 
98   //! @brief The first item where the variable is accessed.
99   //! @note If this member is @c NULL then variable isn't used.
100   CompilerItem* firstItem;
101   //! @brief The last item where the variable is accessed.
102   CompilerItem* lastItem;
103 
104   //! @brief Scope (NULL if variable is global).
105   X86CompilerFuncDecl* funcScope;
106   //! @brief The first call which is after the @c firstItem.
107   X86CompilerFuncCall* funcCall;
108 
109   // --------------------------------------------------------------------------
110   // [Members - Home]
111   // --------------------------------------------------------------------------
112 
113   //! @brief Home register index or @c kRegIndexInvalid (used by register allocator).
114   uint32_t homeRegisterIndex;
115   //! @brief Preferred registers mask.
116   uint32_t prefRegisterMask;
117 
118   //! @brief Home memory address offset.
119   int32_t homeMemoryOffset;
120   //! @brief Used by @c CompilerContext, do not touch (initially NULL).
121   void* homeMemoryData;
122 
123   // --------------------------------------------------------------------------
124   // [Members - Actual]
125   // --------------------------------------------------------------------------
126 
127   //! @brief Actual register index (connected with actual @c X86CompilerState).
128   uint32_t regIndex;
129   //! @brief Actual working offset. This member is set before register allocator
130   //! is called. If workOffset is same as CompilerContext::_currentOffset then
131   //! this variable is probably used in next instruction and can't be spilled.
132   uint32_t workOffset;
133 
134   //! @brief Next active variable in circular double-linked list.
135   X86CompilerVar* nextActive;
136   //! @brief Previous active variable in circular double-linked list.
137   X86CompilerVar* prevActive;
138 
139   // --------------------------------------------------------------------------
140   // [Members - Flags]
141   // --------------------------------------------------------------------------
142 
143   //! @brief Variable state (connected with actual @c X86CompilerState).
144   uint8_t state;
145   //! @brief Whether variable was changed (connected with actual @c X86CompilerState).
146   uint8_t changed;
147   //! @brief Save on unuse (at end of the variable scope).
148   uint8_t saveOnUnuse;
149 
150   // --------------------------------------------------------------------------
151   // [Members - Statistics]
152   // --------------------------------------------------------------------------
153 
154   //! @brief Register read access statistics.
155   uint32_t regReadCount;
156   //! @brief Register write access statistics.
157   uint32_t regWriteCount;
158   //! @brief Register read/write access statistics (related to a single instruction).
159   uint32_t regRwCount;
160 
161   //! @brief Register GpbLo access statistics.
162   uint32_t regGpbLoCount;
163   //! @brief Register GpbHi access statistics.
164   uint32_t regGpbHiCount;
165 
166   //! @brief Memory read statistics.
167   uint32_t memReadCount;
168   //! @brief Memory write statistics.
169   uint32_t memWriteCount;
170   //! @brief Memory read+write statistics.
171   uint32_t memRwCount;
172 
173   // --------------------------------------------------------------------------
174   // [Members - Temporary]
175   // --------------------------------------------------------------------------
176 
177   //! @brief Temporary data that can be used in prepare/translate stage.
178   //!
179   //! Initial value is NULL and it's expected that after use it's set back to
180   //! NULL.
181   //!
182   //! The temporary data is designed to be used by algorithms that need to
183   //! set some state into variables, do something and then clean-up. See
184   //! state switch and function call for details.
185   union
186   {
187     void* tPtr;
188     intptr_t tInt;
189   };
190 };
191 
192 // ============================================================================
193 // [AsmJit::X86CompilerState]
194 // ============================================================================
195 
196 //! @brief @ref X86Compiler state.
197 struct X86CompilerState : CompilerState
198 {
199   enum
200   {
201     //! @brief Base for Gp registers.
202     kStateRegGpBase = 0,
203     //! @brief Base for Mm registers.
204     kStateRegMmBase = 16,
205     //! @brief Base for Xmm registers.
206     kStateRegXmmBase = 24,
207 
208     //! @brief Count of all registers in @ref X86CompilerState.
209     kStateRegCount = 16 + 8 + 16
210   };
211 
212   // --------------------------------------------------------------------------
213   // [Clear]
214   // --------------------------------------------------------------------------
215 
clearX86CompilerState216   inline void clear()
217   { memset(this, 0, sizeof(*this)); }
218 
219   // --------------------------------------------------------------------------
220   // [Members]
221   // --------------------------------------------------------------------------
222 
223   union
224   {
225     //! @brief All allocated variables in one array.
226     X86CompilerVar* regs[kStateRegCount];
227 
228     struct
229     {
230       //! @brief Allocated GP registers.
231       X86CompilerVar* gp[16];
232       //! @brief Allocated MM registers.
233       X86CompilerVar* mm[8];
234       //! @brief Allocated XMM registers.
235       X86CompilerVar* xmm[16];
236     };
237   };
238 
239   //! @brief Used GP registers bit-mask.
240   uint32_t usedGP;
241   //! @brief Used MM registers bit-mask.
242   uint32_t usedMM;
243   //! @brief Used XMM registers bit-mask.
244   uint32_t usedXMM;
245 
246   //! @brief Changed GP registers bit-mask.
247   uint32_t changedGP;
248   //! @brief Changed MM registers bit-mask.
249   uint32_t changedMM;
250   //! @brief Changed XMM registers bit-mask.
251   uint32_t changedXMM;
252 
253   //! @brief Count of variables in @c memVarsData.
254   uint32_t memVarsCount;
255   //! @brief Variables stored in memory (@c kVarStateMem).
256   //!
257   //! When saving / restoring state it's important to keep registers which are
258   //! still in memory. Register is always unused when it is going out-of-scope.
259   //! All variables which are not here are unused (@c kVarStateUnused).
260   X86CompilerVar* memVarsData[1];
261 };
262 
263 // ============================================================================
264 // [AsmJit::VarMemBlock]
265 // ============================================================================
266 
//! @brief Memory block record described by an offset and a size.
//!
//! NOTE(review): the two link members suggest each block can sit on a "used"
//! chain and a "free" chain - confirm against the code that manages them.
struct VarMemBlock
{
  //! @brief Block offset (signed).
  int32_t offset;
  //! @brief Block size.
  uint32_t size;

  //! @brief Next block through the @c nextUsed link.
  VarMemBlock* nextUsed;
  //! @brief Next block through the @c nextFree link.
  VarMemBlock* nextFree;
};
275 
276 // ============================================================================
277 // [AsmJit::VarAllocRecord]
278 // ============================================================================
279 
280 //! @brief Variable alloc record (for each instruction that uses variables).
281 //!
282 //! Variable record contains pointer to variable data and register allocation
283 //! flags. These flags are important to determine the best alloc instruction.
284 struct VarAllocRecord
285 {
286   //! @brief Variable data (the structure owned by @c Compiler).
287   X86CompilerVar* vdata;
288   //! @brief Variable alloc flags, see @c kVarAllocFlags.
289   uint32_t vflags;
290   //! @brief Register mask (default is 0).
291   uint32_t regMask;
292 };
293 
294 // ============================================================================
295 // [AsmJit::VarCallRecord]
296 // ============================================================================
297 
298 //! @brief Variable call-fn record (for each callable that uses variables).
299 //!
300 //! This record contains variables that are used to call a function (using
301 //! @c X86CompilerFuncCall item). Each variable contains the registers where
302 //! it must be and registers where the value will be returned.
303 struct VarCallRecord
304 {
305   //! @brief Variable data (the structure owned by @c Compiler).
306   X86CompilerVar* vdata;
307   uint32_t flags;
308 
309   uint8_t inCount;
310   uint8_t inDone;
311 
312   uint8_t outCount;
313   uint8_t outDone;
314 
315   enum FLAGS
316   {
317     kFlagInGp = 0x0001,
318     kFlagInMm = 0x0002,
319     kFlagInXmm = 0x0004,
320     kFlagInStack = 0x0008,
321 
322     kFlagOutEax = 0x0010,
323     kFlagOutEdx = 0x0020,
324     kFlagOutSt0 = 0x0040,
325     kFlagOutSt1 = 0x0080,
326     kFlagOutMm0 = 0x0100,
327     kFlagOutXmm0 = 0x0400,
328     kFlagOutXmm1 = 0x0800,
329 
330     kFlagInMemPtr = 0x1000,
331     kFlagCallReg = 0x2000,
332     kFlagCallMem = 0x4000,
333     kFlagUnuseAfterUse = 0x8000
334   };
335 };
336 
337 // ============================================================================
338 // [AsmJit::VarHintRecord]
339 // ============================================================================
340 
341 struct VarHintRecord
342 {
343   X86CompilerVar* vdata;
344   uint32_t hint;
345 };
346 
347 // ============================================================================
348 // [AsmJit::ForwardJumpData]
349 // ============================================================================
350 
351 struct ForwardJumpData
352 {
353   X86CompilerJmpInst* inst;
354   X86CompilerState* state;
355   ForwardJumpData* next;
356 };
357 
358 // ============================================================================
359 // [AsmJit::CompilerUtil]
360 // ============================================================================
361 
362 //! @brief Static class that contains utility methods.
363 struct CompilerUtil
364 {
365   ASMJIT_API static bool isStack16ByteAligned();
366 };
367 
368 // ============================================================================
369 // [AsmJit::X86Compiler]
370 // ============================================================================
371 
372 //! @brief Compiler - high level code generation.
373 //!
374 //! This class is used to store instruction stream and allows to modify
375 //! it on the fly. It uses different concept than @c AsmJit::Assembler class
376 //! and in fact @c AsmJit::Assembler is only used as a backend. Compiler never
377 //! emits machine code and each instruction you use is stored to instruction
378 //! array instead. This allows to modify instruction stream later and for
379 //! example to reorder instructions to make better performance.
380 //!
381 //! Using @c AsmJit::Compiler moves code generation to higher level. Higher
382 //! level constructs allows to write more abstract and extensible code that
383 //! is not possible with pure @c AsmJit::Assembler class. Because
384 //! @c AsmJit::Compiler needs to create many objects and lifetime of these
385 //! objects is small (same as @c AsmJit::Compiler lifetime itself) it uses
386 //! very fast memory management model. This model allows to create object
387 //! instances in nearly zero time (compared to @c malloc() or @c new()
388 //! operators) so overhead by creating machine code by @c AsmJit::Compiler
389 //! is minimized.
390 //!
391 //! @section AsmJit_Compiler_TheStory The Story
392 //!
393 //! Before telling you how Compiler works I'd like to write a story. I'd like
394 //! to cover reasons why this class was created and why I'm recommending to use
395 //! it. When I released the first version of AsmJit (0.1) it was a toy. The
396 //! first function I wrote was function which is still available as testjit and
397 //! which simply returns 1024. The reason why function works for both 32-bit/
398 //! 64-bit mode and for Windows/Unix specific calling conventions is luck, no
399 //! arguments usage and no registers usage except returning value in EAX/RAX.
400 //!
//! Then I started a project called BlitJit which was targeted at generating
//! JIT code for computer graphics. After writing some lines I decided that I
//! can't join pieces of code together without abstraction: should the
//! pixel source pointer be in ESI/RSI or EDI/RDI, or is it completely
//! irrelevant? What about the destination pointer and the SSE2 register for
//! reading input pixels? The simple answer might be "just pick one and use it".
407 //!
408 //! Another reason for abstraction is function calling-conventions. It's really
409 //! not easy to write assembler code for 32-bit and 64-bit platform supporting
410 //! three calling conventions (32-bit is similar between Windows and Unix, but
411 //! 64-bit calling conventions are different).
412 //!
413 //! At this time I realized that I can't write code which uses named registers,
414 //! I need to abstract it. In most cases you don't need specific register, you
415 //! need to emit instruction that does something with 'virtual' register(s),
416 //! memory, immediate or label.
417 //!
418 //! The first version of AsmJit with Compiler was 0.5 (or 0.6?, can't remember).
419 //! There was support for 32-bit and 64-bit mode, function calling conventions,
420 //! but when emitting instructions the developer needed to decide which
421 //! registers are changed, which are only read or completely overwritten. This
422 //! model helped a lot when generating code, especially when joining more
423 //! code-sections together, but there was also small possibility for mistakes.
424 //! Simply the first version of Compiler was great improvement over low-level
425 //! Assembler class, but the API design wasn't perfect.
426 //!
//! The second version of Compiler, completely rewritten and based on
428 //! different goals, is part of AsmJit starting at version 1.0. This version
429 //! was designed after the first one and it contains serious improvements over
430 //! the old one. The first improvement is that you just use instructions with
431 //! virtual registers - called variables. When using compiler there is no way
432 //! to use native registers, there are variables instead. AsmJit is smarter
433 //! than before and it knows which register is needed only for read (r),
434 //! read/write (w) or overwrite (x). Supported are also instructions which
435 //! are using some registers in implicit way (these registers are not part of
436 //! instruction definition in string form). For example to use CPUID instruction
437 //! you must give it four variables which will be automatically allocated to
438 //! input/output registers (EAX, EBX, ECX, EDX).
439 //!
440 //! Another improvement is algorithm used by a register allocator. In first
441 //! version the registers were allocated when creating instruction stream. In
442 //! new version registers are allocated after calling @c Compiler::make(). This
443 //! means that register allocator has information about scope of all variables
444 //! and their usage statistics. The algorithm to allocate registers is very
//! simple and is known as a 'linear scan register allocator'. When
//! you run out of registers, all candidate variables are scored and the worst
//! one is spilled. Of course, the algorithm ignores the variables used by the
//! current instruction.
449 //!
450 //! In addition, because registers are allocated after the code stream is
451 //! generated, the state switches between jumps are handled by Compiler too.
//! You don't need to worry about jumps; the compiler always does this dirty
//! work for you.
454 //!
455 //! The nearly last thing I'd like to present is calling other functions from
456 //! the generated code. AsmJit uses a @c FunctionPrototype class to hold
457 //! the function parameters, their position in stack (or register index) and
458 //! function return value. This class is used internally, but it can be
459 //! used to create your own function calling-convention. All standard function
460 //! calling conventions are implemented.
461 //!
462 //! Please enjoy the new version of Compiler, it was created for writing a
463 //! low-level code using high-level API, leaving developer to concentrate to
464 //! real problems and not to solving a register puzzle.
465 //!
466 //! @section AsmJit_Compiler_CodeGeneration Code Generation
467 //!
//! The first thing to know about the compiler is that it never emits
//! machine code. It's used as a middleware between @c AsmJit::Assembler and
470 //! your code. There is also convenience method @c make() that allows to
471 //! generate machine code directly without creating @c AsmJit::Assembler
472 //! instance.
473 //!
474 //! Comparison of generating machine code through @c Assembler and directly
475 //! by @c Compiler:
476 //!
477 //! @code
478 //! // Assembler instance is low level code generation class that emits
479 //! // machine code.
480 //! X86Assembler a;
481 //!
482 //! // Compiler instance is high level code generation class that stores all
483 //! // instructions in internal representation.
484 //! X86Compiler c;
485 //!
486 //! // ... put your code here ...
487 //!
488 //! // Final step - generate code. AsmJit::Compiler::serialize() will serialize
489 //! // all instructions into Assembler and this ensures generating real machine
490 //! // code.
491 //! c.serialize(a);
492 //!
493 //! // Your function
494 //! void* fn = a.make();
495 //! @endcode
496 //!
497 //! Example how to generate machine code using only @c Compiler (preferred):
498 //!
499 //! @code
500 //! // Compiler instance is enough.
501 //! X86Compiler c;
502 //!
503 //! // ... put your code here ...
504 //!
505 //! // Your function
506 //! void* fn = c.make();
507 //! @endcode
508 //!
509 //! You can see that there is @c AsmJit::Compiler::serialize() function that
510 //! emits instructions into @c AsmJit::Assembler(). This layered architecture
511 //! means that each class is used for something different and there is no code
512 //! duplication. For convenience there is also @c AsmJit::Compiler::make()
513 //! method that can create your function using @c AsmJit::Assembler, but
//! internally (this is the preferred behavior when using @c AsmJit::Compiler).
515 //!
516 //! The @c make() method allocates memory using @c Context instance passed
517 //! into the @c X86Compiler constructor. If code generator is used to create JIT
518 //! function then virtual memory allocated by @c MemoryManager is used. To get
519 //! global memory manager use @c MemoryManager::getGlobal().
520 //!
521 //! @code
522 //! // Compiler instance is enough.
523 //! X86Compiler c;
524 //!
525 //! // ... put your code using Compiler instance ...
526 //!
527 //! // Your function
528 //! void* fn = c.make();
529 //!
530 //! // Free it if you don't want it anymore
531 //! // (using global memory manager instance)
532 //! MemoryManager::getGlobal()->free(fn);
533 //! @endcode
534 //!
535 //! @section AsmJit_Compiler_Functions Functions
536 //!
537 //! To build functions with @c Compiler, see @c AsmJit::Compiler::newFunc()
538 //! method.
539 //!
540 //! @section AsmJit_Compiler_Variables Variables
541 //!
542 //! Compiler is able to manage variables and function arguments. Internally
543 //! there is no difference between function argument and variable declared
544 //! inside. To get function argument you use @c getGpArg() method and to declare
545 //! variable use @c newGpVar(), @c newMmVar() and @c newXmmVar() methods. The @c newXXX()
546 //! methods accept also parameter describing the variable type. For example
547 //! the @c newGpVar() method always creates variable which size matches the target
548 //! architecture size (for 32-bit target the 32-bit variable is created, for
549 //! 64-bit target the variable size is 64-bit). To override this behavior the
550 //! variable type must be specified.
551 //!
552 //! @code
553 //! // Compiler and function declaration - void f(int*);
554 //! X86Compiler c;
//! c.newFunc(kX86FuncConvDefault, FuncBuilder1<Void, int*>());
556 //!
557 //! // Get argument variable (it's pointer).
558 //! GpVar a1(c.getGpArg(0));
559 //!
560 //! // Create your variables.
561 //! GpVar x1(c.newGpVar(kX86VarTypeGpd));
562 //! GpVar x2(c.newGpVar(kX86VarTypeGpd));
563 //!
564 //! // Init your variables.
565 //! c.mov(x1, 1);
566 //! c.mov(x2, 2);
567 //!
568 //! // ... your code ...
569 //! c.add(x1, x2);
570 //! // ... your code ...
571 //!
572 //! // Store result to a given pointer in first argument
573 //! c.mov(dword_ptr(a1), x1);
574 //!
575 //! // End of function body.
576 //! c.endFunc();
577 //!
578 //! // Make the function.
579 //! typedef void (*MyFn)(int*);
580 //! MyFn fn = asmjit_cast<MyFn>(c.make());
581 //! @endcode
582 //!
//! This code snippet needs to be explained. You can see that there are more
584 //! variable types that can be used by @c Compiler. Most useful variables can
585 //! be allocated using general purpose registers (@c GpVar), MMX registers
586 //! (@c MmVar) or SSE registers (@c XmmVar).
587 //!
588 //! X86/X64 variable types:
589 //!
590 //! - @c kX86VarTypeGpd - 32-bit general purpose register (EAX, EBX, ...).
591 //! - @c kX86VarTypeGpq - 64-bit general purpose register (RAX, RBX, ...).
592 //! - @c kX86VarTypeGpz - 32-bit or 64-bit general purpose register, depends
593 //!   to target architecture. Mapped to @c kX86VarTypeGpd or @c kX86VarTypeGpq.
594 //!
595 //! - @c kX86VarTypeX87 - 80-bit floating point stack register st(0 to 7).
596 //! - @c kX86VarTypeX87SS - 32-bit floating point stack register st(0 to 7).
597 //! - @c kX86VarTypeX87SD - 64-bit floating point stack register st(0 to 7).
598 //!
//! - @c kX86VarTypeMm - 64-bit MMX register.
600 //!
601 //! - @c kX86VarTypeXmm - 128-bit SSE register.
602 //! - @c kX86VarTypeXmmSS - 128-bit SSE register which contains
603 //!   scalar 32-bit single precision floating point.
604 //! - @c kX86VarTypeXmmSD - 128-bit SSE register which contains
605 //!   scalar 64-bit double precision floating point.
606 //! - @c kX86VarTypeXmmPS - 128-bit SSE register which contains
607 //!   4 packed 32-bit single precision floating points.
608 //! - @c kX86VarTypeXmmPD - 128-bit SSE register which contains
609 //!   2 packed 64-bit double precision floating points.
610 //!
611 //! Unified variable types:
612 //!
613 //! - @c kX86VarTypeInt32 - 32-bit general purpose register.
614 //! - @c kX86VarTypeInt64 - 64-bit general purpose register.
615 //! - @c kX86VarTypeIntPtr - 32-bit or 64-bit general purpose register / pointer.
616 //!
617 //! - @c kX86VarTypeFloat - 32-bit single precision floating point.
618 //! - @c kX86VarTypeDouble - 64-bit double precision floating point.
619 //!
620 //! Variable states:
621 //!
622 //! - @c kVarStateUnused - State that is assigned to newly created
623 //!   variables or to not used variables (dereferenced to zero).
624 //! - @c kVarStateReg - State that means that variable is currently
625 //!   allocated in register.
626 //! - @c kVarStateMem - State that means that variable is currently
627 //!   only in memory location.
628 //!
629 //! When you create new variable, initial state is always @c kVarStateUnused,
630 //! allocating it to register or spilling to memory changes this state to
631 //! @c kVarStateReg or @c kVarStateMem, respectively.
//! During a variable's lifetime it's usual that its state changes multiple
//! times. To generate better code, you can control allocating and spilling
//! explicitly by using the methods listed below.
635 //!
636 //! Explicit variable allocating / spilling methods:
637 //!
638 //! - @c Compiler::alloc() - Explicit method to alloc variable into
639 //!      register. You can use this before loops or code blocks.
640 //!
641 //! - @c Compiler::spill() - Explicit method to spill variable. If variable
642 //!      is in register and you call this method, it's moved to its home memory
643 //!      location. If variable is not in register no operation is performed.
644 //!
645 //! - @c Compiler::unuse() - Unuse variable (you can use this to end the
646 //!      variable scope or sub-scope).
647 //!
648 //! Please see AsmJit tutorials (testcompiler.cpp and testvariables.cpp) for
649 //! more complete examples.
650 //!
651 //! @section AsmJit_Compiler_MemoryManagement Memory Management
652 //!
653 //! @c Compiler Memory management follows these rules:
654 //! - Everything created by @c Compiler is always freed by @c Compiler.
655 //! - To get decent performance, compiler always uses larger memory buffer
656 //!   for objects to allocate and when compiler instance is destroyed, this
657 //!   buffer is freed. Destructors of active objects are called when
//!   destroying compiler instance. Destructors of abandoned compiler
//!   objects are called immediately after abandoning them.
660 //! - This type of memory management is called 'zone memory management'.
661 //!
662 //! This means that you can't use any @c Compiler object after destructing it,
663 //! it also means that each object like @c Label, @c Var and others are created
664 //! and managed by @c Compiler itself. These objects contain ID which is used
665 //! internally by Compiler to store additional information about these objects.
666 //!
667 //! @section AsmJit_Compiler_StateManagement Control-Flow and State Management.
668 //!
669 //! The @c Compiler automatically manages state of the variables when using
//! control flow instructions like jumps, conditional jumps and calls. There
//! are minimal heuristics for choosing how the state is saved or restored.
672 //!
673 //! Generally the state can be changed only when using jump or conditional jump
674 //! instruction. When using non-conditional jump then state change is embedded
675 //! into the instruction stream before the jump. When using conditional jump
676 //! the @c Compiler decides whether to restore state before the jump or whether
//! to use another block where state is restored. The last case is that no code
//! has to be emitted and there is no state change (this is of course ideal).
679 //!
680 //! Choosing whether to embed 'restore-state' section before conditional jump
681 //! is quite simple. If jump is likely to be 'taken' then code is embedded, if
682 //! jump is unlikely to be taken then the small code section for state-switch
683 //! will be generated instead.
684 //!
685 //! Next example is the situation where the extended code block is used to
686 //! do state-change:
687 //!
688 //! @code
689 //! X86Compiler c;
690 //!
691 //! c.newFunc(kX86FuncConvDefault, FuncBuilder0<Void>());
692 //! c.getFunc()->setHint(kFuncHintNaked, true);
693 //!
694 //! // Labels.
695 //! Label L0 = c.newLabel();
696 //!
697 //! // Variables.
698 //! GpVar var0 = c.newGpVar();
699 //! GpVar var1 = c.newGpVar();
700 //!
701 //! // Cleanup. After these two lines, the var0 and var1 will be always stored
702 //! // in registers. Our example is very small, but in larger code the var0 can
703 //! // be spilled by xor(var1, var1).
704 //! c.xor_(var0, var0);
705 //! c.xor_(var1, var1);
706 //! c.cmp(var0, var1);
707 //! // State:
708 //! //   var0 - register.
709 //! //   var1 - register.
710 //!
711 //! // We manually spill these variables.
712 //! c.spill(var0);
713 //! c.spill(var1);
714 //! // State:
715 //! //   var0 - memory.
716 //! //   var1 - memory.
717 //!
718 //! // Conditional jump to L0. It will be always taken, but compiler thinks that
719 //! // it is unlikely taken so it will embed state change code somewhere.
720 //! c.je(L0);
721 //!
722 //! // Do something. The variables var0 and var1 will be allocated again.
723 //! c.add(var0, 1);
724 //! c.add(var1, 2);
725 //! // State:
726 //! //   var0 - register.
727 //! //   var1 - register.
728 //!
729 //! // Bind label here, the state is not changed.
730 //! c.bind(L0);
731 //! // State:
732 //! //   var0 - register.
733 //! //   var1 - register.
734 //!
735 //! // We need to use var0 and var1, because if compiler detects that variables
736 //! // are out of scope then it optimizes the state-change.
737 //! c.sub(var0, var1);
738 //! // State:
739 //! //   var0 - register.
740 //! //   var1 - register.
741 //!
742 //! c.endFunc();
743 //! @endcode
744 //!
745 //! The output:
746 //!
747 //! @verbatim
748 //! xor eax, eax                    ; xor var_0, var_0
749 //! xor ecx, ecx                    ; xor var_1, var_1
750 //! cmp eax, ecx                    ; cmp var_0, var_1
751 //! mov [esp - 24], eax             ; spill var_0
752 //! mov [esp - 28], ecx             ; spill var_1
//! je L0_Switch0
754 //! mov eax, [esp - 24]             ; alloc var_0
755 //! add eax, 1                      ; add var_0, 1
756 //! mov ecx, [esp - 28]             ; alloc var_1
757 //! add ecx, 2                      ; add var_1, 2
758 //! L0:
759 //! sub eax, ecx                    ; sub var_0, var_1
760 //! ret
761 //!
762 //! ; state-switch begin
763 //! L0_Switch0:
764 //! mov eax, [esp - 24]             ; alloc var_0
765 //! mov ecx, [esp - 28]             ; alloc var_1
766 //! jmp short L0
767 //! ; state-switch end
768 //! @endverbatim
769 //!
770 //! You can see that the state-switch section was generated (see L0_Switch0).
771 //! The compiler is unable to restore state immediately when emitting the
772 //! forward jump (the code is generated from first to last instruction and
773 //! the target state is simply not known at this time).
774 //!
//! To make @c Compiler embed the state-switch code before the jump, create a
//! backward jump (which the processor also expects to be taken). To
//! demonstrate embedding the state-switch before the jump we use slightly
//! modified code:
779 //!
780 //! @code
781 //! X86Compiler c;
782 //!
783 //! c.newFunc(kX86FuncConvDefault, FuncBuilder0<Void>());
784 //! c.getFunc()->setHint(kFuncHintNaked, true);
785 //!
786 //! // Labels.
787 //! Label L0 = c.newLabel();
788 //!
789 //! // Variables.
790 //! GpVar var0 = c.newGpVar();
791 //! GpVar var1 = c.newGpVar();
792 //!
793 //! // Cleanup. After these two lines, the var0 and var1 will be always stored
794 //! // in registers. Our example is very small, but in larger code the var0 can
795 //! // be spilled by xor(var1, var1).
796 //! c.xor_(var0, var0);
797 //! c.xor_(var1, var1);
798 //! // State:
799 //! //   var0 - register.
800 //! //   var1 - register.
801 //!
802 //! // We manually spill these variables.
803 //! c.spill(var0);
804 //! c.spill(var1);
805 //! // State:
806 //! //   var0 - memory.
807 //! //   var1 - memory.
808 //!
809 //! // Bind our label here.
810 //! c.bind(L0);
811 //!
812 //! // Do something, the variables will be allocated again.
813 //! c.add(var0, 1);
814 //! c.add(var1, 2);
815 //! // State:
816 //! //   var0 - register.
817 //! //   var1 - register.
818 //!
819 //! // Backward conditional jump to L0. The default behavior is that it is taken
820 //! // so state-change code will be embedded here.
821 //! c.je(L0);
822 //!
823 //! c.endFunc();
824 //! @endcode
825 //!
826 //! The output:
827 //!
828 //! @verbatim
829 //! xor ecx, ecx                    ; xor var_0, var_0
830 //! xor edx, edx                    ; xor var_1, var_1
831 //! mov [esp - 24], ecx             ; spill var_0
832 //! mov [esp - 28], edx             ; spill var_1
833 //! L.2:
834 //! mov ecx, [esp - 24]             ; alloc var_0
835 //! add ecx, 1                      ; add var_0, 1
836 //! mov edx, [esp - 28]             ; alloc var_1
837 //! add edx, 2                      ; add var_1, 2
838 //!
839 //! ; state-switch begin
840 //! mov [esp - 24], ecx             ; spill var_0
841 //! mov [esp - 28], edx             ; spill var_1
842 //! ; state-switch end
843 //!
844 //! je short L.2
845 //! ret
846 //! @endverbatim
847 //!
848 //! Please notice where the state-switch section is located. The @c Compiler
849 //! decided that jump is likely to be taken so the state change is embedded
850 //! before the conditional jump. To change this behavior into the previous
851 //! case it's needed to add a hint (@c kCondHintLikely or @c kCondHintUnlikely).
852 //!
//! Replacing the <code>c.je(L0)</code> by <code>c.je(L0, kCondHintUnlikely)</code>
//! will generate code like this:
855 //!
856 //! @verbatim
857 //! xor ecx, ecx                    ; xor var_0, var_0
858 //! xor edx, edx                    ; xor var_1, var_1
859 //! mov [esp - 24], ecx             ; spill var_0
860 //! mov [esp - 28], edx             ; spill var_1
861 //! L0:
862 //! mov ecx, [esp - 24]             ; alloc var_0
//! add ecx, 1                      ; add var_0, 1
864 //! mov edx, [esp - 28]             ; alloc var_1
865 //! add edx, 2                      ; add var_1, 2
866 //! je L0_Switch, 2
867 //! ret
868 //!
869 //! ; state-switch begin
870 //! L0_Switch:
871 //! mov [esp - 24], ecx             ; spill var_0
872 //! mov [esp - 28], edx             ; spill var_1
873 //! jmp short L0
874 //! ; state-switch end
875 //! @endverbatim
876 //!
877 //! This section provided information about how state-change works. The
878 //! behavior is deterministic and it can be overridden.
879 //!
880 //! @section AsmJit_Compiler_AdvancedCodeGeneration Advanced Code Generation
881 //!
882 //! This section describes advanced method of code generation available to
883 //! @c Compiler (but also to @c Assembler). When emitting code to instruction
884 //! stream the methods like @c mov(), @c add(), @c sub() can be called directly
885 //! (advantage is static-type control performed also by C++ compiler) or
886 //! indirectly using @c emit() method. The @c emit() method needs only
887 //! instruction code and operands.
888 //!
//! Example of code generated using the standard type-safe API:
890 //!
891 //! @code
892 //! X86Compiler c;
893 //! GpVar var0 = c.newGpVar();
894 //! GpVar var1 = c.newGpVar();
895 //!
896 //! ...
897 //!
898 //! c.mov(var0, imm(0));
899 //! c.add(var0, var1);
900 //! c.sub(var0, var1);
901 //! @endcode
902 //!
903 //! The code above can be rewritten as:
904 //!
905 //! @code
906 //! X86Compiler c;
907 //! GpVar var0 = c.newGpVar();
908 //! GpVar var1 = c.newGpVar();
909 //!
910 //! ...
911 //!
912 //! c.emit(kX86InstMov, var0, imm(0));
913 //! c.emit(kX86InstAdd, var0, var1);
914 //! c.emit(kX86InstSub, var0, var1);
915 //! @endcode
916 //!
//! The advantage of the first snippet is a very friendly API with type-safety
//! checked by the C++ compiler. The advantage of the second snippet is the
//! ability to select or generate the instruction code in different places.
//! See the next example of how the @c emit() method can be used to generate
//! abstract code.
922 //!
923 //! Use case:
924 //!
925 //! @code
926 //! bool emitArithmetic(Compiler& c, XmmVar& var0, XmmVar& var1, const char* op)
927 //! {
928 //!   uint code = kInstNone;
929 //!
930 //!   if (strcmp(op, "ADD") == 0)
931 //!     code = kX86InstAddSS;
932 //!   else if (strcmp(op, "SUBTRACT") == 0)
933 //!     code = kX86InstSubSS;
934 //!   else if (strcmp(op, "MULTIPLY") == 0)
935 //!     code = kX86InstMulSS;
936 //!   else if (strcmp(op, "DIVIDE") == 0)
937 //!     code = kX86InstDivSS;
938 //!   else
939 //!     // Invalid parameter?
940 //!     return false;
941 //!
//!   c.emit(code, var0, var1);
//!   return true;
//! }
944 //! @endcode
945 //!
946 //! Other use cases are waiting for you! Be sure that instruction you are
947 //! emitting is correct and encodable, because if not, Assembler will set
948 //! error code to @c kErrorUnknownInstruction.
949 //!
950 //! @section AsmJit_Compiler_CompilerDetails Compiler Details
951 //!
//! This section is here for people interested in the compiling process. There
//! are a few steps that must be done for each compiled function (or your code).
954 //!
955 //! When your @c Compiler instance is ready, you can create function and add
956 //! compiler-items using intrinsics or higher level methods implemented by the
957 //! @c AsmJit::Compiler. When you are done (all instructions serialized) you
958 //! should call @c AsmJit::Compiler::make() method which will analyze your code,
959 //! allocate registers and memory for local variables and serialize all items
//! to @c AsmJit::Assembler instance. The next steps show what is done
//! internally before the code is serialized into @c AsmJit::Assembler
//! (implemented in the @c AsmJit::Compiler::serialize() method).
963 //!
//! 1. The compiler tries to match function and end-function items (these
//!    items define function body and blocks).
966 //!
967 //! 2. For all items inside the function-body the virtual functions
968 //!    are called in this order:
969 //!    - CompilerItem::prepare()
970 //!    - CompilerItem::translate()
971 //!    - CompilerItem::emit()
972 //!    - CompilerItem::post()
973 //!
974 //!    There is some extra work when emitting function prolog / epilog and
975 //!    register allocator.
976 //!
977 //! 3. Emit jump tables data.
978 //!
979 //! When everything here ends, @c AsmJit::Assembler contains binary stream
980 //! that needs only relocation to be callable by C/C++ code.
981 //!
982 //! @section AsmJit_Compiler_Differences Summary of Differences between @c Assembler and @c Compiler
983 //!
//! - Instructions are not translated to machine code immediately, they are
//!   stored as emittables, see @c AsmJit::CompilerItem.
986 //! - Contains function builder and ability to call other functions.
987 //! - Contains register allocator and variable management.
988 //! - Contains a lot of helper methods to simplify the code generation not
989 //!   available/possible in @c AsmJit::Assembler.
990 //! - Ability to pre-process or post-process the code which is being generated.
991 struct X86Compiler : public Compiler
992 {
993   // Special X86 instructions:
994   // - cpuid,
995   // - cbw, cwd, cwde, cdq, cdqe, cqo
996   // - cmpxchg
997   // - cmpxchg8b, cmpxchg16b,
998   // - daa, das,
999   // - imul, mul, idiv, div,
1000   // - mov_ptr
1001   // - lahf, sahf
1002   // - maskmovq, maskmovdqu
1003   // - enter, leave
1004   // - ret
1005   // - monitor, mwait
1006   // - pop, popad, popfd, popfq,
1007   // - push, pushad, pushfd, pushfq
1008   // - rcl, rcr, rol, ror, sal, sar, shl, shr
1009   // - shld, shrd
  // - rdtsc, rdtscp
1011   // - lodsb, lodsd, lodsq, lodsw
1012   // - movsb, movsd, movsq, movsw
1013   // - stosb, stosd, stosq, stosw
1014   // - cmpsb, cmpsd, cmpsq, cmpsw
1015   // - scasb, scasd, scasq, scasw
1016   //
1017   // Special X87 instructions:
1018   // - fisttp
1019 
1020   // --------------------------------------------------------------------------
1021   // [Construction / Destruction]
1022   // --------------------------------------------------------------------------
1023 
  //! @brief Create a @ref X86Compiler instance.
  //!
  //! @param context Context handed to the compiler; when omitted, the value
  //! returned by @c JitContext::getGlobal() is used.
  ASMJIT_API X86Compiler(Context* context = JitContext::getGlobal());
  //! @brief Destroy the @ref X86Compiler instance.
  ASMJIT_API ~X86Compiler();
1028 
1029   // --------------------------------------------------------------------------
1030   // [Accessors]
1031   // --------------------------------------------------------------------------
1032 
1033   //! @brief Get current function as @ref X86CompilerFuncDecl.
1034   //!
1035   //! This method can be called within @c newFunc() and @c endFunc()
1036   //! block to get current function you are working with. It's recommended
1037   //! to store @c AsmJit::Function pointer returned by @c newFunc<> method,
1038   //! because this allows you in future implement function sections outside of
1039   //! function itself (yeah, this is possible!).
getFuncX86Compiler1040   inline X86CompilerFuncDecl* getFunc() const
1041   { return reinterpret_cast<X86CompilerFuncDecl*>(_func); }
1042 
1043   // --------------------------------------------------------------------------
1044   // [Function Builder]
1045   // --------------------------------------------------------------------------
1046 
1047   //! @brief Create a new function.
1048   //!
1049   //! @param cconv Calling convention to use (see @c kX86FuncConv enum)
1050   //! @param params Function arguments prototype.
1051   //!
1052   //! This method is usually used as a first step when generating functions
1053   //! by @c Compiler. First parameter @a cconv specifies function calling
1054   //! convention to use. Second parameter @a params specifies function
1055   //! arguments. To create function arguments are used templates
1056   //! @c BuildFunction0<>, @c BuildFunction1<...>, @c BuildFunction2<...>,
1057   //! etc...
1058   //!
1059   //! Templates with BuildFunction prefix are used to generate argument IDs
1060   //! based on real C++ types. See next example how to generate function with
1061   //! two 32-bit integer arguments.
1062   //!
1063   //! @code
1064   //! // Building function using AsmJit::Compiler example.
1065   //!
1066   //! // Compiler instance
1067   //! X86Compiler c;
1068   //!
1069   //! // Begin of function (also emits function @c Prolog)
1070   //! c.newFunc(
1071   //!   // Default calling convention (32-bit cdecl or 64-bit for host OS)
1072   //!   kX86FuncConvDefault,
1073   //!   // Using function builder to generate arguments list
1074   //!   BuildFunction2<int, int>());
1075   //!
1076   //! // End of function (also emits function @c Epilog)
1077   //! c.endFunc();
1078   //! @endcode
1079   //!
1080   //! You can see that building functions is really easy. Previous code snipped
1081   //! will generate code for function with two 32-bit integer arguments. You
1082   //! can access arguments by @c AsmJit::Function::argument() method. Arguments
1083   //! are indexed from 0 (like everything in C).
1084   //!
1085   //! @code
1086   //! // Accessing function arguments through AsmJit::Function example.
1087   //!
1088   //! // Compiler instance
1089   //! X86Compiler c;
1090   //!
1091   //! // Begin of function (also emits function @c Prolog)
1092   //! c.newFunc(
1093   //!   // Default calling convention (32-bit cdecl or 64-bit for host OS)
1094   //!   kX86FuncConvDefault,
1095   //!   // Using function builder to generate arguments list
1096   //!   BuildFunction2<int, int>());
1097   //!
1098   //! // Arguments are like other variables, you need to reference them by
1099   //! // variable operands:
1100   //! GpVar a0 = c.getGpArg(0);
1101   //! GpVar a1 = c.getGpArg(1);
1102   //!
1103   //! // Use them.
1104   //! c.add(a0, a1);
1105   //!
1106   //! // End of function (emits function epilog and return)
1107   //! c.endFunc();
1108   //! @endcode
1109   //!
1110   //! Arguments are like variables. How to manipulate with variables is
1111   //! documented in @c AsmJit::Compiler, variables section.
1112   //!
1113   //! @note To get current function use @c currentFunction() method or save
1114   //! pointer to @c AsmJit::Function returned by @c AsmJit::Compiler::newFunc<>
1115   //! method. Recommended is to save the pointer.
1116   //!
1117   //! @sa @c BuildFunction0, @c BuildFunction1, @c BuildFunction2, ...
newFuncX86Compiler1118   inline X86CompilerFuncDecl* newFunc(uint32_t convention, const FuncPrototype& func)
1119   { return newFunc_(convention, func.getReturnType(), func.getArguments(), func.getArgumentsCount()); }
1120 
1121   //! @brief Create a new function (low level version).
1122   //!
1123   //! @param cconv Function calling convention (see @c AsmJit::kX86FuncConv).
1124   //! @param args Function arguments (see @c AsmJit::kX86VarType).
1125   //! @param count Arguments count.
1126   //!
1127   //! This method is internally called from @c newFunc() method and
1128   //! contains arguments thats used internally by @c AsmJit::Compiler.
1129   //!
1130   //! @note To get current function use @c currentFunction() method.
1131   ASMJIT_API X86CompilerFuncDecl* newFunc_(uint32_t convenion, uint32_t returnType, const uint32_t* arguments, uint32_t argumentsCount);
1132 
  //! @brief End of current function scope and all variables.
  //!
  //! Counterpart of @c newFunc().
  //!
  //! NOTE(review): the meaning of the returned pointer is not stated here;
  //! presumably it is the declaration of the function that was just ended -
  //! confirm against the implementation.
  ASMJIT_API X86CompilerFuncDecl* endFunc();
1135 
1136   // --------------------------------------------------------------------------
1137   // [Emit]
1138   // --------------------------------------------------------------------------
1139 
  //! @brief Emit instruction with no operand.
  //!
  //! @param code Instruction code; the inline wrappers of this class pass
  //! @c kX86Inst... constants (for example @c kX86InstAdd).
  ASMJIT_API void _emitInstruction(uint32_t code);

  //! @brief Emit instruction with one operand.
  ASMJIT_API void _emitInstruction(uint32_t code, const Operand* o0);

  //! @brief Emit instruction with two operands.
  ASMJIT_API void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1);

  //! @brief Emit instruction with three operands.
  ASMJIT_API void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2);

  //! @brief Emit instruction with four operands (Special instructions).
  ASMJIT_API void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2, const Operand* o3);

  //! @brief Emit instruction with five operands (Special instructions).
  ASMJIT_API void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2, const Operand* o3, const Operand* o4);

  //! @brief Private method for emitting jcc.
  //!
  //! @param code Instruction code of the conditional jump.
  //! @param label Jump target.
  //! @param hint Branch hint. NOTE(review): presumably @c kCondHintLikely or
  //! @c kCondHintUnlikely, as mentioned in the class documentation - confirm.
  ASMJIT_API void _emitJcc(uint32_t code, const Label* label, uint32_t hint);

  //! @brief Private method for emitting function call.
  //!
  //! @param o0 Call target operand; the @c call() wrappers of this class pass
  //! the address of their @c dst operand.
  ASMJIT_API X86CompilerFuncCall* _emitCall(const Operand* o0);

  //! @brief Private method for returning a value from the function.
  //!
  //! NOTE(review): @a first and @a second presumably hold the value(s) to
  //! return - confirm against the implementation.
  ASMJIT_API void _emitReturn(const Operand* first, const Operand* second);
1166 
1167   // --------------------------------------------------------------------------
1168   // [Align]
1169   // --------------------------------------------------------------------------
1170 
  //! @brief Align target buffer to @a m bytes.
  //!
  //! @param m Alignment in bytes.
  //!
  //! Typical usage of this is to align labels at the start of inner loops.
  //!
  //! Inserts @c nop() instructions or CPU optimized NOPs.
  ASMJIT_API void align(uint32_t m);
1177 
1178   // --------------------------------------------------------------------------
1179   // [Label]
1180   // --------------------------------------------------------------------------
1181 
  //! @brief Create and return a new label.
  //!
  //! The returned label is not bound yet; use @c bind() to attach it to a
  //! position in the code.
  ASMJIT_API Label newLabel();

  //! @brief Bind label to the current offset.
  //!
  //! @param label Label to bind, as returned by @c newLabel().
  //!
  //! @note Label can be bound only once!
  ASMJIT_API void bind(const Label& label);
1189 
1190   // --------------------------------------------------------------------------
1191   // [Variables]
1192   // --------------------------------------------------------------------------
1193 
1194   //! @brief Get compiler variable at @a id.
_getVarX86Compiler1195   inline X86CompilerVar* _getVar(uint32_t id) const
1196   {
1197     ASMJIT_ASSERT(id != kInvalidValue);
1198     return reinterpret_cast<X86CompilerVar*>(_vars[id & kOperandIdValueMask]);
1199   }
1200 
  //! @internal
  //!
  //! @brief Create a new variable data.
  //!
  //! NOTE(review): @a name, @a type and @a size presumably describe the
  //! variable's logger name, variable type and size in bytes - confirm
  //! against the implementation.
  ASMJIT_API X86CompilerVar* _newVar(const char* name, uint32_t type, uint32_t size);
1205 
  //! @brief Create a new general-purpose variable.
  //!
  //! @param varType Variable type, @c kX86VarTypeGpz when omitted.
  //! @param name Optional variable name, @c NULL when omitted.
  ASMJIT_API GpVar newGpVar(uint32_t varType = kX86VarTypeGpz, const char* name = NULL);
  //! @brief Get argument as general-purpose variable.
  //!
  //! @param argIndex Argument index (arguments are indexed from 0).
  ASMJIT_API GpVar getGpArg(uint32_t argIndex);

  //! @brief Create a new MM variable.
  //!
  //! @param varType Variable type, @c kX86VarTypeMm when omitted.
  //! @param name Optional variable name, @c NULL when omitted.
  ASMJIT_API MmVar newMmVar(uint32_t varType = kX86VarTypeMm, const char* name = NULL);
  //! @brief Get argument as MM variable.
  //!
  //! @param argIndex Argument index (arguments are indexed from 0).
  ASMJIT_API MmVar getMmArg(uint32_t argIndex);

  //! @brief Create a new XMM variable.
  //!
  //! @param varType Variable type, @c kX86VarTypeXmm when omitted.
  //! @param name Optional variable name, @c NULL when omitted.
  ASMJIT_API XmmVar newXmmVar(uint32_t varType = kX86VarTypeXmm, const char* name = NULL);
  //! @brief Get argument as XMM variable.
  //!
  //! @param argIndex Argument index (arguments are indexed from 0).
  ASMJIT_API XmmVar getXmmArg(uint32_t argIndex);
1220 
  //! @internal
  //!
  //! @brief Serialize variable hint.
  //!
  //! NOTE(review): the set of valid @a hintId / @a hintValue pairs is not
  //! visible here - confirm against the implementation.
  ASMJIT_API void _vhint(Var& var, uint32_t hintId, uint32_t hintValue);

  //! @brief Alloc variable @a var.
  ASMJIT_API void alloc(Var& var);
  //! @brief Alloc variable @a var using @a regIndex as a register index.
  ASMJIT_API void alloc(Var& var, uint32_t regIndex);
  //! @brief Alloc variable @a var using @a reg as a demanded register.
  ASMJIT_API void alloc(Var& var, const Reg& reg);
  //! @brief Spill variable @a var.
  //!
  //! The state-switch examples in the class documentation use this to move
  //! register-held variables to memory.
  ASMJIT_API void spill(Var& var);
  //! @brief Save variable @a var if modified.
  ASMJIT_API void save(Var& var);
  //! @brief Unuse variable @a var.
  ASMJIT_API void unuse(Var& var);
1238 
  //! @brief Get memory home of variable @a var.
  //!
  //! @param var Variable to query.
  //! @param home NOTE(review): presumably receives the variable used as the
  //! memory home base - confirm.
  //! @param displacement Optional (defaults to @c NULL). NOTE(review):
  //! presumably receives the displacement relative to @a home - confirm.
  ASMJIT_API void getMemoryHome(Var& var, GpVar* home, int* displacement = NULL);
1241 
  //! @brief Set memory home of variable @a var.
  //!
  //! @param var Variable whose memory home is changed.
  //! @param home Variable used as the base of the memory home.
  //! @param displacement Displacement added to @a home (0 when omitted).
  //!
  //! Default memory home location is on stack (ESP/RSP), but when needed the
  //! behavior can be changed by this method.
  //!
  //! It is an error to chain memory home locations. For example the given
  //! code is invalid:
  //!
  //! @code
  //! X86Compiler c;
  //!
  //! ...
  //! GpVar v0 = c.newGpVar();
  //! GpVar v1 = c.newGpVar();
  //! GpVar v2 = c.newGpVar();
  //! GpVar v3 = c.newGpVar();
  //!
  //! c.setMemoryHome(v1, v0, 0); // Allowed, [v0] is memory home for v1.
  //! c.setMemoryHome(v2, v0, 4); // Allowed, [v0+4] is memory home for v2.
  //! c.setMemoryHome(v3, v2);    // CHAINING, NOT ALLOWED!
  //! @endcode
  ASMJIT_API void setMemoryHome(Var& var, const GpVar& home, int displacement = 0);
1264 
  //! @brief Get priority of variable @a var.
  ASMJIT_API uint32_t getPriority(Var& var) const;
  //! @brief Set priority of variable @a var to @a priority.
  ASMJIT_API void setPriority(Var& var, uint32_t priority);

  //! @brief Get save-on-unuse @a var property.
  ASMJIT_API bool getSaveOnUnuse(Var& var) const;
  //! @brief Set save-on-unuse @a var property to @a value.
  ASMJIT_API void setSaveOnUnuse(Var& var, bool value);

  //! @brief Rename variable @a var to @a name.
  //!
  //! @param var Variable to rename.
  //! @param name New name of the variable.
  //!
  //! @note Only new name will appear in the logger.
  ASMJIT_API void rename(Var& var, const char* name);
1279 
1280   // --------------------------------------------------------------------------
1281   // [State]
1282   // --------------------------------------------------------------------------
1283 
  //! @internal
  //!
  //! @brief Create a new @ref X86CompilerState.
  //!
  //! NOTE(review): @a memVarsCount presumably sizes the state for that many
  //! memory variables - confirm against the implementation.
  ASMJIT_API X86CompilerState* _newState(uint32_t memVarsCount);
1288 
1289   // --------------------------------------------------------------------------
1290   // [Make]
1291   // --------------------------------------------------------------------------
1292 
  //! @brief Convenience method that makes the currently serialized code and
  //! returns a pointer to the generated function.
  //!
  //! All you need to do is cast this pointer to your function type and call
  //! it. Note that if there was an error and calling @c getError() method doesn't
  //! return @c kErrorOk (zero) then this function always returns @c NULL and
  //! error value remains the same.
  //!
  //! @return Pointer to the generated function, or @c NULL on error.
  ASMJIT_API virtual void* make();

  //! @brief Method that will emit everything to @c Assembler instance @a a.
  //!
  //! @param a Assembler that receives the emitted code.
  ASMJIT_API virtual void serialize(Assembler& a);
1304 
1305   // --------------------------------------------------------------------------
1306   // [Data]
1307   // --------------------------------------------------------------------------
1308 
1309   //! @brief Get target from label @a id.
_getTargetX86Compiler1310   inline X86CompilerTarget* _getTarget(uint32_t id)
1311   {
1312     ASMJIT_ASSERT((id & kOperandIdTypeMask) == kOperandIdTypeLabel);
1313     return reinterpret_cast<X86CompilerTarget*>(_targets[id & kOperandIdValueMask]);
1314   }
1315 
1316   // --------------------------------------------------------------------------
1317   // [Embed]
1318   // --------------------------------------------------------------------------
1319 
1320   //! @brief Add 8-bit integer data to the instuction stream.
dbX86Compiler1321   inline void db(uint8_t  x) { embed(&x, 1); }
1322   //! @brief Add 16-bit integer data to the instuction stream.
dwX86Compiler1323   inline void dw(uint16_t x) { embed(&x, 2); }
1324   //! @brief Add 32-bit integer data to the instuction stream.
ddX86Compiler1325   inline void dd(uint32_t x) { embed(&x, 4); }
1326   //! @brief Add 64-bit integer data to the instuction stream.
dqX86Compiler1327   inline void dq(uint64_t x) { embed(&x, 8); }
1328 
1329   //! @brief Add 8-bit integer data to the instuction stream.
dint8X86Compiler1330   inline void dint8(int8_t x) { embed(&x, sizeof(int8_t)); }
1331   //! @brief Add 8-bit integer data to the instuction stream.
duint8X86Compiler1332   inline void duint8(uint8_t x) { embed(&x, sizeof(uint8_t)); }
1333 
1334   //! @brief Add 16-bit integer data to the instuction stream.
dint16X86Compiler1335   inline void dint16(int16_t x) { embed(&x, sizeof(int16_t)); }
1336   //! @brief Add 16-bit integer data to the instuction stream.
duint16X86Compiler1337   inline void duint16(uint16_t x) { embed(&x, sizeof(uint16_t)); }
1338 
1339   //! @brief Add 32-bit integer data to the instuction stream.
dint32X86Compiler1340   inline void dint32(int32_t x) { embed(&x, sizeof(int32_t)); }
1341   //! @brief Add 32-bit integer data to the instuction stream.
duint32X86Compiler1342   inline void duint32(uint32_t x) { embed(&x, sizeof(uint32_t)); }
1343 
1344   //! @brief Add 64-bit integer data to the instuction stream.
dint64X86Compiler1345   inline void dint64(int64_t x) { embed(&x, sizeof(int64_t)); }
1346   //! @brief Add 64-bit integer data to the instuction stream.
duint64X86Compiler1347   inline void duint64(uint64_t x) { embed(&x, sizeof(uint64_t)); }
1348 
1349   //! @brief Add system-integer data to the instuction stream.
dintptrX86Compiler1350   inline void dintptr(intptr_t x) { embed(&x, sizeof(intptr_t)); }
1351   //! @brief Add system-integer data to the instuction stream.
duintptrX86Compiler1352   inline void duintptr(uintptr_t x) { embed(&x, sizeof(uintptr_t)); }
1353 
1354   //! @brief Add float data to the instuction stream.
dfloatX86Compiler1355   inline void dfloat(float x) { embed(&x, sizeof(float)); }
1356   //! @brief Add double data to the instuction stream.
ddoubleX86Compiler1357   inline void ddouble(double x) { embed(&x, sizeof(double)); }
1358 
1359   //! @brief Add pointer data to the instuction stream.
dptrX86Compiler1360   inline void dptr(void* x) { embed(&x, sizeof(void*)); }
1361 
1362   //! @brief Add MM data to the instuction stream.
dmmX86Compiler1363   inline void dmm(const MmData& x) { embed(&x, sizeof(MmData)); }
1364   //! @brief Add XMM data to the instuction stream.
dxmmX86Compiler1365   inline void dxmm(const XmmData& x) { embed(&x, sizeof(XmmData)); }
1366 
1367   //! @brief Add data to the instuction stream.
dataX86Compiler1368   inline void data(const void* data, size_t size) { embed(data, size); }
1369 
1370   //! @brief Add data in a given structure instance to the instuction stream.
1371   template<typename T>
dstructX86Compiler1372   inline void dstruct(const T& x) { embed(&x, sizeof(T)); }
1373 
1374   // --------------------------------------------------------------------------
1375   // [Custom Instructions]
1376   // --------------------------------------------------------------------------
1377 
1378   // These emitters are used by custom compiler code (register alloc / spill,
1379   // prolog / epilog generator, ...).
1380 
emitX86Compiler1381   inline void emit(uint32_t code)
1382   { _emitInstruction(code); }
1383 
emitX86Compiler1384   inline void emit(uint32_t code, const Operand& o0)
1385   { _emitInstruction(code, &o0); }
1386 
emitX86Compiler1387   inline void emit(uint32_t code, const Operand& o0, const Operand& o1)
1388   { _emitInstruction(code, &o0, &o1); }
1389 
emitX86Compiler1390   inline void emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2)
1391   { _emitInstruction(code, &o0, &o1, &o2); }
1392 
1393   // --------------------------------------------------------------------------
1394   // [X86 Instructions]
1395   // --------------------------------------------------------------------------
1396 
1397   //! @brief Add with Carry.
adcX86Compiler1398   inline void adc(const GpVar& dst, const GpVar& src)
1399   { _emitInstruction(kX86InstAdc, &dst, &src); }
1400 
1401   //! @brief Add with Carry.
adcX86Compiler1402   inline void adc(const GpVar& dst, const Mem& src)
1403   { _emitInstruction(kX86InstAdc, &dst, &src); }
1404 
1405   //! @brief Add with Carry.
adcX86Compiler1406   inline void adc(const GpVar& dst, const Imm& src)
1407   { _emitInstruction(kX86InstAdc, &dst, &src); }
1408 
1409   //! @brief Add with Carry.
adcX86Compiler1410   inline void adc(const Mem& dst, const GpVar& src)
1411   { _emitInstruction(kX86InstAdc, &dst, &src); }
1412 
1413   //! @brief Add with Carry.
adcX86Compiler1414   inline void adc(const Mem& dst, const Imm& src)
1415   { _emitInstruction(kX86InstAdc, &dst, &src); }
1416 
1417   //! @brief Add.
addX86Compiler1418   inline void add(const GpVar& dst, const GpVar& src)
1419   { _emitInstruction(kX86InstAdd, &dst, &src); }
1420 
1421   //! @brief Add.
addX86Compiler1422   inline void add(const GpVar& dst, const Mem& src)
1423   { _emitInstruction(kX86InstAdd, &dst, &src); }
1424 
1425   //! @brief Add.
addX86Compiler1426   inline void add(const GpVar& dst, const Imm& src)
1427   { _emitInstruction(kX86InstAdd, &dst, &src); }
1428 
1429   //! @brief Add.
addX86Compiler1430   inline void add(const Mem& dst, const GpVar& src)
1431   { _emitInstruction(kX86InstAdd, &dst, &src); }
1432 
1433   //! @brief Add.
addX86Compiler1434   inline void add(const Mem& dst, const Imm& src)
1435   { _emitInstruction(kX86InstAdd, &dst, &src); }
1436 
1437   //! @brief Logical And.
and_X86Compiler1438   inline void and_(const GpVar& dst, const GpVar& src)
1439   { _emitInstruction(kX86InstAnd, &dst, &src); }
1440 
1441   //! @brief Logical And.
and_X86Compiler1442   inline void and_(const GpVar& dst, const Mem& src)
1443   { _emitInstruction(kX86InstAnd, &dst, &src); }
1444 
1445   //! @brief Logical And.
and_X86Compiler1446   inline void and_(const GpVar& dst, const Imm& src)
1447   { _emitInstruction(kX86InstAnd, &dst, &src); }
1448 
1449   //! @brief Logical And.
and_X86Compiler1450   inline void and_(const Mem& dst, const GpVar& src)
1451   { _emitInstruction(kX86InstAnd, &dst, &src); }
1452 
1453   //! @brief Logical And.
and_X86Compiler1454   inline void and_(const Mem& dst, const Imm& src)
1455   { _emitInstruction(kX86InstAnd, &dst, &src); }
1456 
  //! @brief Bit Scan Forward (variable, variable).
  //!
  //! Asserts @c !dst.isGpb() before emitting (presumably: the destination
  //! must not be a byte-sized variable - confirm).
  inline void bsf(const GpVar& dst, const GpVar& src)
  {
    ASMJIT_ASSERT(!dst.isGpb());
    _emitInstruction(kX86InstBsf, &dst, &src);
  }

  //! @brief Bit Scan Forward (variable, memory).
  //!
  //! Asserts @c !dst.isGpb() before emitting (presumably: the destination
  //! must not be a byte-sized variable - confirm).
  inline void bsf(const GpVar& dst, const Mem& src)
  {
    ASMJIT_ASSERT(!dst.isGpb());
    _emitInstruction(kX86InstBsf, &dst, &src);
  }
1470 
  //! @brief Bit Scan Reverse (variable, variable).
  //!
  //! Asserts @c !dst.isGpb() before emitting (presumably: the destination
  //! must not be a byte-sized variable - confirm).
  inline void bsr(const GpVar& dst, const GpVar& src)
  {
    ASMJIT_ASSERT(!dst.isGpb());
    _emitInstruction(kX86InstBsr, &dst, &src);
  }

  //! @brief Bit Scan Reverse (variable, memory).
  //!
  //! Asserts @c !dst.isGpb() before emitting (presumably: the destination
  //! must not be a byte-sized variable - confirm).
  inline void bsr(const GpVar& dst, const Mem& src)
  {
    ASMJIT_ASSERT(!dst.isGpb());
    _emitInstruction(kX86InstBsr, &dst, &src);
  }
1484 
  //! @brief Byte swap (32-bit or 64-bit registers only) (i486).
  //!
  //! @param dst Variable whose bytes are swapped.
  //!
  //! @note The operand-size restriction above is not enforced by this
  //! wrapper; the register-type assertion below is disabled.
  inline void bswap(const GpVar& dst)
  {
    // ASMJIT_ASSERT(dst.getRegType() == kX86RegGPD || dst.getRegType() == kX86RegGPQ);
    _emitInstruction(kX86InstBSwap, &dst);
  }
1491 
1492   //! @brief Bit test.
btX86Compiler1493   inline void bt(const GpVar& dst, const GpVar& src)
1494   { _emitInstruction(kX86InstBt, &dst, &src); }
1495 
1496   //! @brief Bit test.
btX86Compiler1497   inline void bt(const GpVar& dst, const Imm& src)
1498   { _emitInstruction(kX86InstBt, &dst, &src); }
1499 
1500   //! @brief Bit test.
btX86Compiler1501   inline void bt(const Mem& dst, const GpVar& src)
1502   { _emitInstruction(kX86InstBt, &dst, &src); }
1503 
1504   //! @brief Bit test.
btX86Compiler1505   inline void bt(const Mem& dst, const Imm& src)
1506   { _emitInstruction(kX86InstBt, &dst, &src); }
1507 
1508   //! @brief Bit test and complement.
btcX86Compiler1509   inline void btc(const GpVar& dst, const GpVar& src)
1510   { _emitInstruction(kX86InstBtc, &dst, &src); }
1511 
1512   //! @brief Bit test and complement.
btcX86Compiler1513   inline void btc(const GpVar& dst, const Imm& src)
1514   { _emitInstruction(kX86InstBtc, &dst, &src); }
1515 
1516   //! @brief Bit test and complement.
btcX86Compiler1517   inline void btc(const Mem& dst, const GpVar& src)
1518   { _emitInstruction(kX86InstBtc, &dst, &src); }
1519 
1520   //! @brief Bit test and complement.
btcX86Compiler1521   inline void btc(const Mem& dst, const Imm& src)
1522   { _emitInstruction(kX86InstBtc, &dst, &src); }
1523 
1524   //! @brief Bit test and reset.
btrX86Compiler1525   inline void btr(const GpVar& dst, const GpVar& src)
1526   { _emitInstruction(kX86InstBtr, &dst, &src); }
1527 
1528   //! @brief Bit test and reset.
btrX86Compiler1529   inline void btr(const GpVar& dst, const Imm& src)
1530   { _emitInstruction(kX86InstBtr, &dst, &src); }
1531 
1532   //! @brief Bit test and reset.
btrX86Compiler1533   inline void btr(const Mem& dst, const GpVar& src)
1534   { _emitInstruction(kX86InstBtr, &dst, &src); }
1535 
1536   //! @brief Bit test and reset.
btrX86Compiler1537   inline void btr(const Mem& dst, const Imm& src)
1538   { _emitInstruction(kX86InstBtr, &dst, &src); }
1539 
1540   //! @brief Bit test and set.
btsX86Compiler1541   inline void bts(const GpVar& dst, const GpVar& src)
1542   { _emitInstruction(kX86InstBts, &dst, &src); }
1543 
1544   //! @brief Bit test and set.
btsX86Compiler1545   inline void bts(const GpVar& dst, const Imm& src)
1546   { _emitInstruction(kX86InstBts, &dst, &src); }
1547 
1548   //! @brief Bit test and set.
btsX86Compiler1549   inline void bts(const Mem& dst, const GpVar& src)
1550   { _emitInstruction(kX86InstBts, &dst, &src); }
1551 
1552   //! @brief Bit test and set.
btsX86Compiler1553   inline void bts(const Mem& dst, const Imm& src)
1554   { _emitInstruction(kX86InstBts, &dst, &src); }
1555 
1556   //! @brief Call Procedure.
callX86Compiler1557   inline X86CompilerFuncCall* call(const GpVar& dst)
1558   { return _emitCall(&dst); }
1559 
1560   //! @brief Call Procedure.
callX86Compiler1561   inline X86CompilerFuncCall* call(const Mem& dst)
1562   { return _emitCall(&dst); }
1563 
1564   //! @brief Call Procedure.
callX86Compiler1565   inline X86CompilerFuncCall* call(const Imm& dst)
1566   { return _emitCall(&dst); }
1567 
1568   //! @brief Call Procedure.
1569   //! @overload
callX86Compiler1570   inline X86CompilerFuncCall* call(void* dst)
1571   {
1572     Imm imm((sysint_t)dst);
1573     return _emitCall(&imm);
1574   }
1575 
1576   //! @brief Call Procedure.
callX86Compiler1577   inline X86CompilerFuncCall* call(const Label& label)
1578   { return _emitCall(&label); }
1579 
1580   //! @brief Convert Byte to Word (Sign Extend).
cbwX86Compiler1581   inline void cbw(const GpVar& dst)
1582   { _emitInstruction(kX86InstCbw, &dst); }
1583 
1584   //! @brief Convert Word to DWord (Sign Extend).
cwdX86Compiler1585   inline void cwd(const GpVar& dst, const GpVar& src)
1586   { _emitInstruction(kX86InstCwd, &dst, &src); }
1587 
1588   //! @brief Convert Word to DWord (Sign Extend).
cwdeX86Compiler1589   inline void cwde(const GpVar& dst)
1590   { _emitInstruction(kX86InstCwde, &dst); }
1591 
1592   //! @brief Convert Word to DWord (Sign Extend).
cdqX86Compiler1593   inline void cdq(const GpVar& dst, const GpVar& src)
1594   { _emitInstruction(kX86InstCdq, &dst, &src); }
1595 
1596 #if defined(ASMJIT_X64)
1597   //! @brief Convert DWord to QWord (Sign Extend).
cdqeX86Compiler1598   inline void cdqe(const GpVar& dst)
1599   { _emitInstruction(kX86InstCdqe, &dst); }
1600 #endif // ASMJIT_X64
1601 
1602 #if defined(ASMJIT_X64)
1603   //! @brief Convert QWord to DQWord (Sign Extend).
cqoX86Compiler1604   inline void cqo(const GpVar& dst, const GpVar& src)
1605   { _emitInstruction(kX86InstCqo, &dst, &src); }
1606 #endif // ASMJIT_X64
1607 
1608   //! @brief Clear Carry flag
1609   //!
1610   //! This instruction clears the CF flag in the EFLAGS register.
clcX86Compiler1611   inline void clc()
1612   { _emitInstruction(kX86InstClc); }
1613 
1614   //! @brief Clear Direction flag
1615   //!
1616   //! This instruction clears the DF flag in the EFLAGS register.
cldX86Compiler1617   inline void cld()
1618   { _emitInstruction(kX86InstCld); }
1619 
1620   //! @brief Complement Carry Flag.
1621   //!
1622   //! This instruction complements the CF flag in the EFLAGS register.
1623   //! (CF = NOT CF)
cmcX86Compiler1624   inline void cmc()
1625   { _emitInstruction(kX86InstCmc); }
1626 
1627   //! @brief Conditional Move.
cmovX86Compiler1628   inline void cmov(kX86Cond cc, const GpVar& dst, const GpVar& src)
1629   { _emitInstruction(X86Util::getCMovccInstFromCond(cc), &dst, &src); }
1630 
1631   //! @brief Conditional Move.
cmovX86Compiler1632   inline void cmov(kX86Cond cc, const GpVar& dst, const Mem& src)
1633   { _emitInstruction(X86Util::getCMovccInstFromCond(cc), &dst, &src); }
1634 
1635   //! @brief Conditional Move.
cmovaX86Compiler1636   inline void cmova  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovA  , &dst, &src); }
1637   //! @brief Conditional Move.
cmovaX86Compiler1638   inline void cmova  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovA  , &dst, &src); }
1639   //! @brief Conditional Move.
cmovaeX86Compiler1640   inline void cmovae (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovAE , &dst, &src); }
1641   //! @brief Conditional Move.
cmovaeX86Compiler1642   inline void cmovae (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovAE , &dst, &src); }
1643   //! @brief Conditional Move.
cmovbX86Compiler1644   inline void cmovb  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovB  , &dst, &src); }
1645   //! @brief Conditional Move.
cmovbX86Compiler1646   inline void cmovb  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovB  , &dst, &src); }
1647   //! @brief Conditional Move.
cmovbeX86Compiler1648   inline void cmovbe (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovBE , &dst, &src); }
1649   //! @brief Conditional Move.
cmovbeX86Compiler1650   inline void cmovbe (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovBE , &dst, &src); }
1651   //! @brief Conditional Move.
cmovcX86Compiler1652   inline void cmovc  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovC  , &dst, &src); }
1653   //! @brief Conditional Move.
cmovcX86Compiler1654   inline void cmovc  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovC  , &dst, &src); }
1655   //! @brief Conditional Move.
cmoveX86Compiler1656   inline void cmove  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovE  , &dst, &src); }
1657   //! @brief Conditional Move.
cmoveX86Compiler1658   inline void cmove  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovE  , &dst, &src); }
1659   //! @brief Conditional Move.
cmovgX86Compiler1660   inline void cmovg  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovG  , &dst, &src); }
1661   //! @brief Conditional Move.
cmovgX86Compiler1662   inline void cmovg  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovG  , &dst, &src); }
1663   //! @brief Conditional Move.
cmovgeX86Compiler1664   inline void cmovge (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovGE , &dst, &src); }
1665   //! @brief Conditional Move.
cmovgeX86Compiler1666   inline void cmovge (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovGE , &dst, &src); }
1667   //! @brief Conditional Move.
cmovlX86Compiler1668   inline void cmovl  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovL  , &dst, &src); }
1669   //! @brief Conditional Move.
cmovlX86Compiler1670   inline void cmovl  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovL  , &dst, &src); }
1671   //! @brief Conditional Move.
cmovleX86Compiler1672   inline void cmovle (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovLE , &dst, &src); }
1673   //! @brief Conditional Move.
cmovleX86Compiler1674   inline void cmovle (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovLE , &dst, &src); }
1675   //! @brief Conditional Move.
cmovnaX86Compiler1676   inline void cmovna (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNA , &dst, &src); }
1677   //! @brief Conditional Move.
cmovnaX86Compiler1678   inline void cmovna (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNA , &dst, &src); }
1679   //! @brief Conditional Move.
cmovnaeX86Compiler1680   inline void cmovnae(const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNAE, &dst, &src); }
1681   //! @brief Conditional Move.
cmovnaeX86Compiler1682   inline void cmovnae(const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNAE, &dst, &src); }
1683   //! @brief Conditional Move.
cmovnbX86Compiler1684   inline void cmovnb (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNB , &dst, &src); }
1685   //! @brief Conditional Move.
cmovnbX86Compiler1686   inline void cmovnb (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNB , &dst, &src); }
1687   //! @brief Conditional Move.
cmovnbeX86Compiler1688   inline void cmovnbe(const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNBE, &dst, &src); }
1689   //! @brief Conditional Move.
cmovnbeX86Compiler1690   inline void cmovnbe(const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNBE, &dst, &src); }
1691   //! @brief Conditional Move.
cmovncX86Compiler1692   inline void cmovnc (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNC , &dst, &src); }
1693   //! @brief Conditional Move.
cmovncX86Compiler1694   inline void cmovnc (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNC , &dst, &src); }
1695   //! @brief Conditional Move.
cmovneX86Compiler1696   inline void cmovne (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNE , &dst, &src); }
1697   //! @brief Conditional Move.
cmovneX86Compiler1698   inline void cmovne (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNE , &dst, &src); }
1699   //! @brief Conditional Move.
cmovngX86Compiler1700   inline void cmovng (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNG , &dst, &src); }
1701   //! @brief Conditional Move.
cmovngX86Compiler1702   inline void cmovng (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNG , &dst, &src); }
1703   //! @brief Conditional Move.
cmovngeX86Compiler1704   inline void cmovnge(const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNGE, &dst, &src); }
1705   //! @brief Conditional Move.
cmovngeX86Compiler1706   inline void cmovnge(const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNGE, &dst, &src); }
1707   //! @brief Conditional Move.
cmovnlX86Compiler1708   inline void cmovnl (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNL , &dst, &src); }
1709   //! @brief Conditional Move.
cmovnlX86Compiler1710   inline void cmovnl (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNL , &dst, &src); }
1711   //! @brief Conditional Move.
cmovnleX86Compiler1712   inline void cmovnle(const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNLE, &dst, &src); }
1713   //! @brief Conditional Move.
cmovnleX86Compiler1714   inline void cmovnle(const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNLE, &dst, &src); }
1715   //! @brief Conditional Move.
cmovnoX86Compiler1716   inline void cmovno (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNO , &dst, &src); }
1717   //! @brief Conditional Move.
cmovnoX86Compiler1718   inline void cmovno (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNO , &dst, &src); }
1719   //! @brief Conditional Move.
cmovnpX86Compiler1720   inline void cmovnp (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNP , &dst, &src); }
1721   //! @brief Conditional Move.
cmovnpX86Compiler1722   inline void cmovnp (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNP , &dst, &src); }
1723   //! @brief Conditional Move.
cmovnsX86Compiler1724   inline void cmovns (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNS , &dst, &src); }
1725   //! @brief Conditional Move.
cmovnsX86Compiler1726   inline void cmovns (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNS , &dst, &src); }
1727   //! @brief Conditional Move.
cmovnzX86Compiler1728   inline void cmovnz (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovNZ , &dst, &src); }
1729   //! @brief Conditional Move.
cmovnzX86Compiler1730   inline void cmovnz (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovNZ , &dst, &src); }
1731   //! @brief Conditional Move.
cmovoX86Compiler1732   inline void cmovo  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovO  , &dst, &src); }
1733   //! @brief Conditional Move.
cmovoX86Compiler1734   inline void cmovo  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovO  , &dst, &src); }
1735   //! @brief Conditional Move.
cmovpX86Compiler1736   inline void cmovp  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovP  , &dst, &src); }
1737   //! @brief Conditional Move.
cmovpX86Compiler1738   inline void cmovp  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovP  , &dst, &src); }
1739   //! @brief Conditional Move.
cmovpeX86Compiler1740   inline void cmovpe (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovPE , &dst, &src); }
1741   //! @brief Conditional Move.
cmovpeX86Compiler1742   inline void cmovpe (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovPE , &dst, &src); }
1743   //! @brief Conditional Move.
cmovpoX86Compiler1744   inline void cmovpo (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovPO , &dst, &src); }
1745   //! @brief Conditional Move.
cmovpoX86Compiler1746   inline void cmovpo (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovPO , &dst, &src); }
1747   //! @brief Conditional Move.
cmovsX86Compiler1748   inline void cmovs  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovS  , &dst, &src); }
1749   //! @brief Conditional Move.
cmovsX86Compiler1750   inline void cmovs  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovS  , &dst, &src); }
1751   //! @brief Conditional Move.
cmovzX86Compiler1752   inline void cmovz  (const GpVar& dst, const GpVar& src) { _emitInstruction(kX86InstCMovZ  , &dst, &src); }
1753   //! @brief Conditional Move.
cmovzX86Compiler1754   inline void cmovz  (const GpVar& dst, const Mem& src)   { _emitInstruction(kX86InstCMovZ  , &dst, &src); }
1755 
1756   //! @brief Compare Two Operands.
cmpX86Compiler1757   inline void cmp(const GpVar& dst, const GpVar& src)
1758   { _emitInstruction(kX86InstCmp, &dst, &src); }
1759 
1760   //! @brief Compare Two Operands.
cmpX86Compiler1761   inline void cmp(const GpVar& dst, const Mem& src)
1762   { _emitInstruction(kX86InstCmp, &dst, &src); }
1763 
1764   //! @brief Compare Two Operands.
cmpX86Compiler1765   inline void cmp(const GpVar& dst, const Imm& src)
1766   { _emitInstruction(kX86InstCmp, &dst, &src); }
1767 
1768   //! @brief Compare Two Operands.
cmpX86Compiler1769   inline void cmp(const Mem& dst, const GpVar& src)
1770   { _emitInstruction(kX86InstCmp, &dst, &src); }
1771 
1772   //! @brief Compare Two Operands.
cmpX86Compiler1773   inline void cmp(const Mem& dst, const Imm& src)
1774   { _emitInstruction(kX86InstCmp, &dst, &src); }
1775 
1776   //! @brief Compare and Exchange (i486).
cmpxchgX86Compiler1777   inline void cmpxchg(const GpVar cmp_1_eax, const GpVar& cmp_2, const GpVar& src)
1778   {
1779     ASMJIT_ASSERT(cmp_1_eax.getId() != src.getId());
1780     _emitInstruction(kX86InstCmpXCHG, &cmp_1_eax, &cmp_2, &src);
1781   }
1782 
1783   //! @brief Compare and Exchange (i486).
cmpxchgX86Compiler1784   inline void cmpxchg(const GpVar cmp_1_eax, const Mem& cmp_2, const GpVar& src)
1785   {
1786     ASMJIT_ASSERT(cmp_1_eax.getId() != src.getId());
1787     _emitInstruction(kX86InstCmpXCHG, &cmp_1_eax, &cmp_2, &src);
1788   }
1789 
1790   //! @brief Compares the 64-bit value in EDX:EAX with the memory operand (Pentium).
1791   //!
1792   //! If the values are equal, then this instruction stores the 64-bit value
1793   //! in ECX:EBX into the memory operand and sets the zero flag. Otherwise,
1794   //! this instruction copies the 64-bit memory operand into the EDX:EAX
1795   //! registers and clears the zero flag.
cmpxchg8bX86Compiler1796   inline void cmpxchg8b(
1797     const GpVar& cmp_edx, const GpVar& cmp_eax,
1798     const GpVar& cmp_ecx, const GpVar& cmp_ebx,
1799     const Mem& dst)
1800   {
1801     ASMJIT_ASSERT(cmp_edx.getId() != cmp_eax.getId() &&
1802                   cmp_eax.getId() != cmp_ecx.getId() &&
1803                   cmp_ecx.getId() != cmp_ebx.getId());
1804 
1805     _emitInstruction(kX86InstCmpXCHG8B, &cmp_edx, &cmp_eax, &cmp_ecx, &cmp_ebx, &dst);
1806   }
1807 
1808 #if defined(ASMJIT_X64)
1809   //! @brief Compares the 128-bit value in RDX:RAX with the memory operand (X64).
1810   //!
1811   //! If the values are equal, then this instruction stores the 128-bit value
1812   //! in RCX:RBX into the memory operand and sets the zero flag. Otherwise,
1813   //! this instruction copies the 128-bit memory operand into the RDX:RAX
1814   //! registers and clears the zero flag.
cmpxchg16bX86Compiler1815   inline void cmpxchg16b(
1816     const GpVar& cmp_edx, const GpVar& cmp_eax,
1817     const GpVar& cmp_ecx, const GpVar& cmp_ebx,
1818     const Mem& dst)
1819   {
1820     ASMJIT_ASSERT(cmp_edx.getId() != cmp_eax.getId() &&
1821                   cmp_eax.getId() != cmp_ecx.getId() &&
1822                   cmp_ecx.getId() != cmp_ebx.getId());
1823 
1824     _emitInstruction(kX86InstCmpXCHG16B, &cmp_edx, &cmp_eax, &cmp_ecx, &cmp_ebx, &dst);
1825   }
1826 #endif // ASMJIT_X64
1827 
1828   //! @brief CPU Identification (i486).
cpuidX86Compiler1829   inline void cpuid(
1830     const GpVar& inout_eax,
1831     const GpVar& out_ebx,
1832     const GpVar& out_ecx,
1833     const GpVar& out_edx)
1834   {
1835     // Destination variables must be different.
1836     ASMJIT_ASSERT(inout_eax.getId() != out_ebx.getId() &&
1837                   out_ebx.getId() != out_ecx.getId() &&
1838                   out_ecx.getId() != out_edx.getId());
1839 
1840     _emitInstruction(kX86InstCpuId, &inout_eax, &out_ebx, &out_ecx, &out_edx);
1841   }
1842 
1843 #if defined(ASMJIT_X86)
daaX86Compiler1844   inline void daa(const GpVar& dst)
1845   { _emitInstruction(kX86InstDaa, &dst); }
1846 #endif // ASMJIT_X86
1847 
1848 #if defined(ASMJIT_X86)
dasX86Compiler1849   inline void das(const GpVar& dst)
1850   { _emitInstruction(kX86InstDas, &dst); }
1851 #endif // ASMJIT_X86
1852 
1853   //! @brief Decrement by 1.
1854   //! @note This instruction can be slower than sub(dst, 1)
decX86Compiler1855   inline void dec(const GpVar& dst)
1856   { _emitInstruction(kX86InstDec, &dst); }
1857 
1858   //! @brief Decrement by 1.
1859   //! @note This instruction can be slower than sub(dst, 1)
decX86Compiler1860   inline void dec(const Mem& dst)
1861   { _emitInstruction(kX86InstDec, &dst); }
1862 
1863   //! @brief Unsigned divide.
1864   //!
1865   //! This instruction divides (unsigned) the value in the AL, AX, or EAX
1866   //! register by the source operand and stores the result in the AX,
1867   //! DX:AX, or EDX:EAX registers.
divX86Compiler1868   inline void div(const GpVar& dst_rem, const GpVar& dst_quot, const GpVar& src)
1869   {
1870     // Destination variables must be different.
1871     ASMJIT_ASSERT(dst_rem.getId() != dst_quot.getId());
1872     _emitInstruction(kX86InstDiv, &dst_rem, &dst_quot, &src);
1873   }
1874 
1875   //! @brief Unsigned divide.
1876   //! @overload
divX86Compiler1877   inline void div(const GpVar& dst_rem, const GpVar& dst_quot, const Mem& src)
1878   {
1879     // Destination variables must be different.
1880     ASMJIT_ASSERT(dst_rem.getId() != dst_quot.getId());
1881     _emitInstruction(kX86InstDiv, &dst_rem, &dst_quot, &src);
1882   }
1883 
1884 #if ASMJIT_NOT_SUPPORTED_BY_COMPILER
1885   //! @brief Make Stack Frame for Procedure Parameters.
enterX86Compiler1886   inline void enter(const Imm& imm16, const Imm& imm8)
1887   { _emitInstruction(kX86InstEnter, &imm16, &imm8); }
1888 #endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER
1889 
1890   //! @brief Signed divide.
1891   //!
1892   //! This instruction divides (signed) the value in the AL, AX, or EAX
1893   //! register by the source operand and stores the result in the AX,
1894   //! DX:AX, or EDX:EAX registers.
idivX86Compiler1895   inline void idiv(const GpVar& dst_rem, const GpVar& dst_quot, const GpVar& src)
1896   {
1897     // Destination variables must be different.
1898     ASMJIT_ASSERT(dst_rem.getId() != dst_quot.getId());
1899     _emitInstruction(kX86InstIDiv, &dst_rem, &dst_quot, &src);
1900   }
1901 
1902   //! @brief Signed divide.
1903   //! @overload
idivX86Compiler1904   inline void idiv(const GpVar& dst_rem, const GpVar& dst_quot, const Mem& src)
1905   {
1906     // Destination variables must be different.
1907     ASMJIT_ASSERT(dst_rem.getId() != dst_quot.getId());
1908     _emitInstruction(kX86InstIDiv, &dst_rem, &dst_quot, &src);
1909   }
1910 
1911   //! @brief Signed multiply.
1912   //!
1913   //! [dst_lo:dst_hi] = dst_hi * src.
imulX86Compiler1914   inline void imul(const GpVar& dst_hi, const GpVar& dst_lo, const GpVar& src)
1915   {
1916     // Destination variables must be different.
1917     ASMJIT_ASSERT(dst_hi.getId() != dst_lo.getId());
1918     _emitInstruction(kX86InstIMul, &dst_hi, &dst_lo, &src);
1919   }
1920 
1921   //! @overload
imulX86Compiler1922   inline void imul(const GpVar& dst_hi, const GpVar& dst_lo, const Mem& src)
1923   {
1924     // Destination variables must be different.
1925     ASMJIT_ASSERT(dst_hi.getId() != dst_lo.getId());
1926     _emitInstruction(kX86InstIMul, &dst_hi, &dst_lo, &src);
1927   }
1928 
1929   //! @brief Signed multiply.
1930   //!
1931   //! Destination operand (the first operand) is multiplied by the source
1932   //! operand (second operand). The destination operand is a general-purpose
1933   //! register and the source operand is an immediate value, a general-purpose
1934   //! register, or a memory location. The product is then stored in the
1935   //! destination operand location.
imulX86Compiler1936   inline void imul(const GpVar& dst, const GpVar& src)
1937   { _emitInstruction(kX86InstIMul, &dst, &src); }
1938 
1939   //! @brief Signed multiply.
1940   //! @overload
imulX86Compiler1941   inline void imul(const GpVar& dst, const Mem& src)
1942   { _emitInstruction(kX86InstIMul, &dst, &src); }
1943 
1944   //! @brief Signed multiply.
1945   //! @overload
imulX86Compiler1946   inline void imul(const GpVar& dst, const Imm& src)
1947   { _emitInstruction(kX86InstIMul, &dst, &src); }
1948 
1949   //! @brief Signed multiply.
1950   //!
1951   //! source operand (which can be a general-purpose register or a memory
1952   //! location) is multiplied by the second source operand (an immediate
1953   //! value). The product is then stored in the destination operand
1954   //! (a general-purpose register).
imulX86Compiler1955   inline void imul(const GpVar& dst, const GpVar& src, const Imm& imm)
1956   { _emitInstruction(kX86InstIMul, &dst, &src, &imm); }
1957 
1958   //! @overload
imulX86Compiler1959   inline void imul(const GpVar& dst, const Mem& src, const Imm& imm)
1960   { _emitInstruction(kX86InstIMul, &dst, &src, &imm); }
1961 
1962   //! @brief Increment by 1.
1963   //! @note This instruction can be slower than add(dst, 1)
incX86Compiler1964   inline void inc(const GpVar& dst)
1965   { _emitInstruction(kX86InstInc, &dst); }
1966 
1967   //! @brief Increment by 1.
1968   //! @note This instruction can be slower than add(dst, 1)
incX86Compiler1969   inline void inc(const Mem& dst)
1970   { _emitInstruction(kX86InstInc, &dst); }
1971 
1972   //! @brief Interrupt 3 - trap to debugger.
int3X86Compiler1973   inline void int3()
1974   { _emitInstruction(kX86InstInt3); }
1975 
1976   //! @brief Jump to label @a label if condition @a cc is met.
1977   //!
1978   //! This instruction checks the state of one or more of the status flags in
1979   //! the EFLAGS register (CF, OF, PF, SF, and ZF) and, if the flags are in the
1980   //! specified state (condition), performs a jump to the target instruction
1981   //! specified by the destination operand. A condition code (cc) is associated
1982   //! with each instruction to indicate the condition being tested for. If the
1983   //! condition is not satisfied, the jump is not performed and execution
1984   //! continues with the instruction following the Jcc instruction.
1985   inline void j(kX86Cond cc, const Label& label, uint32_t hint = kCondHintNone)
1986   { _emitJcc(X86Util::getJccInstFromCond(cc), &label, hint); }
1987 
1988   //! @brief Jump to label @a label if condition is met.
1989   inline void ja  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJA  , &label, hint); }
1990   //! @brief Jump to label @a label if condition is met.
1991   inline void jae (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJAE , &label, hint); }
1992   //! @brief Jump to label @a label if condition is met.
1993   inline void jb  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJB  , &label, hint); }
1994   //! @brief Jump to label @a label if condition is met.
1995   inline void jbe (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJBE , &label, hint); }
1996   //! @brief Jump to label @a label if condition is met.
1997   inline void jc  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJC  , &label, hint); }
1998   //! @brief Jump to label @a label if condition is met.
1999   inline void je  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJE  , &label, hint); }
2000   //! @brief Jump to label @a label if condition is met.
2001   inline void jg  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJG  , &label, hint); }
2002   //! @brief Jump to label @a label if condition is met.
2003   inline void jge (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJGE , &label, hint); }
2004   //! @brief Jump to label @a label if condition is met.
2005   inline void jl  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJL  , &label, hint); }
2006   //! @brief Jump to label @a label if condition is met.
2007   inline void jle (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJLE , &label, hint); }
2008   //! @brief Jump to label @a label if condition is met.
2009   inline void jna (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNA , &label, hint); }
2010   //! @brief Jump to label @a label if condition is met.
2011   inline void jnae(const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNAE, &label, hint); }
2012   //! @brief Jump to label @a label if condition is met.
2013   inline void jnb (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNB , &label, hint); }
2014   //! @brief Jump to label @a label if condition is met.
2015   inline void jnbe(const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNBE, &label, hint); }
2016   //! @brief Jump to label @a label if condition is met.
2017   inline void jnc (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNC , &label, hint); }
2018   //! @brief Jump to label @a label if condition is met.
2019   inline void jne (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNE , &label, hint); }
2020   //! @brief Jump to label @a label if condition is met.
2021   inline void jng (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNG , &label, hint); }
2022   //! @brief Jump to label @a label if condition is met.
2023   inline void jnge(const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNGE, &label, hint); }
2024   //! @brief Jump to label @a label if condition is met.
2025   inline void jnl (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNL , &label, hint); }
2026   //! @brief Jump to label @a label if condition is met.
2027   inline void jnle(const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNLE, &label, hint); }
2028   //! @brief Jump to label @a label if condition is met.
2029   inline void jno (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNO , &label, hint); }
2030   //! @brief Jump to label @a label if condition is met.
2031   inline void jnp (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNP , &label, hint); }
2032   //! @brief Jump to label @a label if condition is met.
2033   inline void jns (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNS , &label, hint); }
2034   //! @brief Jump to label @a label if condition is met.
2035   inline void jnz (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJNZ , &label, hint); }
2036   //! @brief Jump to label @a label if condition is met.
2037   inline void jo  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJO  , &label, hint); }
2038   //! @brief Jump to label @a label if condition is met.
2039   inline void jp  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJP  , &label, hint); }
2040   //! @brief Jump to label @a label if condition is met.
2041   inline void jpe (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJPE , &label, hint); }
2042   //! @brief Jump to label @a label if condition is met.
2043   inline void jpo (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJPO , &label, hint); }
2044   //! @brief Jump to label @a label if condition is met.
2045   inline void js  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJS  , &label, hint); }
2046   //! @brief Jump to label @a label if condition is met.
2047   inline void jz  (const Label& label, uint32_t hint = kCondHintNone) { _emitJcc(kX86InstJZ  , &label, hint); }
2048 
2049   //! @brief Jump.
2050   //! @overload
jmpX86Compiler2051   inline void jmp(const GpVar& dst)
2052   { _emitInstruction(kX86InstJmp, &dst); }
2053 
2054   //! @brief Jump.
2055   //! @overload
jmpX86Compiler2056   inline void jmp(const Mem& dst)
2057   { _emitInstruction(kX86InstJmp, &dst); }
2058 
2059   //! @brief Jump.
2060   //! @overload
jmpX86Compiler2061   inline void jmp(const Imm& dst)
2062   { _emitInstruction(kX86InstJmp, &dst); }
2063 
2064   //! @brief Jump.
2065   //! @overload
jmpX86Compiler2066   inline void jmp(void* dst)
2067   {
2068     Imm imm((sysint_t)dst);
2069     _emitInstruction(kX86InstJmp, &imm);
2070   }
2071 
2072   //! @brief Jump.
2073   //!
2074   //! This instruction transfers program control to a different point
2075   //! in the instruction stream without recording return information.
2076   //! The destination (target) operand specifies the label of the
2077   //! instruction being jumped to.
jmpX86Compiler2078   inline void jmp(const Label& label)
2079   { _emitInstruction(kX86InstJmp, &label); }
2080 
2081   //! @brief Load Effective Address
2082   //!
2083   //! This instruction computes the effective address of the second
2084   //! operand (the source operand) and stores it in the first operand
2085   //! (destination operand). The source operand is a memory address
2086   //! (offset part) specified with one of the processors addressing modes.
2087   //! The destination operand is a general-purpose register.
leaX86Compiler2088   inline void lea(const GpVar& dst, const Mem& src)
2089   { _emitInstruction(kX86InstLea, &dst, &src); }
2090 
2091 #if ASMJIT_NOT_SUPPORTED_BY_COMPILER
2092   //! @brief High Level Procedure Exit.
leaveX86Compiler2093   inline void leave()
2094   { _emitInstruction(kX86InstLeave); }
2095 #endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER
2096 
2097   //! @brief Move.
2098   //!
2099   //! This instruction copies the second operand (source operand) to the first
2100   //! operand (destination operand). The source operand can be an immediate
2101   //! value, general-purpose register, segment register, or memory location.
2102   //! The destination register can be a general-purpose register, segment
2103   //! register, or memory location. Both operands must be the same size, which
2104   //! can be a byte, a word, or a DWORD.
2105   //!
2106   //! @note To move MMX or SSE registers to/from GP registers or memory, use
2107   //! corresponding functions: @c movd(), @c movq(), etc. Passing MMX or SSE
2108   //! registers to @c mov() is illegal.
movX86Compiler2109   inline void mov(const GpVar& dst, const GpVar& src)
2110   { _emitInstruction(kX86InstMov, &dst, &src); }
2111 
2112   //! @brief Move.
2113   //! @overload
movX86Compiler2114   inline void mov(const GpVar& dst, const Mem& src)
2115   { _emitInstruction(kX86InstMov, &dst, &src); }
2116 
2117   //! @brief Move.
2118   //! @overload
movX86Compiler2119   inline void mov(const GpVar& dst, const Imm& src)
2120   { _emitInstruction(kX86InstMov, &dst, &src); }
2121 
2122   //! @brief Move.
2123   //! @overload
movX86Compiler2124   inline void mov(const Mem& dst, const GpVar& src)
2125   { _emitInstruction(kX86InstMov, &dst, &src); }
2126 
2127   //! @brief Move.
2128   //! @overload
movX86Compiler2129   inline void mov(const Mem& dst, const Imm& src)
2130   { _emitInstruction(kX86InstMov, &dst, &src); }
2131 
2132   //! @brief Move from segment register.
2133   //! @overload.
movX86Compiler2134   inline void mov(const GpVar& dst, const SegmentReg& src)
2135   { _emitInstruction(kX86InstMov, &dst, &src); }
2136 
2137   //! @brief Move from segment register.
2138   //! @overload.
movX86Compiler2139   inline void mov(const Mem& dst, const SegmentReg& src)
2140   { _emitInstruction(kX86InstMov, &dst, &src); }
2141 
2142   //! @brief Move to segment register.
2143   //! @overload.
movX86Compiler2144   inline void mov(const SegmentReg& dst, const GpVar& src)
2145   { _emitInstruction(kX86InstMov, &dst, &src); }
2146 
2147   //! @brief Move to segment register.
2148   //! @overload.
movX86Compiler2149   inline void mov(const SegmentReg& dst, const Mem& src)
2150   { _emitInstruction(kX86InstMov, &dst, &src); }
2151 
2152   //! @brief Move byte, word, dword or qword from absolute address @a src to
2153   //! AL, AX, EAX or RAX register.
mov_ptrX86Compiler2154   inline void mov_ptr(const GpVar& dst, void* src)
2155   {
2156     Imm imm((sysint_t)src);
2157     _emitInstruction(kX86InstMovPtr, &dst, &imm);
2158   }
2159 
2160   //! @brief Move byte, word, dword or qword from AL, AX, EAX or RAX register
2161   //! to absolute address @a dst.
mov_ptrX86Compiler2162   inline void mov_ptr(void* dst, const GpVar& src)
2163   {
2164     Imm imm((sysint_t)dst);
2165     _emitInstruction(kX86InstMovPtr, &imm, &src);
2166   }
2167 
2168   //! @brief Move with Sign-Extension.
2169   //!
2170   //! This instruction copies the contents of the source operand (register
2171   //! or memory location) to the destination operand (register) and sign
2172   //! extends the value to 16, 32 or 64-bits.
2173   //!
2174   //! @sa movsxd().
movsxX86Compiler2175   void movsx(const GpVar& dst, const GpVar& src)
2176   { _emitInstruction(kX86InstMovSX, &dst, &src); }
2177 
2178   //! @brief Move with Sign-Extension.
2179   //! @overload
movsxX86Compiler2180   void movsx(const GpVar& dst, const Mem& src)
2181   { _emitInstruction(kX86InstMovSX, &dst, &src); }
2182 
2183 #if defined(ASMJIT_X64)
2184   //! @brief Move DWord to QWord with sign-extension.
movsxdX86Compiler2185   inline void movsxd(const GpVar& dst, const GpVar& src)
2186   { _emitInstruction(kX86InstMovSXD, &dst, &src); }
2187 
2188   //! @brief Move DWord to QWord with sign-extension.
2189   //! @overload
movsxdX86Compiler2190   inline void movsxd(const GpVar& dst, const Mem& src)
2191   { _emitInstruction(kX86InstMovSXD, &dst, &src); }
2192 #endif // ASMJIT_X64
2193 
2194   //! @brief Move with Zero-Extend.
2195   //!
2196   //! This instruction copies the contents of the source operand (register
2197   //! or memory location) to the destination operand (register) and zero
2198   //! extends the value to 16 or 32-bits. The size of the converted value
2199   //! depends on the operand-size attribute.
movzxX86Compiler2200   inline void movzx(const GpVar& dst, const GpVar& src)
2201   { _emitInstruction(kX86InstMovZX, &dst, &src); }
2202 
2203   //! @brief Move with Zero-Extend.
2204   //! @brief Overload
movzxX86Compiler2205   inline void movzx(const GpVar& dst, const Mem& src)
2206   { _emitInstruction(kX86InstMovZX, &dst, &src); }
2207 
  //! @brief Unsigned multiply.
  //!
  //! Source operand (in a general-purpose register or memory location)
  //! is multiplied by the value in the AL, AX, or EAX register (depending
  //! on the operand size) and the product is stored in the AX, DX:AX, or
  //! EDX:EAX registers, respectively.
  //!
  //! @param dst_hi First destination variable (presumably the high part of
  //! the product, judging by its name).
  //! @param dst_lo Second destination variable (presumably the low part).
  //! @param src Source variable.
  //!
  //! @note Asserts that @a dst_hi and @a dst_lo have different ids.
  inline void mul(const GpVar& dst_hi, const GpVar& dst_lo, const GpVar& src)
  {
    // Destination variables must be different.
    ASMJIT_ASSERT(dst_hi.getId() != dst_lo.getId());
    _emitInstruction(kX86InstMul, &dst_hi, &dst_lo, &src);
  }

  //! @brief Unsigned multiply (memory source).
  //!
  //! @note Asserts that @a dst_hi and @a dst_lo have different ids.
  //! @overload
  inline void mul(const GpVar& dst_hi, const GpVar& dst_lo, const Mem& src)
  {
    // Destination variables must be different.
    ASMJIT_ASSERT(dst_hi.getId() != dst_lo.getId());
    _emitInstruction(kX86InstMul, &dst_hi, &dst_lo, &src);
  }
2229 
2230   //! @brief Two's Complement Negation.
negX86Compiler2231   inline void neg(const GpVar& dst)
2232   { _emitInstruction(kX86InstNeg, &dst); }
2233 
2234   //! @brief Two's Complement Negation.
negX86Compiler2235   inline void neg(const Mem& dst)
2236   { _emitInstruction(kX86InstNeg, &dst); }
2237 
2238   //! @brief No Operation.
2239   //!
2240   //! This instruction performs no operation. This instruction is a one-byte
2241   //! instruction that takes up space in the instruction stream but does not
2242   //! affect the machine context, except the EIP register. The NOP instruction
2243   //! is an alias mnemonic for the XCHG (E)AX, (E)AX instruction.
nopX86Compiler2244   inline void nop()
2245   { _emitInstruction(kX86InstNop); }
2246 
2247   //! @brief One's Complement Negation.
not_X86Compiler2248   inline void not_(const GpVar& dst)
2249   { _emitInstruction(kX86InstNot, &dst); }
2250 
2251   //! @brief One's Complement Negation.
not_X86Compiler2252   inline void not_(const Mem& dst)
2253   { _emitInstruction(kX86InstNot, &dst); }
2254 
2255   //! @brief Logical Inclusive OR.
or_X86Compiler2256   inline void or_(const GpVar& dst, const GpVar& src)
2257   { _emitInstruction(kX86InstOr, &dst, &src); }
2258 
2259   //! @brief Logical Inclusive OR.
or_X86Compiler2260   inline void or_(const GpVar& dst, const Mem& src)
2261   { _emitInstruction(kX86InstOr, &dst, &src); }
2262 
2263   //! @brief Logical Inclusive OR.
or_X86Compiler2264   inline void or_(const GpVar& dst, const Imm& src)
2265   { _emitInstruction(kX86InstOr, &dst, &src); }
2266 
2267   //! @brief Logical Inclusive OR.
or_X86Compiler2268   inline void or_(const Mem& dst, const GpVar& src)
2269   { _emitInstruction(kX86InstOr, &dst, &src); }
2270 
2271   //! @brief Logical Inclusive OR.
or_X86Compiler2272   inline void or_(const Mem& dst, const Imm& src)
2273   { _emitInstruction(kX86InstOr, &dst, &src); }
2274 
2275   //! @brief Pop a Value from the Stack.
2276   //!
2277   //! This instruction loads the value from the top of the stack to the location
2278   //! specified with the destination operand and then increments the stack pointer.
2279   //! The destination operand can be a general purpose register, memory location,
2280   //! or segment register.
popX86Compiler2281   inline void pop(const GpVar& dst)
2282   { _emitInstruction(kX86InstPop, &dst); }
2283 
popX86Compiler2284   inline void pop(const Mem& dst)
2285   {
2286     ASMJIT_ASSERT(dst.getSize() == 2 || dst.getSize() == sizeof(sysint_t));
2287     _emitInstruction(kX86InstPop, &dst);
2288   }
2289 
2290 #if defined(ASMJIT_X86)
2291   //! @brief Pop All General-Purpose Registers.
2292   //!
2293   //! Pop EDI, ESI, EBP, EBX, EDX, ECX, and EAX.
popadX86Compiler2294   inline void popad()
2295   { _emitInstruction(kX86InstPopAD); }
2296 #endif // ASMJIT_X86
2297 
2298   //! @brief Pop Stack into EFLAGS Register (32-bit or 64-bit).
popfX86Compiler2299   inline void popf()
2300   {
2301 #if defined(ASMJIT_X86)
2302     popfd();
2303 #else
2304     popfq();
2305 #endif
2306   }
2307 
2308 #if defined(ASMJIT_X86)
2309   //! @brief Pop Stack into EFLAGS Register (32-bit).
popfdX86Compiler2310   inline void popfd()
2311   { _emitInstruction(kX86InstPopFD); }
2312 #else
2313   //! @brief Pop Stack into EFLAGS Register (64-bit).
popfqX86Compiler2314   inline void popfq()
2315   { _emitInstruction(kX86InstPopFQ); }
2316 #endif
2317 
2318   //! @brief Push WORD/DWORD/QWORD Onto the Stack.
2319   //!
2320   //! @note 32-bit architecture pushed DWORD while 64-bit
2321   //! pushes QWORD. 64-bit mode not provides instruction to
2322   //! push 32-bit register/memory.
pushX86Compiler2323   inline void push(const GpVar& src)
2324   { _emitInstruction(kX86InstPush, &src); }
2325 
2326   //! @brief Push WORD/DWORD/QWORD Onto the Stack.
pushX86Compiler2327   inline void push(const Mem& src)
2328   {
2329     ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == sizeof(sysint_t));
2330     _emitInstruction(kX86InstPush, &src);
2331   }
2332 
2333   //! @brief Push WORD/DWORD/QWORD Onto the Stack.
pushX86Compiler2334   inline void push(const Imm& src)
2335   { _emitInstruction(kX86InstPush, &src); }
2336 
2337 #if defined(ASMJIT_X86)
2338   //! @brief Push All General-Purpose Registers.
2339   //!
2340   //! Push EAX, ECX, EDX, EBX, original ESP, EBP, ESI, and EDI.
pushadX86Compiler2341   inline void pushad()
2342   { _emitInstruction(kX86InstPushAD); }
2343 #endif // ASMJIT_X86
2344 
2345   //! @brief Push EFLAGS Register (32-bit or 64-bit) onto the Stack.
pushfX86Compiler2346   inline void pushf()
2347   {
2348 #if defined(ASMJIT_X86)
2349     pushfd();
2350 #else
2351     pushfq();
2352 #endif
2353   }
2354 
2355 #if defined(ASMJIT_X86)
2356   //! @brief Push EFLAGS Register (32-bit) onto the Stack.
pushfdX86Compiler2357   inline void pushfd()
2358   { _emitInstruction(kX86InstPushFD); }
2359 #else
2360   //! @brief Push EFLAGS Register (64-bit) onto the Stack.
pushfqX86Compiler2361   inline void pushfq()
2362   { _emitInstruction(kX86InstPushFQ); }
2363 #endif // ASMJIT_X86
2364 
2365   //! @brief Rotate Bits Left.
2366   //! @note @a src register can be only @c cl.
rclX86Compiler2367   inline void rcl(const GpVar& dst, const GpVar& src)
2368   { _emitInstruction(kX86InstRcl, &dst, &src); }
2369 
2370   //! @brief Rotate Bits Left.
rclX86Compiler2371   inline void rcl(const GpVar& dst, const Imm& src)
2372   { _emitInstruction(kX86InstRcl, &dst, &src); }
2373 
2374   //! @brief Rotate Bits Left.
2375   //! @note @a src register can be only @c cl.
rclX86Compiler2376   inline void rcl(const Mem& dst, const GpVar& src)
2377   { _emitInstruction(kX86InstRcl, &dst, &src); }
2378 
2379   //! @brief Rotate Bits Left.
rclX86Compiler2380   inline void rcl(const Mem& dst, const Imm& src)
2381   { _emitInstruction(kX86InstRcl, &dst, &src); }
2382 
2383   //! @brief Rotate Bits Right.
2384   //! @note @a src register can be only @c cl.
rcrX86Compiler2385   inline void rcr(const GpVar& dst, const GpVar& src)
2386   { _emitInstruction(kX86InstRcr, &dst, &src); }
2387 
2388   //! @brief Rotate Bits Right.
rcrX86Compiler2389   inline void rcr(const GpVar& dst, const Imm& src)
2390   { _emitInstruction(kX86InstRcr, &dst, &src); }
2391 
2392   //! @brief Rotate Bits Right.
2393   //! @note @a src register can be only @c cl.
rcrX86Compiler2394   inline void rcr(const Mem& dst, const GpVar& src)
2395   { _emitInstruction(kX86InstRcr, &dst, &src); }
2396 
2397   //! @brief Rotate Bits Right.
rcrX86Compiler2398   inline void rcr(const Mem& dst, const Imm& src)
2399   { _emitInstruction(kX86InstRcr, &dst, &src); }
2400 
  //! @brief Read Time-Stamp Counter (Pentium).
  //!
  //! @param dst_edx First destination variable (named after EDX).
  //! @param dst_eax Second destination variable (named after EAX).
  //!
  //! @note Asserts that @a dst_edx and @a dst_eax have different ids.
  inline void rdtsc(const GpVar& dst_edx, const GpVar& dst_eax)
  {
    // Destination registers must be different.
    ASMJIT_ASSERT(dst_edx.getId() != dst_eax.getId());
    _emitInstruction(kX86InstRdtsc, &dst_edx, &dst_eax);
  }
2408 
2409   //! @brief Read Time-Stamp Counter and Processor ID (New).
rdtscpX86Compiler2410   inline void rdtscp(const GpVar& dst_edx, const GpVar& dst_eax, const GpVar& dst_ecx)
2411   {
2412     // Destination registers must be different.
2413     ASMJIT_ASSERT(dst_edx.getId() != dst_eax.getId() && dst_eax.getId() != dst_ecx.getId());
2414     _emitInstruction(kX86InstRdtscP, &dst_edx, &dst_eax, &dst_ecx);
2415   }
2416 
2417   //! @brief Load ECX/RCX BYTEs from DS:[ESI/RSI] to AL.
rep_lodsbX86Compiler2418   inline void rep_lodsb(const GpVar& dst_val, const GpVar& src_addr, const GpVar& cnt_ecx)
2419   {
2420     // All registers must be unique, they will be reallocated to dst=EAX,RAX, src=DS:ESI/RSI, cnt=ECX/RCX.
2421     ASMJIT_ASSERT(dst_val.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId());
2422     _emitInstruction(kX86InstRepLodSB, &dst_val, &src_addr, &cnt_ecx);
2423   }
2424 
2425   //! @brief Load ECX/RCX DWORDs from DS:[ESI/RSI] to EAX.
rep_lodsdX86Compiler2426   inline void rep_lodsd(const GpVar& dst_val, const GpVar& src_addr, const GpVar& cnt_ecx)
2427   {
2428     // All registers must be unique, they will be reallocated to dst=EAX,RAX, src=DS:ESI/RSI, cnt=ECX/RCX.
2429     ASMJIT_ASSERT(dst_val.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId());
2430     _emitInstruction(kX86InstRepLodSD, &dst_val, &src_addr, &cnt_ecx);
2431   }
2432 
2433 #if defined(ASMJIT_X64)
2434   //! @brief Load ECX/RCX QWORDs from DS:[ESI/RSI] to RAX.
rep_lodsqX86Compiler2435   inline void rep_lodsq(const GpVar& dst_val, const GpVar& src_addr, const GpVar& cnt_ecx)
2436   {
2437     // All registers must be unique, they will be reallocated to dst=EAX,RAX, src=DS:ESI/RSI, cnt=ECX/RCX.
2438     ASMJIT_ASSERT(dst_val.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId());
2439     _emitInstruction(kX86InstRepLodSQ, &dst_val, &src_addr, &cnt_ecx);
2440   }
2441 #endif // ASMJIT_X64
2442 
2443   //! @brief Load ECX/RCX WORDs from DS:[ESI/RSI] to AX.
rep_lodswX86Compiler2444   inline void rep_lodsw(const GpVar& dst_val, const GpVar& src_addr, const GpVar& cnt_ecx)
2445   {
2446     // All registers must be unique, they will be reallocated to dst=EAX,RAX, src=DS:ESI/RSI, cnt=ECX/RCX.
2447     ASMJIT_ASSERT(dst_val.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId());
2448     _emitInstruction(kX86InstRepLodSW, &dst_val, &src_addr, &cnt_ecx);
2449   }
2450 
2451   //! @brief Move ECX/RCX BYTEs from DS:[ESI/RSI] to ES:[EDI/RDI].
rep_movsbX86Compiler2452   inline void rep_movsb(const GpVar& dst_addr, const GpVar& src_addr, const GpVar& cnt_ecx)
2453   {
2454     // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=DS:ESI/RSI, cnt=ECX/RCX.
2455     ASMJIT_ASSERT(dst_addr.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId());
2456     _emitInstruction(kX86InstRepMovSB, &dst_addr, &src_addr, &cnt_ecx);
2457   }
2458 
2459   //! @brief Move ECX/RCX DWORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
rep_movsdX86Compiler2460   inline void rep_movsd(const GpVar& dst_addr, const GpVar& src_addr, const GpVar& cnt_ecx)
2461   {
2462     // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=DS:ESI/RSI, cnt=ECX/RCX.
2463     ASMJIT_ASSERT(dst_addr.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId());
2464     _emitInstruction(kX86InstRepMovSD, &dst_addr, &src_addr, &cnt_ecx);
2465   }
2466 
2467 #if defined(ASMJIT_X64)
2468   //! @brief Move ECX/RCX QWORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
rep_movsqX86Compiler2469   inline void rep_movsq(const GpVar& dst_addr, const GpVar& src_addr, const GpVar& cnt_ecx)
2470   {
2471     // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=DS:ESI/RSI, cnt=ECX/RCX.
2472     ASMJIT_ASSERT(dst_addr.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId());
2473     _emitInstruction(kX86InstRepMovSQ, &dst_addr, &src_addr, &cnt_ecx);
2474   }
2475 #endif // ASMJIT_X64
2476 
2477   //! @brief Move ECX/RCX WORDs from DS:[ESI/RSI] to ES:[EDI/RDI].
rep_movswX86Compiler2478   inline void rep_movsw(const GpVar& dst_addr, const GpVar& src_addr, const GpVar& cnt_ecx)
2479   {
2480     // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=DS:ESI/RSI, cnt=ECX/RCX.
2481     ASMJIT_ASSERT(dst_addr.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId());
2482     _emitInstruction(kX86InstRepMovSW, &dst_addr, &src_addr, &cnt_ecx);
2483   }
2484 
2485   //! @brief Fill ECX/RCX BYTEs at ES:[EDI/RDI] with AL.
rep_stosbX86Compiler2486   inline void rep_stosb(const GpVar& dst_addr, const GpVar& src_val, const GpVar& cnt_ecx)
2487   {
2488     // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=EAX/RAX, cnt=ECX/RCX.
2489     ASMJIT_ASSERT(dst_addr.getId() != src_val.getId() && src_val.getId() != cnt_ecx.getId());
2490     _emitInstruction(kX86InstRepStoSB, &dst_addr, &src_val, &cnt_ecx);
2491   }
2492 
2493   //! @brief Fill ECX/RCX DWORDs at ES:[EDI/RDI] with EAX.
rep_stosdX86Compiler2494   inline void rep_stosd(const GpVar& dst_addr, const GpVar& src_val, const GpVar& cnt_ecx)
2495   {
2496     // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=EAX/RAX, cnt=ECX/RCX.
2497     ASMJIT_ASSERT(dst_addr.getId() != src_val.getId() && src_val.getId() != cnt_ecx.getId());
2498     _emitInstruction(kX86InstRepStoSD, &dst_addr, &src_val, &cnt_ecx);
2499   }
2500 
2501 #if defined(ASMJIT_X64)
2502   //! @brief Fill ECX/RCX QWORDs at ES:[EDI/RDI] with RAX.
rep_stosqX86Compiler2503   inline void rep_stosq(const GpVar& dst_addr, const GpVar& src_val, const GpVar& cnt_ecx)
2504   {
2505     // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=EAX/RAX, cnt=ECX/RCX.
2506     ASMJIT_ASSERT(dst_addr.getId() != src_val.getId() && src_val.getId() != cnt_ecx.getId());
2507     _emitInstruction(kX86InstRepStoSQ, &dst_addr, &src_val, &cnt_ecx);
2508   }
2509 #endif // ASMJIT_X64
2510 
2511   //! @brief Fill ECX/RCX WORDs at ES:[EDI/RDI] with AX.
rep_stoswX86Compiler2512   inline void rep_stosw(const GpVar& dst_addr, const GpVar& src_val, const GpVar& cnt_ecx)
2513   {
2514     // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=EAX/RAX, cnt=ECX/RCX.
2515     ASMJIT_ASSERT(dst_addr.getId() != src_val.getId() && src_val.getId() != cnt_ecx.getId());
2516     _emitInstruction(kX86InstRepStoSW, &dst_addr, &src_val, &cnt_ecx);
2517   }
2518 
2519   //! @brief Repeated find nonmatching BYTEs in ES:[EDI/RDI] and DS:[ESI/RDI].
repe_cmpsbX86Compiler2520   inline void repe_cmpsb(const GpVar& cmp1_addr, const GpVar& cmp2_addr, const GpVar& cnt_ecx)
2521   {
2522     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX.
2523     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId());
2524     _emitInstruction(kX86InstRepECmpSB, &cmp1_addr, &cmp2_addr, &cnt_ecx);
2525   }
2526 
2527   //! @brief Repeated find nonmatching DWORDs in ES:[EDI/RDI] and DS:[ESI/RDI].
repe_cmpsdX86Compiler2528   inline void repe_cmpsd(const GpVar& cmp1_addr, const GpVar& cmp2_addr, const GpVar& cnt_ecx)
2529   {
2530     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX.
2531     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId());
2532     _emitInstruction(kX86InstRepECmpSD, &cmp1_addr, &cmp2_addr, &cnt_ecx);
2533   }
2534 
2535 #if defined(ASMJIT_X64)
2536   //! @brief Repeated find nonmatching QWORDs in ES:[EDI/RDI] and DS:[ESI/RDI].
repe_cmpsqX86Compiler2537   inline void repe_cmpsq(const GpVar& cmp1_addr, const GpVar& cmp2_addr, const GpVar& cnt_ecx)
2538   {
2539     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX.
2540     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId());
2541     _emitInstruction(kX86InstRepECmpSQ, &cmp1_addr, &cmp2_addr, &cnt_ecx);
2542   }
2543 #endif // ASMJIT_X64
2544 
2545   //! @brief Repeated find nonmatching WORDs in ES:[EDI/RDI] and DS:[ESI/RDI].
repe_cmpswX86Compiler2546   inline void repe_cmpsw(const GpVar& cmp1_addr, const GpVar& cmp2_addr, const GpVar& cnt_ecx)
2547   {
2548     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX.
2549     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId());
2550     _emitInstruction(kX86InstRepECmpSW, &cmp1_addr, &cmp2_addr, &cnt_ecx);
2551   }
2552 
2553   //! @brief Find non-AL BYTE starting at ES:[EDI/RDI].
repe_scasbX86Compiler2554   inline void repe_scasb(const GpVar& cmp1_addr, const GpVar& cmp2_val, const GpVar& cnt_ecx)
2555   {
2556     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=AL, cnt=ECX/RCX.
2557     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId());
2558     _emitInstruction(kX86InstRepEScaSB, &cmp1_addr, &cmp2_val, &cnt_ecx);
2559   }
2560 
2561   //! @brief Find non-EAX DWORD starting at ES:[EDI/RDI].
repe_scasdX86Compiler2562   inline void repe_scasd(const GpVar& cmp1_addr, const GpVar& cmp2_val, const GpVar& cnt_ecx)
2563   {
2564     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=EAX, cnt=ECX/RCX.
2565     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId());
2566     _emitInstruction(kX86InstRepEScaSD, &cmp1_addr, &cmp2_val, &cnt_ecx);
2567   }
2568 
2569 #if defined(ASMJIT_X64)
2570   //! @brief Find non-RAX QWORD starting at ES:[EDI/RDI].
repe_scasqX86Compiler2571   inline void repe_scasq(const GpVar& cmp1_addr, const GpVar& cmp2_val, const GpVar& cnt_ecx)
2572   {
2573     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=RAX, cnt=ECX/RCX.
2574     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId());
2575     _emitInstruction(kX86InstRepEScaSQ, &cmp1_addr, &cmp2_val, &cnt_ecx);
2576   }
2577 #endif // ASMJIT_X64
2578 
2579   //! @brief Find non-AX WORD starting at ES:[EDI/RDI].
repe_scaswX86Compiler2580   inline void repe_scasw(const GpVar& cmp1_addr, const GpVar& cmp2_val, const GpVar& cnt_ecx)
2581   {
2582     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=AX, cnt=ECX/RCX.
2583     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId());
2584     _emitInstruction(kX86InstRepEScaSW, &cmp1_addr, &cmp2_val, &cnt_ecx);
2585   }
2586 
2587   //! @brief Find matching BYTEs in [RDI] and [RSI].
repne_cmpsbX86Compiler2588   inline void repne_cmpsb(const GpVar& cmp1_addr, const GpVar& cmp2_addr, const GpVar& cnt_ecx)
2589   {
2590     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX.
2591     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId());
2592     _emitInstruction(kX86InstRepNECmpSB, &cmp1_addr, &cmp2_addr, &cnt_ecx);
2593   }
2594 
2595   //! @brief Find matching DWORDs in [RDI] and [RSI].
repne_cmpsdX86Compiler2596   inline void repne_cmpsd(const GpVar& cmp1_addr, const GpVar& cmp2_addr, const GpVar& cnt_ecx)
2597   {
2598     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX.
2599     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId());
2600     _emitInstruction(kX86InstRepNECmpSD, &cmp1_addr, &cmp2_addr, &cnt_ecx);
2601   }
2602 
2603 #if defined(ASMJIT_X64)
2604   //! @brief Find matching QWORDs in [RDI] and [RSI].
repne_cmpsqX86Compiler2605   inline void repne_cmpsq(const GpVar& cmp1_addr, const GpVar& cmp2_addr, const GpVar& cnt_ecx)
2606   {
2607     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX.
2608     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId());
2609     _emitInstruction(kX86InstRepNECmpSQ, &cmp1_addr, &cmp2_addr, &cnt_ecx);
2610   }
2611 #endif // ASMJIT_X64
2612 
2613   //! @brief Find matching WORDs in [RDI] and [RSI].
repne_cmpswX86Compiler2614   inline void repne_cmpsw(const GpVar& cmp1_addr, const GpVar& cmp2_addr, const GpVar& cnt_ecx)
2615   {
2616     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX.
2617     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId());
2618     _emitInstruction(kX86InstRepNECmpSW, &cmp1_addr, &cmp2_addr, &cnt_ecx);
2619   }
2620 
2621   //! @brief Find AL, starting at ES:[EDI/RDI].
repne_scasbX86Compiler2622   inline void repne_scasb(const GpVar& cmp1_addr, const GpVar& cmp2_val, const GpVar& cnt_ecx)
2623   {
2624     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=AL, cnt=ECX/RCX.
2625     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId());
2626     _emitInstruction(kX86InstRepNEScaSB, &cmp1_addr, &cmp2_val, &cnt_ecx);
2627   }
2628 
2629   //! @brief Find EAX, starting at ES:[EDI/RDI].
repne_scasdX86Compiler2630   inline void repne_scasd(const GpVar& cmp1_addr, const GpVar& cmp2_val, const GpVar& cnt_ecx)
2631   {
2632     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=EAX, cnt=ECX/RCX.
2633     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId());
2634     _emitInstruction(kX86InstRepNEScaSD, &cmp1_addr, &cmp2_val, &cnt_ecx);
2635   }
2636 
2637 #if defined(ASMJIT_X64)
2638   //! @brief Find RAX, starting at ES:[EDI/RDI].
repne_scasqX86Compiler2639   inline void repne_scasq(const GpVar& cmp1_addr, const GpVar& cmp2_val, const GpVar& cnt_ecx)
2640   {
2641     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=RAX, cnt=ECX/RCX.
2642     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId());
2643     _emitInstruction(kX86InstRepNEScaSQ, &cmp1_addr, &cmp2_val, &cnt_ecx);
2644   }
2645 #endif // ASMJIT_X64
2646 
2647   //! @brief Find AX, starting at ES:[EDI/RDI].
repne_scaswX86Compiler2648   inline void repne_scasw(const GpVar& cmp1_addr, const GpVar& cmp2_val, const GpVar& cnt_ecx)
2649   {
2650     // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=AX, cnt=ECX/RCX.
2651     ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId());
2652     _emitInstruction(kX86InstRepNEScaSW, &cmp1_addr, &cmp2_val, &cnt_ecx);
2653   }
2654 
2655   //! @brief Return from Procedure.
retX86Compiler2656   inline void ret()
2657   { _emitReturn(NULL, NULL); }
2658 
2659   //! @brief Return from Procedure.
retX86Compiler2660   inline void ret(const GpVar& first)
2661   { _emitReturn(&first, NULL); }
2662 
2663   //! @brief Return from Procedure.
retX86Compiler2664   inline void ret(const GpVar& first, const GpVar& second)
2665   { _emitReturn(&first, &second); }
2666 
2667   //! @brief Return from Procedure.
retX86Compiler2668   inline void ret(const XmmVar& first)
2669   { _emitReturn(&first, NULL); }
2670 
2671   //! @brief Return from Procedure.
retX86Compiler2672   inline void ret(const XmmVar& first, const XmmVar& second)
2673   { _emitReturn(&first, &second); }
2674 
2675   //! @brief Rotate Bits Left.
2676   //! @note @a src register can be only @c cl.
rolX86Compiler2677   inline void rol(const GpVar& dst, const GpVar& src)
2678   { _emitInstruction(kX86InstRol, &dst, &src); }
2679 
2680   //! @brief Rotate Bits Left.
rolX86Compiler2681   inline void rol(const GpVar& dst, const Imm& src)
2682   { _emitInstruction(kX86InstRol, &dst, &src); }
2683 
2684   //! @brief Rotate Bits Left.
2685   //! @note @a src register can be only @c cl.
rolX86Compiler2686   inline void rol(const Mem& dst, const GpVar& src)
2687   { _emitInstruction(kX86InstRol, &dst, &src); }
2688 
2689   //! @brief Rotate Bits Left.
rolX86Compiler2690   inline void rol(const Mem& dst, const Imm& src)
2691   { _emitInstruction(kX86InstRol, &dst, &src); }
2692 
2693   //! @brief Rotate Bits Right.
2694   //! @note @a src register can be only @c cl.
rorX86Compiler2695   inline void ror(const GpVar& dst, const GpVar& src)
2696   { _emitInstruction(kX86InstRor, &dst, &src); }
2697 
2698   //! @brief Rotate Bits Right.
rorX86Compiler2699   inline void ror(const GpVar& dst, const Imm& src)
2700   { _emitInstruction(kX86InstRor, &dst, &src); }
2701 
2702   //! @brief Rotate Bits Right.
2703   //! @note @a src register can be only @c cl.
rorX86Compiler2704   inline void ror(const Mem& dst, const GpVar& src)
2705   { _emitInstruction(kX86InstRor, &dst, &src); }
2706 
2707   //! @brief Rotate Bits Right.
rorX86Compiler2708   inline void ror(const Mem& dst, const Imm& src)
2709   { _emitInstruction(kX86InstRor, &dst, &src); }
2710 
2711 #if defined(ASMJIT_X86)
2712   //! @brief Store @a var (allocated to AH/AX/EAX/RAX) into Flags.
sahfX86Compiler2713   inline void sahf(const GpVar& var)
2714   { _emitInstruction(kX86InstSahf, &var); }
2715 #endif // ASMJIT_X86
2716 
2717   //! @brief Integer subtraction with borrow.
sbbX86Compiler2718   inline void sbb(const GpVar& dst, const GpVar& src)
2719   { _emitInstruction(kX86InstSbb, &dst, &src); }
2720 
2721   //! @brief Integer subtraction with borrow.
sbbX86Compiler2722   inline void sbb(const GpVar& dst, const Mem& src)
2723   { _emitInstruction(kX86InstSbb, &dst, &src); }
2724 
2725   //! @brief Integer subtraction with borrow.
sbbX86Compiler2726   inline void sbb(const GpVar& dst, const Imm& src)
2727   { _emitInstruction(kX86InstSbb, &dst, &src); }
2728 
2729   //! @brief Integer subtraction with borrow.
sbbX86Compiler2730   inline void sbb(const Mem& dst, const GpVar& src)
2731   { _emitInstruction(kX86InstSbb, &dst, &src); }
2732 
2733   //! @brief Integer subtraction with borrow.
sbbX86Compiler2734   inline void sbb(const Mem& dst, const Imm& src)
2735   { _emitInstruction(kX86InstSbb, &dst, &src); }
2736 
2737   //! @brief Shift Bits Left.
2738   //! @note @a src register can be only @c cl.
salX86Compiler2739   inline void sal(const GpVar& dst, const GpVar& src)
2740   { _emitInstruction(kX86InstSal, &dst, &src); }
2741 
2742   //! @brief Shift Bits Left.
salX86Compiler2743   inline void sal(const GpVar& dst, const Imm& src)
2744   { _emitInstruction(kX86InstSal, &dst, &src); }
2745 
2746   //! @brief Shift Bits Left.
2747   //! @note @a src register can be only @c cl.
salX86Compiler2748   inline void sal(const Mem& dst, const GpVar& src)
2749   { _emitInstruction(kX86InstSal, &dst, &src); }
2750 
2751   //! @brief Shift Bits Left.
salX86Compiler2752   inline void sal(const Mem& dst, const Imm& src)
2753   { _emitInstruction(kX86InstSal, &dst, &src); }
2754 
2755   //! @brief Shift Bits Right.
2756   //! @note @a src register can be only @c cl.
sarX86Compiler2757   inline void sar(const GpVar& dst, const GpVar& src)
2758   { _emitInstruction(kX86InstSar, &dst, &src); }
2759 
2760   //! @brief Shift Bits Right.
sarX86Compiler2761   inline void sar(const GpVar& dst, const Imm& src)
2762   { _emitInstruction(kX86InstSar, &dst, &src); }
2763 
2764   //! @brief Shift Bits Right.
2765   //! @note @a src register can be only @c cl.
sarX86Compiler2766   inline void sar(const Mem& dst, const GpVar& src)
2767   { _emitInstruction(kX86InstSar, &dst, &src); }
2768 
2769   //! @brief Shift Bits Right.
sarX86Compiler2770   inline void sar(const Mem& dst, const Imm& src)
2771   { _emitInstruction(kX86InstSar, &dst, &src); }
2772 
2773   //! @brief Set Byte on Condition.
setX86Compiler2774   inline void set(kX86Cond cc, const GpVar& dst)
2775   {
2776     ASMJIT_ASSERT(dst.getSize() == 1);
2777     _emitInstruction(X86Util::getSetccInstFromCond(cc), &dst);
2778   }
2779 
2780   //! @brief Set Byte on Condition.
setX86Compiler2781   inline void set(kX86Cond cc, const Mem& dst)
2782   {
2783     ASMJIT_ASSERT(dst.getSize() <= 1);
2784     _emitInstruction(X86Util::getSetccInstFromCond(cc), &dst);
2785   }
2786 
2787   //! @brief Set Byte on Condition.
setaX86Compiler2788   inline void seta  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetA  , &dst); }
2789   //! @brief Set Byte on Condition.
setaX86Compiler2790   inline void seta  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetA  , &dst); }
2791   //! @brief Set Byte on Condition.
setaeX86Compiler2792   inline void setae (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetAE , &dst); }
2793   //! @brief Set Byte on Condition.
setaeX86Compiler2794   inline void setae (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetAE , &dst); }
2795   //! @brief Set Byte on Condition.
setbX86Compiler2796   inline void setb  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetB  , &dst); }
2797   //! @brief Set Byte on Condition.
setbX86Compiler2798   inline void setb  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetB  , &dst); }
2799   //! @brief Set Byte on Condition.
setbeX86Compiler2800   inline void setbe (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetBE , &dst); }
2801   //! @brief Set Byte on Condition.
setbeX86Compiler2802   inline void setbe (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetBE , &dst); }
2803   //! @brief Set Byte on Condition.
setcX86Compiler2804   inline void setc  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetC  , &dst); }
2805   //! @brief Set Byte on Condition.
setcX86Compiler2806   inline void setc  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetC  , &dst); }
2807   //! @brief Set Byte on Condition.
seteX86Compiler2808   inline void sete  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetE  , &dst); }
2809   //! @brief Set Byte on Condition.
seteX86Compiler2810   inline void sete  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetE  , &dst); }
2811   //! @brief Set Byte on Condition.
setgX86Compiler2812   inline void setg  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetG  , &dst); }
2813   //! @brief Set Byte on Condition.
setgX86Compiler2814   inline void setg  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetG  , &dst); }
2815   //! @brief Set Byte on Condition.
setgeX86Compiler2816   inline void setge (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetGE , &dst); }
2817   //! @brief Set Byte on Condition.
setgeX86Compiler2818   inline void setge (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetGE , &dst); }
2819   //! @brief Set Byte on Condition.
setlX86Compiler2820   inline void setl  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetL  , &dst); }
2821   //! @brief Set Byte on Condition.
setlX86Compiler2822   inline void setl  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetL  , &dst); }
2823   //! @brief Set Byte on Condition.
setleX86Compiler2824   inline void setle (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetLE , &dst); }
2825   //! @brief Set Byte on Condition.
setleX86Compiler2826   inline void setle (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetLE , &dst); }
2827   //! @brief Set Byte on Condition.
setnaX86Compiler2828   inline void setna (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNA , &dst); }
2829   //! @brief Set Byte on Condition.
setnaX86Compiler2830   inline void setna (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNA , &dst); }
2831   //! @brief Set Byte on Condition.
setnaeX86Compiler2832   inline void setnae(const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNAE, &dst); }
2833   //! @brief Set Byte on Condition.
setnaeX86Compiler2834   inline void setnae(const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNAE, &dst); }
2835   //! @brief Set Byte on Condition.
setnbX86Compiler2836   inline void setnb (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNB , &dst); }
2837   //! @brief Set Byte on Condition.
setnbX86Compiler2838   inline void setnb (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNB , &dst); }
2839   //! @brief Set Byte on Condition.
setnbeX86Compiler2840   inline void setnbe(const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNBE, &dst); }
2841   //! @brief Set Byte on Condition.
setnbeX86Compiler2842   inline void setnbe(const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNBE, &dst); }
2843   //! @brief Set Byte on Condition.
setncX86Compiler2844   inline void setnc (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNC , &dst); }
2845   //! @brief Set Byte on Condition.
setncX86Compiler2846   inline void setnc (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNC , &dst); }
2847   //! @brief Set Byte on Condition.
setneX86Compiler2848   inline void setne (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNE , &dst); }
2849   //! @brief Set Byte on Condition.
setneX86Compiler2850   inline void setne (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNE , &dst); }
2851   //! @brief Set Byte on Condition.
setngX86Compiler2852   inline void setng (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNG , &dst); }
2853   //! @brief Set Byte on Condition.
setngX86Compiler2854   inline void setng (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNG , &dst); }
2855   //! @brief Set Byte on Condition.
setngeX86Compiler2856   inline void setnge(const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNGE, &dst); }
2857   //! @brief Set Byte on Condition.
setngeX86Compiler2858   inline void setnge(const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNGE, &dst); }
2859   //! @brief Set Byte on Condition.
setnlX86Compiler2860   inline void setnl (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNL , &dst); }
2861   //! @brief Set Byte on Condition.
setnlX86Compiler2862   inline void setnl (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNL , &dst); }
2863   //! @brief Set Byte on Condition.
setnleX86Compiler2864   inline void setnle(const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNLE, &dst); }
2865   //! @brief Set Byte on Condition.
setnleX86Compiler2866   inline void setnle(const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNLE, &dst); }
2867   //! @brief Set Byte on Condition.
setnoX86Compiler2868   inline void setno (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNO , &dst); }
2869   //! @brief Set Byte on Condition.
setnoX86Compiler2870   inline void setno (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNO , &dst); }
2871   //! @brief Set Byte on Condition.
setnpX86Compiler2872   inline void setnp (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNP , &dst); }
2873   //! @brief Set Byte on Condition.
setnpX86Compiler2874   inline void setnp (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNP , &dst); }
2875   //! @brief Set Byte on Condition.
setnsX86Compiler2876   inline void setns (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNS , &dst); }
2877   //! @brief Set Byte on Condition.
setnsX86Compiler2878   inline void setns (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNS , &dst); }
2879   //! @brief Set Byte on Condition.
setnzX86Compiler2880   inline void setnz (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetNZ , &dst); }
2881   //! @brief Set Byte on Condition.
setnzX86Compiler2882   inline void setnz (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetNZ , &dst); }
2883   //! @brief Set Byte on Condition.
setoX86Compiler2884   inline void seto  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetO  , &dst); }
2885   //! @brief Set Byte on Condition.
setoX86Compiler2886   inline void seto  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetO  , &dst); }
2887   //! @brief Set Byte on Condition.
setpX86Compiler2888   inline void setp  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetP  , &dst); }
2889   //! @brief Set Byte on Condition.
setpX86Compiler2890   inline void setp  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetP  , &dst); }
2891   //! @brief Set Byte on Condition.
setpeX86Compiler2892   inline void setpe (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetPE , &dst); }
2893   //! @brief Set Byte on Condition.
setpeX86Compiler2894   inline void setpe (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetPE , &dst); }
2895   //! @brief Set Byte on Condition.
setpoX86Compiler2896   inline void setpo (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetPO , &dst); }
2897   //! @brief Set Byte on Condition.
setpoX86Compiler2898   inline void setpo (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetPO , &dst); }
2899   //! @brief Set Byte on Condition.
setsX86Compiler2900   inline void sets  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetS  , &dst); }
2901   //! @brief Set Byte on Condition.
setsX86Compiler2902   inline void sets  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetS  , &dst); }
2903   //! @brief Set Byte on Condition.
setzX86Compiler2904   inline void setz  (const GpVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(kX86InstSetZ  , &dst); }
2905   //! @brief Set Byte on Condition.
setzX86Compiler2906   inline void setz  (const Mem& dst)   { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(kX86InstSetZ  , &dst); }
2907 
2908   //! @brief Shift Bits Left.
2909   //! @note @a src register can be only @c cl.
shlX86Compiler2910   inline void shl(const GpVar& dst, const GpVar& src)
2911   { _emitInstruction(kX86InstShl, &dst, &src); }
2912 
2913   //! @brief Shift Bits Left.
shlX86Compiler2914   inline void shl(const GpVar& dst, const Imm& src)
2915   { _emitInstruction(kX86InstShl, &dst, &src); }
2916 
2917   //! @brief Shift Bits Left.
2918   //! @note @a src register can be only @c cl.
shlX86Compiler2919   inline void shl(const Mem& dst, const GpVar& src)
2920   { _emitInstruction(kX86InstShl, &dst, &src); }
2921 
2922   //! @brief Shift Bits Left.
shlX86Compiler2923   inline void shl(const Mem& dst, const Imm& src)
2924   { _emitInstruction(kX86InstShl, &dst, &src); }
2925 
2926   //! @brief Shift Bits Right.
2927   //! @note @a src register can be only @c cl.
shrX86Compiler2928   inline void shr(const GpVar& dst, const GpVar& src)
2929   { _emitInstruction(kX86InstShr, &dst, &src); }
2930 
2931   //! @brief Shift Bits Right.
shrX86Compiler2932   inline void shr(const GpVar& dst, const Imm& src)
2933   { _emitInstruction(kX86InstShr, &dst, &src); }
2934 
2935   //! @brief Shift Bits Right.
2936   //! @note @a src register can be only @c cl.
shrX86Compiler2937   inline void shr(const Mem& dst, const GpVar& src)
2938   { _emitInstruction(kX86InstShr, &dst, &src); }
2939 
2940   //! @brief Shift Bits Right.
shrX86Compiler2941   inline void shr(const Mem& dst, const Imm& src)
2942   { _emitInstruction(kX86InstShr, &dst, &src); }
2943 
2944   //! @brief Double Precision Shift Left.
2945   //! @note src2 register can be only @c cl register.
shldX86Compiler2946   inline void shld(const GpVar& dst, const GpVar& src1, const GpVar& src2)
2947   { _emitInstruction(kX86InstShld, &dst, &src1, &src2); }
2948 
2949   //! @brief Double Precision Shift Left.
shldX86Compiler2950   inline void shld(const GpVar& dst, const GpVar& src1, const Imm& src2)
2951   { _emitInstruction(kX86InstShld, &dst, &src1, &src2); }
2952 
2953   //! @brief Double Precision Shift Left.
2954   //! @note src2 register can be only @c cl register.
shldX86Compiler2955   inline void shld(const Mem& dst, const GpVar& src1, const GpVar& src2)
2956   { _emitInstruction(kX86InstShld, &dst, &src1, &src2); }
2957 
2958   //! @brief Double Precision Shift Left.
shldX86Compiler2959   inline void shld(const Mem& dst, const GpVar& src1, const Imm& src2)
2960   { _emitInstruction(kX86InstShld, &dst, &src1, &src2); }
2961 
2962   //! @brief Double Precision Shift Right.
2963   //! @note src2 register can be only @c cl register.
shrdX86Compiler2964   inline void shrd(const GpVar& dst, const GpVar& src1, const GpVar& src2)
2965   { _emitInstruction(kX86InstShrd, &dst, &src1, &src2); }
2966 
2967   //! @brief Double Precision Shift Right.
shrdX86Compiler2968   inline void shrd(const GpVar& dst, const GpVar& src1, const Imm& src2)
2969   { _emitInstruction(kX86InstShrd, &dst, &src1, &src2); }
2970 
2971   //! @brief Double Precision Shift Right.
2972   //! @note src2 register can be only @c cl register.
shrdX86Compiler2973   inline void shrd(const Mem& dst, const GpVar& src1, const GpVar& src2)
2974   { _emitInstruction(kX86InstShrd, &dst, &src1, &src2); }
2975 
2976   //! @brief Double Precision Shift Right.
shrdX86Compiler2977   inline void shrd(const Mem& dst, const GpVar& src1, const Imm& src2)
2978   { _emitInstruction(kX86InstShrd, &dst, &src1, &src2); }
2979 
2980   //! @brief Set Carry Flag to 1.
stcX86Compiler2981   inline void stc()
2982   { _emitInstruction(kX86InstStc); }
2983 
2984   //! @brief Set Direction Flag to 1.
stdX86Compiler2985   inline void std()
2986   { _emitInstruction(kX86InstStd); }
2987 
2988   //! @brief Subtract.
subX86Compiler2989   inline void sub(const GpVar& dst, const GpVar& src)
2990   { _emitInstruction(kX86InstSub, &dst, &src); }
2991 
2992   //! @brief Subtract.
subX86Compiler2993   inline void sub(const GpVar& dst, const Mem& src)
2994   { _emitInstruction(kX86InstSub, &dst, &src); }
2995 
2996   //! @brief Subtract.
subX86Compiler2997   inline void sub(const GpVar& dst, const Imm& src)
2998   { _emitInstruction(kX86InstSub, &dst, &src); }
2999 
3000   //! @brief Subtract.
subX86Compiler3001   inline void sub(const Mem& dst, const GpVar& src)
3002   { _emitInstruction(kX86InstSub, &dst, &src); }
3003 
3004   //! @brief Subtract.
subX86Compiler3005   inline void sub(const Mem& dst, const Imm& src)
3006   { _emitInstruction(kX86InstSub, &dst, &src); }
3007 
3008   //! @brief Logical Compare.
testX86Compiler3009   inline void test(const GpVar& op1, const GpVar& op2)
3010   { _emitInstruction(kX86InstTest, &op1, &op2); }
3011 
3012   //! @brief Logical Compare.
testX86Compiler3013   inline void test(const GpVar& op1, const Imm& op2)
3014   { _emitInstruction(kX86InstTest, &op1, &op2); }
3015 
3016   //! @brief Logical Compare.
testX86Compiler3017   inline void test(const Mem& op1, const GpVar& op2)
3018   { _emitInstruction(kX86InstTest, &op1, &op2); }
3019 
3020   //! @brief Logical Compare.
testX86Compiler3021   inline void test(const Mem& op1, const Imm& op2)
3022   { _emitInstruction(kX86InstTest, &op1, &op2); }
3023 
3024   //! @brief Undefined instruction - Raise invalid opcode exception.
ud2X86Compiler3025   inline void ud2()
3026   { _emitInstruction(kX86InstUd2); }
3027 
3028   //! @brief Exchange and Add.
xaddX86Compiler3029   inline void xadd(const GpVar& dst, const GpVar& src)
3030   { _emitInstruction(kX86InstXadd, &dst, &src); }
3031 
3032   //! @brief Exchange and Add.
xaddX86Compiler3033   inline void xadd(const Mem& dst, const GpVar& src)
3034   { _emitInstruction(kX86InstXadd, &dst, &src); }
3035 
3036   //! @brief Exchange Register/Memory with Register.
xchgX86Compiler3037   inline void xchg(const GpVar& dst, const GpVar& src)
3038   { _emitInstruction(kX86InstXchg, &dst, &src); }
3039 
3040   //! @brief Exchange Register/Memory with Register.
xchgX86Compiler3041   inline void xchg(const Mem& dst, const GpVar& src)
3042   { _emitInstruction(kX86InstXchg, &dst, &src); }
3043 
3044   //! @brief Exchange Register/Memory with Register.
xchgX86Compiler3045   inline void xchg(const GpVar& dst, const Mem& src)
3046   { _emitInstruction(kX86InstXchg, &src, &dst); }
3047 
3048   //! @brief Exchange Register/Memory with Register.
xor_X86Compiler3049   inline void xor_(const GpVar& dst, const GpVar& src)
3050   { _emitInstruction(kX86InstXor, &dst, &src); }
3051 
3052   //! @brief Exchange Register/Memory with Register.
xor_X86Compiler3053   inline void xor_(const GpVar& dst, const Mem& src)
3054   { _emitInstruction(kX86InstXor, &dst, &src); }
3055 
3056   //! @brief Exchange Register/Memory with Register.
xor_X86Compiler3057   inline void xor_(const GpVar& dst, const Imm& src)
3058   { _emitInstruction(kX86InstXor, &dst, &src); }
3059 
3060   //! @brief Exchange Register/Memory with Register.
xor_X86Compiler3061   inline void xor_(const Mem& dst, const GpVar& src)
3062   { _emitInstruction(kX86InstXor, &dst, &src); }
3063 
3064   //! @brief Exchange Register/Memory with Register.
xor_X86Compiler3065   inline void xor_(const Mem& dst, const Imm& src)
3066   { _emitInstruction(kX86InstXor, &dst, &src); }
3067 
3068   // --------------------------------------------------------------------------
3069   // [MMX]
3070   // --------------------------------------------------------------------------
3071 
3072   //! @brief Empty MMX state.
emmsX86Compiler3073   inline void emms()
3074   { _emitInstruction(kX86InstEmms); }
3075 
3076   //! @brief Move DWord (MMX).
movdX86Compiler3077   inline void movd(const Mem& dst, const MmVar& src)
3078   { _emitInstruction(kX86InstMovD, &dst, &src); }
3079 
3080   //! @brief Move DWord (MMX).
movdX86Compiler3081   inline void movd(const GpVar& dst, const MmVar& src)
3082   { _emitInstruction(kX86InstMovD, &dst, &src); }
3083 
3084   //! @brief Move DWord (MMX).
movdX86Compiler3085   inline void movd(const MmVar& dst, const Mem& src)
3086   { _emitInstruction(kX86InstMovD, &dst, &src); }
3087 
3088   //! @brief Move DWord (MMX).
movdX86Compiler3089   inline void movd(const MmVar& dst, const GpVar& src)
3090   { _emitInstruction(kX86InstMovD, &dst, &src); }
3091 
3092   //! @brief Move QWord (MMX).
movqX86Compiler3093   inline void movq(const MmVar& dst, const MmVar& src)
3094   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3095 
3096   //! @brief Move QWord (MMX).
movqX86Compiler3097   inline void movq(const Mem& dst, const MmVar& src)
3098   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3099 
3100 #if defined(ASMJIT_X64)
3101   //! @brief Move QWord (MMX).
movqX86Compiler3102   inline void movq(const GpVar& dst, const MmVar& src)
3103   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3104 #endif
3105 
3106   //! @brief Move QWord (MMX).
movqX86Compiler3107   inline void movq(const MmVar& dst, const Mem& src)
3108   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3109 
3110 #if defined(ASMJIT_X64)
3111   //! @brief Move QWord (MMX).
movqX86Compiler3112   inline void movq(const MmVar& dst, const GpVar& src)
3113   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3114 #endif
3115 
3116   //! @brief Pack with Signed Saturation (MMX).
packsswbX86Compiler3117   inline void packsswb(const MmVar& dst, const MmVar& src)
3118   { _emitInstruction(kX86InstPackSSWB, &dst, &src); }
3119 
3120   //! @brief Pack with Signed Saturation (MMX).
packsswbX86Compiler3121   inline void packsswb(const MmVar& dst, const Mem& src)
3122   { _emitInstruction(kX86InstPackSSWB, &dst, &src); }
3123 
3124   //! @brief Pack with Signed Saturation (MMX).
packssdwX86Compiler3125   inline void packssdw(const MmVar& dst, const MmVar& src)
3126   { _emitInstruction(kX86InstPackSSDW, &dst, &src); }
3127 
3128   //! @brief Pack with Signed Saturation (MMX).
packssdwX86Compiler3129   inline void packssdw(const MmVar& dst, const Mem& src)
3130   { _emitInstruction(kX86InstPackSSDW, &dst, &src); }
3131 
3132   //! @brief Pack with Unsigned Saturation (MMX).
packuswbX86Compiler3133   inline void packuswb(const MmVar& dst, const MmVar& src)
3134   { _emitInstruction(kX86InstPackUSWB, &dst, &src); }
3135 
3136   //! @brief Pack with Unsigned Saturation (MMX).
packuswbX86Compiler3137   inline void packuswb(const MmVar& dst, const Mem& src)
3138   { _emitInstruction(kX86InstPackUSWB, &dst, &src); }
3139 
3140   //! @brief Packed BYTE Add (MMX).
paddbX86Compiler3141   inline void paddb(const MmVar& dst, const MmVar& src)
3142   { _emitInstruction(kX86InstPAddB, &dst, &src); }
3143 
3144   //! @brief Packed BYTE Add (MMX).
paddbX86Compiler3145   inline void paddb(const MmVar& dst, const Mem& src)
3146   { _emitInstruction(kX86InstPAddB, &dst, &src); }
3147 
3148   //! @brief Packed WORD Add (MMX).
paddwX86Compiler3149   inline void paddw(const MmVar& dst, const MmVar& src)
3150   { _emitInstruction(kX86InstPAddW, &dst, &src); }
3151 
3152   //! @brief Packed WORD Add (MMX).
paddwX86Compiler3153   inline void paddw(const MmVar& dst, const Mem& src)
3154   { _emitInstruction(kX86InstPAddW, &dst, &src); }
3155 
3156   //! @brief Packed DWORD Add (MMX).
padddX86Compiler3157   inline void paddd(const MmVar& dst, const MmVar& src)
3158   { _emitInstruction(kX86InstPAddD, &dst, &src); }
3159 
3160   //! @brief Packed DWORD Add (MMX).
padddX86Compiler3161   inline void paddd(const MmVar& dst, const Mem& src)
3162   { _emitInstruction(kX86InstPAddD, &dst, &src); }
3163 
3164   //! @brief Packed Add with Saturation (MMX).
paddsbX86Compiler3165   inline void paddsb(const MmVar& dst, const MmVar& src)
3166   { _emitInstruction(kX86InstPAddSB, &dst, &src); }
3167 
3168   //! @brief Packed Add with Saturation (MMX).
paddsbX86Compiler3169   inline void paddsb(const MmVar& dst, const Mem& src)
3170   { _emitInstruction(kX86InstPAddSB, &dst, &src); }
3171 
3172   //! @brief Packed Add with Saturation (MMX).
paddswX86Compiler3173   inline void paddsw(const MmVar& dst, const MmVar& src)
3174   { _emitInstruction(kX86InstPAddSW, &dst, &src); }
3175 
3176   //! @brief Packed Add with Saturation (MMX).
paddswX86Compiler3177   inline void paddsw(const MmVar& dst, const Mem& src)
3178   { _emitInstruction(kX86InstPAddSW, &dst, &src); }
3179 
3180   //! @brief Packed Add Unsigned with Saturation (MMX).
paddusbX86Compiler3181   inline void paddusb(const MmVar& dst, const MmVar& src)
3182   { _emitInstruction(kX86InstPAddUSB, &dst, &src); }
3183 
3184   //! @brief Packed Add Unsigned with Saturation (MMX).
paddusbX86Compiler3185   inline void paddusb(const MmVar& dst, const Mem& src)
3186   { _emitInstruction(kX86InstPAddUSB, &dst, &src); }
3187 
3188   //! @brief Packed Add Unsigned with Saturation (MMX).
padduswX86Compiler3189   inline void paddusw(const MmVar& dst, const MmVar& src)
3190   { _emitInstruction(kX86InstPAddUSW, &dst, &src); }
3191 
3192   //! @brief Packed Add Unsigned with Saturation (MMX).
padduswX86Compiler3193   inline void paddusw(const MmVar& dst, const Mem& src)
3194   { _emitInstruction(kX86InstPAddUSW, &dst, &src); }
3195 
3196   //! @brief Logical AND (MMX).
pandX86Compiler3197   inline void pand(const MmVar& dst, const MmVar& src)
3198   { _emitInstruction(kX86InstPAnd, &dst, &src); }
3199 
3200   //! @brief Logical AND (MMX).
pandX86Compiler3201   inline void pand(const MmVar& dst, const Mem& src)
3202   { _emitInstruction(kX86InstPAnd, &dst, &src); }
3203 
3204   //! @brief Logical AND Not (MMX).
pandnX86Compiler3205   inline void pandn(const MmVar& dst, const MmVar& src)
3206   { _emitInstruction(kX86InstPAndN, &dst, &src); }
3207 
3208   //! @brief Logical AND Not (MMX).
pandnX86Compiler3209   inline void pandn(const MmVar& dst, const Mem& src)
3210   { _emitInstruction(kX86InstPAndN, &dst, &src); }
3211 
3212   //! @brief Packed Compare for Equal (BYTES) (MMX).
pcmpeqbX86Compiler3213   inline void pcmpeqb(const MmVar& dst, const MmVar& src)
3214   { _emitInstruction(kX86InstPCmpEqB, &dst, &src); }
3215 
3216   //! @brief Packed Compare for Equal (BYTES) (MMX).
pcmpeqbX86Compiler3217   inline void pcmpeqb(const MmVar& dst, const Mem& src)
3218   { _emitInstruction(kX86InstPCmpEqB, &dst, &src); }
3219 
3220   //! @brief Packed Compare for Equal (WORDS) (MMX).
pcmpeqwX86Compiler3221   inline void pcmpeqw(const MmVar& dst, const MmVar& src)
3222   { _emitInstruction(kX86InstPCmpEqW, &dst, &src); }
3223 
3224   //! @brief Packed Compare for Equal (WORDS) (MMX).
pcmpeqwX86Compiler3225   inline void pcmpeqw(const MmVar& dst, const Mem& src)
3226   { _emitInstruction(kX86InstPCmpEqW, &dst, &src); }
3227 
3228   //! @brief Packed Compare for Equal (DWORDS) (MMX).
pcmpeqdX86Compiler3229   inline void pcmpeqd(const MmVar& dst, const MmVar& src)
3230   { _emitInstruction(kX86InstPCmpEqD, &dst, &src); }
3231 
3232   //! @brief Packed Compare for Equal (DWORDS) (MMX).
pcmpeqdX86Compiler3233   inline void pcmpeqd(const MmVar& dst, const Mem& src)
3234   { _emitInstruction(kX86InstPCmpEqD, &dst, &src); }
3235 
3236   //! @brief Packed Compare for Greater Than (BYTES) (MMX).
pcmpgtbX86Compiler3237   inline void pcmpgtb(const MmVar& dst, const MmVar& src)
3238   { _emitInstruction(kX86InstPCmpGtB, &dst, &src); }
3239 
3240   //! @brief Packed Compare for Greater Than (BYTES) (MMX).
pcmpgtbX86Compiler3241   inline void pcmpgtb(const MmVar& dst, const Mem& src)
3242   { _emitInstruction(kX86InstPCmpGtB, &dst, &src); }
3243 
3244   //! @brief Packed Compare for Greater Than (WORDS) (MMX).
pcmpgtwX86Compiler3245   inline void pcmpgtw(const MmVar& dst, const MmVar& src)
3246   { _emitInstruction(kX86InstPCmpGtW, &dst, &src); }
3247 
3248   //! @brief Packed Compare for Greater Than (WORDS) (MMX).
pcmpgtwX86Compiler3249   inline void pcmpgtw(const MmVar& dst, const Mem& src)
3250   { _emitInstruction(kX86InstPCmpGtW, &dst, &src); }
3251 
3252   //! @brief Packed Compare for Greater Than (DWORDS) (MMX).
pcmpgtdX86Compiler3253   inline void pcmpgtd(const MmVar& dst, const MmVar& src)
3254   { _emitInstruction(kX86InstPCmpGtD, &dst, &src); }
3255 
3256   //! @brief Packed Compare for Greater Than (DWORDS) (MMX).
pcmpgtdX86Compiler3257   inline void pcmpgtd(const MmVar& dst, const Mem& src)
3258   { _emitInstruction(kX86InstPCmpGtD, &dst, &src); }
3259 
3260   //! @brief Packed Multiply High (MMX).
pmulhwX86Compiler3261   inline void pmulhw(const MmVar& dst, const MmVar& src)
3262   { _emitInstruction(kX86InstPMulHW, &dst, &src); }
3263 
3264   //! @brief Packed Multiply High (MMX).
pmulhwX86Compiler3265   inline void pmulhw(const MmVar& dst, const Mem& src)
3266   { _emitInstruction(kX86InstPMulHW, &dst, &src); }
3267 
3268   //! @brief Packed Multiply Low (MMX).
pmullwX86Compiler3269   inline void pmullw(const MmVar& dst, const MmVar& src)
3270   { _emitInstruction(kX86InstPMulLW, &dst, &src); }
3271 
3272   //! @brief Packed Multiply Low (MMX).
pmullwX86Compiler3273   inline void pmullw(const MmVar& dst, const Mem& src)
3274   { _emitInstruction(kX86InstPMulLW, &dst, &src); }
3275 
3276   //! @brief Bitwise Logical OR (MMX).
porX86Compiler3277   inline void por(const MmVar& dst, const MmVar& src)
3278   { _emitInstruction(kX86InstPOr, &dst, &src); }
3279 
3280   //! @brief Bitwise Logical OR (MMX).
porX86Compiler3281   inline void por(const MmVar& dst, const Mem& src)
3282   { _emitInstruction(kX86InstPOr, &dst, &src); }
3283 
3284   //! @brief Packed Multiply and Add (MMX).
pmaddwdX86Compiler3285   inline void pmaddwd(const MmVar& dst, const MmVar& src)
3286   { _emitInstruction(kX86InstPMAddWD, &dst, &src); }
3287 
3288   //! @brief Packed Multiply and Add (MMX).
pmaddwdX86Compiler3289   inline void pmaddwd(const MmVar& dst, const Mem& src)
3290   { _emitInstruction(kX86InstPMAddWD, &dst, &src); }
3291 
3292   //! @brief Packed Shift Left Logical (MMX).
pslldX86Compiler3293   inline void pslld(const MmVar& dst, const MmVar& src)
3294   { _emitInstruction(kX86InstPSllD, &dst, &src); }
3295 
3296   //! @brief Packed Shift Left Logical (MMX).
pslldX86Compiler3297   inline void pslld(const MmVar& dst, const Mem& src)
3298   { _emitInstruction(kX86InstPSllD, &dst, &src); }
3299 
3300   //! @brief Packed Shift Left Logical (MMX).
pslldX86Compiler3301   inline void pslld(const MmVar& dst, const Imm& src)
3302   { _emitInstruction(kX86InstPSllD, &dst, &src); }
3303 
3304   //! @brief Packed Shift Left Logical (MMX).
psllqX86Compiler3305   inline void psllq(const MmVar& dst, const MmVar& src)
3306   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
3307 
3308   //! @brief Packed Shift Left Logical (MMX).
psllqX86Compiler3309   inline void psllq(const MmVar& dst, const Mem& src)
3310   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
3311 
3312   //! @brief Packed Shift Left Logical (MMX).
psllqX86Compiler3313   inline void psllq(const MmVar& dst, const Imm& src)
3314   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
3315 
3316   //! @brief Packed Shift Left Logical (MMX).
psllwX86Compiler3317   inline void psllw(const MmVar& dst, const MmVar& src)
3318   { _emitInstruction(kX86InstPSllW, &dst, &src); }
3319 
3320   //! @brief Packed Shift Left Logical (MMX).
psllwX86Compiler3321   inline void psllw(const MmVar& dst, const Mem& src)
3322   { _emitInstruction(kX86InstPSllW, &dst, &src); }
3323 
3324   //! @brief Packed Shift Left Logical (MMX).
psllwX86Compiler3325   inline void psllw(const MmVar& dst, const Imm& src)
3326   { _emitInstruction(kX86InstPSllW, &dst, &src); }
3327 
3328   //! @brief Packed Shift Right Arithmetic (MMX).
psradX86Compiler3329   inline void psrad(const MmVar& dst, const MmVar& src)
3330   { _emitInstruction(kX86InstPSraD, &dst, &src); }
3331 
3332   //! @brief Packed Shift Right Arithmetic (MMX).
psradX86Compiler3333   inline void psrad(const MmVar& dst, const Mem& src)
3334   { _emitInstruction(kX86InstPSraD, &dst, &src);}
3335 
3336   //! @brief Packed Shift Right Arithmetic (MMX).
psradX86Compiler3337   inline void psrad(const MmVar& dst, const Imm& src)
3338   { _emitInstruction(kX86InstPSraD, &dst, &src); }
3339 
3340   //! @brief Packed Shift Right Arithmetic (MMX).
psrawX86Compiler3341   inline void psraw(const MmVar& dst, const MmVar& src)
3342   { _emitInstruction(kX86InstPSraW, &dst, &src); }
3343 
3344   //! @brief Packed Shift Right Arithmetic (MMX).
psrawX86Compiler3345   inline void psraw(const MmVar& dst, const Mem& src)
3346   { _emitInstruction(kX86InstPSraW, &dst, &src); }
3347 
3348   //! @brief Packed Shift Right Arithmetic (MMX).
psrawX86Compiler3349   inline void psraw(const MmVar& dst, const Imm& src)
3350   { _emitInstruction(kX86InstPSraW, &dst, &src); }
3351 
3352   //! @brief Packed Shift Right Logical (MMX).
psrldX86Compiler3353   inline void psrld(const MmVar& dst, const MmVar& src)
3354   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
3355 
3356   //! @brief Packed Shift Right Logical (MMX).
psrldX86Compiler3357   inline void psrld(const MmVar& dst, const Mem& src)
3358   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
3359 
3360   //! @brief Packed Shift Right Logical (MMX).
psrldX86Compiler3361   inline void psrld(const MmVar& dst, const Imm& src)
3362   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
3363 
3364   //! @brief Packed Shift Right Logical (MMX).
psrlqX86Compiler3365   inline void psrlq(const MmVar& dst, const MmVar& src)
3366   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
3367 
3368   //! @brief Packed Shift Right Logical (MMX).
psrlqX86Compiler3369   inline void psrlq(const MmVar& dst, const Mem& src)
3370   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
3371 
3372   //! @brief Packed Shift Right Logical (MMX).
psrlqX86Compiler3373   inline void psrlq(const MmVar& dst, const Imm& src)
3374   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
3375 
3376   //! @brief Packed Shift Right Logical (MMX).
psrlwX86Compiler3377   inline void psrlw(const MmVar& dst, const MmVar& src)
3378   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
3379 
3380   //! @brief Packed Shift Right Logical (MMX).
psrlwX86Compiler3381   inline void psrlw(const MmVar& dst, const Mem& src)
3382   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
3383 
3384   //! @brief Packed Shift Right Logical (MMX).
psrlwX86Compiler3385   inline void psrlw(const MmVar& dst, const Imm& src)
3386   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
3387 
3388   //! @brief Packed Subtract (MMX).
psubbX86Compiler3389   inline void psubb(const MmVar& dst, const MmVar& src)
3390   { _emitInstruction(kX86InstPSubB, &dst, &src); }
3391 
3392   //! @brief Packed Subtract (MMX).
psubbX86Compiler3393   inline void psubb(const MmVar& dst, const Mem& src)
3394   { _emitInstruction(kX86InstPSubB, &dst, &src); }
3395 
3396   //! @brief Packed Subtract (MMX).
psubwX86Compiler3397   inline void psubw(const MmVar& dst, const MmVar& src)
3398   { _emitInstruction(kX86InstPSubW, &dst, &src); }
3399 
3400   //! @brief Packed Subtract (MMX).
psubwX86Compiler3401   inline void psubw(const MmVar& dst, const Mem& src)
3402   { _emitInstruction(kX86InstPSubW, &dst, &src); }
3403 
3404   //! @brief Packed Subtract (MMX).
psubdX86Compiler3405   inline void psubd(const MmVar& dst, const MmVar& src)
3406   { _emitInstruction(kX86InstPSubD, &dst, &src); }
3407 
3408   //! @brief Packed Subtract (MMX).
psubdX86Compiler3409   inline void psubd(const MmVar& dst, const Mem& src)
3410   { _emitInstruction(kX86InstPSubD, &dst, &src); }
3411 
3412   //! @brief Packed Subtract with Saturation (MMX).
psubsbX86Compiler3413   inline void psubsb(const MmVar& dst, const MmVar& src)
3414   { _emitInstruction(kX86InstPSubSB, &dst, &src); }
3415 
3416   //! @brief Packed Subtract with Saturation (MMX).
psubsbX86Compiler3417   inline void psubsb(const MmVar& dst, const Mem& src)
3418   { _emitInstruction(kX86InstPSubSB, &dst, &src); }
3419 
3420   //! @brief Packed Subtract with Saturation (MMX).
psubswX86Compiler3421   inline void psubsw(const MmVar& dst, const MmVar& src)
3422   { _emitInstruction(kX86InstPSubSW, &dst, &src); }
3423 
3424   //! @brief Packed Subtract with Saturation (MMX).
psubswX86Compiler3425   inline void psubsw(const MmVar& dst, const Mem& src)
3426   { _emitInstruction(kX86InstPSubSW, &dst, &src); }
3427 
3428   //! @brief Packed Subtract with Unsigned Saturation (MMX).
psubusbX86Compiler3429   inline void psubusb(const MmVar& dst, const MmVar& src)
3430   { _emitInstruction(kX86InstPSubUSB, &dst, &src); }
3431 
3432   //! @brief Packed Subtract with Unsigned Saturation (MMX).
psubusbX86Compiler3433   inline void psubusb(const MmVar& dst, const Mem& src)
3434   { _emitInstruction(kX86InstPSubUSB, &dst, &src); }
3435 
3436   //! @brief Packed Subtract with Unsigned Saturation (MMX).
psubuswX86Compiler3437   inline void psubusw(const MmVar& dst, const MmVar& src)
3438   { _emitInstruction(kX86InstPSubUSW, &dst, &src); }
3439 
3440   //! @brief Packed Subtract with Unsigned Saturation (MMX).
psubuswX86Compiler3441   inline void psubusw(const MmVar& dst, const Mem& src)
3442   { _emitInstruction(kX86InstPSubUSW, &dst, &src); }
3443 
3444   //! @brief Unpack High Packed Data (MMX).
punpckhbwX86Compiler3445   inline void punpckhbw(const MmVar& dst, const MmVar& src)
3446   { _emitInstruction(kX86InstPunpckHBW, &dst, &src); }
3447 
3448   //! @brief Unpack High Packed Data (MMX).
punpckhbwX86Compiler3449   inline void punpckhbw(const MmVar& dst, const Mem& src)
3450   { _emitInstruction(kX86InstPunpckHBW, &dst, &src); }
3451 
3452   //! @brief Unpack High Packed Data (MMX).
punpckhwdX86Compiler3453   inline void punpckhwd(const MmVar& dst, const MmVar& src)
3454   { _emitInstruction(kX86InstPunpckHWD, &dst, &src); }
3455 
3456   //! @brief Unpack High Packed Data (MMX).
punpckhwdX86Compiler3457   inline void punpckhwd(const MmVar& dst, const Mem& src)
3458   { _emitInstruction(kX86InstPunpckHWD, &dst, &src); }
3459 
3460   //! @brief Unpack High Packed Data (MMX).
punpckhdqX86Compiler3461   inline void punpckhdq(const MmVar& dst, const MmVar& src)
3462   { _emitInstruction(kX86InstPunpckHDQ, &dst, &src); }
3463 
3464   //! @brief Unpack High Packed Data (MMX).
punpckhdqX86Compiler3465   inline void punpckhdq(const MmVar& dst, const Mem& src)
3466   { _emitInstruction(kX86InstPunpckHDQ, &dst, &src); }
3467 
3468   //! @brief Unpack High Packed Data (MMX).
punpcklbwX86Compiler3469   inline void punpcklbw(const MmVar& dst, const MmVar& src)
3470   { _emitInstruction(kX86InstPunpckLBW, &dst, &src); }
3471 
3472   //! @brief Unpack High Packed Data (MMX).
punpcklbwX86Compiler3473   inline void punpcklbw(const MmVar& dst, const Mem& src)
3474   { _emitInstruction(kX86InstPunpckLBW, &dst, &src); }
3475 
3476   //! @brief Unpack High Packed Data (MMX).
punpcklwdX86Compiler3477   inline void punpcklwd(const MmVar& dst, const MmVar& src)
3478   { _emitInstruction(kX86InstPunpckLWD, &dst, &src); }
3479 
3480   //! @brief Unpack High Packed Data (MMX).
punpcklwdX86Compiler3481   inline void punpcklwd(const MmVar& dst, const Mem& src)
3482   { _emitInstruction(kX86InstPunpckLWD, &dst, &src); }
3483 
3484   //! @brief Unpack High Packed Data (MMX).
punpckldqX86Compiler3485   inline void punpckldq(const MmVar& dst, const MmVar& src)
3486   { _emitInstruction(kX86InstPunpckLDQ, &dst, &src); }
3487 
3488   //! @brief Unpack High Packed Data (MMX).
punpckldqX86Compiler3489   inline void punpckldq(const MmVar& dst, const Mem& src)
3490   { _emitInstruction(kX86InstPunpckLDQ, &dst, &src); }
3491 
3492   //! @brief Bitwise Exclusive OR (MMX).
pxorX86Compiler3493   inline void pxor(const MmVar& dst, const MmVar& src)
3494   { _emitInstruction(kX86InstPXor, &dst, &src); }
3495 
3496   //! @brief Bitwise Exclusive OR (MMX).
pxorX86Compiler3497   inline void pxor(const MmVar& dst, const Mem& src)
3498   { _emitInstruction(kX86InstPXor, &dst, &src); }
3499 
3500   // --------------------------------------------------------------------------
3501   // [3dNow]
3502   // --------------------------------------------------------------------------
3503 
3504   //! @brief Faster EMMS (3dNow!).
3505   //!
3506   //! @note Use only for early AMD processors where is only 3dNow! or SSE. If
3507   //! CPU contains SSE2, it's better to use @c emms() ( @c femms() is mapped
3508   //! to @c emms() ).
femmsX86Compiler3509   inline void femms()
3510   { _emitInstruction(kX86InstFEmms); }
3511 
3512   //! @brief Packed SP-FP to Integer Convert (3dNow!).
pf2idX86Compiler3513   inline void pf2id(const MmVar& dst, const MmVar& src)
3514   { _emitInstruction(kX86InstPF2ID, &dst, &src); }
3515 
3516   //! @brief Packed SP-FP to Integer Convert (3dNow!).
pf2idX86Compiler3517   inline void pf2id(const MmVar& dst, const Mem& src)
3518   { _emitInstruction(kX86InstPF2ID, &dst, &src); }
3519 
3520   //! @brief  Packed SP-FP to Integer Word Convert (3dNow!).
pf2iwX86Compiler3521   inline void pf2iw(const MmVar& dst, const MmVar& src)
3522   { _emitInstruction(kX86InstPF2IW, &dst, &src); }
3523 
3524   //! @brief  Packed SP-FP to Integer Word Convert (3dNow!).
pf2iwX86Compiler3525   inline void pf2iw(const MmVar& dst, const Mem& src)
3526   { _emitInstruction(kX86InstPF2IW, &dst, &src); }
3527 
3528   //! @brief Packed SP-FP Accumulate (3dNow!).
pfaccX86Compiler3529   inline void pfacc(const MmVar& dst, const MmVar& src)
3530   { _emitInstruction(kX86InstPFAcc, &dst, &src); }
3531 
3532   //! @brief Packed SP-FP Accumulate (3dNow!).
pfaccX86Compiler3533   inline void pfacc(const MmVar& dst, const Mem& src)
3534   { _emitInstruction(kX86InstPFAcc, &dst, &src); }
3535 
3536   //! @brief Packed SP-FP Addition (3dNow!).
pfaddX86Compiler3537   inline void pfadd(const MmVar& dst, const MmVar& src)
3538   { _emitInstruction(kX86InstPFAdd, &dst, &src); }
3539 
3540   //! @brief Packed SP-FP Addition (3dNow!).
pfaddX86Compiler3541   inline void pfadd(const MmVar& dst, const Mem& src)
3542   { _emitInstruction(kX86InstPFAdd, &dst, &src); }
3543 
3544   //! @brief Packed SP-FP Compare - dst == src (3dNow!).
pfcmpeqX86Compiler3545   inline void pfcmpeq(const MmVar& dst, const MmVar& src)
3546   { _emitInstruction(kX86InstPFCmpEQ, &dst, &src); }
3547 
3548   //! @brief Packed SP-FP Compare - dst == src (3dNow!).
pfcmpeqX86Compiler3549   inline void pfcmpeq(const MmVar& dst, const Mem& src)
3550   { _emitInstruction(kX86InstPFCmpEQ, &dst, &src); }
3551 
3552   //! @brief Packed SP-FP Compare - dst >= src (3dNow!).
pfcmpgeX86Compiler3553   inline void pfcmpge(const MmVar& dst, const MmVar& src)
3554   { _emitInstruction(kX86InstPFCmpGE, &dst, &src); }
3555 
3556   //! @brief Packed SP-FP Compare - dst >= src (3dNow!).
pfcmpgeX86Compiler3557   inline void pfcmpge(const MmVar& dst, const Mem& src)
3558   { _emitInstruction(kX86InstPFCmpGE, &dst, &src); }
3559 
3560   //! @brief Packed SP-FP Compare - dst > src (3dNow!).
pfcmpgtX86Compiler3561   inline void pfcmpgt(const MmVar& dst, const MmVar& src)
3562   { _emitInstruction(kX86InstPFCmpGT, &dst, &src); }
3563 
3564   //! @brief Packed SP-FP Compare - dst > src (3dNow!).
pfcmpgtX86Compiler3565   inline void pfcmpgt(const MmVar& dst, const Mem& src)
3566   { _emitInstruction(kX86InstPFCmpGT, &dst, &src); }
3567 
3568   //! @brief Packed SP-FP Maximum (3dNow!).
pfmaxX86Compiler3569   inline void pfmax(const MmVar& dst, const MmVar& src)
3570   { _emitInstruction(kX86InstPFMax, &dst, &src); }
3571 
3572   //! @brief Packed SP-FP Maximum (3dNow!).
pfmaxX86Compiler3573   inline void pfmax(const MmVar& dst, const Mem& src)
3574   { _emitInstruction(kX86InstPFMax, &dst, &src); }
3575 
3576   //! @brief Packed SP-FP Minimum (3dNow!).
pfminX86Compiler3577   inline void pfmin(const MmVar& dst, const MmVar& src)
3578   { _emitInstruction(kX86InstPFMin, &dst, &src); }
3579 
3580   //! @brief Packed SP-FP Minimum (3dNow!).
pfminX86Compiler3581   inline void pfmin(const MmVar& dst, const Mem& src)
3582   { _emitInstruction(kX86InstPFMin, &dst, &src); }
3583 
3584   //! @brief Packed SP-FP Multiply (3dNow!).
pfmulX86Compiler3585   inline void pfmul(const MmVar& dst, const MmVar& src)
3586   { _emitInstruction(kX86InstPFMul, &dst, &src); }
3587 
3588   //! @brief Packed SP-FP Multiply (3dNow!).
pfmulX86Compiler3589   inline void pfmul(const MmVar& dst, const Mem& src)
3590   { _emitInstruction(kX86InstPFMul, &dst, &src); }
3591 
3592   //! @brief Packed SP-FP Negative Accumulate (3dNow!).
pfnaccX86Compiler3593   inline void pfnacc(const MmVar& dst, const MmVar& src)
3594   { _emitInstruction(kX86InstPFNAcc, &dst, &src); }
3595 
3596   //! @brief Packed SP-FP Negative Accumulate (3dNow!).
pfnaccX86Compiler3597   inline void pfnacc(const MmVar& dst, const Mem& src)
3598   { _emitInstruction(kX86InstPFNAcc, &dst, &src); }
3599 
3600   //! @brief Packed SP-FP Mixed Accumulate (3dNow!).
pfpnaccX86Compiler3601   inline void pfpnacc(const MmVar& dst, const MmVar& src)
3602   { _emitInstruction(kX86InstPFPNAcc, &dst, &src); }
3603 
3604   //! @brief Packed SP-FP Mixed Accumulate (3dNow!).
pfpnaccX86Compiler3605   inline void pfpnacc(const MmVar& dst, const Mem& src)
3606   { _emitInstruction(kX86InstPFPNAcc, &dst, &src); }
3607 
3608   //! @brief Packed SP-FP Reciprocal Approximation (3dNow!).
pfrcpX86Compiler3609   inline void pfrcp(const MmVar& dst, const MmVar& src)
3610   { _emitInstruction(kX86InstPFRcp, &dst, &src); }
3611 
3612   //! @brief Packed SP-FP Reciprocal Approximation (3dNow!).
pfrcpX86Compiler3613   inline void pfrcp(const MmVar& dst, const Mem& src)
3614   { _emitInstruction(kX86InstPFRcp, &dst, &src); }
3615 
3616   //! @brief Packed SP-FP Reciprocal, First Iteration Step (3dNow!).
pfrcpit1X86Compiler3617   inline void pfrcpit1(const MmVar& dst, const MmVar& src)
3618   { _emitInstruction(kX86InstPFRcpIt1, &dst, &src); }
3619 
3620   //! @brief Packed SP-FP Reciprocal, First Iteration Step (3dNow!).
pfrcpit1X86Compiler3621   inline void pfrcpit1(const MmVar& dst, const Mem& src)
3622   { _emitInstruction(kX86InstPFRcpIt1, &dst, &src); }
3623 
3624   //! @brief Packed SP-FP Reciprocal, Second Iteration Step (3dNow!).
pfrcpit2X86Compiler3625   inline void pfrcpit2(const MmVar& dst, const MmVar& src)
3626   { _emitInstruction(kX86InstPFRcpIt2, &dst, &src); }
3627 
3628   //! @brief Packed SP-FP Reciprocal, Second Iteration Step (3dNow!).
pfrcpit2X86Compiler3629   inline void pfrcpit2(const MmVar& dst, const Mem& src)
3630   { _emitInstruction(kX86InstPFRcpIt2, &dst, &src); }
3631 
3632   //! @brief Packed SP-FP Reciprocal Square Root, First Iteration Step (3dNow!).
pfrsqit1X86Compiler3633   inline void pfrsqit1(const MmVar& dst, const MmVar& src)
3634   { _emitInstruction(kX86InstPFRSqIt1, &dst, &src); }
3635 
3636   //! @brief Packed SP-FP Reciprocal Square Root, First Iteration Step (3dNow!).
pfrsqit1X86Compiler3637   inline void pfrsqit1(const MmVar& dst, const Mem& src)
3638   { _emitInstruction(kX86InstPFRSqIt1, &dst, &src); }
3639 
3640   //! @brief Packed SP-FP Reciprocal Square Root Approximation (3dNow!).
pfrsqrtX86Compiler3641   inline void pfrsqrt(const MmVar& dst, const MmVar& src)
3642   { _emitInstruction(kX86InstPFRSqrt, &dst, &src); }
3643 
3644   //! @brief Packed SP-FP Reciprocal Square Root Approximation (3dNow!).
pfrsqrtX86Compiler3645   inline void pfrsqrt(const MmVar& dst, const Mem& src)
3646   { _emitInstruction(kX86InstPFRSqrt, &dst, &src); }
3647 
3648   //! @brief Packed SP-FP Subtract (3dNow!).
pfsubX86Compiler3649   inline void pfsub(const MmVar& dst, const MmVar& src)
3650   { _emitInstruction(kX86InstPFSub, &dst, &src); }
3651 
3652   //! @brief Packed SP-FP Subtract (3dNow!).
pfsubX86Compiler3653   inline void pfsub(const MmVar& dst, const Mem& src)
3654   { _emitInstruction(kX86InstPFSub, &dst, &src); }
3655 
3656   //! @brief Packed SP-FP Reverse Subtract (3dNow!).
pfsubrX86Compiler3657   inline void pfsubr(const MmVar& dst, const MmVar& src)
3658   { _emitInstruction(kX86InstPFSubR, &dst, &src); }
3659 
3660   //! @brief Packed SP-FP Reverse Subtract (3dNow!).
pfsubrX86Compiler3661   inline void pfsubr(const MmVar& dst, const Mem& src)
3662   { _emitInstruction(kX86InstPFSubR, &dst, &src); }
3663 
3664   //! @brief Packed DWords to SP-FP (3dNow!).
pi2fdX86Compiler3665   inline void pi2fd(const MmVar& dst, const MmVar& src)
3666   { _emitInstruction(kX86InstPI2FD, &dst, &src); }
3667 
3668   //! @brief Packed DWords to SP-FP (3dNow!).
pi2fdX86Compiler3669   inline void pi2fd(const MmVar& dst, const Mem& src)
3670   { _emitInstruction(kX86InstPI2FD, &dst, &src); }
3671 
3672   //! @brief Packed Words to SP-FP (3dNow!).
pi2fwX86Compiler3673   inline void pi2fw(const MmVar& dst, const MmVar& src)
3674   { _emitInstruction(kX86InstPI2FW, &dst, &src); }
3675 
3676   //! @brief Packed Words to SP-FP (3dNow!).
pi2fwX86Compiler3677   inline void pi2fw(const MmVar& dst, const Mem& src)
3678   { _emitInstruction(kX86InstPI2FW, &dst, &src); }
3679 
3680   //! @brief Packed swap DWord (3dNow!)
pswapdX86Compiler3681   inline void pswapd(const MmVar& dst, const MmVar& src)
3682   { _emitInstruction(kX86InstPSwapD, &dst, &src); }
3683 
3684   //! @brief Packed swap DWord (3dNow!)
pswapdX86Compiler3685   inline void pswapd(const MmVar& dst, const Mem& src)
3686   { _emitInstruction(kX86InstPSwapD, &dst, &src); }
3687 
3688   // --------------------------------------------------------------------------
3689   // [SSE]
3690   // --------------------------------------------------------------------------
3691 
3692   //! @brief Packed SP-FP Add (SSE).
addpsX86Compiler3693   inline void addps(const XmmVar& dst, const XmmVar& src)
3694   { _emitInstruction(kX86InstAddPS, &dst, &src); }
3695   //! @brief Packed SP-FP Add (SSE).
addpsX86Compiler3696   inline void addps(const XmmVar& dst, const Mem& src)
3697   { _emitInstruction(kX86InstAddPS, &dst, &src); }
3698 
3699   //! @brief Scalar SP-FP Add (SSE).
addssX86Compiler3700   inline void addss(const XmmVar& dst, const XmmVar& src)
3701   { _emitInstruction(kX86InstAddSS, &dst, &src); }
3702   //! @brief Scalar SP-FP Add (SSE).
addssX86Compiler3703   inline void addss(const XmmVar& dst, const Mem& src)
3704   { _emitInstruction(kX86InstAddSS, &dst, &src); }
3705 
3706   //! @brief Bit-wise Logical And Not For SP-FP (SSE).
andnpsX86Compiler3707   inline void andnps(const XmmVar& dst, const XmmVar& src)
3708   { _emitInstruction(kX86InstAndnPS, &dst, &src); }
3709   //! @brief Bit-wise Logical And Not For SP-FP (SSE).
andnpsX86Compiler3710   inline void andnps(const XmmVar& dst, const Mem& src)
3711   { _emitInstruction(kX86InstAndnPS, &dst, &src); }
3712 
3713   //! @brief Bit-wise Logical And For SP-FP (SSE).
andpsX86Compiler3714   inline void andps(const XmmVar& dst, const XmmVar& src)
3715   { _emitInstruction(kX86InstAndPS, &dst, &src); }
3716   //! @brief Bit-wise Logical And For SP-FP (SSE).
andpsX86Compiler3717   inline void andps(const XmmVar& dst, const Mem& src)
3718   { _emitInstruction(kX86InstAndPS, &dst, &src); }
3719 
3720   //! @brief Packed SP-FP Compare (SSE).
cmppsX86Compiler3721   inline void cmpps(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
3722   { _emitInstruction(kX86InstCmpPS, &dst, &src, &imm8); }
3723   //! @brief Packed SP-FP Compare (SSE).
cmppsX86Compiler3724   inline void cmpps(const XmmVar& dst, const Mem& src, const Imm& imm8)
3725   { _emitInstruction(kX86InstCmpPS, &dst, &src, &imm8); }
3726 
3727   //! @brief Compare Scalar SP-FP Values (SSE).
cmpssX86Compiler3728   inline void cmpss(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
3729   { _emitInstruction(kX86InstCmpSS, &dst, &src, &imm8); }
3730   //! @brief Compare Scalar SP-FP Values (SSE).
cmpssX86Compiler3731   inline void cmpss(const XmmVar& dst, const Mem& src, const Imm& imm8)
3732   { _emitInstruction(kX86InstCmpSS, &dst, &src, &imm8); }
3733 
3734   //! @brief Scalar Ordered SP-FP Compare and Set EFLAGS (SSE).
comissX86Compiler3735   inline void comiss(const XmmVar& dst, const XmmVar& src)
3736   { _emitInstruction(kX86InstComISS, &dst, &src); }
3737   //! @brief Scalar Ordered SP-FP Compare and Set EFLAGS (SSE).
comissX86Compiler3738   inline void comiss(const XmmVar& dst, const Mem& src)
3739   { _emitInstruction(kX86InstComISS, &dst, &src); }
3740 
3741   //! @brief Packed Signed INT32 to Packed SP-FP Conversion (SSE).
cvtpi2psX86Compiler3742   inline void cvtpi2ps(const XmmVar& dst, const MmVar& src)
3743   { _emitInstruction(kX86InstCvtPI2PS, &dst, &src); }
3744   //! @brief Packed Signed INT32 to Packed SP-FP Conversion (SSE).
cvtpi2psX86Compiler3745   inline void cvtpi2ps(const XmmVar& dst, const Mem& src)
3746   { _emitInstruction(kX86InstCvtPI2PS, &dst, &src); }
3747 
3748   //! @brief Packed SP-FP to Packed INT32 Conversion (SSE).
cvtps2piX86Compiler3749   inline void cvtps2pi(const MmVar& dst, const XmmVar& src)
3750   { _emitInstruction(kX86InstCvtPS2PI, &dst, &src); }
3751   //! @brief Packed SP-FP to Packed INT32 Conversion (SSE).
cvtps2piX86Compiler3752   inline void cvtps2pi(const MmVar& dst, const Mem& src)
3753   { _emitInstruction(kX86InstCvtPS2PI, &dst, &src); }
3754 
3755   //! @brief Scalar Signed INT32 to SP-FP Conversion (SSE).
cvtsi2ssX86Compiler3756   inline void cvtsi2ss(const XmmVar& dst, const GpVar& src)
3757   { _emitInstruction(kX86InstCvtSI2SS, &dst, &src); }
3758   //! @brief Scalar Signed INT32 to SP-FP Conversion (SSE).
cvtsi2ssX86Compiler3759   inline void cvtsi2ss(const XmmVar& dst, const Mem& src)
3760   { _emitInstruction(kX86InstCvtSI2SS, &dst, &src); }
3761 
3762   //! @brief Scalar SP-FP to Signed INT32 Conversion (SSE).
cvtss2siX86Compiler3763   inline void cvtss2si(const GpVar& dst, const XmmVar& src)
3764   { _emitInstruction(kX86InstCvtSS2SI, &dst, &src); }
3765   //! @brief Scalar SP-FP to Signed INT32 Conversion (SSE).
cvtss2siX86Compiler3766   inline void cvtss2si(const GpVar& dst, const Mem& src)
3767   { _emitInstruction(kX86InstCvtSS2SI, &dst, &src); }
3768 
3769   //! @brief Packed SP-FP to Packed INT32 Conversion (truncate) (SSE).
cvttps2piX86Compiler3770   inline void cvttps2pi(const MmVar& dst, const XmmVar& src)
3771   { _emitInstruction(kX86InstCvttPS2PI, &dst, &src); }
3772   //! @brief Packed SP-FP to Packed INT32 Conversion (truncate) (SSE).
cvttps2piX86Compiler3773   inline void cvttps2pi(const MmVar& dst, const Mem& src)
3774   { _emitInstruction(kX86InstCvttPS2PI, &dst, &src); }
3775 
3776   //! @brief Scalar SP-FP to Signed INT32 Conversion (truncate) (SSE).
cvttss2siX86Compiler3777   inline void cvttss2si(const GpVar& dst, const XmmVar& src)
3778   { _emitInstruction(kX86InstCvttSS2SI, &dst, &src); }
3779   //! @brief Scalar SP-FP to Signed INT32 Conversion (truncate) (SSE).
cvttss2siX86Compiler3780   inline void cvttss2si(const GpVar& dst, const Mem& src)
3781   { _emitInstruction(kX86InstCvttSS2SI, &dst, &src); }
3782 
3783   //! @brief Packed SP-FP Divide (SSE).
divpsX86Compiler3784   inline void divps(const XmmVar& dst, const XmmVar& src)
3785   { _emitInstruction(kX86InstDivPS, &dst, &src); }
3786   //! @brief Packed SP-FP Divide (SSE).
divpsX86Compiler3787   inline void divps(const XmmVar& dst, const Mem& src)
3788   { _emitInstruction(kX86InstDivPS, &dst, &src); }
3789 
3790   //! @brief Scalar SP-FP Divide (SSE).
divssX86Compiler3791   inline void divss(const XmmVar& dst, const XmmVar& src)
3792   { _emitInstruction(kX86InstDivSS, &dst, &src); }
3793   //! @brief Scalar SP-FP Divide (SSE).
divssX86Compiler3794   inline void divss(const XmmVar& dst, const Mem& src)
3795   { _emitInstruction(kX86InstDivSS, &dst, &src); }
3796 
3797   //! @brief Load Streaming SIMD Extension Control/Status (SSE).
ldmxcsrX86Compiler3798   inline void ldmxcsr(const Mem& src)
3799   { _emitInstruction(kX86InstLdMXCSR, &src); }
3800 
3801   //! @brief Byte Mask Write (SSE).
3802   //!
3803   //! @note The default memory location is specified by DS:EDI.
maskmovqX86Compiler3804   inline void maskmovq(const GpVar& dst_ptr, const MmVar& data, const MmVar& mask)
3805   { _emitInstruction(kX86InstMaskMovQ, &dst_ptr, &data, &mask); }
3806 
3807   //! @brief Packed SP-FP Maximum (SSE).
maxpsX86Compiler3808   inline void maxps(const XmmVar& dst, const XmmVar& src)
3809   { _emitInstruction(kX86InstMaxPS, &dst, &src); }
3810   //! @brief Packed SP-FP Maximum (SSE).
maxpsX86Compiler3811   inline void maxps(const XmmVar& dst, const Mem& src)
3812   { _emitInstruction(kX86InstMaxPS, &dst, &src); }
3813 
3814   //! @brief Scalar SP-FP Maximum (SSE).
maxssX86Compiler3815   inline void maxss(const XmmVar& dst, const XmmVar& src)
3816   { _emitInstruction(kX86InstMaxSS, &dst, &src); }
3817   //! @brief Scalar SP-FP Maximum (SSE).
maxssX86Compiler3818   inline void maxss(const XmmVar& dst, const Mem& src)
3819   { _emitInstruction(kX86InstMaxSS, &dst, &src); }
3820 
3821   //! @brief Packed SP-FP Minimum (SSE).
minpsX86Compiler3822   inline void minps(const XmmVar& dst, const XmmVar& src)
3823   { _emitInstruction(kX86InstMinPS, &dst, &src); }
3824   //! @brief Packed SP-FP Minimum (SSE).
minpsX86Compiler3825   inline void minps(const XmmVar& dst, const Mem& src)
3826   { _emitInstruction(kX86InstMinPS, &dst, &src); }
3827 
3828   //! @brief Scalar SP-FP Minimum (SSE).
minssX86Compiler3829   inline void minss(const XmmVar& dst, const XmmVar& src)
3830   { _emitInstruction(kX86InstMinSS, &dst, &src); }
3831   //! @brief Scalar SP-FP Minimum (SSE).
minssX86Compiler3832   inline void minss(const XmmVar& dst, const Mem& src)
3833   { _emitInstruction(kX86InstMinSS, &dst, &src); }
3834 
3835   //! @brief Move Aligned Packed SP-FP Values (SSE).
movapsX86Compiler3836   inline void movaps(const XmmVar& dst, const XmmVar& src)
3837   { _emitInstruction(kX86InstMovAPS, &dst, &src); }
3838   //! @brief Move Aligned Packed SP-FP Values (SSE).
movapsX86Compiler3839   inline void movaps(const XmmVar& dst, const Mem& src)
3840   { _emitInstruction(kX86InstMovAPS, &dst, &src); }
3841 
3842   //! @brief Move Aligned Packed SP-FP Values (SSE).
movapsX86Compiler3843   inline void movaps(const Mem& dst, const XmmVar& src)
3844   { _emitInstruction(kX86InstMovAPS, &dst, &src); }
3845 
3846   //! @brief Move DWord.
movdX86Compiler3847   inline void movd(const Mem& dst, const XmmVar& src)
3848   { _emitInstruction(kX86InstMovD, &dst, &src); }
3849   //! @brief Move DWord.
movdX86Compiler3850   inline void movd(const GpVar& dst, const XmmVar& src)
3851   { _emitInstruction(kX86InstMovD, &dst, &src); }
3852   //! @brief Move DWord.
movdX86Compiler3853   inline void movd(const XmmVar& dst, const Mem& src)
3854   { _emitInstruction(kX86InstMovD, &dst, &src); }
3855   //! @brief Move DWord.
movdX86Compiler3856   inline void movd(const XmmVar& dst, const GpVar& src)
3857   { _emitInstruction(kX86InstMovD, &dst, &src); }
3858 
3859   //! @brief Move QWord (SSE).
movqX86Compiler3860   inline void movq(const XmmVar& dst, const XmmVar& src)
3861   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3862   //! @brief Move QWord (SSE).
movqX86Compiler3863   inline void movq(const Mem& dst, const XmmVar& src)
3864   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3865 #if defined(ASMJIT_X64)
3866   //! @brief Move QWord (SSE).
movqX86Compiler3867   inline void movq(const GpVar& dst, const XmmVar& src)
3868   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3869 #endif // ASMJIT_X64
3870   //! @brief Move QWord (SSE).
movqX86Compiler3871   inline void movq(const XmmVar& dst, const Mem& src)
3872   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3873 #if defined(ASMJIT_X64)
3874   //! @brief Move QWord (SSE).
movqX86Compiler3875   inline void movq(const XmmVar& dst, const GpVar& src)
3876   { _emitInstruction(kX86InstMovQ, &dst, &src); }
3877 #endif // ASMJIT_X64
3878 
3879   //! @brief Move 64 Bits Non Temporal (SSE).
movntqX86Compiler3880   inline void movntq(const Mem& dst, const MmVar& src)
3881   { _emitInstruction(kX86InstMovNTQ, &dst, &src); }
3882 
3883   //! @brief High to Low Packed SP-FP (SSE).
movhlpsX86Compiler3884   inline void movhlps(const XmmVar& dst, const XmmVar& src)
3885   { _emitInstruction(kX86InstMovHLPS, &dst, &src); }
3886 
3887   //! @brief Move High Packed SP-FP (SSE).
movhpsX86Compiler3888   inline void movhps(const XmmVar& dst, const Mem& src)
3889   { _emitInstruction(kX86InstMovHPS, &dst, &src); }
3890 
3891   //! @brief Move High Packed SP-FP (SSE).
movhpsX86Compiler3892   inline void movhps(const Mem& dst, const XmmVar& src)
3893   { _emitInstruction(kX86InstMovHPS, &dst, &src); }
3894 
3895   //! @brief Move Low to High Packed SP-FP (SSE).
movlhpsX86Compiler3896   inline void movlhps(const XmmVar& dst, const XmmVar& src)
3897   { _emitInstruction(kX86InstMovLHPS, &dst, &src); }
3898 
3899   //! @brief Move Low Packed SP-FP (SSE).
movlpsX86Compiler3900   inline void movlps(const XmmVar& dst, const Mem& src)
3901   { _emitInstruction(kX86InstMovLPS, &dst, &src); }
3902 
3903   //! @brief Move Low Packed SP-FP (SSE).
movlpsX86Compiler3904   inline void movlps(const Mem& dst, const XmmVar& src)
3905   { _emitInstruction(kX86InstMovLPS, &dst, &src); }
3906 
3907   //! @brief Move Aligned Four Packed SP-FP Non Temporal (SSE).
movntpsX86Compiler3908   inline void movntps(const Mem& dst, const XmmVar& src)
3909   { _emitInstruction(kX86InstMovNTPS, &dst, &src); }
3910 
3911   //! @brief Move Scalar SP-FP (SSE).
movssX86Compiler3912   inline void movss(const XmmVar& dst, const XmmVar& src)
3913   { _emitInstruction(kX86InstMovSS, &dst, &src); }
3914 
3915   //! @brief Move Scalar SP-FP (SSE).
movssX86Compiler3916   inline void movss(const XmmVar& dst, const Mem& src)
3917   { _emitInstruction(kX86InstMovSS, &dst, &src); }
3918 
3919   //! @brief Move Scalar SP-FP (SSE).
movssX86Compiler3920   inline void movss(const Mem& dst, const XmmVar& src)
3921   { _emitInstruction(kX86InstMovSS, &dst, &src); }
3922 
3923   //! @brief Move Unaligned Packed SP-FP Values (SSE).
movupsX86Compiler3924   inline void movups(const XmmVar& dst, const XmmVar& src)
3925   { _emitInstruction(kX86InstMovUPS, &dst, &src); }
3926   //! @brief Move Unaligned Packed SP-FP Values (SSE).
movupsX86Compiler3927   inline void movups(const XmmVar& dst, const Mem& src)
3928   { _emitInstruction(kX86InstMovUPS, &dst, &src); }
3929 
3930   //! @brief Move Unaligned Packed SP-FP Values (SSE).
movupsX86Compiler3931   inline void movups(const Mem& dst, const XmmVar& src)
3932   { _emitInstruction(kX86InstMovUPS, &dst, &src); }
3933 
3934   //! @brief Packed SP-FP Multiply (SSE).
mulpsX86Compiler3935   inline void mulps(const XmmVar& dst, const XmmVar& src)
3936   { _emitInstruction(kX86InstMulPS, &dst, &src); }
3937   //! @brief Packed SP-FP Multiply (SSE).
mulpsX86Compiler3938   inline void mulps(const XmmVar& dst, const Mem& src)
3939   { _emitInstruction(kX86InstMulPS, &dst, &src); }
3940 
3941   //! @brief Scalar SP-FP Multiply (SSE).
mulssX86Compiler3942   inline void mulss(const XmmVar& dst, const XmmVar& src)
3943   { _emitInstruction(kX86InstMulSS, &dst, &src); }
3944   //! @brief Scalar SP-FP Multiply (SSE).
mulssX86Compiler3945   inline void mulss(const XmmVar& dst, const Mem& src)
3946   { _emitInstruction(kX86InstMulSS, &dst, &src); }
3947 
3948   //! @brief Bit-wise Logical OR for SP-FP Data (SSE).
orpsX86Compiler3949   inline void orps(const XmmVar& dst, const XmmVar& src)
3950   { _emitInstruction(kX86InstOrPS, &dst, &src); }
3951   //! @brief Bit-wise Logical OR for SP-FP Data (SSE).
orpsX86Compiler3952   inline void orps(const XmmVar& dst, const Mem& src)
3953   { _emitInstruction(kX86InstOrPS, &dst, &src); }
3954 
3955   //! @brief Packed Average (SSE).
pavgbX86Compiler3956   inline void pavgb(const MmVar& dst, const MmVar& src)
3957   { _emitInstruction(kX86InstPAvgB, &dst, &src); }
3958   //! @brief Packed Average (SSE).
pavgbX86Compiler3959   inline void pavgb(const MmVar& dst, const Mem& src)
3960   { _emitInstruction(kX86InstPAvgB, &dst, &src); }
3961 
3962   //! @brief Packed Average (SSE).
pavgwX86Compiler3963   inline void pavgw(const MmVar& dst, const MmVar& src)
3964   { _emitInstruction(kX86InstPAvgW, &dst, &src); }
3965   //! @brief Packed Average (SSE).
pavgwX86Compiler3966   inline void pavgw(const MmVar& dst, const Mem& src)
3967   { _emitInstruction(kX86InstPAvgW, &dst, &src); }
3968 
3969   //! @brief Extract Word (SSE).
pextrwX86Compiler3970   inline void pextrw(const GpVar& dst, const MmVar& src, const Imm& imm8)
3971   { _emitInstruction(kX86InstPExtrW, &dst, &src, &imm8); }
3972 
3973   //! @brief Insert Word (SSE).
pinsrwX86Compiler3974   inline void pinsrw(const MmVar& dst, const GpVar& src, const Imm& imm8)
3975   { _emitInstruction(kX86InstPInsRW, &dst, &src, &imm8); }
3976   //! @brief Insert Word (SSE).
pinsrwX86Compiler3977   inline void pinsrw(const MmVar& dst, const Mem& src, const Imm& imm8)
3978   { _emitInstruction(kX86InstPInsRW, &dst, &src, &imm8); }
3979 
3980   //! @brief Packed Signed Integer Word Maximum (SSE).
pmaxswX86Compiler3981   inline void pmaxsw(const MmVar& dst, const MmVar& src)
3982   { _emitInstruction(kX86InstPMaxSW, &dst, &src); }
3983   //! @brief Packed Signed Integer Word Maximum (SSE).
pmaxswX86Compiler3984   inline void pmaxsw(const MmVar& dst, const Mem& src)
3985   { _emitInstruction(kX86InstPMaxSW, &dst, &src); }
3986 
3987   //! @brief Packed Unsigned Integer Byte Maximum (SSE).
pmaxubX86Compiler3988   inline void pmaxub(const MmVar& dst, const MmVar& src)
3989   { _emitInstruction(kX86InstPMaxUB, &dst, &src); }
3990   //! @brief Packed Unsigned Integer Byte Maximum (SSE).
pmaxubX86Compiler3991   inline void pmaxub(const MmVar& dst, const Mem& src)
3992   { _emitInstruction(kX86InstPMaxUB, &dst, &src); }
3993 
3994   //! @brief Packed Signed Integer Word Minimum (SSE).
pminswX86Compiler3995   inline void pminsw(const MmVar& dst, const MmVar& src)
3996   { _emitInstruction(kX86InstPMinSW, &dst, &src); }
3997   //! @brief Packed Signed Integer Word Minimum (SSE).
pminswX86Compiler3998   inline void pminsw(const MmVar& dst, const Mem& src)
3999   { _emitInstruction(kX86InstPMinSW, &dst, &src); }
4000 
4001   //! @brief Packed Unsigned Integer Byte Minimum (SSE).
pminubX86Compiler4002   inline void pminub(const MmVar& dst, const MmVar& src)
4003   { _emitInstruction(kX86InstPMinUB, &dst, &src); }
4004   //! @brief Packed Unsigned Integer Byte Minimum (SSE).
pminubX86Compiler4005   inline void pminub(const MmVar& dst, const Mem& src)
4006   { _emitInstruction(kX86InstPMinUB, &dst, &src); }
4007 
4008   //! @brief Move Byte Mask To Integer (SSE).
pmovmskbX86Compiler4009   inline void pmovmskb(const GpVar& dst, const MmVar& src)
4010   { _emitInstruction(kX86InstPMovMskB, &dst, &src); }
4011 
4012   //! @brief Packed Multiply High Unsigned (SSE).
pmulhuwX86Compiler4013   inline void pmulhuw(const MmVar& dst, const MmVar& src)
4014   { _emitInstruction(kX86InstPMulHUW, &dst, &src); }
4015   //! @brief Packed Multiply High Unsigned (SSE).
pmulhuwX86Compiler4016   inline void pmulhuw(const MmVar& dst, const Mem& src)
4017   { _emitInstruction(kX86InstPMulHUW, &dst, &src); }
4018 
4019   //! @brief Packed Sum of Absolute Differences (SSE).
psadbwX86Compiler4020   inline void psadbw(const MmVar& dst, const MmVar& src)
4021   { _emitInstruction(kX86InstPSADBW, &dst, &src); }
4022   //! @brief Packed Sum of Absolute Differences (SSE).
psadbwX86Compiler4023   inline void psadbw(const MmVar& dst, const Mem& src)
4024   { _emitInstruction(kX86InstPSADBW, &dst, &src); }
4025 
4026   //! @brief Packed Shuffle word (SSE).
pshufwX86Compiler4027   inline void pshufw(const MmVar& dst, const MmVar& src, const Imm& imm8)
4028   { _emitInstruction(kX86InstPShufW, &dst, &src, &imm8); }
4029   //! @brief Packed Shuffle word (SSE).
pshufwX86Compiler4030   inline void pshufw(const MmVar& dst, const Mem& src, const Imm& imm8)
4031   { _emitInstruction(kX86InstPShufW, &dst, &src, &imm8); }
4032 
4033   //! @brief Packed SP-FP Reciprocal (SSE).
rcppsX86Compiler4034   inline void rcpps(const XmmVar& dst, const XmmVar& src)
4035   { _emitInstruction(kX86InstRcpPS, &dst, &src); }
4036   //! @brief Packed SP-FP Reciprocal (SSE).
rcppsX86Compiler4037   inline void rcpps(const XmmVar& dst, const Mem& src)
4038   { _emitInstruction(kX86InstRcpPS, &dst, &src); }
4039 
4040   //! @brief Scalar SP-FP Reciprocal (SSE).
rcpssX86Compiler4041   inline void rcpss(const XmmVar& dst, const XmmVar& src)
4042   { _emitInstruction(kX86InstRcpSS, &dst, &src); }
4043   //! @brief Scalar SP-FP Reciprocal (SSE).
rcpssX86Compiler4044   inline void rcpss(const XmmVar& dst, const Mem& src)
4045   { _emitInstruction(kX86InstRcpSS, &dst, &src); }
4046 
4047   //! @brief Prefetch (SSE).
prefetchX86Compiler4048   inline void prefetch(const Mem& mem, const Imm& hint)
4049   { _emitInstruction(kX86InstPrefetch, &mem, &hint); }
4050 
4051   //! @brief Compute Sum of Absolute Differences (SSE).
psadbwX86Compiler4052   inline void psadbw(const XmmVar& dst, const XmmVar& src)
4053   { _emitInstruction(kX86InstPSADBW, &dst, &src); }
4054   //! @brief Compute Sum of Absolute Differences (SSE).
psadbwX86Compiler4055   inline void psadbw(const XmmVar& dst, const Mem& src)
4056   { _emitInstruction(kX86InstPSADBW, &dst, &src); }
4057 
4058   //! @brief Packed SP-FP Square Root Reciprocal (SSE).
rsqrtpsX86Compiler4059   inline void rsqrtps(const XmmVar& dst, const XmmVar& src)
4060   { _emitInstruction(kX86InstSqrtPS, &dst, &src); }
4061   //! @brief Packed SP-FP Square Root Reciprocal (SSE).
rsqrtpsX86Compiler4062   inline void rsqrtps(const XmmVar& dst, const Mem& src)
4063   { _emitInstruction(kX86InstSqrtPS, &dst, &src); }
4064 
4065   //! @brief Scalar SP-FP Square Root Reciprocal (SSE).
rsqrtssX86Compiler4066   inline void rsqrtss(const XmmVar& dst, const XmmVar& src)
4067   { _emitInstruction(kX86InstSqrtSS, &dst, &src); }
4068   //! @brief Scalar SP-FP Square Root Reciprocal (SSE).
rsqrtssX86Compiler4069   inline void rsqrtss(const XmmVar& dst, const Mem& src)
4070   { _emitInstruction(kX86InstSqrtSS, &dst, &src); }
4071 
4072   //! @brief Store fence (SSE).
sfenceX86Compiler4073   inline void sfence()
4074   { _emitInstruction(kX86InstSFence); }
4075 
4076   //! @brief Shuffle SP-FP (SSE).
shufpsX86Compiler4077   inline void shufps(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
4078   { _emitInstruction(kX86InstShufPS, &dst, &src, &imm8); }
4079   //! @brief Shuffle SP-FP (SSE).
shufpsX86Compiler4080   inline void shufps(const XmmVar& dst, const Mem& src, const Imm& imm8)
4081   { _emitInstruction(kX86InstShufPS, &dst, &src, &imm8); }
4082 
4083   //! @brief Packed SP-FP Square Root (SSE).
sqrtpsX86Compiler4084   inline void sqrtps(const XmmVar& dst, const XmmVar& src)
4085   { _emitInstruction(kX86InstSqrtPS, &dst, &src); }
4086   //! @brief Packed SP-FP Square Root (SSE).
sqrtpsX86Compiler4087   inline void sqrtps(const XmmVar& dst, const Mem& src)
4088   { _emitInstruction(kX86InstSqrtPS, &dst, &src); }
4089 
4090   //! @brief Scalar SP-FP Square Root (SSE).
sqrtssX86Compiler4091   inline void sqrtss(const XmmVar& dst, const XmmVar& src)
4092   { _emitInstruction(kX86InstSqrtSS, &dst, &src); }
4093   //! @brief Scalar SP-FP Square Root (SSE).
sqrtssX86Compiler4094   inline void sqrtss(const XmmVar& dst, const Mem& src)
4095   { _emitInstruction(kX86InstSqrtSS, &dst, &src); }
4096 
4097   //! @brief Store Streaming SIMD Extension Control/Status (SSE).
stmxcsrX86Compiler4098   inline void stmxcsr(const Mem& dst)
4099   { _emitInstruction(kX86InstStMXCSR, &dst); }
4100 
4101   //! @brief Packed SP-FP Subtract (SSE).
subpsX86Compiler4102   inline void subps(const XmmVar& dst, const XmmVar& src)
4103   { _emitInstruction(kX86InstSubPS, &dst, &src); }
4104   //! @brief Packed SP-FP Subtract (SSE).
subpsX86Compiler4105   inline void subps(const XmmVar& dst, const Mem& src)
4106   { _emitInstruction(kX86InstSubPS, &dst, &src); }
4107 
4108   //! @brief Scalar SP-FP Subtract (SSE).
subssX86Compiler4109   inline void subss(const XmmVar& dst, const XmmVar& src)
4110   { _emitInstruction(kX86InstSubSS, &dst, &src); }
4111   //! @brief Scalar SP-FP Subtract (SSE).
subssX86Compiler4112   inline void subss(const XmmVar& dst, const Mem& src)
4113   { _emitInstruction(kX86InstSubSS, &dst, &src); }
4114 
4115   //! @brief Unordered Scalar SP-FP compare and set EFLAGS (SSE).
ucomissX86Compiler4116   inline void ucomiss(const XmmVar& dst, const XmmVar& src)
4117   { _emitInstruction(kX86InstUComISS, &dst, &src); }
4118   //! @brief Unordered Scalar SP-FP compare and set EFLAGS (SSE).
ucomissX86Compiler4119   inline void ucomiss(const XmmVar& dst, const Mem& src)
4120   { _emitInstruction(kX86InstUComISS, &dst, &src); }
4121 
4122   //! @brief Unpack High Packed SP-FP Data (SSE).
unpckhpsX86Compiler4123   inline void unpckhps(const XmmVar& dst, const XmmVar& src)
4124   { _emitInstruction(kX86InstUnpckHPS, &dst, &src); }
4125   //! @brief Unpack High Packed SP-FP Data (SSE).
unpckhpsX86Compiler4126   inline void unpckhps(const XmmVar& dst, const Mem& src)
4127   { _emitInstruction(kX86InstUnpckHPS, &dst, &src); }
4128 
4129   //! @brief Unpack Low Packed SP-FP Data (SSE).
unpcklpsX86Compiler4130   inline void unpcklps(const XmmVar& dst, const XmmVar& src)
4131   { _emitInstruction(kX86InstUnpckLPS, &dst, &src); }
4132   //! @brief Unpack Low Packed SP-FP Data (SSE).
unpcklpsX86Compiler4133   inline void unpcklps(const XmmVar& dst, const Mem& src)
4134   { _emitInstruction(kX86InstUnpckLPS, &dst, &src); }
4135 
4136   //! @brief Bit-wise Logical Xor for SP-FP Data (SSE).
xorpsX86Compiler4137   inline void xorps(const XmmVar& dst, const XmmVar& src)
4138   { _emitInstruction(kX86InstXorPS, &dst, &src); }
4139   //! @brief Bit-wise Logical Xor for SP-FP Data (SSE).
xorpsX86Compiler4140   inline void xorps(const XmmVar& dst, const Mem& src)
4141   { _emitInstruction(kX86InstXorPS, &dst, &src); }
4142 
4143   // --------------------------------------------------------------------------
4144   // [SSE2]
4145   // --------------------------------------------------------------------------
4146 
4147   //! @brief Packed DP-FP Add (SSE2).
addpdX86Compiler4148   inline void addpd(const XmmVar& dst, const XmmVar& src)
4149   { _emitInstruction(kX86InstAddPD, &dst, &src); }
4150   //! @brief Packed DP-FP Add (SSE2).
addpdX86Compiler4151   inline void addpd(const XmmVar& dst, const Mem& src)
4152   { _emitInstruction(kX86InstAddPD, &dst, &src); }
4153 
4154   //! @brief Scalar DP-FP Add (SSE2).
addsdX86Compiler4155   inline void addsd(const XmmVar& dst, const XmmVar& src)
4156   { _emitInstruction(kX86InstAddSD, &dst, &src); }
4157   //! @brief Scalar DP-FP Add (SSE2).
addsdX86Compiler4158   inline void addsd(const XmmVar& dst, const Mem& src)
4159   { _emitInstruction(kX86InstAddSD, &dst, &src); }
4160 
4161   //! @brief Bit-wise Logical And Not For DP-FP (SSE2).
andnpdX86Compiler4162   inline void andnpd(const XmmVar& dst, const XmmVar& src)
4163   { _emitInstruction(kX86InstAndnPD, &dst, &src); }
4164   //! @brief Bit-wise Logical And Not For DP-FP (SSE2).
andnpdX86Compiler4165   inline void andnpd(const XmmVar& dst, const Mem& src)
4166   { _emitInstruction(kX86InstAndnPD, &dst, &src); }
4167 
4168   //! @brief Bit-wise Logical And For DP-FP (SSE2).
andpdX86Compiler4169   inline void andpd(const XmmVar& dst, const XmmVar& src)
4170   { _emitInstruction(kX86InstAndPD, &dst, &src); }
4171   //! @brief Bit-wise Logical And For DP-FP (SSE2).
andpdX86Compiler4172   inline void andpd(const XmmVar& dst, const Mem& src)
4173   { _emitInstruction(kX86InstAndPD, &dst, &src); }
4174 
4175   //! @brief Flush Cache Line (SSE2).
clflushX86Compiler4176   inline void clflush(const Mem& mem)
4177   { _emitInstruction(kX86InstClFlush, &mem); }
4178 
4179   //! @brief Packed DP-FP Compare (SSE2).
cmppdX86Compiler4180   inline void cmppd(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
4181   { _emitInstruction(kX86InstCmpPD, &dst, &src, &imm8); }
4182   //! @brief Packed DP-FP Compare (SSE2).
cmppdX86Compiler4183   inline void cmppd(const XmmVar& dst, const Mem& src, const Imm& imm8)
4184   { _emitInstruction(kX86InstCmpPD, &dst, &src, &imm8); }
4185 
4186   //! @brief Compare Scalar SP-FP Values (SSE2).
cmpsdX86Compiler4187   inline void cmpsd(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
4188   { _emitInstruction(kX86InstCmpSD, &dst, &src, &imm8); }
4189   //! @brief Compare Scalar SP-FP Values (SSE2).
cmpsdX86Compiler4190   inline void cmpsd(const XmmVar& dst, const Mem& src, const Imm& imm8)
4191   { _emitInstruction(kX86InstCmpSD, &dst, &src, &imm8); }
4192 
4193   //! @brief Scalar Ordered DP-FP Compare and Set EFLAGS (SSE2).
comisdX86Compiler4194   inline void comisd(const XmmVar& dst, const XmmVar& src)
4195   { _emitInstruction(kX86InstComISD, &dst, &src); }
4196   //! @brief Scalar Ordered DP-FP Compare and Set EFLAGS (SSE2).
comisdX86Compiler4197   inline void comisd(const XmmVar& dst, const Mem& src)
4198   { _emitInstruction(kX86InstComISD, &dst, &src); }
4199 
4200   //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
cvtdq2pdX86Compiler4201   inline void cvtdq2pd(const XmmVar& dst, const XmmVar& src)
4202   { _emitInstruction(kX86InstCvtDQ2PD, &dst, &src); }
4203   //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
cvtdq2pdX86Compiler4204   inline void cvtdq2pd(const XmmVar& dst, const Mem& src)
4205   { _emitInstruction(kX86InstCvtDQ2PD, &dst, &src); }
4206 
4207   //! @brief Convert Packed Dword Integers to Packed SP-FP Values (SSE2).
cvtdq2psX86Compiler4208   inline void cvtdq2ps(const XmmVar& dst, const XmmVar& src)
4209   { _emitInstruction(kX86InstCvtDQ2PS, &dst, &src); }
4210   //! @brief Convert Packed Dword Integers to Packed SP-FP Values (SSE2).
cvtdq2psX86Compiler4211   inline void cvtdq2ps(const XmmVar& dst, const Mem& src)
4212   { _emitInstruction(kX86InstCvtDQ2PS, &dst, &src); }
4213 
4214   //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
cvtpd2dqX86Compiler4215   inline void cvtpd2dq(const XmmVar& dst, const XmmVar& src)
4216   { _emitInstruction(kX86InstCvtPD2DQ, &dst, &src); }
4217   //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
cvtpd2dqX86Compiler4218   inline void cvtpd2dq(const XmmVar& dst, const Mem& src)
4219   { _emitInstruction(kX86InstCvtPD2DQ, &dst, &src); }
4220 
4221   //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
cvtpd2piX86Compiler4222   inline void cvtpd2pi(const MmVar& dst, const XmmVar& src)
4223   { _emitInstruction(kX86InstCvtPD2PI, &dst, &src); }
4224   //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
cvtpd2piX86Compiler4225   inline void cvtpd2pi(const MmVar& dst, const Mem& src)
4226   { _emitInstruction(kX86InstCvtPD2PI, &dst, &src); }
4227 
4228   //! @brief Convert Packed DP-FP Values to Packed SP-FP Values (SSE2).
cvtpd2psX86Compiler4229   inline void cvtpd2ps(const XmmVar& dst, const XmmVar& src)
4230   { _emitInstruction(kX86InstCvtPD2PS, &dst, &src); }
4231   //! @brief Convert Packed DP-FP Values to Packed SP-FP Values (SSE2).
cvtpd2psX86Compiler4232   inline void cvtpd2ps(const XmmVar& dst, const Mem& src)
4233   { _emitInstruction(kX86InstCvtPD2PS, &dst, &src); }
4234 
4235   //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
cvtpi2pdX86Compiler4236   inline void cvtpi2pd(const XmmVar& dst, const MmVar& src)
4237   { _emitInstruction(kX86InstCvtPI2PD, &dst, &src); }
4238   //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
cvtpi2pdX86Compiler4239   inline void cvtpi2pd(const XmmVar& dst, const Mem& src)
4240   { _emitInstruction(kX86InstCvtPI2PD, &dst, &src); }
4241 
4242   //! @brief Convert Packed SP-FP Values to Packed Dword Integers (SSE2).
cvtps2dqX86Compiler4243   inline void cvtps2dq(const XmmVar& dst, const XmmVar& src)
4244   { _emitInstruction(kX86InstCvtPS2DQ, &dst, &src); }
4245   //! @brief Convert Packed SP-FP Values to Packed Dword Integers (SSE2).
cvtps2dqX86Compiler4246   inline void cvtps2dq(const XmmVar& dst, const Mem& src)
4247   { _emitInstruction(kX86InstCvtPS2DQ, &dst, &src); }
4248 
4249   //! @brief Convert Packed SP-FP Values to Packed DP-FP Values (SSE2).
cvtps2pdX86Compiler4250   inline void cvtps2pd(const XmmVar& dst, const XmmVar& src)
4251   { _emitInstruction(kX86InstCvtPS2PD, &dst, &src); }
4252   //! @brief Convert Packed SP-FP Values to Packed DP-FP Values (SSE2).
cvtps2pdX86Compiler4253   inline void cvtps2pd(const XmmVar& dst, const Mem& src)
4254   { _emitInstruction(kX86InstCvtPS2PD, &dst, &src); }
4255 
4256   //! @brief Convert Scalar DP-FP Value to Dword Integer (SSE2).
cvtsd2siX86Compiler4257   inline void cvtsd2si(const GpVar& dst, const XmmVar& src)
4258   { _emitInstruction(kX86InstCvtSD2SI, &dst, &src); }
4259   //! @brief Convert Scalar DP-FP Value to Dword Integer (SSE2).
cvtsd2siX86Compiler4260   inline void cvtsd2si(const GpVar& dst, const Mem& src)
4261   { _emitInstruction(kX86InstCvtSD2SI, &dst, &src); }
4262 
4263   //! @brief Convert Scalar DP-FP Value to Scalar SP-FP Value (SSE2).
cvtsd2ssX86Compiler4264   inline void cvtsd2ss(const XmmVar& dst, const XmmVar& src)
4265   { _emitInstruction(kX86InstCvtSD2SS, &dst, &src); }
4266   //! @brief Convert Scalar DP-FP Value to Scalar SP-FP Value (SSE2).
cvtsd2ssX86Compiler4267   inline void cvtsd2ss(const XmmVar& dst, const Mem& src)
4268   { _emitInstruction(kX86InstCvtSD2SS, &dst, &src); }
4269 
4270   //! @brief Convert Dword Integer to Scalar DP-FP Value (SSE2).
cvtsi2sdX86Compiler4271   inline void cvtsi2sd(const XmmVar& dst, const GpVar& src)
4272   { _emitInstruction(kX86InstCvtSI2SD, &dst, &src); }
4273   //! @brief Convert Dword Integer to Scalar DP-FP Value (SSE2).
cvtsi2sdX86Compiler4274   inline void cvtsi2sd(const XmmVar& dst, const Mem& src)
4275   { _emitInstruction(kX86InstCvtSI2SD, &dst, &src); }
4276 
4277   //! @brief Convert Scalar SP-FP Value to Scalar DP-FP Value (SSE2).
cvtss2sdX86Compiler4278   inline void cvtss2sd(const XmmVar& dst, const XmmVar& src)
4279   { _emitInstruction(kX86InstCvtSS2SD, &dst, &src); }
4280   //! @brief Convert Scalar SP-FP Value to Scalar DP-FP Value (SSE2).
cvtss2sdX86Compiler4281   inline void cvtss2sd(const XmmVar& dst, const Mem& src)
4282   { _emitInstruction(kX86InstCvtSS2SD, &dst, &src); }
4283 
4284   //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
cvttpd2piX86Compiler4285   inline void cvttpd2pi(const MmVar& dst, const XmmVar& src)
4286   { _emitInstruction(kX86InstCvttPD2PI, &dst, &src); }
4287   //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
cvttpd2piX86Compiler4288   inline void cvttpd2pi(const MmVar& dst, const Mem& src)
4289   { _emitInstruction(kX86InstCvttPD2PI, &dst, &src); }
4290 
4291   //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
cvttpd2dqX86Compiler4292   inline void cvttpd2dq(const XmmVar& dst, const XmmVar& src)
4293   { _emitInstruction(kX86InstCvttPD2DQ, &dst, &src); }
4294   //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
cvttpd2dqX86Compiler4295   inline void cvttpd2dq(const XmmVar& dst, const Mem& src)
4296   { _emitInstruction(kX86InstCvttPD2DQ, &dst, &src); }
4297 
4298   //! @brief Convert with Truncation Packed SP-FP Values to Packed Dword Integers (SSE2).
cvttps2dqX86Compiler4299   inline void cvttps2dq(const XmmVar& dst, const XmmVar& src)
4300   { _emitInstruction(kX86InstCvttPS2DQ, &dst, &src); }
4301   //! @brief Convert with Truncation Packed SP-FP Values to Packed Dword Integers (SSE2).
cvttps2dqX86Compiler4302   inline void cvttps2dq(const XmmVar& dst, const Mem& src)
4303   { _emitInstruction(kX86InstCvttPS2DQ, &dst, &src); }
4304 
4305   //! @brief Convert with Truncation Scalar DP-FP Value to Signed Dword Integer (SSE2).
cvttsd2siX86Compiler4306   inline void cvttsd2si(const GpVar& dst, const XmmVar& src)
4307   { _emitInstruction(kX86InstCvttSD2SI, &dst, &src); }
4308   //! @brief Convert with Truncation Scalar DP-FP Value to Signed Dword Integer (SSE2).
cvttsd2siX86Compiler4309   inline void cvttsd2si(const GpVar& dst, const Mem& src)
4310   { _emitInstruction(kX86InstCvttSD2SI, &dst, &src); }
4311 
4312   //! @brief Packed DP-FP Divide (SSE2).
divpdX86Compiler4313   inline void divpd(const XmmVar& dst, const XmmVar& src)
4314   { _emitInstruction(kX86InstDivPD, &dst, &src); }
4315   //! @brief Packed DP-FP Divide (SSE2).
divpdX86Compiler4316   inline void divpd(const XmmVar& dst, const Mem& src)
4317   { _emitInstruction(kX86InstDivPD, &dst, &src); }
4318 
4319   //! @brief Scalar DP-FP Divide (SSE2).
divsdX86Compiler4320   inline void divsd(const XmmVar& dst, const XmmVar& src)
4321   { _emitInstruction(kX86InstDivSD, &dst, &src); }
4322   //! @brief Scalar DP-FP Divide (SSE2).
divsdX86Compiler4323   inline void divsd(const XmmVar& dst, const Mem& src)
4324   { _emitInstruction(kX86InstDivSD, &dst, &src); }
4325 
4326   //! @brief Load Fence (SSE2).
lfenceX86Compiler4327   inline void lfence()
4328   { _emitInstruction(kX86InstLFence); }
4329 
4330   //! @brief Store Selected Bytes of Double Quadword (SSE2).
4331   //!
4332   //! @note Target is DS:EDI.
maskmovdquX86Compiler4333   inline void maskmovdqu(const GpVar& dst_ptr, const XmmVar& src, const XmmVar& mask)
4334   { _emitInstruction(kX86InstMaskMovDQU, &dst_ptr, &src, &mask); }
4335 
4336   //! @brief Return Maximum Packed Double-Precision FP Values (SSE2).
maxpdX86Compiler4337   inline void maxpd(const XmmVar& dst, const XmmVar& src)
4338   { _emitInstruction(kX86InstMaxPD, &dst, &src); }
4339   //! @brief Return Maximum Packed Double-Precision FP Values (SSE2).
maxpdX86Compiler4340   inline void maxpd(const XmmVar& dst, const Mem& src)
4341   { _emitInstruction(kX86InstMaxPD, &dst, &src); }
4342 
4343   //! @brief Return Maximum Scalar Double-Precision FP Value (SSE2).
maxsdX86Compiler4344   inline void maxsd(const XmmVar& dst, const XmmVar& src)
4345   { _emitInstruction(kX86InstMaxSD, &dst, &src); }
4346   //! @brief Return Maximum Scalar Double-Precision FP Value (SSE2).
maxsdX86Compiler4347   inline void maxsd(const XmmVar& dst, const Mem& src)
4348   { _emitInstruction(kX86InstMaxSD, &dst, &src); }
4349 
4350   //! @brief Memory Fence (SSE2).
mfenceX86Compiler4351   inline void mfence()
4352   { _emitInstruction(kX86InstMFence); }
4353 
4354   //! @brief Return Minimum Packed DP-FP Values (SSE2).
minpdX86Compiler4355   inline void minpd(const XmmVar& dst, const XmmVar& src)
4356   { _emitInstruction(kX86InstMinPD, &dst, &src); }
4357   //! @brief Return Minimum Packed DP-FP Values (SSE2).
minpdX86Compiler4358   inline void minpd(const XmmVar& dst, const Mem& src)
4359   { _emitInstruction(kX86InstMinPD, &dst, &src); }
4360 
4361   //! @brief Return Minimum Scalar DP-FP Value (SSE2).
minsdX86Compiler4362   inline void minsd(const XmmVar& dst, const XmmVar& src)
4363   { _emitInstruction(kX86InstMinSD, &dst, &src); }
4364   //! @brief Return Minimum Scalar DP-FP Value (SSE2).
minsdX86Compiler4365   inline void minsd(const XmmVar& dst, const Mem& src)
4366   { _emitInstruction(kX86InstMinSD, &dst, &src); }
4367 
4368   //! @brief Move Aligned DQWord (SSE2).
movdqaX86Compiler4369   inline void movdqa(const XmmVar& dst, const XmmVar& src)
4370   { _emitInstruction(kX86InstMovDQA, &dst, &src); }
4371   //! @brief Move Aligned DQWord (SSE2).
movdqaX86Compiler4372   inline void movdqa(const XmmVar& dst, const Mem& src)
4373   { _emitInstruction(kX86InstMovDQA, &dst, &src); }
4374 
4375   //! @brief Move Aligned DQWord (SSE2).
movdqaX86Compiler4376   inline void movdqa(const Mem& dst, const XmmVar& src)
4377   { _emitInstruction(kX86InstMovDQA, &dst, &src); }
4378 
4379   //! @brief Move Unaligned Double Quadword (SSE2).
movdquX86Compiler4380   inline void movdqu(const XmmVar& dst, const XmmVar& src)
4381   { _emitInstruction(kX86InstMovDQU, &dst, &src); }
4382   //! @brief Move Unaligned Double Quadword (SSE2).
movdquX86Compiler4383   inline void movdqu(const XmmVar& dst, const Mem& src)
4384   { _emitInstruction(kX86InstMovDQU, &dst, &src); }
4385 
4386   //! @brief Move Unaligned Double Quadword (SSE2).
movdquX86Compiler4387   inline void movdqu(const Mem& dst, const XmmVar& src)
4388   { _emitInstruction(kX86InstMovDQU, &dst, &src); }
4389 
4390   //! @brief Extract Packed SP-FP Sign Mask (SSE2).
movmskpsX86Compiler4391   inline void movmskps(const GpVar& dst, const XmmVar& src)
4392   { _emitInstruction(kX86InstMovMskPS, &dst, &src); }
4393 
4394   //! @brief Extract Packed DP-FP Sign Mask (SSE2).
movmskpdX86Compiler4395   inline void movmskpd(const GpVar& dst, const XmmVar& src)
4396   { _emitInstruction(kX86InstMovMskPD, &dst, &src); }
4397 
4398   //! @brief Move Scalar Double-Precision FP Value (SSE2).
movsdX86Compiler4399   inline void movsd(const XmmVar& dst, const XmmVar& src)
4400   { _emitInstruction(kX86InstMovSD, &dst, &src); }
4401   //! @brief Move Scalar Double-Precision FP Value (SSE2).
movsdX86Compiler4402   inline void movsd(const XmmVar& dst, const Mem& src)
4403   { _emitInstruction(kX86InstMovSD, &dst, &src); }
4404 
4405   //! @brief Move Scalar Double-Precision FP Value (SSE2).
movsdX86Compiler4406   inline void movsd(const Mem& dst, const XmmVar& src)
4407   { _emitInstruction(kX86InstMovSD, &dst, &src); }
4408 
4409   //! @brief Move Aligned Packed Double-Precision FP Values (SSE2).
movapdX86Compiler4410   inline void movapd(const XmmVar& dst, const XmmVar& src)
4411   { _emitInstruction(kX86InstMovAPD, &dst, &src); }
4412 
4413   //! @brief Move Aligned Packed Double-Precision FP Values (SSE2).
movapdX86Compiler4414   inline void movapd(const XmmVar& dst, const Mem& src)
4415   { _emitInstruction(kX86InstMovAPD, &dst, &src); }
4416 
4417   //! @brief Move Aligned Packed Double-Precision FP Values (SSE2).
movapdX86Compiler4418   inline void movapd(const Mem& dst, const XmmVar& src)
4419   { _emitInstruction(kX86InstMovAPD, &dst, &src); }
4420 
4421   //! @brief Move Quadword from XMM to MMX Technology Register (SSE2).
movdq2qX86Compiler4422   inline void movdq2q(const MmVar& dst, const XmmVar& src)
4423   { _emitInstruction(kX86InstMovDQ2Q, &dst, &src); }
4424 
4425   //! @brief Move Quadword from MMX Technology to XMM Register (SSE2).
movq2dqX86Compiler4426   inline void movq2dq(const XmmVar& dst, const MmVar& src)
4427   { _emitInstruction(kX86InstMovQ2DQ, &dst, &src); }
4428 
4429   //! @brief Move High Packed Double-Precision FP Value (SSE2).
movhpdX86Compiler4430   inline void movhpd(const XmmVar& dst, const Mem& src)
4431   { _emitInstruction(kX86InstMovHPD, &dst, &src); }
4432 
4433   //! @brief Move High Packed Double-Precision FP Value (SSE2).
movhpdX86Compiler4434   inline void movhpd(const Mem& dst, const XmmVar& src)
4435   { _emitInstruction(kX86InstMovHPD, &dst, &src); }
4436 
4437   //! @brief Move Low Packed Double-Precision FP Value (SSE2).
movlpdX86Compiler4438   inline void movlpd(const XmmVar& dst, const Mem& src)
4439   { _emitInstruction(kX86InstMovLPD, &dst, &src); }
4440 
4441   //! @brief Move Low Packed Double-Precision FP Value (SSE2).
movlpdX86Compiler4442   inline void movlpd(const Mem& dst, const XmmVar& src)
4443   { _emitInstruction(kX86InstMovLPD, &dst, &src); }
4444 
4445   //! @brief Store Double Quadword Using Non-Temporal Hint (SSE2).
movntdqX86Compiler4446   inline void movntdq(const Mem& dst, const XmmVar& src)
4447   { _emitInstruction(kX86InstMovNTDQ, &dst, &src); }
4448 
4449   //! @brief Store Store DWORD Using Non-Temporal Hint (SSE2).
movntiX86Compiler4450   inline void movnti(const Mem& dst, const GpVar& src)
4451   { _emitInstruction(kX86InstMovNTI, &dst, &src); }
4452 
4453   //! @brief Store Packed Double-Precision FP Values Using Non-Temporal Hint (SSE2).
movntpdX86Compiler4454   inline void movntpd(const Mem& dst, const XmmVar& src)
4455   { _emitInstruction(kX86InstMovNTPD, &dst, &src); }
4456 
4457   //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2).
movupdX86Compiler4458   inline void movupd(const XmmVar& dst, const XmmVar& src)
4459   { _emitInstruction(kX86InstMovUPD, &dst, &src); }
4460 
4461   //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2).
movupdX86Compiler4462   inline void movupd(const XmmVar& dst, const Mem& src)
4463   { _emitInstruction(kX86InstMovUPD, &dst, &src); }
4464 
4465   //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2).
movupdX86Compiler4466   inline void movupd(const Mem& dst, const XmmVar& src)
4467   { _emitInstruction(kX86InstMovUPD, &dst, &src); }
4468 
4469   //! @brief Packed DP-FP Multiply (SSE2).
mulpdX86Compiler4470   inline void mulpd(const XmmVar& dst, const XmmVar& src)
4471   { _emitInstruction(kX86InstMulPD, &dst, &src); }
4472   //! @brief Packed DP-FP Multiply (SSE2).
mulpdX86Compiler4473   inline void mulpd(const XmmVar& dst, const Mem& src)
4474   { _emitInstruction(kX86InstMulPD, &dst, &src); }
4475 
4476   //! @brief Scalar DP-FP Multiply (SSE2).
mulsdX86Compiler4477   inline void mulsd(const XmmVar& dst, const XmmVar& src)
4478   { _emitInstruction(kX86InstMulSD, &dst, &src); }
4479   //! @brief Scalar DP-FP Multiply (SSE2).
mulsdX86Compiler4480   inline void mulsd(const XmmVar& dst, const Mem& src)
4481   { _emitInstruction(kX86InstMulSD, &dst, &src); }
4482 
4483   //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
orpdX86Compiler4484   inline void orpd(const XmmVar& dst, const XmmVar& src)
4485   { _emitInstruction(kX86InstOrPD, &dst, &src); }
4486   //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
orpdX86Compiler4487   inline void orpd(const XmmVar& dst, const Mem& src)
4488   { _emitInstruction(kX86InstOrPD, &dst, &src); }
4489 
4490   //! @brief Pack with Signed Saturation (SSE2).
packsswbX86Compiler4491   inline void packsswb(const XmmVar& dst, const XmmVar& src)
4492   { _emitInstruction(kX86InstPackSSWB, &dst, &src); }
4493   //! @brief Pack with Signed Saturation (SSE2).
packsswbX86Compiler4494   inline void packsswb(const XmmVar& dst, const Mem& src)
4495   { _emitInstruction(kX86InstPackSSWB, &dst, &src); }
4496 
4497   //! @brief Pack with Signed Saturation (SSE2).
packssdwX86Compiler4498   inline void packssdw(const XmmVar& dst, const XmmVar& src)
4499   { _emitInstruction(kX86InstPackSSDW, &dst, &src); }
4500   //! @brief Pack with Signed Saturation (SSE2).
packssdwX86Compiler4501   inline void packssdw(const XmmVar& dst, const Mem& src)
4502   { _emitInstruction(kX86InstPackSSDW, &dst, &src); }
4503 
4504   //! @brief Pack with Unsigned Saturation (SSE2).
packuswbX86Compiler4505   inline void packuswb(const XmmVar& dst, const XmmVar& src)
4506   { _emitInstruction(kX86InstPackUSWB, &dst, &src); }
4507   //! @brief Pack with Unsigned Saturation (SSE2).
packuswbX86Compiler4508   inline void packuswb(const XmmVar& dst, const Mem& src)
4509   { _emitInstruction(kX86InstPackUSWB, &dst, &src); }
4510 
4511   //! @brief Packed BYTE Add (SSE2).
paddbX86Compiler4512   inline void paddb(const XmmVar& dst, const XmmVar& src)
4513   { _emitInstruction(kX86InstPAddB, &dst, &src); }
4514   //! @brief Packed BYTE Add (SSE2).
paddbX86Compiler4515   inline void paddb(const XmmVar& dst, const Mem& src)
4516   { _emitInstruction(kX86InstPAddB, &dst, &src); }
4517 
4518   //! @brief Packed WORD Add (SSE2).
paddwX86Compiler4519   inline void paddw(const XmmVar& dst, const XmmVar& src)
4520   { _emitInstruction(kX86InstPAddW, &dst, &src); }
4521   //! @brief Packed WORD Add (SSE2).
paddwX86Compiler4522   inline void paddw(const XmmVar& dst, const Mem& src)
4523   { _emitInstruction(kX86InstPAddW, &dst, &src); }
4524 
4525   //! @brief Packed DWORD Add (SSE2).
padddX86Compiler4526   inline void paddd(const XmmVar& dst, const XmmVar& src)
4527   { _emitInstruction(kX86InstPAddD, &dst, &src); }
4528   //! @brief Packed DWORD Add (SSE2).
padddX86Compiler4529   inline void paddd(const XmmVar& dst, const Mem& src)
4530   { _emitInstruction(kX86InstPAddD, &dst, &src); }
4531 
4532   //! @brief Packed QWORD Add (SSE2).
paddqX86Compiler4533   inline void paddq(const MmVar& dst, const MmVar& src)
4534   { _emitInstruction(kX86InstPAddQ, &dst, &src); }
4535   //! @brief Packed QWORD Add (SSE2).
paddqX86Compiler4536   inline void paddq(const MmVar& dst, const Mem& src)
4537   { _emitInstruction(kX86InstPAddQ, &dst, &src); }
4538 
4539   //! @brief Packed QWORD Add (SSE2).
paddqX86Compiler4540   inline void paddq(const XmmVar& dst, const XmmVar& src)
4541   { _emitInstruction(kX86InstPAddQ, &dst, &src); }
4542   //! @brief Packed QWORD Add (SSE2).
paddqX86Compiler4543   inline void paddq(const XmmVar& dst, const Mem& src)
4544   { _emitInstruction(kX86InstPAddQ, &dst, &src); }
4545 
4546   //! @brief Packed Add with Saturation (SSE2).
paddsbX86Compiler4547   inline void paddsb(const XmmVar& dst, const XmmVar& src)
4548   { _emitInstruction(kX86InstPAddSB, &dst, &src); }
4549   //! @brief Packed Add with Saturation (SSE2).
paddsbX86Compiler4550   inline void paddsb(const XmmVar& dst, const Mem& src)
4551   { _emitInstruction(kX86InstPAddSB, &dst, &src); }
4552 
4553   //! @brief Packed Add with Saturation (SSE2).
paddswX86Compiler4554   inline void paddsw(const XmmVar& dst, const XmmVar& src)
4555   { _emitInstruction(kX86InstPAddSW, &dst, &src); }
4556   //! @brief Packed Add with Saturation (SSE2).
paddswX86Compiler4557   inline void paddsw(const XmmVar& dst, const Mem& src)
4558   { _emitInstruction(kX86InstPAddSW, &dst, &src); }
4559 
4560   //! @brief Packed Add Unsigned with Saturation (SSE2).
paddusbX86Compiler4561   inline void paddusb(const XmmVar& dst, const XmmVar& src)
4562   { _emitInstruction(kX86InstPAddUSB, &dst, &src); }
4563   //! @brief Packed Add Unsigned with Saturation (SSE2).
paddusbX86Compiler4564   inline void paddusb(const XmmVar& dst, const Mem& src)
4565   { _emitInstruction(kX86InstPAddUSB, &dst, &src); }
4566 
4567   //! @brief Packed Add Unsigned with Saturation (SSE2).
padduswX86Compiler4568   inline void paddusw(const XmmVar& dst, const XmmVar& src)
4569   { _emitInstruction(kX86InstPAddUSW, &dst, &src); }
4570   //! @brief Packed Add Unsigned with Saturation (SSE2).
padduswX86Compiler4571   inline void paddusw(const XmmVar& dst, const Mem& src)
4572   { _emitInstruction(kX86InstPAddUSW, &dst, &src); }
4573 
4574   //! @brief Logical AND (SSE2).
pandX86Compiler4575   inline void pand(const XmmVar& dst, const XmmVar& src)
4576   { _emitInstruction(kX86InstPAnd, &dst, &src); }
4577   //! @brief Logical AND (SSE2).
pandX86Compiler4578   inline void pand(const XmmVar& dst, const Mem& src)
4579   { _emitInstruction(kX86InstPAnd, &dst, &src); }
4580 
4581   //! @brief Logical AND Not (SSE2).
pandnX86Compiler4582   inline void pandn(const XmmVar& dst, const XmmVar& src)
4583   { _emitInstruction(kX86InstPAndN, &dst, &src); }
4584   //! @brief Logical AND Not (SSE2).
pandnX86Compiler4585   inline void pandn(const XmmVar& dst, const Mem& src)
4586   { _emitInstruction(kX86InstPAndN, &dst, &src); }
4587 
4588   //! @brief Spin Loop Hint (SSE2).
pauseX86Compiler4589   inline void pause()
4590   { _emitInstruction(kX86InstPause); }
4591 
4592   //! @brief Packed Average (SSE2).
pavgbX86Compiler4593   inline void pavgb(const XmmVar& dst, const XmmVar& src)
4594   { _emitInstruction(kX86InstPAvgB, &dst, &src); }
4595   //! @brief Packed Average (SSE2).
pavgbX86Compiler4596   inline void pavgb(const XmmVar& dst, const Mem& src)
4597   { _emitInstruction(kX86InstPAvgB, &dst, &src); }
4598 
4599   //! @brief Packed Average (SSE2).
pavgwX86Compiler4600   inline void pavgw(const XmmVar& dst, const XmmVar& src)
4601   { _emitInstruction(kX86InstPAvgW, &dst, &src); }
4602   //! @brief Packed Average (SSE2).
pavgwX86Compiler4603   inline void pavgw(const XmmVar& dst, const Mem& src)
4604   { _emitInstruction(kX86InstPAvgW, &dst, &src); }
4605 
4606   //! @brief Packed Compare for Equal (BYTES) (SSE2).
pcmpeqbX86Compiler4607   inline void pcmpeqb(const XmmVar& dst, const XmmVar& src)
4608   { _emitInstruction(kX86InstPCmpEqB, &dst, &src); }
4609   //! @brief Packed Compare for Equal (BYTES) (SSE2).
pcmpeqbX86Compiler4610   inline void pcmpeqb(const XmmVar& dst, const Mem& src)
4611   { _emitInstruction(kX86InstPCmpEqB, &dst, &src); }
4612 
4613   //! @brief Packed Compare for Equal (WORDS) (SSE2).
pcmpeqwX86Compiler4614   inline void pcmpeqw(const XmmVar& dst, const XmmVar& src)
4615   { _emitInstruction(kX86InstPCmpEqW, &dst, &src); }
4616   //! @brief Packed Compare for Equal (WORDS) (SSE2).
pcmpeqwX86Compiler4617   inline void pcmpeqw(const XmmVar& dst, const Mem& src)
4618   { _emitInstruction(kX86InstPCmpEqW, &dst, &src); }
4619 
4620   //! @brief Packed Compare for Equal (DWORDS) (SSE2).
pcmpeqdX86Compiler4621   inline void pcmpeqd(const XmmVar& dst, const XmmVar& src)
4622   { _emitInstruction(kX86InstPCmpEqD, &dst, &src); }
4623   //! @brief Packed Compare for Equal (DWORDS) (SSE2).
pcmpeqdX86Compiler4624   inline void pcmpeqd(const XmmVar& dst, const Mem& src)
4625   { _emitInstruction(kX86InstPCmpEqD, &dst, &src); }
4626 
4627   //! @brief Packed Compare for Greater Than (BYTES) (SSE2).
pcmpgtbX86Compiler4628   inline void pcmpgtb(const XmmVar& dst, const XmmVar& src)
4629   { _emitInstruction(kX86InstPCmpGtB, &dst, &src); }
4630   //! @brief Packed Compare for Greater Than (BYTES) (SSE2).
pcmpgtbX86Compiler4631   inline void pcmpgtb(const XmmVar& dst, const Mem& src)
4632   { _emitInstruction(kX86InstPCmpGtB, &dst, &src); }
4633 
4634   //! @brief Packed Compare for Greater Than (WORDS) (SSE2).
pcmpgtwX86Compiler4635   inline void pcmpgtw(const XmmVar& dst, const XmmVar& src)
4636   { _emitInstruction(kX86InstPCmpGtW, &dst, &src); }
4637   //! @brief Packed Compare for Greater Than (WORDS) (SSE2).
pcmpgtwX86Compiler4638   inline void pcmpgtw(const XmmVar& dst, const Mem& src)
4639   { _emitInstruction(kX86InstPCmpGtW, &dst, &src); }
4640 
4641   //! @brief Packed Compare for Greater Than (DWORDS) (SSE2).
pcmpgtdX86Compiler4642   inline void pcmpgtd(const XmmVar& dst, const XmmVar& src)
4643   { _emitInstruction(kX86InstPCmpGtD, &dst, &src); }
4644   //! @brief Packed Compare for Greater Than (DWORDS) (SSE2).
pcmpgtdX86Compiler4645   inline void pcmpgtd(const XmmVar& dst, const Mem& src)
4646   { _emitInstruction(kX86InstPCmpGtD, &dst, &src); }
4647 
4648   //! @brief Extract Word (SSE2).
pextrwX86Compiler4649   inline void pextrw(const GpVar& dst, const XmmVar& src, const Imm& imm8)
4650   { _emitInstruction(kX86InstPExtrW, &dst, &src, &imm8); }
4651   //! @brief Extract Word (SSE2).
pextrwX86Compiler4652   inline void pextrw(const Mem& dst, const XmmVar& src, const Imm& imm8)
4653   { _emitInstruction(kX86InstPExtrW, &dst, &src, &imm8); }
4654 
4655   //! @brief Packed Signed Integer Word Maximum (SSE2).
pmaxswX86Compiler4656   inline void pmaxsw(const XmmVar& dst, const XmmVar& src)
4657   { _emitInstruction(kX86InstPMaxSW, &dst, &src); }
4658   //! @brief Packed Signed Integer Word Maximum (SSE2).
pmaxswX86Compiler4659   inline void pmaxsw(const XmmVar& dst, const Mem& src)
4660   { _emitInstruction(kX86InstPMaxSW, &dst, &src); }
4661 
4662   //! @brief Packed Unsigned Integer Byte Maximum (SSE2).
pmaxubX86Compiler4663   inline void pmaxub(const XmmVar& dst, const XmmVar& src)
4664   { _emitInstruction(kX86InstPMaxUB, &dst, &src); }
4665   //! @brief Packed Unsigned Integer Byte Maximum (SSE2).
pmaxubX86Compiler4666   inline void pmaxub(const XmmVar& dst, const Mem& src)
4667   { _emitInstruction(kX86InstPMaxUB, &dst, &src); }
4668 
4669   //! @brief Packed Signed Integer Word Minimum (SSE2).
pminswX86Compiler4670   inline void pminsw(const XmmVar& dst, const XmmVar& src)
4671   { _emitInstruction(kX86InstPMinSW, &dst, &src); }
4672   //! @brief Packed Signed Integer Word Minimum (SSE2).
pminswX86Compiler4673   inline void pminsw(const XmmVar& dst, const Mem& src)
4674   { _emitInstruction(kX86InstPMinSW, &dst, &src); }
4675 
4676   //! @brief Packed Unsigned Integer Byte Minimum (SSE2).
pminubX86Compiler4677   inline void pminub(const XmmVar& dst, const XmmVar& src)
4678   { _emitInstruction(kX86InstPMinUB, &dst, &src); }
4679   //! @brief Packed Unsigned Integer Byte Minimum (SSE2).
pminubX86Compiler4680   inline void pminub(const XmmVar& dst, const Mem& src)
4681   { _emitInstruction(kX86InstPMinUB, &dst, &src); }
4682 
4683   //! @brief Move Byte Mask (SSE2).
pmovmskbX86Compiler4684   inline void pmovmskb(const GpVar& dst, const XmmVar& src)
4685   { _emitInstruction(kX86InstPMovMskB, &dst, &src); }
4686 
4687   //! @brief Packed Multiply High (SSE2).
pmulhwX86Compiler4688   inline void pmulhw(const XmmVar& dst, const XmmVar& src)
4689   { _emitInstruction(kX86InstPMulHW, &dst, &src); }
4690   //! @brief Packed Multiply High (SSE2).
pmulhwX86Compiler4691   inline void pmulhw(const XmmVar& dst, const Mem& src)
4692   { _emitInstruction(kX86InstPMulHW, &dst, &src); }
4693 
4694   //! @brief Packed Multiply High Unsigned (SSE2).
pmulhuwX86Compiler4695   inline void pmulhuw(const XmmVar& dst, const XmmVar& src)
4696   { _emitInstruction(kX86InstPMulHUW, &dst, &src); }
4697   //! @brief Packed Multiply High Unsigned (SSE2).
pmulhuwX86Compiler4698   inline void pmulhuw(const XmmVar& dst, const Mem& src)
4699   { _emitInstruction(kX86InstPMulHUW, &dst, &src); }
4700 
4701   //! @brief Packed Multiply Low (SSE2).
pmullwX86Compiler4702   inline void pmullw(const XmmVar& dst, const XmmVar& src)
4703   { _emitInstruction(kX86InstPMulLW, &dst, &src); }
4704   //! @brief Packed Multiply Low (SSE2).
pmullwX86Compiler4705   inline void pmullw(const XmmVar& dst, const Mem& src)
4706   { _emitInstruction(kX86InstPMulLW, &dst, &src); }
4707 
4708   //! @brief Packed Multiply to QWORD (SSE2).
pmuludqX86Compiler4709   inline void pmuludq(const MmVar& dst, const MmVar& src)
4710   { _emitInstruction(kX86InstPMulUDQ, &dst, &src); }
4711   //! @brief Packed Multiply to QWORD (SSE2).
pmuludqX86Compiler4712   inline void pmuludq(const MmVar& dst, const Mem& src)
4713   { _emitInstruction(kX86InstPMulUDQ, &dst, &src); }
4714 
4715   //! @brief Packed Multiply to QWORD (SSE2).
pmuludqX86Compiler4716   inline void pmuludq(const XmmVar& dst, const XmmVar& src)
4717   { _emitInstruction(kX86InstPMulUDQ, &dst, &src); }
4718   //! @brief Packed Multiply to QWORD (SSE2).
pmuludqX86Compiler4719   inline void pmuludq(const XmmVar& dst, const Mem& src)
4720   { _emitInstruction(kX86InstPMulUDQ, &dst, &src); }
4721 
4722   //! @brief Bitwise Logical OR (SSE2).
porX86Compiler4723   inline void por(const XmmVar& dst, const XmmVar& src)
4724   { _emitInstruction(kX86InstPOr, &dst, &src); }
4725   //! @brief Bitwise Logical OR (SSE2).
porX86Compiler4726   inline void por(const XmmVar& dst, const Mem& src)
4727   { _emitInstruction(kX86InstPOr, &dst, &src); }
4728 
4729   //! @brief Packed Shift Left Logical (SSE2).
pslldX86Compiler4730   inline void pslld(const XmmVar& dst, const XmmVar& src)
4731   { _emitInstruction(kX86InstPSllD, &dst, &src); }
4732   //! @brief Packed Shift Left Logical (SSE2).
pslldX86Compiler4733   inline void pslld(const XmmVar& dst, const Mem& src)
4734   { _emitInstruction(kX86InstPSllD, &dst, &src); }
4735   //! @brief Packed Shift Left Logical (SSE2).
pslldX86Compiler4736   inline void pslld(const XmmVar& dst, const Imm& src)
4737   { _emitInstruction(kX86InstPSllD, &dst, &src); }
4738 
4739   //! @brief Packed Shift Left Logical (SSE2).
psllqX86Compiler4740   inline void psllq(const XmmVar& dst, const XmmVar& src)
4741   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
4742   //! @brief Packed Shift Left Logical (SSE2).
psllqX86Compiler4743   inline void psllq(const XmmVar& dst, const Mem& src)
4744   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
4745   //! @brief Packed Shift Left Logical (SSE2).
psllqX86Compiler4746   inline void psllq(const XmmVar& dst, const Imm& src)
4747   { _emitInstruction(kX86InstPSllQ, &dst, &src); }
4748 
4749   //! @brief Packed Shift Left Logical (SSE2).
psllwX86Compiler4750   inline void psllw(const XmmVar& dst, const XmmVar& src)
4751   { _emitInstruction(kX86InstPSllW, &dst, &src); }
4752   //! @brief Packed Shift Left Logical (SSE2).
psllwX86Compiler4753   inline void psllw(const XmmVar& dst, const Mem& src)
4754   { _emitInstruction(kX86InstPSllW, &dst, &src); }
4755   //! @brief Packed Shift Left Logical (SSE2).
psllwX86Compiler4756   inline void psllw(const XmmVar& dst, const Imm& src)
4757   { _emitInstruction(kX86InstPSllW, &dst, &src); }
4758 
4759   //! @brief Packed Shift Left Logical (SSE2).
pslldqX86Compiler4760   inline void pslldq(const XmmVar& dst, const Imm& src)
4761   { _emitInstruction(kX86InstPSllDQ, &dst, &src); }
4762 
4763   //! @brief Packed Shift Right Arithmetic (SSE2).
psradX86Compiler4764   inline void psrad(const XmmVar& dst, const XmmVar& src)
4765   { _emitInstruction(kX86InstPSraD, &dst, &src); }
4766   //! @brief Packed Shift Right Arithmetic (SSE2).
psradX86Compiler4767   inline void psrad(const XmmVar& dst, const Mem& src)
4768   { _emitInstruction(kX86InstPSraD, &dst, &src); }
4769   //! @brief Packed Shift Right Arithmetic (SSE2).
psradX86Compiler4770   inline void psrad(const XmmVar& dst, const Imm& src)
4771   { _emitInstruction(kX86InstPSraD, &dst, &src); }
4772 
4773   //! @brief Packed Shift Right Arithmetic (SSE2).
psrawX86Compiler4774   inline void psraw(const XmmVar& dst, const XmmVar& src)
4775   { _emitInstruction(kX86InstPSraW, &dst, &src); }
4776   //! @brief Packed Shift Right Arithmetic (SSE2).
psrawX86Compiler4777   inline void psraw(const XmmVar& dst, const Mem& src)
4778   { _emitInstruction(kX86InstPSraW, &dst, &src); }
4779   //! @brief Packed Shift Right Arithmetic (SSE2).
psrawX86Compiler4780   inline void psraw(const XmmVar& dst, const Imm& src)
4781   { _emitInstruction(kX86InstPSraW, &dst, &src); }
4782 
4783   //! @brief Packed Subtract (SSE2).
psubbX86Compiler4784   inline void psubb(const XmmVar& dst, const XmmVar& src)
4785   { _emitInstruction(kX86InstPSubB, &dst, &src); }
4786   //! @brief Packed Subtract (SSE2).
psubbX86Compiler4787   inline void psubb(const XmmVar& dst, const Mem& src)
4788   { _emitInstruction(kX86InstPSubB, &dst, &src); }
4789 
4790   //! @brief Packed Subtract (SSE2).
psubwX86Compiler4791   inline void psubw(const XmmVar& dst, const XmmVar& src)
4792   { _emitInstruction(kX86InstPSubW, &dst, &src); }
4793   //! @brief Packed Subtract (SSE2).
psubwX86Compiler4794   inline void psubw(const XmmVar& dst, const Mem& src)
4795   { _emitInstruction(kX86InstPSubW, &dst, &src); }
4796 
4797   //! @brief Packed Subtract (SSE2).
psubdX86Compiler4798   inline void psubd(const XmmVar& dst, const XmmVar& src)
4799   { _emitInstruction(kX86InstPSubD, &dst, &src); }
4800   //! @brief Packed Subtract (SSE2).
psubdX86Compiler4801   inline void psubd(const XmmVar& dst, const Mem& src)
4802   { _emitInstruction(kX86InstPSubD, &dst, &src); }
4803 
4804   //! @brief Packed Subtract (SSE2).
psubqX86Compiler4805   inline void psubq(const MmVar& dst, const MmVar& src)
4806   { _emitInstruction(kX86InstPSubQ, &dst, &src); }
4807   //! @brief Packed Subtract (SSE2).
psubqX86Compiler4808   inline void psubq(const MmVar& dst, const Mem& src)
4809   { _emitInstruction(kX86InstPSubQ, &dst, &src); }
4810 
4811   //! @brief Packed Subtract (SSE2).
psubqX86Compiler4812   inline void psubq(const XmmVar& dst, const XmmVar& src)
4813   { _emitInstruction(kX86InstPSubQ, &dst, &src); }
4814   //! @brief Packed Subtract (SSE2).
psubqX86Compiler4815   inline void psubq(const XmmVar& dst, const Mem& src)
4816   { _emitInstruction(kX86InstPSubQ, &dst, &src); }
4817 
4818   //! @brief Packed Multiply and Add (SSE2).
pmaddwdX86Compiler4819   inline void pmaddwd(const XmmVar& dst, const XmmVar& src)
4820   { _emitInstruction(kX86InstPMAddWD, &dst, &src); }
4821   //! @brief Packed Multiply and Add (SSE2).
pmaddwdX86Compiler4822   inline void pmaddwd(const XmmVar& dst, const Mem& src)
4823   { _emitInstruction(kX86InstPMAddWD, &dst, &src); }
4824 
4825   //! @brief Shuffle Packed DWORDs (SSE2).
pshufdX86Compiler4826   inline void pshufd(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
4827   { _emitInstruction(kX86InstPShufD, &dst, &src, &imm8); }
4828   //! @brief Shuffle Packed DWORDs (SSE2).
pshufdX86Compiler4829   inline void pshufd(const XmmVar& dst, const Mem& src, const Imm& imm8)
4830   { _emitInstruction(kX86InstPShufD, &dst, &src, &imm8); }
4831 
4832   //! @brief Shuffle Packed High Words (SSE2).
pshufhwX86Compiler4833   inline void pshufhw(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
4834   { _emitInstruction(kX86InstPShufHW, &dst, &src, &imm8); }
4835   //! @brief Shuffle Packed High Words (SSE2).
pshufhwX86Compiler4836   inline void pshufhw(const XmmVar& dst, const Mem& src, const Imm& imm8)
4837   { _emitInstruction(kX86InstPShufHW, &dst, &src, &imm8); }
4838 
4839   //! @brief Shuffle Packed Low Words (SSE2).
pshuflwX86Compiler4840   inline void pshuflw(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
4841   { _emitInstruction(kX86InstPShufLW, &dst, &src, &imm8); }
4842   //! @brief Shuffle Packed Low Words (SSE2).
pshuflwX86Compiler4843   inline void pshuflw(const XmmVar& dst, const Mem& src, const Imm& imm8)
4844   { _emitInstruction(kX86InstPShufLW, &dst, &src, &imm8); }
4845 
4846   //! @brief Packed Shift Right Logical (SSE2).
psrldX86Compiler4847   inline void psrld(const XmmVar& dst, const XmmVar& src)
4848   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
4849   //! @brief Packed Shift Right Logical (SSE2).
psrldX86Compiler4850   inline void psrld(const XmmVar& dst, const Mem& src)
4851   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
4852   //! @brief Packed Shift Right Logical (SSE2).
psrldX86Compiler4853   inline void psrld(const XmmVar& dst, const Imm& src)
4854   { _emitInstruction(kX86InstPSrlD, &dst, &src); }
4855 
4856   //! @brief Packed Shift Right Logical (SSE2).
psrlqX86Compiler4857   inline void psrlq(const XmmVar& dst, const XmmVar& src)
4858   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
4859   //! @brief Packed Shift Right Logical (SSE2).
psrlqX86Compiler4860   inline void psrlq(const XmmVar& dst, const Mem& src)
4861   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
4862   //! @brief Packed Shift Right Logical (SSE2).
psrlqX86Compiler4863   inline void psrlq(const XmmVar& dst, const Imm& src)
4864   { _emitInstruction(kX86InstPSrlQ, &dst, &src); }
4865 
4866   //! @brief DQWord Shift Right Logical (MMX).
psrldqX86Compiler4867   inline void psrldq(const XmmVar& dst, const Imm& src)
4868   { _emitInstruction(kX86InstPSrlDQ, &dst, &src); }
4869 
4870   //! @brief Packed Shift Right Logical (SSE2).
psrlwX86Compiler4871   inline void psrlw(const XmmVar& dst, const XmmVar& src)
4872   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
4873   //! @brief Packed Shift Right Logical (SSE2).
psrlwX86Compiler4874   inline void psrlw(const XmmVar& dst, const Mem& src)
4875   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
4876   //! @brief Packed Shift Right Logical (SSE2).
psrlwX86Compiler4877   inline void psrlw(const XmmVar& dst, const Imm& src)
4878   { _emitInstruction(kX86InstPSrlW, &dst, &src); }
4879 
4880   //! @brief Packed Subtract with Saturation (SSE2).
psubsbX86Compiler4881   inline void psubsb(const XmmVar& dst, const XmmVar& src)
4882   { _emitInstruction(kX86InstPSubSB, &dst, &src); }
4883   //! @brief Packed Subtract with Saturation (SSE2).
psubsbX86Compiler4884   inline void psubsb(const XmmVar& dst, const Mem& src)
4885   { _emitInstruction(kX86InstPSubSB, &dst, &src); }
4886 
4887   //! @brief Packed Subtract with Saturation (SSE2).
psubswX86Compiler4888   inline void psubsw(const XmmVar& dst, const XmmVar& src)
4889   { _emitInstruction(kX86InstPSubSW, &dst, &src); }
4890   //! @brief Packed Subtract with Saturation (SSE2).
psubswX86Compiler4891   inline void psubsw(const XmmVar& dst, const Mem& src)
4892   { _emitInstruction(kX86InstPSubSW, &dst, &src); }
4893 
4894   //! @brief Packed Subtract with Unsigned Saturation (SSE2).
psubusbX86Compiler4895   inline void psubusb(const XmmVar& dst, const XmmVar& src)
4896   { _emitInstruction(kX86InstPSubUSB, &dst, &src); }
4897   //! @brief Packed Subtract with Unsigned Saturation (SSE2).
psubusbX86Compiler4898   inline void psubusb(const XmmVar& dst, const Mem& src)
4899   { _emitInstruction(kX86InstPSubUSB, &dst, &src); }
4900 
4901   //! @brief Packed Subtract with Unsigned Saturation (SSE2).
psubuswX86Compiler4902   inline void psubusw(const XmmVar& dst, const XmmVar& src)
4903   { _emitInstruction(kX86InstPSubUSW, &dst, &src); }
4904   //! @brief Packed Subtract with Unsigned Saturation (SSE2).
psubuswX86Compiler4905   inline void psubusw(const XmmVar& dst, const Mem& src)
4906   { _emitInstruction(kX86InstPSubUSW, &dst, &src); }
4907 
4908   //! @brief Unpack High Data (SSE2).
punpckhbwX86Compiler4909   inline void punpckhbw(const XmmVar& dst, const XmmVar& src)
4910   { _emitInstruction(kX86InstPunpckHBW, &dst, &src); }
4911   //! @brief Unpack High Data (SSE2).
punpckhbwX86Compiler4912   inline void punpckhbw(const XmmVar& dst, const Mem& src)
4913   { _emitInstruction(kX86InstPunpckHBW, &dst, &src); }
4914 
4915   //! @brief Unpack High Data (SSE2).
punpckhwdX86Compiler4916   inline void punpckhwd(const XmmVar& dst, const XmmVar& src)
4917   { _emitInstruction(kX86InstPunpckHWD, &dst, &src); }
4918   //! @brief Unpack High Data (SSE2).
punpckhwdX86Compiler4919   inline void punpckhwd(const XmmVar& dst, const Mem& src)
4920   { _emitInstruction(kX86InstPunpckHWD, &dst, &src); }
4921 
4922   //! @brief Unpack High Data (SSE2).
punpckhdqX86Compiler4923   inline void punpckhdq(const XmmVar& dst, const XmmVar& src)
4924   { _emitInstruction(kX86InstPunpckHDQ, &dst, &src); }
4925   //! @brief Unpack High Data (SSE2).
punpckhdqX86Compiler4926   inline void punpckhdq(const XmmVar& dst, const Mem& src)
4927   { _emitInstruction(kX86InstPunpckHDQ, &dst, &src); }
4928 
4929   //! @brief Unpack High Data (SSE2).
punpckhqdqX86Compiler4930   inline void punpckhqdq(const XmmVar& dst, const XmmVar& src)
4931   { _emitInstruction(kX86InstPunpckHQDQ, &dst, &src); }
4932   //! @brief Unpack High Data (SSE2).
punpckhqdqX86Compiler4933   inline void punpckhqdq(const XmmVar& dst, const Mem& src)
4934   { _emitInstruction(kX86InstPunpckHQDQ, &dst, &src); }
4935 
4936   //! @brief Unpack Low Data (SSE2).
punpcklbwX86Compiler4937   inline void punpcklbw(const XmmVar& dst, const XmmVar& src)
4938   { _emitInstruction(kX86InstPunpckLBW, &dst, &src); }
4939   //! @brief Unpack Low Data (SSE2).
punpcklbwX86Compiler4940   inline void punpcklbw(const XmmVar& dst, const Mem& src)
4941   { _emitInstruction(kX86InstPunpckLBW, &dst, &src); }
4942 
4943   //! @brief Unpack Low Data (SSE2).
punpcklwdX86Compiler4944   inline void punpcklwd(const XmmVar& dst, const XmmVar& src)
4945   { _emitInstruction(kX86InstPunpckLWD, &dst, &src); }
4946   //! @brief Unpack Low Data (SSE2).
punpcklwdX86Compiler4947   inline void punpcklwd(const XmmVar& dst, const Mem& src)
4948   { _emitInstruction(kX86InstPunpckLWD, &dst, &src); }
4949 
4950   //! @brief Unpack Low Data (SSE2).
punpckldqX86Compiler4951   inline void punpckldq(const XmmVar& dst, const XmmVar& src)
4952   { _emitInstruction(kX86InstPunpckLDQ, &dst, &src); }
4953   //! @brief Unpack Low Data (SSE2).
punpckldqX86Compiler4954   inline void punpckldq(const XmmVar& dst, const Mem& src)
4955   { _emitInstruction(kX86InstPunpckLDQ, &dst, &src); }
4956 
4957   //! @brief Unpack Low Data (SSE2).
punpcklqdqX86Compiler4958   inline void punpcklqdq(const XmmVar& dst, const XmmVar& src)
4959   { _emitInstruction(kX86InstPunpckLQDQ, &dst, &src); }
4960   //! @brief Unpack Low Data (SSE2).
punpcklqdqX86Compiler4961   inline void punpcklqdq(const XmmVar& dst, const Mem& src)
4962   { _emitInstruction(kX86InstPunpckLQDQ, &dst, &src); }
4963 
4964   //! @brief Bitwise Exclusive OR (SSE2).
pxorX86Compiler4965   inline void pxor(const XmmVar& dst, const XmmVar& src)
4966   { _emitInstruction(kX86InstPXor, &dst, &src); }
4967   //! @brief Bitwise Exclusive OR (SSE2).
pxorX86Compiler4968   inline void pxor(const XmmVar& dst, const Mem& src)
4969   { _emitInstruction(kX86InstPXor, &dst, &src); }
4970 
4971   //! @brief Shuffle DP-FP (SSE2).
shufpdX86Compiler4972   inline void shufpd(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
4973   { _emitInstruction(kX86InstShufPD, &dst, &src, &imm8); }
4974   //! @brief Shuffle DP-FP (SSE2).
shufpdX86Compiler4975   inline void shufpd(const XmmVar& dst, const Mem& src, const Imm& imm8)
4976   { _emitInstruction(kX86InstShufPD, &dst, &src, &imm8); }
4977 
4978   //! @brief Compute Square Roots of Packed DP-FP Values (SSE2).
sqrtpdX86Compiler4979   inline void sqrtpd(const XmmVar& dst, const XmmVar& src)
4980   { _emitInstruction(kX86InstSqrtPD, &dst, &src); }
4981   //! @brief Compute Square Roots of Packed DP-FP Values (SSE2).
sqrtpdX86Compiler4982   inline void sqrtpd(const XmmVar& dst, const Mem& src)
4983   { _emitInstruction(kX86InstSqrtPD, &dst, &src); }
4984 
4985   //! @brief Compute Square Root of Scalar DP-FP Value (SSE2).
sqrtsdX86Compiler4986   inline void sqrtsd(const XmmVar& dst, const XmmVar& src)
4987   { _emitInstruction(kX86InstSqrtSD, &dst, &src); }
4988   //! @brief Compute Square Root of Scalar DP-FP Value (SSE2).
sqrtsdX86Compiler4989   inline void sqrtsd(const XmmVar& dst, const Mem& src)
4990   { _emitInstruction(kX86InstSqrtSD, &dst, &src); }
4991 
4992   //! @brief Packed DP-FP Subtract (SSE2).
subpdX86Compiler4993   inline void subpd(const XmmVar& dst, const XmmVar& src)
4994   { _emitInstruction(kX86InstSubPD, &dst, &src); }
4995   //! @brief Packed DP-FP Subtract (SSE2).
subpdX86Compiler4996   inline void subpd(const XmmVar& dst, const Mem& src)
4997   { _emitInstruction(kX86InstSubPD, &dst, &src); }
4998 
4999   //! @brief Scalar DP-FP Subtract (SSE2).
subsdX86Compiler5000   inline void subsd(const XmmVar& dst, const XmmVar& src)
5001   { _emitInstruction(kX86InstSubSD, &dst, &src); }
5002   //! @brief Scalar DP-FP Subtract (SSE2).
subsdX86Compiler5003   inline void subsd(const XmmVar& dst, const Mem& src)
5004   { _emitInstruction(kX86InstSubSD, &dst, &src); }
5005 
5006   //! @brief Scalar Unordered DP-FP Compare and Set EFLAGS (SSE2).
ucomisdX86Compiler5007   inline void ucomisd(const XmmVar& dst, const XmmVar& src)
5008   { _emitInstruction(kX86InstUComISD, &dst, &src); }
5009   //! @brief Scalar Unordered DP-FP Compare and Set EFLAGS (SSE2).
ucomisdX86Compiler5010   inline void ucomisd(const XmmVar& dst, const Mem& src)
5011   { _emitInstruction(kX86InstUComISD, &dst, &src); }
5012 
5013   //! @brief Unpack and Interleave High Packed Double-Precision FP Values (SSE2).
unpckhpdX86Compiler5014   inline void unpckhpd(const XmmVar& dst, const XmmVar& src)
5015   { _emitInstruction(kX86InstUnpckHPD, &dst, &src); }
5016   //! @brief Unpack and Interleave High Packed Double-Precision FP Values (SSE2).
unpckhpdX86Compiler5017   inline void unpckhpd(const XmmVar& dst, const Mem& src)
5018   { _emitInstruction(kX86InstUnpckHPD, &dst, &src); }
5019 
5020   //! @brief Unpack and Interleave Low Packed Double-Precision FP Values (SSE2).
unpcklpdX86Compiler5021   inline void unpcklpd(const XmmVar& dst, const XmmVar& src)
5022   { _emitInstruction(kX86InstUnpckLPD, &dst, &src); }
5023   //! @brief Unpack and Interleave Low Packed Double-Precision FP Values (SSE2).
unpcklpdX86Compiler5024   inline void unpcklpd(const XmmVar& dst, const Mem& src)
5025   { _emitInstruction(kX86InstUnpckLPD, &dst, &src); }
5026 
5027   //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
xorpdX86Compiler5028   inline void xorpd(const XmmVar& dst, const XmmVar& src)
5029   { _emitInstruction(kX86InstXorPD, &dst, &src); }
5030   //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
xorpdX86Compiler5031   inline void xorpd(const XmmVar& dst, const Mem& src)
5032   { _emitInstruction(kX86InstXorPD, &dst, &src); }
5033 
5034   // --------------------------------------------------------------------------
5035   // [SSE3]
5036   // --------------------------------------------------------------------------
5037 
5038   //! @brief Packed DP-FP Add/Subtract (SSE3).
addsubpdX86Compiler5039   inline void addsubpd(const XmmVar& dst, const XmmVar& src)
5040   { _emitInstruction(kX86InstAddSubPD, &dst, &src); }
5041   //! @brief Packed DP-FP Add/Subtract (SSE3).
addsubpdX86Compiler5042   inline void addsubpd(const XmmVar& dst, const Mem& src)
5043   { _emitInstruction(kX86InstAddSubPD, &dst, &src); }
5044 
5045   //! @brief Packed SP-FP Add/Subtract (SSE3).
addsubpsX86Compiler5046   inline void addsubps(const XmmVar& dst, const XmmVar& src)
5047   { _emitInstruction(kX86InstAddSubPS, &dst, &src); }
5048   //! @brief Packed SP-FP Add/Subtract (SSE3).
addsubpsX86Compiler5049   inline void addsubps(const XmmVar& dst, const Mem& src)
5050   { _emitInstruction(kX86InstAddSubPS, &dst, &src); }
5051 
5052 #if ASMJIT_NOT_SUPPORTED_BY_COMPILER
5053   // TODO: NOT IMPLEMENTED BY THE COMPILER.
5054   //! @brief Store Integer with Truncation (SSE3).
fisttpX86Compiler5055   inline void fisttp(const Mem& dst)
5056   { _emitInstruction(kX86InstFISttP, &dst); }
5057 #endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER
5058 
5059   //! @brief Packed DP-FP Horizontal Add (SSE3).
haddpdX86Compiler5060   inline void haddpd(const XmmVar& dst, const XmmVar& src)
5061   { _emitInstruction(kX86InstHAddPD, &dst, &src); }
5062   //! @brief Packed DP-FP Horizontal Add (SSE3).
haddpdX86Compiler5063   inline void haddpd(const XmmVar& dst, const Mem& src)
5064   { _emitInstruction(kX86InstHAddPD, &dst, &src); }
5065 
5066   //! @brief Packed SP-FP Horizontal Add (SSE3).
haddpsX86Compiler5067   inline void haddps(const XmmVar& dst, const XmmVar& src)
5068   { _emitInstruction(kX86InstHAddPS, &dst, &src); }
5069   //! @brief Packed SP-FP Horizontal Add (SSE3).
haddpsX86Compiler5070   inline void haddps(const XmmVar& dst, const Mem& src)
5071   { _emitInstruction(kX86InstHAddPS, &dst, &src); }
5072 
5073   //! @brief Packed DP-FP Horizontal Subtract (SSE3).
hsubpdX86Compiler5074   inline void hsubpd(const XmmVar& dst, const XmmVar& src)
5075   { _emitInstruction(kX86InstHSubPD, &dst, &src); }
5076   //! @brief Packed DP-FP Horizontal Subtract (SSE3).
hsubpdX86Compiler5077   inline void hsubpd(const XmmVar& dst, const Mem& src)
5078   { _emitInstruction(kX86InstHSubPD, &dst, &src); }
5079 
5080   //! @brief Packed SP-FP Horizontal Subtract (SSE3).
hsubpsX86Compiler5081   inline void hsubps(const XmmVar& dst, const XmmVar& src)
5082   { _emitInstruction(kX86InstHSubPS, &dst, &src); }
5083   //! @brief Packed SP-FP Horizontal Subtract (SSE3).
hsubpsX86Compiler5084   inline void hsubps(const XmmVar& dst, const Mem& src)
5085   { _emitInstruction(kX86InstHSubPS, &dst, &src); }
5086 
5087   //! @brief Load Unaligned Integer 128 Bits (SSE3).
lddquX86Compiler5088   inline void lddqu(const XmmVar& dst, const Mem& src)
5089   { _emitInstruction(kX86InstLdDQU, &dst, &src); }
5090 
5091 #if ASMJIT_NOT_SUPPORTED_BY_COMPILER
5092   //! @brief Set Up Monitor Address (SSE3).
monitorX86Compiler5093   inline void monitor()
5094   { _emitInstruction(kX86InstMonitor); }
5095 #endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER
5096 
5097   //! @brief Move One DP-FP and Duplicate (SSE3).
movddupX86Compiler5098   inline void movddup(const XmmVar& dst, const XmmVar& src)
5099   { _emitInstruction(kX86InstMovDDup, &dst, &src); }
5100   //! @brief Move One DP-FP and Duplicate (SSE3).
movddupX86Compiler5101   inline void movddup(const XmmVar& dst, const Mem& src)
5102   { _emitInstruction(kX86InstMovDDup, &dst, &src); }
5103 
5104   //! @brief Move Packed SP-FP High and Duplicate (SSE3).
movshdupX86Compiler5105   inline void movshdup(const XmmVar& dst, const XmmVar& src)
5106   { _emitInstruction(kX86InstMovSHDup, &dst, &src); }
5107   //! @brief Move Packed SP-FP High and Duplicate (SSE3).
movshdupX86Compiler5108   inline void movshdup(const XmmVar& dst, const Mem& src)
5109   { _emitInstruction(kX86InstMovSHDup, &dst, &src); }
5110 
5111   //! @brief Move Packed SP-FP Low and Duplicate (SSE3).
movsldupX86Compiler5112   inline void movsldup(const XmmVar& dst, const XmmVar& src)
5113   { _emitInstruction(kX86InstMovSLDup, &dst, &src); }
5114   //! @brief Move Packed SP-FP Low and Duplicate (SSE3).
movsldupX86Compiler5115   inline void movsldup(const XmmVar& dst, const Mem& src)
5116   { _emitInstruction(kX86InstMovSLDup, &dst, &src); }
5117 
#if ASMJIT_NOT_SUPPORTED_BY_COMPILER
//! @brief Monitor wait (SSE3).
//!
//! Only compiled when @c ASMJIT_NOT_SUPPORTED_BY_COMPILER is non-zero.
inline void mwait()
{
  _emitInstruction(kX86InstMWait);
}
#endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER
5123 
5124   // --------------------------------------------------------------------------
5125   // [SSSE3]
5126   // --------------------------------------------------------------------------
5127 
5128   //! @brief Packed SIGN (SSSE3).
psignbX86Compiler5129   inline void psignb(const MmVar& dst, const MmVar& src)
5130   { _emitInstruction(kX86InstPSignB, &dst, &src); }
5131   //! @brief Packed SIGN (SSSE3).
psignbX86Compiler5132   inline void psignb(const MmVar& dst, const Mem& src)
5133   { _emitInstruction(kX86InstPSignB, &dst, &src); }
5134 
5135   //! @brief Packed SIGN (SSSE3).
psignbX86Compiler5136   inline void psignb(const XmmVar& dst, const XmmVar& src)
5137   { _emitInstruction(kX86InstPSignB, &dst, &src); }
5138   //! @brief Packed SIGN (SSSE3).
psignbX86Compiler5139   inline void psignb(const XmmVar& dst, const Mem& src)
5140   { _emitInstruction(kX86InstPSignB, &dst, &src); }
5141 
5142   //! @brief Packed SIGN (SSSE3).
psignwX86Compiler5143   inline void psignw(const MmVar& dst, const MmVar& src)
5144   { _emitInstruction(kX86InstPSignW, &dst, &src); }
5145   //! @brief Packed SIGN (SSSE3).
psignwX86Compiler5146   inline void psignw(const MmVar& dst, const Mem& src)
5147   { _emitInstruction(kX86InstPSignW, &dst, &src); }
5148 
5149   //! @brief Packed SIGN (SSSE3).
psignwX86Compiler5150   inline void psignw(const XmmVar& dst, const XmmVar& src)
5151   { _emitInstruction(kX86InstPSignW, &dst, &src); }
5152   //! @brief Packed SIGN (SSSE3).
psignwX86Compiler5153   inline void psignw(const XmmVar& dst, const Mem& src)
5154   { _emitInstruction(kX86InstPSignW, &dst, &src); }
5155 
5156   //! @brief Packed SIGN (SSSE3).
psigndX86Compiler5157   inline void psignd(const MmVar& dst, const MmVar& src)
5158   { _emitInstruction(kX86InstPSignD, &dst, &src); }
5159   //! @brief Packed SIGN (SSSE3).
psigndX86Compiler5160   inline void psignd(const MmVar& dst, const Mem& src)
5161   { _emitInstruction(kX86InstPSignD, &dst, &src); }
5162 
5163   //! @brief Packed SIGN (SSSE3).
psigndX86Compiler5164   inline void psignd(const XmmVar& dst, const XmmVar& src)
5165   { _emitInstruction(kX86InstPSignD, &dst, &src); }
5166   //! @brief Packed SIGN (SSSE3).
psigndX86Compiler5167   inline void psignd(const XmmVar& dst, const Mem& src)
5168   { _emitInstruction(kX86InstPSignD, &dst, &src); }
5169 
5170   //! @brief Packed Horizontal Add (SSSE3).
phaddwX86Compiler5171   inline void phaddw(const MmVar& dst, const MmVar& src)
5172   { _emitInstruction(kX86InstPHAddW, &dst, &src); }
5173   //! @brief Packed Horizontal Add (SSSE3).
phaddwX86Compiler5174   inline void phaddw(const MmVar& dst, const Mem& src)
5175   { _emitInstruction(kX86InstPHAddW, &dst, &src); }
5176 
5177   //! @brief Packed Horizontal Add (SSSE3).
phaddwX86Compiler5178   inline void phaddw(const XmmVar& dst, const XmmVar& src)
5179   { _emitInstruction(kX86InstPHAddW, &dst, &src); }
5180   //! @brief Packed Horizontal Add (SSSE3).
phaddwX86Compiler5181   inline void phaddw(const XmmVar& dst, const Mem& src)
5182   { _emitInstruction(kX86InstPHAddW, &dst, &src); }
5183 
5184   //! @brief Packed Horizontal Add (SSSE3).
phadddX86Compiler5185   inline void phaddd(const MmVar& dst, const MmVar& src)
5186   { _emitInstruction(kX86InstPHAddD, &dst, &src); }
5187   //! @brief Packed Horizontal Add (SSSE3).
phadddX86Compiler5188   inline void phaddd(const MmVar& dst, const Mem& src)
5189   { _emitInstruction(kX86InstPHAddD, &dst, &src); }
5190 
5191   //! @brief Packed Horizontal Add (SSSE3).
phadddX86Compiler5192   inline void phaddd(const XmmVar& dst, const XmmVar& src)
5193   { _emitInstruction(kX86InstPHAddD, &dst, &src); }
5194   //! @brief Packed Horizontal Add (SSSE3).
phadddX86Compiler5195   inline void phaddd(const XmmVar& dst, const Mem& src)
5196   { _emitInstruction(kX86InstPHAddD, &dst, &src); }
5197 
5198   //! @brief Packed Horizontal Add and Saturate (SSSE3).
phaddswX86Compiler5199   inline void phaddsw(const MmVar& dst, const MmVar& src)
5200   { _emitInstruction(kX86InstPHAddSW, &dst, &src); }
5201   //! @brief Packed Horizontal Add and Saturate (SSSE3).
phaddswX86Compiler5202   inline void phaddsw(const MmVar& dst, const Mem& src)
5203   { _emitInstruction(kX86InstPHAddSW, &dst, &src); }
5204 
5205   //! @brief Packed Horizontal Add and Saturate (SSSE3).
phaddswX86Compiler5206   inline void phaddsw(const XmmVar& dst, const XmmVar& src)
5207   { _emitInstruction(kX86InstPHAddSW, &dst, &src); }
5208   //! @brief Packed Horizontal Add and Saturate (SSSE3).
phaddswX86Compiler5209   inline void phaddsw(const XmmVar& dst, const Mem& src)
5210   { _emitInstruction(kX86InstPHAddSW, &dst, &src); }
5211 
5212   //! @brief Packed Horizontal Subtract (SSSE3).
phsubwX86Compiler5213   inline void phsubw(const MmVar& dst, const MmVar& src)
5214   { _emitInstruction(kX86InstPHSubW, &dst, &src); }
5215   //! @brief Packed Horizontal Subtract (SSSE3).
phsubwX86Compiler5216   inline void phsubw(const MmVar& dst, const Mem& src)
5217   { _emitInstruction(kX86InstPHSubW, &dst, &src); }
5218 
5219   //! @brief Packed Horizontal Subtract (SSSE3).
phsubwX86Compiler5220   inline void phsubw(const XmmVar& dst, const XmmVar& src)
5221   { _emitInstruction(kX86InstPHSubW, &dst, &src); }
5222   //! @brief Packed Horizontal Subtract (SSSE3).
phsubwX86Compiler5223   inline void phsubw(const XmmVar& dst, const Mem& src)
5224   { _emitInstruction(kX86InstPHSubW, &dst, &src); }
5225 
5226   //! @brief Packed Horizontal Subtract (SSSE3).
phsubdX86Compiler5227   inline void phsubd(const MmVar& dst, const MmVar& src)
5228   { _emitInstruction(kX86InstPHSubD, &dst, &src); }
5229   //! @brief Packed Horizontal Subtract (SSSE3).
phsubdX86Compiler5230   inline void phsubd(const MmVar& dst, const Mem& src)
5231   { _emitInstruction(kX86InstPHSubD, &dst, &src); }
5232 
5233   //! @brief Packed Horizontal Subtract (SSSE3).
phsubdX86Compiler5234   inline void phsubd(const XmmVar& dst, const XmmVar& src)
5235   { _emitInstruction(kX86InstPHSubD, &dst, &src); }
5236   //! @brief Packed Horizontal Subtract (SSSE3).
phsubdX86Compiler5237   inline void phsubd(const XmmVar& dst, const Mem& src)
5238   { _emitInstruction(kX86InstPHSubD, &dst, &src); }
5239 
5240   //! @brief Packed Horizontal Subtract and Saturate (SSSE3).
phsubswX86Compiler5241   inline void phsubsw(const MmVar& dst, const MmVar& src)
5242   { _emitInstruction(kX86InstPHSubSW, &dst, &src); }
5243   //! @brief Packed Horizontal Subtract and Saturate (SSSE3).
phsubswX86Compiler5244   inline void phsubsw(const MmVar& dst, const Mem& src)
5245   { _emitInstruction(kX86InstPHSubSW, &dst, &src); }
5246 
5247   //! @brief Packed Horizontal Subtract and Saturate (SSSE3).
phsubswX86Compiler5248   inline void phsubsw(const XmmVar& dst, const XmmVar& src)
5249   { _emitInstruction(kX86InstPHSubSW, &dst, &src); }
5250   //! @brief Packed Horizontal Subtract and Saturate (SSSE3).
phsubswX86Compiler5251   inline void phsubsw(const XmmVar& dst, const Mem& src)
5252   { _emitInstruction(kX86InstPHSubSW, &dst, &src); }
5253 
5254   //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3).
pmaddubswX86Compiler5255   inline void pmaddubsw(const MmVar& dst, const MmVar& src)
5256   { _emitInstruction(kX86InstPMAddUBSW, &dst, &src); }
5257   //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3).
pmaddubswX86Compiler5258   inline void pmaddubsw(const MmVar& dst, const Mem& src)
5259   { _emitInstruction(kX86InstPMAddUBSW, &dst, &src); }
5260 
5261   //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3).
pmaddubswX86Compiler5262   inline void pmaddubsw(const XmmVar& dst, const XmmVar& src)
5263   { _emitInstruction(kX86InstPMAddUBSW, &dst, &src); }
5264   //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3).
pmaddubswX86Compiler5265   inline void pmaddubsw(const XmmVar& dst, const Mem& src)
5266   { _emitInstruction(kX86InstPMAddUBSW, &dst, &src); }
5267 
5268   //! @brief Packed Absolute Value (SSSE3).
pabsbX86Compiler5269   inline void pabsb(const MmVar& dst, const MmVar& src)
5270   { _emitInstruction(kX86InstPAbsB, &dst, &src); }
5271   //! @brief Packed Absolute Value (SSSE3).
pabsbX86Compiler5272   inline void pabsb(const MmVar& dst, const Mem& src)
5273   { _emitInstruction(kX86InstPAbsB, &dst, &src); }
5274 
5275   //! @brief Packed Absolute Value (SSSE3).
pabsbX86Compiler5276   inline void pabsb(const XmmVar& dst, const XmmVar& src)
5277   { _emitInstruction(kX86InstPAbsB, &dst, &src); }
5278   //! @brief Packed Absolute Value (SSSE3).
pabsbX86Compiler5279   inline void pabsb(const XmmVar& dst, const Mem& src)
5280   { _emitInstruction(kX86InstPAbsB, &dst, &src); }
5281 
5282   //! @brief Packed Absolute Value (SSSE3).
pabswX86Compiler5283   inline void pabsw(const MmVar& dst, const MmVar& src)
5284   { _emitInstruction(kX86InstPAbsW, &dst, &src); }
5285   //! @brief Packed Absolute Value (SSSE3).
pabswX86Compiler5286   inline void pabsw(const MmVar& dst, const Mem& src)
5287   { _emitInstruction(kX86InstPAbsW, &dst, &src); }
5288 
5289   //! @brief Packed Absolute Value (SSSE3).
pabswX86Compiler5290   inline void pabsw(const XmmVar& dst, const XmmVar& src)
5291   { _emitInstruction(kX86InstPAbsW, &dst, &src); }
5292   //! @brief Packed Absolute Value (SSSE3).
pabswX86Compiler5293   inline void pabsw(const XmmVar& dst, const Mem& src)
5294   { _emitInstruction(kX86InstPAbsW, &dst, &src); }
5295 
5296   //! @brief Packed Absolute Value (SSSE3).
pabsdX86Compiler5297   inline void pabsd(const MmVar& dst, const MmVar& src)
5298   { _emitInstruction(kX86InstPAbsD, &dst, &src); }
5299   //! @brief Packed Absolute Value (SSSE3).
pabsdX86Compiler5300   inline void pabsd(const MmVar& dst, const Mem& src)
5301   { _emitInstruction(kX86InstPAbsD, &dst, &src); }
5302 
5303   //! @brief Packed Absolute Value (SSSE3).
pabsdX86Compiler5304   inline void pabsd(const XmmVar& dst, const XmmVar& src)
5305   { _emitInstruction(kX86InstPAbsD, &dst, &src); }
5306   //! @brief Packed Absolute Value (SSSE3).
pabsdX86Compiler5307   inline void pabsd(const XmmVar& dst, const Mem& src)
5308   { _emitInstruction(kX86InstPAbsD, &dst, &src); }
5309 
5310   //! @brief Packed Multiply High with Round and Scale (SSSE3).
pmulhrswX86Compiler5311   inline void pmulhrsw(const MmVar& dst, const MmVar& src)
5312   { _emitInstruction(kX86InstPMulHRSW, &dst, &src); }
5313   //! @brief Packed Multiply High with Round and Scale (SSSE3).
pmulhrswX86Compiler5314   inline void pmulhrsw(const MmVar& dst, const Mem& src)
5315   { _emitInstruction(kX86InstPMulHRSW, &dst, &src); }
5316 
5317   //! @brief Packed Multiply High with Round and Scale (SSSE3).
pmulhrswX86Compiler5318   inline void pmulhrsw(const XmmVar& dst, const XmmVar& src)
5319   { _emitInstruction(kX86InstPMulHRSW, &dst, &src); }
5320   //! @brief Packed Multiply High with Round and Scale (SSSE3).
pmulhrswX86Compiler5321   inline void pmulhrsw(const XmmVar& dst, const Mem& src)
5322   { _emitInstruction(kX86InstPMulHRSW, &dst, &src); }
5323 
5324   //! @brief Packed Shuffle Bytes (SSSE3).
pshufbX86Compiler5325   inline void pshufb(const MmVar& dst, const MmVar& src)
5326   { _emitInstruction(kX86InstPShufB, &dst, &src); }
5327   //! @brief Packed Shuffle Bytes (SSSE3).
pshufbX86Compiler5328   inline void pshufb(const MmVar& dst, const Mem& src)
5329   { _emitInstruction(kX86InstPShufB, &dst, &src); }
5330 
5331   //! @brief Packed Shuffle Bytes (SSSE3).
pshufbX86Compiler5332   inline void pshufb(const XmmVar& dst, const XmmVar& src)
5333   { _emitInstruction(kX86InstPShufB, &dst, &src); }
5334   //! @brief Packed Shuffle Bytes (SSSE3).
pshufbX86Compiler5335   inline void pshufb(const XmmVar& dst, const Mem& src)
5336   { _emitInstruction(kX86InstPShufB, &dst, &src); }
5337 
5338   //! @brief Packed Shuffle Bytes (SSSE3).
palignrX86Compiler5339   inline void palignr(const MmVar& dst, const MmVar& src, const Imm& imm8)
5340   { _emitInstruction(kX86InstPAlignR, &dst, &src, &imm8); }
5341   //! @brief Packed Shuffle Bytes (SSSE3).
palignrX86Compiler5342   inline void palignr(const MmVar& dst, const Mem& src, const Imm& imm8)
5343   { _emitInstruction(kX86InstPAlignR, &dst, &src, &imm8); }
5344 
5345   //! @brief Packed Shuffle Bytes (SSSE3).
palignrX86Compiler5346   inline void palignr(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5347   { _emitInstruction(kX86InstPAlignR, &dst, &src, &imm8); }
5348   //! @brief Packed Shuffle Bytes (SSSE3).
palignrX86Compiler5349   inline void palignr(const XmmVar& dst, const Mem& src, const Imm& imm8)
5350   { _emitInstruction(kX86InstPAlignR, &dst, &src, &imm8); }
5351 
5352   // --------------------------------------------------------------------------
5353   // [SSE4.1]
5354   // --------------------------------------------------------------------------
5355 
5356   //! @brief Blend Packed DP-FP Values (SSE4.1).
blendpdX86Compiler5357   inline void blendpd(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5358   { _emitInstruction(kX86InstBlendPD, &dst, &src, &imm8); }
5359   //! @brief Blend Packed DP-FP Values (SSE4.1).
blendpdX86Compiler5360   inline void blendpd(const XmmVar& dst, const Mem& src, const Imm& imm8)
5361   { _emitInstruction(kX86InstBlendPD, &dst, &src, &imm8); }
5362 
5363   //! @brief Blend Packed SP-FP Values (SSE4.1).
blendpsX86Compiler5364   inline void blendps(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5365   { _emitInstruction(kX86InstBlendPS, &dst, &src, &imm8); }
5366   //! @brief Blend Packed SP-FP Values (SSE4.1).
blendpsX86Compiler5367   inline void blendps(const XmmVar& dst, const Mem& src, const Imm& imm8)
5368   { _emitInstruction(kX86InstBlendPS, &dst, &src, &imm8); }
5369 
5370   //! @brief Variable Blend Packed DP-FP Values (SSE4.1).
blendvpdX86Compiler5371   inline void blendvpd(const XmmVar& dst, const XmmVar& src)
5372   { _emitInstruction(kX86InstBlendVPD, &dst, &src); }
5373   //! @brief Variable Blend Packed DP-FP Values (SSE4.1).
blendvpdX86Compiler5374   inline void blendvpd(const XmmVar& dst, const Mem& src)
5375   { _emitInstruction(kX86InstBlendVPD, &dst, &src); }
5376 
5377   //! @brief Variable Blend Packed SP-FP Values (SSE4.1).
blendvpsX86Compiler5378   inline void blendvps(const XmmVar& dst, const XmmVar& src)
5379   { _emitInstruction(kX86InstBlendVPS, &dst, &src); }
5380   //! @brief Variable Blend Packed SP-FP Values (SSE4.1).
blendvpsX86Compiler5381   inline void blendvps(const XmmVar& dst, const Mem& src)
5382   { _emitInstruction(kX86InstBlendVPS, &dst, &src); }
5383 
5384   //! @brief Dot Product of Packed DP-FP Values (SSE4.1).
dppdX86Compiler5385   inline void dppd(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5386   { _emitInstruction(kX86InstDpPD, &dst, &src, &imm8); }
5387   //! @brief Dot Product of Packed DP-FP Values (SSE4.1).
dppdX86Compiler5388   inline void dppd(const XmmVar& dst, const Mem& src, const Imm& imm8)
5389   { _emitInstruction(kX86InstDpPD, &dst, &src, &imm8); }
5390 
5391   //! @brief Dot Product of Packed SP-FP Values (SSE4.1).
dppsX86Compiler5392   inline void dpps(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5393   { _emitInstruction(kX86InstDpPS, &dst, &src, &imm8); }
5394   //! @brief Dot Product of Packed SP-FP Values (SSE4.1).
dppsX86Compiler5395   inline void dpps(const XmmVar& dst, const Mem& src, const Imm& imm8)
5396   { _emitInstruction(kX86InstDpPS, &dst, &src, &imm8); }
5397 
5398   //! @brief Extract Packed SP-FP Value (SSE4.1).
extractpsX86Compiler5399   inline void extractps(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5400   { _emitInstruction(kX86InstExtractPS, &dst, &src, &imm8); }
5401   //! @brief Extract Packed SP-FP Value (SSE4.1).
extractpsX86Compiler5402   inline void extractps(const XmmVar& dst, const Mem& src, const Imm& imm8)
5403   { _emitInstruction(kX86InstExtractPS, &dst, &src, &imm8); }
5404 
5405   //! @brief Load Double Quadword Non-Temporal Aligned Hint (SSE4.1).
movntdqaX86Compiler5406   inline void movntdqa(const XmmVar& dst, const Mem& src)
5407   { _emitInstruction(kX86InstMovNTDQA, &dst, &src); }
5408 
5409   //! @brief Compute Multiple Packed Sums of Absolute Difference (SSE4.1).
mpsadbwX86Compiler5410   inline void mpsadbw(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5411   { _emitInstruction(kX86InstMPSADBW, &dst, &src, &imm8); }
5412   //! @brief Compute Multiple Packed Sums of Absolute Difference (SSE4.1).
mpsadbwX86Compiler5413   inline void mpsadbw(const XmmVar& dst, const Mem& src, const Imm& imm8)
5414   { _emitInstruction(kX86InstMPSADBW, &dst, &src, &imm8); }
5415 
5416   //! @brief Pack with Unsigned Saturation (SSE4.1).
packusdwX86Compiler5417   inline void packusdw(const XmmVar& dst, const XmmVar& src)
5418   { _emitInstruction(kX86InstPackUSDW, &dst, &src); }
5419   //! @brief Pack with Unsigned Saturation (SSE4.1).
packusdwX86Compiler5420   inline void packusdw(const XmmVar& dst, const Mem& src)
5421   { _emitInstruction(kX86InstPackUSDW, &dst, &src); }
5422 
5423   //! @brief Variable Blend Packed Bytes (SSE4.1).
pblendvbX86Compiler5424   inline void pblendvb(const XmmVar& dst, const XmmVar& src)
5425   { _emitInstruction(kX86InstPBlendVB, &dst, &src); }
5426   //! @brief Variable Blend Packed Bytes (SSE4.1).
pblendvbX86Compiler5427   inline void pblendvb(const XmmVar& dst, const Mem& src)
5428   { _emitInstruction(kX86InstPBlendVB, &dst, &src); }
5429 
5430   //! @brief Blend Packed Words (SSE4.1).
pblendwX86Compiler5431   inline void pblendw(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5432   { _emitInstruction(kX86InstPBlendW, &dst, &src, &imm8); }
5433   //! @brief Blend Packed Words (SSE4.1).
pblendwX86Compiler5434   inline void pblendw(const XmmVar& dst, const Mem& src, const Imm& imm8)
5435   { _emitInstruction(kX86InstPBlendW, &dst, &src, &imm8); }
5436 
5437   //! @brief Compare Packed Qword Data for Equal (SSE4.1).
pcmpeqqX86Compiler5438   inline void pcmpeqq(const XmmVar& dst, const XmmVar& src)
5439   { _emitInstruction(kX86InstPCmpEqQ, &dst, &src); }
5440   //! @brief Compare Packed Qword Data for Equal (SSE4.1).
pcmpeqqX86Compiler5441   inline void pcmpeqq(const XmmVar& dst, const Mem& src)
5442   { _emitInstruction(kX86InstPCmpEqQ, &dst, &src); }
5443 
5444   //! @brief Extract Byte (SSE4.1).
pextrbX86Compiler5445   inline void pextrb(const GpVar& dst, const XmmVar& src, const Imm& imm8)
5446   { _emitInstruction(kX86InstPExtrB, &dst, &src, &imm8); }
5447   //! @brief Extract Byte (SSE4.1).
pextrbX86Compiler5448   inline void pextrb(const Mem& dst, const XmmVar& src, const Imm& imm8)
5449   { _emitInstruction(kX86InstPExtrB, &dst, &src, &imm8); }
5450 
5451   //! @brief Extract Dword (SSE4.1).
pextrdX86Compiler5452   inline void pextrd(const GpVar& dst, const XmmVar& src, const Imm& imm8)
5453   { _emitInstruction(kX86InstPExtrD, &dst, &src, &imm8); }
5454   //! @brief Extract Dword (SSE4.1).
pextrdX86Compiler5455   inline void pextrd(const Mem& dst, const XmmVar& src, const Imm& imm8)
5456   { _emitInstruction(kX86InstPExtrD, &dst, &src, &imm8); }
5457 
5458   //! @brief Extract Dword (SSE4.1).
pextrqX86Compiler5459   inline void pextrq(const GpVar& dst, const XmmVar& src, const Imm& imm8)
5460   { _emitInstruction(kX86InstPExtrQ, &dst, &src, &imm8); }
5461   //! @brief Extract Dword (SSE4.1).
pextrqX86Compiler5462   inline void pextrq(const Mem& dst, const XmmVar& src, const Imm& imm8)
5463   { _emitInstruction(kX86InstPExtrQ, &dst, &src, &imm8); }
5464 
5465   //! @brief Packed Horizontal Word Minimum (SSE4.1).
phminposuwX86Compiler5466   inline void phminposuw(const XmmVar& dst, const XmmVar& src)
5467   { _emitInstruction(kX86InstPHMinPOSUW, &dst, &src); }
5468   //! @brief Packed Horizontal Word Minimum (SSE4.1).
phminposuwX86Compiler5469   inline void phminposuw(const XmmVar& dst, const Mem& src)
5470   { _emitInstruction(kX86InstPHMinPOSUW, &dst, &src); }
5471 
5472   //! @brief Insert Byte (SSE4.1).
pinsrbX86Compiler5473   inline void pinsrb(const XmmVar& dst, const GpVar& src, const Imm& imm8)
5474   { _emitInstruction(kX86InstPInsRB, &dst, &src, &imm8); }
5475   //! @brief Insert Byte (SSE4.1).
pinsrbX86Compiler5476   inline void pinsrb(const XmmVar& dst, const Mem& src, const Imm& imm8)
5477   { _emitInstruction(kX86InstPInsRB, &dst, &src, &imm8); }
5478 
5479   //! @brief Insert Dword (SSE4.1).
pinsrdX86Compiler5480   inline void pinsrd(const XmmVar& dst, const GpVar& src, const Imm& imm8)
5481   { _emitInstruction(kX86InstPInsRD, &dst, &src, &imm8); }
5482   //! @brief Insert Dword (SSE4.1).
pinsrdX86Compiler5483   inline void pinsrd(const XmmVar& dst, const Mem& src, const Imm& imm8)
5484   { _emitInstruction(kX86InstPInsRD, &dst, &src, &imm8); }
5485 
5486   //! @brief Insert Dword (SSE4.1).
pinsrqX86Compiler5487   inline void pinsrq(const XmmVar& dst, const GpVar& src, const Imm& imm8)
5488   { _emitInstruction(kX86InstPInsRQ, &dst, &src, &imm8); }
5489   //! @brief Insert Dword (SSE4.1).
pinsrqX86Compiler5490   inline void pinsrq(const XmmVar& dst, const Mem& src, const Imm& imm8)
5491   { _emitInstruction(kX86InstPInsRQ, &dst, &src, &imm8); }
5492 
5493   //! @brief Insert Word (SSE2).
pinsrwX86Compiler5494   inline void pinsrw(const XmmVar& dst, const GpVar& src, const Imm& imm8)
5495   { _emitInstruction(kX86InstPInsRW, &dst, &src, &imm8); }
5496   //! @brief Insert Word (SSE2).
pinsrwX86Compiler5497   inline void pinsrw(const XmmVar& dst, const Mem& src, const Imm& imm8)
5498   { _emitInstruction(kX86InstPInsRW, &dst, &src, &imm8); }
5499 
5500   //! @brief Maximum of Packed Word Integers (SSE4.1).
pmaxuwX86Compiler5501   inline void pmaxuw(const XmmVar& dst, const XmmVar& src)
5502   { _emitInstruction(kX86InstPMaxUW, &dst, &src); }
5503   //! @brief Maximum of Packed Word Integers (SSE4.1).
pmaxuwX86Compiler5504   inline void pmaxuw(const XmmVar& dst, const Mem& src)
5505   { _emitInstruction(kX86InstPMaxUW, &dst, &src); }
5506 
5507   //! @brief Maximum of Packed Signed Byte Integers (SSE4.1).
pmaxsbX86Compiler5508   inline void pmaxsb(const XmmVar& dst, const XmmVar& src)
5509   { _emitInstruction(kX86InstPMaxSB, &dst, &src); }
5510   //! @brief Maximum of Packed Signed Byte Integers (SSE4.1).
pmaxsbX86Compiler5511   inline void pmaxsb(const XmmVar& dst, const Mem& src)
5512   { _emitInstruction(kX86InstPMaxSB, &dst, &src); }
5513 
5514   //! @brief Maximum of Packed Signed Dword Integers (SSE4.1).
pmaxsdX86Compiler5515   inline void pmaxsd(const XmmVar& dst, const XmmVar& src)
5516   { _emitInstruction(kX86InstPMaxSD, &dst, &src); }
5517   //! @brief Maximum of Packed Signed Dword Integers (SSE4.1).
pmaxsdX86Compiler5518   inline void pmaxsd(const XmmVar& dst, const Mem& src)
5519   { _emitInstruction(kX86InstPMaxSD, &dst, &src); }
5520 
5521   //! @brief Maximum of Packed Unsigned Dword Integers (SSE4.1).
pmaxudX86Compiler5522   inline void pmaxud(const XmmVar& dst, const XmmVar& src)
5523   { _emitInstruction(kX86InstPMaxUD, &dst, &src); }
5524   //! @brief Maximum of Packed Unsigned Dword Integers (SSE4.1).
pmaxudX86Compiler5525   inline void pmaxud(const XmmVar& dst, const Mem& src)
5526   { _emitInstruction(kX86InstPMaxUD, &dst, &src); }
5527 
5528   //! @brief Minimum of Packed Signed Byte Integers (SSE4.1).
pminsbX86Compiler5529   inline void pminsb(const XmmVar& dst, const XmmVar& src)
5530   { _emitInstruction(kX86InstPMinSB, &dst, &src); }
5531   //! @brief Minimum of Packed Signed Byte Integers (SSE4.1).
pminsbX86Compiler5532   inline void pminsb(const XmmVar& dst, const Mem& src)
5533   { _emitInstruction(kX86InstPMinSB, &dst, &src); }
5534 
5535   //! @brief Minimum of Packed Word Integers (SSE4.1).
pminuwX86Compiler5536   inline void pminuw(const XmmVar& dst, const XmmVar& src)
5537   { _emitInstruction(kX86InstPMinUW, &dst, &src); }
5538   //! @brief Minimum of Packed Word Integers (SSE4.1).
pminuwX86Compiler5539   inline void pminuw(const XmmVar& dst, const Mem& src)
5540   { _emitInstruction(kX86InstPMinUW, &dst, &src); }
5541 
5542   //! @brief Minimum of Packed Dword Integers (SSE4.1).
pminudX86Compiler5543   inline void pminud(const XmmVar& dst, const XmmVar& src)
5544   { _emitInstruction(kX86InstPMinUD, &dst, &src); }
5545   //! @brief Minimum of Packed Dword Integers (SSE4.1).
pminudX86Compiler5546   inline void pminud(const XmmVar& dst, const Mem& src)
5547   { _emitInstruction(kX86InstPMinUD, &dst, &src); }
5548 
5549   //! @brief Minimum of Packed Dword Integers (SSE4.1).
pminsdX86Compiler5550   inline void pminsd(const XmmVar& dst, const XmmVar& src)
5551   { _emitInstruction(kX86InstPMinSD, &dst, &src); }
5552   //! @brief Minimum of Packed Dword Integers (SSE4.1).
pminsdX86Compiler5553   inline void pminsd(const XmmVar& dst, const Mem& src)
5554   { _emitInstruction(kX86InstPMinSD, &dst, &src); }
5555 
5556   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbwX86Compiler5557   inline void pmovsxbw(const XmmVar& dst, const XmmVar& src)
5558   { _emitInstruction(kX86InstPMovSXBW, &dst, &src); }
5559   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbwX86Compiler5560   inline void pmovsxbw(const XmmVar& dst, const Mem& src)
5561   { _emitInstruction(kX86InstPMovSXBW, &dst, &src); }
5562 
5563   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbdX86Compiler5564   inline void pmovsxbd(const XmmVar& dst, const XmmVar& src)
5565   { _emitInstruction(kX86InstPMovSXBD, &dst, &src); }
5566   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbdX86Compiler5567   inline void pmovsxbd(const XmmVar& dst, const Mem& src)
5568   { _emitInstruction(kX86InstPMovSXBD, &dst, &src); }
5569 
5570   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbqX86Compiler5571   inline void pmovsxbq(const XmmVar& dst, const XmmVar& src)
5572   { _emitInstruction(kX86InstPMovSXBQ, &dst, &src); }
5573   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxbqX86Compiler5574   inline void pmovsxbq(const XmmVar& dst, const Mem& src)
5575   { _emitInstruction(kX86InstPMovSXBQ, &dst, &src); }
5576 
5577   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxwdX86Compiler5578   inline void pmovsxwd(const XmmVar& dst, const XmmVar& src)
5579   { _emitInstruction(kX86InstPMovSXWD, &dst, &src); }
5580   //! @brief Packed Move with Sign Extend (SSE4.1).
pmovsxwdX86Compiler5581   inline void pmovsxwd(const XmmVar& dst, const Mem& src)
5582   { _emitInstruction(kX86InstPMovSXWD, &dst, &src); }
5583 
5584   //! @brief (SSE4.1).
pmovsxwqX86Compiler5585   inline void pmovsxwq(const XmmVar& dst, const XmmVar& src)
5586   { _emitInstruction(kX86InstPMovSXWQ, &dst, &src); }
5587   //! @brief (SSE4.1).
pmovsxwqX86Compiler5588   inline void pmovsxwq(const XmmVar& dst, const Mem& src)
5589   { _emitInstruction(kX86InstPMovSXWQ, &dst, &src); }
5590 
5591   //! @brief (SSE4.1).
pmovsxdqX86Compiler5592   inline void pmovsxdq(const XmmVar& dst, const XmmVar& src)
5593   { _emitInstruction(kX86InstPMovSXDQ, &dst, &src); }
5594   //! @brief (SSE4.1).
pmovsxdqX86Compiler5595   inline void pmovsxdq(const XmmVar& dst, const Mem& src)
5596   { _emitInstruction(kX86InstPMovSXDQ, &dst, &src); }
5597 
5598   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbwX86Compiler5599   inline void pmovzxbw(const XmmVar& dst, const XmmVar& src)
5600   { _emitInstruction(kX86InstPMovZXBW, &dst, &src); }
5601   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbwX86Compiler5602   inline void pmovzxbw(const XmmVar& dst, const Mem& src)
5603   { _emitInstruction(kX86InstPMovZXBW, &dst, &src); }
5604 
5605   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbdX86Compiler5606   inline void pmovzxbd(const XmmVar& dst, const XmmVar& src)
5607   { _emitInstruction(kX86InstPMovZXBD, &dst, &src); }
5608   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbdX86Compiler5609   inline void pmovzxbd(const XmmVar& dst, const Mem& src)
5610   { _emitInstruction(kX86InstPMovZXBD, &dst, &src); }
5611 
5612   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbqX86Compiler5613   inline void pmovzxbq(const XmmVar& dst, const XmmVar& src)
5614   { _emitInstruction(kX86InstPMovZXBQ, &dst, &src); }
5615   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxbqX86Compiler5616   inline void pmovzxbq(const XmmVar& dst, const Mem& src)
5617   { _emitInstruction(kX86InstPMovZXBQ, &dst, &src); }
5618 
5619   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxwdX86Compiler5620   inline void pmovzxwd(const XmmVar& dst, const XmmVar& src)
5621   { _emitInstruction(kX86InstPMovZXWD, &dst, &src); }
5622   //! @brief Packed Move with Zero Extend (SSE4.1).
pmovzxwdX86Compiler5623   inline void pmovzxwd(const XmmVar& dst, const Mem& src)
5624   { _emitInstruction(kX86InstPMovZXWD, &dst, &src); }
5625 
5626   //! @brief (SSE4.1).
pmovzxwqX86Compiler5627   inline void pmovzxwq(const XmmVar& dst, const XmmVar& src)
5628   { _emitInstruction(kX86InstPMovZXWQ, &dst, &src); }
5629   //! @brief (SSE4.1).
pmovzxwqX86Compiler5630   inline void pmovzxwq(const XmmVar& dst, const Mem& src)
5631   { _emitInstruction(kX86InstPMovZXWQ, &dst, &src); }
5632 
5633   //! @brief (SSE4.1).
pmovzxdqX86Compiler5634   inline void pmovzxdq(const XmmVar& dst, const XmmVar& src)
5635   { _emitInstruction(kX86InstPMovZXDQ, &dst, &src); }
5636   //! @brief (SSE4.1).
pmovzxdqX86Compiler5637   inline void pmovzxdq(const XmmVar& dst, const Mem& src)
5638   { _emitInstruction(kX86InstPMovZXDQ, &dst, &src); }
5639 
5640   //! @brief Multiply Packed Signed Dword Integers (SSE4.1).
pmuldqX86Compiler5641   inline void pmuldq(const XmmVar& dst, const XmmVar& src)
5642   { _emitInstruction(kX86InstPMulDQ, &dst, &src); }
5643   //! @brief Multiply Packed Signed Dword Integers (SSE4.1).
pmuldqX86Compiler5644   inline void pmuldq(const XmmVar& dst, const Mem& src)
5645   { _emitInstruction(kX86InstPMulDQ, &dst, &src); }
5646 
5647   //! @brief Multiply Packed Signed Integers and Store Low Result (SSE4.1).
pmulldX86Compiler5648   inline void pmulld(const XmmVar& dst, const XmmVar& src)
5649   { _emitInstruction(kX86InstPMulLD, &dst, &src); }
5650   //! @brief Multiply Packed Signed Integers and Store Low Result (SSE4.1).
pmulldX86Compiler5651   inline void pmulld(const XmmVar& dst, const Mem& src)
5652   { _emitInstruction(kX86InstPMulLD, &dst, &src); }
5653 
5654   //! @brief Logical Compare (SSE4.1).
ptestX86Compiler5655   inline void ptest(const XmmVar& op1, const XmmVar& op2)
5656   { _emitInstruction(kX86InstPTest, &op1, &op2); }
5657   //! @brief Logical Compare (SSE4.1).
ptestX86Compiler5658   inline void ptest(const XmmVar& op1, const Mem& op2)
5659   { _emitInstruction(kX86InstPTest, &op1, &op2); }
5660 
5661   //! Round Packed SP-FP Values @brief (SSE4.1).
roundpsX86Compiler5662   inline void roundps(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5663   { _emitInstruction(kX86InstRoundPS, &dst, &src, &imm8); }
5664   //! Round Packed SP-FP Values @brief (SSE4.1).
roundpsX86Compiler5665   inline void roundps(const XmmVar& dst, const Mem& src, const Imm& imm8)
5666   { _emitInstruction(kX86InstRoundPS, &dst, &src, &imm8); }
5667 
5668   //! @brief Round Scalar SP-FP Values (SSE4.1).
roundssX86Compiler5669   inline void roundss(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5670   { _emitInstruction(kX86InstRoundSS, &dst, &src, &imm8); }
5671   //! @brief Round Scalar SP-FP Values (SSE4.1).
roundssX86Compiler5672   inline void roundss(const XmmVar& dst, const Mem& src, const Imm& imm8)
5673   { _emitInstruction(kX86InstRoundSS, &dst, &src, &imm8); }
5674 
5675   //! @brief Round Packed DP-FP Values (SSE4.1).
roundpdX86Compiler5676   inline void roundpd(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5677   { _emitInstruction(kX86InstRoundPD, &dst, &src, &imm8); }
5678   //! @brief Round Packed DP-FP Values (SSE4.1).
roundpdX86Compiler5679   inline void roundpd(const XmmVar& dst, const Mem& src, const Imm& imm8)
5680   { _emitInstruction(kX86InstRoundPD, &dst, &src, &imm8); }
5681 
5682   //! @brief Round Scalar DP-FP Values (SSE4.1).
roundsdX86Compiler5683   inline void roundsd(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5684   { _emitInstruction(kX86InstRoundSD, &dst, &src, &imm8); }
5685   //! @brief Round Scalar DP-FP Values (SSE4.1).
roundsdX86Compiler5686   inline void roundsd(const XmmVar& dst, const Mem& src, const Imm& imm8)
5687   { _emitInstruction(kX86InstRoundSD, &dst, &src, &imm8); }
5688 
5689   // --------------------------------------------------------------------------
5690   // [SSE4.2]
5691   // --------------------------------------------------------------------------
5692 
5693   //! @brief Accumulate CRC32 Value (polynomial 0x11EDC6F41) (SSE4.2).
crc32X86Compiler5694   inline void crc32(const GpVar& dst, const GpVar& src)
5695   { _emitInstruction(kX86InstCrc32, &dst, &src); }
5696   //! @brief Accumulate CRC32 Value (polynomial 0x11EDC6F41) (SSE4.2).
crc32X86Compiler5697   inline void crc32(const GpVar& dst, const Mem& src)
5698   { _emitInstruction(kX86InstCrc32, &dst, &src); }
5699 
5700   //! @brief Packed Compare Explicit Length Strings, Return Index (SSE4.2).
pcmpestriX86Compiler5701   inline void pcmpestri(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5702   { _emitInstruction(kX86InstPCmpEStrI, &dst, &src, &imm8); }
5703   //! @brief Packed Compare Explicit Length Strings, Return Index (SSE4.2).
pcmpestriX86Compiler5704   inline void pcmpestri(const XmmVar& dst, const Mem& src, const Imm& imm8)
5705   { _emitInstruction(kX86InstPCmpEStrI, &dst, &src, &imm8); }
5706 
5707   //! @brief Packed Compare Explicit Length Strings, Return Mask (SSE4.2).
pcmpestrmX86Compiler5708   inline void pcmpestrm(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5709   { _emitInstruction(kX86InstPCmpEStrM, &dst, &src, &imm8); }
5710   //! @brief Packed Compare Explicit Length Strings, Return Mask (SSE4.2).
pcmpestrmX86Compiler5711   inline void pcmpestrm(const XmmVar& dst, const Mem& src, const Imm& imm8)
5712   { _emitInstruction(kX86InstPCmpEStrM, &dst, &src, &imm8); }
5713 
5714   //! @brief Packed Compare Implicit Length Strings, Return Index (SSE4.2).
pcmpistriX86Compiler5715   inline void pcmpistri(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5716   { _emitInstruction(kX86InstPCmpIStrI, &dst, &src, &imm8); }
5717   //! @brief Packed Compare Implicit Length Strings, Return Index (SSE4.2).
pcmpistriX86Compiler5718   inline void pcmpistri(const XmmVar& dst, const Mem& src, const Imm& imm8)
5719   { _emitInstruction(kX86InstPCmpIStrI, &dst, &src, &imm8); }
5720 
5721   //! @brief Packed Compare Implicit Length Strings, Return Mask (SSE4.2).
pcmpistrmX86Compiler5722   inline void pcmpistrm(const XmmVar& dst, const XmmVar& src, const Imm& imm8)
5723   { _emitInstruction(kX86InstPCmpIStrM, &dst, &src, &imm8); }
5724   //! @brief Packed Compare Implicit Length Strings, Return Mask (SSE4.2).
pcmpistrmX86Compiler5725   inline void pcmpistrm(const XmmVar& dst, const Mem& src, const Imm& imm8)
5726   { _emitInstruction(kX86InstPCmpIStrM, &dst, &src, &imm8); }
5727 
5728   //! @brief Compare Packed Data for Greater Than (SSE4.2).
pcmpgtqX86Compiler5729   inline void pcmpgtq(const XmmVar& dst, const XmmVar& src)
5730   { _emitInstruction(kX86InstPCmpGtQ, &dst, &src); }
5731   //! @brief Compare Packed Data for Greater Than (SSE4.2).
pcmpgtqX86Compiler5732   inline void pcmpgtq(const XmmVar& dst, const Mem& src)
5733   { _emitInstruction(kX86InstPCmpGtQ, &dst, &src); }
5734 
5735   //! @brief Return the Count of Number of Bits Set to 1 (SSE4.2).
popcntX86Compiler5736   inline void popcnt(const GpVar& dst, const GpVar& src)
5737   { _emitInstruction(kX86InstPopCnt, &dst, &src); }
5738   //! @brief Return the Count of Number of Bits Set to 1 (SSE4.2).
popcntX86Compiler5739   inline void popcnt(const GpVar& dst, const Mem& src)
5740   { _emitInstruction(kX86InstPopCnt, &dst, &src); }
5741 
5742   // --------------------------------------------------------------------------
5743   // [AMD only]
5744   // --------------------------------------------------------------------------
5745 
5746   //! @brief Prefetch (3dNow - Amd).
5747   //!
5748   //! Loads the entire 64-byte aligned memory sequence containing the
5749   //! specified memory address into the L1 data cache. The position of
5750   //! the specified memory address within the 64-byte cache line is
5751   //! irrelevant. If a cache hit occurs, or if a memory fault is detected,
5752   //! no bus cycle is initiated and the instruction is treated as a NOP.
amd_prefetchX86Compiler5753   inline void amd_prefetch(const Mem& mem)
5754   { _emitInstruction(kX86InstAmdPrefetch, &mem); }
5755 
5756   //! @brief Prefetch and set cache to modified (3dNow - Amd).
5757   //!
5758   //! The PREFETCHW instruction loads the prefetched line and sets the
5759   //! cache-line state to Modified, in anticipation of subsequent data
5760   //! writes to the line. The PREFETCH instruction, by contrast, typically
5761   //! sets the cache-line state to Exclusive (depending on the hardware
5762   //! implementation).
amd_prefetchwX86Compiler5763   inline void amd_prefetchw(const Mem& mem)
5764   { _emitInstruction(kX86InstAmdPrefetchW, &mem); }
5765 
5766   // --------------------------------------------------------------------------
5767   // [Intel only]
5768   // --------------------------------------------------------------------------
5769 
5770   //! @brief Move Data After Swapping Bytes (SSE3 - Intel Atom).
movbeX86Compiler5771   inline void movbe(const GpVar& dst, const Mem& src)
5772   {
5773     ASMJIT_ASSERT(!dst.isGpb());
5774     _emitInstruction(kX86InstMovBE, &dst, &src);
5775   }
5776 
5777   //! @brief Move Data After Swapping Bytes (SSE3 - Intel Atom).
movbeX86Compiler5778   inline void movbe(const Mem& dst, const GpVar& src)
5779   {
5780     ASMJIT_ASSERT(!src.isGpb());
5781     _emitInstruction(kX86InstMovBE, &dst, &src);
5782   }
5783 
5784   // -------------------------------------------------------------------------
5785   // [Emit Options]
5786   // -------------------------------------------------------------------------
5787 
5788   //! @brief Assert LOCK# Signal Prefix.
5789   //!
5790   //! This instruction causes the processor's LOCK# signal to be asserted
5791   //! during execution of the accompanying instruction (turns the
5792   //! instruction into an atomic instruction). In a multiprocessor environment,
  //! the LOCK# signal ensures that the processor has exclusive use of any shared
5794   //! memory while the signal is asserted.
5795   //!
5796   //! The LOCK prefix can be prepended only to the following instructions and
5797   //! to those forms of the instructions that use a memory operand: ADD, ADC,
5798   //! AND, BTC, BTR, BTS, CMPXCHG, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD,
5799   //! and XCHG. An undefined opcode exception will be generated if the LOCK
5800   //! prefix is used with any other instruction. The XCHG instruction always
5801   //! asserts the LOCK# signal regardless of the presence or absence of the LOCK
5802   //! prefix.
lockX86Compiler5803   inline void lock()
5804   { _emitOptions |= kX86EmitOptionLock; }
5805 
5806   //! @brief Force REX prefix to be emitted.
5807   //!
5808   //! This option should be used carefully, because there are unencodable
5809   //! combinations. If you want to access ah, bh, ch or dh registers then you
5810   //! can't emit REX prefix and it will cause an illegal instruction error.
5811   //!
5812   //! @note REX prefix is only valid for X64/AMD64 platform.
5813   //!
5814   //! @sa @c kX86EmitOptionRex.
rexX86Compiler5815   inline void rex()
5816   { _emitOptions |= kX86EmitOptionRex; }
5817 };
5818 
5819 //! @}
5820 
5821 } // AsmJit namespace
5822 
5823 #undef ASMJIT_NOT_SUPPORTED_BY_COMPILER
5824 
5825 // [Api-End]
5826 #include "../core/apiend.h"
5827 
5828 // [Guard]
5829 #endif // _ASMJIT_X86_X86COMPILER_H
5830