1;;;; that part of the description of the x86-64 instruction set
2;;;; which can live on the cross-compilation host
3
4;;;; This software is part of the SBCL system. See the README file for
5;;;; more information.
6;;;;
7;;;; This software is derived from the CMU CL system, which was
8;;;; written at Carnegie Mellon University and released into the
9;;;; public domain. The software is in the public domain and is
10;;;; provided with absolutely no warranty. See the COPYING and CREDITS
11;;;; files for more information.
12
13(in-package "SB!X86-64-ASM")
14
15(eval-when (:compile-toplevel :load-toplevel :execute)
16  ;; Imports from this package into SB-VM
17  (import '(*condition-name-vec* conditional-opcode
18            register-p xmm-register-p ; FIXME: rename REGISTER-P to GPR-P
19            make-ea ea-disp) 'sb!vm)
20  ;; Imports from SB-VM into this package
21  (import '(sb!vm::*byte-sc-names* sb!vm::*word-sc-names*
22            sb!vm::*dword-sc-names* sb!vm::*qword-sc-names*
23            sb!vm::frame-byte-offset
24            sb!vm::registers sb!vm::float-registers sb!vm::stack))) ; SB names
25
26;;; Note: In CMU CL, this used to be a call to SET-DISASSEM-PARAMS.
27(setf *disassem-inst-alignment-bytes* 1)
28
29;;; This type is used mostly in disassembly and represents legacy
30;;; registers only. R8-R15 are handled separately.
31(deftype reg () '(unsigned-byte 3))
32
33;;; This includes legacy registers and R8-R15.
34(deftype full-reg () '(unsigned-byte 4))
35
36;;; The XMM registers XMM0 - XMM15.
37(deftype xmmreg () '(unsigned-byte 4))
38
39;;; Default word size for the chip: if the operand size /= :dword
40;;; we need to output #x66 (or REX) prefix
41(defconstant +default-operand-size+ :dword)
42
43;;; The default address size for the chip. It could be overwritten
44;;; to :dword with a #x67 prefix, but this is never needed by SBCL
45;;; and thus not supported by this assembler/disassembler.
46(defconstant +default-address-size+ :qword)
47
48(defparameter *byte-reg-names*
49  #(al cl dl bl spl bpl sil dil r8b r9b r10b r11b r12b r13b r14b r15b))
50(defparameter *high-byte-reg-names*
51  #(ah ch dh bh))
52(defparameter *word-reg-names*
53  #(ax cx dx bx sp bp si di r8w r9w r10w r11w r12w r13w r14w r15w))
54(defparameter *dword-reg-names*
55  #(eax ecx edx ebx esp ebp esi edi r8d r9d r10d r11d r12d r13d r14d r15d))
56(defparameter *qword-reg-names*
57  #(rax rcx rdx rbx rsp rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15))
58
59;;; The printers for registers, memory references and immediates need to
60;;; take into account the width bit in the instruction, whether a #x66
61;;; or a REX prefix was issued, and the contents of the REX prefix.
62;;; This is implemented using prefilters to put flags into the slot
63;;; INST-PROPERTIES of the DSTATE.  These flags are the following
64;;; symbols:
65;;;
66;;; OPERAND-SIZE-8   The width bit was zero
67;;; OPERAND-SIZE-16  The "operand size override" prefix (#x66) was found
68;;; REX              A REX prefix was found
69;;; REX-W            A REX prefix with the "operand width" bit set was
70;;;                  found
71;;; REX-R            A REX prefix with the "register" bit set was found
72;;; REX-X            A REX prefix with the "index" bit set was found
73;;; REX-B            A REX prefix with the "base" bit set was found
74(defconstant +allow-qword-imm+ #b10000000)
75(defconstant +operand-size-8+  #b01000000)
76(defconstant +operand-size-16+ #b00100000)
77(defconstant +rex+             #b00010000)
78;;; The next 4 exactly correspond to the bits in the REX prefix itself,
79;;; to avoid unpacking and stuffing into inst-properties one at a time.
80(defconstant +rex-w+           #b1000)
81(defconstant +rex-r+           #b0100)
82(defconstant +rex-x+           #b0010)
83(defconstant +rex-b+           #b0001)
84
85;;; Return the operand size depending on the prefixes and width bit as
86;;; stored in DSTATE.
87(defun inst-operand-size (dstate)
88  (declare (type disassem-state dstate))
89  (cond ((dstate-get-inst-prop dstate +operand-size-8+) :byte)
90        ((dstate-get-inst-prop dstate +rex-w+) :qword)
91        ((dstate-get-inst-prop dstate +operand-size-16+) :word)
92        (t +default-operand-size+)))
93
94;;; The same as INST-OPERAND-SIZE, but for those instructions (e.g.
95;;; PUSH, JMP) that have a default operand size of :qword. It can only
96;;; be overwritten to :word.
97(defun inst-operand-size-default-qword (dstate)
98  (declare (type disassem-state dstate))
99  (if (dstate-get-inst-prop dstate +operand-size-16+) :word :qword))
100
101;;; This prefilter is used solely for its side effect, namely to put
102;;; the property OPERAND-SIZE-8 into the DSTATE if VALUE is 0.
103(defun prefilter-width (dstate value)
104  (declare (type bit value) (type disassem-state dstate))
105  (when (zerop value)
106    (dstate-put-inst-prop dstate +operand-size-8+))
107  value)
108
109;;; A register field that can be extended by REX.R.
110(defun prefilter-reg-r (dstate value)
111  (declare (type reg value) (type disassem-state dstate))
112  (if (dstate-get-inst-prop dstate +rex-r+) (+ value 8) value))
113
114;;; A register field that can be extended by REX.B.
115(defun prefilter-reg-b (dstate value)
116  (declare (type reg value) (type disassem-state dstate))
117  (if (dstate-get-inst-prop dstate +rex-b+) (+ value 8) value))
118
119(defun width-bits (width)
120  (ecase width
121    (:byte 8)
122    (:word 16)
123    (:dword 32)
124    (:qword 64)))
125
126
127;;;; disassembler argument types
128
129;;; Used to capture the lower four bits of the REX prefix all at once ...
130(define-arg-type wrxb
131  :prefilter (lambda (dstate value)
132               (dstate-put-inst-prop dstate (logior +rex+ (logand value #b1111)))
133               value))
134;;; ... or individually (not needed for REX.R and REX.X).
135;;; They are always used together, so only the first one sets the REX property.
136(define-arg-type rex-w
137  :prefilter  (lambda (dstate value)
138                (dstate-put-inst-prop dstate
139                                      (logior +rex+ (if (plusp value) +rex-w+ 0)))))
140(define-arg-type rex-b
141  :prefilter (lambda (dstate value)
142               (dstate-put-inst-prop dstate (if (plusp value) +rex-b+ 0))))
143
144(define-arg-type width
145  :prefilter #'prefilter-width
146  :printer (lambda (value stream dstate)
147             (declare (ignore value))
148             (princ (schar (symbol-name (inst-operand-size dstate)) 0)
149                    stream)))
150
151;;; Used to capture the effect of the #x66 operand size override prefix.
152(define-arg-type x66
153  :prefilter (lambda (dstate junk)
154               (declare (ignore junk))
155               (dstate-put-inst-prop dstate +operand-size-16+)))
156
157(define-arg-type displacement
158  :sign-extend t
159  :use-label (lambda (value dstate) (+ (dstate-next-addr dstate) value))
160  :printer (lambda (value stream dstate)
161             (maybe-note-assembler-routine value nil dstate)
162             (print-label value stream dstate)))
163
164(define-arg-type accum
165  :printer (lambda (value stream dstate)
166             (declare (ignore value)
167                      (type stream stream)
168                      (type disassem-state dstate))
169             (print-reg 0 stream dstate)))
170
171(define-arg-type reg
172  :prefilter #'prefilter-reg-r
173  :printer #'print-reg)
174
175(define-arg-type reg-b
176  :prefilter #'prefilter-reg-b
177  :printer #'print-reg)
178
179(define-arg-type reg-b-default-qword
180  :prefilter #'prefilter-reg-b
181  :printer #'print-reg-default-qword)
182
183(define-arg-type imm-addr
184  :prefilter (lambda (dstate)
185               (read-suffix (width-bits (inst-operand-size dstate)) dstate))
186  :printer #'print-label)
187
188;;; Normally, immediate values for an operand size of :qword are of size
189;;; :dword and are sign-extended to 64 bits.
190;;; The exception is that opcode group 0xB8 .. 0xBF allows a :qword immediate.
191(define-arg-type signed-imm-data
192  :prefilter (lambda (dstate &aux (width (inst-operand-size dstate)))
193               (when (and (not (dstate-get-inst-prop dstate +allow-qword-imm+))
194                          (eq width :qword))
195                 (setf width :dword))
196               (read-signed-suffix (width-bits width) dstate))
197  :printer (lambda (value stream dstate)
198             (maybe-note-static-symbol value dstate)
199             (princ value stream)))
200
201(define-arg-type signed-imm-data/asm-routine
202  :type 'signed-imm-data
203  :printer #'print-imm/asm-routine)
204
205;;; Used by those instructions that have a default operand size of
206;;; :qword. Nevertheless the immediate is at most of size :dword.
207;;; The only instruction of this kind having a variant with an immediate
208;;; argument is PUSH.
209(define-arg-type signed-imm-data-default-qword
210  :prefilter (lambda (dstate)
211               (let ((width (width-bits
212                             (inst-operand-size-default-qword dstate))))
213                 (when (= width 64)
214                   (setf width 32))
215                 (read-signed-suffix width dstate))))
216
217(define-arg-type signed-imm-byte
218  :prefilter (lambda (dstate)
219               (read-signed-suffix 8 dstate)))
220
221(define-arg-type imm-byte
222  :prefilter (lambda (dstate)
223               (read-suffix 8 dstate)))
224
225;;; needed for the ret imm16 instruction
226(define-arg-type imm-word-16
227  :prefilter (lambda (dstate)
228               (read-suffix 16 dstate)))
229
230(define-arg-type reg/mem
231  :prefilter #'prefilter-reg/mem
232  :printer #'print-reg/mem)
233(define-arg-type sized-reg/mem
234  ;; Same as reg/mem, but prints an explicit size indicator for
235  ;; memory references.
236  :prefilter #'prefilter-reg/mem
237  :printer #'print-sized-reg/mem)
238
239;;; Arguments of type reg/mem with a fixed size.
240(define-arg-type sized-byte-reg/mem
241  :prefilter #'prefilter-reg/mem
242  :printer #'print-sized-byte-reg/mem)
243(define-arg-type sized-word-reg/mem
244  :prefilter #'prefilter-reg/mem
245  :printer #'print-sized-word-reg/mem)
246(define-arg-type sized-dword-reg/mem
247  :prefilter #'prefilter-reg/mem
248  :printer #'print-sized-dword-reg/mem)
249
250;;; Same as sized-reg/mem, but with a default operand size of :qword.
251(define-arg-type sized-reg/mem-default-qword
252  :prefilter #'prefilter-reg/mem
253  :printer #'print-sized-reg/mem-default-qword)
254
255;;; XMM registers
256(define-arg-type xmmreg
257  :prefilter #'prefilter-reg-r
258  :printer #'print-xmmreg)
259
260(define-arg-type xmmreg-b
261  :prefilter #'prefilter-reg-b
262  :printer #'print-xmmreg)
263
264(define-arg-type xmmreg/mem
265  :prefilter #'prefilter-reg/mem
266  :printer #'print-xmmreg/mem)
267
268(defparameter *conditions*
269  '((:o . 0)
270    (:no . 1)
271    (:b . 2) (:nae . 2) (:c . 2)
272    (:nb . 3) (:ae . 3) (:nc . 3)
273    (:eq . 4) (:e . 4) (:z . 4)
274    (:ne . 5) (:nz . 5)
275    (:be . 6) (:na . 6)
276    (:nbe . 7) (:a . 7)
277    (:s . 8)
278    (:ns . 9)
279    (:p . 10) (:pe . 10)
280    (:np . 11) (:po . 11)
281    (:l . 12) (:nge . 12)
282    (:nl . 13) (:ge . 13)
283    (:le . 14) (:ng . 14)
284    (:nle . 15) (:g . 15)))
285(defparameter *condition-name-vec*
286  (let ((vec (make-array 16 :initial-element nil)))
287    (dolist (cond *conditions*)
288      (when (null (aref vec (cdr cond)))
289        (setf (aref vec (cdr cond)) (car cond))))
290    vec))
291
292;;; SSE shuffle patterns. The names end in the number of bits of the
293;;; immediate byte that are used to encode the pattern and the radix
294;;; in which to print the value.
295(macrolet ((define-sse-shuffle-arg-type (name format-string)
296               `(define-arg-type ,name
297                  :type 'imm-byte
298                  :printer (lambda (value stream dstate)
299                             (declare (type (unsigned-byte 8) value)
300                                      (type stream stream)
301                                      (ignore dstate))
302                             (format stream ,format-string value)))))
303  (define-sse-shuffle-arg-type sse-shuffle-pattern-2-2 "#b~2,'0B")
304  (define-sse-shuffle-arg-type sse-shuffle-pattern-8-4 "#4r~4,4,'0R"))
305
306;;; Set assembler parameters. (In CMU CL, this was done with
307;;; a call to a macro DEF-ASSEMBLER-PARAMS.)
308(eval-when (:compile-toplevel :load-toplevel :execute)
309  (setf sb!assem:*assem-scheduler-p* nil))
310
311(define-arg-type condition-code
312  :printer *condition-name-vec*)
313
314(defun conditional-opcode (condition)
315  (cdr (assoc condition *conditions* :test #'eq)))
316
317;;;; disassembler instruction formats
318
319(defun swap-if (direction field1 separator field2)
320    `(:if (,direction :constant 0)
321          (,field1 ,separator ,field2)
322          (,field2 ,separator ,field1)))
323
324(define-instruction-format (byte 8 :default-printer '(:name))
325  (op    :field (byte 8 0))
326  ;; optional fields
327  (accum :type 'accum)
328  (imm))
329
330(define-instruction-format (two-bytes 16
331                                        :default-printer '(:name))
332  (op :fields (list (byte 8 0) (byte 8 8))))
333
334(define-instruction-format (three-bytes 24
335                                        :default-printer '(:name))
336  (op :fields (list (byte 8 0) (byte 8 8) (byte 8 16))))
337
338;;; Prefix instructions
339
340(define-instruction-format (rex 8)
341  (rex     :field (byte 4 4)    :value #b0100)
342  (wrxb    :field (byte 4 0)    :type 'wrxb))
343
344(define-instruction-format (x66 8)
345  (x66     :field (byte 8 0)    :type 'x66      :value #x66))
346
347;;; A one-byte instruction with a #x66 prefix, used to indicate an
348;;; operand size of :word.
349(define-instruction-format (x66-byte 16
350                                        :default-printer '(:name))
351  (x66   :field (byte 8 0) :value #x66)
352  (op    :field (byte 8 8)))
353
354;;; A one-byte instruction with a REX prefix, used to indicate an
355;;; operand size of :qword. REX.W must be 1, the other three bits are
356;;; ignored.
357(define-instruction-format (rex-byte 16
358                                        :default-printer '(:name))
359  (rex   :field (byte 5 3) :value #b01001)
360  (op    :field (byte 8 8)))
361
362(define-instruction-format (simple 8)
363  (op    :field (byte 7 1))
364  (width :field (byte 1 0) :type 'width)
365  ;; optional fields
366  (accum :type 'accum)
367  (imm))
368
369;;; Same as simple, but with direction bit
370(define-instruction-format (simple-dir 8 :include simple)
371  (op :field (byte 6 2))
372  (dir :field (byte 1 1)))
373
374;;; Same as simple, but with the immediate value occurring by default,
375;;; and with an appropiate printer.
376(define-instruction-format (accum-imm 8
377                                     :include simple
378                                     :default-printer '(:name
379                                                        :tab accum ", " imm))
380  (imm :type 'signed-imm-data))
381
382(define-instruction-format (reg-no-width 8
383                                     :default-printer '(:name :tab reg))
384  (op    :field (byte 5 3))
385  (reg   :field (byte 3 0) :type 'reg-b)
386  ;; optional fields
387  (accum :type 'accum)
388  (imm))
389
390;;; This is reg-no-width with a mandatory REX prefix and accum field,
391;;; with the ability to match against REX.W and REX.B individually.
392;;; REX.R and REX.X are ignored.
393(define-instruction-format (rex-accum-reg 16
394                                       :default-printer
395                                       '(:name :tab accum ", " reg))
396  (rex   :field (byte 4 4) :value #b0100)
397  (rex-w :field (byte 1 3) :type 'rex-w)
398  (rex-b :field (byte 1 0) :type 'rex-b)
399  (op    :field (byte 5 11))
400  (reg   :field (byte 3 8) :type 'reg-b)
401  (accum :type 'accum))
402
403;;; Same as reg-no-width, but with a default operand size of :qword.
404(define-instruction-format (reg-no-width-default-qword 8
405                                        :include reg-no-width
406                                        :default-printer '(:name :tab reg))
407  (reg   :type 'reg-b-default-qword))
408
409;;; Adds a width field to reg-no-width. Note that we can't use
410;;; :INCLUDE REG-NO-WIDTH here to save typing because that would put
411;;; the WIDTH field last, but the prefilter for WIDTH must run before
412;;; the one for IMM to be able to determine the correct size of IMM.
413(define-instruction-format (reg 8
414                                        :default-printer '(:name :tab reg))
415  (op    :field (byte 4 4))
416  (width :field (byte 1 3) :type 'width)
417  (reg   :field (byte 3 0) :type 'reg-b)
418  ;; optional fields
419  (accum :type 'accum)
420  (imm))
421
422(define-instruction-format (reg-reg/mem 16
423                                        :default-printer
424                                        `(:name :tab reg ", " reg/mem))
425  (op      :field (byte 7 1))
426  (width   :field (byte 1 0)    :type 'width)
427  (reg/mem :fields (list (byte 2 14) (byte 3 8))
428           :type 'reg/mem :reader reg-r/m-inst-r/m-arg)
429  (reg     :field (byte 3 11)   :type 'reg)
430  ;; optional fields
431  (imm))
432
433;;; same as reg-reg/mem, but with direction bit
434(define-instruction-format (reg-reg/mem-dir 16
435                                        :include reg-reg/mem
436                                        :default-printer
437                                        `(:name
438                                          :tab
439                                          ,(swap-if 'dir 'reg/mem ", " 'reg)))
440  (op  :field (byte 6 2))
441  (dir :field (byte 1 1)))
442
443;;; Same as reg-reg/mem, but uses the reg field as a second op code.
444(define-instruction-format (reg/mem 16
445                                        :default-printer '(:name :tab reg/mem))
446  (op      :fields (list (byte 7 1) (byte 3 11)))
447  (width   :field (byte 1 0)    :type 'width)
448  (reg/mem :fields (list (byte 2 14) (byte 3 8))
449                                :type 'sized-reg/mem)
450  ;; optional fields
451  (imm))
452
453;;; Same as reg/mem, but without a width field and with a default
454;;; operand size of :qword.
455(define-instruction-format (reg/mem-default-qword 16
456                                        :default-printer '(:name :tab reg/mem))
457  (op      :fields (list (byte 8 0) (byte 3 11)))
458  (reg/mem :fields (list (byte 2 14) (byte 3 8))
459                                :type 'sized-reg/mem-default-qword))
460
461;;; Same as reg/mem, but with the immediate value occurring by default,
462;;; and with an appropiate printer.
463(define-instruction-format (reg/mem-imm 16
464                                        :include reg/mem
465                                        :default-printer
466                                        '(:name :tab reg/mem ", " imm))
467  (reg/mem :type 'sized-reg/mem)
468  (imm     :type 'signed-imm-data))
469
470(define-instruction-format (reg/mem-imm/asm-routine 16
471                                        :include reg/mem-imm
472                                        :default-printer
473                                        '(:name :tab reg/mem ", " imm))
474  (reg/mem :type 'sized-reg/mem)
475  (imm     :type 'signed-imm-data/asm-routine))
476
477;;; Same as reg/mem, but with using the accumulator in the default printer
478(define-instruction-format
479    (accum-reg/mem 16
480     :include reg/mem :default-printer '(:name :tab accum ", " reg/mem))
481  (reg/mem :type 'reg/mem)              ; don't need a size
482  (accum :type 'accum))
483
484;;; Same as reg-reg/mem, but with a prefix of #b00001111
485(define-instruction-format (ext-reg-reg/mem 24
486                                        :default-printer
487                                        `(:name :tab reg ", " reg/mem))
488  (prefix  :field (byte 8 0)    :value #b00001111)
489  (op      :field (byte 7 9))
490  (width   :field (byte 1 8)    :type 'width)
491  (reg/mem :fields (list (byte 2 22) (byte 3 16))
492                                :type 'reg/mem)
493  (reg     :field (byte 3 19)   :type 'reg)
494  ;; optional fields
495  (imm))
496
497(define-instruction-format (ext-reg-reg/mem-no-width 24
498                                        :default-printer
499                                        `(:name :tab reg ", " reg/mem))
500  (prefix  :field (byte 8 0)    :value #b00001111)
501  (op      :field (byte 8 8))
502  (reg/mem :fields (list (byte 2 22) (byte 3 16))
503                                :type 'reg/mem)
504  (reg     :field (byte 3 19)   :type 'reg)
505  ;; optional fields
506  (imm))
507
508(define-instruction-format (ext-reg/mem-no-width 24
509                                        :default-printer
510                                        `(:name :tab reg/mem))
511  (prefix  :field (byte 8 0)    :value #b00001111)
512  (op      :fields (list (byte 8 8) (byte 3 19)))
513  (reg/mem :fields (list (byte 2 22) (byte 3 16))
514                                :type 'reg/mem))
515
516;;; reg-no-width with #x0f prefix
517(define-instruction-format (ext-reg-no-width 16
518                                        :default-printer '(:name :tab reg))
519  (prefix  :field (byte 8 0)    :value #b00001111)
520  (op    :field (byte 5 11))
521  (reg   :field (byte 3 8) :type 'reg-b))
522
523;;; Same as reg/mem, but with a prefix of #b00001111
524(define-instruction-format (ext-reg/mem 24
525                                        :default-printer '(:name :tab reg/mem))
526  (prefix  :field (byte 8 0)    :value #b00001111)
527  (op      :fields (list (byte 7 9) (byte 3 19)))
528  (width   :field (byte 1 8)    :type 'width)
529  (reg/mem :fields (list (byte 2 22) (byte 3 16))
530                                :type 'sized-reg/mem)
531  ;; optional fields
532  (imm))
533
534(define-instruction-format (ext-reg/mem-imm 24
535                                        :include ext-reg/mem
536                                        :default-printer
537                                        '(:name :tab reg/mem ", " imm))
538  (imm :type 'signed-imm-data))
539
540(define-instruction-format (ext-reg/mem-no-width+imm8 24
541                                        :include ext-reg/mem-no-width
542                                        :default-printer
543                                        '(:name :tab reg/mem ", " imm))
544  (imm :type 'imm-byte))
545
546;;;; XMM instructions
547
548;;; All XMM instructions use an extended opcode (#x0F as the first
549;;; opcode byte). Therefore in the following "EXT" in the name of the
550;;; instruction formats refers to the formats that have an additional
551;;; prefix (#x66, #xF2 or #xF3).
552
553;;; Instructions having an XMM register as the destination operand
554;;; and an XMM register or a memory location as the source operand.
555;;; The size of the operands is implicitly given by the instruction.
556(define-instruction-format (xmm-xmm/mem 24
557                                        :default-printer
558                                        '(:name :tab reg ", " reg/mem))
559  (x0f     :field (byte 8 0)    :value #x0f)
560  (op      :field (byte 8 8))
561  (reg/mem :fields (list (byte 2 22) (byte 3 16))
562                                :type 'xmmreg/mem)
563  (reg     :field (byte 3 19)   :type 'xmmreg)
564  ;; optional fields
565  (imm))
566
567(define-instruction-format (ext-xmm-xmm/mem 32
568                                        :default-printer
569                                        '(:name :tab reg ", " reg/mem))
570  (prefix  :field (byte 8 0))
571  (x0f     :field (byte 8 8)    :value #x0f)
572  (op      :field (byte 8 16))
573  (reg/mem :fields (list (byte 2 30) (byte 3 24))
574                                :type 'xmmreg/mem)
575  (reg     :field (byte 3 27)   :type 'xmmreg)
576  (imm))
577
578(define-instruction-format (ext-rex-xmm-xmm/mem 40
579                                        :default-printer
580                                        '(:name :tab reg ", " reg/mem))
581  (prefix  :field (byte 8 0))
582  (rex     :field (byte 4 12)   :value #b0100)
583  (wrxb    :field (byte 4 8)    :type 'wrxb)
584  (x0f     :field (byte 8 16)   :value #x0f)
585  (op      :field (byte 8 24))
586  (reg/mem :fields (list (byte 2 38) (byte 3 32))
587                                :type 'xmmreg/mem)
588  (reg     :field (byte 3 35)   :type 'xmmreg)
589  (imm))
590
591(define-instruction-format (ext-2byte-xmm-xmm/mem 40
592                                        :default-printer
593                                        '(:name :tab reg ", " reg/mem))
594  (prefix  :field (byte 8 0))
595  (x0f     :field (byte 8 8)    :value #x0f)
596  (op1     :field (byte 8 16))          ; #x38 or #x3a
597  (op2     :field (byte 8 24))
598  (reg/mem :fields (list (byte 2 38) (byte 3 32))
599                                :type 'xmmreg/mem)
600  (reg     :field (byte 3 35)   :type 'xmmreg))
601
602(define-instruction-format (ext-rex-2byte-xmm-xmm/mem 48
603                                        :default-printer
604                                        '(:name :tab reg ", " reg/mem))
605  (prefix  :field (byte 8 0))
606  (rex     :field (byte 4 12)   :value #b0100)
607  (wrxb    :field (byte 4 8)    :type 'wrxb)
608  (x0f     :field (byte 8 16)   :value #x0f)
609  (op1     :field (byte 8 24))          ; #x38 or #x3a
610  (op2     :field (byte 8 32))
611  (reg/mem :fields (list (byte 2 46) (byte 3 40))
612                                :type 'xmmreg/mem)
613  (reg     :field (byte 3 43)   :type 'xmmreg))
614
615;;; Same as xmm-xmm/mem etc., but with direction bit.
616
617(define-instruction-format (ext-xmm-xmm/mem-dir 32
618                                        :include ext-xmm-xmm/mem
619                                        :default-printer
620                                        `(:name
621                                          :tab
622                                          ,(swap-if 'dir 'reg ", " 'reg/mem)))
623  (op      :field (byte 7 17))
624  (dir     :field (byte 1 16)))
625
626(define-instruction-format (ext-rex-xmm-xmm/mem-dir 40
627                                        :include ext-rex-xmm-xmm/mem
628                                        :default-printer
629                                        `(:name
630                                          :tab
631                                          ,(swap-if 'dir 'reg ", " 'reg/mem)))
632  (op      :field (byte 7 25))
633  (dir     :field (byte 1 24)))
634
635;;; Instructions having an XMM register as one operand
636;;; and a constant (unsigned) byte as the other.
637
638(define-instruction-format (ext-xmm-imm 32
639                                        :default-printer
640                                        '(:name :tab reg/mem ", " imm))
641  (prefix  :field (byte 8 0))
642  (x0f     :field (byte 8 8)   :value #x0f)
643  (op      :field (byte 8 16))
644  (/i      :field (byte 3 27))
645  (b11     :field (byte 2 30) :value #b11)
646  (reg/mem :field (byte 3 24)
647           :type 'xmmreg-b)
648  (imm     :type 'imm-byte))
649
650(define-instruction-format (ext-rex-xmm-imm 40
651                                        :default-printer
652                                        '(:name :tab reg/mem ", " imm))
653  (prefix  :field (byte 8 0))
654  (rex     :field (byte 4 12)   :value #b0100)
655  (wrxb    :field (byte 4 8)    :type 'wrxb)
656  (x0f     :field (byte 8 16)   :value #x0f)
657  (op      :field (byte 8 24))
658  (/i      :field (byte 3 35))
659  (b11     :field (byte 2 38) :value #b11)
660  (reg/mem :field (byte 3 32)
661           :type 'xmmreg-b)
662  (imm     :type 'imm-byte))
663
664;;; Instructions having an XMM register as one operand and a general-
665;;; -purpose register or a memory location as the other operand.
666
667(define-instruction-format (xmm-reg/mem 24
668                                        :default-printer
669                                        '(:name :tab reg ", " reg/mem))
670  (x0f     :field (byte 8 0)    :value #x0f)
671  (op      :field (byte 8 8))
672  (reg/mem :fields (list (byte 2 22) (byte 3 16))
673           :type 'sized-reg/mem)
674  (reg     :field (byte 3 19)   :type 'xmmreg)
675  (imm))
676
677(define-instruction-format (ext-xmm-reg/mem 32
678                                        :default-printer
679                                        '(:name :tab reg ", " reg/mem))
680  (prefix  :field (byte 8 0))
681  (x0f     :field (byte 8 8)    :value #x0f)
682  (op      :field (byte 8 16))
683  (reg/mem :fields (list (byte 2 30) (byte 3 24))
684                                :type 'sized-reg/mem)
685  (reg     :field (byte 3 27)   :type 'xmmreg)
686  (imm))
687
688(define-instruction-format (ext-rex-xmm-reg/mem 40
689                                        :default-printer
690                                        '(:name :tab reg ", " reg/mem))
691  (prefix  :field (byte 8 0))
692  (rex     :field (byte 4 12)   :value #b0100)
693  (wrxb    :field (byte 4 8)    :type 'wrxb)
694  (x0f     :field (byte 8 16)   :value #x0f)
695  (op      :field (byte 8 24))
696  (reg/mem :fields (list (byte 2 38) (byte 3 32))
697                                :type 'sized-reg/mem)
698  (reg     :field (byte 3 35)   :type 'xmmreg)
699  (imm))
700
701(define-instruction-format (ext-2byte-xmm-reg/mem 40
702                                        :default-printer
703                                        '(:name :tab reg ", " reg/mem))
704  (prefix  :field (byte 8 0))
705  (x0f     :field (byte 8 8)    :value #x0f)
706  (op1     :field (byte 8 16))
707  (op2     :field (byte 8 24))
708  (reg/mem :fields (list (byte 2 38) (byte 3 32)) :type 'sized-reg/mem)
709  (reg     :field (byte 3 35)   :type 'xmmreg)
710  (imm))
711
712;;; Instructions having a general-purpose register as one operand and an
713;;; XMM register or a memory location as the other operand.
714
715(define-instruction-format (reg-xmm/mem 24
716                                        :default-printer
717                                        '(:name :tab reg ", " reg/mem))
718  (x0f     :field (byte 8 0)    :value #x0f)
719  (op      :field (byte 8 8))
720  (reg/mem :fields (list (byte 2 22) (byte 3 16))
721                                :type 'xmmreg/mem)
722  (reg     :field (byte 3 19)   :type 'reg))
723
724(define-instruction-format (ext-reg-xmm/mem 32
725                                        :default-printer
726                                        '(:name :tab reg ", " reg/mem))
727  (prefix  :field (byte 8 0))
728  (x0f     :field (byte 8 8)    :value #x0f)
729  (op      :field (byte 8 16))
730  (reg/mem :fields (list (byte 2 30) (byte 3 24))
731                                :type 'xmmreg/mem)
732  (reg     :field (byte 3 27)   :type 'reg))
733
734(define-instruction-format (ext-rex-reg-xmm/mem 40
735                                        :default-printer
736                                        '(:name :tab reg ", " reg/mem))
737  (prefix  :field (byte 8 0))
738  (rex     :field (byte 4 12)   :value #b0100)
739  (wrxb    :field (byte 4 8)    :type 'wrxb)
740  (x0f     :field (byte 8 16)   :value #x0f)
741  (op      :field (byte 8 24))
742  (reg/mem :fields (list (byte 2 38) (byte 3 32))
743                                :type 'xmmreg/mem)
744  (reg     :field (byte 3 35)   :type 'reg))
745
746;;; Instructions having a general-purpose register or a memory location
747;;; as one operand and an a XMM register as the other operand.
748
749(define-instruction-format (ext-reg/mem-xmm 32
750                                        :default-printer
751                                        '(:name :tab reg/mem ", " reg))
752  (prefix  :field (byte 8 0))
753  (x0f     :field (byte 8 8)    :value #x0f)
754  (op      :field (byte 8 16))
755  (reg/mem :fields (list (byte 2 30) (byte 3 24))
756                                :type 'reg/mem)
757  (reg     :field (byte 3 27)   :type 'xmmreg)
758  (imm))
759
760(define-instruction-format (ext-rex-reg/mem-xmm 40
761                                        :default-printer
762                                        '(:name :tab reg/mem ", " reg))
763  (prefix  :field (byte 8 0))
764  (rex     :field (byte 4 12)   :value #b0100)
765  (wrxb    :field (byte 4 8)    :type 'wrxb)
766  (x0f     :field (byte 8 16)    :value #x0f)
767  (op      :field (byte 8 24))
768  (reg/mem :fields (list (byte 2 38) (byte 3 32))
769                                :type 'reg/mem)
770  (reg     :field (byte 3 35)   :type 'xmmreg)
771  (imm))
772
773(define-instruction-format (ext-2byte-reg/mem-xmm 40
774                                        :default-printer
775                                        '(:name :tab reg/mem ", " reg))
776  (prefix  :field (byte 8 0))
777  (x0f     :field (byte 8 8)    :value #x0f)
778  (op1     :field (byte 8 16))
779  (op2     :field (byte 8 24))
780  (reg/mem :fields (list (byte 2 38) (byte 3 32)) :type 'reg/mem)
781  (reg     :field (byte 3 35)   :type 'xmmreg)
782  (imm))
783
784(define-instruction-format (ext-rex-2byte-reg/mem-xmm 48
785                                        :default-printer
786                                        '(:name :tab reg/mem ", " reg))
787  (prefix  :field (byte 8 0))
788  (rex     :field (byte 4 12)   :value #b0100)
789  (wrxb    :field (byte 4 8)    :type 'wrxb)
790  (x0f     :field (byte 8 16)   :value #x0f)
791  (op1     :field (byte 8 24))
792  (op2     :field (byte 8 32))
793  (reg/mem :fields (list (byte 2 46) (byte 3 40)) :type 'reg/mem)
794  (reg     :field (byte 3 43)   :type 'xmmreg)
795  (imm))
796
797;;; Instructions having a general-purpose register as one operand and an a
798;;; general-purpose register or a memory location as the other operand,
799;;; and using a prefix byte.
800
801(define-instruction-format (ext-prefix-reg-reg/mem 32
802                                        :default-printer
803                                        '(:name :tab reg ", " reg/mem))
804  (prefix  :field (byte 8 0))
805  (x0f     :field (byte 8 8)    :value #x0f)
806  (op      :field (byte 8 16))
807  (reg/mem :fields (list (byte 2 30) (byte 3 24))
808                                :type 'sized-reg/mem)
809  (reg     :field (byte 3 27)   :type 'reg))
810
811(define-instruction-format (ext-rex-prefix-reg-reg/mem 40
812                                        :default-printer
813                                        '(:name :tab reg ", " reg/mem))
814  (prefix  :field (byte 8 0))
815  (rex     :field (byte 4 12)   :value #b0100)
816  (wrxb    :field (byte 4 8)    :type 'wrxb)
817  (x0f     :field (byte 8 16)   :value #x0f)
818  (op      :field (byte 8 24))
819  (reg/mem :fields (list (byte 2 38) (byte 3 32))
820                                :type 'sized-reg/mem)
821  (reg     :field (byte 3 35)   :type 'reg))
822
823(define-instruction-format (ext-2byte-prefix-reg-reg/mem 40
824                                        :default-printer
825                                        '(:name :tab reg ", " reg/mem))
826  (prefix  :field (byte 8 0))
827  (x0f     :field (byte 8 8)    :value #x0f)
828  (op1     :field (byte 8 16))          ; #x38 or #x3a
829  (op2     :field (byte 8 24))
830  (reg/mem :fields (list (byte 2 38) (byte 3 32))
831                                :type 'sized-reg/mem)
832  (reg     :field (byte 3 35)   :type 'reg))
833
834(define-instruction-format (ext-rex-2byte-prefix-reg-reg/mem 48
835                                        :default-printer
836                                        '(:name :tab reg ", " reg/mem))
837  (prefix  :field (byte 8 0))
838  (rex     :field (byte 4 12)   :value #b0100)
839  (wrxb    :field (byte 4 8)    :type 'wrxb)
840  (x0f     :field (byte 8 16)   :value #x0f)
841  (op1     :field (byte 8 24))          ; #x38 or #x3a
842  (op2     :field (byte 8 32))
843  (reg/mem :fields (list (byte 2 46) (byte 3 40))
844                                :type 'sized-reg/mem)
845  (reg     :field (byte 3 43)   :type 'reg))
846
847;; XMM comparison instruction
848
849(defparameter *sse-conditions* #(:eq :lt :le :unord :neq :nlt :nle :ord))
850
851(define-arg-type sse-condition-code
852  ;; Inherit the prefilter from IMM-BYTE to READ-SUFFIX the byte.
853  :type 'imm-byte
854  :printer *sse-conditions*)
855
856(define-instruction-format (string-op 8
857                                     :include simple
858                                     :default-printer '(:name width)))
859
860(define-instruction-format (short-cond-jump 16)
861  (op    :field (byte 4 4))
862  (cc    :field (byte 4 0) :type 'condition-code)
863  (label :field (byte 8 8) :type 'displacement))
864
865(define-instruction-format (short-jump 16 :default-printer '(:name :tab label))
866  (const :field (byte 4 4) :value #b1110)
867  (op    :field (byte 4 0))
868  (label :field (byte 8 8) :type 'displacement))
869
870(define-instruction-format (near-cond-jump 48)
871  (op    :fields (list (byte 8 0) (byte 4 12)) :value '(#b00001111 #b1000))
872  (cc    :field (byte 4 8) :type 'condition-code)
873  (label :field (byte 32 16) :type 'displacement))
874
875(define-instruction-format (near-jump 40 :default-printer '(:name :tab label))
876  (op    :field (byte 8 0))
877  (label :field (byte 32 8) :type 'displacement))
878
879(define-instruction-format (cond-set 24 :default-printer '('set cc :tab reg/mem))
880  (prefix :field (byte 8 0) :value #b00001111)
881  (op    :field (byte 4 12) :value #b1001)
882  (cc    :field (byte 4 8) :type 'condition-code)
883  (reg/mem :fields (list (byte 2 22) (byte 3 16))
884           :type 'sized-byte-reg/mem)
885  (reg     :field (byte 3 19)   :value #b000))
886
887(define-instruction-format (cond-move 24
888                                     :default-printer
889                                        '('cmov cc :tab reg ", " reg/mem))
890  (prefix  :field (byte 8 0)    :value #b00001111)
891  (op      :field (byte 4 12)   :value #b0100)
892  (cc      :field (byte 4 8)    :type 'condition-code)
893  (reg/mem :fields (list (byte 2 22) (byte 3 16))
894                                :type 'reg/mem)
895  (reg     :field (byte 3 19)   :type 'reg))
896
897(define-instruction-format (enter-format 32
898                                     :default-printer '(:name
899                                                        :tab disp
900                                                        (:unless (:constant 0)
901                                                          ", " level)))
902  (op :field (byte 8 0))
903  (disp :field (byte 16 8))
904  (level :field (byte 8 24)))
905
906;;; Single byte instruction with an immediate byte argument.
907(define-instruction-format (byte-imm 16 :default-printer '(:name :tab code))
908 (op :field (byte 8 0))
909 (code :field (byte 8 8) :reader byte-imm-code))
910
911;;; Two byte instruction with an immediate byte argument.
912;;;
913(define-instruction-format (word-imm 24 :default-printer '(:name :tab code))
914  (op :field (byte 16 0))
915  (code :field (byte 8 16) :reader word-imm-code))
916
917;;; F3 escape map - Needs a ton more work.
918
919(define-instruction-format (F3-escape 24)
920  (prefix1 :field (byte 8 0) :value #xF3)
921  (prefix2 :field (byte 8 8) :value #x0F)
922  (op      :field (byte 8 16)))
923
924(define-instruction-format (rex-F3-escape 32)
925  ;; F3 is a legacy prefix which was generalized to select an alternate opcode
926  ;; map. Legacy prefixes are encoded in the instruction before a REX prefix.
927  (prefix1 :field (byte 8 0)  :value #xF3)
928  (rex     :field (byte 4 12) :value 4)    ; "prefix2"
929  (wrxb    :field (byte 4 8)  :type 'wrxb)
930  (prefix3 :field (byte 8 16) :value #x0F)
931  (op      :field (byte 8 24)))
932
933(define-instruction-format (F3-escape-reg-reg/mem 32
934                                        :include F3-escape
935                                        :default-printer
936                                        '(:name :tab reg ", " reg/mem))
937  (reg/mem :fields (list (byte 2 30) (byte 3 24)) :type 'sized-reg/mem)
938  (reg     :field  (byte 3 27) :type 'reg))
939
940(define-instruction-format (rex-F3-escape-reg-reg/mem 40
941                                        :include rex-F3-escape
942                                        :default-printer
943                                        '(:name :tab reg ", " reg/mem))
944  (reg/mem :fields (list (byte 2 38) (byte 3 32)) :type 'sized-reg/mem)
945  (reg     :field  (byte 3 35) :type 'reg))
946
947
948;;;; primitive emitters
949
950(define-bitfield-emitter emit-word 16
951  (byte 16 0))
952
953;; FIXME: a nice enhancement would be to save all sexprs of small functions
954;; within the same file, and drop them at the end.
955;; Expressly declaimed inline definitions would be saved as usual though.
956(declaim (inline emit-dword))
957(define-bitfield-emitter emit-dword 32
958  (byte 32 0))
959(declaim (notinline emit-dword))
960
961;;; Most uses of dwords are as displacements or as immediate values in
962;;; 64-bit operations. In these cases they are sign-extended to 64 bits.
963;;; EMIT-DWORD is unsuitable there because it accepts values of type
964;;; (OR (SIGNED-BYTE 32) (UNSIGNED-BYTE 32)), so we provide a more
965;;; restricted emitter here.
966(defun emit-signed-dword (segment value)
967  (declare (type sb!assem:segment segment)
968           (type (signed-byte 32) value))
969  (declare (inline emit-dword))
970  (emit-dword segment value))
971
972(define-bitfield-emitter emit-qword 64
973  (byte 64 0))
974
975(define-bitfield-emitter emit-byte-with-reg 8
976  (byte 5 3) (byte 3 0))
977
978(define-bitfield-emitter emit-mod-reg-r/m-byte 8
979  (byte 2 6) (byte 3 3) (byte 3 0))
980
981(define-bitfield-emitter emit-sib-byte 8
982  (byte 2 6) (byte 3 3) (byte 3 0))
983
984(define-bitfield-emitter emit-rex-byte 8
985  (byte 4 4) (byte 1 3) (byte 1 2) (byte 1 1) (byte 1 0))
986
987
988
989;;;; fixup emitters
990
991(defun emit-absolute-fixup (segment fixup &optional quad-p)
992  (note-fixup segment (if quad-p :absolute64 :absolute) fixup)
993  (let ((offset (fixup-offset fixup)))
994    (if (label-p offset)
995        (emit-back-patch segment
996                         (if quad-p 8 4)
997                         (lambda (segment posn)
998                           (declare (ignore posn))
999                           (let ((val  (- (+ (component-header-length)
1000                                             (or (label-position offset)
1001                                                 0))
1002                                          other-pointer-lowtag)))
1003                             (if quad-p
1004                                 (emit-qword segment val)
1005                                 (emit-signed-dword segment val)))))
1006        (if quad-p
1007            (emit-qword segment (or offset 0))
1008            (emit-signed-dword segment (or offset 0))))))
1009
1010(defun emit-relative-fixup (segment fixup)
1011  (note-fixup segment :relative fixup)
1012  (emit-signed-dword segment (or (fixup-offset fixup) 0)))
1013
1014
1015;;;; the effective-address (ea) structure
1016
1017(defun reg-tn-encoding (tn)
1018  (declare (type tn tn))
1019  ;; ea only has space for three bits of register number: regs r8
1020  ;; and up are selected by a REX prefix byte which caller is responsible
1021  ;; for having emitted where necessary already
1022  (ecase (sb-name (sc-sb (tn-sc tn)))
1023    (registers
1024     (let ((offset (mod (tn-offset tn) 16)))
1025       (logior (ash (logand offset 1) 2)
1026               (ash offset -1))))
1027    (float-registers
1028     (mod (tn-offset tn) 8))))
1029
1030(defstruct (ea (:constructor make-ea (size &key base index scale disp))
1031               (:copier nil))
1032  ;; note that we can represent an EA with a QWORD size, but EMIT-EA
1033  ;; can't actually emit it on its own: caller also needs to emit REX
1034  ;; prefix
1035  (size nil :type (member :byte :word :dword :qword))
1036  (base nil :type (or tn null))
1037  (index nil :type (or tn null))
1038  (scale 1 :type (member 1 2 4 8))
1039  (disp 0 :type (or (unsigned-byte 32) (signed-byte 32) fixup)))
1040(defmethod print-object ((ea ea) stream)
1041  (cond ((or *print-escape* *print-readably*)
1042         (print-unreadable-object (ea stream :type t)
1043           (format stream
1044                   "~S~@[ base=~S~]~@[ index=~S~]~@[ scale=~S~]~@[ disp=~S~]"
1045                   (ea-size ea)
1046                   (ea-base ea)
1047                   (ea-index ea)
1048                   (let ((scale (ea-scale ea)))
1049                     (if (= scale 1) nil scale))
1050                   (ea-disp ea))))
1051        (t
1052         (format stream "~A PTR [" (symbol-name (ea-size ea)))
1053         (when (ea-base ea)
1054           (write-string (sb!c:location-print-name (ea-base ea)) stream)
1055           (when (ea-index ea)
1056             (write-string "+" stream)))
1057         (when (ea-index ea)
1058           (write-string (sb!c:location-print-name (ea-index ea)) stream))
1059         (unless (= (ea-scale ea) 1)
1060           (format stream "*~A" (ea-scale ea)))
1061         (typecase (ea-disp ea)
1062           (null)
1063           (integer
1064            (format stream "~@D" (ea-disp ea)))
1065           (t
1066            (format stream "+~A" (ea-disp ea))))
1067         (write-char #\] stream))))
1068
1069(defun emit-constant-tn-rip (segment constant-tn reg remaining-bytes)
1070  ;; AMD64 doesn't currently have a code object register to use as a
1071  ;; base register for constant access. Instead we use RIP-relative
1072  ;; addressing. The offset from the SIMPLE-FUN-HEADER to the instruction
1073  ;; is passed to the backpatch callback. In addition we need the offset
1074  ;; from the start of the function header to the slot in the CODE-HEADER
1075  ;; that stores the constant. Since we don't know where the code header
1076  ;; starts, instead count backwards from the function header.
1077  (let* ((2comp (component-info *component-being-compiled*))
1078         (constants (ir2-component-constants 2comp))
1079         (len (length constants))
1080         ;; Both CODE-HEADER and SIMPLE-FUN-HEADER are 16-byte aligned.
1081         ;; If there are an even amount of constants, there will be
1082         ;; an extra qword of padding before the function header, which
1083         ;; needs to be adjusted for. XXX: This will break if new slots
1084         ;; are added to the code header.
1085         (offset (* (- (+ len (if (evenp len)
1086                                  1
1087                                  2))
1088                       (tn-offset constant-tn))
1089                    n-word-bytes)))
1090    ;; RIP-relative addressing
1091    (emit-mod-reg-r/m-byte segment #b00 reg #b101)
1092    (emit-back-patch segment
1093                     4
1094                     (lambda (segment posn)
1095                       ;; The addressing is relative to end of instruction,
1096                       ;; i.e. the end of this dword. Hence the + 4.
1097                       (emit-signed-dword segment
1098                                          (+ 4 remaining-bytes
1099                                             (- (+ offset posn)))))))
1100  (values))
1101
1102(defun emit-byte-displacement-backpatch (segment target)
1103  (emit-back-patch segment 1
1104                   (lambda (segment posn)
1105                     (emit-byte segment
1106                                (the (signed-byte 8)
1107                                  (- (label-position target) (1+ posn)))))))
1108
1109(defun emit-dword-displacement-backpatch (segment target &optional (n-extra 0))
1110  ;; N-EXTRA is how many more instruction bytes will follow, to properly compute
1111  ;; the displacement from the beginning of the next instruction to TARGET.
1112  (emit-back-patch segment 4
1113                   (lambda (segment posn)
1114                     (emit-signed-dword segment (- (label-position target)
1115                                                   (+ 4 posn n-extra))))))
1116
1117(defun emit-label-rip (segment fixup reg remaining-bytes)
1118  ;; RIP-relative addressing
1119  (emit-mod-reg-r/m-byte segment #b00 reg #b101)
1120  (emit-dword-displacement-backpatch segment (fixup-offset fixup) remaining-bytes)
1121  (values))
1122
1123(defun emit-ea (segment thing reg &key allow-constants (remaining-bytes 0))
1124  (etypecase thing
1125    (tn
1126     ;; this would be eleganter if we had a function that would create
1127     ;; an ea given a tn
1128     (ecase (sb-name (sc-sb (tn-sc thing)))
1129       ((registers float-registers)
1130        (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing)))
1131       (stack
1132        ;; Convert stack tns into an index off RBP.
1133        (let ((disp (frame-byte-offset (tn-offset thing))))
1134          (cond ((<= -128 disp 127)
1135                 (emit-mod-reg-r/m-byte segment #b01 reg #b101)
1136                 (emit-byte segment disp))
1137                (t
1138                 (emit-mod-reg-r/m-byte segment #b10 reg #b101)
1139                 (emit-signed-dword segment disp)))))
1140       (constant
1141        (unless allow-constants
1142          ;; Why?
1143          (error
1144           "Constant TNs can only be directly used in MOV, PUSH, and CMP."))
1145        (emit-constant-tn-rip segment thing reg remaining-bytes))))
1146    (ea
1147     (let* ((base (ea-base thing))
1148            (index (ea-index thing))
1149            (scale (ea-scale thing))
1150            (disp (ea-disp thing))
1151            (mod (cond ((or (null base)
1152                            (and (eql disp 0)
1153                                 (not (= (reg-tn-encoding base) #b101))))
1154                        #b00)
1155                       ((and (fixnump disp) (<= -128 disp 127))
1156                        #b01)
1157                       (t
1158                        #b10)))
1159            (r/m (cond (index #b100)
1160                       ((null base) #b101)
1161                       (t (reg-tn-encoding base)))))
1162       (when (and (fixup-p disp)
1163                  (label-p (fixup-offset disp)))
1164         (aver (null base))
1165         (aver (null index))
1166         (return-from emit-ea (emit-ea segment disp reg
1167                                       :allow-constants allow-constants
1168                                       :remaining-bytes remaining-bytes)))
1169       (when (and (= mod 0) (= r/m #b101))
1170         ;; this is rip-relative in amd64, so we'll use a sib instead
1171         (setf r/m #b100 scale 1))
1172       (emit-mod-reg-r/m-byte segment mod reg r/m)
1173       (when (= r/m #b100)
1174         (let ((ss (1- (integer-length scale)))
1175               (index (if (null index)
1176                          #b100
1177                          (if (location= index sb!vm::rsp-tn)
1178                              (error "can't index off of RSP")
1179                              (reg-tn-encoding index))))
1180               (base (if (null base)
1181                         #b101
1182                         (reg-tn-encoding base))))
1183           (emit-sib-byte segment ss index base)))
1184       (cond ((= mod #b01)
1185              (emit-byte segment disp))
1186             ((or (= mod #b10) (null base))
1187              (if (fixup-p disp)
1188                  (emit-absolute-fixup segment disp)
1189                  (emit-signed-dword segment disp))))))
1190    (fixup
1191     (typecase (fixup-offset thing)
1192       (label
1193        (emit-label-rip segment thing reg remaining-bytes))
1194       (t
1195        (emit-mod-reg-r/m-byte segment #b00 reg #b100)
1196        (emit-sib-byte segment 0 #b100 #b101)
1197        (emit-absolute-fixup segment thing))))))
1198
1199(defun byte-reg-p (thing)
1200  (and (tn-p thing)
1201       (eq (sb-name (sc-sb (tn-sc thing))) 'registers)
1202       (member (sc-name (tn-sc thing)) *byte-sc-names*)
1203       t))
1204
1205(defun byte-ea-p (thing)
1206  (typecase thing
1207    (ea (eq (ea-size thing) :byte))
1208    (tn
1209     (and (member (sc-name (tn-sc thing)) *byte-sc-names*) t))
1210    (t nil)))
1211
1212(defun word-reg-p (thing)
1213  (and (tn-p thing)
1214       (eq (sb-name (sc-sb (tn-sc thing))) 'registers)
1215       (member (sc-name (tn-sc thing)) *word-sc-names*)
1216       t))
1217
1218(defun word-ea-p (thing)
1219  (typecase thing
1220    (ea (eq (ea-size thing) :word))
1221    (tn (and (member (sc-name (tn-sc thing)) *word-sc-names*) t))
1222    (t nil)))
1223
1224(defun dword-reg-p (thing)
1225  (and (tn-p thing)
1226       (eq (sb-name (sc-sb (tn-sc thing))) 'registers)
1227       (member (sc-name (tn-sc thing)) *dword-sc-names*)
1228       t))
1229
1230(defun dword-ea-p (thing)
1231  (typecase thing
1232    (ea (eq (ea-size thing) :dword))
1233    (tn
1234     (and (member (sc-name (tn-sc thing)) *dword-sc-names*) t))
1235    (t nil)))
1236
1237(defun qword-reg-p (thing)
1238  (and (tn-p thing)
1239       (eq (sb-name (sc-sb (tn-sc thing))) 'registers)
1240       (member (sc-name (tn-sc thing)) *qword-sc-names*)
1241       t))
1242
1243(defun qword-ea-p (thing)
1244  (typecase thing
1245    (ea (eq (ea-size thing) :qword))
1246    (tn
1247     (and (member (sc-name (tn-sc thing)) *qword-sc-names*) t))
1248    (t nil)))
1249
1250;;; Return true if THING is a general-purpose register TN.
1251(defun register-p (thing)
1252  (and (tn-p thing)
1253       (eq (sb-name (sc-sb (tn-sc thing))) 'registers)))
1254
1255(defun accumulator-p (thing)
1256  (and (register-p thing)
1257       (= (tn-offset thing) 0)))
1258
1259;;; Return true if THING is an XMM register TN.
1260(defun xmm-register-p (thing)
1261  (and (tn-p thing)
1262       (eq (sb-name (sc-sb (tn-sc thing))) 'float-registers)))
1263
1264
1265;;;; utilities
1266
1267(defconstant +operand-size-prefix-byte+ #b01100110)
1268
1269(defun maybe-emit-operand-size-prefix (segment size)
1270  (unless (or (eq size :byte)
1271              (eq size :qword)          ; REX prefix handles this
1272              (eq size +default-operand-size+))
1273    (emit-byte segment +operand-size-prefix-byte+)))
1274
1275;;; A REX prefix must be emitted if at least one of the following
1276;;; conditions is true:
1277;;  1. The operand size is :QWORD and the default operand size of the
1278;;     instruction is not :QWORD.
1279;;; 2. The instruction references an extended register.
1280;;; 3. The instruction references one of the byte registers SIL, DIL,
1281;;;    SPL or BPL.
1282
1283;;; Emit a REX prefix if necessary. OPERAND-SIZE is used to determine
1284;;; whether to set REX.W. Callers pass it explicitly as :DO-NOT-SET if
1285;;; this should not happen, for example because the instruction's
1286;;; default operand size is qword. R, X and B are NIL or TNs specifying
1287;;; registers the encodings of which are extended with the REX.R, REX.X
1288;;; and REX.B bit, respectively. To determine whether one of the byte
1289;;; registers is used that can only be accessed using a REX prefix, we
1290;;; need only to test R and B, because X is only used for the index
1291;;; register of an effective address and therefore never byte-sized.
1292;;; For R we can avoid to calculate the size of the TN because it is
1293;;; always OPERAND-SIZE. The size of B must be calculated here because
1294;;; B can be address-sized (if it is the base register of an effective
1295;;; address), of OPERAND-SIZE (if the instruction operates on two
1296;;; registers) or of some different size (in the instructions that
1297;;; combine arguments of different sizes: MOVZX, MOVSX, MOVSXD and
1298;;; several SSE instructions, e.g. CVTSD2SI). We don't distinguish
1299;;; between general-purpose and floating point registers for this cause
1300;;; because only general-purpose registers can be byte-sized at all.
1301(defun maybe-emit-rex-prefix (segment operand-size r x b)
1302  (declare (type (member nil :byte :word :dword :qword :do-not-set)
1303                 operand-size)
1304           (type (or null tn) r x b))
1305  (labels ((if-hi (r)
1306             (if (and r (> (tn-offset r)
1307                           ;; offset of r8 is 16, offset of xmm8 is 8
1308                           (if (eq (sb-name (sc-sb (tn-sc r)))
1309                                   'float-registers)
1310                               7
1311                               15)))
1312                 1
1313                 0))
1314           (reg-4-7-p (r)
1315             ;; Assuming R is a TN describing a general-purpose
1316             ;; register, return true if it references register
1317             ;; 4 upto 7.
1318             (<= 8 (tn-offset r) 15)))
1319    (let ((rex-w (if (eq operand-size :qword) 1 0))
1320          (rex-r (if-hi r))
1321          (rex-x (if-hi x))
1322          (rex-b (if-hi b)))
1323      (when (or (not (zerop (logior rex-w rex-r rex-x rex-b)))
1324                (and r
1325                     (eq operand-size :byte)
1326                     (reg-4-7-p r))
1327                (and b
1328                     (eq (operand-size b) :byte)
1329                     (reg-4-7-p b)))
1330        (emit-rex-byte segment #b0100 rex-w rex-r rex-x rex-b)))))
1331
1332;;; Emit a REX prefix if necessary. The operand size is determined from
1333;;; THING or can be overwritten by OPERAND-SIZE. This and REG are always
1334;;; passed to MAYBE-EMIT-REX-PREFIX. Additionally, if THING is an EA we
1335;;; pass its index and base registers, if it is a register TN, we pass
1336;;; only itself.
1337;;; In contrast to EMIT-EA above, neither stack TNs nor fixups need to
1338;;; be treated specially here: If THING is a stack TN, neither it nor
1339;;; any of its components are passed to MAYBE-EMIT-REX-PREFIX which
1340;;; works correctly because stack references always use RBP as the base
1341;;; register and never use an index register so no extended registers
1342;;; need to be accessed. Fixups are assembled using an addressing mode
1343;;; of displacement-only or RIP-plus-displacement (see EMIT-EA), so may
1344;;; not reference an extended register. The displacement-only addressing
1345;;; mode requires that REX.X is 0, which is ensured here.
1346(defun maybe-emit-rex-for-ea (segment thing reg &key operand-size)
1347  (declare (type (or ea tn fixup) thing)
1348           (type (or null tn) reg)
1349           (type (member nil :byte :word :dword :qword :do-not-set)
1350                 operand-size))
1351  (let ((ea-p (ea-p thing)))
1352    (maybe-emit-rex-prefix segment
1353                           (or operand-size (operand-size thing))
1354                           reg
1355                           (and ea-p (ea-index thing))
1356                           (cond (ea-p (ea-base thing))
1357                                 ((and (tn-p thing)
1358                                       (member (sb-name (sc-sb (tn-sc thing)))
1359                                               '(float-registers registers)))
1360                                  thing)
1361                                 (t nil)))))
1362
1363(defun operand-size (thing)
1364  (typecase thing
1365    (tn
1366     ;; FIXME: might as well be COND instead of having to use #. readmacro
1367     ;; to hack up the code
1368     (case (sc-name (tn-sc thing))
1369       #!+sb-simd-pack
1370       (#.sb!vm::*oword-sc-names*
1371        :oword)
1372       (#.*qword-sc-names*
1373        :qword)
1374       (#.*dword-sc-names*
1375        :dword)
1376       (#.*word-sc-names*
1377        :word)
1378       (#.*byte-sc-names*
1379        :byte)
1380       ;; added by jrd: float-registers is a separate size (?)
1381       ;; The only place in the code where we are called with THING
1382       ;; being a float-register is in MAYBE-EMIT-REX-PREFIX when it
1383       ;; checks whether THING is a byte register. Thus our result in
1384       ;; these cases could as well be :dword and :qword. I leave it as
1385       ;; :float and :double which is more likely to trigger an aver
1386       ;; instead of silently doing the wrong thing in case this
1387       ;; situation should change. Lutz Euler, 2005-10-23.
1388       (#.sb!vm::*float-sc-names*
1389        :float)
1390       (#.sb!vm::*double-sc-names*
1391        :double)
1392       (#.sb!vm::*complex-sc-names*
1393        :complex)
1394       (t
1395        (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing))))))
1396    (ea
1397     (ea-size thing))
1398    (fixup
1399     ;; GNA.  Guess who spelt "flavor" correctly first time round?
1400     ;; There's a strong argument in my mind to change all uses of
1401     ;; "flavor" to "kind": and similarly with some misguided uses of
1402     ;; "type" here and there.  -- CSR, 2005-01-06.
1403     (case (fixup-flavor thing)
1404       ((:foreign-dataref) :qword)))
1405    (t
1406     nil)))
1407
1408(defun matching-operand-size (dst src)
1409  (let ((dst-size (operand-size dst))
1410        (src-size (operand-size src)))
1411    (if dst-size
1412        (if src-size
1413            (if (eq dst-size src-size)
1414                dst-size
1415                (error "size mismatch: ~S is a ~S and ~S is a ~S."
1416                       dst dst-size src src-size))
1417            dst-size)
1418        (if src-size
1419            src-size
1420            (error "can't tell the size of either ~S or ~S" dst src)))))
1421
1422;;; Except in a very few cases (MOV instructions A1, A3 and B8 - BF)
1423;;; we expect dword data bytes even when 64 bit work is being done.
1424;;; But A1 and A3 are currently unused and B8 - BF use EMIT-QWORD
1425;;; directly, so we emit all quad constants as dwords, additionally
1426;;; making sure that they survive the sign-extension to 64 bits
1427;;; unchanged.
1428(defun emit-sized-immediate (segment size value)
1429  (ecase size
1430    (:byte
1431     (emit-byte segment value))
1432    (:word
1433     (emit-word segment value))
1434    (:dword
1435     (emit-dword segment value))
1436    (:qword
1437     (emit-signed-dword segment value))))
1438
1439;;;; prefixes
1440
1441(define-instruction rex (segment)
1442  (:printer rex () nil :print-name nil))
1443
1444(define-instruction x66 (segment)
1445  (:printer x66 () nil :print-name nil))
1446
1447(defun emit-prefix (segment name)
1448  (declare (ignorable segment))
1449  (ecase name
1450    ((nil))
1451    (:lock
1452     #!+sb-thread
1453     (emit-byte segment #xf0))))
1454
1455(define-instruction lock (segment)
1456  (:printer byte ((op #b11110000)) nil))
1457
1458(define-instruction rep (segment)
1459  (:emitter
1460   (emit-byte segment #b11110011)))
1461
1462(define-instruction repe (segment)
1463  (:printer byte ((op #b11110011)) nil)
1464  (:emitter
1465   (emit-byte segment #b11110011)))
1466
1467(define-instruction repne (segment)
1468  (:printer byte ((op #b11110010)) nil)
1469  (:emitter
1470   (emit-byte segment #b11110010)))
1471
1472;;;; general data transfer
1473
1474;;; This is the part of the MOV instruction emitter that does moving
1475;;; of an immediate value into a qword register. We go to some length
1476;;; to achieve the shortest possible encoding.
1477(defun emit-immediate-move-to-qword-register (segment dst src)
1478  (declare (type integer src))
1479  (cond ((typep src '(unsigned-byte 32))
1480         ;; We use the B8 - BF encoding with an operand size of 32 bits
1481         ;; here and let the implicit zero-extension fill the upper half
1482         ;; of the 64-bit destination register. Instruction size: five
1483         ;; or six bytes. (A REX prefix will be emitted only if the
1484         ;; destination is an extended register.)
1485         (maybe-emit-rex-prefix segment :dword nil nil dst)
1486         (emit-byte-with-reg segment #b10111 (reg-tn-encoding dst))
1487         (emit-dword segment src))
1488        (t
1489         (maybe-emit-rex-prefix segment :qword nil nil dst)
1490         (cond ((typep src '(signed-byte 32))
1491                ;; Use the C7 encoding that takes a 32-bit immediate and
1492                ;; sign-extends it to 64 bits. Instruction size: seven
1493                ;; bytes.
1494                (emit-byte segment #b11000111)
1495                (emit-mod-reg-r/m-byte segment #b11 #b000
1496                                       (reg-tn-encoding dst))
1497                (emit-signed-dword segment src))
1498               ((<= (- (expt 2 64) (expt 2 31))
1499                    src
1500                    (1- (expt 2 64)))
1501                ;; This triggers on positive integers of 64 bits length
1502                ;; with the most significant 33 bits being 1. We use the
1503                ;; same encoding as in the previous clause.
1504                (emit-byte segment #b11000111)
1505                (emit-mod-reg-r/m-byte segment #b11 #b000
1506                                       (reg-tn-encoding dst))
1507                (emit-signed-dword segment (- src (expt 2 64))))
1508               (t
1509                ;; We need a full 64-bit immediate. Instruction size:
1510                ;; ten bytes.
1511                (emit-byte-with-reg segment #b10111 (reg-tn-encoding dst))
1512                (emit-qword segment src))))))
1513
1514(define-instruction mov (segment dst src)
1515  ;; immediate to register
1516  (:printer reg ((op #b1011 :prefilter (lambda (dstate value)
1517                                         (dstate-put-inst-prop dstate +allow-qword-imm+)
1518                                         value))
1519                 (imm nil :type 'signed-imm-data/asm-routine))
1520            '(:name :tab reg ", " imm))
1521  ;; absolute mem to/from accumulator
1522  (:printer simple-dir ((op #b101000) (imm nil :type 'imm-addr))
1523            `(:name :tab ,(swap-if 'dir 'accum ", " '("[" imm "]"))))
1524  ;; register to/from register/memory
1525  (:printer reg-reg/mem-dir ((op #b100010)))
1526  ;; immediate to register/memory
1527  (:printer reg/mem-imm/asm-routine ((op '(#b1100011 #b000))))
1528
1529  (:emitter
1530   (let ((size (matching-operand-size dst src)))
1531     (maybe-emit-operand-size-prefix segment size)
1532     (cond ((register-p dst)
1533            (cond ((integerp src)
1534                   (cond ((eq size :qword)
1535                          (emit-immediate-move-to-qword-register segment
1536                                                                 dst src))
1537                         (t
1538                          (maybe-emit-rex-prefix segment size nil nil dst)
1539                          (emit-byte-with-reg segment
1540                                              (if (eq size :byte)
1541                                                  #b10110
1542                                                  #b10111)
1543                                              (reg-tn-encoding dst))
1544                          (emit-sized-immediate segment size src))))
1545                  ((and (fixup-p src)
1546                        (member (fixup-flavor src)
1547                                '(:static-call :foreign :assembly-routine)))
1548                   (maybe-emit-rex-prefix segment :dword nil nil dst)
1549                   (emit-byte-with-reg segment #b10111 (reg-tn-encoding dst))
1550                   (emit-absolute-fixup segment src))
1551                  (t
1552                   (maybe-emit-rex-for-ea segment src dst)
1553                   (emit-byte segment
1554                              (if (eq size :byte)
1555                                  #b10001010
1556                                  #b10001011))
1557                   (emit-ea segment src (reg-tn-encoding dst)
1558                            :allow-constants t))))
1559           ((integerp src)
1560            ;; C7 only deals with 32 bit immediates even if the
1561            ;; destination is a 64-bit location. The value is
1562            ;; sign-extended in this case.
1563            (maybe-emit-rex-for-ea segment dst nil)
1564            (emit-byte segment (if (eq size :byte) #b11000110 #b11000111))
1565            (emit-ea segment dst #b000)
1566            (emit-sized-immediate segment size src))
1567           ((register-p src)
1568            (maybe-emit-rex-for-ea segment dst src)
1569            (emit-byte segment (if (eq size :byte) #b10001000 #b10001001))
1570            (emit-ea segment dst (reg-tn-encoding src)))
1571           ((fixup-p src)
1572            ;; Generally we can't MOV a fixupped value into an EA, since
1573            ;; MOV on non-registers can only take a 32-bit immediate arg.
1574            ;; Make an exception for :FOREIGN fixups (pretty much just
1575            ;; the runtime asm, since other foreign calls go through the
1576            ;; the linkage table) and for linkage table references, since
1577            ;; these should always end up in low memory.
1578            (aver (or (member (fixup-flavor src)
1579                              '(:foreign :foreign-dataref :symbol-tls-index
1580                                :assembly-routine))
1581                      (eq (ea-size dst) :dword)))
1582            (maybe-emit-rex-for-ea segment dst nil)
1583            (emit-byte segment #b11000111)
1584            (emit-ea segment dst #b000)
1585            (emit-absolute-fixup segment src))
1586           (t
1587            (error "bogus arguments to MOV: ~S ~S" dst src))))))
1588
1589;;; Emit a sign-extending (if SIGNED-P is true) or zero-extending move.
1590;;; To achieve the shortest possible encoding zero extensions into a
1591;;; 64-bit destination are assembled as a straight 32-bit MOV (if the
1592;;; source size is 32 bits) or as MOVZX with a 32-bit destination (if
1593;;; the source size is 8 or 16 bits). Due to the implicit zero extension
1594;;; to 64 bits this has the same effect as a MOVZX with 64-bit
1595;;; destination but often needs no REX prefix.
1596(defun emit-move-with-extension (segment dst src signed-p)
1597  (aver (register-p dst))
1598  (let ((dst-size (operand-size dst))
1599        (src-size (operand-size src))
1600        (opcode (if signed-p #b10111110 #b10110110)))
1601    (macrolet ((emitter (operand-size &rest bytes)
1602                 `(progn
1603                   (maybe-emit-rex-for-ea segment src dst
1604                                          :operand-size ,operand-size)
1605                   ,@(mapcar (lambda (byte)
1606                               `(emit-byte segment ,byte))
1607                             bytes)
1608                   (emit-ea segment src (reg-tn-encoding dst)))))
1609      (ecase dst-size
1610        (:word
1611         (aver (eq src-size :byte))
1612         (maybe-emit-operand-size-prefix segment :word)
1613         (emitter :word #b00001111 opcode))
1614        ((:dword :qword)
1615         (unless signed-p
1616           (setf dst-size :dword))
1617         (ecase src-size
1618           (:byte
1619            (emitter dst-size #b00001111 opcode))
1620           (:word
1621            (emitter dst-size #b00001111 (logior opcode 1)))
1622           (:dword
1623            (aver (or (not signed-p) (eq dst-size :qword)))
1624            (emitter dst-size
1625                     (if signed-p #x63 #x8b))))))))) ; movsxd or straight mov
1626
1627;; MOV[SZ]X - #x66 or REX selects the destination REG size, wherein :byte isn't
1628;; a possibility.  The 'width' bit selects a source r/m size of :byte or :word.
1629(define-instruction-format
1630    (move-with-extension 24 :include ext-reg-reg/mem
1631     :default-printer
1632     '(:name :tab reg ", "
1633       (:cond ((width :constant 0) (:using #'print-sized-byte-reg/mem reg/mem))
1634              (t (:using #'print-sized-word-reg/mem reg/mem)))))
1635  (width :prefilter nil)) ; doesn't affect DSTATE
1636
1637(define-instruction movsx (segment dst src)
1638  (:printer move-with-extension ((op #b1011111)))
1639  (:emitter (emit-move-with-extension segment dst src :signed)))
1640
1641(define-instruction movzx (segment dst src)
1642  (:printer move-with-extension ((op #b1011011)))
1643  (:emitter (emit-move-with-extension segment dst src nil)))
1644
1645;;; The regular use of MOVSXD is with an operand size of :qword. This
1646;;; sign-extends the dword source into the qword destination register.
1647;;; If the operand size is :dword the instruction zero-extends the dword
1648;;; source into the qword destination register, i.e. it does the same as
1649;;; a dword MOV into a register.
1650(define-instruction movsxd (segment dst src)
1651  (:printer reg-reg/mem ((op #b0110001) (width 1)
1652                         (reg/mem nil :type 'sized-dword-reg/mem)))
1653  (:emitter (emit-move-with-extension segment dst src :signed)))
1654
1655;;; this is not a real amd64 instruction, of course
1656(define-instruction movzxd (segment dst src)
1657  ; (:printer reg-reg/mem ((op #x63) (reg nil :type 'reg)))
1658  (:emitter (emit-move-with-extension segment dst src nil)))
1659
1660(define-instruction push (segment src)
1661  ;; register
1662  (:printer reg-no-width-default-qword ((op #b01010)))
1663  ;; register/memory
1664  (:printer reg/mem-default-qword ((op '(#b11111111 #b110))))
1665  ;; immediate
1666  (:printer byte ((op #b01101010) (imm nil :type 'signed-imm-byte))
1667            '(:name :tab imm))
1668  (:printer byte ((op #b01101000)
1669                  (imm nil :type 'signed-imm-data-default-qword))
1670            '(:name :tab imm))
1671  ;; ### segment registers?
1672
1673  (:emitter
1674   (cond ((integerp src)
1675          (cond ((<= -128 src 127)
1676                 (emit-byte segment #b01101010)
1677                 (emit-byte segment src))
1678                (t
1679                 ;; A REX-prefix is not needed because the operand size
1680                 ;; defaults to 64 bits. The size of the immediate is 32
1681                 ;; bits and it is sign-extended.
1682                 (emit-byte segment #b01101000)
1683                 (emit-signed-dword segment src))))
1684         (t
1685          (let ((size (operand-size src)))
1686            (aver (or (eq size :qword) (eq size :word)))
1687            (maybe-emit-operand-size-prefix segment size)
1688            (maybe-emit-rex-for-ea segment src nil :operand-size :do-not-set)
1689            (cond ((register-p src)
1690                   (emit-byte-with-reg segment #b01010 (reg-tn-encoding src)))
1691                  (t
1692                   (emit-byte segment #b11111111)
1693                   (emit-ea segment src #b110 :allow-constants t))))))))
1694
1695(define-instruction pop (segment dst)
1696  (:printer reg-no-width-default-qword ((op #b01011)))
1697  (:printer reg/mem-default-qword ((op '(#b10001111 #b000))))
1698  (:emitter
1699   (let ((size (operand-size dst)))
1700     (aver (or (eq size :qword) (eq size :word)))
1701     (maybe-emit-operand-size-prefix segment size)
1702     (maybe-emit-rex-for-ea segment dst nil :operand-size :do-not-set)
1703     (cond ((register-p dst)
1704            (emit-byte-with-reg segment #b01011 (reg-tn-encoding dst)))
1705           (t
1706            (emit-byte segment #b10001111)
1707            (emit-ea segment dst #b000))))))
1708
1709;;; Compared to x86 we need to take two particularities into account
1710;;; here:
1711;;; * XCHG EAX, EAX can't be encoded as #x90 as the processor interprets
1712;;;   that opcode as NOP while XCHG EAX, EAX is specified to clear the
1713;;;   upper half of RAX. We need to use the long form #x87 #xC0 instead.
1714;;; * The opcode #x90 is not only used for NOP and XCHG RAX, RAX and
1715;;;   XCHG AX, AX, but also for XCHG RAX, R8 (and the corresponding 32-
1716;;;   and 16-bit versions). The printer for the NOP instruction (further
1717;;;   below) matches all these encodings so needs to be overridden here
1718;;;   for the cases that need to print as XCHG.
1719;;; Assembler and disassembler chained then map these special cases as
1720;;; follows:
1721;;;   (INST NOP)                 ->  90      ->  NOP
1722;;;   (INST XCHG RAX-TN RAX-TN)  ->  4890    ->  NOP
1723;;;   (INST XCHG EAX-TN EAX-TN)  ->  87C0    ->  XCHG EAX, EAX
1724;;;   (INST XCHG AX-TN AX-TN)    ->  6690    ->  NOP
1725;;;   (INST XCHG RAX-TN R8-TN)   ->  4990    ->  XCHG RAX, R8
1726;;;   (INST XCHG EAX-TN R8D-TN)  ->  4190    ->  XCHG EAX, R8D
1727;;;   (INST XCHG AX-TN R8W-TN)   ->  664190  ->  XCHG AX, R8W
1728;;; The disassembler additionally correctly matches encoding variants
1729;;; that the assembler doesn't generate, for example 4E90 prints as NOP
1730;;; and 4F90 as XCHG RAX, R8 (both because REX.R and REX.X are ignored).
1731(define-instruction xchg (segment operand1 operand2)
1732  ;; This printer matches all patterns that encode exchanging RAX with
1733  ;; R8, EAX with R8D, or AX with R8W. These consist of the opcode #x90
1734  ;; with a REX prefix with REX.B = 1, and possibly the #x66 prefix.
1735  ;; We rely on the prefix automatism for the #x66 prefix, but
1736  ;; explicitly match the REX prefix as we need to provide a value for
1737  ;; REX.B, and to override the NOP printer by virtue of a longer match.
1738  (:printer rex-accum-reg ((rex-b 1) (op #b10010) (reg #b000)))
1739  ;; Register with accumulator.
1740  (:printer reg-no-width ((op #b10010)) '(:name :tab accum ", " reg))
1741  ;; Register/Memory with Register.
1742  (:printer reg-reg/mem ((op #b1000011)))
1743  (:emitter
1744   (let ((size (matching-operand-size operand1 operand2)))
1745     (maybe-emit-operand-size-prefix segment size)
1746     (labels ((xchg-acc-with-something (acc something)
1747                (if (and (not (eq size :byte))
1748                         (register-p something)
1749                         ;; Don't use the short encoding for XCHG EAX, EAX:
1750                         (not (and (= (tn-offset something) sb!vm::eax-offset)
1751                                   (eq size :dword))))
1752                    (progn
1753                      (maybe-emit-rex-for-ea segment something acc)
1754                      (emit-byte-with-reg segment
1755                                          #b10010
1756                                          (reg-tn-encoding something)))
1757                    (xchg-reg-with-something acc something)))
1758              (xchg-reg-with-something (reg something)
1759                (maybe-emit-rex-for-ea segment something reg)
1760                (emit-byte segment (if (eq size :byte) #b10000110 #b10000111))
1761                (emit-ea segment something (reg-tn-encoding reg))))
1762       (cond ((accumulator-p operand1)
1763              (xchg-acc-with-something operand1 operand2))
1764             ((accumulator-p operand2)
1765              (xchg-acc-with-something operand2 operand1))
1766             ((register-p operand1)
1767              (xchg-reg-with-something operand1 operand2))
1768             ((register-p operand2)
1769              (xchg-reg-with-something operand2 operand1))
1770             (t
1771              (error "bogus args to XCHG: ~S ~S" operand1 operand2)))))))
1772
1773(define-instruction lea (segment dst src)
1774  (:printer
1775   reg-reg/mem
1776   ((op #b1000110) (width 1)
1777    (reg/mem nil :use-label #'lea-compute-label :printer #'lea-print-ea)))
1778  (:emitter
1779   (aver (or (dword-reg-p dst) (qword-reg-p dst)))
1780   (maybe-emit-rex-for-ea segment src dst
1781                          :operand-size (if (dword-reg-p dst) :dword :qword))
1782   (emit-byte segment #b10001101)
1783   (emit-ea segment src (reg-tn-encoding dst))))
1784
1785(define-instruction cmpxchg (segment dst src &optional prefix)
1786  ;; Register/Memory with Register.
1787  (:printer ext-reg-reg/mem ((op #b1011000)) '(:name :tab reg/mem ", " reg))
1788  (:emitter
1789   (aver (register-p src))
1790   (emit-prefix segment prefix)
1791   (let ((size (matching-operand-size src dst)))
1792     (maybe-emit-operand-size-prefix segment size)
1793     (maybe-emit-rex-for-ea segment dst src)
1794     (emit-byte segment #b00001111)
1795     (emit-byte segment (if (eq size :byte) #b10110000 #b10110001))
1796     (emit-ea segment dst (reg-tn-encoding src)))))
1797
1798(define-instruction cmpxchg16b (segment mem &optional prefix)
1799  (:printer ext-reg/mem-no-width
1800            ((op '(#xC7 1))))
1801  (:emitter
1802   (aver (not (register-p mem)))
1803   (emit-prefix segment prefix)
1804   (maybe-emit-rex-for-ea segment mem nil :operand-size :qword)
1805   (emit-byte segment #x0F)
1806   (emit-byte segment #xC7)
1807   (emit-ea segment mem 1))) ; operand extension
1808
1809(define-instruction rdrand (segment dst)
1810  (:printer ext-reg/mem-no-width
1811            ((op '(#xC7 6))))
1812  (:emitter
1813   (aver (register-p dst))
1814   (maybe-emit-operand-size-prefix segment (operand-size dst))
1815   (maybe-emit-rex-for-ea segment dst nil)
1816   (emit-byte segment #x0F)
1817   (emit-byte segment #xC7)
1818   (emit-ea segment dst 6)))
1819
1820;;;; flag control instructions
1821
1822;;; CLC -- Clear Carry Flag.
1823(define-instruction clc (segment)
1824  (:printer byte ((op #b11111000)))
1825  (:emitter
1826   (emit-byte segment #b11111000)))
1827
1828;;; CLD -- Clear Direction Flag.
1829(define-instruction cld (segment)
1830  (:printer byte ((op #b11111100)))
1831  (:emitter
1832   (emit-byte segment #b11111100)))
1833
1834;;; CLI -- Clear Iterrupt Enable Flag.
1835(define-instruction cli (segment)
1836  (:printer byte ((op #b11111010)))
1837  (:emitter
1838   (emit-byte segment #b11111010)))
1839
1840;;; CMC -- Complement Carry Flag.
1841(define-instruction cmc (segment)
1842  (:printer byte ((op #b11110101)))
1843  (:emitter
1844   (emit-byte segment #b11110101)))
1845
1846;;; LAHF -- Load AH into flags.
1847(define-instruction lahf (segment)
1848  (:printer byte ((op #b10011111)))
1849  (:emitter
1850   (emit-byte segment #b10011111)))
1851
1852;;; POPF -- Pop flags.
1853(define-instruction popf (segment)
1854  (:printer byte ((op #b10011101)))
1855  (:emitter
1856   (emit-byte segment #b10011101)))
1857
1858;;; PUSHF -- push flags.
1859(define-instruction pushf (segment)
1860  (:printer byte ((op #b10011100)))
1861  (:emitter
1862   (emit-byte segment #b10011100)))
1863
1864;;; SAHF -- Store AH into flags.
1865(define-instruction sahf (segment)
1866  (:printer byte ((op #b10011110)))
1867  (:emitter
1868   (emit-byte segment #b10011110)))
1869
1870;;; STC -- Set Carry Flag.
1871(define-instruction stc (segment)
1872  (:printer byte ((op #b11111001)))
1873  (:emitter
1874   (emit-byte segment #b11111001)))
1875
1876;;; STD -- Set Direction Flag.
1877(define-instruction std (segment)
1878  (:printer byte ((op #b11111101)))
1879  (:emitter
1880   (emit-byte segment #b11111101)))
1881
1882;;; STI -- Set Interrupt Enable Flag.
1883(define-instruction sti (segment)
1884  (:printer byte ((op #b11111011)))
1885  (:emitter
1886   (emit-byte segment #b11111011)))
1887
1888;;;; arithmetic
1889
1890(defun emit-random-arith-inst (name segment dst src opcode
1891                                    &optional allow-constants)
1892  (let ((size (matching-operand-size dst src)))
1893    (maybe-emit-operand-size-prefix segment size)
1894    (cond
1895     ((integerp src)
1896      (cond ((and (not (eq size :byte)) (<= -128 src 127))
1897             (maybe-emit-rex-for-ea segment dst nil)
1898             (emit-byte segment #b10000011)
1899             (emit-ea segment dst opcode :allow-constants allow-constants)
1900             (emit-byte segment src))
1901            ((accumulator-p dst)
1902             (maybe-emit-rex-for-ea segment dst nil)
1903             (emit-byte segment
1904                        (dpb opcode
1905                             (byte 3 3)
1906                             (if (eq size :byte)
1907                                 #b00000100
1908                                 #b00000101)))
1909             (emit-sized-immediate segment size src))
1910            (t
1911             (maybe-emit-rex-for-ea segment dst nil)
1912             (emit-byte segment (if (eq size :byte) #b10000000 #b10000001))
1913             (emit-ea segment dst opcode :allow-constants allow-constants)
1914             (emit-sized-immediate segment size src))))
1915     ((register-p src)
1916      (maybe-emit-rex-for-ea segment dst src)
1917      (emit-byte segment
1918                 (dpb opcode
1919                      (byte 3 3)
1920                      (if (eq size :byte) #b00000000 #b00000001)))
1921      (emit-ea segment dst (reg-tn-encoding src)
1922               :allow-constants allow-constants))
1923     ((register-p dst)
1924      (maybe-emit-rex-for-ea segment src dst)
1925      (emit-byte segment
1926                 (dpb opcode
1927                      (byte 3 3)
1928                      (if (eq size :byte) #b00000010 #b00000011)))
1929      (emit-ea segment src (reg-tn-encoding dst)
1930               :allow-constants allow-constants))
1931     (t
1932      (error "bogus operands to ~A" name)))))
1933
1934(macrolet ((define (name subop &optional allow-constants)
1935             `(define-instruction ,name (segment dst src &optional prefix)
1936                (:printer accum-imm ((op ,(dpb subop (byte 3 2) #b0000010))))
1937                (:printer reg/mem-imm ((op '(#b1000000 ,subop))))
1938                ;; The redundant encoding #x82 is invalid in 64-bit mode,
1939                ;; therefore we force WIDTH to 1.
1940                (:printer reg/mem-imm ((op '(#b1000001 ,subop)) (width 1)
1941                                       (imm nil :type 'signed-imm-byte)))
1942                (:printer reg-reg/mem-dir ((op ,(dpb subop (byte 3 1) #b000000))))
1943                (:emitter
1944                 (emit-prefix segment prefix)
1945                 (emit-random-arith-inst ,(string name) segment dst src ,subop
1946                                         ,allow-constants)))))
1947  (define add #b000)
1948  (define adc #b010)
1949  (define sub #b101)
1950  (define sbb #b011)
1951  (define cmp #b111 t)
1952  (define and #b100)
1953  (define or  #b001)
1954  (define xor #b110))
1955
1956;;; The one-byte encodings for INC and DEC are used as REX prefixes
1957;;; in 64-bit mode so we always use the two-byte form.
1958(define-instruction inc (segment dst &optional prefix)
1959  (:printer reg/mem ((op '(#b1111111 #b000))))
1960  (:emitter
1961   (emit-prefix segment prefix)
1962   (let ((size (operand-size dst)))
1963     (maybe-emit-operand-size-prefix segment size)
1964     (maybe-emit-rex-for-ea segment dst nil)
1965     (emit-byte segment (if (eq size :byte) #b11111110 #b11111111))
1966     (emit-ea segment dst #b000))))
1967
1968(define-instruction dec (segment dst &optional prefix)
1969  (:printer reg/mem ((op '(#b1111111 #b001))))
1970  (:emitter
1971   (emit-prefix segment prefix)
1972   (let ((size (operand-size dst)))
1973     (maybe-emit-operand-size-prefix segment size)
1974     (maybe-emit-rex-for-ea segment dst nil)
1975     (emit-byte segment (if (eq size :byte) #b11111110 #b11111111))
1976     (emit-ea segment dst #b001))))
1977
1978(define-instruction neg (segment dst)
1979  (:printer reg/mem ((op '(#b1111011 #b011))))
1980  (:emitter
1981   (let ((size (operand-size dst)))
1982     (maybe-emit-operand-size-prefix segment size)
1983     (maybe-emit-rex-for-ea segment dst nil)
1984     (emit-byte segment (if (eq size :byte) #b11110110 #b11110111))
1985     (emit-ea segment dst #b011))))
1986
1987(define-instruction mul (segment dst src)
1988  (:printer accum-reg/mem ((op '(#b1111011 #b100))))
1989  (:emitter
1990   (let ((size (matching-operand-size dst src)))
1991     (aver (accumulator-p dst))
1992     (maybe-emit-operand-size-prefix segment size)
1993     (maybe-emit-rex-for-ea segment src nil)
1994     (emit-byte segment (if (eq size :byte) #b11110110 #b11110111))
1995     (emit-ea segment src #b100))))
1996
1997(define-instruction imul (segment dst &optional src1 src2)
1998  (:printer accum-reg/mem ((op '(#b1111011 #b101))))
1999  (:printer ext-reg-reg/mem-no-width ((op #b10101111)))
2000  ;; These next two are like a single format where one bit in the opcode byte
2001  ;; determines the size of the immediate datum. A REG-REG/MEM-IMM format
2002  ;; would save one entry in the decoding table, since that bit would become
2003  ;; "don't care" from a decoding perspective, but we don't have (many) other
2004  ;; 3-operand opcodes in the general purpose (non-SSE) opcode space.
2005  (:printer reg-reg/mem ((op #b0110100) (width 1)
2006                         (imm nil :type 'signed-imm-data))
2007            '(:name :tab reg ", " reg/mem ", " imm))
2008  (:printer reg-reg/mem ((op #b0110101) (width 1)
2009                         (imm nil :type 'signed-imm-byte))
2010            '(:name :tab reg ", " reg/mem ", " imm))
2011  (:emitter
2012   (flet ((r/m-with-immed-to-reg (reg r/m immed)
2013            (let* ((size (matching-operand-size reg r/m))
2014                   (sx (and (not (eq size :byte)) (<= -128 immed 127))))
2015              (maybe-emit-operand-size-prefix segment size)
2016              (maybe-emit-rex-for-ea segment r/m reg)
2017              (emit-byte segment (if sx #b01101011 #b01101001))
2018              (emit-ea segment r/m (reg-tn-encoding reg))
2019              (if sx
2020                  (emit-byte segment immed)
2021                  (emit-sized-immediate segment size immed)))))
2022     (cond (src2
2023            (r/m-with-immed-to-reg dst src1 src2))
2024           (src1
2025            (if (integerp src1)
2026                (r/m-with-immed-to-reg dst dst src1)
2027                (let ((size (matching-operand-size dst src1)))
2028                  (maybe-emit-operand-size-prefix segment size)
2029                  (maybe-emit-rex-for-ea segment src1 dst)
2030                  (emit-byte segment #b00001111)
2031                  (emit-byte segment #b10101111)
2032                  (emit-ea segment src1 (reg-tn-encoding dst)))))
2033           (t
2034            (let ((size (operand-size dst)))
2035              (maybe-emit-operand-size-prefix segment size)
2036              (maybe-emit-rex-for-ea segment dst nil)
2037              (emit-byte segment (if (eq size :byte) #b11110110 #b11110111))
2038              (emit-ea segment dst #b101)))))))
2039
2040(define-instruction div (segment dst src)
2041  (:printer accum-reg/mem ((op '(#b1111011 #b110))))
2042  (:emitter
2043   (let ((size (matching-operand-size dst src)))
2044     (aver (accumulator-p dst))
2045     (maybe-emit-operand-size-prefix segment size)
2046     (maybe-emit-rex-for-ea segment src nil)
2047     (emit-byte segment (if (eq size :byte) #b11110110 #b11110111))
2048     (emit-ea segment src #b110))))
2049
2050(define-instruction idiv (segment dst src)
2051  (:printer accum-reg/mem ((op '(#b1111011 #b111))))
2052  (:emitter
2053   (let ((size (matching-operand-size dst src)))
2054     (aver (accumulator-p dst))
2055     (maybe-emit-operand-size-prefix segment size)
2056     (maybe-emit-rex-for-ea segment src nil)
2057     (emit-byte segment (if (eq size :byte) #b11110110 #b11110111))
2058     (emit-ea segment src #b111))))
2059
2060(define-instruction bswap (segment dst)
2061  (:printer ext-reg-no-width ((op #b11001)))
2062  (:emitter
2063   (let ((size (operand-size dst)))
2064     (maybe-emit-rex-prefix segment size nil nil dst)
2065     (emit-byte segment #x0f)
2066     (emit-byte-with-reg segment #b11001 (reg-tn-encoding dst)))))
2067
2068;;; CBW -- Convert Byte to Word. AX <- sign_xtnd(AL)
2069(define-instruction cbw (segment)
2070  (:printer x66-byte ((op #b10011000)))
2071  (:emitter
2072   (maybe-emit-operand-size-prefix segment :word)
2073   (emit-byte segment #b10011000)))
2074
2075;;; CWDE -- Convert Word To Double Word Extended. EAX <- sign_xtnd(AX)
2076(define-instruction cwde (segment)
2077  (:printer byte ((op #b10011000)))
2078  (:emitter
2079   (maybe-emit-operand-size-prefix segment :dword)
2080   (emit-byte segment #b10011000)))
2081
2082;;; CDQE -- Convert Double Word To Quad Word Extended. RAX <- sign_xtnd(EAX)
2083(define-instruction cdqe (segment)
2084  (:printer rex-byte ((op #b10011000)))
2085  (:emitter
2086   (maybe-emit-rex-prefix segment :qword nil nil nil)
2087   (emit-byte segment #b10011000)))
2088
2089;;; CWD -- Convert Word to Double Word. DX:AX <- sign_xtnd(AX)
2090(define-instruction cwd (segment)
2091  (:printer x66-byte ((op #b10011001)))
2092  (:emitter
2093   (maybe-emit-operand-size-prefix segment :word)
2094   (emit-byte segment #b10011001)))
2095
2096;;; CDQ -- Convert Double Word to Quad Word. EDX:EAX <- sign_xtnd(EAX)
2097(define-instruction cdq (segment)
2098  (:printer byte ((op #b10011001)))
2099  (:emitter
2100   (maybe-emit-operand-size-prefix segment :dword)
2101   (emit-byte segment #b10011001)))
2102
2103;;; CQO -- Convert Quad Word to Octaword. RDX:RAX <- sign_xtnd(RAX)
2104(define-instruction cqo (segment)
2105  (:printer rex-byte ((op #b10011001)))
2106  (:emitter
2107   (maybe-emit-rex-prefix segment :qword nil nil nil)
2108   (emit-byte segment #b10011001)))
2109
2110(define-instruction xadd (segment dst src &optional prefix)
2111  ;; Register/Memory with Register.
2112  (:printer ext-reg-reg/mem ((op #b1100000)) '(:name :tab reg/mem ", " reg))
2113  (:emitter
2114   (aver (register-p src))
2115   (emit-prefix segment prefix)
2116   (let ((size (matching-operand-size src dst)))
2117     (maybe-emit-operand-size-prefix segment size)
2118     (maybe-emit-rex-for-ea segment dst src)
2119     (emit-byte segment #b00001111)
2120     (emit-byte segment (if (eq size :byte) #b11000000 #b11000001))
2121     (emit-ea segment dst (reg-tn-encoding src)))))
2122
2123
2124;;;; logic
2125
2126(defun emit-shift-inst (segment dst amount opcode)
2127  (let ((size (operand-size dst)))
2128    (maybe-emit-operand-size-prefix segment size)
2129    (multiple-value-bind (major-opcode immed)
2130        (case amount
2131          (:cl (values #b11010010 nil))
2132          (1 (values #b11010000 nil))
2133          (t (values #b11000000 t)))
2134      (maybe-emit-rex-for-ea segment dst nil)
2135      (emit-byte segment
2136                 (if (eq size :byte) major-opcode (logior major-opcode 1)))
2137      (emit-ea segment dst opcode)
2138      (when immed
2139        (emit-byte segment amount)))))
2140
2141(define-instruction-format
2142    (shift-inst 16 :include reg/mem
2143     :default-printer '(:name :tab reg/mem ", " (:if (varying :positive) 'cl 1)))
2144  (op :fields (list (byte 6 2) (byte 3 11)))
2145  (varying :field (byte 1 1)))
2146
2147(macrolet ((define (name subop)
2148             `(define-instruction ,name (segment dst amount)
2149                (:printer shift-inst ((op '(#b110100 ,subop)))) ; shift by CL or 1
2150                (:printer reg/mem-imm ((op '(#b1100000 ,subop))
2151                                       (imm nil :type 'imm-byte)))
2152                (:emitter (emit-shift-inst segment dst amount ,subop)))))
2153  (define rol #b000)
2154  (define ror #b001)
2155  (define rcl #b010)
2156  (define rcr #b011)
2157  (define shl #b100)
2158  (define shr #b101)
2159  (define sar #b111))
2160
2161(defun emit-double-shift (segment opcode dst src amt)
2162  (let ((size (matching-operand-size dst src)))
2163    (when (eq size :byte)
2164      (error "Double shifts can only be used with words."))
2165    (maybe-emit-operand-size-prefix segment size)
2166    (maybe-emit-rex-for-ea segment dst src)
2167    (emit-byte segment #b00001111)
2168    (emit-byte segment (dpb opcode (byte 1 3)
2169                            (if (eq amt :cl) #b10100101 #b10100100)))
2170    (emit-ea segment dst (reg-tn-encoding src))
2171    (unless (eq amt :cl)
2172      (emit-byte segment amt))))
2173
2174(macrolet ((define (name direction-bit op)
2175             `(define-instruction ,name (segment dst src amt)
2176                (:declare (type (or (member :cl) (mod 32)) amt))
2177                (:printer ext-reg-reg/mem-no-width ((op ,(logior op #b100))
2178                                                    (imm nil :type 'imm-byte))
2179                          '(:name :tab reg/mem ", " reg ", " imm))
2180                (:printer ext-reg-reg/mem-no-width ((op ,(logior op #b101)))
2181                          '(:name :tab reg/mem ", " reg ", " 'cl))
2182                (:emitter
2183                 (emit-double-shift segment ,direction-bit dst src amt)))))
2184  (define shld 0 #b10100000)
2185  (define shrd 1 #b10101000))
2186
2187(define-instruction test (segment this that)
2188  (:printer accum-imm ((op #b1010100)))
2189  (:printer reg/mem-imm ((op '(#b1111011 #b000))))
2190  (:printer reg-reg/mem ((op #b1000010)))
2191  (:emitter
2192   (let ((size (matching-operand-size this that)))
2193     (maybe-emit-operand-size-prefix segment size)
2194     (flet ((test-immed-and-something (immed something)
2195              (cond ((accumulator-p something)
2196                     (maybe-emit-rex-for-ea segment something nil)
2197                     (emit-byte segment
2198                                (if (eq size :byte) #b10101000 #b10101001))
2199                     (emit-sized-immediate segment size immed))
2200                    (t
2201                     (maybe-emit-rex-for-ea segment something nil)
2202                     (emit-byte segment
2203                                (if (eq size :byte) #b11110110 #b11110111))
2204                     (emit-ea segment something #b000)
2205                     (emit-sized-immediate segment size immed))))
2206            (test-reg-and-something (reg something)
2207              (maybe-emit-rex-for-ea segment something reg)
2208              (emit-byte segment (if (eq size :byte) #b10000100 #b10000101))
2209              (emit-ea segment something (reg-tn-encoding reg))))
2210       (cond ((integerp that)
2211              (test-immed-and-something that this))
2212             ((integerp this)
2213              (test-immed-and-something this that))
2214             ((register-p this)
2215              (test-reg-and-something this that))
2216             ((register-p that)
2217              (test-reg-and-something that this))
2218             (t
2219              (error "bogus operands for TEST: ~S and ~S" this that)))))))
2220
2221(define-instruction not (segment dst)
2222  (:printer reg/mem ((op '(#b1111011 #b010))))
2223  (:emitter
2224   (let ((size (operand-size dst)))
2225     (maybe-emit-operand-size-prefix segment size)
2226     (maybe-emit-rex-for-ea segment dst nil)
2227     (emit-byte segment (if (eq size :byte) #b11110110 #b11110111))
2228     (emit-ea segment dst #b010))))
2229
2230;;;; string manipulation
2231
2232(define-instruction cmps (segment size)
2233  (:printer string-op ((op #b1010011)))
2234  (:emitter
2235   (maybe-emit-operand-size-prefix segment size)
2236   (maybe-emit-rex-prefix segment size nil nil nil)
2237   (emit-byte segment (if (eq size :byte) #b10100110 #b10100111))))
2238
2239(define-instruction ins (segment acc)
2240  (:printer string-op ((op #b0110110)))
2241  (:emitter
2242   (let ((size (operand-size acc)))
2243     (aver (accumulator-p acc))
2244     (maybe-emit-operand-size-prefix segment size)
2245     (maybe-emit-rex-prefix segment size nil nil nil)
2246     (emit-byte segment (if (eq size :byte) #b01101100 #b01101101)))))
2247
2248(define-instruction lods (segment acc)
2249  (:printer string-op ((op #b1010110)))
2250  (:emitter
2251   (let ((size (operand-size acc)))
2252     (aver (accumulator-p acc))
2253     (maybe-emit-operand-size-prefix segment size)
2254     (maybe-emit-rex-prefix segment size nil nil nil)
2255     (emit-byte segment (if (eq size :byte) #b10101100 #b10101101)))))
2256
2257(define-instruction movs (segment size)
2258  (:printer string-op ((op #b1010010)))
2259  (:emitter
2260   (maybe-emit-operand-size-prefix segment size)
2261   (maybe-emit-rex-prefix segment size nil nil nil)
2262   (emit-byte segment (if (eq size :byte) #b10100100 #b10100101))))
2263
2264(define-instruction outs (segment acc)
2265  (:printer string-op ((op #b0110111)))
2266  (:emitter
2267   (let ((size (operand-size acc)))
2268     (aver (accumulator-p acc))
2269     (maybe-emit-operand-size-prefix segment size)
2270     (maybe-emit-rex-prefix segment size nil nil nil)
2271     (emit-byte segment (if (eq size :byte) #b01101110 #b01101111)))))
2272
2273(define-instruction scas (segment acc)
2274  (:printer string-op ((op #b1010111)))
2275  (:emitter
2276   (let ((size (operand-size acc)))
2277     (aver (accumulator-p acc))
2278     (maybe-emit-operand-size-prefix segment size)
2279     (maybe-emit-rex-prefix segment size nil nil nil)
2280     (emit-byte segment (if (eq size :byte) #b10101110 #b10101111)))))
2281
2282(define-instruction stos (segment acc)
2283  (:printer string-op ((op #b1010101)))
2284  (:emitter
2285   (let ((size (operand-size acc)))
2286     (aver (accumulator-p acc))
2287     (maybe-emit-operand-size-prefix segment size)
2288     (maybe-emit-rex-prefix segment size nil nil nil)
2289     (emit-byte segment (if (eq size :byte) #b10101010 #b10101011)))))
2290
2291(define-instruction xlat (segment)
2292  (:printer byte ((op #b11010111)))
2293  (:emitter
2294   (emit-byte segment #b11010111)))
2295
2296
2297;;;; bit manipulation
2298
2299(define-instruction bsf (segment dst src)
2300  (:printer ext-reg-reg/mem-no-width ((op #b10111100)))
2301  (:emitter
2302   (let ((size (matching-operand-size dst src)))
2303     (when (eq size :byte)
2304       (error "can't scan bytes: ~S" src))
2305     (maybe-emit-operand-size-prefix segment size)
2306     (maybe-emit-rex-for-ea segment src dst)
2307     (emit-byte segment #b00001111)
2308     (emit-byte segment #b10111100)
2309     (emit-ea segment src (reg-tn-encoding dst)))))
2310
2311(define-instruction bsr (segment dst src)
2312  (:printer ext-reg-reg/mem-no-width ((op #b10111101)))
2313  (:emitter
2314   (let ((size (matching-operand-size dst src)))
2315     (when (eq size :byte)
2316       (error "can't scan bytes: ~S" src))
2317     (maybe-emit-operand-size-prefix segment size)
2318     (maybe-emit-rex-for-ea segment src dst)
2319     (emit-byte segment #b00001111)
2320     (emit-byte segment #b10111101)
2321     (emit-ea segment src (reg-tn-encoding dst)))))
2322
2323(defun emit-bit-test-and-mumble (segment src index opcode)
2324  (let ((size (operand-size src)))
2325    (when (eq size :byte)
2326      (error "can't scan bytes: ~S" src))
2327    (maybe-emit-operand-size-prefix segment size)
2328    (cond ((integerp index)
2329           (maybe-emit-rex-for-ea segment src nil)
2330           (emit-byte segment #b00001111)
2331           (emit-byte segment #b10111010)
2332           (emit-ea segment src opcode)
2333           (emit-byte segment index))
2334          (t
2335           (maybe-emit-rex-for-ea segment src index)
2336           (emit-byte segment #b00001111)
2337           (emit-byte segment (dpb opcode (byte 3 3) #b10000011))
2338           (emit-ea segment src (reg-tn-encoding index))))))
2339
2340(macrolet ((define (inst opcode-extension)
2341             `(define-instruction ,inst (segment src index &optional prefix)
2342                (:printer ext-reg/mem-no-width+imm8
2343                          ((op '(#xBA ,opcode-extension))
2344                           (reg/mem nil :type 'sized-reg/mem)))
2345                (:printer ext-reg-reg/mem-no-width
2346                          ((op ,(dpb opcode-extension (byte 3 3) #b10000011))
2347                           (reg/mem nil :type 'sized-reg/mem))
2348                          '(:name :tab reg/mem ", " reg))
2349                (:emitter
2350                 (emit-prefix segment prefix)
2351                 (emit-bit-test-and-mumble segment src index
2352                                           ,opcode-extension)))))
2353  (define bt  4)
2354  (define bts 5)
2355  (define btr 6)
2356  (define btc 7))
2357
2358
2359;;;; control transfer
2360
2361(define-instruction call (segment where)
2362  (:printer near-jump ((op #b11101000)))
2363  (:printer reg/mem-default-qword ((op '(#b11111111 #b010))))
2364  (:emitter
2365   (typecase where
2366     (label
2367      (emit-byte segment #b11101000) ; 32 bit relative
2368      (emit-dword-displacement-backpatch segment where))
2369     (fixup
2370      (emit-byte segment #b11101000)
2371      (emit-relative-fixup segment where))
2372     (t
2373      (maybe-emit-rex-for-ea segment where nil :operand-size :do-not-set)
2374      (emit-byte segment #b11111111)
2375      (emit-ea segment where #b010)))))
2376
2377(define-instruction jmp (segment cond &optional where)
2378  ;; conditional jumps
2379  (:printer short-cond-jump ((op #b0111)) '('j cc :tab label))
2380  (:printer near-cond-jump () '('j cc :tab label))
2381  ;; unconditional jumps
2382  (:printer short-jump ((op #b1011)))
2383  (:printer near-jump ((op #b11101001)))
2384  (:printer reg/mem-default-qword ((op '(#b11111111 #b100))))
2385  (:emitter
2386   (cond (where
2387          (emit-chooser
2388           segment 6 2
2389           (lambda (segment posn delta-if-after)
2390             (let ((disp (- (label-position where posn delta-if-after)
2391                            (+ posn 2))))
2392               (when (<= -128 disp 127)
2393                 (emit-byte segment
2394                            (dpb (conditional-opcode cond)
2395                                 (byte 4 0)
2396                                 #b01110000))
2397                 (emit-byte-displacement-backpatch segment where)
2398                 t)))
2399           (lambda (segment posn)
2400             (let ((disp (- (label-position where) (+ posn 6))))
2401               (emit-byte segment #b00001111)
2402               (emit-byte segment
2403                          (dpb (conditional-opcode cond)
2404                               (byte 4 0)
2405                               #b10000000))
2406               (emit-signed-dword segment disp)))))
2407         ((label-p (setq where cond))
2408          (emit-chooser
2409           segment 5 0
2410           (lambda (segment posn delta-if-after)
2411             (let ((disp (- (label-position where posn delta-if-after)
2412                            (+ posn 2))))
2413               (when (<= -128 disp 127)
2414                 (emit-byte segment #b11101011)
2415                 (emit-byte-displacement-backpatch segment where)
2416                 t)))
2417           (lambda (segment posn)
2418             (let ((disp (- (label-position where) (+ posn 5))))
2419               (emit-byte segment #b11101001)
2420               (emit-signed-dword segment disp)))))
2421         ((fixup-p where)
2422          (emit-byte segment #b11101001)
2423          (emit-relative-fixup segment where))
2424         (t
2425          (unless (or (ea-p where) (tn-p where))
2426            (error "don't know what to do with ~A" where))
2427          ;; near jump defaults to 64 bit
2428          ;; w-bit in rex prefix is unnecessary
2429          (maybe-emit-rex-for-ea segment where nil :operand-size :do-not-set)
2430          (emit-byte segment #b11111111)
2431          (emit-ea segment where #b100)))))
2432
2433(define-instruction ret (segment &optional stack-delta)
2434  (:printer byte ((op #b11000011)))
2435  (:printer byte ((op #b11000010) (imm nil :type 'imm-word-16))
2436            '(:name :tab imm))
2437  (:emitter
2438   (cond ((and stack-delta (not (zerop stack-delta)))
2439          (emit-byte segment #b11000010)
2440          (emit-word segment stack-delta))
2441         (t
2442          (emit-byte segment #b11000011)))))
2443
2444(define-instruction jrcxz (segment target)
2445  (:printer short-jump ((op #b0011)))
2446  (:emitter
2447   (emit-byte segment #b11100011)
2448   (emit-byte-displacement-backpatch segment target)))
2449
2450(define-instruction loop (segment target)
2451  (:printer short-jump ((op #b0010)))
2452  (:emitter
2453   (emit-byte segment #b11100010)       ; pfw this was 11100011, or jecxz!!!!
2454   (emit-byte-displacement-backpatch segment target)))
2455
2456(define-instruction loopz (segment target)
2457  (:printer short-jump ((op #b0001)))
2458  (:emitter
2459   (emit-byte segment #b11100001)
2460   (emit-byte-displacement-backpatch segment target)))
2461
2462(define-instruction loopnz (segment target)
2463  (:printer short-jump ((op #b0000)))
2464  (:emitter
2465   (emit-byte segment #b11100000)
2466   (emit-byte-displacement-backpatch segment target)))
2467
2468;;;; conditional move
2469(define-instruction cmov (segment cond dst src)
2470  (:printer cond-move ())
2471  (:emitter
2472   (aver (register-p dst))
2473   (let ((size (matching-operand-size dst src)))
2474     (aver (or (eq size :word) (eq size :dword) (eq size :qword)))
2475     (maybe-emit-operand-size-prefix segment size))
2476   (maybe-emit-rex-for-ea segment src dst)
2477   (emit-byte segment #b00001111)
2478   (emit-byte segment (dpb (conditional-opcode cond) (byte 4 0) #b01000000))
2479   (emit-ea segment src (reg-tn-encoding dst) :allow-constants t)))
2480
2481;;;; conditional byte set
2482
2483(define-instruction set (segment dst cond)
2484  (:printer cond-set ())
2485  (:emitter
2486   (maybe-emit-rex-for-ea segment dst nil :operand-size :byte)
2487   (emit-byte segment #b00001111)
2488   (emit-byte segment (dpb (conditional-opcode cond) (byte 4 0) #b10010000))
2489   (emit-ea segment dst #b000)))
2490
2491;;;; enter/leave
2492
2493(define-instruction enter (segment disp &optional (level 0))
2494  (:declare (type (unsigned-byte 16) disp)
2495            (type (unsigned-byte 8) level))
2496  (:printer enter-format ((op #b11001000)))
2497  (:emitter
2498   (emit-byte segment #b11001000)
2499   (emit-word segment disp)
2500   (emit-byte segment level)))
2501
2502(define-instruction leave (segment)
2503  (:printer byte ((op #b11001001)))
2504  (:emitter
2505   (emit-byte segment #b11001001)))
2506
2507;;;; interrupt instructions
2508
2509(define-instruction break (segment code)
2510  (:declare (type (unsigned-byte 8) code))
2511  #!-ud2-breakpoints (:printer byte-imm ((op #b11001100))
2512                               '(:name :tab code) :control #'break-control)
2513  #!+ud2-breakpoints (:printer word-imm ((op #b0000101100001111))
2514                               '(:name :tab code) :control #'break-control)
2515  (:emitter
2516   #!-ud2-breakpoints (emit-byte segment #b11001100)
2517   ;; On darwin, trap handling via SIGTRAP is unreliable, therefore we
2518   ;; throw a sigill with 0x0b0f instead and check for this in the
2519   ;; SIGILL handler and pass it on to the sigtrap handler if
2520   ;; appropriate
2521   #!+ud2-breakpoints (emit-word segment #b0000101100001111)
2522   (emit-byte segment code)))
2523
2524(define-instruction int (segment number)
2525  (:declare (type (unsigned-byte 8) number))
2526  (:printer byte-imm ((op #b11001101)))
2527  (:emitter
2528   (etypecase number
2529     ((member 3)
2530      (emit-byte segment #b11001100))
2531     ((unsigned-byte 8)
2532      (emit-byte segment #b11001101)
2533      (emit-byte segment number)))))
2534
2535(define-instruction iret (segment)
2536  (:printer byte ((op #b11001111)))
2537  (:emitter
2538   (emit-byte segment #b11001111)))
2539
2540;;;; processor control
2541
2542(define-instruction hlt (segment)
2543  (:printer byte ((op #b11110100)))
2544  (:emitter
2545   (emit-byte segment #b11110100)))
2546
2547(define-instruction nop (segment)
2548  (:printer byte ((op #b10010000)))
2549  ;; multi-byte NOP
2550  (:printer ext-reg/mem-no-width ((op '(#x1f 0))) '(:name))
2551  (:emitter
2552   (emit-byte segment #b10010000)))
2553
2554;;; Emit a sequence of single- or multi-byte NOPs to fill AMOUNT many
2555;;; bytes with the smallest possible number of such instructions.
2556(defun emit-long-nop (segment amount)
2557  (declare (type sb!assem:segment segment)
2558           (type index amount))
2559  ;; Pack all instructions into one byte vector to save space.
2560  (let* ((bytes #.(!coerce-to-specialized
2561                          #(#x90
2562                            #x66 #x90
2563                            #x0f #x1f #x00
2564                            #x0f #x1f #x40 #x00
2565                            #x0f #x1f #x44 #x00 #x00
2566                            #x66 #x0f #x1f #x44 #x00 #x00
2567                            #x0f #x1f #x80 #x00 #x00 #x00 #x00
2568                            #x0f #x1f #x84 #x00 #x00 #x00 #x00 #x00
2569                            #x66 #x0f #x1f #x84 #x00 #x00 #x00 #x00 #x00)
2570                          '(unsigned-byte 8)))
2571         (max-length (isqrt (* 2 (length bytes)))))
2572    (loop
2573      (let* ((count (min amount max-length))
2574             (start (ash (* count (1- count)) -1)))
2575        (dotimes (i count)
2576          (emit-byte segment (aref bytes (+ start i)))))
2577      (if (> amount max-length)
2578          (decf amount max-length)
2579          (return)))))
2580
2581(define-instruction wait (segment)
2582  (:printer byte ((op #b10011011)))
2583  (:emitter
2584   (emit-byte segment #b10011011)))
2585
2586
2587;;;; miscellaneous hackery
2588
2589(define-instruction byte (segment byte)
2590  (:emitter
2591   (emit-byte segment byte)))
2592
2593(define-instruction word (segment word)
2594  (:emitter
2595   (emit-word segment word)))
2596
2597(define-instruction dword (segment dword)
2598  (:emitter
2599   (emit-dword segment dword)))
2600
2601(defun emit-header-data (segment type)
2602  (emit-back-patch segment
2603                   n-word-bytes
2604                   (lambda (segment posn)
2605                     (emit-qword segment
2606                                 (logior type
2607                                         (ash (+ posn
2608                                                 (component-header-length))
2609                                              (- n-widetag-bits
2610                                                 word-shift)))))))
2611
2612(define-instruction simple-fun-header-word (segment)
2613  (:emitter
2614   (emit-header-data segment simple-fun-header-widetag)))
2615
2616(define-instruction lra-header-word (segment)
2617  (:emitter
2618   (emit-header-data segment return-pc-header-widetag)))
2619
2620;;;; Instructions required to do floating point operations using SSE
2621
2622;; Return a one- or two-element list of printers for SSE instructions.
2623;; The one-element list is used in the cases where the REX prefix is
2624;; really a prefix and thus automatically supported, the two-element
2625;; list is used when the REX prefix is used in an infix position.
2626(eval-when (:compile-toplevel :execute)
2627  (defun sse-inst-printer-list (inst-format-stem prefix opcode
2628                                &key more-fields printer)
2629    (let ((fields `(,@(when prefix
2630                        `((prefix ,prefix)))
2631                    (op ,opcode)
2632                    ,@more-fields))
2633          (inst-formats (if prefix
2634                            (list (symbolicate "EXT-" inst-format-stem)
2635                                  (symbolicate "EXT-REX-" inst-format-stem))
2636                            (list inst-format-stem))))
2637      (mapcar (lambda (inst-format)
2638                `(:printer ,inst-format ,fields ,@(if printer `(',printer))))
2639              inst-formats)))
2640  (defun 2byte-sse-inst-printer-list (inst-format-stem prefix op1 op2
2641                                       &key more-fields printer)
2642    (let ((fields `(,@(when prefix
2643                        `((prefix, prefix)))
2644                    (op1 ,op1)
2645                    (op2 ,op2)
2646                    ,@more-fields))
2647          (inst-formats (if prefix
2648                            (list (symbolicate "EXT-" inst-format-stem)
2649                                  (symbolicate "EXT-REX-" inst-format-stem))
2650                            (list inst-format-stem))))
2651      (mapcar (lambda (inst-format)
2652                `(:printer ,inst-format ,fields ,@(if printer `(',printer))))
2653              inst-formats))))
2654
2655(defun emit-sse-inst (segment dst src prefix opcode
2656                      &key operand-size (remaining-bytes 0))
2657  (when prefix
2658    (emit-byte segment prefix))
2659  (if operand-size
2660      (maybe-emit-rex-for-ea segment src dst :operand-size operand-size)
2661      (maybe-emit-rex-for-ea segment src dst))
2662  (emit-byte segment #x0f)
2663  (emit-byte segment opcode)
2664  (emit-ea segment src (reg-tn-encoding dst) :remaining-bytes remaining-bytes))
2665
2666;; 0110 0110:0000 1111:0111 00gg: 11 010 xmmreg:imm8
2667
2668(defun emit-sse-inst-with-imm (segment dst/src imm
2669                               prefix opcode /i
2670                               &key operand-size)
2671  (aver (<= 0 /i 7))
2672  (when prefix
2673    (emit-byte segment prefix))
2674  ;; dst/src is encoded in the r/m field, not r; REX.B must be
2675  ;; set to use extended XMM registers
2676  (maybe-emit-rex-prefix segment operand-size nil nil dst/src)
2677  (emit-byte segment #x0F)
2678  (emit-byte segment opcode)
2679  (emit-byte segment (logior (ash (logior #b11000 /i) 3)
2680                             (reg-tn-encoding dst/src)))
2681  (emit-byte segment imm))
2682
2683(defun emit-sse-inst-2byte (segment dst src prefix op1 op2
2684                            &key operand-size (remaining-bytes 0))
2685  (when prefix
2686    (emit-byte segment prefix))
2687  (if operand-size
2688      (maybe-emit-rex-for-ea segment src dst :operand-size operand-size)
2689      (maybe-emit-rex-for-ea segment src dst))
2690  (emit-byte segment #x0f)
2691  (emit-byte segment op1)
2692  (emit-byte segment op2)
2693  (emit-ea segment src (reg-tn-encoding dst) :remaining-bytes remaining-bytes))
2694
2695(macrolet
2696    ((define-imm-sse-instruction (name opcode /i)
2697         `(define-instruction ,name (segment dst/src imm)
2698            ,@(sse-inst-printer-list 'xmm-imm #x66 opcode
2699                                     :more-fields `((/i ,/i)))
2700            (:emitter
2701             (emit-sse-inst-with-imm segment dst/src imm
2702                                     #x66 ,opcode ,/i
2703                                     :operand-size :do-not-set)))))
2704  (define-imm-sse-instruction pslldq #x73 7)
2705  (define-imm-sse-instruction psllw-imm #x71 6)
2706  (define-imm-sse-instruction pslld-imm #x72 6)
2707  (define-imm-sse-instruction psllq-imm #x73 6)
2708
2709  (define-imm-sse-instruction psraw-imm #x71 4)
2710  (define-imm-sse-instruction psrad-imm #x72 4)
2711
2712  (define-imm-sse-instruction psrldq #x73 3)
2713  (define-imm-sse-instruction psrlw-imm #x71 2)
2714  (define-imm-sse-instruction psrld-imm #x72 2)
2715  (define-imm-sse-instruction psrlq-imm #x73 2))
2716
2717;;; Emit an SSE instruction that has an XMM register as the destination
2718;;; operand and for which the size of the operands is implicitly given
2719;;; by the instruction.
2720(defun emit-regular-sse-inst (segment dst src prefix opcode
2721                              &key (remaining-bytes 0))
2722  (aver (xmm-register-p dst))
2723  (emit-sse-inst segment dst src prefix opcode
2724                 :operand-size :do-not-set
2725                 :remaining-bytes remaining-bytes))
2726
2727(defun emit-regular-2byte-sse-inst (segment dst src prefix op1 op2
2728                                    &key (remaining-bytes 0))
2729  (aver (xmm-register-p dst))
2730  (emit-sse-inst-2byte segment dst src prefix op1 op2
2731                       :operand-size :do-not-set
2732                       :remaining-bytes remaining-bytes))
2733
2734;;; Instructions having an XMM register as the destination operand
2735;;; and an XMM register or a memory location as the source operand.
2736;;; The operand size is implicitly given by the instruction.
2737
2738(macrolet ((define-regular-sse-inst (name prefix opcode)
2739             `(define-instruction ,name (segment dst src)
2740                ,@(sse-inst-printer-list 'xmm-xmm/mem prefix opcode)
2741                (:emitter
2742                 (emit-regular-sse-inst segment dst src ,prefix ,opcode)))))
2743  ;; moves
2744  (define-regular-sse-inst movshdup #xf3 #x16)
2745  (define-regular-sse-inst movsldup #xf3 #x12)
2746  (define-regular-sse-inst movddup  #xf2 #x12)
2747  ;; logical
2748  (define-regular-sse-inst andpd    #x66 #x54)
2749  (define-regular-sse-inst andps    nil  #x54)
2750  (define-regular-sse-inst andnpd   #x66 #x55)
2751  (define-regular-sse-inst andnps   nil  #x55)
2752  (define-regular-sse-inst orpd     #x66 #x56)
2753  (define-regular-sse-inst orps     nil  #x56)
2754  (define-regular-sse-inst pand     #x66 #xdb)
2755  (define-regular-sse-inst pandn    #x66 #xdf)
2756  (define-regular-sse-inst por      #x66 #xeb)
2757  (define-regular-sse-inst pxor     #x66 #xef)
2758  (define-regular-sse-inst xorpd    #x66 #x57)
2759  (define-regular-sse-inst xorps    nil  #x57)
2760  ;; comparison
2761  (define-regular-sse-inst comisd   #x66 #x2f)
2762  (define-regular-sse-inst comiss   nil  #x2f)
2763  (define-regular-sse-inst ucomisd  #x66 #x2e)
2764  (define-regular-sse-inst ucomiss  nil  #x2e)
2765  ;; integer comparison
2766  (define-regular-sse-inst pcmpeqb  #x66 #x74)
2767  (define-regular-sse-inst pcmpeqw  #x66 #x75)
2768  (define-regular-sse-inst pcmpeqd  #x66 #x76)
2769  (define-regular-sse-inst pcmpgtb  #x66 #x64)
2770  (define-regular-sse-inst pcmpgtw  #x66 #x65)
2771  (define-regular-sse-inst pcmpgtd  #x66 #x66)
2772  ;; max/min
2773  (define-regular-sse-inst maxpd    #x66 #x5f)
2774  (define-regular-sse-inst maxps    nil  #x5f)
2775  (define-regular-sse-inst maxsd    #xf2 #x5f)
2776  (define-regular-sse-inst maxss    #xf3 #x5f)
2777  (define-regular-sse-inst minpd    #x66 #x5d)
2778  (define-regular-sse-inst minps    nil  #x5d)
2779  (define-regular-sse-inst minsd    #xf2 #x5d)
2780  (define-regular-sse-inst minss    #xf3 #x5d)
2781  ;; integer max/min
2782  (define-regular-sse-inst pmaxsw   #x66 #xee)
2783  (define-regular-sse-inst pmaxub   #x66 #xde)
2784  (define-regular-sse-inst pminsw   #x66 #xea)
2785  (define-regular-sse-inst pminub   #x66 #xda)
2786  ;; arithmetic
2787  (define-regular-sse-inst addpd    #x66 #x58)
2788  (define-regular-sse-inst addps    nil  #x58)
2789  (define-regular-sse-inst addsd    #xf2 #x58)
2790  (define-regular-sse-inst addss    #xf3 #x58)
2791  (define-regular-sse-inst addsubpd #x66 #xd0)
2792  (define-regular-sse-inst addsubps #xf2 #xd0)
2793  (define-regular-sse-inst divpd    #x66 #x5e)
2794  (define-regular-sse-inst divps    nil  #x5e)
2795  (define-regular-sse-inst divsd    #xf2 #x5e)
2796  (define-regular-sse-inst divss    #xf3 #x5e)
2797  (define-regular-sse-inst haddpd   #x66 #x7c)
2798  (define-regular-sse-inst haddps   #xf2 #x7c)
2799  (define-regular-sse-inst hsubpd   #x66 #x7d)
2800  (define-regular-sse-inst hsubps   #xf2 #x7d)
2801  (define-regular-sse-inst mulpd    #x66 #x59)
2802  (define-regular-sse-inst mulps    nil  #x59)
2803  (define-regular-sse-inst mulsd    #xf2 #x59)
2804  (define-regular-sse-inst mulss    #xf3 #x59)
2805  (define-regular-sse-inst rcpps    nil  #x53)
2806  (define-regular-sse-inst rcpss    #xf3 #x53)
2807  (define-regular-sse-inst rsqrtps  nil  #x52)
2808  (define-regular-sse-inst rsqrtss  #xf3 #x52)
2809  (define-regular-sse-inst sqrtpd   #x66 #x51)
2810  (define-regular-sse-inst sqrtps   nil  #x51)
2811  (define-regular-sse-inst sqrtsd   #xf2 #x51)
2812  (define-regular-sse-inst sqrtss   #xf3 #x51)
2813  (define-regular-sse-inst subpd    #x66 #x5c)
2814  (define-regular-sse-inst subps    nil  #x5c)
2815  (define-regular-sse-inst subsd    #xf2 #x5c)
2816  (define-regular-sse-inst subss    #xf3 #x5c)
2817  (define-regular-sse-inst unpckhpd #x66 #x15)
2818  (define-regular-sse-inst unpckhps nil  #x15)
2819  (define-regular-sse-inst unpcklpd #x66 #x14)
2820  (define-regular-sse-inst unpcklps nil  #x14)
2821  ;; integer arithmetic
2822  (define-regular-sse-inst paddb    #x66 #xfc)
2823  (define-regular-sse-inst paddw    #x66 #xfd)
2824  (define-regular-sse-inst paddd    #x66 #xfe)
2825  (define-regular-sse-inst paddq    #x66 #xd4)
2826  (define-regular-sse-inst paddsb   #x66 #xec)
2827  (define-regular-sse-inst paddsw   #x66 #xed)
2828  (define-regular-sse-inst paddusb  #x66 #xdc)
2829  (define-regular-sse-inst paddusw  #x66 #xdd)
2830  (define-regular-sse-inst pavgb    #x66 #xe0)
2831  (define-regular-sse-inst pavgw    #x66 #xe3)
2832  (define-regular-sse-inst pmaddwd  #x66 #xf5)
2833  (define-regular-sse-inst pmulhuw  #x66 #xe4)
2834  (define-regular-sse-inst pmulhw   #x66 #xe5)
2835  (define-regular-sse-inst pmullw   #x66 #xd5)
2836  (define-regular-sse-inst pmuludq  #x66 #xf4)
2837  (define-regular-sse-inst psadbw   #x66 #xf6)
2838  (define-regular-sse-inst psllw    #x66 #xf1)
2839  (define-regular-sse-inst pslld    #x66 #xf2)
2840  (define-regular-sse-inst psllq    #x66 #xf3)
2841  (define-regular-sse-inst psraw    #x66 #xe1)
2842  (define-regular-sse-inst psrad    #x66 #xe2)
2843  (define-regular-sse-inst psrlw    #x66 #xd1)
2844  (define-regular-sse-inst psrld    #x66 #xd2)
2845  (define-regular-sse-inst psrlq    #x66 #xd3)
2846  (define-regular-sse-inst psubb    #x66 #xf8)
2847  (define-regular-sse-inst psubw    #x66 #xf9)
2848  (define-regular-sse-inst psubd    #x66 #xfa)
2849  (define-regular-sse-inst psubq    #x66 #xfb)
2850  (define-regular-sse-inst psubsb   #x66 #xe8)
2851  (define-regular-sse-inst psubsw   #x66 #xe9)
2852  (define-regular-sse-inst psubusb  #x66 #xd8)
2853  (define-regular-sse-inst psubusw  #x66 #xd9)
2854  ;; conversion
2855  (define-regular-sse-inst cvtdq2pd #xf3 #xe6)
2856  (define-regular-sse-inst cvtdq2ps nil  #x5b)
2857  (define-regular-sse-inst cvtpd2dq #xf2 #xe6)
2858  (define-regular-sse-inst cvtpd2ps #x66 #x5a)
2859  (define-regular-sse-inst cvtps2dq #x66 #x5b)
2860  (define-regular-sse-inst cvtps2pd nil  #x5a)
2861  (define-regular-sse-inst cvtsd2ss #xf2 #x5a)
2862  (define-regular-sse-inst cvtss2sd #xf3 #x5a)
2863  (define-regular-sse-inst cvttpd2dq #x66 #xe6)
2864  (define-regular-sse-inst cvttps2dq #xf3 #x5b)
2865  ;; integer
2866  (define-regular-sse-inst packsswb  #x66 #x63)
2867  (define-regular-sse-inst packssdw  #x66 #x6b)
2868  (define-regular-sse-inst packuswb  #x66 #x67)
2869  (define-regular-sse-inst punpckhbw #x66 #x68)
2870  (define-regular-sse-inst punpckhwd #x66 #x69)
2871  (define-regular-sse-inst punpckhdq #x66 #x6a)
2872  (define-regular-sse-inst punpckhqdq #x66 #x6d)
2873  (define-regular-sse-inst punpcklbw #x66 #x60)
2874  (define-regular-sse-inst punpcklwd #x66 #x61)
2875  (define-regular-sse-inst punpckldq #x66 #x62)
2876  (define-regular-sse-inst punpcklqdq #x66 #x6c))
2877
2878(macrolet ((define-xmm-shuffle-sse-inst (name prefix opcode n-bits radix)
2879               (let ((shuffle-pattern
2880                      (intern (format nil "SSE-SHUFFLE-PATTERN-~D-~D"
2881                                      n-bits radix))))
2882                 `(define-instruction ,name (segment dst src pattern)
2883                    ,@(sse-inst-printer-list
2884                        'xmm-xmm/mem prefix opcode
2885                        :more-fields `((imm nil :type ',shuffle-pattern))
2886                        :printer '(:name :tab reg ", " reg/mem ", " imm))
2887
2888                    (:emitter
2889                     (aver (typep pattern '(unsigned-byte ,n-bits)))
2890                     (emit-regular-sse-inst segment dst src ,prefix ,opcode
2891                                            :remaining-bytes 1)
2892                     (emit-byte segment pattern))))))
2893  (define-xmm-shuffle-sse-inst pshufd  #x66 #x70 8 4)
2894  (define-xmm-shuffle-sse-inst pshufhw #xf3 #x70 8 4)
2895  (define-xmm-shuffle-sse-inst pshuflw #xf2 #x70 8 4)
2896  (define-xmm-shuffle-sse-inst shufpd  #x66 #xc6 2 2)
2897  (define-xmm-shuffle-sse-inst shufps  nil  #xc6 8 4))
2898
2899;; MASKMOVDQU (dst is DS:RDI)
2900(define-instruction maskmovdqu (segment src mask)
2901  (:emitter
2902   (aver (xmm-register-p src))
2903   (aver (xmm-register-p mask))
2904   (emit-regular-sse-inst segment src mask #x66 #xf7))
2905  . #.(sse-inst-printer-list 'xmm-xmm/mem #x66 #xf7))
2906
2907(macrolet ((define-comparison-sse-inst (name prefix opcode
2908                                        name-prefix name-suffix)
2909               `(define-instruction ,name (segment op x y)
2910                  ,@(sse-inst-printer-list
2911                      'xmm-xmm/mem prefix opcode
2912                      :more-fields '((imm nil :type 'sse-condition-code))
2913                      :printer `(,name-prefix imm ,name-suffix
2914                                 :tab reg ", " reg/mem))
2915                  (:emitter
2916                   (let ((code (position op *sse-conditions*)))
2917                     (aver code)
2918                     (emit-regular-sse-inst segment x y ,prefix ,opcode
2919                                            :remaining-bytes 1)
2920                     (emit-byte segment code))))))
2921  (define-comparison-sse-inst cmppd #x66 #xc2 "CMP" "PD")
2922  (define-comparison-sse-inst cmpps nil  #xc2 "CMP" "PS")
2923  (define-comparison-sse-inst cmpsd #xf2 #xc2 "CMP" "SD")
2924  (define-comparison-sse-inst cmpss #xf3 #xc2 "CMP" "SS"))
2925
2926;;; MOVSD, MOVSS
2927(macrolet ((define-movsd/ss-sse-inst (name prefix)
2928             `(define-instruction ,name (segment dst src)
2929                ,@(sse-inst-printer-list 'xmm-xmm/mem-dir prefix #b0001000)
2930                (:emitter
2931                 (cond ((xmm-register-p dst)
2932                        (emit-sse-inst segment dst src ,prefix #x10
2933                                       :operand-size :do-not-set))
2934                       (t
2935                        (aver (xmm-register-p src))
2936                        (emit-sse-inst segment src dst ,prefix #x11
2937                                       :operand-size :do-not-set)))))))
2938  (define-movsd/ss-sse-inst movsd #xf2)
2939  (define-movsd/ss-sse-inst movss #xf3))
2940
2941;;; Packed MOVs
2942(macrolet ((define-mov-sse-inst (name prefix opcode-from opcode-to
2943                                      &key force-to-mem reg-reg-name)
2944               `(progn
2945                  ,(when reg-reg-name
2946                     `(define-instruction ,reg-reg-name (segment dst src)
2947                        (:emitter
2948                         (aver (xmm-register-p dst))
2949                         (aver (xmm-register-p src))
2950                         (emit-regular-sse-inst segment dst src
2951                                                ,prefix ,opcode-from))))
2952                  (define-instruction ,name (segment dst src)
2953                    ,@(when opcode-from
2954                        (sse-inst-printer-list 'xmm-xmm/mem prefix opcode-from))
2955                    ,@(sse-inst-printer-list
2956                          'xmm-xmm/mem prefix opcode-to
2957                          :printer '(:name :tab reg/mem ", " reg))
2958                    (:emitter
2959                     (cond ,@(when opcode-from
2960                               `(((xmm-register-p dst)
2961                                  ,(when force-to-mem
2962                                     `(aver (not (or (register-p src)
2963                                                     (xmm-register-p src)))))
2964                                  (emit-regular-sse-inst
2965                                   segment dst src ,prefix ,opcode-from))))
2966                           (t
2967                            (aver (xmm-register-p src))
2968                            ,(when force-to-mem
2969                               `(aver (not (or (register-p dst)
2970                                               (xmm-register-p dst)))))
2971                            (emit-regular-sse-inst segment src dst
2972                                                   ,prefix ,opcode-to))))))))
2973  ;; direction bit?
2974  (define-mov-sse-inst movapd #x66 #x28 #x29)
2975  (define-mov-sse-inst movaps nil  #x28 #x29)
2976  (define-mov-sse-inst movdqa #x66 #x6f #x7f)
2977  (define-mov-sse-inst movdqu #xf3 #x6f #x7f)
2978
2979  ;; streaming
2980  (define-mov-sse-inst movntdq #x66 nil #xe7 :force-to-mem t)
2981  (define-mov-sse-inst movntpd #x66 nil #x2b :force-to-mem t)
2982  (define-mov-sse-inst movntps nil  nil #x2b :force-to-mem t)
2983
2984  ;; use movhps for movlhps and movlps for movhlps
2985  (define-mov-sse-inst movhpd #x66 #x16 #x17 :force-to-mem t)
2986  (define-mov-sse-inst movhps nil  #x16 #x17 :reg-reg-name movlhps)
2987  (define-mov-sse-inst movlpd #x66 #x12 #x13 :force-to-mem t)
2988  (define-mov-sse-inst movlps nil  #x12 #x13 :reg-reg-name movhlps)
2989  (define-mov-sse-inst movupd #x66 #x10 #x11)
2990  (define-mov-sse-inst movups nil  #x10 #x11))
2991
2992;;; MOVNTDQA
2993(define-instruction movntdqa (segment dst src)
2994  (:emitter
2995   (aver (and (xmm-register-p dst)
2996              (not (xmm-register-p src))))
2997   (emit-regular-2byte-sse-inst segment dst src #x66 #x38 #x2a))
2998  . #.(2byte-sse-inst-printer-list '2byte-xmm-xmm/mem #x66 #x38 #x2a))
2999
3000;;; MOVQ
3001(define-instruction movq (segment dst src)
3002  (:emitter
3003   (cond ((xmm-register-p dst)
3004          (emit-sse-inst segment dst src #xf3 #x7e
3005                         :operand-size :do-not-set))
3006         (t
3007          (aver (xmm-register-p src))
3008          (emit-sse-inst segment src dst #x66 #xd6
3009                         :operand-size :do-not-set))))
3010  . #.(append (sse-inst-printer-list 'xmm-xmm/mem #xf3 #x7e)
3011              (sse-inst-printer-list 'xmm-xmm/mem #x66 #xd6
3012                                     :printer '(:name :tab reg/mem ", " reg))))
3013
3014;;; Instructions having an XMM register as the destination operand
3015;;; and a general-purpose register or a memory location as the source
3016;;; operand. The operand size is calculated from the source operand.
3017
3018;;; MOVD - Move a 32- or 64-bit value from a general-purpose register or
3019;;; a memory location to the low order 32 or 64 bits of an XMM register
3020;;; with zero extension or vice versa.
3021;;; We do not support the MMX version of this instruction.
3022(define-instruction movd (segment dst src)
3023  (:emitter
3024   (cond ((xmm-register-p dst)
3025          (emit-sse-inst segment dst src #x66 #x6e))
3026         (t
3027          (aver (xmm-register-p src))
3028          (emit-sse-inst segment src dst #x66 #x7e))))
3029  . #.(append (sse-inst-printer-list 'xmm-reg/mem #x66 #x6e)
3030              (sse-inst-printer-list 'xmm-reg/mem #x66 #x7e
3031                                     :printer '(:name :tab reg/mem ", " reg))))
3032
3033(macrolet ((define-extract-sse-instruction (name prefix op1 op2
3034                                            &key explicit-qword)
3035             `(define-instruction ,name (segment dst src imm)
3036                (:printer
3037                 ,(if op2 (if explicit-qword
3038                              'ext-rex-2byte-reg/mem-xmm
3039                              'ext-2byte-reg/mem-xmm)
3040                      'ext-reg/mem-xmm)
3041                 ((prefix '(,prefix))
3042                  ,@(if op2
3043                        `((op1 '(,op1)) (op2 '(,op2)))
3044                        `((op '(,op1))))
3045                  (imm nil :type 'imm-byte))
3046                 '(:name :tab reg/mem ", " reg ", " imm))
3047                (:emitter
3048                 (aver (and (xmm-register-p src) (not (xmm-register-p dst))))
3049                 ,(if op2
3050                      `(emit-sse-inst-2byte segment dst src ,prefix ,op1 ,op2
3051                                            :operand-size ,(if explicit-qword
3052                                                               :qword
3053                                                               :do-not-set)
3054                                            :remaining-bytes 1)
3055                      `(emit-sse-inst segment dst src ,prefix ,op1
3056                                      :operand-size ,(if explicit-qword
3057                                                         :qword
3058                                                         :do-not-set)
3059                                      :remaining-bytes 1))
3060                 (emit-byte segment imm))))
3061
3062           (define-insert-sse-instruction (name prefix op1 op2)
3063             `(define-instruction ,name (segment dst src imm)
3064                (:printer
3065                 ,(if op2 'ext-2byte-xmm-reg/mem 'ext-xmm-reg/mem)
3066                 ((prefix '(,prefix))
3067                  ,@(if op2
3068                        `((op1 '(,op1)) (op2 '(,op2)))
3069                        `((op '(,op1))))
3070                  (imm nil :type 'imm-byte))
3071                 '(:name :tab reg ", " reg/mem ", " imm))
3072                (:emitter
3073                 (aver (and (xmm-register-p dst) (not (xmm-register-p src))))
3074                 ,(if op2
3075                      `(emit-sse-inst-2byte segment dst src ,prefix ,op1 ,op2
3076                                            :operand-size :do-not-set
3077                                            :remaining-bytes 1)
3078                      `(emit-sse-inst segment dst src ,prefix ,op1
3079                                      :operand-size :do-not-set
3080                                      :remaining-bytes 1))
3081                 (emit-byte segment imm)))))
3082
3083
3084  ;; pinsrq not encodable in 64-bit mode
3085  (define-insert-sse-instruction pinsrb #x66 #x3a #x20)
3086  (define-insert-sse-instruction pinsrw #x66 #xc4 nil)
3087  (define-insert-sse-instruction pinsrd #x66 #x3a #x22)
3088  (define-insert-sse-instruction insertps #x66 #x3a #x21)
3089
3090  (define-extract-sse-instruction pextrb #x66 #x3a #x14)
3091  (define-extract-sse-instruction pextrd #x66 #x3a #x16)
3092  (define-extract-sse-instruction pextrq #x66 #x3a #x16 :explicit-qword t)
3093  (define-extract-sse-instruction extractps #x66 #x3a #x17))
3094
3095;; PEXTRW has a new 2-byte encoding in SSE4.1 to allow dst to be
3096;; a memory address.
3097(define-instruction pextrw (segment dst src imm)
3098  (:emitter
3099   (aver (xmm-register-p src))
3100   (if (not (register-p dst))
3101       (emit-sse-inst-2byte segment dst src #x66 #x3a #x15
3102                            :operand-size :do-not-set :remaining-bytes 1)
3103       (emit-sse-inst segment dst src #x66 #xc5
3104                            :operand-size :do-not-set :remaining-bytes 1))
3105   (emit-byte segment imm))
3106  . #.(append
3107       (2byte-sse-inst-printer-list '2byte-reg/mem-xmm #x66 #x3a #x15
3108                                    :more-fields '((imm nil :type 'imm-byte))
3109                                    :printer '(:name :tab reg/mem ", " reg ", " imm))
3110       (sse-inst-printer-list 'reg/mem-xmm #x66 #xc5
3111                              :more-fields '((imm nil :type 'imm-byte))
3112                              :printer '(:name :tab reg/mem ", " reg ", " imm))))
3113
3114(macrolet ((define-integer-source-sse-inst (name prefix opcode &key mem-only)
3115             `(define-instruction ,name (segment dst src)
3116                ,@(sse-inst-printer-list 'xmm-reg/mem prefix opcode)
3117                (:emitter
3118                 (aver (xmm-register-p dst))
3119                 ,(when mem-only
3120                    `(aver (not (or (register-p src)
3121                                    (xmm-register-p src)))))
3122                 (let ((src-size (operand-size src)))
3123                   (aver (or (eq src-size :qword) (eq src-size :dword))))
3124                 (emit-sse-inst segment dst src ,prefix ,opcode)))))
3125  (define-integer-source-sse-inst cvtsi2sd #xf2 #x2a)
3126  (define-integer-source-sse-inst cvtsi2ss #xf3 #x2a)
3127  ;; FIXME: memory operand is always a QWORD
3128  (define-integer-source-sse-inst cvtpi2pd #x66 #x2a :mem-only t)
3129  (define-integer-source-sse-inst cvtpi2ps nil  #x2a :mem-only t))
3130
3131;;; Instructions having a general-purpose register as the destination
3132;;; operand and an XMM register or a memory location as the source
3133;;; operand. The operand size is calculated from the destination
3134;;; operand.
3135
3136(macrolet ((define-gpr-destination-sse-inst (name prefix opcode &key reg-only)
3137             `(define-instruction ,name (segment dst src)
3138                ,@(sse-inst-printer-list 'reg-xmm/mem prefix opcode)
3139                (:emitter
3140                 (aver (register-p dst))
3141                 ,(when reg-only
3142                    `(aver (xmm-register-p src)))
3143                 (let ((dst-size (operand-size dst)))
3144                   (aver (or (eq dst-size :qword) (eq dst-size :dword)))
3145                   (emit-sse-inst segment dst src ,prefix ,opcode
3146                                  :operand-size dst-size))))))
3147  (define-gpr-destination-sse-inst cvtsd2si  #xf2 #x2d)
3148  (define-gpr-destination-sse-inst cvtss2si  #xf3 #x2d)
3149  (define-gpr-destination-sse-inst cvttsd2si #xf2 #x2c)
3150  (define-gpr-destination-sse-inst cvttss2si #xf3 #x2c)
3151  (define-gpr-destination-sse-inst movmskpd  #x66 #x50 :reg-only t)
3152  (define-gpr-destination-sse-inst movmskps  nil  #x50 :reg-only t)
3153  (define-gpr-destination-sse-inst pmovmskb  #x66 #xd7 :reg-only t))
3154
3155;;;; We call these "2byte" instructions due to their two opcode bytes.
3156;;;; Intel and AMD call them three-byte instructions, as they count the
3157;;;; 0x0f byte for determining the number of opcode bytes.
3158
3159;;; Instructions that take XMM-XMM/MEM and XMM-XMM/MEM-IMM arguments.
3160
3161(macrolet ((regular-2byte-sse-inst (name prefix op1 op2)
3162             `(define-instruction ,name (segment dst src)
3163                ,@(2byte-sse-inst-printer-list '2byte-xmm-xmm/mem prefix
3164                                                op1 op2)
3165                (:emitter
3166                 (emit-regular-2byte-sse-inst segment dst src ,prefix
3167                                              ,op1 ,op2))))
3168           (regular-2byte-sse-inst-imm (name prefix op1 op2)
3169             `(define-instruction ,name (segment dst src imm)
3170                ,@(2byte-sse-inst-printer-list
3171                    '2byte-xmm-xmm/mem prefix op1 op2
3172                    :more-fields '((imm nil :type 'imm-byte))
3173                    :printer `(:name :tab reg ", " reg/mem ", " imm))
3174                (:emitter
3175                 (aver (typep imm '(unsigned-byte 8)))
3176                 (emit-regular-2byte-sse-inst segment dst src ,prefix ,op1 ,op2
3177                                              :remaining-bytes 1)
3178                 (emit-byte segment imm)))))
3179  (regular-2byte-sse-inst pshufb #x66 #x38 #x00)
3180  (regular-2byte-sse-inst phaddw #x66 #x38 #x01)
3181  (regular-2byte-sse-inst phaddd #x66 #x38 #x02)
3182  (regular-2byte-sse-inst phaddsw #x66 #x38 #x03)
3183  (regular-2byte-sse-inst pmaddubsw #x66 #x38 #x04)
3184  (regular-2byte-sse-inst phsubw #x66 #x38 #x05)
3185  (regular-2byte-sse-inst phsubd #x66 #x38 #x06)
3186  (regular-2byte-sse-inst phsubsw #x66 #x38 #x07)
3187  (regular-2byte-sse-inst psignb #x66 #x38 #x08)
3188  (regular-2byte-sse-inst psignw #x66 #x38 #x09)
3189  (regular-2byte-sse-inst psignd #x66 #x38 #x0a)
3190  (regular-2byte-sse-inst pmulhrsw #x66 #x38 #x0b)
3191
3192  (regular-2byte-sse-inst ptest #x66 #x38 #x17)
3193  (regular-2byte-sse-inst pabsb #x66 #x38 #x1c)
3194  (regular-2byte-sse-inst pabsw #x66 #x38 #x1d)
3195  (regular-2byte-sse-inst pabsd #x66 #x38 #x1e)
3196
3197  (regular-2byte-sse-inst pmuldq #x66 #x38 #x28)
3198  (regular-2byte-sse-inst pcmpeqq #x66 #x38 #x29)
3199  (regular-2byte-sse-inst packusdw #x66 #x38 #x2b)
3200
3201  (regular-2byte-sse-inst pcmpgtq #x66 #x38 #x37)
3202  (regular-2byte-sse-inst pminsb #x66 #x38 #x38)
3203  (regular-2byte-sse-inst pminsd #x66 #x38 #x39)
3204  (regular-2byte-sse-inst pminuw #x66 #x38 #x3a)
3205  (regular-2byte-sse-inst pminud #x66 #x38 #x3b)
3206  (regular-2byte-sse-inst pmaxsb #x66 #x38 #x3c)
3207  (regular-2byte-sse-inst pmaxsd #x66 #x38 #x3d)
3208  (regular-2byte-sse-inst pmaxuw #x66 #x38 #x3e)
3209  (regular-2byte-sse-inst pmaxud #x66 #x38 #x3f)
3210
3211  (regular-2byte-sse-inst pmulld #x66 #x38 #x40)
3212  (regular-2byte-sse-inst phminposuw #x66 #x38 #x41)
3213
3214  (regular-2byte-sse-inst aesimc #x66 #x38 #xdb)
3215  (regular-2byte-sse-inst aesenc #x66 #x38 #xdc)
3216  (regular-2byte-sse-inst aesenclast #x66 #x38 #xdd)
3217  (regular-2byte-sse-inst aesdec #x66 #x38 #xde)
3218  (regular-2byte-sse-inst aesdeclast #x66 #x38 #xdf)
3219
3220  (regular-2byte-sse-inst pmovsxbw #x66 #x38 #x20)
3221  (regular-2byte-sse-inst pmovsxbd #x66 #x38 #x21)
3222  (regular-2byte-sse-inst pmovsxbq #x66 #x38 #x22)
3223  (regular-2byte-sse-inst pmovsxwd #x66 #x38 #x23)
3224  (regular-2byte-sse-inst pmovsxwq #x66 #x38 #x24)
3225  (regular-2byte-sse-inst pmovsxdq #x66 #x38 #x25)
3226
3227  (regular-2byte-sse-inst pmovzxbw #x66 #x38 #x30)
3228  (regular-2byte-sse-inst pmovzxbd #x66 #x38 #x31)
3229  (regular-2byte-sse-inst pmovzxbq #x66 #x38 #x32)
3230  (regular-2byte-sse-inst pmovzxwd #x66 #x38 #x33)
3231  (regular-2byte-sse-inst pmovzxwq #x66 #x38 #x34)
3232  (regular-2byte-sse-inst pmovzxdq #x66 #x38 #x35)
3233
3234  (regular-2byte-sse-inst-imm roundps #x66 #x3a #x08)
3235  (regular-2byte-sse-inst-imm roundpd #x66 #x3a #x09)
3236  (regular-2byte-sse-inst-imm roundss #x66 #x3a #x0a)
3237  (regular-2byte-sse-inst-imm roundsd #x66 #x3a #x0b)
3238  (regular-2byte-sse-inst-imm blendps #x66 #x3a #x0c)
3239  (regular-2byte-sse-inst-imm blendpd #x66 #x3a #x0d)
3240  (regular-2byte-sse-inst-imm pblendw #x66 #x3a #x0e)
3241  (regular-2byte-sse-inst-imm palignr #x66 #x3a #x0f)
3242  (regular-2byte-sse-inst-imm dpps    #x66 #x3a #x40)
3243  (regular-2byte-sse-inst-imm dppd    #x66 #x3a #x41)
3244
3245  (regular-2byte-sse-inst-imm mpsadbw #x66 #x3a #x42)
3246  (regular-2byte-sse-inst-imm pclmulqdq #x66 #x3a #x44)
3247
3248  (regular-2byte-sse-inst-imm pcmpestrm #x66 #x3a #x60)
3249  (regular-2byte-sse-inst-imm pcmpestri #x66 #x3a #x61)
3250  (regular-2byte-sse-inst-imm pcmpistrm #x66 #x3a #x62)
3251  (regular-2byte-sse-inst-imm pcmpistri #x66 #x3a #x63)
3252
3253  (regular-2byte-sse-inst-imm aeskeygenassist #x66 #x3a #xdf))
3254
3255;;; Other SSE instructions
3256
3257;; Instructions implicitly using XMM0 as a mask
3258(macrolet ((define-sse-inst-implicit-mask (name prefix op1 op2)
3259             `(define-instruction ,name (segment dst src mask)
3260                ,@(2byte-sse-inst-printer-list
3261                    '2byte-xmm-xmm/mem prefix op1 op2
3262                    :printer '(:name :tab reg ", " reg/mem ", XMM0"))
3263                (:emitter
3264                 (aver (xmm-register-p dst))
3265                 (aver (and (xmm-register-p mask) (= (tn-offset mask) 0)))
3266                 (emit-regular-2byte-sse-inst segment dst src ,prefix
3267                                              ,op1 ,op2)))))
3268
3269  (define-sse-inst-implicit-mask pblendvb #x66 #x38 #x10)
3270  (define-sse-inst-implicit-mask blendvps #x66 #x38 #x14)
3271  (define-sse-inst-implicit-mask blendvpd #x66 #x38 #x15))
3272
3273(define-instruction movnti (segment dst src)
3274  (:printer ext-reg-reg/mem-no-width ((op #xc3)) '(:name :tab reg/mem ", " reg))
3275  (:emitter
3276   (aver (not (or (register-p dst)
3277                  (xmm-register-p dst))))
3278   (aver (register-p src))
3279   (maybe-emit-rex-for-ea segment dst src)
3280   (emit-byte segment #x0f)
3281   (emit-byte segment #xc3)
3282   (emit-ea segment dst (reg-tn-encoding src))))
3283
3284(define-instruction prefetch (segment type src)
3285  (:printer ext-reg/mem-no-width ((op '(#x18 0)))
3286            '("PREFETCHNTA" :tab reg/mem))
3287  (:printer ext-reg/mem-no-width ((op '(#x18 1)))
3288            '("PREFETCHT0" :tab reg/mem))
3289  (:printer ext-reg/mem-no-width ((op '(#x18 2)))
3290            '("PREFETCHT1" :tab reg/mem))
3291  (:printer ext-reg/mem-no-width ((op '(#x18 3)))
3292            '("PREFETCHT2" :tab reg/mem))
3293  (:emitter
3294   (aver (not (or (register-p src)
3295                  (xmm-register-p src))))
3296   (aver (eq (operand-size src) :byte))
3297   (let ((type (position type #(:nta :t0 :t1 :t2))))
3298     (aver type)
3299     (maybe-emit-rex-for-ea segment src nil)
3300     (emit-byte segment #x0f)
3301     (emit-byte segment #x18)
3302     (emit-ea segment src type))))
3303
3304(define-instruction clflush (segment src)
3305  (:printer ext-reg/mem-no-width ((op '(#xae 7))))
3306  (:emitter
3307   (aver (not (or (register-p src)
3308                  (xmm-register-p src))))
3309   (aver (eq (operand-size src) :byte))
3310   (maybe-emit-rex-for-ea segment src nil)
3311   (emit-byte segment #x0f)
3312   (emit-byte segment #xae)
3313   (emit-ea segment src 7)))
3314
3315(macrolet ((define-fence-instruction (name last-byte)
3316               `(define-instruction ,name (segment)
3317                  (:printer three-bytes ((op '(#x0f #xae ,last-byte))))
3318                  (:emitter
3319                   (emit-byte segment #x0f)
3320                   (emit-byte segment #xae)
3321                   (emit-byte segment ,last-byte)))))
3322  (define-fence-instruction lfence #b11101000)
3323  (define-fence-instruction mfence #b11110000)
3324  (define-fence-instruction sfence #b11111000))
3325
3326(define-instruction pause (segment)
3327  (:printer two-bytes ((op '(#xf3 #x90))))
3328  (:emitter
3329   (emit-byte segment #xf3)
3330   (emit-byte segment #x90)))
3331
3332(define-instruction ldmxcsr (segment src)
3333  (:printer ext-reg/mem-no-width ((op '(#xae 2))))
3334  (:emitter
3335   (aver (not (or (register-p src)
3336                  (xmm-register-p src))))
3337   (aver (eq (operand-size src) :dword))
3338   (maybe-emit-rex-for-ea segment src nil)
3339   (emit-byte segment #x0f)
3340   (emit-byte segment #xae)
3341   (emit-ea segment src 2)))
3342
3343(define-instruction stmxcsr (segment dst)
3344  (:printer ext-reg/mem-no-width ((op '(#xae 3))))
3345  (:emitter
3346   (aver (not (or (register-p dst)
3347                  (xmm-register-p dst))))
3348   (aver (eq (operand-size dst) :dword))
3349   (maybe-emit-rex-for-ea segment dst nil)
3350   (emit-byte segment #x0f)
3351   (emit-byte segment #xae)
3352   (emit-ea segment dst 3)))
3353
3354(define-instruction popcnt (segment dst src)
3355  (:printer f3-escape-reg-reg/mem ((op #xB8)))
3356  (:printer rex-f3-escape-reg-reg/mem ((op #xB8)))
3357  (:emitter
3358   (aver (register-p dst))
3359   (aver (and (register-p dst) (not (eq (operand-size dst) :byte))))
3360   (aver (not (eq (operand-size src) :byte)))
3361   (emit-sse-inst segment dst src #xf3 #xb8)))
3362
3363(define-instruction crc32 (segment dst src)
3364  ;; The low bit of the final opcode byte sets the source size.
3365  ;; REX.W bit sets the destination size. can't have #x66 prefix and REX.W = 1.
3366  (:printer ext-2byte-prefix-reg-reg/mem
3367            ((prefix #xf2) (op1 #x38)
3368             (op2 #b1111000 :field (byte 7 25)) ; #xF0 ignoring the low bit
3369             (src-width nil :field (byte 1 24) :prefilter #'prefilter-width)
3370             (reg nil :printer #'print-d/q-word-reg)))
3371  (:printer ext-rex-2byte-prefix-reg-reg/mem
3372            ((prefix #xf2) (op1 #x38)
3373             (op2 #b1111000 :field (byte 7 33)) ; ditto
3374             (src-width nil :field (byte 1 32) :prefilter #'prefilter-width)
3375             (reg nil :printer #'print-d/q-word-reg)))
3376  (:emitter
3377   (let ((dst-size (operand-size dst))
3378         (src-size (operand-size src)))
3379     ;; The following operand size combinations are possible:
3380     ;;   dst = r32, src = r/m{8, 16, 32}
3381     ;;   dst = r64, src = r/m{8, 64}
3382     (aver (and (register-p dst)
3383                (memq src-size (case dst-size
3384                                 (:dword '(:byte :word :dword))
3385                                 (:qword '(:byte :qword))))))
3386     (maybe-emit-operand-size-prefix segment src-size)
3387     (emit-sse-inst-2byte segment dst src #xf2 #x38
3388                          (if (eq src-size :byte) #xf0 #xf1)
3389                          ;; :OPERAND-SIZE is ordinarily determined
3390                          ;; from 'src', so override it to use 'dst'.
3391                          :operand-size dst-size))))
3392
3393;;;; Miscellany
3394
3395(define-instruction cpuid (segment)
3396  (:printer two-bytes ((op '(#b00001111 #b10100010))))
3397  (:emitter
3398   (emit-byte segment #b00001111)
3399   (emit-byte segment #b10100010)))
3400
3401(define-instruction rdtsc (segment)
3402  (:printer two-bytes ((op '(#b00001111 #b00110001))))
3403  (:emitter
3404   (emit-byte segment #b00001111)
3405   (emit-byte segment #b00110001)))
3406
3407;;;; Intel TSX - some user library (STMX) used to define these,
3408;;;; but it's not really supported and they actually belong here.
3409
3410(define-instruction-format
3411    (xbegin 48 :default-printer '(:name :tab label))
3412  (op :fields (list (byte 8 0) (byte 8 8)) :value '(#xc7 #xf8))
3413  (label :field (byte 32 16) :type 'displacement))
3414
3415(define-instruction-format
3416    (xabort 24 :default-printer '(:name :tab imm))
3417  (op :fields (list (byte 8 0) (byte 8 8)) :value '(#xc6 #xf8))
3418  (imm :field (byte 8 16)))
3419
3420(define-instruction xbegin (segment &optional where)
3421  (:printer xbegin ())
3422  (:emitter
3423   (emit-byte segment #xc7)
3424   (emit-byte segment #xf8)
3425   (if where
3426       ;; emit 32-bit, signed relative offset for where
3427       (emit-dword-displacement-backpatch segment where)
3428       ;; nowhere to jump: simply jump to the next instruction
3429       (emit-skip segment 4 0))))
3430
3431(define-instruction xend (segment)
3432  (:printer three-bytes ((op '(#x0f #x01 #xd5))))
3433  (:emitter
3434   (emit-byte segment #x0f)
3435   (emit-byte segment #x01)
3436   (emit-byte segment #xd5)))
3437
3438(define-instruction xabort (segment reason)
3439  (:printer xabort ())
3440  (:emitter
3441   (aver (<= 0 reason #xff))
3442   (emit-byte segment #xc6)
3443   (emit-byte segment #xf8)
3444   (emit-byte segment reason)))
3445
3446(define-instruction xtest (segment)
3447  (:printer three-bytes ((op '(#x0f #x01 #xd6))))
3448  (:emitter
3449   (emit-byte segment #x0f)
3450   (emit-byte segment #x01)
3451   (emit-byte segment #xd6)))
3452
3453(define-instruction xacquire (segment) ;; same prefix byte as repne/repnz
3454  (:emitter
3455   (emit-byte segment #xf2)))
3456
3457(define-instruction xrelease (segment) ;; same prefix byte as rep/repe/repz
3458  (:emitter
3459   (emit-byte segment #xf3)))
3460
3461;;;; Late VM definitions
3462
3463(defun canonicalize-inline-constant (constant &aux (alignedp nil))
3464  (let ((first (car constant)))
3465    (when (eql first :aligned)
3466      (setf alignedp t)
3467      (pop constant)
3468      (setf first (car constant)))
3469    (typecase first
3470      (single-float (setf constant (list :single-float first)))
3471      (double-float (setf constant (list :double-float first)))
3472      .
3473      #+sb-xc-host
3474      ((complex
3475        ;; It's an error (perhaps) on the host to use simd-pack type.
3476        ;; [and btw it's disconcerting that this isn't an ETYPECASE.]
3477        (error "xc-host can't reference complex float")))
3478      #-sb-xc-host
3479      (((complex single-float)
3480        (setf constant (list :complex-single-float first)))
3481       ((complex double-float)
3482        (setf constant (list :complex-double-float first)))
3483       #!+sb-simd-pack
3484       (simd-pack
3485        (setq constant
3486              (list :sse (logior (%simd-pack-low first)
3487                                 (ash (%simd-pack-high first) 64))))))))
3488  (destructuring-bind (type value) constant
3489    (ecase type
3490      ((:byte :word :dword :qword)
3491         (aver (integerp value))
3492         (cons type value))
3493      ((:base-char)
3494         #!+sb-unicode (aver (typep value 'base-char))
3495         (cons :byte (char-code value)))
3496      ((:character)
3497         (aver (characterp value))
3498         (cons :dword (char-code value)))
3499      ((:single-float)
3500         (aver (typep value 'single-float))
3501         (cons (if alignedp :oword :dword)
3502               (ldb (byte 32 0) (single-float-bits value))))
3503      ((:double-float)
3504         (aver (typep value 'double-float))
3505         (cons (if alignedp :oword :qword)
3506               (ldb (byte 64 0) (logior (ash (double-float-high-bits value) 32)
3507                                        (double-float-low-bits value)))))
3508      ((:complex-single-float)
3509         (aver (typep value '(complex single-float)))
3510         (cons (if alignedp :oword :qword)
3511               (ldb (byte 64 0)
3512                    (logior (ash (single-float-bits (imagpart value)) 32)
3513                            (ldb (byte 32 0)
3514                                 (single-float-bits (realpart value)))))))
3515      ((:oword :sse)
3516         (aver (integerp value))
3517         (cons :oword value))
3518      ((:complex-double-float)
3519         (aver (typep value '(complex double-float)))
3520         (cons :oword
3521               (logior (ash (double-float-high-bits (imagpart value)) 96)
3522                       (ash (double-float-low-bits (imagpart value)) 64)
3523                       (ash (ldb (byte 32 0)
3524                                 (double-float-high-bits (realpart value)))
3525                            32)
3526                       (double-float-low-bits (realpart value))))))))
3527
3528(defun inline-constant-value (constant)
3529  (let ((label (gen-label))
3530        (size  (ecase (car constant)
3531                 ((:byte :word :dword :qword) (car constant))
3532                 ((:oword) :qword))))
3533    (values label (make-ea size
3534                           :disp (make-fixup nil :code-object label)))))
3535
3536(defun emit-constant-segment-header (segment constants optimize)
3537  (declare (ignore constants))
3538  (emit-long-nop segment (if optimize 64 16)))
3539
3540(defun size-nbyte (size)
3541  (ecase size
3542    (:byte  1)
3543    (:word  2)
3544    (:dword 4)
3545    (:qword 8)
3546    (:oword 16)))
3547
3548(defun sort-inline-constants (constants)
3549  (stable-sort constants #'> :key (lambda (constant)
3550                                    (size-nbyte (caar constant)))))
3551
3552(defun emit-inline-constant (constant label)
3553  (let ((size (size-nbyte (car constant))))
3554    (emit-alignment (integer-length (1- size)))
3555    (emit-label label)
3556    (let ((val (cdr constant)))
3557      (loop repeat size
3558            do (inst byte (ldb (byte 8 0) val))
3559               (setf val (ash val -8))))))
3560