1;;;; machine-independent disassembler
2
3;;;; This software is part of the SBCL system. See the README file for
4;;;; more information.
5;;;;
6;;;; This software is derived from the CMU CL system, which was
7;;;; written at Carnegie Mellon University and released into the
8;;;; public domain. The software is in the public domain and is
9;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10;;;; files for more information.
11
12(in-package "SB!DISASSEM")
13
14;;; types and defaults
15
16(defconstant label-column-width 7)
17
18(deftype text-width () '(integer 0 1000))
19(deftype alignment () '(integer 0 64))
20(deftype offset () '(signed-byte 24))
21(deftype address () '(unsigned-byte #.sb!vm:n-word-bits))
22(deftype disassem-length () '(unsigned-byte 24))
23(deftype column () '(integer 0 1000))
24
25(defconstant max-filtered-value-index 32)
26(deftype filtered-value-index ()
27  `(integer 0 (,max-filtered-value-index)))
28(deftype filtered-value-vector ()
29  `(simple-array t (,max-filtered-value-index)))
30
31;;;; disassembly parameters
32
33;; With a few tweaks, you can use a running SBCL as a cross-assembler
34;; and disassembler for other supported backends,
35;; if that backend has been converted to use a distinct ASM package.
36(eval-when (:compile-toplevel :load-toplevel :execute)
37  (defparameter sb!assem::*backend-instruction-set-package*
38    (find-package #.(sb-cold::backend-asm-package-name))))
39
40(defvar *disassem-inst-space* nil)
41
42;;; minimum alignment of instructions, in bytes
43(defvar *disassem-inst-alignment-bytes* sb!vm:n-word-bytes)
44(declaim (type alignment *disassem-inst-alignment-bytes*))
45
46;; How many columns of output to allow for the address preceding each line.
47;; If NIL, use the minimum possible width for the disassembly range.
48;; If 0, do not print addresses.
49(defvar *disassem-location-column-width* nil)
50(declaim (type (or null text-width) *disassem-location-column-width*))
51
52;;; the width of the column in which instruction-names are printed. A
53;;; value of zero gives the effect of not aligning the arguments at
54;;; all.
55(defvar *disassem-opcode-column-width* 0)
56(declaim (type text-width *disassem-opcode-column-width*))
57
58;;; the width of the column in which instruction-bytes are printed. A
59;;; value of zero disables the printing of instruction bytes.
60(defvar *disassem-inst-column-width* 16
61  #!+sb-doc
62  "The width of instruction bytes.")
63(declaim (type text-width *disassem-inst-column-width*))
64
65(defvar *disassem-note-column* (+ 45 *disassem-inst-column-width*)
66  #!+sb-doc
67  "The column in which end-of-line comments for notes are started.")
68
69;;;; A DCHUNK contains the bits we look at to decode an
70;;;; instruction.
71;;;; I tried to keep this abstract so that if using integers > the machine
72;;;; word size conses too much, it can be changed to use bit-vectors or
73;;;; something.
74;;;;
75;;;; KLUDGE: It's not clear that using bit-vectors would be any more efficient.
76;;;; Perhaps the abstraction could go away. -- WHN 19991124
77
78#!-sb-fluid
79(declaim (inline dchunk-or dchunk-and dchunk-clear dchunk-not
80                 dchunk-make-mask dchunk-make-field
81                 dchunk-extract
82                 dchunk=
83                 dchunk-count-bits))
84
85;;; For variable-length instruction sets, such as x86, it is better to
86;;; define the dchunk size to be the smallest number of bits necessary
87;;; and sufficient to decode any instruction format, if that quantity
88;;; of bits is small enough to avoid bignum consing.
89;;; Ideally this constant would go in the 'insts' file for the architecture,
90;;; but there's really no easy way to do that at present.
91(defconstant dchunk-bits
92  #!+x86-64 56
93  #!-x86-64 sb!vm:n-word-bits)
94
95(deftype dchunk ()
96  `(unsigned-byte ,dchunk-bits))
97(deftype dchunk-index ()
98  `(integer 0 ,dchunk-bits))
99
100(defconstant dchunk-zero 0)
101(defconstant dchunk-one (ldb (byte dchunk-bits 0) -1))
102
103(defun dchunk-extract (chunk byte-spec)
104  (declare (type dchunk chunk))
105  (the dchunk (ldb byte-spec (the dchunk chunk))))
106
107(defmacro dchunk-copy (x)
108  `(the dchunk ,x))
109
110(defun dchunk-or (to from)
111  (declare (type dchunk to from))
112  (the dchunk (logior to from)))
113(defun dchunk-and (to from)
114  (declare (type dchunk to from))
115  (the dchunk (logand to from)))
116(defun dchunk-clear (to from)
117  (declare (type dchunk to from))
118  (the dchunk (logandc2 to from)))
119(defun dchunk-not (from)
120  (declare (type dchunk from))
121  (the dchunk (logand dchunk-one (lognot from))))
122
123(defmacro dchunk-andf (to from)
124  `(setf ,to (dchunk-and ,to ,from)))
125(defmacro dchunk-orf (to from)
126  `(setf ,to (dchunk-or ,to ,from)))
127(defmacro dchunk-clearf (to from)
128  `(setf ,to (dchunk-clear ,to ,from)))
129
130(defun dchunk-make-mask (pos)
131  (the dchunk (mask-field pos -1)))
132(defun dchunk-make-field (pos value)
133  (the dchunk (dpb value pos 0)))
134
135(defmacro make-dchunk (value)
136  `(the dchunk ,value))
137
138(defun dchunk-corrected-extract (from pos unit-bits byte-order)
139  (declare (type dchunk from))
140  (if (eq byte-order :big-endian)
141      (ldb (byte (byte-size pos)
142                 (+ (byte-position pos) (- dchunk-bits unit-bits)))
143           (the dchunk from))
144      (ldb pos (the dchunk from))))
145
146(defmacro dchunk-insertf (place pos value)
147  `(setf ,place (the dchunk (dpb ,value ,pos (the dchunk,place)))))
148
149(defun dchunk= (x y)
150  (declare (type dchunk x y))
151  (= x y))
152(defmacro dchunk-zerop (x)
153  `(dchunk= ,x dchunk-zero))
154
155(defun dchunk-strict-superset-p (sup sub)
156  (and (zerop (logandc2 sub sup))
157       (not (zerop (logandc2 sup sub)))))
158
159(defun dchunk-count-bits (x)
160  (declare (type dchunk x))
161  (logcount x))
162
163(defstruct (instruction (:conc-name inst-)
164                        (:constructor
165                         make-instruction (name format-name print-name
166                                           length mask id printer labeller
167                                           prefilters control))
168                        (:copier nil))
169  (name nil :type (or symbol string) :read-only t)
170  (format-name nil :type (or symbol string) :read-only t)
171
172  (mask dchunk-zero :type dchunk :read-only t)   ; bits in the inst that are constant
173  (id dchunk-zero :type dchunk :read-only t)     ; value of those constant bits
174
175  (length 0 :type disassem-length :read-only t)  ; in bytes
176
177  (print-name nil :type symbol :read-only t)
178
179  ;; disassembly "functions"
180  (prefilters nil :type list :read-only t)
181  (labeller nil :type (or list vector) :read-only t)
182  (printer nil :type (or null function) :read-only t)
183  (control nil :type (or null function) :read-only t)
184
185  ;; instructions that are the same as this instruction but with more
186  ;; constraints
187  (specializers nil :type list))
188(defmethod print-object ((inst instruction) stream)
189  (print-unreadable-object (inst stream :type t :identity t)
190    (format stream "~A(~A)" (inst-name inst) (inst-format-name inst))))
191
192;;;; an instruction space holds all known machine instructions in a
193;;;; form that can be easily searched
194
195(defstruct (inst-space (:conc-name ispace-)
196                       (:copier nil))
197  (valid-mask dchunk-zero :type dchunk) ; applies to *children*
198  (choices nil :type list))
199(defmethod print-object ((ispace inst-space) stream)
200  (print-unreadable-object (ispace stream :type t :identity t)))
201
202;;; now that we've defined the structure, we can declaim the type of
203;;; the variable:
204(declaim (type (or null inst-space) *disassem-inst-space*))
205
206(defstruct (inst-space-choice (:conc-name ischoice-)
207                              (:copier nil))
208  (common-id dchunk-zero :type dchunk)  ; applies to *parent's* mask
209  (subspace (missing-arg) :type (or inst-space instruction)))
210
211(defstruct (arg (:constructor %make-arg (name))
212                (:copier nil)
213                (:predicate nil))
214  (name nil :type symbol)
215  (fields nil :type list)
216
217  (value nil :type (or list integer))
218  (sign-extend-p nil :type boolean)
219
220  ;; functions to use
221  (printer nil :type (or null function vector))
222  (prefilter nil :type (or null function))
223  (use-label nil :type (or boolean function)))
224
225(defstruct (instruction-format (:conc-name format-)
226                               (:constructor make-inst-format
227                                             (name length default-printer args))
228                               (:copier nil))
229  (name nil)
230  (args nil :type list)
231
232  (length 0 :type disassem-length)               ; in bytes
233
234  (default-printer nil :type list))
235
236;;; A FUNSTATE holds the state of any arguments used in a disassembly
237;;; function. It is a 2-level alist. The outer list maps each ARG to
238;;; a list of styles in which that arg can be rendered.
239;;; Each rendering is named by a keyword (the key to the inner alist),
240;;; and is represented as a list of temp vars and values for them.
241(defun make-funstate (args) (mapcar #'list args))
242
243(defun arg-position (arg funstate)
244  ;;; The THE form is to assert that ARG is found.
245  (the filtered-value-index (position arg funstate :key #'car)))
246
247(defun arg-or-lose (name funstate)
248  (or (car (assoc name funstate :key #'arg-name :test #'eq))
249      (pd-error "unknown argument ~S" name)))
250
251;;; machinery to provide more meaningful error messages during compilation
252(defvar *current-instruction-flavor*)
253(defun pd-error (fmt &rest args)
254  (if (boundp '*current-instruction-flavor*)
255      (error "~{A printer ~D~}: ~?" *current-instruction-flavor* fmt args)
256      (apply #'error fmt args)))
257
258(defun format-or-lose (name)
259  (or (get name 'inst-format)
260      (pd-error "unknown instruction format ~S" name)))
261
262;;; Return a modified copy of ARG that has property values changed
263;;; depending on whether it is being used at compile-time or load-time.
264;;; This is to avoid evaluating #'FOO references at compile-time
265;;; while allowing compile-time manipulation of byte specifiers.
266(defun massage-arg (spec when)
267  (ecase when
268    (:compile
269     ;; At compile-time we get a restricted view of the DEFINE-ARG-TYPE args,
270     ;; just enough to macroexpand :READER definitions. :TYPE and ::SIGN-EXTEND
271     ;; are as specified, but :PREFILTER, :LABELLER, and :PRINTER are not
272     ;; compile-time evaluated.
273     (loop for (indicator val) on (cdr spec) by #'cddr
274           nconc (case indicator
275                   (:sign-extend ; Only a literal T or NIL is allowed
276                    (list indicator (the boolean val)))
277                   (:prefilter
278                    ;; #'ERROR is a placeholder for any compile-time non-nil
279                    ;; value. If nil, it must be literally nil, not 'NIL.
280                    (list indicator (if val #'error nil)))
281                   ((:field :fields :type)
282                    (list indicator val)))))
283    (:eval
284     (loop for (indicator raw-val) on (cdr spec) by #'cddr
285           ;; Use NAMED-LAMBDAs to enhance debuggability,
286           for val = (if (typep raw-val '(cons (eql lambda)))
287                         `(named-lambda ,(format nil "~A.~A" (car spec) indicator)
288                                        ,@(cdr raw-val))
289                         raw-val)
290           nconc (case indicator
291                   (:reader nil) ; drop it
292                   (:prefilter ; Enforce compile-time-determined not-nullness.
293                    (list indicator (if val `(the (not null) ,val) nil)))
294                   (t (list indicator val)))))))
295
296(defmacro define-instruction-format ((format-name length-in-bits
297                                      &key default-printer include)
298                                     &rest arg-specs)
299  #+sb-xc-host (declare (ignore default-printer))
300  #!+sb-doc
301  "DEFINE-INSTRUCTION-FORMAT (Name Length {Format-Key Value}*) Arg-Def*
302  Define an instruction format NAME for the disassembler's use. LENGTH is
303  the length of the format in bits.
304  Possible FORMAT-KEYs:
305
306  :INCLUDE other-format-name
307      Inherit all arguments and properties of the given format. Any
308      arguments defined in the current format definition will either modify
309      the copy of an existing argument (keeping in the same order with
310      respect to when prefilters are called), if it has the same name as
311      one, or be added to the end.
312  :DEFAULT-PRINTER printer-list
313      Use the given PRINTER-LIST as a format to print any instructions of
314      this format when they don't specify something else.
315
316  Each ARG-DEF defines one argument in the format, and is of the form
317    (Arg-Name {Arg-Key Value}*)
318
319  Possible ARG-KEYs (the values are evaluated unless otherwise specified):
320
321  :FIELDS byte-spec-list
322      The argument takes values from these fields in the instruction. If
323      the list is of length one, then the corresponding value is supplied by
324      itself; otherwise it is a list of the values. The list may be NIL.
325  :FIELD byte-spec
326      The same as :FIELDS (list byte-spec).
327
328  :VALUE value
329      If the argument only has one field, this is the value it should have,
330      otherwise it's a list of the values of the individual fields. This can
331      be overridden in an instruction-definition or a format definition
332      including this one by specifying another, or NIL to indicate that it's
333      variable.
334
335  :SIGN-EXTEND boolean
336      If non-NIL, the raw value of this argument is sign-extended,
337      immediately after being extracted from the instruction (before any
338      prefilters are run, for instance). If the argument has multiple
339      fields, they are all sign-extended.
340
341  :TYPE arg-type-name
342      Inherit any properties of the given argument type.
343
344  :PREFILTER function
345      A function which is called (along with all other prefilters, in the
346      order that their arguments appear in the instruction-format) before
347      any printing is done, to filter the raw value. Any uses of READ-SUFFIX
348      must be done inside a prefilter.
349
350  :PRINTER function-string-or-vector
351      A function, string, or vector which is used to print this argument.
352
353  :USE-LABEL
354      If non-NIL, the value of this argument is used as an address, and if
355      that address occurs inside the disassembled code, it is replaced by a
356      label. If this is a function, it is called to filter the value."
357  `(progn
358     (eval-when (:compile-toplevel)
359       (%def-inst-format
360        ',format-name ',include ,length-in-bits nil
361        ,@(mapcar (lambda (arg) `(list ',(car arg) ,@(massage-arg arg :compile)))
362                  arg-specs)))
363     ,@(mapcan
364        (lambda (arg-spec)
365          (awhen (getf (cdr arg-spec) :reader)
366            `((defun ,it (dchunk dstate)
367                (declare (ignorable dchunk dstate))
368                (flet ((local-filtered-value (offset)
369                         (declare (type filtered-value-index offset))
370                         (aref (dstate-filtered-values dstate) offset))
371                       (local-extract (bytespec)
372                         (dchunk-extract dchunk bytespec)))
373                  (declare (ignorable #'local-filtered-value #'local-extract)
374                           (inline local-filtered-value local-extract))
375                  ;; Delay ARG-FORM-VALUE call until after compile-time-too
376                  ;; processing of !%DEF-INSTRUCTION-FORMAT has happened.
377                  (macrolet
378                      ((reader ()
379                         (let* ((format-args
380                                 (format-args (format-or-lose ',format-name)))
381                                (arg (find ',(car arg-spec) format-args
382                                           :key #'arg-name))
383                                (funstate (make-funstate format-args))
384                                (*!temp-var-counter* 0)
385                                (expr (arg-value-form arg funstate :numeric)))
386                           `(let* ,(make-arg-temp-bindings funstate) ,expr))))
387                    (reader)))))))
388        arg-specs)
389     #-sb-xc-host ; Host doesn't need the real definition.
390     (%def-inst-format
391      ',format-name ',include ,length-in-bits ,default-printer
392      ,@(mapcar (lambda (arg) `(list ',(car arg) ,@(massage-arg arg :eval)))
393                arg-specs))))
394
395(defun %def-inst-format (name inherit length printer &rest arg-specs)
396  (let ((args (if inherit (copy-list (format-args (format-or-lose inherit)))))
397        (seen))
398    (dolist (arg-spec arg-specs)
399      (let* ((arg-name (car arg-spec))
400             (properties (cdr arg-spec))
401             (cell (member arg-name args :key #'arg-name)))
402        (aver (not (memq arg-name seen)))
403        (push arg-name seen)
404        (cond ((not cell)
405               (setq args (nconc args (list (apply #'modify-arg (%make-arg arg-name)
406                                                   length properties)))))
407              (properties
408               (rplaca cell (apply #'modify-arg (copy-structure (car cell))
409                                   length properties))))))
410    (setf (get name 'inst-format)
411          (make-inst-format name (bits-to-bytes length) printer args))))
412
413(defun modify-arg (arg format-length
414                   &key   (value nil value-p)
415                          (type nil type-p)
416                          (prefilter nil prefilter-p)
417                          (printer nil printer-p)
418                          (sign-extend nil sign-extend-p)
419                          (use-label nil use-label-p)
420                          (field nil field-p)
421                          (fields nil fields-p))
422  (when field-p
423    (if fields-p
424        (error ":FIELD and :FIELDS are mutually exclusive")
425        (setf fields (list field) fields-p t)))
426  (when type-p
427    (let ((type-arg (or (get type 'arg-type)
428                        (pd-error "unknown argument type: ~S" type))))
429      (setf (arg-printer arg) (arg-printer type-arg))
430      (setf (arg-prefilter arg) (arg-prefilter type-arg))
431      (setf (arg-sign-extend-p arg) (arg-sign-extend-p type-arg))
432      (setf (arg-use-label arg) (arg-use-label type-arg))))
433  (when value-p
434    (setf (arg-value arg) value))
435  (when prefilter-p
436    (setf (arg-prefilter arg) prefilter))
437  (when sign-extend-p
438    (setf (arg-sign-extend-p arg) sign-extend))
439  (when printer-p
440    (setf (arg-printer arg) printer))
441  (when use-label-p
442    (setf (arg-use-label arg) use-label))
443  (when fields-p
444    (setf (arg-fields arg)
445          (mapcar (lambda (bytespec)
446                    (when (> (+ (byte-position bytespec) (byte-size bytespec))
447                             format-length)
448                      (error "~@<in arg ~S: ~3I~:_~
449                                   The field ~S doesn't fit in an ~
450                                   instruction-format ~W bits wide.~:>"
451                             (arg-name arg) bytespec format-length))
452                    (correct-dchunk-bytespec-for-endianness
453                     bytespec format-length sb!c:*backend-byte-order*))
454                  fields)))
455  arg)
456
457(defun arg-value-form (arg funstate
458                       &optional
459                       (rendering :final)
460                       (allow-multiple-p (neq rendering :numeric)))
461  (let ((forms (gen-arg-forms arg rendering funstate)))
462    (when (and (not allow-multiple-p)
463               (listp forms)
464               (/= (length forms) 1))
465      (pd-error "~S must not have multiple values." arg))
466    (maybe-listify forms)))
467
468(defun correct-dchunk-bytespec-for-endianness (bs unit-bits byte-order)
469  (if (eq byte-order :big-endian)
470      (byte (byte-size bs) (+ (byte-position bs) (- dchunk-bits unit-bits)))
471      bs))
472
473(defun make-arg-temp-bindings (funstate)
474  (let ((bindings nil))
475    ;; Prefilters have to be called in the correct order, so reverse FUNSTATE
476    ;; because we're using PUSH in the inner loop.
477    (dolist (arg-cell (reverse funstate) bindings)
478      ;; These sublists are "backwards", so PUSH ends up being correct.
479      (dolist (rendering (cdr arg-cell))
480        (let* ((binding (cdr rendering))
481               (vars (car binding))
482               (vals (cdr binding)))
483          (if (listp vars)
484              (mapc (lambda (var val) (push `(,var ,val) bindings)) vars vals)
485              (push `(,vars ,vals) bindings)))))))
486
487;;; Return the form(s) that should be evaluated to render ARG in the chosen
488;;; RENDERING style, which is one of :RAW, :SIGN-EXTENDED,
489;;; :FILTERED, :NUMERIC, and :FINAL. Each rendering depends on the preceding
490;;; one, so asking for :FINAL will implicitly compute all renderings.
491(defvar *!temp-var-counter*)
492(defun gen-arg-forms (arg rendering funstate)
493  (labels ((tempvars (n)
494             (if (plusp n)
495                 (cons (package-symbolicate
496                        (load-time-value (find-package "SB!DISASSEM"))
497                        ".T" (write-to-string (incf *!temp-var-counter*)))
498                       (tempvars (1- n))))))
499    (let* ((arg-cell (assq arg funstate))
500           (rendering-temps (cdr (assq rendering (cdr arg-cell))))
501           (vars (car rendering-temps))
502           (forms (cdr rendering-temps)))
503      (unless forms
504        (multiple-value-bind (new-forms single-value-p)
505            (%gen-arg-forms arg rendering funstate)
506          (setq forms new-forms
507                vars (cond ((or single-value-p (atom forms))
508                            (if (symbolp forms) vars (car (tempvars 1))))
509                           ((every #'symbolp forms)
510                            ;; just use the same as the forms
511                            nil)
512                           (t
513                            (tempvars (length forms)))))
514          (push (list* rendering vars forms) (cdr arg-cell))))
515      (or vars forms))))
516
517(defun maybe-listify (forms)
518  (cond ((atom forms)
519         forms)
520        ((/= (length forms) 1)
521         `(list ,@forms))
522        (t
523         (car forms))))
524
525;;; DEFINE-ARG-TYPE Name {Key Value}*
526;;;
527;;; Define a disassembler argument type NAME (which can then be referenced in
528;;; another argument definition using the :TYPE argument). &KEY args are:
529;;;
530;;;  :SIGN-EXTEND boolean
531;;;     If non-NIL, the raw value of this argument is sign-extended.
532;;;
533;;;  :TYPE arg-type-name
534;;;     Inherit any properties of given arg-type.
535;;;
536;;; :PREFILTER function
537;;;     A function which is called (along with all other prefilters,
538;;;     in the order that their arguments appear in the instruction-
539;;;     format) before any printing is done, to filter the raw value.
540;;;     Any uses of READ-SUFFIX must be done inside a prefilter.
541;;;
542;;; :PRINTER function-string-or-vector
543;;;     A function, string, or vector which is used to print an argument of
544;;;     this type.
545;;;
546;;; :USE-LABEL
547;;;     If non-NIL, the value of an argument of this type is used as
548;;;     an address, and if that address occurs inside the disassembled
549;;;     code, it is replaced by a label. If this is a function, it is
550;;;     called to filter the value.
551(defmacro define-arg-type (name &rest args
552                           &key ((:type inherit))
553                                sign-extend prefilter printer use-label)
554  (declare (ignore sign-extend prefilter printer use-label))
555  ;; FIXME: this should be an *unevaluated* macro arg (named :INHERIT)
556  (aver (typep inherit '(or null (cons (eql quote) (cons symbol null)))))
557  (let ((pair (cons name (loop for (ind val) on args by #'cddr
558                               unless (eq ind :type)
559                               nconc (list ind val)))))
560    `(progn
561       (eval-when (:compile-toplevel)
562         (%def-arg-type ',name ,inherit ,@(massage-arg pair :compile)))
563       #-sb-xc-host ; Host doesn't need the real definition.
564       (%def-arg-type ',name ,inherit ,@(massage-arg pair :eval)))))
565
566(defun %def-arg-type (name inherit &rest properties)
567  (setf (get name 'arg-type)
568        (apply 'modify-arg (%make-arg name) nil
569               (nconc (when inherit (list :type inherit)) properties))))
570
571(defun %gen-arg-forms (arg rendering funstate)
572  (declare (type arg arg) (type list funstate))
573  (ecase rendering
574    (:raw ; just extract the bits
575     (mapcar (lambda (bytespec)
576               `(the (unsigned-byte ,(byte-size bytespec))
577                     (local-extract ',bytespec)))
578             (arg-fields arg)))
579    (:sign-extended ; sign-extend, or not
580     (let ((raw-forms (gen-arg-forms arg :raw funstate)))
581       (if (and (arg-sign-extend-p arg) (listp raw-forms))
582           (mapcar (lambda (form field)
583                     `(the (signed-byte ,(byte-size field))
584                           (sign-extend ,form ,(byte-size field))))
585                   raw-forms
586                   (arg-fields arg))
587           raw-forms)))
588    (:filtered ; extract from the prefiltered value vector
589     (let ((pf (arg-prefilter arg)))
590       (if pf
591           (values `(local-filtered-value ,(arg-position arg funstate)) t)
592           (gen-arg-forms arg :sign-extended funstate))))
593    (:numeric ; pass the filtered value to the label adjuster, or not
594     (let ((filtered-forms (gen-arg-forms arg :filtered funstate))
595           (use-label (arg-use-label arg)))
596       ;; use-label = T means that the prefiltered value is already an address,
597       ;; otherwise non-nil means a function to call, and NIL means not a label.
598       ;; So only the middle case needs to call ADJUST-LABEL.
599       (if (and use-label (neq use-label t))
600           `((adjust-label ,(maybe-listify filtered-forms) ,use-label))
601           filtered-forms)))
602    (:final ; if arg is not a label, return numeric value, otherwise a string
603     (let ((numeric-forms (gen-arg-forms arg :numeric funstate)))
604       (if (arg-use-label arg)
605           `((lookup-label ,(maybe-listify numeric-forms)))
606           numeric-forms)))))
607
608(defun find-printer-fun (printer-source args cache *current-instruction-flavor*)
609  (let* ((source (preprocess-printer printer-source args))
610         (funstate (make-funstate args))
611         (forms (let ((*!temp-var-counter* 0))
612                  (compile-printer-list source funstate)))
613         (bindings (make-arg-temp-bindings funstate))
614         (guts `(let* ,bindings ,@forms))
615         (sub-table (assq :printer cache)))
616    (or (cdr (assoc guts (cdr sub-table) :test #'equal))
617        (let ((template
618     '(lambda (chunk inst stream dstate
619               &aux (chunk (truly-the dchunk chunk))
620                    (inst (truly-the instruction inst))
621                    (stream (truly-the stream stream))
622                    (dstate (truly-the disassem-state dstate)))
623       (macrolet ((local-format-arg (arg fmt)
624                    `(funcall (formatter ,fmt) stream ,arg)))
625         (flet ((local-tab-to-arg-column ()
626                  (tab (dstate-argument-column dstate) stream))
627                (local-print-name ()
628                  (princ (inst-print-name inst) stream))
629                (local-write-char (ch)
630                  (write-char ch stream))
631                (local-princ (thing)
632                  (princ thing stream))
633                (local-princ16 (thing)
634                  (princ16 thing stream))
635                (local-call-arg-printer (arg printer)
636                  (funcall printer arg stream dstate))
637                (local-call-global-printer (fun)
638                  (funcall fun chunk inst stream dstate))
639                (local-filtered-value (offset)
640                  (declare (type filtered-value-index offset))
641                  (aref (dstate-filtered-values dstate) offset))
642                (local-extract (bytespec)
643                  (dchunk-extract chunk bytespec))
644                (lookup-label (lab)
645                  (or (gethash lab (dstate-label-hash dstate))
646                      lab))
647                (adjust-label (val adjust-fun)
648                  (funcall adjust-fun val dstate)))
649           (declare (ignorable #'local-tab-to-arg-column
650                               #'local-print-name
651                               #'local-princ #'local-princ16
652                               #'local-write-char
653                               #'local-call-arg-printer
654                               #'local-call-global-printer
655                               #'local-extract
656                               #'local-filtered-value
657                               #'lookup-label #'adjust-label)
658                    (inline local-tab-to-arg-column
659                            local-princ local-princ16
660                            local-call-arg-printer local-call-global-printer
661                            local-filtered-value local-extract
662                            lookup-label adjust-label))
663           :body)))))
664          (cdar (push (cons guts (compile nil (subst guts :body template)))
665                      (cdr sub-table)))))))
666
667(defun preprocess-test (subj form args)
668  (multiple-value-bind (subj test)
669      (if (and (consp form) (symbolp (car form)) (not (keywordp (car form))))
670          (values (car form) (cdr form))
671          (values subj form))
672    (let ((key (if (consp test) (car test) test))
673          (body (if (consp test) (cdr test) nil)))
674      (case key
675        (:constant
676         (if (null body)
677             ;; If no supplied constant values, just any constant is ok,
678             ;; just see whether there's some constant value in the arg.
679             (not
680              (null
681               (arg-value
682                (or (find subj args :key #'arg-name)
683                    (pd-error "unknown argument ~S" subj)))))
684             ;; Otherwise, defer to run-time.
685             form))
686        ((:or :and :not)
687         (sharing-cons
688          form
689          subj
690          (sharing-cons
691           test
692           key
693           (sharing-mapcar
694            (lambda (sub-test)
695              (preprocess-test subj sub-test args))
696            body))))
697        (t form)))))
698
699(defun preprocess-conditionals (printer args)
700  (if (atom printer)
701      printer
702      (case (car printer)
703        (:unless
704         (preprocess-conditionals
705          `(:cond ((:not ,(nth 1 printer)) ,@(nthcdr 2 printer)))
706          args))
707        (:when
708         (preprocess-conditionals `(:cond (,(cdr printer))) args))
709        (:if
710         (preprocess-conditionals
711          `(:cond (,(nth 1 printer) ,(nth 2 printer))
712                  (t ,(nth 3 printer)))
713          args))
714        (:cond
715         (sharing-cons
716          printer
717          :cond
718          (sharing-mapcar
719           (lambda (clause)
720             (let ((filtered-body
721                    (sharing-mapcar
722                     (lambda (sub-printer)
723                       (preprocess-conditionals sub-printer args))
724                     (cdr clause))))
725               (sharing-cons
726                clause
727                (preprocess-test (find-first-field-name filtered-body)
728                                 (car clause)
729                                 args)
730                filtered-body)))
731           (cdr printer))))
732        (quote printer)
733        (t
734         (sharing-mapcar
735          (lambda (sub-printer)
736            (preprocess-conditionals sub-printer args))
737          printer)))))
738
739;;; Return a version of the disassembly-template PRINTER with
740;;; compile-time tests (e.g. :constant without a value), and any
741;;; :CHOOSE operators resolved properly for the args ARGS.
742;;;
743;;; (:CHOOSE Sub*) simply returns the first Sub in which every field
744;;; reference refers to a valid arg.
745(defun preprocess-printer (printer args)
746  (preprocess-conditionals (preprocess-chooses printer args) args))
747
748;;; Return the first non-keyword symbol in a depth-first search of TREE.
749(defun find-first-field-name (tree)
750  (cond ((null tree)
751         nil)
752        ((and (symbolp tree) (not (keywordp tree)))
753         tree)
754        ((atom tree)
755         nil)
756        ((eq (car tree) 'quote)
757         nil)
758        (t
759         (or (find-first-field-name (car tree))
760             (find-first-field-name (cdr tree))))))
761
762(defun preprocess-chooses (printer args)
763  (cond ((atom printer)
764         printer)
765        ((eq (car printer) :choose)
766         (pick-printer-choice (cdr printer) args))
767        (t
768         (sharing-mapcar (lambda (sub) (preprocess-chooses sub args))
769                         printer))))
770
771;;;; some simple functions that help avoid consing when we're just
772;;;; recursively filtering things that usually don't change
773
774(defun sharing-cons (old-cons car cdr)
775  #!+sb-doc
776  "If CAR is eq to the car of OLD-CONS and CDR is eq to the CDR, return
777  OLD-CONS, otherwise return (cons CAR CDR)."
778  (if (and (eq car (car old-cons)) (eq cdr (cdr old-cons)))
779      old-cons
780      (cons car cdr)))
781
782(defun sharing-mapcar (fun list)
783  (declare (type function fun))
784  #!+sb-doc
785  "A simple (one list arg) mapcar that avoids consing up a new list
786  as long as the results of calling FUN on the elements of LIST are
787  eq to the original."
788  (and list
789       (sharing-cons list
790                     (funcall fun (car list))
791                     (sharing-mapcar fun (cdr list)))))
792
793(defun all-arg-refs-relevant-p (printer args)
794  (cond ((or (null printer) (keywordp printer) (eq printer t))
795         t)
796        ((symbolp printer)
797         (find printer args :key #'arg-name))
798        ((listp printer)
799         (every (lambda (x) (all-arg-refs-relevant-p x args))
800                printer))
801        (t t)))
802
803(defun pick-printer-choice (choices args)
804  (dolist (choice choices
805           (pd-error "no suitable choice found in ~S" choices))
806    (when (all-arg-refs-relevant-p choice args)
807      (return choice))))
808
809(defun compile-printer-list (sources funstate)
810  (when sources
811    (cons (compile-printer-body (car sources) funstate)
812          (compile-printer-list (cdr sources) funstate))))
813
814(defun compile-printer-body (source funstate)
815  (cond ((null source)
816         nil)
817        ((eq source :name)
818         `(local-print-name))
819        ((eq source :tab)
820         `(local-tab-to-arg-column))
821        ((keywordp source)
822         (pd-error "unknown printer element: ~S" source))
823        ((symbolp source)
824         (compile-print source funstate))
825        ((atom source)
826         `(local-princ ',source))
827        ((eq (car source) :using)
828         (unless (or (stringp (cadr source))
829                     (and (listp (cadr source))
830                          (eq (caadr source) 'function)))
831           (pd-error "The first arg to :USING must be a string or #'function."))
832         ;; For (:using #'F) to be stuffed in properly, the printer as expressed
833         ;; in its DSL would have to compile-time expand into a thing that
834         ;; reconstructs it such that #'F forms don't appear inside quoted list
835         ;; structure. Lacking the ability to do that, we treat #'F as a bit of
836         ;; syntax to be evaluated manually.
837         (compile-print (caddr source) funstate
838                        (let ((f (cadr source)))
839                          (if (typep f '(cons (eql function) (cons symbol null)))
840                              (symbol-function (second f))
841                              f))))
842        ((eq (car source) :plus-integer)
843         ;; prints the given field proceed with a + or a -
844         (let ((form
845                (arg-value-form (arg-or-lose (cadr source) funstate)
846                                funstate
847                                :numeric)))
848           `(progn
849              (when (>= ,form 0)
850                (local-write-char #\+))
851              (local-princ ,form))))
852        ((eq (car source) 'quote)
853         `(local-princ ,source))
854        ((eq (car source) 'function)
855         `(local-call-global-printer ,source))
856        ((eq (car source) :cond)
857         `(cond ,@(mapcar (lambda (clause)
858                            `(,(compile-test (find-first-field-name
859                                              (cdr clause))
860                                             (car clause)
861                                             funstate)
862                              ,@(compile-printer-list (cdr clause)
863                                                      funstate)))
864                          (cdr source))))
865        ;; :IF, :UNLESS, and :WHEN are replaced by :COND during preprocessing
866        (t
867         `(progn ,@(compile-printer-list source funstate)))))
868
869(defun compile-print (arg-name funstate &optional printer)
870  (let* ((arg (arg-or-lose arg-name funstate))
871         (printer (or printer (arg-printer arg))))
872    (etypecase printer
873      (string
874       `(local-format-arg ,(arg-value-form arg funstate) ,printer))
875      (vector
876       `(local-princ (aref ,printer ,(arg-value-form arg funstate :numeric))))
877      ((or function (cons (eql function)))
878       `(local-call-arg-printer ,(arg-value-form arg funstate) ,printer))
879      (boolean
880       `(,(if (arg-use-label arg) 'local-princ16 'local-princ)
881         ,(arg-value-form arg funstate))))))
882
883(defun compare-fields-form (val-form-1 val-form-2)
884  (flet ((listify-fields (fields)
885           (cond ((symbolp fields) fields)
886                 ((every #'constantp fields) `',fields)
887                 (t `(list ,@fields)))))
888    (cond ((or (symbolp val-form-1) (symbolp val-form-2))
889           `(equal ,(listify-fields val-form-1)
890                   ,(listify-fields val-form-2)))
891          (t
892           `(and ,@(mapcar (lambda (v1 v2) `(= ,v1 ,v2))
893                           val-form-1 val-form-2))))))
894
895(defun compile-test (subj test funstate)
896  (when (and (consp test) (symbolp (car test)) (not (keywordp (car test))))
897    (setf subj (car test)
898          test (cdr test)))
899  (let ((key (if (consp test) (car test) test))
900        (body (if (consp test) (cdr test) nil)))
901    (cond ((null key)
902           nil)
903          ((eq key t)
904           t)
905          ((eq key :constant)
906           (let* ((arg (arg-or-lose subj funstate))
907                  (fields (arg-fields arg))
908                  (consts body))
909             (when (not (= (length fields) (length consts)))
910               (pd-error "The number of constants doesn't match number of ~
911                          fields in: (~S :constant~{ ~S~})"
912                         subj body))
913             (compare-fields-form (gen-arg-forms arg :numeric funstate)
914                                  consts)))
915          ((eq key :positive)
916           `(> ,(arg-value-form (arg-or-lose subj funstate) funstate :numeric)
917               0))
918          ((eq key :negative)
919           `(< ,(arg-value-form (arg-or-lose subj funstate) funstate :numeric)
920               0))
921          ((eq key :same-as)
922           (let ((arg1 (arg-or-lose subj funstate))
923                 (arg2 (arg-or-lose (car body) funstate)))
924             (unless (and (= (length (arg-fields arg1))
925                             (length (arg-fields arg2)))
926                          (every (lambda (bs1 bs2)
927                                   (= (byte-size bs1) (byte-size bs2)))
928                                 (arg-fields arg1)
929                                 (arg-fields arg2)))
930               (pd-error "can't compare differently sized fields: ~
931                          (~S :same-as ~S)" subj (car body)))
932             (compare-fields-form (gen-arg-forms arg1 :numeric funstate)
933                                  (gen-arg-forms arg2 :numeric funstate))))
934          ((eq key :or)
935           `(or ,@(mapcar (lambda (sub) (compile-test subj sub funstate))
936                          body)))
937          ((eq key :and)
938           `(and ,@(mapcar (lambda (sub) (compile-test subj sub funstate))
939                           body)))
940          ((eq key :not)
941           `(not ,(compile-test subj (car body) funstate)))
942          ((and (consp key) (null body))
943           (compile-test subj key funstate))
944          (t
945           (pd-error "bogus test-form: ~S" test)))))
946
947(defun compute-mask-id (args)
948  (let ((mask dchunk-zero)
949        (id dchunk-zero))
950    (dolist (arg args (values mask id))
951      (let ((av (arg-value arg)))
952        (when av
953          (do ((fields (arg-fields arg) (cdr fields))
954               (values (if (atom av) (list av) av) (cdr values)))
955              ((null fields))
956            (let ((field-mask (dchunk-make-mask (car fields))))
957              (when (/= (dchunk-and mask field-mask) dchunk-zero)
958                (pd-error "The field ~S in arg ~S overlaps some other field."
959                          (car fields)
960                          (arg-name arg)))
961              (dchunk-insertf id (car fields) (car values))
962              (dchunk-orf mask field-mask))))))))
963
964#!-sb-fluid (declaim (inline bytes-to-bits))
965(declaim (maybe-inline sign-extend aligned-p align tab tab0))
966
967(defun bytes-to-bits (bytes)
968  (declare (type disassem-length bytes))
969  (* bytes sb!vm:n-byte-bits))
970
971(defun bits-to-bytes (bits)
972  (declare (type disassem-length bits))
973  (multiple-value-bind (bytes rbits)
974      (truncate bits sb!vm:n-byte-bits)
975    (when (not (zerop rbits))
976      (error "~W bits is not a byte-multiple." bits))
977    bytes))
978
979(defun sign-extend (int size)
980  (declare (type integer int)
981           (type (integer 0 128) size))
982  (if (logbitp (1- size) int)
983      (dpb int (byte size 0) -1)
984      int))
985
986;;; Is ADDRESS aligned on a SIZE byte boundary?
987(defun aligned-p (address size)
988  (declare (type address address)
989           (type alignment size))
990  (zerop (logand (1- size) address)))
991
992;;; Return ADDRESS aligned *upward* to a SIZE byte boundary.
993(defun align (address size)
994  (declare (type address address)
995           (type alignment size))
996  (logandc1 (1- size) (+ (1- size) address)))
997
998(defun tab (column stream)
999  (funcall (formatter "~V,1t") stream column)
1000  nil)
1001(defun tab0 (column stream)
1002  (funcall (formatter "~V,0t") stream column)
1003  nil)
1004
1005(defun princ16 (value stream)
1006  (write value :stream stream :radix t :base 16 :escape nil))
1007
1008(defstruct (storage-info (:copier nil))
1009  (groups nil :type list)               ; alist of (name . location-group)
1010  (debug-vars #() :type vector))
1011
1012(defstruct (segment (:conc-name seg-)
1013                    (:constructor %make-segment)
1014                    (:copier nil))
1015  (sap-maker (missing-arg)
1016             :type (function () system-area-pointer))
1017  ;; Length in bytes of the range of memory covered by this segment.
1018  (length 0 :type disassem-length)
1019  (virtual-location 0 :type address)
1020  (storage-info nil :type (or null storage-info))
1021  ;; KLUDGE: CODE-COMPONENT is not a type the host understands
1022  #-sb-xc-host (code nil :type (or null code-component))
1023  (unboxed-data-range nil :type (or null (cons fixnum fixnum)))
1024  (hooks nil :type list))
1025
1026;;; All state during disassembly. We store some seemingly redundant
1027;;; information so that we can allow garbage collect during disassembly and
1028;;; not get tripped up by a code block being moved...
1029(defstruct (disassem-state (:conc-name dstate-)
1030                           (:constructor %make-dstate)
1031                           (:copier nil))
1032  ;; offset of current pos in segment
1033  (cur-offs 0 :type offset)
1034  ;; offset of next position
1035  (next-offs 0 :type offset)
1036  ;; a sap pointing to our segment
1037  (segment-sap nil :type (or null system-area-pointer))
1038  ;; the current segment
1039  (segment nil :type (or null segment))
1040  ;; to avoid buffer overrun at segment end, we might need to copy bytes
1041  ;; here first because sap-ref-dchunk reads a fixed length.
1042  (scratch-buf (make-array 8 :element-type '(unsigned-byte 8)))
1043  ;; what to align to in most cases
1044  (alignment sb!vm:n-word-bytes :type alignment)
1045  (byte-order :little-endian
1046              :type (member :big-endian :little-endian))
1047  ;; for user code to hang stuff off of
1048  (properties nil :type list)
1049  ;; for user code to hang stuff off of, cleared each time after a
1050  ;; non-prefix instruction is processed
1051  (inst-properties nil :type (or fixnum list))
1052  (filtered-values (make-array max-filtered-value-index)
1053                   :type filtered-value-vector)
1054  ;; to avoid consing decoded values, a prefilter can keep a chain
1055  ;; of objects in these slots. The objects returned here
1056  ;; are reusable for the next instruction.
1057  (filtered-arg-pool-in-use)
1058  (filtered-arg-pool-free)
1059  ;; used for prettifying printing
1060  (addr-print-len nil :type (or null (integer 0 20)))
1061  (argument-column 0 :type column)
1062  ;; to make output look nicer
1063  (output-state :beginning
1064                :type (member :beginning
1065                              :block-boundary
1066                              nil))
1067
1068  ;; alist of (address . label-number)
1069  (labels nil :type list)
1070  ;; same as LABELS slot data, but in a different form
1071  (label-hash (make-hash-table) :type hash-table)
1072  ;; list of function
1073  (fun-hooks nil :type list)
1074
1075  ;; alist of (address . label-number), popped as it's used
1076  (cur-labels nil :type list)
1077  ;; OFFS-HOOKs, popped as they're used
1078  (cur-offs-hooks nil :type list)
1079
1080  ;; for the current location
1081  (notes nil :type list)
1082
1083  ;; currently active source variables
1084  (current-valid-locations nil :type (or null (vector bit))))
1085(defmethod print-object ((dstate disassem-state) stream)
1086  (print-unreadable-object (dstate stream :type t)
1087    (format stream
1088            "+~W~@[ in ~S~]"
1089            (dstate-cur-offs dstate)
1090            (dstate-segment dstate))))
1091
1092;;; Return the absolute address of the current instruction in DSTATE.
1093(defun dstate-cur-addr (dstate)
1094  (the address (+ (seg-virtual-location (dstate-segment dstate))
1095                  (dstate-cur-offs dstate))))
1096
1097;;; Return the absolute address of the next instruction in DSTATE.
1098(defun dstate-next-addr (dstate)
1099  (the address (+ (seg-virtual-location (dstate-segment dstate))
1100                  (dstate-next-offs dstate))))
1101
1102;;; Get the value of the property called NAME in DSTATE. Also SETF'able.
1103;;;
1104;;; KLUDGE: The associated run-time machinery for this is in
1105;;; target-disassem.lisp (much later). This is here just to make sure
1106;;; it's defined before it's used. -- WHN ca. 19990701
1107(defmacro dstate-get-prop (dstate name)
1108  `(getf (dstate-properties ,dstate) ,name))
1109
1110;;; Put PROPERTY into the set of instruction properties in DSTATE.
1111;;; PROPERTY can be a fixnum or symbol, but any given backend
1112;;; must exclusively use one or the other property representation.
1113(defun dstate-put-inst-prop (dstate property)
1114  (if (fixnump property)
1115      (setf (dstate-inst-properties dstate)
1116            (logior (or (dstate-inst-properties dstate) 0) property))
1117      (push property (dstate-inst-properties dstate))))
1118
1119;;; Return non-NIL if PROPERTY is in the set of instruction properties in
1120;;; DSTATE. As with -PUT-INST-PROP, we can have a bitmask or a plist.
1121(defun dstate-get-inst-prop (dstate property)
1122  (if (fixnump property)
1123      (logtest (or (dstate-inst-properties dstate) 0) property)
1124      (memq property (dstate-inst-properties dstate))))
1125
1126(declaim (ftype function read-suffix))
1127(defun read-signed-suffix (length dstate)
1128  (declare (type (member 8 16 32 64) length)
1129           (type disassem-state dstate)
1130           (optimize (speed 3) (safety 0)))
1131  (sign-extend (read-suffix length dstate) length))
1132