1; Part of Scheme 48 1.9. See file COPYING for notices and license. 2 3; Authors: Martin Gasbichler, Mike Sperber 4 5;; Generic byte code parser 6 7(define-record-type attribution :attribution 8 (make-attribution init-template template-literal 9 opcode-table make-label at-label) 10 attribution? 11 (init-template attribution-init-template) 12 (template-literal attribution-template-literal) 13 (opcode-table attribution-opcode-table) 14 (make-label attribution-make-label) 15 (at-label attribution-at-label)) 16 17(define (opcode-table-ref table i) 18 (vector-ref table i)) 19 20(define (opcode-table-set! table i new) 21 (vector-set! table i new)) 22 23(define (make-opcode-table default) 24 (make-vector op-count default)) 25 26 27;; Example attribution 28(define (disass) 29 (define (disass-init-template state template p-args push-template? push-env? push-closure?) 30 (cons (list 0 'protocol p-args push-template? push-env? push-closure?) 31 state)) 32 33 (define instruction-set-table 34 (make-opcode-table 35 (lambda (opcode template state pc len . args) 36 (cons `(,pc ,(enumerand->name opcode op) ,@(map cdr args)) state)))) 37 38 (define (attribute-literal literal i state) 39 state) 40 41 (define (make-label target-pc) 42 target-pc) 43 44 (define (at-label label state) 45 (cons `(,label :) state)) 46 47 (make-attribution disass-init-template attribute-literal 48 instruction-set-table make-label at-label)) 49 50(define (parse-template x state attribution) 51 (let* ((tem (coerce-to-template x)) 52 (template-len (template-length tem))) 53 (let lp ((i 1) (state state)) 54 (if (= i template-len) 55 (parse-template-code tem (template-code tem) state attribution) 56 (let ((literal (template-ref tem i))) 57 (if (template? literal) 58 (lp (+ i 1) (parse-template literal state attribution)) 59 (lp (+ i 1) ((attribution-template-literal attribution) literal i state)))))))) 60 61(define (byte-code? x) 62 (let ((code (template-code (coerce-to-template x)))) 63 (define (byte-code-protocol? protocol) 64 (or (<= protocol maximum-stack-args) 65 (= protocol two-byte-nargs-protocol) 66 (= protocol two-byte-nargs+list-protocol) 67 (= protocol ignore-values-protocol) 68 (= protocol call-with-values-protocol) 69 (= protocol args+nargs-protocol) 70 (= protocol nary-dispatch-protocol) 71 (and (= protocol big-stack-protocol) 72 (byte-code-protocol? 73 (code-vector-ref code (- (code-vector-length code) 3)))))) 74 (byte-code-protocol? (code-vector-ref code 1)))) 75 76 77(define (parse-template-code tem code state attribution) 78 (with-template 79 tem code state attribution 80 (lambda (pc length state) 81 (let loop ((pc pc) 82 (state state)) 83 (if (< pc length) 84 (receive (size state) 85 (parse-instruction tem code pc state attribution) 86 (loop (+ pc size) state)) 87 state))))) 88 89(define (with-template tem code state attribution fun) 90 (let ((length (template-code-length code))) 91 (let-fluid 92 *bc-make-labels* '() 93 (lambda () 94 (for-each 95 (lambda (pc) (pc->label pc attribution)) 96 (debug-data-jump-back-dests (template-debug-data tem))) 97 (receive (size protocol-arguments) 98 (parse-protocol code 1 attribution) 99 (receive (push-template? push-env? push-closure?) 100 (case (code-vector-ref code (+ size 1)) 101 ((#b000) (values #f #f #f)) 102 ((#b001) (values #t #f #f)) 103 ((#b010) (values #f #t #f)) 104 ((#b011) (values #t #t #f)) 105 ((#b100) (values #f #f #t)) 106 ((#b110) (values #f #t #t)) 107 ((#b101) (values #t #f #t)) 108 ((#b111) (values #t #t #t)) 109 (else (assertion-violation 'with-template "invalid init-template spec" 110 (code-vector-ref code (+ size 1))))) 111 (fun (+ size 2) 112 length 113 ((attribution-init-template attribution) 114 state tem protocol-arguments push-template? push-env? push-closure?)))))))) 115 116 117(define (parse-instruction template code pc state attribution) 118 (let* ((opcode (code-vector-ref code pc)) 119 (len.rev-args (cond ((= opcode (enum op computed-goto)) ; unused? 120 (assertion-violation 'parse-instruction 121 "computed-goto in parse-bytecode")) 122 (else 123 (parse-opcode-args opcode 124 pc 125 code 126 template 127 attribution)))) 128 (total-len (+ 1 (car len.rev-args)))) ; 1 for the opcode 129 (values total-len 130 (really-parse-instruction pc total-len opcode template state 131 (reverse (cdr len.rev-args)) attribution)))) 132 133(define (really-parse-instruction pc len opcode template state args attribution) 134 (let ((new-state (if (label-at-pc? pc) 135 ((attribution-at-label attribution) 136 (pc->label pc attribution) 137 state) 138 state))) 139 (let ((opcode-attribution 140 (opcode-table-ref (attribution-opcode-table attribution) opcode))) 141 (if opcode-attribution 142 (apply opcode-attribution opcode template new-state pc len args) 143 (assertion-violation 'parse-instruction "cannot attribute " 144 (enumerand->name opcode op) args))))) 145 146;;-------------------- 147;; labels 148 149(define *bc-make-labels* (make-fluid '())) 150 151(define (add-pc! pc attribution) 152 (set-fluid! *bc-make-labels* 153 (cons (cons pc ((attribution-make-label attribution) pc)) 154 (fluid *bc-make-labels*)))) 155 156(define (pc->label pc attribution) 157 (let ((maybe-pc.label (assq pc (fluid *bc-make-labels*)))) 158 (if maybe-pc.label 159 (cdr maybe-pc.label) 160 (begin 161 (add-pc! pc attribution) 162 (pc->label pc attribution))))) 163 164(define (label-at-pc? pc) 165 (if (assq pc (fluid *bc-make-labels*)) #t #f)) 166 167; (enum op make-[big-]flat-env) 168; number of vars 169; number of closures 170; [offset of template in frame 171; offsets of templates in template] 172; number of variables in frame (size) 173; offsets of vars in frame 174; [offset of env in frame 175; number of vars in env 176; offsets of vars in level]* 177 178(define-record-type env-data :env-data 179 (make-env-data total-count frame-offsets maybe-template-index closure-offsets 180 env-offsets) 181 env-data? 182 (total-count env-data-total-count) 183 (frame-offsets env-data-frame-offsets) 184 (maybe-template-index env-data-maybe-template-index) 185 (closure-offsets env-data-closure-offsets) 186 (env-offsets env-data-env-offsets)) 187 188(define (parse-flat-env-args pc code size fetch) 189 (let ((start-pc pc) 190 (total-count (fetch code pc)) 191 (closure-count (fetch code (+ pc size)))) 192 (receive (template-index closure-offsets) 193 (if (< 0 closure-count) 194 (values (fetch code (+ pc size size)) 195 (get-offsets code (+ pc size size size) 196 size fetch closure-count)) 197 (values #f '())) 198 (let* ((pc (if (< 0 closure-count) 199 (+ pc 200 (* 2 size) ; counts 201 size ; template offset 202 (* closure-count size)) ; subtemplates 203 (+ pc (* 2 size)))) ; counts 204 (frame-count (fetch code pc)) 205 (pc (+ pc size))) 206 (let ((frame-offsets (get-offsets code pc size fetch frame-count))) 207 (let ((pc (+ pc (* frame-count size))) 208 (count (+ closure-count frame-count))) 209 (let loop ((pc pc) (count count) (rev-env-offsets '())) 210 (if (= count total-count) 211 (values (- pc start-pc) 212 (make-env-data total-count frame-offsets 213 template-index closure-offsets 214 (reverse rev-env-offsets))) 215 (let* ((env (fetch code pc)) 216 (count-here (fetch code (+ pc size))) 217 (indexes (get-offsets code 218 (+ pc size size) 219 size 220 fetch 221 count-here))) 222 (loop (+ pc (* (+ 2 count-here) size)) 223 (+ count count-here) 224 (cons (cons env indexes) rev-env-offsets))))))))))) 225 226 227(define (get-offsets code pc size fetch count) 228 (do ((pc pc (+ pc size)) 229 (i 0 (+ i 1)) 230 (r '() (cons (fetch code pc) r))) 231 ((= i count) 232 (reverse r)))) 233 234 235; Parse a protocol, returning the number of bytes of instruction stream that 236; were consumed. PC has to point behind the PRTOCOL opcode 237 238(define (parse-protocol code pc attribution) 239 (let ((protocol (code-vector-ref code pc))) 240 (really-parse-protocol protocol code pc attribution))) 241 242(define (really-parse-protocol protocol code pc attribution) 243 (cond ((<= protocol maximum-stack-args) 244 (values 1 (list protocol))) 245 ((= protocol two-byte-nargs-protocol) 246 (values 3 (list protocol (get-offset code (+ pc 1))))) 247 ((= protocol two-byte-nargs+list-protocol) 248 (values 3 (list protocol (get-offset code (+ pc 1))))) 249 ((= protocol ignore-values-protocol) 250 (values 1 (list protocol))) 251 ((= protocol call-with-values-protocol) 252 (let ((offset (get-offset code (+ pc 1)))) 253 (values 3 (list protocol 254 (pc->label (- (+ offset pc) 1) 255 attribution) 256 (zero? offset))))) 257 ((= protocol args+nargs-protocol) 258 (values 2 (list protocol (code-vector-ref code (+ pc 1))))) 259 ((= protocol nary-dispatch-protocol) 260 (values 5 (cons protocol (parse-dispatch code pc attribution)))) 261 ((= protocol big-stack-protocol) 262 (let ((real-protocol (code-vector-ref code 263 (- (code-vector-length code) 3))) 264 (stack-size (get-offset code (- (code-vector-length code) 2)))) 265 (receive (size real-attribution) 266 (really-parse-protocol real-protocol code pc attribution) 267 (values size 268 (list protocol real-attribution stack-size))))) 269 (else 270 (assertion-violation 'parse-protocol "unknown protocol" protocol pc)))) 271 272(define (parse-dispatch code pc attribution) 273 (define (maybe-parse-one-dispatch index) 274 (let ((offset (code-vector-ref code (+ pc index)))) 275 (if (= offset 0) 276 #f 277 (pc->label (+ offset pc) attribution)))) 278 279 (map maybe-parse-one-dispatch (list 3 4 5 2))) 280 281(define (protocol-protocol p-args) 282 (car p-args)) 283 284(define (n-ary-protocol? p-args) 285 (let ((protocol (car p-args))) 286 (if (or (= protocol two-byte-nargs+list-protocol) 287 (= protocol call-with-values-protocol) 288 (= protocol ignore-values-protocol)) 289 #t 290 (if (or (<= protocol maximum-stack-args) 291 (= protocol two-byte-nargs-protocol)) 292 #f 293 (if (= protocol big-stack-protocol) 294 (n-ary-protocol? (cadr p-args)) 295 (assertion-violation 'n-ary-protocol? 296 "unknown protocol" p-args)))))) 297 298(define (protocol-nargs p-args) 299 (let ((protocol (car p-args))) 300 (cond ((<= protocol maximum-stack-args) 301 protocol) 302 ((= protocol two-byte-nargs-protocol) 303 (cadr p-args)) 304 ((= protocol two-byte-nargs+list-protocol) 305 (cadr p-args)) 306 ((= protocol args+nargs-protocol) 307 (cadr p-args)) 308 ((= protocol big-stack-protocol) 309 (protocol-nargs (cadr p-args))) 310 ((= protocol ignore-values-protocol) 311 0) 312 ((= protocol call-with-values-protocol) 313 (assertion-violation 'protocol-nargs 314 "call-with-values-protocol in protocol-nargs")) 315 (else 316 (assertion-violation 'protocol-nargs 317 "unknown protocol" p-args))))) 318 319(define (protocol-cwv-tailcall? p-args) 320 (let ((protocol (protocol-protocol p-args))) 321 (if (not (= protocol call-with-values-protocol)) 322 (assertion-violation 'protocol-cwv-tailcall? 323 "invalid protocol" protocol)) 324 (caddr p-args))) 325 326(define (call-with-values-protocol-target p-args) 327 (let ((protocol (protocol-protocol p-args))) 328 (if (not (= protocol call-with-values-protocol)) 329 (assertion-violation 'call-with-values-protocol-target 330 "invalid protocol" protocol)) 331 (cadr p-args))) 332 333; Generic opcode argument parser 334 335(define (parse-opcode-args op start-pc code template attribution) 336 (let ((specs (vector-ref opcode-arg-specs op))) 337 (let loop ((specs specs) (pc (+ start-pc 1)) (len 0) (args '())) 338 (if (null? specs) 339 (cons len args) 340 (let ((spec (car specs))) 341 (cond 342 ((eq? spec 'protocol) 343 (receive (size p-args) 344 (parse-protocol code pc attribution) 345 (loop (cdr specs) 346 (+ pc size) 347 (+ len size) 348 (cons (cons 'protocol p-args) args)))) 349 ((or (eq? spec 'env-data) 350 (eq? spec 'big-env-data)) 351 (receive (size env-data) 352 (receive (slot-size fetch) 353 (if (eq? spec 'env-data) 354 (values 1 code-vector-ref) 355 (values 2 get-offset)) 356 (parse-flat-env-args pc code slot-size fetch)) 357 (loop (cdr specs) 358 (+ pc size) 359 (+ len size) 360 (cons (cons 'env-data env-data) args)))) 361 ((eq? spec 'instr) 362 (let ((opcode (code-vector-ref code pc))) 363 (let ((len.revargs (parse-opcode-args opcode 364 pc 365 code 366 template 367 attribution))) 368 (loop (cdr specs) 369 (+ pc 1 (car len.revargs)) 370 (+ len 1 (car len.revargs)) 371 (cons 372 (cons 'instr 373 (cons opcode (reverse (cdr len.revargs)))) 374 args))))) 375 ((= 0 (arg-spec-size spec pc code)) 376 (cons len args)) 377 (else 378 (let ((arg (parse-opcode-arg specs 379 pc 380 start-pc 381 code 382 template 383 attribution))) 384 (loop (cdr specs) 385 (+ pc (arg-spec-size spec pc code)) 386 (+ len (arg-spec-size spec pc code)) 387 (cons arg args)))))))))) 388 389; The number of bytes required by an argument. 390 391(define (arg-spec-size spec pc code) 392 (case spec 393 ((byte nargs stack-index index literal stob) 1) 394 ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index offset offset-) 2) 395 ((env-data) (assertion-violation 'arg-spec-size "env-data in arg-spec-size")) 396 ((protocol) (assertion-violation 'arg-spec-size "protocol in arg-spec-size")) 397 ((moves-data) 398 (let ((n-moves (code-vector-ref code pc))) 399 (+ 1 (* 2 n-moves)))) 400 ((big-moves-data) 401 (let ((n-moves (code-vector-ref code pc))) 402 (+ 2 (* 4 n-moves)))) 403 ((cont-data) 404 (- (get-offset code pc) 1)) ; size includes opcode 405 (else 0))) 406 407; Parse the particular type of argument. 408 409(define (parse-opcode-arg specs pc start-pc code template attribution) 410 (cons 411 (car specs) 412 (case (car specs) 413 ((byte nargs stack-index index) 414 (code-vector-ref code pc)) 415 ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index) 416 (get-offset code pc)) 417 ((literal) 418 (- (code-vector-ref code pc) 128)) 419 ((offset) 420 (let ((offset (get-offset code pc))) 421 (if (zero? offset) 422 #f 423 (pc->label (+ start-pc offset) attribution)))) 424 ((offset-) 425 (pc->label (- start-pc (get-offset code pc)) attribution)) 426 ((stob) 427 (code-vector-ref code pc)) 428 ((cont-data) 429 (parse-cont-data-args pc code template attribution)) 430 ((moves-data) 431 (let ((n-moves (code-vector-ref code pc))) 432 (let loop ((offset (+ pc 1)) 433 (n n-moves)) 434 (if (zero? n) 435 '() 436 (cons (cons (code-vector-ref code offset) 437 (code-vector-ref code (+ offset 1))) 438 (loop (+ offset 2) (- n 1))))))) 439 ((big-moves-data) 440 (let ((n-moves (get-offset code pc))) 441 (let loop ((offset (+ pc 2)) 442 (n n-moves)) 443 (if (zero? n) 444 '() 445 (cons (cons (get-offset code offset) 446 (get-offset code (+ offset 2))) 447 (loop (+ offset 4) (- n 1))))))) 448 (else (assertion-violation 'parse-opcode-arg 449 "unknown arg spec: " (car specs)))))) 450 451(define-record-type cont-data :cont-data 452 (make-cont-data length mask-bytes live-offsets template pc gc-mask-size depth) 453 cont-data? 454 (length cont-data-length) 455 (mask-bytes cont-data-mask-bytes) 456 ;; #f if all are live 457 (live-offsets cont-data-live-offsets) 458 (template cont-data-template) 459 (pc cont-data-pc) 460 (gc-mask-size cont-data-gc-mask-size) 461 (depth cont-data-depth)) 462 463(define (parse-cont-data-args pc code template attribution) 464 (let* ((len (get-offset code pc)) 465 (end-pc (- (+ pc len) 1)) ; len includes opcode 466 (gc-mask-size (code-vector-ref code (- end-pc 3))) 467 (depth (get-offset code (- end-pc 2))) 468 (offset (get-offset code (- end-pc 5))) 469 (template (get-offset code (- end-pc 7))) 470 (mask-bytes 471 (let lp ((the-pc (+ pc 2)) (mask-bytes '())) 472 (if (>= the-pc (+ pc 2 gc-mask-size)) 473 mask-bytes 474 (lp (+ the-pc 1) 475 (cons (code-vector-ref code the-pc) mask-bytes))))) 476 (live-offsets 477 (and (not (zero? gc-mask-size)) 478 (gc-mask-live-offsets (bytes->bits mask-bytes))))) 479 (make-cont-data len 480 mask-bytes 481 live-offsets 482 template 483 (pc->label offset attribution) 484 gc-mask-size 485 depth))) 486 487(define (bytes->bits l) 488 (let loop ((n 0) (l l)) 489 (if (null? l) 490 n 491 (loop (+ (arithmetic-shift n 8) (car l)) 492 (cdr l))))) 493 494(define (gc-mask-live-offsets mask) 495 (let loop ((mask mask) (i 0) (l '())) 496 (if (zero? mask) 497 (reverse l) 498 (loop (arithmetic-shift mask -1) (+ 1 i) 499 (if (odd? mask) 500 (cons i l) 501 l))))) 502 503;---------------- 504; Utilities. 505 506; TODO: Put the template-related stuff into a separate module? 507 508; Turn OBJ into a template, if possible. 509 510(define (coerce-to-template obj) 511 (cond ((template? obj) obj) 512 ((closure? obj) (closure-template obj)) 513 ((continuation? obj) (continuation-template obj)) 514 (else (assertion-violation 'coerce-to-template 515 "expected a procedure or continuation" obj)))) 516 517(define (template-code-length code) 518 (if (and (= (enum op protocol) 519 (code-vector-ref code 0)) 520 (= big-stack-protocol 521 (code-vector-ref code 1))) 522 (- (code-vector-length code) 3) 523 (code-vector-length code))) 524 525 526; Fetch the two-byte value at PC in CODE. 527 528(define (get-offset code pc) 529 (+ (* (code-vector-ref code pc) 530 byte-limit) 531 (code-vector-ref code (+ pc 1)))) 532 533