1; Part of Scheme 48 1.9.  See file COPYING for notices and license.
2
3; Authors: Martin Gasbichler, Mike Sperber
4
5;; Generic byte code parser
6
7(define-record-type attribution :attribution
8  (make-attribution init-template template-literal
9		    opcode-table make-label at-label)
10  attribution?
11  (init-template attribution-init-template)
12  (template-literal attribution-template-literal)
13  (opcode-table attribution-opcode-table)
14  (make-label attribution-make-label)
15  (at-label attribution-at-label))
16
17(define (opcode-table-ref table i)
18  (vector-ref table i))
19
20(define (opcode-table-set! table i new)
21  (vector-set! table i new))
22
23(define (make-opcode-table default)
24  (make-vector op-count default))
25
26
27;; Example attribution
28(define (disass)
29  (define (disass-init-template state template p-args push-template? push-env? push-closure?)
30    (cons (list 0 'protocol p-args push-template? push-env? push-closure?)
31          state))
32
33  (define instruction-set-table
34    (make-opcode-table
35     (lambda (opcode template state pc len . args)
36       (cons `(,pc ,(enumerand->name opcode op) ,@(map cdr args)) state))))
37
38  (define (attribute-literal literal i state)
39    state)
40
41  (define (make-label target-pc)
42    target-pc)
43
44  (define (at-label label state)
45    (cons `(,label :) state))
46
47  (make-attribution disass-init-template attribute-literal
48                    instruction-set-table make-label at-label))
49
50(define (parse-template x state attribution)
51  (let* ((tem (coerce-to-template x))
52         (template-len (template-length tem)))
53        (let lp ((i 1) (state state))
54          (if (= i template-len)
55              (parse-template-code tem (template-code tem) state attribution)
56              (let ((literal (template-ref tem i)))
57                (if (template? literal)
58                    (lp (+ i 1) (parse-template literal state attribution))
59                    (lp (+ i 1) ((attribution-template-literal attribution) literal i state))))))))
60
61(define (byte-code? x)
62  (let ((code (template-code (coerce-to-template x))))
63    (define (byte-code-protocol? protocol)
64      (or (<= protocol maximum-stack-args)
65          (= protocol two-byte-nargs-protocol)
66          (= protocol two-byte-nargs+list-protocol)
67          (= protocol ignore-values-protocol)
68          (= protocol call-with-values-protocol)
69          (= protocol args+nargs-protocol)
70          (= protocol nary-dispatch-protocol)
71          (and (= protocol big-stack-protocol)
72               (byte-code-protocol?
73                (code-vector-ref code (- (code-vector-length code) 3))))))
74    (byte-code-protocol? (code-vector-ref code 1))))
75
76
77(define (parse-template-code tem code state attribution)
78  (with-template
79   tem code state attribution
80   (lambda (pc length state)
81     (let loop ((pc pc)
82                (state state))
83       (if (< pc length)
84           (receive (size state)
85               (parse-instruction tem code pc state attribution)
86             (loop (+ pc size) state))
87           state)))))
88
89(define (with-template tem code state attribution fun)
90  (let ((length (template-code-length code)))
91    (let-fluid
92        *bc-make-labels* '()
93      (lambda ()
94        (for-each
95         (lambda (pc) (pc->label pc attribution))
96         (debug-data-jump-back-dests (template-debug-data tem)))
97        (receive (size protocol-arguments)
98            (parse-protocol code 1 attribution)
99          (receive (push-template? push-env? push-closure?)
100              (case (code-vector-ref code (+ size 1))
101                ((#b000) (values #f #f #f))
102                ((#b001) (values #t #f #f))
103                ((#b010) (values #f #t #f))
104                ((#b011) (values #t #t #f))
105                ((#b100) (values #f #f #t))
106                ((#b110) (values #f #t #t))
107                ((#b101) (values #t #f #t))
108                ((#b111) (values #t #t #t))
109                (else (assertion-violation 'with-template "invalid init-template spec"
110					   (code-vector-ref code (+ size 1)))))
111            (fun (+ size 2)
112                 length
113                 ((attribution-init-template attribution)
114                  state tem protocol-arguments push-template? push-env? push-closure?))))))))
115
116
117(define (parse-instruction template code pc state attribution)
118  (let* ((opcode (code-vector-ref code pc))
119	 (len.rev-args (cond ((= opcode (enum op computed-goto)) ; unused?
120			      (assertion-violation 'parse-instruction
121						   "computed-goto in parse-bytecode"))
122			     (else
123			      (parse-opcode-args opcode
124                                                 pc
125                                                 code
126                                                 template
127                                                 attribution))))
128	 (total-len (+ 1 (car len.rev-args))))  ; 1 for the opcode
129    (values total-len
130            (really-parse-instruction pc total-len opcode template state
131                                      (reverse (cdr len.rev-args)) attribution))))
132
133(define (really-parse-instruction pc len opcode template state args attribution)
134  (let ((new-state (if (label-at-pc? pc)
135                       ((attribution-at-label attribution)
136                        (pc->label pc attribution)
137                        state)
138                       state)))
139  (let ((opcode-attribution
140         (opcode-table-ref (attribution-opcode-table attribution) opcode)))
141    (if opcode-attribution
142	(apply opcode-attribution opcode template new-state pc len args)
143	(assertion-violation 'parse-instruction "cannot attribute "
144			     (enumerand->name opcode op) args)))))
145
146;;--------------------
147;; labels
148
149(define *bc-make-labels* (make-fluid '()))
150
151(define (add-pc! pc attribution)
152  (set-fluid! *bc-make-labels*
153	      (cons (cons pc ((attribution-make-label attribution) pc))
154                    (fluid *bc-make-labels*))))
155
156(define (pc->label pc attribution)
157  (let ((maybe-pc.label (assq pc (fluid *bc-make-labels*))))
158    (if maybe-pc.label
159	(cdr maybe-pc.label)
160	(begin
161	  (add-pc! pc attribution)
162	  (pc->label pc attribution)))))
163
164(define (label-at-pc? pc)
165  (if (assq pc (fluid *bc-make-labels*)) #t #f))
166
167; (enum op make-[big-]flat-env)
168; number of vars
169; number of closures
170; [offset of template in frame
171;  offsets of templates in template]
172; number of variables in frame (size)
173; offsets of vars in frame
174; [offset of env in frame
175;  number of vars in env
176;  offsets of vars in level]*
177
178(define-record-type env-data :env-data
179  (make-env-data total-count frame-offsets maybe-template-index closure-offsets
180                 env-offsets)
181  env-data?
182  (total-count env-data-total-count)
183  (frame-offsets env-data-frame-offsets)
184  (maybe-template-index env-data-maybe-template-index)
185  (closure-offsets env-data-closure-offsets)
186  (env-offsets env-data-env-offsets))
187
188(define (parse-flat-env-args pc code size fetch)
189  (let ((start-pc pc)
190	(total-count (fetch code pc))
191	(closure-count (fetch code (+ pc size))))
192    (receive (template-index closure-offsets)
193        (if (< 0 closure-count)
194            (values (fetch code (+ pc size size))
195                    (get-offsets code (+ pc size size size)
196                                 size fetch closure-count))
197            (values #f '()))
198      (let* ((pc (if (< 0 closure-count)
199                     (+ pc
200                        (* 2 size)      ; counts
201                        size            ; template offset
202                        (* closure-count size)) ; subtemplates
203                     (+ pc (* 2 size)))) ; counts
204             (frame-count (fetch code pc))
205             (pc (+ pc size)))
206        (let ((frame-offsets (get-offsets code pc size fetch frame-count)))
207          (let ((pc (+ pc (* frame-count size)))
208                (count (+ closure-count frame-count)))
209            (let loop ((pc pc) (count count) (rev-env-offsets '()))
210              (if (= count total-count)
211                  (values (- pc start-pc)
212                          (make-env-data total-count frame-offsets
213                                         template-index closure-offsets
214                                         (reverse rev-env-offsets)))
215                  (let* ((env (fetch code pc))
216                         (count-here (fetch code (+ pc size)))
217                         (indexes (get-offsets code
218                                               (+ pc size size)
219                                               size
220                                               fetch
221                                               count-here)))
222                    (loop (+ pc (* (+ 2 count-here) size))
223                          (+ count count-here)
224                          (cons (cons env indexes) rev-env-offsets)))))))))))
225
226
227(define (get-offsets code pc size fetch count)
228  (do ((pc pc (+ pc size))
229       (i 0 (+ i 1))
230       (r '() (cons (fetch code pc) r)))
231      ((= i count)
232       (reverse r))))
233
234
235; Parse a protocol, returning the number of bytes of instruction stream that
236; were consumed. PC has to point behind the PRTOCOL opcode
237
238(define (parse-protocol code pc attribution)
239  (let ((protocol (code-vector-ref code pc)))
240    (really-parse-protocol protocol code pc attribution)))
241
242(define (really-parse-protocol protocol code pc attribution)
243  (cond ((<= protocol maximum-stack-args)
244         (values 1 (list protocol)))
245        ((= protocol two-byte-nargs-protocol)
246         (values 3 (list protocol (get-offset code (+ pc 1)))))
247        ((= protocol two-byte-nargs+list-protocol)
248         (values 3 (list protocol (get-offset code (+ pc 1)))))
249        ((= protocol ignore-values-protocol)
250         (values 1 (list protocol)))
251        ((= protocol call-with-values-protocol)
252         (let ((offset (get-offset code (+ pc 1))))
253           (values 3 (list protocol
254                           (pc->label (- (+ offset pc) 1)
255                                      attribution)
256                           (zero? offset)))))
257        ((= protocol args+nargs-protocol)
258         (values 2 (list protocol (code-vector-ref code (+ pc 1)))))
259        ((= protocol nary-dispatch-protocol)
260         (values 5 (cons protocol (parse-dispatch code pc attribution))))
261        ((= protocol big-stack-protocol)
262         (let ((real-protocol (code-vector-ref code
263                                               (- (code-vector-length code) 3)))
264               (stack-size (get-offset code (- (code-vector-length code) 2))))
265           (receive (size real-attribution)
266               (really-parse-protocol real-protocol code pc attribution)
267             (values size
268                     (list protocol real-attribution stack-size)))))
269        (else
270         (assertion-violation 'parse-protocol "unknown protocol" protocol pc))))
271
272(define (parse-dispatch code pc attribution)
273  (define (maybe-parse-one-dispatch index)
274    (let ((offset (code-vector-ref code (+ pc index))))
275      (if (= offset 0)
276          #f
277          (pc->label (+ offset pc) attribution))))
278
279  (map maybe-parse-one-dispatch (list 3 4 5 2)))
280
281(define (protocol-protocol p-args)
282  (car p-args))
283
284(define (n-ary-protocol? p-args)
285  (let ((protocol (car p-args)))
286    (if (or (= protocol two-byte-nargs+list-protocol)
287            (= protocol call-with-values-protocol)
288            (= protocol ignore-values-protocol))
289        #t
290        (if (or (<= protocol maximum-stack-args)
291                (= protocol two-byte-nargs-protocol))
292            #f
293            (if (= protocol big-stack-protocol)
294                (n-ary-protocol? (cadr p-args))
295                (assertion-violation 'n-ary-protocol?
296				     "unknown protocol" p-args))))))
297
298(define (protocol-nargs p-args)
299  (let ((protocol (car p-args)))
300    (cond ((<= protocol maximum-stack-args)
301           protocol)
302	  ((= protocol two-byte-nargs-protocol)
303	   (cadr p-args))
304	  ((= protocol two-byte-nargs+list-protocol)
305	   (cadr p-args))
306	  ((= protocol args+nargs-protocol)
307           (cadr p-args))
308	  ((= protocol big-stack-protocol)
309           (protocol-nargs (cadr p-args)))
310          ((= protocol ignore-values-protocol)
311           0)
312          ((= protocol call-with-values-protocol)
313           (assertion-violation 'protocol-nargs
314				"call-with-values-protocol in protocol-nargs"))
315	  (else
316	   (assertion-violation 'protocol-nargs
317				"unknown protocol" p-args)))))
318
319(define (protocol-cwv-tailcall? p-args)
320  (let ((protocol (protocol-protocol p-args)))
321    (if (not (= protocol call-with-values-protocol))
322        (assertion-violation 'protocol-cwv-tailcall?
323			     "invalid protocol" protocol))
324    (caddr p-args)))
325
326(define (call-with-values-protocol-target p-args)
327  (let ((protocol (protocol-protocol p-args)))
328    (if (not (= protocol call-with-values-protocol))
329        (assertion-violation 'call-with-values-protocol-target
330			     "invalid protocol" protocol))
331    (cadr p-args)))
332
333; Generic opcode argument parser
334
335(define (parse-opcode-args op start-pc code template attribution)
336  (let ((specs (vector-ref opcode-arg-specs op)))
337    (let loop ((specs specs) (pc (+ start-pc 1)) (len 0) (args '()))
338      (if (null? specs)
339	  (cons len args)
340	  (let ((spec (car specs)))
341            (cond
342	     ((eq? spec 'protocol)
343	      (receive (size p-args)
344		  (parse-protocol code pc attribution)
345		(loop (cdr specs)
346		      (+ pc size)
347		      (+ len size)
348		      (cons (cons 'protocol p-args) args))))
349	     ((or (eq? spec 'env-data)
350		  (eq? spec 'big-env-data))
351	      (receive (size env-data)
352		  (receive (slot-size fetch)
353		      (if (eq? spec 'env-data)
354			  (values 1 code-vector-ref)
355			  (values 2 get-offset))
356		    (parse-flat-env-args pc code slot-size fetch))
357		(loop (cdr specs)
358		      (+ pc size)
359		      (+ len size)
360		      (cons (cons 'env-data env-data) args))))
361             ((eq? spec 'instr)
362              (let ((opcode (code-vector-ref code pc)))
363                (let ((len.revargs (parse-opcode-args opcode
364                                                      pc
365                                                      code
366                                                      template
367                                                      attribution)))
368                  (loop (cdr specs)
369                        (+ pc 1 (car len.revargs))
370                        (+ len 1 (car len.revargs))
371                        (cons
372                         (cons 'instr
373                               (cons opcode (reverse (cdr len.revargs))))
374                              args)))))
375	     ((= 0 (arg-spec-size spec pc code))
376	      (cons len args))
377	     (else
378	      (let ((arg (parse-opcode-arg specs
379					   pc
380					   start-pc
381					   code
382					   template
383					   attribution)))
384		(loop (cdr specs)
385		      (+ pc (arg-spec-size spec pc code))
386		      (+ len (arg-spec-size spec pc code))
387		      (cons arg args))))))))))
388
389; The number of bytes required by an argument.
390
391(define (arg-spec-size spec pc code)
392  (case spec
393    ((byte nargs stack-index index literal stob) 1)
394    ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index offset offset-) 2)
395    ((env-data) (assertion-violation 'arg-spec-size "env-data in arg-spec-size"))
396    ((protocol) (assertion-violation 'arg-spec-size "protocol in arg-spec-size"))
397    ((moves-data)
398     (let ((n-moves (code-vector-ref code pc)))
399       (+ 1 (* 2 n-moves))))
400    ((big-moves-data)
401     (let ((n-moves (code-vector-ref code pc)))
402       (+ 2 (* 4 n-moves))))
403    ((cont-data)
404     (- (get-offset code pc) 1)) ; size includes opcode
405    (else 0)))
406
407; Parse the particular type of argument.
408
409(define (parse-opcode-arg specs pc start-pc code template attribution)
410  (cons
411   (car specs)
412   (case (car specs)
413     ((byte nargs stack-index index)
414      (code-vector-ref code pc))
415     ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index)
416      (get-offset code pc))
417     ((literal)
418      (- (code-vector-ref code pc) 128))
419     ((offset)
420      (let ((offset (get-offset code pc)))
421        (if (zero? offset)
422            #f
423            (pc->label (+ start-pc offset) attribution))))
424     ((offset-)
425      (pc->label (- start-pc (get-offset code pc)) attribution))
426     ((stob)
427      (code-vector-ref code pc))
428     ((cont-data)
429      (parse-cont-data-args pc code template attribution))
430     ((moves-data)
431      (let ((n-moves (code-vector-ref code pc)))
432        (let loop ((offset (+ pc 1))
433                   (n n-moves))
434          (if (zero? n)
435              '()
436              (cons (cons (code-vector-ref code offset)
437                          (code-vector-ref code (+ offset 1)))
438                    (loop (+ offset 2) (- n 1)))))))
439     ((big-moves-data)
440      (let ((n-moves (get-offset code pc)))
441        (let loop ((offset (+ pc 2))
442                   (n n-moves))
443          (if (zero? n)
444              '()
445              (cons (cons (get-offset code offset)
446                          (get-offset code (+ offset 2)))
447                    (loop (+ offset 4) (- n 1)))))))
448     (else (assertion-violation 'parse-opcode-arg
449				"unknown arg spec: " (car specs))))))
450
451(define-record-type cont-data :cont-data
452  (make-cont-data length mask-bytes live-offsets template pc gc-mask-size depth)
453  cont-data?
454  (length cont-data-length)
455  (mask-bytes cont-data-mask-bytes)
456  ;; #f if all are live
457  (live-offsets cont-data-live-offsets)
458  (template cont-data-template)
459  (pc cont-data-pc)
460  (gc-mask-size cont-data-gc-mask-size)
461  (depth cont-data-depth))
462
463(define (parse-cont-data-args pc code template attribution)
464  (let* ((len (get-offset code pc))
465	 (end-pc (- (+ pc len) 1))      ; len includes opcode
466	 (gc-mask-size (code-vector-ref code (- end-pc 3)))
467	 (depth (get-offset code (- end-pc 2)))
468	 (offset (get-offset code (- end-pc 5)))
469         (template (get-offset code (- end-pc 7)))
470	 (mask-bytes
471	  (let lp ((the-pc (+ pc 2)) (mask-bytes '()))
472	    (if (>= the-pc (+ pc 2 gc-mask-size))
473		mask-bytes
474		(lp (+ the-pc 1)
475		    (cons (code-vector-ref code the-pc) mask-bytes)))))
476	 (live-offsets
477	  (and (not (zero? gc-mask-size))
478	       (gc-mask-live-offsets (bytes->bits mask-bytes)))))
479    (make-cont-data len
480                    mask-bytes
481		    live-offsets
482                    template
483                    (pc->label offset attribution)
484                    gc-mask-size
485                    depth)))
486
487(define (bytes->bits l)
488  (let loop ((n 0) (l l))
489    (if (null? l)
490	n
491	(loop (+ (arithmetic-shift n 8) (car l))
492	      (cdr l)))))
493
494(define (gc-mask-live-offsets mask)
495  (let loop ((mask mask) (i 0) (l '()))
496    (if (zero? mask)
497	(reverse l)
498	(loop (arithmetic-shift mask -1) (+ 1 i)
499	      (if (odd? mask)
500		  (cons i l)
501		  l)))))
502
503;----------------
504; Utilities.
505
506; TODO: Put the template-related stuff into a separate module?
507
508; Turn OBJ into a template, if possible.
509
510(define (coerce-to-template obj)
511  (cond ((template? obj) obj)
512	((closure? obj) (closure-template obj))
513	((continuation? obj) (continuation-template obj))
514	(else (assertion-violation 'coerce-to-template
515				   "expected a procedure or continuation" obj))))
516
517(define (template-code-length code)
518  (if (and (= (enum op protocol)
519	      (code-vector-ref code 0))
520	   (= big-stack-protocol
521	      (code-vector-ref code 1)))
522      (- (code-vector-length code) 3)
523      (code-vector-length code)))
524
525
526; Fetch the two-byte value at PC in CODE.
527
528(define (get-offset code pc)
529  (+ (* (code-vector-ref code pc)
530	byte-limit)
531     (code-vector-ref code (+ pc 1))))
532
533