1;;; arm64.ss
2
3;;; SECTION 1: registers
4;;; ABI:
5;;;  Register usage:
6;;;   r0-r7: C argument/result registers, caller-save
7;;;   r8: indirect-result register, caller-save
8;;;   r9-18: caller-save
9;;;   r19-28: callee-save
10;;;   r29: frame pointer, callee-save
11;;;   r30: a.k.a. lr, link register
12;;;   sp: stack pointer or (same register number) zero register
13;;;   --------
14;;;   v0-v7: FP registers used for C arguments/results, caller-save
15;;;   v8-v15: callee-save for low 64 bits
16;;;   v16-v31: caller-save
17;;;  Alignment:
18;;;   stack must be 16-byte aligned, essentially always
19
20(define-registers
21  (reserved
22    [%tc  %r19                  #t 19 uptr]
23    [%sfp %r20                  #t 20 uptr]
24    [%ap  %r21                  #t 21 uptr]
25    [%trap %r22                 #t 22 uptr])
26  (allocable
27    [%ac0 %r23                  #t 23 uptr]
28    [%xp  %r24                  #t 24 uptr]
29    [%ts  %r8                   #f  8 uptr]
30    [%td  %r25                  #t 25 uptr]
31    [%cp  %r26                  #t 26 uptr]
32    [     %r0  %Carg1 %Cretval  #f  0 uptr]
33    [     %r1  %Carg2           #f  1 uptr]
34    [     %r2  %Carg3 %reify1   #f  2 uptr]
35    [     %r3  %Carg4 %reify2   #f  3 uptr]
36    [     %r4  %Carg5 %save1    #f  4 uptr]
37    [     %r5  %Carg6           #f  5 uptr]
38    [     %r6  %Carg7           #f  6 uptr]
39    [     %r7  %Carg8           #f  7 uptr]
40    [     %r9                   #f  9 uptr]
41    [     %r12                  #f 12 uptr]
42    [     %r13                  #f 13 uptr]
43    [     %r14                  #f 14 uptr]
44    [     %r15                  #f 15 uptr]
45    [     %lr                   #f 30 uptr] ; %lr is trashed by 'c' calls including calls to hand-coded routines like get-room
46    [%fp1           %v16        #f 16 fp]
47    [%fp2           %v17        #f 17 fp]
48    [%fp3           %v18        #f 18 fp]
49    [%fp4           %v19        #f 19 fp]
50    [%fp5           %v20        #f 20 fp]
51    [%fp6           %v21        #f 21 fp]
52  )
53  (machine-dependent
54    [%jmptmp %argtmp            #f 10 uptr]
55    [%argtmp2                   #f 11 uptr]
56    [%sp %real-zero             #t 31 uptr]
57    [%Cfparg1 %Cfpretval      %v0   #f  0 fp]
58    [%Cfparg2                 %v1   #f  1 fp]
59    [%Cfparg3                 %v2   #f  2 fp]
60    [%Cfparg4                 %v3   #f  3 fp]
61    [%Cfparg5                 %v4   #f  4 fp]
62    [%Cfparg6                 %v5   #f  5 fp]
63    [%Cfparg7                 %v6   #f  6 fp]
64    [%Cfparg8                 %v7   #f  7 fp]
65    ;; etc., but FP registers v8-v15 are preserved
66    ))
67
68;;; SECTION 2: instructions
69(module (md-handle-jump ; also sets primitive handlers
70         mem->mem
71         fpmem->fpmem
72         coercible?
73         coerce-opnd)
74  (import asm-module)
75
76  (define imm-funkymask?
77    (lambda (x)
78      (nanopass-case (L15c Triv) x
79        [(immediate ,imm) (and (funkymask imm) #t)]
80        [else #f])))
81
82  (define imm-unsigned12?
83    (lambda (x)
84      (nanopass-case (L15c Triv) x
85        [(immediate ,imm) (unsigned12? imm)]
86        [else #f])))
87
88  (define imm-neg-unsigned12?
89    (lambda (x)
90      (nanopass-case (L15c Triv) x
91        [(immediate ,imm) (unsigned12? (- imm))]
92        [else #f])))
93
94  (define imm-constant?
95    (lambda (x)
96      (nanopass-case (L15c Triv) x
97        [(immediate ,imm) #t]
98        [else #f])))
99
100  (define-pass imm->negate-imm : (L15c Triv) (ir) -> (L15d Triv) ()
101    (Triv : Triv (ir) -> Triv ()
102      [(immediate ,imm) `(immediate ,(- imm))]
103      [else (sorry! who "~s is not an immediate" ir)]))
104
105  (define mref->mref
106    (lambda (a k)
107      (define return
108        (lambda (x0 x1 imm type)
109          ; arm load & store instructions support index or offset but not both
110          (safe-assert (or (eq? x1 %zero) (eqv? imm 0)))
111          (k (with-output-language (L15d Triv) `(mref ,x0 ,x1 ,imm ,type)))))
112      (nanopass-case (L15c Triv) a
113        [(mref ,lvalue0 ,lvalue1 ,imm ,type)
114         (lvalue->ur lvalue0
115           (lambda (x0)
116             (lvalue->ur lvalue1
117               (lambda (x1)
118                 (cond
119                   [(and (eq? x1 %zero) (or (signed9? imm)
120                                            (aligned-offset? imm)))
121                    (return x0 %zero imm type)]
122                   [(and (not (eq? x1 %zero)) (unsigned12? imm))
123                    (let ([u (make-tmp 'u)])
124                      (seq
125                       (build-set! ,u (asm ,null-info ,(asm-add #f) ,x1 (immediate ,imm)))
126                       (return x0 u 0 type)))]
127                   [(and (not (eq? x1 %zero)) (unsigned12? (- imm)))
128                    (let ([u (make-tmp 'u)])
129                      (seq
130                       (build-set! ,u (asm ,null-info ,(asm-sub #f) ,x1 (immediate ,(- imm))))
131                       (return x0 u 0 type)))]
132                   [else
133                    (let ([u (make-tmp 'u)])
134                      (seq
135                        (build-set! ,u (immediate ,imm))
136                        (if (eq? x1 %zero)
137                            (return x0 u 0 type)
138                            (seq
139                              (build-set! ,u (asm ,null-info ,(asm-add #f) ,u ,x1))
140                              (return x0 u 0 type)))))])))))])))
141
142  (define mem->mem
143    (lambda (a k)
144      (cond
145        [(literal@? a)
146         (let ([u (make-tmp 'u)])
147           (seq
148             (build-set! ,u ,(literal@->literal a))
149             (k (with-output-language (L15d Lvalue) `(mref ,u ,%zero 0 uptr)))))]
150        [else (mref->mref a k)])))
151
152  (define fpmem->fpmem mem->mem)
153
154  ;; `define-instruction` code takes care of `ur` and `fpur`, to which
155  ;; all type-compatible values must convert
156  (define-syntax coercible?
157    (syntax-rules ()
158      [(_ ?a ?aty*)
159       (let ([a ?a] [aty* ?aty*])
160         (or (and (memq 'unsigned12 aty*) (imm-unsigned12? a))
161             (and (memq 'neg-unsigned12 aty*) (imm-neg-unsigned12? a))
162             (and (memq 'funkymask aty*) (imm-funkymask? a))
163             (and (memq 'imm-constant aty*) (imm-constant? a))
164             (and (memq 'mem aty*) (mem? a))
165             (and (memq 'fpmem aty*) (fpmem? a))))]))
166
167  ;; `define-instruction` doesn't try to cover `ur` and `fpur`
168  (define-syntax coerce-opnd ; passes k something compatible with aty*
169    (syntax-rules ()
170      [(_ ?a ?aty* ?k)
171       (let ([a ?a] [aty* ?aty*] [k ?k])
172         (cond
173           [(and (memq 'mem aty*) (mem? a)) (mem->mem a k)]
174           [(and (memq 'fpmem aty*) (fpmem? a)) (fpmem->fpmem a k)]
175           [(and (memq 'unsigned12 aty*) (imm-unsigned12? a)) (k (imm->imm a))]
176           [(and (memq 'neg-unsigned12 aty*) (imm-neg-unsigned12? a)) (k (imm->negate-imm a))]
177           [(and (memq 'funkymask aty*) (imm-funkymask? a)) (k (imm->imm a))]
178           [(and (memq 'imm-constant aty*) (imm-constant? a)) (k (imm->imm a))]
179           [(memq 'ur aty*)
180            (cond
181              [(ur? a) (k a)]
182              [(imm? a)
183               (let ([u (make-tmp 'u)])
184                 (seq
185                   (build-set! ,u ,(imm->imm a))
186                   (k u)))]
187              [(mem? a)
188               (mem->mem a
189                 (lambda (a)
190                   (let ([u (make-tmp 'u)])
191                     (seq
192                       (build-set! ,u ,a)
193                       (k u)))))]
194              [else (sorry! 'coerce-opnd "unexpected operand ~s" a)])]
195           [(memq 'fpur aty*)
196            (cond
197              [(fpur? a) (k a)]
198              [(fpmem? a)
199               (fpmem->fpmem a
200                 (lambda (a)
201                   (let ([u (make-tmp 'u 'fp)])
202                     (seq
203                       (build-set! ,u ,a)
204                       (k u)))))]
205              [else
206               (sorry! 'coerce-opnd "unexpected operand ~s" a)])]
207           [else (sorry! 'coerce-opnd "cannot coerce ~s to ~s" a aty*)]))]))
208
209  (define md-handle-jump
210    (lambda (t)
211      (with-output-language (L15d Tail)
212        (define long-form
213          (lambda (e)
214            (let ([tmp (make-tmp 'utmp)])
215              (values
216                (in-context Effect `(set! ,(make-live-info) ,tmp ,e))
217                `(jump ,tmp)))))
218        (nanopass-case (L15c Triv) t
219          [,lvalue
220           (if (mem? lvalue)
221               (mem->mem lvalue (lambda (e) (values '() `(jump ,e))))
222               (values '() `(jump ,lvalue)))]
223          [(literal ,info)
224           (guard (and (not (info-literal-indirect? info))
225                       (memq (info-literal-type info) '(entry library-code))))
226           (values '() `(jump (literal ,info)))]
227          [(label-ref ,l ,offset)
228           (values '() `(jump (label-ref ,l ,offset)))]
229          [else (long-form t)]))))
230
231  (define info-cc-eq (make-info-condition-code 'eq? #f #t))
232  (define asm-eq (asm-relop info-cc-eq #f))
233
234  ; x is not the same as z in any clause that follows a clause where (x z)
235  ; and y is coercible to one of its types, however:
236  ; WARNING: do not assume that if x isn't the same as z then x is independent
237  ; of z, since x might be an mref with z as it's base or index
238
239  (define-instruction value (- -/ovfl -/eq -/pos)
240    [(op (z ur) (x ur) (y unsigned12))
241     `(set! ,(make-live-info) ,z (asm ,info ,(asm-sub (not (eq? op '-))) ,x ,y))]
242    [(op (z ur) (x ur) (y neg-unsigned12))
243     `(set! ,(make-live-info) ,z (asm ,info ,(asm-add (not (eq? op '-))) ,x ,y))]
244    [(op (z ur) (x ur) (y ur))
245     `(set! ,(make-live-info) ,z (asm ,info ,(asm-sub (not (eq? op '-))) ,x ,y))])
246
247  (define-instruction value (+ +/ovfl +/carry)
248    [(op (z ur) (x ur) (y unsigned12))
249     `(set! ,(make-live-info) ,z (asm ,info ,(asm-add (not (eq? op '+))) ,x ,y))]
250    [(op (z ur) (x ur) (y neg-unsigned12))
251     `(set! ,(make-live-info) ,z (asm ,info ,(asm-sub (not (eq? op '+))) ,x ,y))]
252    [(op (z ur) (x unsigned12) (y ur))
253     `(set! ,(make-live-info) ,z (asm ,info ,(asm-add (not (eq? op '+))) ,y ,x))]
254    [(op (z ur) (x ur) (y ur))
255     `(set! ,(make-live-info) ,z (asm ,info ,(asm-add (not (eq? op '+))) ,x ,y))])
256
257  (define-instruction value (*)
258    ; no imm form available
259    [(op (z ur) (x ur) (y ur))
260     `(set! ,(make-live-info) ,z (asm ,info ,asm-mul ,x ,y))])
261
262  (define-instruction value (*/ovfl) ; z flag set iff no overflow
263    ; no imm form available
264    [(op (z ur) (x ur) (y ur))
265     (let ([u (make-tmp 'u)])
266       (seq
267         `(set! ,(make-live-info) ,u (asm ,null-info ,asm-smulh ,x ,y))
268         `(set! ,(make-live-info) ,z (asm ,null-info ,asm-mul ,x ,y))
269         `(asm ,null-info ,asm-cmp/asr63 ,u ,z)))])
270
271  (define-instruction value (/)
272    [(op (z ur) (x ur) (y ur))
273     `(set! ,(make-live-info) ,z (asm ,info ,asm-div ,x ,y))])
274
275  (define-instruction value (logand)
276    [(op (z ur) (x ur) (y funkymask))
277     `(set! ,(make-live-info) ,z (asm ,info ,(asm-logand #f) ,x ,y))]
278    [(op (z ur) (x funkymask) (y ur))
279     `(set! ,(make-live-info) ,z (asm ,info ,(asm-logand #f) ,y ,x))]
280    [(op (z ur) (x ur) (y ur))
281     `(set! ,(make-live-info) ,z (asm ,info ,(asm-logand #f) ,x ,y))])
282
283  (let ()
284    (define select-op (lambda (op) (if (eq? op 'logor) asm-logor asm-logxor)))
285    (define-instruction value (logor logxor)
286      [(op (z ur) (x funkymask) (y ur))
287       `(set! ,(make-live-info) ,z (asm ,info ,((select-op op) #f) ,y ,x))]
288      [(op (z ur) (x ur) (y funkymask ur))
289       `(set! ,(make-live-info) ,z (asm ,info ,((select-op op) #f) ,x ,y))]))
290
291  (define-instruction value (lognot)
292    [(op (z ur) (x ur))
293     `(set! ,(make-live-info) ,z (asm ,info ,asm-lognot ,x))])
294
295  (define-instruction value (sll srl sra)
296    [(op (z ur) (x ur) (y imm-constant ur))
297     `(set! ,(make-live-info) ,z (asm ,info ,(asm-shiftop op) ,x ,y))])
298
299  (define-instruction value popcount
300    [(op (z ur) (x ur))
301     (let ([u (make-tmp 'u)])
302       (seq
303        `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill))
304        `(set! ,(make-live-info) ,z (asm ,info ,asm-popcount ,x ,u))))])
305
306  (define-instruction value (move)
307    [(op (z mem) (x ur))
308     `(set! ,(make-live-info) ,z ,x)]
309    [(op (z ur) (x ur mem imm-constant))
310     `(set! ,(make-live-info) ,z ,x)])
311
312  (let ()
313    (define build-lea1
314      (lambda (info z x)
315        (let ([offset (info-lea-offset info)])
316          (with-output-language (L15d Effect)
317            (cond
318              [(unsigned12? offset)
319               `(set! ,(make-live-info) ,z (asm ,info ,(asm-add #f) ,x (immediate ,offset)))]
320              [(unsigned12? (- offset))
321               `(set! ,(make-live-info) ,z (asm ,info ,(asm-sub #f) ,x (immediate ,(- offset))))]
322              [else
323               (let ([u (make-tmp 'u)])
324                 (seq
325                  `(set! ,(make-live-info) ,u (immediate ,offset))
326                  `(set! ,(make-live-info) ,z (asm ,info ,(asm-add #f) ,x ,u))))])))))
327
328    (define-instruction value lea1
329      ;; NB: would be simpler if offset were explicit operand
330      ;; NB: why not one version of lea with %zero for y in lea1 case?
331      [(op (z ur) (x ur)) (build-lea1 info z x)])
332
333    (define-instruction value lea2
334      ;; NB: would be simpler if offset were explicit operand
335      [(op (z ur) (x ur) (y ur))
336       (let ([u (make-tmp 'u)])
337         (seq
338          (build-lea1 info u x)
339          `(set! ,(make-live-info) ,z (asm ,info ,(asm-add #f) ,y ,u))))]))
340
341  (define-instruction value (sext8 sext16 sext32 zext8 zext16 zext32)
342    [(op (z ur) (x ur)) `(set! ,(make-live-info) ,z (asm ,info ,(asm-move/extend op) ,x))])
343
344  (let ()
345    (define imm-zero (with-output-language (L15d Triv) `(immediate 0)))
346    (define load/store
347      (lambda (x y w type k) ; x ur, y ur, w ur or imm
348        (with-output-language (L15d Effect)
349          (if (ur? w)
350              (if (eq? y %zero)
351                  (k x w imm-zero)
352                  (let ([u (make-tmp 'u)])
353                    (seq
354                      `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-add #f) ,y ,w))
355                      (k x u imm-zero))))
356              (let ([n (nanopass-case (L15d Triv) w [(immediate ,imm) imm])])
357                (cond
358                  [(and (eq? y %zero)
359                        (aligned-offset? n (case type
360                                             [(unsigned-32 integer-32) 2]
361                                             [(unsigned-16 integer-16) 1]
362                                             [(unsigned-8 integer-8) 0]
363                                             [else 3])))
364                   (let ([w (in-context Triv `(immediate ,n))])
365                     (k x y w))]
366                  [(and (eq? y %zero) (signed9? n))
367                   (let ([w (in-context Triv `(immediate ,n))])
368                     (k x y w))]
369                  [(and (not (eq? y %zero)) (unsigned12? n))
370                   (let ([w (in-context Triv `(immediate ,n))])
371                     (let ([u (make-tmp 'u)])
372                       (seq
373                        `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-add #f) ,y ,w))
374                        (k x u imm-zero))))]
375                  [(and (not (eq? y %zero)) (unsigned12? (- n)))
376                   (let ([w (in-context Triv `(immediate ,(- n)))])
377                     (let ([u (make-tmp 'u)])
378                       (seq
379                        `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-sub #f) ,y ,w))
380                        (k x u imm-zero))))]
381                  [else
382                   (let ([u (make-tmp 'u)])
383                     (seq
384                       `(set! ,(make-live-info) ,u (immediate ,n))
385                       (if (eq? y %zero)
386                           (k x u imm-zero)
387                           (seq
388                            `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-add #f) ,x ,u))
389                            (k u y imm-zero)))))]))))))
390    (define-instruction value (load)
391      [(op (z ur) (x ur) (y ur) (w ur imm-constant))
392       (let ([type (info-load-type info)])
393         (load/store x y w type
394           (lambda (x y w)
395             (let ([instr `(set! ,(make-live-info) ,z (asm ,null-info ,(asm-load type) ,x ,y ,w))])
396               (if (info-load-swapped? info)
397                   (seq
398                     instr
399                     `(set! ,(make-live-info) ,z (asm ,null-info ,(asm-swap type) ,z)))
400                   instr)))))])
401    (define-instruction effect (store)
402      [(op (x ur) (y ur) (w ur imm-constant) (z ur))
403       (let ([type (info-load-type info)])
404         (load/store x y w type
405           (lambda (x y w)
406             (if (info-load-swapped? info)
407                 (let ([u (make-tmp 'unique-bob)])
408                   (seq
409                     `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-swap type) ,z))
410                     `(asm ,null-info ,(asm-store type) ,x ,y ,w ,u)))
411                 `(asm ,null-info ,(asm-store type) ,x ,y ,w ,z)))))]))
412
413  (define-instruction value (load-single->double)
414    [(op (x fpur) (y fpmem))
415     (let ([u (make-tmp 'u 'fp)])
416       (seq
417        `(set! ,(make-live-info) ,u (asm ,null-info ,asm-fpmove-single ,y))
418        `(set! ,(make-live-info) ,x (asm ,info ,(asm-fl-cvt 'single->double) ,u))))])
419
420  (define-instruction effect (store-double->single)
421    [(op (x fpmem) (y fpur))
422     (let ([u (make-tmp 'u 'fp)])
423       (seq
424        `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-fl-cvt 'double->single) ,y))
425        `(asm ,info ,asm-fpmove-single ,x ,u)))])
426
427  (define-instruction effect (store-single)
428    [(op (x fpmem) (y fpur))
429     `(asm ,info ,asm-fpmove-single ,x ,y)])
430
431  (define-instruction value (load-single)
432    [(op (x fpur) (y fpmem fpur))
433     `(set! ,(make-live-info) ,x (asm ,info ,asm-fpmove-single ,y))])
434
435  (define-instruction value (single->double double->single)
436    [(op (x fpur) (y fpur))
437     `(set! ,(make-live-info) ,x (asm ,info ,(asm-fl-cvt op) ,y))])
438
439  (define-instruction value (fpt)
440    [(op (x fpur) (y ur))
441     `(set! ,(make-live-info) ,x (asm ,info ,asm-fpt ,y))])
442
443  (define-instruction value (fptrunc)
444    [(op (x ur) (y fpur))
445     `(set! ,(make-live-info) ,x (asm ,info ,asm-fptrunc ,y))])
446
447  (define-instruction value (fpsingle)
448    [(op (x fpur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpsingle ,y))])
449
450  (define-instruction value (fpmove)
451    [(op (x fpmem) (y fpur)) `(set! ,(make-live-info) ,x ,y)]
452    [(op (x fpur) (y fpmem fpur)) `(set! ,(make-live-info) ,x ,y)])
453
454  (let ()
455    (define (mem->mem mem new-type)
456      (nanopass-case (L15d Triv) mem
457        [(mref ,x0 ,x1 ,imm ,type)
458         (with-output-language (L15d Lvalue) `(mref ,x0 ,x1 ,imm ,new-type))]))
459
460    (define-instruction value (fpcastto)
461      [(op (x mem) (y fpur)) `(set! ,(make-live-info) ,(mem->mem x 'fp) ,y)]
462      [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastto ,y))])
463
464    (define-instruction value (fpcastfrom)
465      [(op (x fpmem) (y ur)) `(set! ,(make-live-info) ,(mem->mem x 'uptr) ,y)]
466      [(op (x fpur) (y ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastfrom ,y))]))
467
468  (define-instruction value (fp+ fp- fp/ fp*)
469    [(op (x fpur) (y fpur) (z fpur))
470     `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpop-2 op) ,y ,z))])
471
472  (define-instruction value (fpsqrt)
473    [(op (x fpur) (y fpur))
474     `(set! ,(make-live-info) ,x (asm ,info ,asm-fpsqrt ,y))])
475
476  (define-instruction pred (fp= fp< fp<=)
477    [(op (x fpur) (y fpur))
478     (let ([info (make-info-condition-code op #f #f)])
479       (values '() `(asm ,info ,(asm-fp-relop info) ,x ,y)))])
480
481  (define-instruction effect (inc-cc-counter)
482    [(op (x ur) (w unsigned12) (z ur unsigned12))
483     (let ([u1 (make-tmp 'u1)] [u2 (make-tmp 'u2)])
484       (seq
485        `(set! ,(make-live-info) ,u1 (asm ,null-info ,(asm-add #f) ,x ,w))
486        `(set! ,(make-live-info) ,u2 (asm ,null-info ,asm-kill))
487        `(asm ,null-info ,asm-inc-cc-counter ,u1 ,z ,u2)))])
488
489  (define-instruction effect (inc-profile-counter)
490    [(op (x mem) (y unsigned12))
491     (let ([u (make-tmp 'u)])
492       (seq
493         `(set! ,(make-live-info) ,u ,x)
494         `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-add #f) ,u ,y))
495         `(set! ,(make-live-info) ,x ,u)))])
496
497  (define-instruction value (read-time-stamp-counter)
498    [(op (z ur)) `(set! ,(make-live-info) ,z (asm ,null-info
499                                                  ;; CNTPCT_EL0
500                                                  ,(asm-read-counter #b11 #b011 #b1110 #b0000 #b001)))])
501
502  (define-instruction value (read-performance-monitoring-counter)
503    [(op (z ur) (x ur)) `(set! ,(make-live-info) ,z (immediate 0))])
504
505  ;; no kills since we expect to be called when all necessary state has already been saved
506  (define-instruction value (get-tc)
507    [(op (z ur))
508     (safe-assert (eq? z %Cretval))
509     (let ([ulr (make-precolored-unspillable 'ulr %lr)])
510       (seq
511         `(set! ,(make-live-info) ,ulr (asm ,null-info ,asm-kill))
512         `(set! ,(make-live-info) ,z (asm ,info ,asm-get-tc ,ulr))))])
513
514  (define-instruction value activate-thread
515    [(op (z ur))
516     (safe-assert (eq? z %Cretval))
517     (let ([ulr (make-precolored-unspillable 'ulr %lr)])
518       (seq
519         `(set! ,(make-live-info) ,ulr (asm ,null-info ,asm-kill))
520         `(set! ,(make-live-info) ,z (asm ,info ,asm-activate-thread ,ulr))))])
521
522  (define-instruction effect deactivate-thread
523    [(op)
524     (let ([ulr (make-precolored-unspillable 'ulr %lr)])
525       (seq
526         `(set! ,(make-live-info) ,ulr (asm ,null-info ,asm-kill))
527         `(asm ,info ,asm-deactivate-thread ,ulr)))])
528
529  (define-instruction effect unactivate-thread
530    [(op (x ur))
531     (safe-assert (eq? x %Carg1))
532     (let ([ulr (make-precolored-unspillable 'ulr %lr)])
533       (seq
534         `(set! ,(make-live-info) ,ulr (asm ,null-info ,asm-kill))
535         `(asm ,info ,asm-unactivate-thread ,x ,ulr)))])
536
537  (define-instruction value (asmlibcall)
538    [(op (z ur))
539     (if (info-asmlib-save-ra? info)
540         `(set! ,(make-live-info) ,z (asm ,info ,(asm-library-call (info-asmlib-libspec info) #t) ,(info-kill*-live*-live* info) ...))
541         (let ([ulr (make-precolored-unspillable 'ulr %lr)])
542           (seq
543            `(set! ,(make-live-info) ,ulr (asm ,null-info ,asm-kill))
544            `(set! ,(make-live-info) ,z (asm ,info ,(asm-library-call (info-asmlib-libspec info) #f) ,ulr ,(info-kill*-live*-live* info) ...)))))])
545
546  (define-instruction effect (asmlibcall!)
547    [(op)
548     (if (info-asmlib-save-ra? info)
549         (let ([ulr (make-precolored-unspillable 'ulr %lr)])
550           `(asm ,info ,(asm-library-call! (info-asmlib-libspec info) #t) ,(info-kill*-live*-live* info) ...))
551         (let ([ulr (make-precolored-unspillable 'ulr %lr)])
552           (seq
553            `(set! ,(make-live-info) ,ulr (asm ,null-info ,asm-kill))
554            `(asm ,info ,(asm-library-call! (info-asmlib-libspec info) #f) ,ulr ,(info-kill*-live*-live* info) ...))))])
555
556  (safe-assert (reg-callee-save? %tc)) ; no need to save-restore
557  (define-instruction effect (c-simple-call)
558    [(op)
559     (if (info-c-simple-call-save-ra? info)
560         `(asm ,info ,(asm-c-simple-call (info-c-simple-call-entry info) #t))
561         (let ([ulr (make-precolored-unspillable 'ulr %lr)])
562           (seq
563            `(set! ,(make-live-info) ,ulr (asm ,null-info ,asm-kill))
564            `(asm ,info ,(asm-c-simple-call (info-c-simple-call-entry info) #f) ,ulr))))])
565
566  (define-instruction pred (eq? u< < > <= >=)
567    [(op (y unsigned12) (x ur))
568     (let ([info (if (eq? op 'eq?) info-cc-eq (make-info-condition-code op #t #t))])
569       (values '() `(asm ,info ,(asm-relop info #f) ,x ,y)))]
570    [(op (y neg-unsigned12) (x ur))
571     (let ([info (if (eq? op 'eq?) info-cc-eq (make-info-condition-code op #t #t))])
572       (values '() `(asm ,info ,(asm-relop info #t) ,x ,y)))]
573    [(op (x ur) (y ur unsigned12))
574     (let ([info (if (eq? op 'eq?) info-cc-eq (make-info-condition-code op #f #t))])
575       (values '() `(asm ,info ,(asm-relop info #f) ,x ,y)))]
576    [(op (x ur) (y neg-unsigned12))
577     (let ([info (if (eq? op 'eq?) info-cc-eq (make-info-condition-code op #f #t))])
578       (values '() `(asm ,info ,(asm-relop info #t) ,x ,y)))])
579
580  (define-instruction pred (condition-code)
581    [(op) (values '() `(asm ,info ,(asm-condition-code info)))])
582
583  (define-instruction pred (type-check?)
584    [(op (x ur) (mask funkymask ur) (type unsigned12 ur))
585     (let ([tmp (make-tmp 'u)])
586       (values
587         (with-output-language (L15d Effect)
588           `(set! ,(make-live-info) ,tmp (asm ,null-info ,(asm-logand #f) ,x ,mask)))
589         `(asm ,info-cc-eq ,asm-eq ,tmp ,type)))])
590
591  (define-instruction pred (logtest log!test)
592    [(op (x funkymask) (y ur))
593     (values '() `(asm ,info-cc-eq ,(asm-logtest (eq? op 'log!test) info-cc-eq) ,y ,x))]
594    [(op (x ur) (y ur funkymask))
595     (values '() `(asm ,info-cc-eq ,(asm-logtest (eq? op 'log!test) info-cc-eq) ,x ,y))])
596
597  (let ()
598    (define lea->reg
599      (lambda (x y w k)
600        (with-output-language (L15d Effect)
601          (define add-offset
602            (lambda (r)
603              (let ([i (nanopass-case (L15d Triv) w [(immediate ,imm) imm])])
604                (cond
605                  [(eqv? i 0) (k r)]
606                  [(unsigned12? i)
607                   (let ([u (make-tmp 'u)])
608                     (seq
609                      `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-add #f) ,r ,w))
610                      (k u)))]
611                  [else
612                   (let ([u (make-tmp 'u)])
613                     (seq
614                      `(set! ,(make-live-info) ,u ,w)
615                      `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-add #f) ,r ,u))
616                      (k u)))]))))
617          (if (eq? y %zero)
618              (add-offset x)
619              (let ([u (make-tmp 'u)])
620                (seq
621                  `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-add #f) ,x ,y))
622                  (add-offset u)))))))
623    ;; NB: compiler implements init-lock! and unlock! as word store of zero
624    (define-instruction pred (lock!)
625      [(op (x ur) (y ur) (w imm-constant))
626       (let ([u (make-tmp 'u)]
627             [u2 (make-tmp 'u2)])
628         (values
629           (lea->reg x y w
630             (lambda (r)
631               (with-output-language (L15d Effect)
632                 (seq
633                   `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill))
634                   `(set! ,(make-live-info) ,u2 (asm ,null-info ,asm-kill))
635                   `(asm ,null-info ,asm-lock ,r ,u ,u2)))))
636           `(asm ,info-cc-eq ,asm-eq ,u (immediate 0))))])
637    (define-instruction effect (locked-incr! locked-decr!)
638      [(op (x ur) (y ur) (w imm-constant))
639       (lea->reg x y w
640         (lambda (r)
641           (let ([u1 (make-tmp 'u1)] [u2 (make-tmp 'u2)])
642             (seq
643               `(set! ,(make-live-info) ,u1 (asm ,null-info ,asm-kill))
644               `(set! ,(make-live-info) ,u2 (asm ,null-info ,asm-kill))
645               `(asm ,null-info ,(asm-lock+/- op) ,r ,u1 ,u2)))))])
646    (define-instruction effect (cas)
647      [(op (x ur) (y ur) (w imm-constant) (old ur) (new ur))
648       (lea->reg x y w
649         (lambda (r)
650	   (let ([u1 (make-tmp 'u1)] [u2 (make-tmp 'u2)])
651             (seq
652               `(set! ,(make-live-info) ,u1 (asm ,null-info ,asm-kill))
653               `(set! ,(make-live-info) ,u2 (asm ,null-info ,asm-kill))
654	       `(asm ,info ,asm-cas ,r ,old ,new ,u1 ,u2)))))]))
655
656  (define-instruction effect (store-store-fence)
657    [(op)
658     `(asm ,info ,(asm-fence 'store-store))])
659
660  (define-instruction effect (acquire-fence)
661    [(op)
662     `(asm ,info ,(asm-fence 'acquire))])
663
664  (define-instruction effect (release-fence)
665    [(op)
666     `(asm ,info ,(asm-fence 'release))])
667
668  (define-instruction effect (pause)
669    ;; NB: use sqrt or something like that?
670    [(op) '()])
671
672  (define-instruction effect (debug)
673    [(op)
674     `(asm ,info ,asm-debug)])
675
676  (define-instruction effect (c-call)
677    [(op (x ur))
678     (let ([ulr (make-precolored-unspillable 'ulr %lr)])
679       (seq
680         `(set! ,(make-live-info) ,ulr (asm ,null-info ,asm-kill))
681         `(asm ,info ,asm-indirect-call ,x ,ulr ,(info-kill*-live*-live* info) ...)))])
682
683  (define-instruction effect (pop-multiple)
684    [(op) `(asm ,info ,(asm-pop-multiple (info-kill*-kill* info)))])
685
686  (define-instruction effect (push-multiple)
687    [(op) `(asm ,info ,(asm-push-multiple (info-kill*-live*-live* info)))])
688
689  (define-instruction effect (pop-fpmultiple)
690    [(op) `(asm ,info ,(asm-pop-fpmultiple (info-kill*-kill* info)))])
691
692  (define-instruction effect (push-fpmultiple)
693    [(op) `(asm ,info ,(asm-push-fpmultiple (info-kill*-live*-live* info)))])
694
695  (define-instruction effect save-flrv
696    [(op) `(asm ,info ,(asm-push-fpmultiple (list %Cfpretval)))])
697
698  (define-instruction effect restore-flrv
699    [(op) `(asm ,info ,(asm-pop-fpmultiple (list %Cfpretval)))])
700
701  (define-instruction effect (invoke-prelude)
702    [(op) `(set! ,(make-live-info) ,%tc ,%Carg1)])
703)
704
705;;; SECTION 3: assembler
706(module asm-module (; required exports
707                     asm-move asm-move/extend asm-load asm-store asm-swap asm-library-call asm-library-call! asm-library-jump
708                     asm-mul asm-smulh asm-div asm-add asm-sub asm-logand asm-logor asm-logxor
709                     asm-pop-multiple asm-shiftop asm-logand asm-lognot asm-cmp/asr63 asm-popcount
710                     asm-logtest asm-fp-relop asm-relop asm-push-multiple asm-push-fpmultiple asm-pop-fpmultiple
711                     asm-indirect-jump asm-literal-jump
712                     asm-direct-jump asm-return-address asm-jump asm-conditional-jump
713                     asm-indirect-call asm-condition-code
714                     asm-fpmove-single asm-fl-cvt asm-fpt asm-fpmove asm-fpcastto asm-fpcastfrom
715                     asm-fptrunc asm-fpsingle
716                     asm-lock asm-lock+/- asm-cas asm-fence
717                     asm-fpop-2 asm-fpsqrt asm-c-simple-call
718                     asm-return asm-c-return asm-size
719                     asm-enter asm-foreign-call asm-foreign-callable
720                     asm-debug
721                     asm-read-counter
722                     asm-inc-cc-counter
723                     signed9? unsigned12? aligned-offset? funkymask shifted16
724                     ; threaded version specific
725                     asm-get-tc
726                     asm-activate-thread asm-deactivate-thread asm-unactivate-thread
727                     ; machine dependent exports
728                     asm-kill)
729
730  (define ax-register?
731    (case-lambda
732      [(x) (record-case x [(reg) r #t] [else #f])]
733      [(x reg) (record-case x [(reg) r (eq? r reg)] [else #f])]))
734
735  (define-who ax-ea-reg-code
736    (lambda (ea)
737      (record-case ea
738        [(reg) r (reg-mdinfo r)]
739        [else (sorry! who "ea=~s" ea)])))
740
741  (define ax-reg?
742    (lambda (ea)
743      (record-case ea
744        [(reg) ignore #t]
745        [else #f])))
746
747  (define ax-imm?
748    (lambda (ea)
749      (record-case ea
750        [(imm) ignore #t]
751        [else #f])))
752
753  (define-who ax-imm-data
754    (lambda (ea)
755      (record-case ea
756        [(imm) (n) n]
757        [else (sorry! who "ax-imm-data ea=~s" ea)])))
758
759  ; define-op sets up assembly op macros--
760  ; the opcode and all other expressions are passed to the specified handler--
761  (define-syntax define-op
762    (lambda (x)
763      (syntax-case x ()
764        [(k op handler e ...)
765         (with-syntax ([op (construct-name #'k "asmop-" #'op)])
766           #'(define-syntax op
767               (syntax-rules ()
768                 [(_ mneu arg (... ...))
769                  (handler 'mneu e ... arg (... ...))])))])))
770
771  (define-syntax emit
772    (lambda (x)
773      (syntax-case x ()
774        [(k op x ...)
775         (with-syntax ([emit-op (construct-name #'k "asmop-" #'op)])
776           #'(emit-op op x ...))])))
777
778  ;;; note that the assembler isn't clever--you must be very explicit about
779  ;;; which flavor you want, and there are a few new varieties introduced
780  ;;; (commented-out opcodes are not currently used by the assembler--
781  ;;; spaces are left to indicate possible size extensions)
782
783  (define-op movzi   movzi-op #b10) ; 16-bit immediate, shifted
784  (define-op movki   movzi-op #b11) ; 16-bit immediate, shifted
785  (define-op movi    movi-op)  ; immediate encoded as a mask
786
787  (define-op addi  add-imm-op  #b0) ; selector is at bit 30 (op)
788  (define-op subi  add-imm-op  #b1)
789
790  (define-op andi  logical-imm-op  #b00)
791  (define-op orri  logical-imm-op  #b01)
792  (define-op eori  logical-imm-op  #b10)
793
794  (define-op add   binary-op  #b0)
795  (define-op sub   binary-op  #b1)
796
797  (define-op and   logical-op  #b00)
798  (define-op orr   logical-op  #b01)
799  (define-op eor   logical-op  #b10)
800
801  (define-op cmp        cmp-op  #b1101011 #b00 0)
802  (define-op tst        cmp-op  #b1101010 #b00 0)
803  (define-op cmp/asr63  cmp-op  #b1101011 #b10 63)
804
805  (define-op cmpi  cmp-imm-op #b1) ; selector is at bit 30 (op)
806  (define-op cmni  cmp-imm-op #b0)
807  (define-op tsti  logical-imm-op #b11 #f `(reg . ,%real-zero))
808
809  (define-op mov   mov-op  #b1 #b0) ; selectors are a bit 31 (sf) and 21 (N)
810  (define-op movw  mov-op  #b0 #b0)
811  (define-op mvn   mov-op  #b1 #b1)
812
813  (define-op lsli  shifti-op #b10 'l) ; selector is at bit 29 (opc)
814  (define-op lsri  shifti-op #b10 'r)
815  (define-op asri  shifti-op #b00 'r)
816
817  (define-op lsl  shift-op #b00) ; selector is at bit 10 (op2)
818  (define-op lsr  shift-op #b01)
819  (define-op asr  shift-op #b10)
820
821  (define-op sxtb extend-op  #b100 #b1 #b000111) ; selectors are at bits 29 (sfc+opc), 22 (N), and 10 (imms)
822  (define-op sxth extend-op  #b100 #b1 #b001111)
823  (define-op sxtw extend-op  #b100 #b1 #b011111)
824  (define-op uxtb extend-op  #b010 #b0 #b000111)
825  (define-op uxth extend-op  #b010 #b0 #b001111)
826
827  (define-op mul   mul-op  #b000) ; selector is at bit 21
828  (define-op smulh mul-op  #b010)
829
830  (define-op sdiv  div-op)
831
832  (define-op cnt    cnt-op)
833  (define-op addv.b addv.b-op)
834
835  ;; scaled variants (offset must be aligned):
836  (define-op ldri    load-imm-op  3 #b11 #b0 #b01) ; selectors are at bits 30 (size), 26, and 22 (opc)
837  (define-op ldrbi   load-imm-op  0 #b00 #b0 #b01)
838  (define-op ldrhi   load-imm-op  1 #b01 #b0 #b01)
839  (define-op ldrwi   load-imm-op  2 #b10 #b0 #b01)
840  (define-op ldrfi   load-imm-op  3 #b11 #b1 #b01)
841  (define-op ldrfsi  load-imm-op  2 #b10 #b1 #b01) ; single-precision
842
843  (define-op ldrsbi  load-imm-op  0 #b00 #b0 #b10)
844  (define-op ldrshi  load-imm-op  1 #b01 #b0 #b10)
845  (define-op ldrswi  load-imm-op  2 #b10 #b0 #b10)
846
847  (define-op stri    load-imm-op  3 #b11 #b0 #b00)
848  (define-op strbi   load-imm-op  0 #b00 #b0 #b00)
849  (define-op strhi   load-imm-op  1 #b01 #b0 #b00)
850  (define-op strwi   load-imm-op  2 #b10 #b0 #b00)
851  (define-op strfi   load-imm-op  3 #b11 #b1 #b00)
852  (define-op strfsi  load-imm-op  2 #b10 #b1 #b00) ; single-precision
853
854  ;; unscaled variants (offset must be signed9):
855  (define-op lduri    load-unscaled-imm-op  #b11 #b0 #b01) ; selectors are at bits 30 (size), 26, and 22 (opc)
856  (define-op ldurbi   load-unscaled-imm-op  #b00 #b0 #b01)
857  (define-op ldurhi   load-unscaled-imm-op  #b01 #b0 #b01)
858  (define-op ldurwi   load-unscaled-imm-op  #b10 #b0 #b01)
859  (define-op ldurfi   load-unscaled-imm-op  #b11 #b1 #b01)
860  (define-op ldurfsi  load-unscaled-imm-op  #b10 #b1 #b01) ; single-precision
861
862  (define-op ldursbi  load-unscaled-imm-op  #b00 #b0 #b10)
863  (define-op ldurshi  load-unscaled-imm-op  #b01 #b0 #b10)
864  (define-op ldurswi  load-unscaled-imm-op  #b10 #b0 #b10)
865
866  (define-op sturi    load-unscaled-imm-op  #b11 #b0 #b00)
867  (define-op sturbi   load-unscaled-imm-op  #b00 #b0 #b00)
868  (define-op sturhi   load-unscaled-imm-op  #b01 #b0 #b00)
869  (define-op sturwi   load-unscaled-imm-op  #b10 #b0 #b00)
870  (define-op sturfi   load-unscaled-imm-op  #b11 #b1 #b00)
871  (define-op sturfsi  load-unscaled-imm-op  #b10 #b1 #b00) ; single-precision
872
873  (define-op ldr     load-op     #b11 #b0 #b01)  ; selectors are at bits 30 (size), 26, and 22 (opc)
874  (define-op ldrw    load-op     #b10 #b0 #b01)
875  (define-op ldrh    load-op     #b01 #b0 #b01)
876  (define-op ldrb    load-op     #b00 #b0 #b01)
877  (define-op ldrf    load-op     #b11 #b1 #b01)
878  (define-op ldrfs   load-op     #b10 #b1 #b01)
879
880  (define-op ldrsw   load-op     #b10 #b0 #b10)
881  (define-op ldrsh   load-op     #b01 #b0 #b10)
882  (define-op ldrsb   load-op     #b00 #b0 #b10)
883
884  (define-op str     load-op     #b11 #b0 #b00)
885  (define-op strw    load-op     #b10 #b0 #b00)
886  (define-op strh    load-op     #b01 #b0 #b00)
887  (define-op strb    load-op     #b00 #b0 #b00)
888  (define-op strf    load-op     #b11 #b1 #b00)
889  (define-op strfs   load-op     #b10 #b1 #b00)
890
891  (define-op ldr/postidx  load-idx-op  #b01 #b0 #b01) ; selectors are at bits 22 (opc), 26, and 10
892  (define-op str/preidx   load-idx-op  #b00 #b0 #b11)
893
894  (define-op ldrf/postidx load-idx-op  #b01 #b1 #b01)
895  (define-op strf/preidx  load-idx-op  #b00 #b1 #b11)
896
897  (define-op ldrp/postidx loadp-idx-op  #b10 #b0 #b001 #b1) ; selectors are at bits 30 (opc), 26, 23, and 22 (L)
898  (define-op strp/preidx  loadp-idx-op  #b10 #b0 #b011 #b0)
899
900  (define-op ldrpf/postidx loadp-idx-op  #b01 #b1 #b001 #b1)
901  (define-op strpf/preidx  loadp-idx-op  #b01 #b1 #b011 #b0)
902
903  (define-op ldxr   ldxr-op      #b1 `(reg . ,%real-zero))
904  (define-op stxr   ldxr-op      #b0)
905
906  (define-op dmbst    dmb-op #b1110)
907  (define-op dmbish   dmb-op #b1011)
908  (define-op dmbishld dmb-op #b1001)
909  (define-op dmbishst dmb-op #b1010)
910
911  (define-op bnei  branch-imm-op       (ax-cond 'ne))
912  (define-op beqi  branch-imm-op       (ax-cond 'eq))
913  (define-op brai  branch-imm-op       (ax-cond 'al))
914
915  (define-op br    branch-reg-op       #b00)
916  (define-op blr   branch-reg-op       #b01)
917
918  (define-op b     branch-always-label-op)
919
920  (define-op beq   branch-label-op     (ax-cond 'eq))
921  (define-op bne   branch-label-op     (ax-cond 'ne))
922  (define-op blt   branch-label-op     (ax-cond 'lt))
923  (define-op ble   branch-label-op     (ax-cond 'le))
924  (define-op bgt   branch-label-op     (ax-cond 'gt))
925  (define-op bge   branch-label-op     (ax-cond 'ge))
926  (define-op bcc   branch-label-op     (ax-cond 'cc))
927  (define-op bcs   branch-label-op     (ax-cond 'cs))
928  (define-op bvc   branch-label-op     (ax-cond 'vc))
929  (define-op bvs   branch-label-op     (ax-cond 'vs))
930  (define-op bls   branch-label-op     (ax-cond 'ls))
931  (define-op bhi   branch-label-op     (ax-cond 'hi))
932
933  (define-op adr   adr-op)
934  (define-op ret   ret-op)
935
936  (define-op fcvt.s->d  fcvt-op  #b00 #b01)
937  (define-op fcvt.d->s  fcvt-op  #b01 #b00)
938
939  (define-op fcvtzs  fdcvt-op  #b11 #b000) ; selectors are at bits 19 (mode) and 1 6(opcode)
940  (define-op scvtf   fdcvt-op  #b00 #b010)
941
942  (define-op fmov       fmov-op  #b0 #b000 #b1) ; selectors are at bits 31, 16, and 14
943  (define-op fmov.f->g  fmov-op  #b1 #b110 #b0)
944  (define-op fmov.g->f  fmov-op  #b1 #b111 #b0)
945
946  (define-op fcmp fcmp-op)
947
948  (define-op rev    rev-op  #b11) ; selector is at bit 10 (opc)
949  (define-op rev16  rev-op  #b01)
950  (define-op rev32  rev-op  #b10)
951
952  (define-op mrs  mrs-op)
953
954  (define-op und  und-op)
955
956  (define-op fadd  f-arith-op  #b0010) ; selector is at bit 12
957  (define-op fsub  f-arith-op  #b0011)
958  (define-op fmul  f-arith-op  #b0000)
959  (define-op fdiv  f-arith-op  #b0001)
960
961  (define-op fsqrt fsqrt-op)
962
963  (define movzi-op
964    (lambda (op opc dest imm shift code*)
965      (emit-code (op dest imm shift code*)
966        [31 #b1]
967        [29 opc]
968        [23 #b100101]
969        [21 shift] ; `shift` is implicitly multiplied by 16
970        [5  imm]
971        [0  (ax-ea-reg-code dest)])))
972
973  (define movi-op
974    (lambda (op dest imm n+immr+imms code*)
975      (let ([n (car n+immr+imms)]
976            [immr (cadr n+immr+imms)]
977            [imms (caddr n+immr+imms)])
978        (emit-code (op dest imm n+immr+imms code*)
979          [23 #b101100100]
980          [22 n]
981          [16 immr]
982          [10 imms]
983          [5  #b11111]
984          [0  (ax-ea-reg-code dest)]))))
985
986  (define add-imm-op
987    (lambda (op opcode set-cc? dest src imm code*)
988      (emit-code (op dest src imm (and set-cc? #t) code*)
989        [31 #b1]
990        [30 opcode]
991        [29 (if set-cc? #b1 #b0)]
992        [24 #b10001]
993        [22 #b00] ; shift
994        [10 imm]
995        [5  (ax-ea-reg-code src)]
996        [0  (ax-ea-reg-code dest)])))
997
998  (define logical-imm-op
999    (lambda (op opcode set-cc? dest src imm code*)
1000      (safe-assert (not set-cc?)) ; but opcode may imply setting condition codes
1001      (let ([n+immr+imms (funkymask imm)])
1002        (let ([n (car n+immr+imms)]
1003              [immr (cadr n+immr+imms)]
1004              [imms (caddr n+immr+imms)])
1005          (emit-code (op dest src imm code*)
1006            [31 #b1]
1007            [29 opcode]
1008            [23 #b100100]
1009            [22 n]
1010            [16 immr]
1011            [10 imms]
1012            [5  (ax-ea-reg-code src)]
1013            [0  (ax-ea-reg-code dest)])))))
1014
1015  (define binary-op
1016    (lambda (op opcode set-cc? dest src0 src1 code*)
1017      (emit-code (op dest src0 src1 (and set-cc? #t) code*)
1018        [31 #b1]
1019        [30 opcode]
1020        [29 (if set-cc? #b1 #b0)]
1021        [24 #b01011]
1022        [22 #b00] ; shift type (applied to src1)
1023        [21 #b0]
1024        [16 (ax-ea-reg-code src1)]
1025        [10 #b000000] ; shift amount
1026        [5  (ax-ea-reg-code src0)]
1027        [0  (ax-ea-reg-code dest)])))
1028
1029  (define logical-op
1030    (lambda (op opcode set-cc? dest src0 src1 code*)
1031      (safe-assert (not set-cc?))
1032      (emit-code (op dest src0 src1 code*)
1033        [31 #b1]
1034        [29 opcode]
1035        [24 #b01010]
1036        [22 #b00] ; shift type (applied to src1)
1037        [21 #b0]
1038        [16 (ax-ea-reg-code src1)]
1039        [10 #b000000] ; shift amount
1040        [5  (ax-ea-reg-code src0)]
1041        [0  (ax-ea-reg-code dest)])))
1042
1043  (define cmp-op
1044    (lambda (op opcode shift-type shift-amt src0 src1 code*)
1045      (emit-code (op src0 src1 code*)
1046        [31 #b1]
1047        [24 opcode]
1048        [22 shift-type] ; applied to src1
1049        [21 #b0]
1050        [16 (ax-ea-reg-code src1)]
1051        [10 shift-amt]
1052        [5  (ax-ea-reg-code src0)]
1053        [0  #b11111])))
1054
1055  (define cmp-imm-op
1056    (lambda (op opc src imm code*)
1057      (safe-assert (unsigned12? imm))
1058      (emit-code (op src imm code*)
1059        [31 #b1]
1060        [30 opc]
1061        [24 #b110001]
1062        [22 #b00] ; shift amount (applied to immediate)
1063        [10 imm]
1064        [5  (ax-ea-reg-code src)]
1065        [0  #b11111])))
1066
1067  (define mov-op
1068    (lambda (op sz neg dest src code*)
1069      (emit-code (op dest src code*)
1070        [31 sz]
1071        [22 #b010101000]
1072        [21 neg]
1073        [16 (ax-ea-reg-code src)]
1074        [5  #b11111]
1075        [0  (ax-ea-reg-code dest)])))
1076
1077  (define shifti-op
1078    (lambda (op opcode dir dest src imm code*)
1079      (emit-code (op dest src imm code*)
1080        [31 #b1]
1081        [29 opcode]
1082        [22 #b1001101]
1083        [16 (if (eq? dir 'l)
1084                (fx- 64 imm)
1085                imm)]
1086        [10 (if (eq? dir 'l)
1087                (fx- 63 imm)
1088                63)]
1089        [5  (ax-ea-reg-code src)]
1090        [0  (ax-ea-reg-code dest)])))
1091
1092  (define shift-op
1093    (lambda (op opcode dest src0 src1 code*)
1094      (emit-code (op dest src0 src1 code*)
1095        [29 #b100]
1096        [21 #b11010110]
1097        [16 (ax-ea-reg-code src1)]
1098        [12 #b0010]
1099        [10 opcode]
1100        [5  (ax-ea-reg-code src0)]
1101        [0  (ax-ea-reg-code dest)])))
1102
1103  (define extend-op
1104    (lambda (op sf+opc n imms-as-op2 dest src code*)
1105      (emit-code (op dest src code*)
1106        [29 sf+opc]
1107        [23 #b100110]
1108        [22 n]
1109        [16 #b000000]
1110        [10 imms-as-op2]
1111        [5  (ax-ea-reg-code src)]
1112        [0  (ax-ea-reg-code dest)])))
1113
1114  (define mul-op
1115    (lambda (op opcode dest src0 src1 code*)
1116      (emit-code (op dest src0 src1 code*)
1117        [29 #b100]
1118        [24 #b11011]
1119        [21 opcode]
1120        [16 (ax-ea-reg-code src1)]
1121        [10 #b011111]
1122        [5  (ax-ea-reg-code src0)]
1123        [0  (ax-ea-reg-code dest)])))
1124
1125  (define div-op
1126    (lambda (op dest src0 src1 code*)
1127      (emit-code (op dest src0 src1 code*)
1128        [29 #b100]
1129        [21 #b11010110]
1130        [16 (ax-ea-reg-code src1)]
1131        [10 #b000011]
1132        [5  (ax-ea-reg-code src0)]
1133        [0  (ax-ea-reg-code dest)])))
1134
1135  (define cnt-op
1136    (lambda (op dest src code*)
1137      (emit-code (op dest src code*)
1138        [29 #b000]
1139        [24 #b01110]
1140        [22 #b00] ; size
1141        [17 #b10000]
1142        [10 #b0010110]
1143        [5  (ax-ea-reg-code src)]
1144        [0  (ax-ea-reg-code dest)])))
1145
1146  (define addv.b-op
1147    (lambda (op dest src code*)
1148      (emit-code (op dest src code*)
1149        [29 #b000]
1150        [24 #b01110]
1151        [22 #b00] ; size: 00 => b
1152        [17 #b11000]
1153        [10 #b1101110]
1154        [5  (ax-ea-reg-code src)]
1155        [0  (ax-ea-reg-code dest)])))
1156
1157  (define load-imm-op
1158    (lambda (op scale size kind opc dest src imm code*)
1159      (emit-code (op dest src imm code*)
1160        [30 size]
1161        [27 #b111]
1162        [26 kind]
1163        [24 #b01]
1164        [22 opc]
1165        [10 (fxsrl imm scale)]
1166        [5  (ax-ea-reg-code src)]
1167        [0  (ax-ea-reg-code dest)])))
1168
1169  (define load-unscaled-imm-op
1170    (lambda (op size kind opc dest src imm code*)
1171      (emit-code (op dest src imm code*)
1172        [30 size]
1173        [27 #b111]
1174        [26 kind]
1175        [24 #b00]
1176        [22 opc]
1177        [21 #b0]
1178        [12 (fxand imm #x1FF)]
1179        [10 #b00]
1180        [5  (ax-ea-reg-code src)]
1181        [0  (ax-ea-reg-code dest)])))
1182
1183  (define load-op
1184    (lambda (op size kind opc dest src0 src1 code*)
1185      (emit-code (op dest src0 src1 code*)
1186        [30 size]
1187        [27 #b111]
1188        [26 kind]
1189        [24 #b00]
1190        [22 opc]
1191        [21 #b1]
1192        [16 (ax-ea-reg-code src1)]
1193        [13 #b011] ; option, where #x011 => 64-bit source address
1194        [12 #b0] ; shift
1195        [10 #b10]
1196        [5  (ax-ea-reg-code src0)]
1197        [0  (ax-ea-reg-code dest)])))
1198
1199  (define load-idx-op
1200    (lambda (op opc mode idx dest src imm code*)
1201      (emit-code (op dest src imm code*)
1202        [30 #b11]
1203        [27 #b111]
1204        [26 mode]
1205        [24 #b00]
1206        [22 opc]
1207        [21 #b0]
1208        [12 (fxand imm (fx- (fxsll 1 9) 1))]
1209        [10 idx]
1210        [5  (ax-ea-reg-code src)]
1211        [0  (ax-ea-reg-code dest)])))
1212
1213    (define loadp-idx-op
1214      (lambda (op opc mode opx l dest0 dest1 src imm code*)
1215        (emit-code (op dest0 dest1 src imm code*)
1216          [30 opc]
1217          [27 #b101]
1218          [26 mode]
1219          [23 opx]
1220          [22 l]
1221          [15 (fxand (fxsrl imm 3) (fx- (fxsll 1 7) 1))]
1222          [10 (ax-ea-reg-code dest1)]
1223          [5  (ax-ea-reg-code src)]
1224          [0 (ax-ea-reg-code dest0)])))
1225
1226  (define ldxr-op
1227    (lambda (op mode dest2 dest src code*)
1228      (emit-code (op dest2 dest src code*)
1229        [30 #b11]
1230        [23 #b0010000]
1231        [22 mode]
1232        [21 0]
1233        [16 (ax-ea-reg-code dest2)]
1234        [15 #b0]
1235        [10 #b11111]
1236        [5  (ax-ea-reg-code src)]
1237        [0  (ax-ea-reg-code dest)])))
1238
1239  (define dmb-op
1240    (lambda (op mode code*)
1241      (emit-code (op code*)
1242        [22 #b1101010100]
1243        [16 #b000011]
1244        [12 #b0011]
1245        [8  mode]
1246        [5  #b101]
1247        [0  #b11111])))
1248
1249  (define branch-imm-op
1250    (lambda (op cond-bits imm code*)
1251      (safe-assert (branch-disp? imm))
1252      (emit-code (op imm code*)
1253        [24 #b01010100]
1254        [5  (fxand (fxsra imm 2) (fx- (fxsll 1 19) 1))]
1255        [4  #b0]
1256        [0  cond-bits])))
1257
1258  (define branch-reg-op
1259    (lambda (op opcode reg code*)
1260      (emit-code (op reg code*)
1261        [24 #b11010110]
1262        [23 #b0]
1263        [21 opcode]
1264        [16 #b11111]
1265        [12 #b0000]
1266        [10 #b00]
1267        [5  (ax-ea-reg-code reg)]
1268        [0  #b00000])))
1269
1270  (define-who branch-always-label-op
1271    (lambda (op dest code*)
1272      (record-case dest
1273        [(label) (offset l)
1274         (safe-assert (uncond-branch-disp? offset))
1275         (emit-code (op dest code*)
1276           [26 #b000101]
1277           [0  (fxand (fxsra (fx+ offset 4) 2) (fx- (fxsll 1 26) 1))])]
1278        [else (sorry! who "unexpected dest ~s" dest)])))
1279
1280  (define-who branch-label-op
1281    (lambda (op cond-bits dest code*)
1282      (define (emit-branch offset)
1283	(safe-assert (branch-disp? (+ offset 4)))
1284	(emit-code (op dest code*)
1285          [24 #b01010100]
1286	  [5  (fxand (fxsra (fx+ offset 4) 2) (fx- (fxsll 1 19) 1))]
1287	  [4  #b0]
1288	  [0  cond-bits]))
1289      (record-case dest
1290        [(label) (offset l) (emit-branch offset)]
1291	[(imm) (n) (emit-branch n)] ; generated for long branches
1292        [else (sorry! who "unexpected dest ~s" dest)])))
1293
1294  (define adr-op
1295    (lambda (op dest imm code*)
1296      (emit-code (op dest imm code*)
1297        [31 #b0]
1298        [29 (fxand imm #b11)]
1299        [24 #b10000]
1300        [5  (fxand (fxsra imm 2) (fx- (fxsll 1 19) 1))]
1301        [0  (ax-ea-reg-code dest)])))
1302
1303  (define ret-op
1304    (lambda (op src code*)
1305      (emit-code (op src code*)
1306        [25 #b1101011]
1307        [21 #b0010]
1308        [16 #b11111]
1309        [12 #b0000]
1310        [10 #b00]
1311        [5  (ax-ea-reg-code src)]
1312        [0  #b00000])))
1313
1314  (define fcvt-op
1315    (lambda (op type opc dest src code*)
1316      (emit-code (op dest src code*)
1317        [24 #b00011110]
1318        [22 type]
1319        [17 #b10001]
1320        [15 opc]
1321        [10 #b10000]
1322        [5  (ax-ea-reg-code src)]
1323        [0  (ax-ea-reg-code dest)])))
1324
1325  (define fdcvt-op
1326    (lambda (op mode opcode dest src code*)
1327      (emit-code (op dest src code*)
1328        [29 #b100]
1329        [24 #b11110]
1330        [22 #b01] ; type
1331        [21 #b1]
1332        [19 mode]
1333        [16 opcode]
1334        [10 #b000000]
1335        [5  (ax-ea-reg-code src)]
1336        [0  (ax-ea-reg-code dest)])))
1337
1338  (define fmov-op
1339    (lambda (op sf opcode opsel dest src code*)
1340      (emit-code (op dest src code*)
1341        [31 sf]
1342        [24 #b0011110]
1343        [22 #b01] ; type
1344        [21 #b1]
1345        [19 #b00]
1346        [16 opcode]
1347        [15 #b0]
1348        [14 opsel]
1349        [10 #b0000]
1350        [5  (ax-ea-reg-code src)]
1351        [0  (ax-ea-reg-code dest)])))
1352
1353  (define f-arith-op
1354    (lambda (op opcode dest src0 src1 code*)
1355      (emit-code (op dest src0 src1 code*)
1356        [29 #b000]
1357        [24 #b11110]
1358        [22 #b01] ; type
1359        [21 #b1]
1360        [16 (ax-ea-reg-code src1)]
1361        [12 opcode]
1362        [10 #b10]
1363        [5  (ax-ea-reg-code src0)]
1364        [0  (ax-ea-reg-code dest)])))
1365
1366  (define fsqrt-op
1367    (lambda (op dest src code*)
1368      (emit-code (op dest src code*)
1369        [29 #b000]
1370        [24 #b11110]
1371        [22 #b01] ; type
1372        [21 #b1]
1373        [17 #b0000]
1374        [15 #b11] ; opc
1375        [10 #b10000]
1376        [5  (ax-ea-reg-code src)]
1377        [0  (ax-ea-reg-code dest)])))
1378
1379  (define fcmp-op
1380    (lambda (op src0 src1 code*)
1381      (emit-code (op src0 src1 code*)
1382        [24 #b00011110]
1383        [22 #b01]
1384        [21 #b1]
1385        [16 (ax-ea-reg-code src1)]
1386        [10 #b001000]
1387        [5  (ax-ea-reg-code src0)]
1388        [3  #b00] ; opc
1389        [0  #b000])))
1390
1391  (define rev-op
1392    (lambda (op opc dest src code*)
1393      (emit-code (op dest src code*)
1394        [29 #b110]
1395        [21 #b11010110]
1396        [16 #b00000]
1397        [12 #b0000]
1398        [10 opc]
1399        [5  (ax-ea-reg-code src)]
1400        [0  (ax-ea-reg-code dest)])))
1401
1402  (define mrs-op
1403    (lambda (op op0 op1 crn crm op2 dest code*)
1404      (emit-code (op dest code*)
1405        [22 #b1101010100]
1406        [20 #b11]
1407        [19 op0]
1408        [16 op1]
1409        [12 crn]
1410        [8  crm]
1411        [5  op2]
1412        [0 (ax-ea-reg-code dest)])))
1413
1414  (define und-op
1415    (lambda (op code*)
1416      (emit-code (op code*)
1417        [0 0])))
1418
1419  ;; asm helpers
1420
1421  (define-who ax-cond
1422    (lambda (x)
1423      (case x
1424        [(eq) #b0000] ; fl=
1425        [(ne) #b0001]
1426        [(cs) #b0010] ; u<
1427        [(cc) #b0011] ; u>=, fl< (for fl<, do we need this and mi?)
1428        [(mi) #b0100] ; fl< (for fl<, do we need this and cc?)
1429        [(pl) #b0101]
1430        [(vs) #b0110]
1431        [(vc) #b0111]
1432        [(hi) #b1000] ; u>
1433        [(ls) #b1001] ; u<=, fl<=
1434        [(ge) #b1010] ; fl>=
1435        [(lt) #b1011]
1436        [(gt) #b1100] ; fl>
1437        [(le) #b1101]
1438        [(al) #b1110]
1439        [else (sorry! who "unrecognized cond name ~s" x)])))
1440
1441  (define-syntax emit-code
1442    (lambda (x)
1443      ; NB: probably won't need emit-code to weed out #f
1444      (define build-maybe-cons*
1445        (lambda (e* e-ls)
1446          (if (null? e*)
1447              e-ls
1448              #`(let ([t #,(car e*)] [ls #,(build-maybe-cons* (cdr e*) e-ls)])
1449                  (if t (cons t ls) ls)))))
1450      (syntax-case x ()
1451        [(_ (op opnd ... ?code*) chunk ...)
1452         (let ([safe-check (lambda (e)
1453                             (if (fx= (debug-level) 0)
1454                                 e
1455                                 #`(let ([code #,e])
1456                                     (unless (<= 0 code (sub1 (expt 2 32)))
1457                                       (sorry! 'emit-code "bad result ~s for ~s"
1458                                               code
1459                                               (list op opnd ...)))
1460                                     code)))])
1461           (build-maybe-cons* #`((build long #,(safe-check #`(byte-fields chunk ...))))
1462             #'(aop-cons* `(asm ,op ,opnd ...) ?code*)))])))
1463
1464  (define-syntax build
1465    (syntax-rules ()
1466      [(_ x e)
1467       (and (memq (datum x) '(byte word long)) (integer? (datum e)))
1468       (begin
1469         (safe-assert (fixnum? (datum e)))
1470         (quote (x . e)))]
1471      [(_ x e)
1472       (memq (datum x) '(byte word long))
1473       (cons 'x e #;(let ([x e]) (safe-assert (not (eqv? x #x53401c17))) x))]))
1474
1475  (define-syntax byte-fields
1476    ; NB: make more efficient for fixnums
1477    (syntax-rules ()
1478      [(byte-fields (n e) ...)
1479       (andmap fixnum? (datum (n ...)))
1480       (+ (bitwise-arithmetic-shift-left e n) ...)]))
1481
1482  (define signed9?
1483    (lambda (imm)
1484      (and (fixnum? imm) (fx<= (fx- (expt 2 8)) imm (fx- (expt 2 8) 1)))))
1485
1486  (define unsigned12?
1487    (lambda (imm)
1488      (and (fixnum? imm)  ($fxu< imm (expt 2 12)))))
1489
1490  (define aligned-offset?
1491    (case-lambda
1492     [(imm) (aligned-offset? imm (constant log2-ptr-bytes))]
1493     [(imm log2-bytes)
1494      (and (fixnum? imm)
1495           (eqv? 0 (fxand imm (fx- (fxsll 1 log2-bytes) 1)))
1496           ($fxu< imm (expt 2 (fx+ 12 log2-bytes))))]))
1497
1498  (define funkymask
1499    (lambda (imm)
1500      ;; encode as `(list N immr imms)`, based on the LLVM implementation.
1501      (cond
1502        [(eqv? imm 0) #f]  ; can't do all 0s
1503        [(eqv? imm -1) #f] ; can't do all 1s
1504        [(>= imm (sub1 (expt 2 63))) #f]  ; can't do all 1s or more
1505        [(<= imm (- (expt 2 63))) #f] ; can't less than most negative
1506        [else
1507         ;; Immediate is representable in 64 bits without being 0 or -1.
1508         ;; First, find the smallest width that can be replicated to match `imm`:
1509         (let* ([imm (bitwise-and imm (sub1 (expt 2 64)))] ; view as positive
1510                [width (let loop ([width 32])
1511                        (let ([mask (sub1 (bitwise-arithmetic-shift-left 1 width))])
1512                          (if (= (bitwise-and imm mask)
1513                                 (bitwise-and (bitwise-arithmetic-shift-right imm width) mask))
1514                              (if (fx= width 2)
1515                                  2
1516                                  (loop (fxsrl width 1)))
1517                              (fx* width 2))))])
1518           (let ([v (bitwise-and imm (sub1 (bitwise-arithmetic-shift-left 1 width)))])
1519             ;; The encoding will work if v matches 1*0*1* or 0*1*0*
1520             (let* ([count-trailing (lambda (val v)
1521                                      (let loop ([v v])
1522                                        (if (= val (bitwise-and v 1))
1523                                            (fx+ 1 (loop (bitwise-arithmetic-shift-right v 1)))
1524                                            0)))]
1525                    [0s (count-trailing 0 v)]
1526                    [1s (count-trailing 1 (bitwise-arithmetic-shift-right v 0s))]
1527                    [vx (bitwise-arithmetic-shift-right v (fx+ 0s 1s))])
1528               (let-values ([(rotate total-1s)
1529                             (cond
1530                               [(eqv? 0 vx)
1531                                (if (fx= 0s 0)
1532                                    ;; No rotation needed
1533                                    (values 0 1s)
1534                                    ;; Rotate left to fill in `0s` zeros, and the encoding works
1535                                    (values (fx- width 0s) 1s))]
1536                               [(eqv? 0 0s)
1537                                ;; There could be more 1s at the top that we can rotate around
1538                                (let* ([0s (count-trailing 0 vx)])
1539                                  ;; Assert: 0s < width - 1s
1540                                  (cond
1541                                    [(= (bitwise-arithmetic-shift vx 0s)
1542                                        (sub1 (bitwise-arithmetic-shift-left 1 (fx- width 0s 1s))))
1543                                     ;; All 1s are in lowest bits or highest bits, so rotate
1544                                     (values (fx- width 0s 1s)
1545                                             (fx- width 0s))]
1546                                    [else (values #f #f)]))]
1547                               [else (values #f #f)])])
1548                 (and rotate
1549                      (list (if (fx= width 64) 1 0)
1550                            rotate
1551                            (bitwise-ior (case width
1552                                           [(2)  #b111100]
1553                                           [(4)  #b111000]
1554                                           [(8)  #b110000]
1555                                           [(16) #b100000]
1556                                           [else 0])
1557                                         (fx- total-1s 1))))))))])))
1558
1559  (define shifted16
1560    (lambda (imm)
1561      (let loop ([shift 0])
1562        (and (fx< shift 4)
1563             (if (= imm (bitwise-and (bitwise-arithmetic-shift-left #xFFFF (fx* shift 16)) imm))
1564                 (cons (bitwise-arithmetic-shift-right imm (fx* shift 16)) shift)
1565                 (loop (fx+ shift 1)))))))
1566
1567  (define branch-disp?
1568    (lambda (x)
1569      (and (fixnum? x)
1570           (fx<= (- (expt 2 20)) x (- (expt 2 20) 1))
1571           (not (fxlogtest x #b11)))))
1572
1573  (define uncond-branch-disp?
1574    (lambda (x)
1575      (let ([x (+ x 4)]) ; because `branch-always-label-op` adds 4
1576        (and (fixnum? x)
1577             (fx<= (- (expt 2 27)) x (- (expt 2 27) 1))
1578             (not (fxlogtest x #b11))))))
1579
1580  (define asm-size
1581    (lambda (x)
1582      (case (car x)
1583        [(asm arm64-abs arm64-jump arm64-call) 0]
1584        [(long) 4]
1585        [else 8])))
1586
1587  (define ax-mov64
1588    (lambda (dest n code*)
1589      (emit movzi dest (logand n #xffff) 0
1590        (emit movki dest (logand (bitwise-arithmetic-shift-right n 16) #xffff) 1
1591          (emit movki dest (logand (bitwise-arithmetic-shift-right n 32) #xffff) 2
1592            (emit movki dest (logand (bitwise-arithmetic-shift-right n 48) #xffff) 3
1593               code*))))))
1594
1595  (define ax-movi
1596    (lambda (dest n code*)
1597      (cond
1598        [(shifted16 n) =>
1599         (lambda (imm+shift)
1600           (emit movzi dest (car imm+shift) (cdr imm+shift) code*))]
1601        [(funkymask n) =>
1602         (lambda (n+immr+imms)
1603           (emit movi dest n n+immr+imms code*))]
1604        [(unsigned12? n)
1605         (emit movzi dest 0 0
1606           (emit addi #f dest dest n code*))]
1607        [(unsigned12? (- n))
1608         (emit movzi dest 0 0
1609           (emit subi #f dest dest (- n) code*))]
1610        [else
1611         (let loop ([n n] [shift 0] [init? #t])
1612           (cond
1613             [(or (eqv? n 0) (fx= shift 4)) code*]
1614             [else
1615              (let ([m (logand n #xFFFF)])
1616                (cond
1617                  [(eqv? m 0)
1618                   (loop (bitwise-arithmetic-shift-right n 16) (fx+ shift 1) init?)]
1619                  [else
1620                   (let ([code* (loop (bitwise-arithmetic-shift-right n 16) (fx+ shift 1) #f)])
1621                     (if init?
1622                         (emit movzi dest m shift code*)
1623                         (emit movki dest m shift code*)))]))]))])))
1624
1625  (define-who asm-move
1626    (lambda (code* dest src)
1627      ;; move pseudo instruction used by set! case in select-instruction
1628      ;; guarantees dest is a reg and src is reg, mem, or imm OR dest is
1629      ;; mem and src is reg.
1630      (Trivit (dest src)
1631        (define (bad!) (sorry! who "unexpected combination of src ~s and dest ~s" src dest))
1632        (cond
1633          [(ax-reg? dest)
1634           (record-case src
1635             [(reg) ignore (emit mov dest src code*)]
1636             [(imm) (n)
1637              (ax-movi dest n code*)]
1638             [(literal) stuff
1639              (ax-mov64 dest 0
1640                (asm-helper-relocation code* (cons 'arm64-abs stuff)))]
1641             [(disp) (n breg)
1642              (cond
1643                [(aligned-offset? n)
1644                 (emit ldri dest `(reg . ,breg) n code*)]
1645                [else
1646                 (assert (signed9? n))
1647                 (emit lduri dest `(reg . ,breg) n code*)])]
1648             [(index) (n ireg breg)
1649              (safe-assert (eqv? n 0))
1650              (emit ldr dest `(reg . ,breg) `(reg . ,ireg) code*)]
1651             [else (bad!)])]
1652          [(ax-reg? src)
1653           (record-case dest
1654             [(disp) (n breg)
1655              (cond
1656                [(aligned-offset? n)
1657                 (emit stri src `(reg . ,breg) n code*)]
1658                [else
1659                 (assert (signed9? n))
1660                 (emit sturi src `(reg . ,breg) n code*)])]
1661             [(index) (n ireg breg)
1662              (safe-assert (eqv? n 0))
1663              (emit str src `(reg . ,breg) `(reg . ,ireg) code*)]
1664             [else (bad!)])]
1665          [else (bad!)]))))
1666
1667  (define-who asm-move/extend
1668    (lambda (op)
1669      (lambda (code* dest src)
1670        (Trivit (dest src)
1671          (case op
1672            [(sext8) (emit sxtb dest src code*)]
1673            [(sext16) (emit sxth dest src code*)]
1674            [(sext32) (emit sxtw dest src code*)]
1675            [(zext8) (emit uxtb dest src code*)]
1676            [(zext16) (emit uxth dest src code*)]
1677            [(zext32) (emit movw dest src code*)] ; movw zero-extends
1678            [else (sorry! who "unexpected op ~s" op)])))))
1679
1680  (module (asm-add asm-sub asm-logand asm-logor asm-logxor)
1681    (define-syntax asm-binop
1682      (syntax-rules ()
1683        [(_ opi op)
1684         (lambda (set-cc?)
1685           (lambda (code* dest src0 src1)
1686             (Trivit (dest src0 src1)
1687               (record-case src1
1688                 [(imm) (n) (emit opi set-cc? dest src0 n code*)]
1689                 [else (emit op set-cc? dest src0 src1 code*)]))))]))
1690
1691    (define asm-add (asm-binop addi add))
1692    (define asm-sub (asm-binop subi sub))
1693    (define asm-logand (asm-binop andi and))
1694    (define asm-logor (asm-binop orri orr))
1695    (define asm-logxor (asm-binop eori eor)))
1696
1697  (define asm-mul
1698    (lambda (code* dest src0 src1)
1699      (Trivit (dest src0 src1)
1700        (emit mul dest src0 src1 code*))))
1701
1702  (define asm-div
1703    (lambda (code* dest src0 src1)
1704      (Trivit (dest src0 src1)
1705        (emit sdiv dest src0 src1 code*))))
1706
1707  (define asm-smulh
1708    (lambda (code* dest src0 src1)
1709      (Trivit (dest src0 src1)
1710        (emit smulh dest src0 src1 code*))))
1711
1712  (define-who asm-cmp/asr63
1713    (lambda (code* src0 src1)
1714      (Trivit (src0 src1)
1715        (emit cmp/asr63 src0 src1 code*))))
1716
1717  (define-who asm-fl-cvt
1718    (lambda (op)
1719      (lambda (code* dest src)
1720        (Trivit (dest src)
1721          (case op
1722            [(single->double)
1723             (emit fcvt.s->d dest src code*)]
1724            [(double->single)
1725             (emit fcvt.d->s dest src code*)]
1726            [else (sorry! who "unrecognized op ~s" op)])))))
1727
1728  (define-who asm-load
1729    (lambda (type)
1730      (rec asm-load-internal
1731        (lambda (code* dest base index offset)
1732          (let ([n (nanopass-case (L16 Triv) offset
1733                     [(immediate ,imm) imm]
1734                     [else (sorry! who "unexpected non-immediate offset ~s" offset)])])
1735            ;; Assuming that `n` is either aligned and in range (fits
1736            ;; unsigned in 12 bits after shifting by type bits) or unaligned
1737            ;; and small (fits in 9 bits)
1738            (Trivit (dest base)
1739              (cond
1740                [(eq? index %zero)
1741                 (cond
1742                   [(signed9? n)
1743                    (case type
1744                      [(integer-64 unsigned-64) (emit lduri dest base n code*)]
1745                      [(integer-32) (emit ldurswi dest base n code*)]
1746                      [(unsigned-32) (emit ldurwi dest base n code*)]
1747                      [(integer-16) (emit ldurshi dest base n code*)]
1748                      [(unsigned-16) (emit ldurhi dest base n code*)]
1749                      [(integer-8) (emit ldursbi dest base n code*)]
1750                      [(unsigned-8) (emit ldurbi dest base n code*)]
1751                      [else (sorry! who "unexpected mref type ~s" type)])]
1752                   [else
1753                    (case type
1754                      [(integer-64 unsigned-64) (emit ldri dest base n code*)]
1755                      [(integer-32) (emit ldrswi dest base n code*)]
1756                      [(unsigned-32) (emit ldrwi dest base n code*)]
1757                      [(integer-16) (emit ldrshi dest base n code*)]
1758                      [(unsigned-16) (emit ldrhi dest base n code*)]
1759                      [(integer-8) (emit ldrsbi dest base n code*)]
1760                      [(unsigned-8) (emit ldrbi dest base n code*)]
1761                      [else (sorry! who "unexpected mref type ~s" type)])])]
1762                [(eqv? n 0)
1763                  (Trivit (index)
1764                    (case type
1765                      [(integer-64 unsigned-64) (emit ldr dest base index code*)]
1766                      [(integer-32) (emit ldrsw dest base index code*)]
1767                      [(unsigned-32) (emit ldrw dest base index code*)]
1768                      [(integer-16) (emit ldrsh dest base index code*)]
1769                      [(unsigned-16) (emit ldrh dest base index code*)]
1770                      [(integer-8) (emit ldrsb dest base index code*)]
1771                      [(unsigned-8) (emit ldrb dest base index code*)]
1772                      [else (sorry! who "unexpected mref type ~s" type)]))]
1773                [else (sorry! who "expected %zero index or 0 offset, got ~s and ~s" index offset)])))))))
1774
1775  (define-who asm-store
1776    (lambda (type)
1777      (rec asm-store-internal
1778        (lambda (code* base index offset src)
1779          (let ([n (nanopass-case (L16 Triv) offset
1780                     [(immediate ,imm) imm]
1781                     [else (sorry! who "unexpected non-immediate offset ~s" offset)])])
1782            ;; Assuming that `n` is aligned and in range (fits
1783            ;; unsigned in 12 bits after shifting by type bits)
1784            (Trivit (src base)
1785              (cond
1786                [(eq? index %zero)
1787                 (cond
1788                   [(signed9? n)
1789                    (case type
1790                      [(integer-64 unsigned-64) (emit sturi src base n code*)]
1791                      [(integer-32 unsigned-32) (emit sturwi src base n code*)]
1792                      [(integer-16 unsigned-16) (emit sturhi src base n code*)]
1793                      [(integer-8 unsigned-8) (emit sturbi src base n code*)]
1794                      [else (sorry! who "unexpected mref type ~s" type)])]
1795                   [else
1796                    (case type
1797                      [(integer-64 unsigned-64) (emit stri src base n code*)]
1798                      [(integer-32 unsigned-32) (emit strwi src base n code*)]
1799                      [(integer-16 unsigned-16) (emit strhi src base n code*)]
1800                      [(integer-8 unsigned-8) (emit strbi src base n code*)]
1801                      [else (sorry! who "unexpected mref type ~s" type)])])]
1802                [(eqv? n 0)
1803                  (Trivit (index)
1804                    (case type
1805                      [(integer-64 unsigned-64) (emit str src base index code*)]
1806                      [(integer-32 unsigned-32) (emit strw src base index code*)]
1807                      [(integer-16 unsigned-16) (emit strh src base index code*)]
1808                      [(integer-8 unsigned-8) (emit strb src base index code*)]
1809                      [else (sorry! who "unexpected mref type ~s" type)]))]
1810                [else (sorry! who "expected %zero index or 0 offset, got ~s and ~s" index offset)])))))))
1811
1812  (define-who asm-fpop-2
1813    (lambda (op)
1814      (lambda (code* dest src1 src2)
1815        (Trivit (dest src1 src2)
1816          (case op
1817            [(fp+) (emit fadd dest src1 src2 code*)]
1818            [(fp-) (emit fsub dest src1 src2 code*)]
1819            [(fp*) (emit fmul dest src1 src2 code*)]
1820            [(fp/) (emit fdiv dest src1 src2 code*)]
1821            [else (sorry! who "unrecognized op ~s" op)])))))
1822
1823  (define asm-fpsqrt
1824    (lambda (code* dest src)
1825      (Trivit (dest src)
1826        (emit fsqrt dest src code*))))
1827
1828  (define-who asm-fpsingle
1829    (lambda (code* dest src)
1830      (Trivit (dest src)
1831        (emit fcvt.d->s dest src
1832          (emit fcvt.s->d dest dest
1833            code*)))))
1834
1835  (define asm-fptrunc
1836    (lambda (code* dest src)
1837      (Trivit (dest src)
1838        (emit fcvtzs dest src code*))))
1839
1840  (define asm-fpt
1841    (lambda (code* dest src)
1842      (Trivit (dest src)
1843        (emit scvtf dest src code*))))
1844
1845  (define-who asm-fpmove
1846    ;; fpmove pseudo instruction is used by set! case in
1847    ;; select-instructions! and generate-code; at most one of src or
1848    ;; dest can be an mref, and then the offset is double-aligned
1849    (lambda (code* dest src)
1850      (gen-fpmove who code* dest src #t)))
1851
1852  (define-who asm-fpmove-single
1853    (lambda (code* dest src)
1854      (gen-fpmove who code* dest src #f)))
1855
1856  (define gen-fpmove
1857    (lambda (who code* dest src double?)
1858      (Trivit (dest src)
1859        (record-case dest
1860          [(disp) (imm reg)
1861           (if double?
1862               (cond
1863                 [(aligned-offset? imm)
1864                  (emit strfi src (cons 'reg reg) imm code*)]
1865                 [else
1866                  (safe-assert (signed9? imm))
1867                  (emit sturfi src (cons 'reg reg) imm code*)])
1868               (cond
1869                 [(aligned-offset? imm 2)
1870                  (emit strfsi src (cons 'reg reg) imm code*)]
1871                 [else
1872                  (safe-assert (signed9? imm))
1873                  (emit sturfsi src (cons 'reg reg) imm code*)]))]
1874          [(index) (n ireg breg)
1875           (cond
1876             [(fx= n 0)
1877              (if double?
1878                  (emit strf src (cons 'reg ireg) (cons 'reg breg) code*)
1879                  (emit strfs src (cons 'reg ireg) (cons 'reg breg) code*))]
1880             [else
1881              (sorry! who "cannot handle indexed fp dest ref")])]
1882          [else
1883           (record-case src
1884             [(disp) (imm reg)
1885              (if double?
1886                  (cond
1887                    [(aligned-offset? imm)
1888                     (emit ldrfi dest (cons 'reg reg) imm code*)]
1889                    [else
1890                     (safe-assert (signed9? imm))
1891                     (emit ldurfi dest (cons 'reg reg) imm code*)])
1892                  (cond
1893                    [(aligned-offset? imm 2)
1894                     (emit ldrfsi dest (cons 'reg reg) imm code*)]
1895                    [else
1896                     (safe-assert (signed9? imm))
1897                     (emit ldurfsi dest (cons 'reg reg) imm code*)]))]
1898             [(index) (n ireg breg)
1899              (cond
1900                [(fx= n 0)
1901                 (if double?
1902                     (emit ldrf dest (cons 'reg ireg) (cons 'reg breg) code*)
1903                     (emit ldrfs dest (cons 'reg ireg) (cons 'reg breg) code*))]
1904                [else
1905                 (sorry! who "cannot handle indexed fp src ref")])]
1906             [else (emit fmov dest src code*)])]))))
1907
1908  (define asm-fpcastto
1909    (lambda (code* dest src)
1910      (Trivit (dest src)
1911        (emit fmov.f->g dest src code*))))
1912
1913  (define asm-fpcastfrom
1914    (lambda (code* dest src)
1915      (Trivit (dest src)
1916        (emit fmov.g->f dest src code*))))
1917
1918  (define-who asm-swap
1919    (lambda (type)
1920      (rec asm-swap-internal
1921        (lambda (code* dest src)
1922          (Trivit (dest src)
1923            (case type
1924              [(integer-16) (emit rev16 dest src
1925                              (emit sxth dest dest code*))]
1926              [(unsigned-16) (emit rev16 dest src
1927                               (emit uxth dest dest code*))]
1928              [(integer-32) (emit rev32 dest src
1929                              (emit sxtw dest dest code*))]
1930              [(unsigned-32) (emit rev32 dest src
1931                               (emit movw dest dest code*))]
1932              [(integer-64 unsigned-64) (emit rev dest src code*)]
1933              [else (sorry! who "unexpected asm-swap type argument ~s" type)]))))))
1934
1935  (define asm-lock
1936    ;  tmp = 1 # in case load result is not 0
1937    ;  tmp2 = ldxr src
1938    ;  cmp tmp2, 0
1939    ;  bne L1
1940    ;  tmp2 = 1
1941    ;  tmp = stxr tmp2, src
1942    ;L1:
1943    (lambda (code* src tmp tmp2)
1944      (Trivit (src tmp tmp2)
1945        (emit movzi tmp 1 0
1946          (emit ldxr tmp2 src
1947            (emit cmpi tmp2 0
1948              (emit bnei 12
1949                (emit movzi tmp2 1 0
1950                  (emit stxr tmp tmp2 src code*)))))))))
1951
1952  (define-who asm-lock+/-
1953    ; L:
1954    ;   tmp1 = ldxr src
1955    ;   tmp1 = tmp1 +/- 1
1956    ;   tmp2 = stxr tmp1, src
1957    ;   cmp tmp2, 0
1958    ;   bne L
1959    ;   cmp tmp1, 0
1960    (lambda (op)
1961      (lambda (code* src tmp1 tmp2)
1962        (Trivit (src tmp1 tmp2)
1963          (emit ldxr tmp1 src
1964            (let ([code* (emit stxr tmp2 tmp1 src
1965                           (emit cmpi tmp2 0
1966                             (emit bnei -16
1967                               (emit cmpi tmp1 0 code*))))])
1968              (case op
1969                [(locked-incr!) (emit addi #f tmp1 tmp1 1 code*)]
1970                [(locked-decr!) (emit subi #f tmp1 tmp1 1 code*)]
1971                [else (sorry! who "unexpected op ~s" op)])))))))
1972
1973  (define-who asm-cas
1974    ;   tmp = ldxr src
1975    ;   cmp tmp, old
1976    ;   bne L
1977    ;   tmp2 = stxr new, src
1978    ;   cmp tmp2, 0
1979    ; L:
1980    (lambda (code* src old new tmp1 tmp2)
1981      (Trivit (src old new tmp1 tmp2)
1982        (emit ldxr tmp1 src
1983          (emit cmp tmp1 old
1984            (emit bnei 12
1985              (emit stxr tmp2 new src
1986                (emit cmpi tmp2 0
1987                   code*))))))))
1988
1989  ;; Based in part on https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1990  (define-who asm-fence
1991    (lambda (kind)
1992      (lambda (code*)
1993        (case kind
1994          [(store-store) (emit dmbishst code*)]
1995          [(acquire) (emit dmbishld code*)]
1996          [(release) (emit dmbish code*)]
1997          [else (sorry! who "unexpected kind ~s" kind)]))))
1998
1999  (define asm-fp-relop
2000    (lambda (info)
2001      (lambda (l1 l2 offset x y)
2002        (Trivit (x y)
2003          (values
2004           (emit fcmp x y '())
2005           (asm-conditional-jump info l1 l2 offset))))))
2006
2007  (define-who asm-relop
2008    (lambda (info negated-imm?)
2009      (rec asm-relop-internal
2010        (lambda (l1 l2 offset x y)
2011          (Trivit (x y)
2012            (unless (ax-reg? x) (sorry! who "unexpected first operand ~s" x))
2013            (values
2014              (record-case y
2015                [(imm) (n) (if negated-imm?
2016                               (emit cmni x n '())
2017                               (emit cmpi x n '()))]
2018                [(reg) ignore (safe-assert (not negated-imm?)) (emit cmp x y '())]
2019                [else (sorry! who "unexpected second operand ~s" y)])
2020              (asm-conditional-jump info l1 l2 offset)))))))
2021
2022  (define asm-condition-code
2023    (lambda (info)
2024      (rec asm-check-flag-internal
2025        (lambda (l1 l2 offset)
2026          (values '() (asm-conditional-jump info l1 l2 offset))))))
2027
2028  (define asm-pop-multiple
2029    (lambda (regs)
2030      (lambda (code*)
2031        (asm-multiple regs #t code*
2032                      (lambda (sp reg code*)
2033                        (emit ldr/postidx reg sp 16 code*))
2034                      (lambda (sp reg1 reg2 code*)
2035                        (emit ldrp/postidx reg1 reg2 sp 16 code*))))))
2036
2037  (define asm-push-multiple
2038    (lambda (regs)
2039      (lambda (code*)
2040        (asm-multiple regs #f code*
2041                      (lambda (sp reg code*)
2042                        (emit str/preidx reg sp -16 code*))
2043                      (lambda (sp reg1 reg2 code*)
2044                        (emit strp/preidx reg1 reg2 sp -16 code*))))))
2045
2046  (define asm-pop-fpmultiple
2047    (lambda (regs)
2048      (lambda (code*)
2049        (asm-multiple regs #t code*
2050                      (lambda (sp reg code*)
2051                        (emit ldrf/postidx reg sp 16 code*))
2052                      (lambda (sp reg1 reg2 code*)
2053                        (emit ldrpf/postidx reg1 reg2 sp 16 code*))))))
2054
2055  (define asm-push-fpmultiple
2056    (lambda (regs)
2057      (lambda (code*)
2058        (asm-multiple regs #f code*
2059                      (lambda (sp reg code*)
2060                        (emit strf/preidx reg sp -16 code*))
2061                      (lambda (sp reg1 reg2 code*)
2062                        (emit strpf/preidx reg1 reg2 sp -16 code*))))))
2063
2064  (define (asm-multiple regs rev? code* one two)
2065    (let ([sp `(reg . ,%sp)])
2066      (let loop ([regs regs] [code* code*])
2067        (cond
2068          [(null? regs) code*]
2069          [(null? (cdr regs))
2070           (one sp (cons 'reg (car regs)) code*)]
2071          [rev?
2072           (two sp (cons 'reg (car regs)) (cons 'reg (cadr regs)) (loop (cddr regs) code*))]
2073          [else
2074           (loop (cddr regs) (two sp (cons 'reg (car regs)) (cons 'reg (cadr regs)) code*))]))))
2075
2076  (define asm-debug
2077    (lambda (code*)
2078      (emit und code*)))
2079
2080  (define asm-read-counter
2081    (lambda (op0 op1 crn crm op2)
2082      (lambda (code* dest)
2083        (Trivit (dest)
2084          (emit mrs op0 op1 crn crm op2 dest code*)))))
2085
2086  (define asm-library-jump
2087    (lambda (l)
2088      (asm-helper-jump '()
2089        `(arm64-jump ,(constant code-data-disp) (library-code ,(libspec-label-libspec l))))))
2090
2091  (define asm-library-call
2092    (lambda (libspec save-ra?)
2093      (let ([target `(arm64-call ,(constant code-data-disp) (library-code ,libspec))])
2094        (rec asm-asm-call-internal
2095          (lambda (code* dest . ignore) ; ignore arguments, which must be in fixed locations
2096            (asm-helper-call code* target save-ra?))))))
2097
2098  (define asm-library-call!
2099    (lambda (libspec save-ra?)
2100      (let ([target `(arm64-call ,(constant code-data-disp) (library-code ,libspec))])
2101        (rec asm-asm-call-internal
2102          (lambda (code* . ignore) ; ignore arguments, which must be in fixed locations
2103            (asm-helper-call code* target save-ra?))))))
2104
2105  (define asm-c-simple-call
2106    (lambda (entry save-ra?)
2107      (let ([target `(arm64-call 0 (entry ,entry))])
2108        (rec asm-c-simple-call-internal
2109          (lambda (code* . ignore)
2110            (asm-helper-call code* target save-ra?))))))
2111
2112  (define-who asm-indirect-call
2113    (lambda (code* dest lr . ignore)
2114      (safe-assert (eq? lr %lr))
2115      (Trivit (dest)
2116        (unless (ax-reg? dest) (sorry! who "unexpected dest ~s" dest))
2117        (emit blr dest code*))))
2118
2119  (define asm-direct-jump
2120    (lambda (l offset)
2121      (let ([offset (adjust-return-point-offset offset l)])
2122        (asm-helper-jump '() (make-funcrel 'arm64-jump l offset)))))
2123
2124  (define asm-literal-jump
2125    (lambda (info)
2126      (asm-helper-jump '()
2127        `(arm64-jump ,(info-literal-offset info) (,(info-literal-type info) ,(info-literal-addr info))))))
2128
2129  (define-who asm-indirect-jump
2130    (lambda (src)
2131      (Trivit (src)
2132        (record-case src
2133          [(reg) ignore (emit br src '())]
2134          [(disp) (n breg)
2135           (cond
2136             [(signed9? n)
2137              (emit lduri `(reg . ,%jmptmp) `(reg . ,breg) n
2138                 (emit br `(reg . ,%jmptmp) '()))]
2139             [(aligned-offset? n)
2140              (emit ldri `(reg . ,%jmptmp) `(reg . ,breg) n
2141                 (emit br `(reg . ,%jmptmp) '()))]
2142             [else
2143              (safe-assert (or (unsigned12? n) (unsigned12? (- n))))
2144              (let ([code* (emit ldri `(reg . ,%jmptmp) `(reg . ,%jmptmp) 0
2145                             (emit br `(reg . ,%jmptmp) '()))])
2146                (if (unsigned12? n)
2147                    (emit addi #f `(reg . ,%jmptmp) `(reg . ,breg) n code*)
2148                    (emit subi #f `(reg . ,%jmptmp) `(reg . ,breg) (- n) code*)))])]
2149          [(index) (n ireg breg)
2150           (safe-assert (eqv? n 0))
2151           (emit ldr `(reg . ,%jmptmp) `(reg . ,breg) `(reg . ,ireg)
2152             (emit br `(reg . ,%jmptmp) '()))]
2153          [else (sorry! who "unexpected src ~s" src)]))))
2154
2155  (define asm-logtest
2156    (lambda (i? info)
2157      (lambda (l1 l2 offset x y)
2158        (Trivit (x y)
2159          (values
2160            (record-case y
2161              [(imm) (n) (emit tsti x n '())]
2162              [else (emit tst x y '())])
2163            (let-values ([(l1 l2) (if i? (values l2 l1) (values l1 l2))])
2164              (asm-conditional-jump info l2 l1 offset)))))))
2165
2166  (define asm-get-tc
2167    (let ([target `(arm64-call 0 (entry ,(lookup-c-entry get-thread-context)))])
2168      (lambda (code* dest . ignore) ; dest is ignored, since it is always Cretval
2169        (asm-helper-call code* target #f))))
2170
2171  (define asm-activate-thread
2172    (let ([target `(arm64-call 0 (entry ,(lookup-c-entry activate-thread)))])
2173      (lambda (code* dest . ignore)
2174        (asm-helper-call code* target #t))))
2175
2176  (define asm-deactivate-thread
2177    (let ([target `(arm64-call 0 (entry ,(lookup-c-entry deactivate-thread)))])
2178      (lambda (code* . ignore)
2179        (asm-helper-call code* target #f))))
2180
2181  (define asm-unactivate-thread
2182    (let ([target `(arm64-call 0 (entry ,(lookup-c-entry unactivate-thread)))])
2183      (lambda (code* arg-reg . ignore)
2184        (asm-helper-call code* target #f))))
2185
2186  (define-who asm-return-address
2187    (lambda (dest l incr-offset next-addr)
2188      (make-rachunk dest l incr-offset next-addr
2189        (or (cond
2190              [(local-label-offset l) =>
2191               (lambda (offset)
2192                 (let ([incr-offset (adjust-return-point-offset incr-offset l)])
2193                   (let ([disp (fx+ (fx- next-addr (fx- offset incr-offset)) 4)])
2194                     (cond
2195                       [($fxu< disp (expt 2 21))
2196                        (Trivit (dest)
2197                          (emit adr dest disp '()))]
2198                      [else #f]))))]
2199              [else #f])
2200            (asm-move '() dest (with-output-language (L16 Triv) `(label-ref ,l ,incr-offset)))))))
2201
2202  (define-who asm-jump
2203    (lambda (l next-addr)
2204      (make-gchunk l next-addr
2205        (cond
2206          [(local-label-offset l) =>
2207           (lambda (offset)
2208             (let ([disp (fx- next-addr offset)])
2209               (cond
2210                 [(eqv? disp 0) '()]
2211                 [(uncond-branch-disp? disp) (emit b `(label ,disp ,l) '())]
2212                 [else (sorry! who "no support for code objects > 256MB in length")])))]
2213          [else
2214            ;; label must be somewhere above.  generate something so that a hard loop
2215            ;; doesn't get dropped.  this also has some chance of being the right size
2216            ;; for the final branch instruction.
2217            (emit b `(label 0 ,l) '())]))))
2218
2219  (define-who asm-conditional-jump
2220    (lambda (info l1 l2 next-addr)
2221      (define get-disp-opnd
2222        (lambda (next-addr l)
2223          (if (local-label? l)
2224              (cond
2225                [(local-label-offset l) =>
2226                 (lambda (offset)
2227                   (let ([disp (fx- next-addr offset)])
2228                     (values disp `(label ,disp ,l))))]
2229                [else (values 0 `(label 0 ,l))])
2230              (sorry! who "unexpected label ~s" l))))
2231      (let ([type (info-condition-code-type info)]
2232            [reversed? (info-condition-code-reversed? info)])
2233        (make-cgchunk info l1 l2 next-addr
2234          (let ()
2235            (define-syntax pred-case
2236              (lambda (x)
2237                (define b-asm-size 4)
2238                (define build-bop-seq
2239                  (lambda (bop opnd1 opnd2 l2 body)
2240                    #`(let ([code* (emit #,bop #,opnd1 code*)])
2241			(safe-assert (= (asm-size* code*) #,b-asm-size))
2242                        (let-values ([(ignore #,opnd2) (get-disp-opnd (fx+ next-addr #,b-asm-size) #,l2)])
2243                          #,body))))
2244                (define ops->code
2245                  (lambda (bop opnd)
2246                    #`(emit #,bop #,opnd code*)))
2247                (define handle-reverse
2248                  (lambda (e opnd l)
2249                    (syntax-case e (r?)
2250                      [(r? c1 c2) #`(if reversed? #,(ops->code #'c1 opnd) #,(ops->code #'c2 opnd))]
2251                      [_ (ops->code e opnd)])))
2252                (define handle-inverse
2253                  (lambda (e)
2254                    (syntax-case e (i?)
2255                      [(i? c1 c2)
2256                       #`(cond
2257                           [(and (fx= disp1 0)
2258                                 (branch-disp? (fx+ disp2 #,b-asm-size)))
2259                            #,(handle-reverse #'c1 #'opnd2 #'l2)]
2260                           [(and (fx= disp2 0)
2261                                 (branch-disp? (fx+ disp1 #,b-asm-size)))
2262                            #,(handle-reverse #'c2 #'opnd1 #'l1)]
2263                           [(branch-disp? (fx+ disp1 (fx* 2 #,b-asm-size)))
2264                            #,(build-bop-seq #'b #'opnd2 #'opnd1 #'l1
2265                                (handle-reverse #'c2 #'opnd1 #'l1))]
2266                           [(branch-disp? (fx+ disp2 (fx* 2 #,b-asm-size)))
2267                            #,(build-bop-seq #'b #'opnd1 #'opnd2 #'l2
2268                                (handle-reverse #'c1 #'opnd2 #'l2))]
2269                           [else
2270                            (let ([code* #,(build-bop-seq #'b #'opnd1 #'opnd2 #'l2
2271					      #'(emit b opnd2 code*))])
2272			      #,(handle-reverse #'c2 #``(imm #,b-asm-size) #'step))])]
2273                      [_ ($oops 'handle-inverse "expected an inverse in ~s" e)])))
2274                (syntax-case x ()
2275                  [(_ [(pred ...) cl-body] ...)
2276                   (with-syntax ([(cl-body ...) (map handle-inverse #'(cl-body ...))])
2277                     #'(let ([code* '()])
2278                         (let-values ([(disp1 opnd1) (get-disp-opnd next-addr l1)]
2279                                      [(disp2 opnd2) (get-disp-opnd next-addr l2)])
2280                           (case type
2281                             [(pred ...) cl-body] ...
2282                             [else (sorry! who "~s branch type is currently unsupported" type)]))))])))
2283            (pred-case
2284              [(eq?) (i? bne beq)]
2285              [(u<) (i? (r? bls bcs) (r? bhi bcc))]
2286              [(<) (i? (r? ble bge) (r? bgt blt))]
2287              [(<=) (i? (r? blt bgt) (r? bge ble))]
2288              [(>) (i? (r? bge ble) (r? blt bgt))]
2289              [(>=) (i? (r? bgt blt) (r? ble bge))]
2290              [(overflow) (i? bvc bvs)]
2291              [(positive) (i? ble bgt)]
2292              [(multiply-overflow) (i? beq bne)] ; result of comparing sign bit of low word with all bits in high word: eq if no overflow, ne if oveflow
2293              [(carry) (i? bcc bcs)]
2294              [(fp<) (i? (r? ble bcs) (r? bgt bcc))]
2295              [(fp<=) (i? (r? blt bhi) (r? bge bls))]
2296              [(fp=) (i? bne beq)]))))))
2297
2298  (define asm-helper-jump
2299    (lambda (code* reloc)
2300      (let ([jmp-tmp (cons 'reg %jmptmp)])
2301        (ax-mov64 jmp-tmp 0
2302          (emit br jmp-tmp
2303            (asm-helper-relocation code* reloc))))))
2304
2305  (define asm-kill
2306    (lambda (code* dest)
2307      code*))
2308
2309  (define ax-save/restore
2310    ;; push/pop while maintaining 16-byte alignment
2311    (lambda (code* reg-ea p)
2312      (let ([sp (cons 'reg %sp)])
2313        (emit str/preidx reg-ea sp -16
2314          (p (emit ldr/postidx reg-ea sp 16 code*))))))
2315
2316  (define asm-helper-call
2317    (lambda (code* reloc save-ra?)
2318      ;; NB: kills %lr
2319      (let ([jmp-tmp (cons 'reg %jmptmp)])
2320        (define maybe-save-ra
2321          (lambda (code* p)
2322            (if save-ra?
2323                (ax-save/restore code* (cons 'reg %lr) p)
2324                (p code*))))
2325        (maybe-save-ra code*
2326          (lambda (code*)
2327            (ax-mov64 jmp-tmp 0
2328              (emit blr jmp-tmp
2329                (asm-helper-relocation code* reloc))))))))
2330
2331  (define asm-helper-relocation
2332    (lambda (code* reloc)
2333      (cons* reloc (aop-cons* `(asm "relocation:" ,reloc) code*))))
2334
2335  ; NB: reads from %lr...should be okay if declare-intrinsics sets up return-live* properly
2336  (define asm-return (lambda () (emit ret (cons 'reg %lr) '())))
2337
2338  (define asm-c-return (lambda (info) (emit ret (cons 'reg %lr) '())))
2339
2340  (define-who asm-shiftop
2341    (lambda (op)
2342      (lambda (code* dest src0 src1)
2343        (Trivit (dest src0 src1)
2344          (record-case src1
2345            [(imm) (n)
2346             ;; When `n` fits in a fixnum, the compiler may generate
2347             ;; a bad shift that is under a guard, so force it to 63 bits
2348             (let ([n (fxand n 63)])
2349	       (cond
2350		[(fx= n 0)
2351		 ;; shift by 0 is just a move
2352		 (emit mov dest src0 code*)]
2353		[else
2354		 (case op
2355		   [(sll) (emit lsli dest src0 n code*)]
2356		   [(srl) (emit lsri dest src0 n code*)]
2357		   [(sra) (emit asri dest src0 n code*)]
2358		   [else (sorry! 'shiftop "unrecognized ~s" op)])]))]
2359            [else
2360             (case op
2361               [(sll) (emit lsl dest src0 src1 code*)]
2362               [(srl) (emit lsr dest src0 src1 code*)]
2363               [(sra) (emit asr dest src0 src1 code*)]
2364               [else (sorry! 'shiftop "unrecognized ~s" op)])])))))
2365
2366  (define asm-lognot
2367    (lambda (code* dest src)
2368      (Trivit (dest src)
2369        (emit mvn dest src code*))))
2370
2371  (define asm-popcount
2372    (lambda (code* dest src tmp)
2373      (Trivit (dest src tmp)
2374        (emit fmov.g->f tmp src
2375          (emit cnt tmp tmp
2376            (emit addv.b tmp tmp
2377              (emit fmov.f->g dest tmp code*)))))))
2378
2379  (define asm-enter values)
2380
2381  (define-who asm-inc-cc-counter
2382    (lambda (code* addr val tmp)
2383      (Trivit (addr val tmp)
2384        (define do-ldr
2385          (lambda (offset k code*)
2386            (emit ldri tmp addr offset (k (emit stri tmp addr offset code*)))))
2387        (define do-add/cc
2388          (lambda (code*)
2389            (record-case val
2390              [(imm) (n) (emit addi #t tmp tmp n code*)]
2391              [else (emit add #t tmp tmp val code*)])))
2392        (do-ldr 0
2393          do-add/cc
2394          (emit bnei 16
2395            (do-ldr 8
2396              (lambda (code*)
2397                (emit addi #f tmp tmp 1 code*))
2398              code*))))))
2399
2400  (module (asm-foreign-call asm-foreign-callable)
2401    (define align (lambda (b x) (let ([k (- b 1)]) (fxlogand (fx+ x k) (fxlognot k)))))
2402    (define (double-member? m) (and (eq? (car m) 'float)
2403				    (fx= (cadr m) 8)))
2404    (define (float-member? m) (and (eq? (car m) 'float)
2405				   (fx= (cadr m) 4)))
2406    (define (indirect-result-that-fits-in-registers? result-type)
2407      (nanopass-case (Ltype Type) result-type
2408        [(fp-ftd& ,ftd)
2409	 (let* ([members ($ftd->members ftd)]
2410		[num-members (length members)])
2411	   (or (fx<= ($ftd-size ftd) 4)
2412	       (and (fx= num-members 1)
2413		    ;; a struct containing only int64 is not returned in a register
2414		    (or (not ($ftd-compound? ftd))))
2415	       (and (fx<= num-members 4)
2416		    (or (andmap double-member? members)
2417			(andmap float-member? members)))))]
2418	[else #f]))
2419    (define int-argument-regs (list %Carg1 %Carg2 %Carg3 %Carg4
2420                                    %Carg5 %Carg6 %Carg7 %Carg8))
2421    (define fp-argument-regs (list %Cfparg1 %Cfparg2 %Cfparg3 %Cfparg4
2422                                   %Cfparg5 %Cfparg6 %Cfparg7 %Cfparg8))
2423    (define save-and-restore
2424      (lambda (regs e)
2425        (safe-assert (andmap reg? regs))
2426	(with-output-language (L13 Effect)
2427          (let ([save-and-restore-gp
2428		 (lambda (regs e)
2429                   (let* ([regs (filter (lambda (r) (not (eq? (reg-type r) 'fp))) regs)])
2430                     (cond
2431                      [(null? regs) e]
2432                      [else
2433                       (%seq
2434			(inline ,(make-info-kill*-live* '() regs) ,%push-multiple)
2435			,e
2436			(inline ,(make-info-kill*-live* regs '()) ,%pop-multiple))])))]
2437		[save-and-restore-fp
2438		 (lambda (regs e)
2439                   (let ([fp-regs (filter (lambda (r) (eq? (reg-type r) 'fp)) regs)])
2440                     (cond
2441                      [(null? fp-regs) e]
2442                      [else
2443                       (%seq
2444			(inline ,(make-info-kill*-live* '() fp-regs) ,%push-fpmultiple)
2445			,e
2446			(inline ,(make-info-kill*-live* fp-regs '()) ,%pop-fpmultiple))])))])
2447            (save-and-restore-gp regs (save-and-restore-fp regs e))))))
2448
2449    (define (extract-varargs-after-conv conv*)
2450      (ormap (lambda (conv)
2451               (and (pair? conv) (eq? (car conv) 'varargs) (cdr conv)))
2452             conv*))
2453
2454    (define-record-type cat
2455      (nongenerative #{cat jqrttgvpydsbdo0l736l43udu-1})
2456      (sealed #t)
2457      (fields place            ; 'int, 'fp, or 'stack
2458              regs             ; list of registers
2459              size             ; size in bytes
2460              pad              ; extra trailing size (for 'stack place) in bytes
2461              indirect-bytes)) ; #f or extra bytes on stack for indirect
2462
2463    (define alignment-via-lookahead
2464      (lambda (size types stack-align varargs-after k)
2465        (constant-case machine-type-name
2466          [(arm64osx tarm64osx)
2467           (cond
2468             [(eqv? 0 varargs-after) (k (align 8 size) 0 0)]
2469             [else
2470              ;; On Mac OS, a non-varargs stack argument does not have to use a
2471              ;; multiple of 8, but we need to work out any padding that
2472              ;; is needed to get alignment right for the next argument
2473              ;; (and to end on 8-byte alignment). Currently, we're
2474              ;; assuming max aignment of 8.
2475              (let ([end-this-align (fxand #x7 (fx+ stack-align size))]
2476                    [next-align (cond
2477                                  [(null? types) 8]
2478                                  [else (nanopass-case (Ltype Type) (car types)
2479                                          [(fp-double-float) 8]
2480                                          [(fp-single-float) 4]
2481                                          [(fp-ftd& ,ftd) (if (> ($ftd-size ftd) 16)
2482                                                              8
2483                                                              ($ftd-alignment ftd))]
2484                                          [(fp-integer ,bits) (fxquotient bits 8)]
2485                                          [(fp-unsigned ,bits) (fxquotient bits 8)]
2486                                          [else 8])])])
2487                (cond
2488                  [(fx= 0 (fxand end-this-align (fx- next-align 1)))
2489                   (k size 0 end-this-align)]
2490                  [else
2491                   (k size (- next-align end-this-align) next-align)]))])]
2492          [else
2493           (k (align 8 size) 0 0)])))
2494
2495    (define rest-of
2496      (lambda (regs n next-varargs-after)
2497        (constant-case machine-type-name
2498          [(arm64osx tarm64osx)
2499           (cond
2500             [(eqv? next-varargs-after 0)
2501              ;; All the rest go on the stack
2502              '()]
2503             [else
2504              (list-tail regs n)])]
2505          [else
2506           (list-tail regs n)])))
2507
2508    (define categorize-arguments
2509      (lambda (types varargs-after)
2510        (let loop ([types types] [int* int-argument-regs] [fp* fp-argument-regs]
2511                   [varargs-after varargs-after]
2512                   ;; accumulate alignment from previous args so we can compute any
2513                   ;; needed padding and alignment after this next argument
2514                   [stack-align 0])
2515          (let ([next-varargs-after (and varargs-after (if (fx> varargs-after 0) (fx- varargs-after 1) 0))])
2516            (if (null? types)
2517                '()
2518                (nanopass-case (Ltype Type) (car types)
2519                  [(fp-double-float)
2520                   (cond
2521                     [(null? fp*)
2522                      (cons (make-cat 'stack '() 8 0 #f) (loop (cdr types) int* '() next-varargs-after 0))]
2523                     [else
2524                      (cons (make-cat 'fp (list (car fp*)) 8 0 #f)
2525                            (loop (cdr types) (rest-of int* 0 next-varargs-after) (rest-of fp* 1 next-varargs-after)
2526                                  next-varargs-after
2527                                  stack-align))])]
2528                  [(fp-single-float)
2529                   (cond
2530                     [(null? fp*)
2531                      (alignment-via-lookahead
2532                       4 (cdr types) stack-align varargs-after
2533                       (lambda (bytes pad stack-align)
2534                         (cons (make-cat 'stack '() bytes pad #f) (loop (cdr types) int* '() next-varargs-after stack-align))))]
2535                     [else
2536                      (cons (make-cat 'fp (list (car fp*)) 8 0 #f)
2537                            (loop (cdr types) (rest-of int* 0 next-varargs-after)(rest-of fp* 1 next-varargs-after)
2538                                  next-varargs-after
2539                                  stack-align))])]
2540                  [(fp-ftd& ,ftd)
2541                   (let* ([size ($ftd-size ftd)]
2542                          [members ($ftd->members ftd)]
2543                          [num-members (length members)]
2544                          [doubles? (and (fx= 8 ($ftd-alignment ftd))
2545                                         (fx<= num-members 4)
2546                                         (andmap double-member? members))]
2547                          [floats? (and (fx= 4 ($ftd-alignment ftd))
2548                                        (fx<= num-members 4)
2549                                        (andmap float-member? members))])
2550                     (cond
2551                       [doubles?
2552                        ;; Sequence of up to 4 doubles that may fit in registers
2553                        (cond
2554                          [(fx>= (length fp*) num-members)
2555                           ;; Allocate each double to a register
2556                           (cons (make-cat 'fp (list-head fp* num-members) (fx* 8 num-members) 0 #f)
2557                                 (loop (cdr types) (rest-of int* 0 next-varargs-after) (rest-of fp* num-members next-varargs-after)
2558                                       next-varargs-after
2559                                       stack-align))]
2560                          [else
2561                           ;; Stop using fp registers, put on stack
2562                           (cons (make-cat 'stack '() size 0 #f)
2563                                 (loop (cdr types) int* '() next-varargs-after 0))])]
2564                       [floats?
2565                        ;; Sequence of up to 4 floats that may fit in registers
2566                        (cond
2567                          [(fx>= (length fp*) num-members)
2568                           ;; Allocate each float to a register
2569                           (cons (make-cat 'fp (list-head fp* num-members) (fx* 8 num-members) 0 #f)
2570                                 (loop (cdr types) (rest-of int* 0 next-varargs-after) (rest-of fp* num-members next-varargs-after)
2571                                       next-varargs-after
2572                                       stack-align))]
2573                          [else
2574                           ;; Stop using fp registers, put on stack
2575                           (alignment-via-lookahead
2576                            size (cdr types) stack-align varargs-after
2577                            (lambda (size pad stack-align)
2578                              (cons (make-cat 'stack '() size pad #f)
2579                                    (loop (cdr types) int* '() next-varargs-after stack-align))))])]
2580                       [(fx> size 16)
2581                        ;; Indirect; pointer goes in a register or on the stack
2582                        (cond
2583                          [(null? int*)
2584                           ;; Pointer on the stack
2585                           (cons (make-cat 'stack '() (constant ptr-bytes) 0 (align 8 size))
2586                                 (loop (cdr types) '() fp* next-varargs-after 0))]
2587                          [else
2588                           ;; Pointer in register
2589                           (cons (make-cat 'int (list (car int*)) 8 0 (align 8 size))
2590                                 (loop (cdr types) (rest-of int* 1 next-varargs-after) (rest-of fp* 0 next-varargs-after)
2591                                       next-varargs-after
2592                                       stack-align))])]
2593                       [else
2594                        ;; Maybe put in integer registers
2595                        (let* ([regs (fxquotient (align 8 size) 8)])
2596                          (cond
2597                            [(fx<= regs (length int*))
2598                             ;; Fits in registers
2599                             (cons (make-cat 'int (list-head int* regs) (align 8 size) 0 #f)
2600                                   (loop (cdr types) (rest-of int* regs next-varargs-after) (rest-of fp* 0 next-varargs-after)
2601                                         next-varargs-after
2602                                         stack-align))]
2603                            [else
2604                             ;; Stop using int registers, put on stack
2605                             (alignment-via-lookahead
2606                              size (cdr types) stack-align varargs-after
2607                              (lambda (size pad stack-align)
2608                                (cons (make-cat 'stack '() size pad #f)
2609                                      (loop (cdr types) '() fp* next-varargs-after stack-align))))]))]))]
2610                  [else
2611                   ;; integers, scheme-object, etc.
2612                   (cond
2613                     [(null? int*)
2614                      (let ([size (nanopass-case (Ltype Type) (car types)
2615                                    [(fp-integer ,bits) (fxquotient bits 8)]
2616                                    [(fp-unsigned ,bits) (fxquotient bits 8)]
2617                                    [else 8])])
2618                        (alignment-via-lookahead
2619                         size (cdr types) stack-align varargs-after
2620                         (lambda (size pad stack-align)
2621                           (cons (make-cat 'stack '() size pad #f) (loop (cdr types) '() fp* next-varargs-after stack-align)))))]
2622                     [else
2623                      (cons (make-cat 'int (list (car int*)) 8 0 #f)
2624                            (loop (cdr types) (rest-of int* 1 next-varargs-after) (rest-of fp* 0 next-varargs-after)
2625                                  next-varargs-after stack-align))])]))))))
2626
2627    (define get-registers
2628      (lambda (cats kind)
2629        (let loop ([cats cats])
2630          (cond
2631            [(null? cats) '()]
2632            [(or (eq? kind 'all) (eq? kind (cat-place (car cats))))
2633             (append (cat-regs (car cats))
2634                     (loop (cdr cats)))]
2635            [else (loop (cdr cats))]))))
2636
2637    (define memory-to-reg
2638      (lambda (ireg x from-offset size unsigned?)
2639        (safe-assert (not (eq? ireg x)))
2640        (with-output-language (L13 Effect)
2641          (let loop ([ireg ireg] [from-offset from-offset] [size size] [unsigned? unsigned?])
2642            (case size
2643              [(8) `(set! ,ireg ,(%mref ,x ,from-offset))]
2644              [(7 6 5)
2645               (let ([tmp %argtmp])
2646                 (%seq
2647                  ,(loop ireg (fx+ from-offset 4) (fx- size 4) #t)
2648                  ,(loop tmp from-offset 4 #t)
2649                  (set! ,ireg ,(%inline sll ,ireg (immediate 32)))
2650                  (set! ,ireg ,(%inline + ,ireg ,tmp))))]
2651              [(3)
2652               (let ([tmp %argtmp])
2653                 (%seq
2654                  ,(loop ireg from-offset 2 #t)
2655                  ,(loop tmp (fx+ from-offset 2) 1 #t)
2656                  (set! ,tmp ,(%inline sll ,tmp (immediate 16)))
2657                  (set! ,ireg ,(%inline + ,ireg ,tmp))))]
2658              [else
2659               `(set! ,ireg ,(case size
2660                               [(1) `(inline ,(make-info-load (if unsigned? 'unsigned-8 'integer-8) #f) ,%load ,x ,%zero (immediate ,from-offset))]
2661                               [(2) `(inline ,(make-info-load (if unsigned? 'unsigned-16 'integer-16) #f) ,%load ,x ,%zero (immediate ,from-offset))]
2662                               [(4) `(inline ,(make-info-load (if unsigned? 'unsigned-32 'integer-32) #f) ,%load ,x ,%zero (immediate ,from-offset))]
2663                               [else (sorry! 'memory-to-reg "unexpected size ~s" size)]))])))))
2664    (define reg-to-memory
2665      (lambda (dest offset size from-reg)
2666        ;; can trash `from-reg`, cannot use `%argtmp`
2667        (let loop ([offset offset] [size size])
2668          (with-output-language (L13 Effect)
2669            (case size
2670              [(1) `(inline ,(make-info-load 'integer-8 #f) ,%store ,dest ,%zero (immediate ,offset) ,from-reg)]
2671              [(2) `(inline ,(make-info-load 'integer-16 #f) ,%store ,dest ,%zero (immediate ,offset) ,from-reg)]
2672              [(3) (%seq
2673                    ,(loop offset 2)
2674                    (set! ,from-reg ,(%inline srl ,from-reg (immediate 16)))
2675                    ,(loop (fx+ offset 2) 1))]
2676              [(4) `(inline ,(make-info-load 'integer-32 #f) ,%store ,dest ,%zero (immediate ,offset) ,from-reg)]
2677              [(8) `(set! ,(%mref ,dest ,offset) ,from-reg)]
2678              [(7 6 5) (%seq
2679                        ,(loop offset 4)
2680                        (set! ,from-reg ,(%inline srl ,from-reg (immediate 32)))
2681                        ,(loop (fx+ offset 4) (fx- size 4)))])))))
2682
2683    (define-who asm-foreign-call
2684      (with-output-language (L13 Effect)
2685        (letrec ([load-double-stack
2686                  (lambda (offset)
2687                    (lambda (x) ; unboxed
2688                      `(set! ,(%mref ,%sp ,%zero ,offset fp) ,x)))]
2689                 [load-single-stack
2690                  (lambda (offset)
2691                    (lambda (x) ; unboxed
2692                      (%inline store-double->single ,(%mref ,%sp ,%zero ,offset fp) ,x)))]
2693                 [load-int-stack
2694                  (lambda (offset size)
2695                    (lambda (rhs) ; requires rhs
2696                      (let ([int-type (case size
2697                                        [(1) 'unsigned-8]
2698                                        [(2) 'unsigned-16]
2699                                        [(4) 'unsigned-32]
2700                                        [else #f])])
2701                        (cond
2702                          [(not int-type) `(set! ,(%mref ,%sp ,offset) ,rhs)]
2703                          [else
2704                           (let ([tmp %argtmp])
2705                             (%seq
2706                              (set! ,tmp ,rhs)
2707                              (inline ,(make-info-load int-type #f) ,%store ,%sp ,%zero (immediate ,offset) ,tmp)))]))))]
2708                 [load-indirect-stack
2709                  ;; used both for arguments passed on stack and argument passed as a pointer to deeper on the stack
2710                  (lambda (offset from-offset size)
2711                    (lambda (x) ; requires var
2712                      (let loop ([size size] [offset offset] [from-offset from-offset])
2713                        (case size
2714                          [(8) `(set! ,(%mref ,%sp ,offset) ,(%mref ,x ,from-offset))]
2715                          [(7 6 5)
2716                           (%seq
2717                            ,(loop 4 offset from-offset)
2718                            ,(loop (fx- size 4) (fx+ offset 4) (fx+ from-offset 4)))]
2719                          [(3)
2720                           (%seq
2721                            (set! ,(%mref ,%sp ,offset) (inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset)))
2722                            (set! ,(%mref ,%sp ,(fx+ offset 2)) (inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,(fx+ from-offset 2)))))]
2723                          [(1 2 4)
2724                           `(set! ,(%mref ,%sp ,offset) ,(case size
2725                                                           [(1) `(inline ,(make-info-load 'integer-8 #f) ,%load ,x ,%zero (immediate ,from-offset))]
2726                                                           [(2) `(inline ,(make-info-load 'integer-16 #f) ,%load ,x ,%zero (immediate ,from-offset))]
2727                                                           [(4) `(inline ,(make-info-load 'integer-32 #f) ,%load ,x ,%zero (immediate ,from-offset))]))]
2728                          [else
2729                           (%seq
2730                            ,(loop 8 offset from-offset)
2731                            ,(loop (fx- size 8) (fx+ offset 8) (fx+ from-offset 8)))]))))]
2732                 [load-double-reg
2733                  (lambda (fpreg)
2734                    (lambda (x) ; unboxed
2735                      `(set! ,fpreg ,x)))]
2736                 [load-single-reg
2737                  (lambda (fpreg)
2738                    (lambda (x) ; unboxed
2739                      `(set! ,fpreg ,(%inline double->single ,x))))]
2740                 [load-boxed-double-reg
2741                  (lambda (fpreg fp-disp)
2742                    (lambda (x) ; address (always a var) of a flonum
2743                      `(set! ,fpreg ,(%mref ,x ,%zero ,fp-disp fp))))]
2744                 [load-boxed-single-reg
2745                  (lambda (fpreg fp-disp)
2746                    (lambda (x) ; address (always a var) of a float
2747                      `(set! ,fpreg ,(%inline load-single ,(%mref ,x ,%zero ,fp-disp fp)))))]
2748                 [load-int-reg
2749                  (lambda (ireg)
2750                    (lambda (x)
2751                      `(set! ,ireg ,x)))]
2752                 [load-int-indirect-reg
2753                  (lambda (ireg from-offset size unsigned?)
2754                    (lambda (x)
2755                      (memory-to-reg ireg x from-offset size unsigned?)))]
2756                 [compute-stack-argument-space
2757                  ;; We'll save indirect arguments on the stack, too, but they have to be beyond any
2758                  ;; arguments that the callee expects. So, calculate how much the callee shoudl expect.
2759                  (lambda (cats)
2760                    (let loop ([cats cats] [isp 0])
2761                      (if (null? cats)
2762                          isp
2763                          (let ([cat (car cats)])
2764                            (if (eq? (cat-place cat) 'stack)
2765                                (loop (cdr cats) (fx+ isp (cat-size cat) (cat-pad cat)))
2766                                (loop (cdr cats) isp))))))]
2767                 [compute-stack-indirect-space
2768                  (lambda (cats)
2769                    (let loop ([cats cats] [isp 0])
2770                      (if (null? cats)
2771                          isp
2772                          (let ([cat (car cats)])
2773                            (loop (cdr cats) (fx+ isp (or (cat-indirect-bytes cat) 0)))))))]
2774                 [do-args
2775                  (lambda (types cats indirect-start)
2776                    (let loop ([types types] [cats cats] [locs '()] [isp 0] [ind-sp indirect-start])
2777                      (if (null? types)
2778                          locs
2779                          (let ([cat (car cats)]
2780                                [type (car types)]
2781                                [cats (cdr cats)]
2782                                [types (cdr types)])
2783                            (nanopass-case (Ltype Type) type
2784                              [(fp-double-float)
2785                               (cond
2786                                 [(eq? 'fp (cat-place cat))
2787                                  (loop types cats
2788                                        (cons (load-double-reg (car (cat-regs cat))) locs)
2789                                        isp ind-sp)]
2790                                 [else
2791                                  (loop types cats
2792                                        (cons (load-double-stack isp) locs)
2793                                        (fx+ isp (cat-size cat) (cat-pad cat)) ind-sp)])]
2794                              [(fp-single-float)
2795                               (cond
2796                                 [(eq? 'fp (cat-place cat))
2797                                  (loop types cats
2798                                        (cons (load-single-reg (car (cat-regs cat))) locs)
2799                                        isp ind-sp)]
2800                                 [else
2801                                  (loop types cats
2802                                        (cons (load-single-stack isp) locs)
2803                                        (fx+ isp (cat-size cat) (cat-pad cat)) ind-sp)])]
2804                              [(fp-ftd& ,ftd)
2805                               (let ([size ($ftd-size ftd)])
2806                                 (case (cat-place cat)
2807                                   [(int)
2808                                    (let ([indirect-bytes (cat-indirect-bytes cat)])
2809                                      (cond
2810                                        [indirect-bytes
2811                                         ;; pointer to an indirect argument
2812                                         (safe-assert (fx= 1 (length (cat-regs cat))))
2813                                         (loop types cats
2814                                               (cons (let ([ind (load-indirect-stack ind-sp 0 size)])
2815                                                       (lambda (x)
2816                                                         (%seq
2817                                                          ,(ind x)
2818                                                          (set! ,(car (cat-regs cat)) ,(%inline + ,%sp (immediate ,ind-sp))))))
2819                                                     locs)
2820                                               isp (fx+ ind-sp indirect-bytes))]
2821                                        [else
2822                                         ;; argument copied to one or more integer registers
2823                                         (let i-loop ([int* (cat-regs cat)] [size size] [offset 0] [proc #f])
2824                                           (cond
2825                                             [(null? int*)
2826                                              (loop types cats
2827                                                    (cons proc locs)
2828                                                    isp ind-sp)]
2829                                             [else
2830                                              (i-loop (cdr int*) (fx- size 8) (fx+ offset 8)
2831                                                      (let ([new-proc (load-int-indirect-reg (car int*) offset (fxmin size 8) ($ftd-unsigned? ftd))])
2832                                                        (if proc
2833                                                            (lambda (x) (%seq ,(proc x) ,(new-proc x)))
2834                                                            new-proc)))]))]))]
2835                                   [(fp)
2836                                    (let ([double? (double-member? (car ($ftd->members ftd)))])
2837                                      ;; argument copied to one or more integer registers
2838                                      (let f-loop ([fp* (cat-regs cat)] [offset 0] [proc #f])
2839                                        (cond
2840                                          [(null? fp*)
2841                                           (loop types cats
2842                                                 (cons proc locs)
2843                                                 isp ind-sp)]
2844                                          [else
2845                                           (f-loop (cdr fp*) (fx+ offset (if double? 8 4))
2846                                                   (let ([new-proc (if double?
2847                                                                       (load-boxed-double-reg (car fp*) offset)
2848                                                                       (load-boxed-single-reg (car fp*) offset))])
2849                                                     (if proc
2850                                                         (lambda (x) (%seq ,(proc x) ,(new-proc x)))
2851                                                         new-proc)))])))]
2852                                   [else
2853                                    (let ([indirect-bytes (cat-indirect-bytes cat)]
2854                                          [size-on-stack (cat-size cat)])
2855                                      (cond
2856                                        [indirect-bytes
2857                                         ;; pointer (passed on stack) to an indirect argument (also on stack)
2858                                         (safe-assert (fx= size-on-stack 8))
2859                                         (loop types cats
2860                                               (cons (let ([ind (load-indirect-stack ind-sp 0 size-on-stack)])
2861                                                       (lambda (x)
2862                                                         (%seq
2863                                                          ,(ind x)
2864                                                          (set! ,(%mref ,%sp ,isp) ,(%inline + ,%sp ,ind)))))
2865                                                     locs)
2866                                               (fx+ isp size-on-stack) (fx+ ind-sp indirect-bytes))]
2867                                        [else
2868                                         ;; argument copied to stack
2869                                         (loop types cats
2870                                               (cons (load-indirect-stack isp 0 size) locs)
2871                                               (fx+ isp size-on-stack (cat-pad cat)) ind-sp)]))]))]
2872                              [else
2873                               ;; integer, scheme-object, etc.
2874                               (cond
2875                                 [(eq? 'int (cat-place cat))
2876                                  (loop types cats
2877                                        (cons (load-int-reg (car (cat-regs cat))) locs)
2878                                        isp ind-sp)]
2879                                 [else
2880                                  (loop types cats
2881                                        (cons (load-int-stack isp (cat-size cat)) locs)
2882                                        (fx+ isp (cat-size cat) (cat-pad cat)) ind-sp)])])))))]
2883		 [add-fill-result
2884                  ;; may destroy the values in result registers
2885		  (lambda (result-cat result-type args-frame-size e)
2886                    (nanopass-case (Ltype Type) result-type
2887                      [(fp-ftd& ,ftd)
2888                       (let* ([size ($ftd-size ftd)]
2889                              [tmp %argtmp])
2890                         (case (cat-place result-cat)
2891                           [(int)
2892                            ;; result is in integer registers
2893                            (let loop ([int* (cat-regs result-cat)] [offset 0] [size size])
2894                              (cond
2895                                [(null? int*) `(seq ,e (set! ,tmp ,(%mref ,%sp ,args-frame-size)))]
2896                                [else
2897                                 (%seq ,(loop (cdr int*) (fx+ offset 8) (fx- size 8))
2898                                       ,(reg-to-memory tmp offset (fxmin size 8) (car int*)))]))]
2899                           [(fp)
2900                            ;; result is in fp registers, so going to either double or float elements
2901                            (let* ([double? (double-member? (car ($ftd->members ftd)))])
2902                              (let loop ([fp* (cat-regs result-cat)] [offset 0])
2903                                (cond
2904                                  [(null? fp*) `(seq ,e (set! ,tmp ,(%mref ,%sp ,args-frame-size)))]
2905                                  [double?
2906                                   (%seq ,(loop (cdr fp*) (fx+ offset 8))
2907                                         (set! ,(%mref ,tmp ,%zero ,offset fp) ,(car fp*)))]
2908                                  [else
2909                                   (%seq ,(loop (cdr fp*) (fx+ offset 4))
2910                                         ,(%inline store-single ,(%mref ,tmp ,%zero ,offset fp) ,(car fp*)))])))]
2911                           [else
2912                            ;; we passed the pointer to be filled, so nothing more to do here
2913                            e]))]
2914                      [else
2915                       ;; anything else
2916                       e]))]
2917                 [add-deactivate
2918                  (lambda (adjust-active? t0 live* result-live* k)
2919                    (cond
2920                      [adjust-active?
2921                       (%seq
2922			(set! ,%ac0 ,t0)
2923                        ,(save-and-restore live* (%inline deactivate-thread))
2924                        ,(k %ac0)
2925                        ,(save-and-restore result-live* `(set! ,%Cretval ,(%inline activate-thread))))]
2926                      [else (k t0)]))])
2927          (lambda (info)
2928            (safe-assert (reg-callee-save? %tc)) ; no need to save-restore
2929            (let* ([arg-type* (info-foreign-arg-type* info)]
2930                   [result-type (info-foreign-result-type info)]
2931                   [ftd-result? (nanopass-case (Ltype Type) result-type
2932                                  [(fp-ftd& ,ftd) #t]
2933                                  [else #f])]
2934                   [arg-type* (if ftd-result?
2935                                  (cdr arg-type*)
2936                                  arg-type*)]
2937                   [conv* (info-foreign-conv* info)]
2938                   [arg-cat* (categorize-arguments arg-type* (extract-varargs-after-conv conv*))]
2939		   [result-cat (car (categorize-arguments (list result-type) #f))]
2940                   [result-reg* (cat-regs result-cat)]
2941		   [fill-result-here? (and ftd-result?
2942                                           (not (cat-indirect-bytes result-cat))
2943                                           (not (eq? 'stack (cat-place result-cat))))]
2944                   [arg-stack-bytes (align 16 (compute-stack-argument-space arg-cat*))]
2945                   [indirect-stack-bytes (align 16 (compute-stack-indirect-space arg-cat*))]
2946                   [adjust-active? (if-feature pthreads (memq 'adjust-active conv*) #f)]
2947                   [locs (do-args arg-type* arg-cat* arg-stack-bytes)]
2948                   [live* (get-registers arg-cat* 'all)]
2949                   [live* (if (and ftd-result? (not fill-result-here?))
2950                              (cons %r8 live*)
2951                              live*)]
2952                   [frame-size (align 16 (fx+ arg-stack-bytes
2953                                              indirect-stack-bytes
2954                                              (if fill-result-here?
2955                                                  8
2956                                                  0)))]
2957                   [adjust-frame (lambda (op)
2958                                   (lambda ()
2959                                     (if (fx= frame-size 0)
2960                                         `(nop)
2961                                         `(set! ,%sp (inline ,null-info ,op ,%sp (immediate ,frame-size))))))])
2962              (values
2963               (adjust-frame %-)
2964               (let ([locs (reverse locs)])
2965                 (cond
2966                   [fill-result-here?
2967                    ;; stash extra argument on the stack to be retrieved after call and filled with the result:
2968                    (cons (load-int-stack (fx+ arg-stack-bytes indirect-stack-bytes) 8) locs)]
2969                   [ftd-result?
2970                    ;; callee expects pointer to fill for return in %r8:
2971                    (cons (lambda (rhs) `(set! ,%r8 ,rhs)) locs)]
2972                   [else locs]))
2973               (lambda (t0 not-varargs?)
2974                 (add-fill-result result-cat result-type (fx+ arg-stack-bytes indirect-stack-bytes)
2975                                  (add-deactivate adjust-active? t0 live* result-reg*
2976                                                  (lambda (t0)
2977                                                    `(inline ,(make-info-kill*-live* (add-caller-save-registers result-reg*) live*) ,%c-call ,t0)))))
2978               (nanopass-case (Ltype Type) result-type
2979                 [(fp-double-float)
2980                  (lambda (lvalue) ; unboxed
2981                    `(set! ,lvalue ,%Cfpretval))]
2982                 [(fp-single-float)
2983                  (lambda (lvalue) ; unboxed
2984                    `(set! ,lvalue ,(%inline single->double ,%Cfpretval)))]
2985                 [(fp-integer ,bits)
2986                  (case bits
2987                    [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline sext8 ,%Cretval)))]
2988                    [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline sext16 ,%Cretval)))]
2989                    [(32) (lambda (lvalue) `(set! ,lvalue ,(%inline sext32 ,%Cretval)))]
2990                    [(64) (lambda (lvalue) `(set! ,lvalue ,%Cretval))]
2991                    [else (sorry! who "unexpected asm-foreign-procedures fp-integer size ~s" bits)])]
2992                 [(fp-unsigned ,bits)
2993                  (case bits
2994                    [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline zext8 ,%Cretval)))]
2995                    [(16) (lambda (lvalue) `(set! ,lvalue ,(%inline zext16 ,%Cretval)))]
2996                    [(32) (lambda (lvalue) `(set! ,lvalue ,(%inline zext32 ,%Cretval)))]
2997                    [(64) (lambda (lvalue) `(set! ,lvalue ,%Cretval))]
2998                    [else (sorry! who "unexpected asm-foreign-procedures fp-unsigned size ~s" bits)])]
2999                 [else (lambda (lvalue) `(set! ,lvalue ,%Cretval))])
3000               (adjust-frame %+)))
3001            ))))
3002
3003    (define-who asm-foreign-callable
3004      #|
3005        Frame Layout
3006                   +---------------------------+
3007                   |                           |
3008                   |    incoming stack args    |
3009                   |                           |
3010                   +---------------------------+<- 16-byte boundary
3011                   |    saved int reg args     |
3012                   |    + %r8 for indirect     |
3013                   |    + maybe padding        |
3014                   +---------------------------+<- 16-byte boundary
3015                   |                           |
3016                   |   saved float reg args    |
3017                   |    + maybe padding        |
3018                   +---------------------------+<- 16-byte boundary
3019                   |                           |
3020                   |     activatation state    |
3021                   |       if necessary        |
3022                   +---------------------------+<- 16-byte boundary
3023                   |                           |
3024                   |      &-return space       |
3025                   |       if necessary        |
3026                   +---------------------------+<- 16-byte boundary
3027                   |                           |
3028                   |   callee-save regs + lr   |
3029                   |   callee-save fpregs      |
3030                   +---------------------------+<- 16-byte boundary
3031      |#
3032      (with-output-language (L13 Effect)
3033        (let ()
3034          (define load-double-stack
3035            (lambda (offset)
3036              (lambda (x) ; requires var
3037                `(set! ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp)
3038                       ,(%mref ,%sp ,%zero ,offset fp)))))
3039          (define load-single-stack
3040            (lambda (offset)
3041              (lambda (x) ; requires var
3042                `(set! ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp)
3043                       ,(%inline load-single->double ,(%mref ,%sp ,%zero ,offset fp))))))
3044          (define load-word-stack
3045            (lambda (offset)
3046              (lambda (lvalue)
3047                `(set! ,lvalue ,(%mref ,%sp ,offset)))))
3048          (define load-int-stack
3049            (lambda (type offset)
3050              (lambda (lvalue)
3051                (nanopass-case (Ltype Type) type
3052                  [(fp-integer ,bits)
3053                   (case bits
3054                     [(8) `(set! ,lvalue (inline ,(make-info-load 'integer-8 #f) ,%load ,%sp ,%zero (immediate ,offset)))]
3055                     [(16) `(set! ,lvalue (inline ,(make-info-load 'integer-16 #f) ,%load ,%sp ,%zero (immediate ,offset)))]
3056                     [(32) `(set! ,lvalue (inline ,(make-info-load 'integer-32 #f) ,%load ,%sp ,%zero (immediate ,offset)))]
3057                     [(64) `(set! ,lvalue ,(%mref ,%sp ,offset))]
3058                     [else (sorry! who "unexpected load-int-stack fp-integer size ~s" bits)])]
3059                  [(fp-unsigned ,bits)
3060                   (case bits
3061                     [(8) `(set! ,lvalue (inline ,(make-info-load 'unsigned-8 #f) ,%load ,%sp ,%zero (immediate ,offset)))]
3062                     [(16) `(set! ,lvalue (inline ,(make-info-load 'unsigned-16 #f) ,%load ,%sp ,%zero (immediate ,offset)))]
3063                     [(32) `(set! ,lvalue (inline ,(make-info-load 'unsigned-32 #f) ,%load ,%sp ,%zero (immediate ,offset)))]
3064                     [(64) `(set! ,lvalue ,(%mref ,%sp ,offset))]
3065                     [else (sorry! who "unexpected load-int-stack fp-unsigned size ~s" bits)])]
3066                  [else `(set! ,lvalue ,(%mref ,%sp ,offset))]))))
3067	  (define load-stack-address
3068	    (lambda (offset)
3069	      (lambda (lvalue)
3070		`(set! ,lvalue ,(%inline + ,%sp (immediate ,offset))))))
3071          (define do-args
3072            ;; all of the args are on the stack at this point, though not contiguous since
3073            ;; we push all of the int reg args with one set of push instructions and all of the
3074            ;; float reg args with another set of push instructions
3075            (lambda (arg-type* arg-cat* init-int-reg-offset float-reg-offset stack-arg-offset return-offset
3076                               synthesize-first? indirect-result?)
3077              (let loop ([types arg-type*]
3078                         [cats arg-cat*]
3079                         [locs '()]
3080                         [int-reg-offset (if indirect-result? (fx+ init-int-reg-offset 8) init-int-reg-offset)]
3081                         [float-reg-offset float-reg-offset]
3082                         [stack-arg-offset stack-arg-offset])
3083                  (if (null? types)
3084                      (let ([locs (reverse locs)])
3085                        (cond
3086                          [synthesize-first?
3087                           (cons (load-stack-address return-offset) locs)]
3088                          [indirect-result?
3089                           (cons (load-word-stack init-int-reg-offset) locs)]
3090                          [else locs]))
3091                      (let ([cat (car cats)]
3092                            [type (car types)]
3093                            [cats (cdr cats)]
3094                            [types (cdr types)])
3095                        (nanopass-case (Ltype Type) type
3096                          [(fp-double-float)
3097                           (case (cat-place cat)
3098                             [(fp)
3099                              (loop types cats
3100                                    (cons (load-double-stack float-reg-offset) locs)
3101                                    int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)]
3102                             [else
3103                              (loop types cats
3104                                    (cons (load-double-stack stack-arg-offset) locs)
3105                                    int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))])]
3106                          [(fp-single-float)
3107                           (case (cat-place cat)
3108                             [(fp)
3109                              (loop types cats
3110                                    (cons (load-single-stack float-reg-offset) locs)
3111                                    int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)]
3112                             [else
3113                              (loop types cats
3114                                    (cons (load-single-stack stack-arg-offset) locs)
3115                                    int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))])]
3116
3117                          [(fp-ftd& ,ftd)
3118                           (case (cat-place cat)
3119                             [(int)
3120                              (let ([indirect-bytes (cat-indirect-bytes cat)])
3121                                (cond
3122                                  [indirect-bytes
3123                                   ;; pointer to an indirect argument
3124                                   (safe-assert (fx= (length (cat-regs cat)) 1))
3125                                   (loop types cats
3126                                         (cons (load-word-stack int-reg-offset) locs)
3127                                         (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)]
3128                                  [else
3129                                   ;; point to argument on stack
3130                                   (loop types cats
3131                                         (cons (load-stack-address int-reg-offset) locs)
3132                                         (fx+ int-reg-offset (cat-size cat) (cat-pad cat)) float-reg-offset stack-arg-offset)]))]
3133                             [(fp)
3134                              ;; point to argument, but if they're floats, then we need to
3135                              ;; shift double-sized registers into float-sized elements
3136                              (loop types cats
3137                                    (cons (let ([proc (load-stack-address float-reg-offset)]
3138                                                [members ($ftd->members ftd)])
3139                                            (cond
3140                                              [(or (null? (cdr members))
3141                                                   (double-member? (car members)))
3142                                               proc]
3143                                              [else
3144                                               ;; instead of compacting here, it might be nicer to
3145                                               ;; save registers in packed form in the first place
3146                                               ;; (which means that `(cat-size cat)` would be a multiple of 4)
3147                                               (lambda (lvalue)
3148                                                 (let loop ([members (cdr members)]
3149                                                            [dest-offset (fx+ float-reg-offset 4)]
3150                                                            [src-offset (fx+ float-reg-offset 8)])
3151                                                   (if (null? members)
3152                                                       (proc lvalue)
3153                                                       (let ([tmp %argtmp])
3154                                                         (%seq
3155                                                          (set! ,tmp (inline ,(make-info-load 'unsigned-32 #f) ,%load ,%sp ,%zero (immediate ,src-offset)))
3156                                                          (inline ,(make-info-load 'unsigned-32 #f) ,%store ,%sp ,%zero (immediate ,dest-offset) ,%argtmp)
3157                                                          ,(loop (cdr members) (fx+ dest-offset 4) (fx+ src-offset 8)))))))]))
3158                                          locs)
3159                                    int-reg-offset (fx+ float-reg-offset (cat-size cat) (cat-pad cat)) stack-arg-offset)]
3160                             [else
3161                              (let ([indirect-bytes (cat-indirect-bytes cat)])
3162                                (cond
3163                                 [indirect-bytes
3164                                   ;; pointer (passed on stack) to an indirect argument (also on stack)
3165                                   (safe-assert (fx= (cat-size cat) 8))
3166                                   (loop types cats
3167                                         (cons (load-word-stack stack-arg-offset) locs)
3168                                         int-reg-offset float-reg-offset (fx+ stack-arg-offset 8))]
3169                                  [else
3170                                   ;; point to argument on stack
3171                                   (loop types cats
3172                                         (cons (load-stack-address stack-arg-offset) locs)
3173                                         int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))]))])]
3174                          [else
3175                           ;; integer, scheme-object, etc.
3176                           (case (cat-place cat)
3177                             [(int)
3178                              (loop types cats
3179                                    (cons (load-int-stack type int-reg-offset) locs)
3180                                    (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)]
3181                             [else
3182                              (loop types cats
3183                                    (cons (load-int-stack type stack-arg-offset) locs)
3184                                    int-reg-offset float-reg-offset (fx+ stack-arg-offset (cat-size cat) (cat-pad cat)))])]))))))
3185          (define do-result
3186            (lambda (result-type result-cat synthesize-first? return-stack-offset)
3187	      (nanopass-case (Ltype Type) result-type
3188                [(fp-double-float)
3189		 (lambda (rhs)
3190                   `(set! ,%Cfpretval ,(%mref ,rhs ,%zero ,(constant flonum-data-disp) fp)))]
3191		[(fp-single-float)
3192                 (lambda (rhs)
3193                   `(set! ,%Cfpretval ,(%inline double->single ,(%mref ,rhs ,%zero ,(constant flonum-data-disp) fp))))]
3194                [(fp-void)
3195                 (lambda () `(nop))]
3196                [(fp-ftd& ,ftd)
3197                 (cond
3198                   [(cat-indirect-bytes result-cat)
3199                    ;; we passed the pointer to be filled, so nothing more to do here
3200                    (lambda () `(nop))]
3201                   [else
3202                    (case (cat-place result-cat)
3203                      [(int)
3204                       (let ([to-regs
3205                              (lambda (x offset)
3206                                (let loop ([int* (cat-regs result-cat)] [offset offset] [size ($ftd-size ftd)])
3207                                  (cond
3208                                    [(null? int*) `(nop)]
3209                                    [else
3210                                     (safe-assert (not (eq? (car int*) x)))
3211                                     (%seq
3212                                      ,(loop (cdr int*) (fx+ offset 8) (fx- size 8))
3213                                      ,(memory-to-reg (car int*) x offset (fxmin size 8) ($ftd-unsigned? ftd)))])))])
3214                         (if synthesize-first?
3215                             (lambda ()
3216                               (to-regs %sp return-stack-offset))
3217                             (lambda (x)
3218                               (to-regs x 0))))]
3219                      [(fp)
3220                       (let* ([double? (double-member? (car ($ftd->members ftd)))])
3221                         (let ([to-regs
3222                                (lambda (x offset)
3223                                  (let loop ([fp* (cat-regs result-cat)] [offset offset])
3224                                    (cond
3225                                      [(null? fp*) `(nop)]
3226                                      [double?
3227                                       (%seq ,(loop (cdr fp*) (fx+ offset 8))
3228                                             (set! ,(car fp*) ,(%mref ,x ,%zero ,offset fp)))]
3229                                      [else
3230                                       (%seq ,(loop (cdr fp*) (fx+ offset 4))
3231                                             (set! ,(car fp*) ,(%inline load-single ,(%mref ,x ,%zero ,offset fp))))])))])
3232                           (if synthesize-first?
3233                               (lambda ()
3234                                 (to-regs %sp return-stack-offset))
3235                               (lambda (x)
3236                                 (to-regs x 0)))))]
3237                      [else
3238                       ;; we passed the pointer to be filled, so nothing more to do here
3239                       (lambda () `(nop))])])]
3240                [else
3241                 ;; integer, scheme-object, etc.
3242                 (lambda (x)
3243                   `(set! ,%Cretval ,x))])))
3244          (lambda (info)
3245            (define get-callee-save-regs (lambda (type)
3246                                           (let loop ([i 0])
3247                                             (cond
3248                                               [(fx= i (vector-length regvec)) '()]
3249                                               [else (let ([reg (vector-ref regvec i)])
3250                                                       (if (and (reg-callee-save? reg)
3251                                                                (eq? type (reg-type reg)))
3252                                                           (cons reg (loop (fx+ i 1)))
3253                                                           (loop (fx+ i 1))))]))))
3254            (define callee-save-regs+lr (cons* %lr
3255					       ;; reserved:
3256					       %tc %sfp %ap %trap
3257					       ;; allocable:
3258					       (get-callee-save-regs 'uptr)))
3259            (define callee-save-fpregs  (get-callee-save-regs 'fp))
3260            (define isaved (length callee-save-regs+lr))
3261            (define fpsaved (length callee-save-fpregs))
3262            (let* ([arg-type* (info-foreign-arg-type* info)]
3263                   [result-type (info-foreign-result-type info)]
3264                   [ftd-result? (nanopass-case (Ltype Type) result-type
3265                                  [(fp-ftd& ,ftd) #t]
3266                                  [else #f])]
3267                   [arg-type* (if ftd-result?
3268                                  (cdr arg-type*)
3269                                  arg-type*)]
3270		   [conv* (info-foreign-conv* info)]
3271                   [arg-cat* (categorize-arguments arg-type* (extract-varargs-after-conv conv*))]
3272                   [result-cat (car (categorize-arguments (list result-type) #f))]
3273                   [synthesize-first? (and ftd-result?
3274                                           (not (cat-indirect-bytes result-cat))
3275                                           (not (eq? 'stack (cat-place result-cat))))]
3276                   [indirect-result? (and ftd-result? (not synthesize-first?))]
3277                   [adjust-active? (if-feature pthreads (memq 'adjust-active conv*) #f)]
3278
3279                   [arg-regs (let ([regs (get-registers arg-cat* 'int)])
3280                               (if indirect-result?
3281                                   (cons %r8 regs)
3282                                   regs))]
3283                   [arg-fp-regs (get-registers arg-cat* 'fp)]
3284                   [result-regs (get-registers (list result-cat) 'all)])
3285                (let ([int-reg-bytes (fx* (align 2 (length arg-regs)) 8)]
3286                      [float-reg-bytes (fx* (align 2 (length arg-fp-regs)) 8)]
3287                      [active-state-bytes (if adjust-active? 16 0)]
3288                      [return-bytes (if synthesize-first? (align 16 (cat-size result-cat)) 0)]
3289                      [callee-save-bytes (fx* 8
3290                                              (fx+ (align 2 (length callee-save-regs+lr))
3291                                                   (align 2 (length callee-save-fpregs))))])
3292                  (let* ([return-offset callee-save-bytes]
3293                         [active-state-offset (fx+ return-offset return-bytes)]
3294                         [arg-fpregs-offset (fx+ active-state-offset active-state-bytes)]
3295                         [arg-regs-offset (fx+ arg-fpregs-offset float-reg-bytes)]
3296                         [args-offset (fx+ arg-regs-offset int-reg-bytes)])
3297                    (values
3298                     (lambda ()
3299                       (%seq
3300                        ;; save argument register values to the stack so we don't lose the values
3301                        ;; across possible calls to C while setting up the tc and allocating memory
3302                        ,(if (null? arg-regs) `(nop) `(inline ,(make-info-kill*-live* '() arg-regs) ,%push-multiple))
3303                        ,(if (null? arg-fp-regs) `(nop) `(inline ,(make-info-kill*-live* '() arg-fp-regs) ,%push-fpmultiple))
3304                        ;; make room for active state and/or return bytes
3305                        ,(let ([len (+ active-state-bytes return-bytes)])
3306                           (if (fx= len 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate ,len)))))
3307                        ;; save the callee save registers & return address
3308                        (inline ,(make-info-kill*-live* '() callee-save-regs+lr) ,%push-multiple)
3309                        (inline ,(make-info-kill*-live* '() callee-save-fpregs) ,%push-fpmultiple)
3310                        ;; maybe activate
3311                        ,(if adjust-active?
3312                             `(seq
3313                               (set! ,%Cretval ,(%inline activate-thread))
3314                               (set! ,(%mref ,%sp ,active-state-offset) ,%Cretval))
3315                             `(nop))
3316                        ;; set up tc for benefit of argument-conversion code, which might allocate
3317                        ,(if-feature pthreads
3318                           (%seq
3319                            (set! ,%Cretval ,(%inline get-tc))
3320                            (set! ,%tc ,%Cretval))
3321                           `(set! ,%tc (literal ,(make-info-literal #f 'entry (lookup-c-entry thread-context) 0))))))
3322                     ;; list of procedures that marshal arguments from their C stack locations
3323                     ;; to the Scheme argument locations
3324                     (do-args arg-type* arg-cat* arg-regs-offset arg-fpregs-offset args-offset return-offset
3325                              synthesize-first? indirect-result?)
3326                     (do-result result-type result-cat synthesize-first? return-offset)
3327                     (lambda ()
3328                       (in-context Tail
3329                        (%seq
3330                         ,(if adjust-active?
3331                              (%seq
3332                               ;; We need *(sp+active-state-offset) in %Carg1,
3333                               ;; but that can also be a return register.
3334                               ;; Meanwhle, sp may change before we call unactivate.
3335                               ;; So, move to %r2 for now, then %Carg1 later:
3336                               (set! ,%argtmp ,(%mref ,%sp ,active-state-offset))
3337                               ,(save-and-restore
3338                                 result-regs
3339                                 `(seq
3340                                   (set! ,%Carg1 ,%argtmp)
3341                                   ,(%inline unactivate-thread ,%Carg1))))
3342                              `(nop))
3343                         ;; restore the callee save registers
3344                         (inline ,(make-info-kill* callee-save-fpregs) ,%pop-fpmultiple)
3345                         (inline ,(make-info-kill* callee-save-regs+lr) ,%pop-multiple)
3346                         ;; deallocate space for pad & arg reg values
3347                         (set! ,%sp ,(%inline + ,%sp (immediate ,(fx+ active-state-bytes return-bytes float-reg-bytes int-reg-bytes))))
3348                         ;; done
3349                         (asm-c-return ,null-info ,callee-save-regs+lr ... ,callee-save-fpregs ... ,result-regs ...)))))))))))))
3350)
3351