1#2 "asmcomp/i386/emit.mlp"
2(**************************************************************************)
3(*                                                                        *)
4(*                                 OCaml                                  *)
5(*                                                                        *)
6(*             Xavier Leroy, projet Cristal, INRIA Rocquencourt           *)
7(*                                                                        *)
8(*   Copyright 1996 Institut National de Recherche en Informatique et     *)
9(*     en Automatique.                                                    *)
10(*                                                                        *)
11(*   All rights reserved.  This file is distributed under the terms of    *)
12(*   the GNU Lesser General Public License version 2.1, with the          *)
13(*   special exception on linking described in the file LICENSE.          *)
14(*                                                                        *)
15(**************************************************************************)
16
17(* Emission of Intel 386 assembly code *)
18
19open Misc
20open Cmm
21open Arch
22open Proc
23open Reg
24open Mach
25open Linearize
26open Emitaux
27
28open X86_ast
29open X86_proc
30open X86_dsl
31
32let _label s = D.label ~typ:DWORD s
33
34let mem_sym typ ?(ofs = 0) sym =
35  mem32 typ ~scale:0 ?base:None ~sym ofs RAX (*ignored since scale=0*)
36
37(* CFI directives *)
38
39let cfi_startproc () =
40  if Config.asm_cfi_supported then D.cfi_startproc ()
41
42let cfi_endproc () =
43  if Config.asm_cfi_supported then D.cfi_endproc ()
44
45let cfi_adjust_cfa_offset n =
46  if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n
47
48let emit_debug_info dbg =
49  emit_debug_info_gen dbg D.file D.loc
50
51(* Tradeoff between code size and code speed *)
52
53let fastcode_flag = ref true
54
55let stack_offset = ref 0
56
57(* Layout of the stack frame *)
58
59let frame_size () =                     (* includes return address *)
60  let sz =
61    !stack_offset + 4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + 4
62  in Misc.align sz stack_alignment
63
64let slot_offset loc cl =
65  match loc with
66  | Incoming n ->
67      assert (n >= 0);
68      frame_size() + n
69  | Local n ->
70      if cl = 0
71      then !stack_offset + n * 4
72      else !stack_offset + num_stack_slots.(0) * 4 + n * 8
73  | Outgoing n ->
74      assert (n >= 0);
75      n
76
77(* Record symbols used and defined - at the end generate extern for those
78   used but not defined *)
79
80let symbols_defined = ref StringSet.empty
81let symbols_used = ref StringSet.empty
82
83let add_def_symbol s = symbols_defined := StringSet.add s !symbols_defined
84let add_used_symbol s = symbols_used := StringSet.add s !symbols_used
85
86let trap_frame_size = Misc.align 8 stack_alignment
87
88(* Prefixing of symbols with "_" *)
89
90let symbol_prefix =
91  match system with
92  | S_linux_elf -> ""
93  | S_bsd_elf -> ""
94  | S_solaris -> ""
95  | S_beos -> ""
96  | S_gnu -> ""
97  | _ -> "_" (* win32 & others *)
98
99let emit_symbol s = string_of_symbol symbol_prefix s
100
101let immsym s = sym (emit_symbol s)
102
103let emit_call s = I.call (immsym s)
104
105(* Output a label *)
106
107let label_prefix =
108  match system with
109  | S_linux_elf -> ".L"
110  | S_bsd_elf -> ".L"
111  | S_solaris -> ".L"
112  | S_beos -> ".L"
113  | S_gnu -> ".L"
114  | _ -> "L"
115
116let emit_label lbl =
117  Printf.sprintf "%s%d" label_prefix lbl
118
119let label s = sym (emit_label s)
120
121let def_label s = D.label (emit_label s)
122
123let emit_Llabel fallthrough lbl =
124  if not fallthrough && !fastcode_flag then D.align 16 ;
125  def_label lbl
126
127(* Output a pseudo-register *)
128
129let int_reg_name =  [| RAX; RBX; RCX; RDX; RSI; RDI; RBP  |]
130
131let float_reg_name = [| TOS |]
132
133let register_name r =
134  if r < 100 then Reg32 (int_reg_name.(r))
135  else Regf (float_reg_name.(r - 100))
136
137let sym32 ?ofs s = mem_sym ?ofs DWORD (emit_symbol s)
138
139let reg = function
140  | { loc = Reg r } -> register_name r
141  | { loc = Stack(Incoming n | Outgoing n) } when n < 0 ->
142      sym32 "caml_extra_params" ~ofs:(n + 64)
143  | { loc = Stack s; typ = Float } as r ->
144      let ofs = slot_offset s (register_class r) in
145      mem32 REAL8 ofs RSP
146  | { loc = Stack s } as r ->
147      let ofs = slot_offset s (register_class r) in
148      mem32 DWORD ofs RSP
149  | { loc = Unknown } ->
150      fatal_error "Emit_i386.reg"
151
152(* Output a reference to the lower 8 bits or lower 16 bits of a register *)
153
154let reg_low_8_name  = Array.map (fun r -> Reg8L r) int_reg_name
155let reg_low_16_name = Array.map (fun r -> Reg16 r) int_reg_name
156
157let reg8 r =
158  match r.loc with
159  | Reg r when r < 4 -> reg_low_8_name.(r)
160  | _ -> fatal_error "Emit_i386.reg8"
161
162let reg16 r =
163  match r.loc with
164  | Reg r when r < 7 -> reg_low_16_name.(r)
165  | _ -> fatal_error "Emit_i386.reg16"
166
167let reg32 = function
168  | { loc = Reg.Reg r } -> int_reg_name.(r)
169  | _ -> assert false
170
171let arg32 i n = reg32 i.arg.(n)
172
173(* Output an addressing mode *)
174
175let addressing addr typ i n =
176  match addr with
177  | Ibased(s, ofs) ->
178      add_used_symbol s;
179      mem_sym typ (emit_symbol s) ~ofs
180  | Iindexed d ->
181      mem32 typ d (arg32 i n)
182  | Iindexed2 d ->
183      mem32 typ ~base:(arg32 i n) d (arg32 i (n+1))
184  | Iscaled(2, d) ->
185      mem32 typ ~base:(arg32 i n) d (arg32 i n)
186  | Iscaled(scale, d) ->
187      mem32 typ ~scale d (arg32 i n)
188  | Iindexed2scaled(scale, d) ->
189      mem32 typ ~scale ~base:(arg32 i n) d (arg32 i (n+1))
190
191(* Record live pointers at call points *)
192
193let record_frame_label ?label live raise_ dbg =
194  let lbl =
195    match label with
196    | None -> new_label()
197    | Some label -> label
198  in
199  let live_offset = ref [] in
200  Reg.Set.iter
201    (function
202      | {typ = Val; loc = Reg r} ->
203          live_offset := ((r lsl 1) + 1) :: !live_offset
204      | {typ = Val; loc = Stack s} as reg ->
205          live_offset := slot_offset s (register_class reg) :: !live_offset
206      | {typ = Addr} as r ->
207          Misc.fatal_error ("bad GC root " ^ Reg.name r)
208      | _ -> ())
209    live;
210  record_frame_descr ~label:lbl ~frame_size:(frame_size())
211    ~live_offset:!live_offset ~raise_frame:raise_ dbg;
212  lbl
213
214let record_frame ?label live raise_ dbg =
215  let lbl = record_frame_label ?label live raise_ dbg in
216  def_label lbl
217
218(* Record calls to the GC -- we've moved them out of the way *)
219
220type gc_call =
221  { gc_lbl: label;                      (* Entry label *)
222    gc_return_lbl: label;               (* Where to branch after GC *)
223    gc_frame: label }                   (* Label of frame descriptor *)
224
225let call_gc_sites = ref ([] : gc_call list)
226
227let emit_call_gc gc =
228  def_label gc.gc_lbl;
229  emit_call "caml_call_gc";
230  def_label gc.gc_frame;
231  I.jmp (label gc.gc_return_lbl)
232
233(* Record calls to caml_ml_array_bound_error.
234   In -g mode, we maintain one call to caml_ml_array_bound_error
235   per bound check site.  Without -g, we can share a single call. *)
236
237type bound_error_call =
238  { bd_lbl: label;                      (* Entry label *)
239    bd_frame: label }                   (* Label of frame descriptor *)
240
241let bound_error_sites = ref ([] : bound_error_call list)
242let bound_error_call = ref 0
243
244let bound_error_label ?label dbg =
245  if !Clflags.debug then begin
246    let lbl_bound_error = new_label() in
247    let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in
248    bound_error_sites :=
249      { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites;
250    lbl_bound_error
251  end else begin
252    if !bound_error_call = 0 then bound_error_call := new_label();
253    !bound_error_call
254  end
255
256let emit_call_bound_error bd =
257  def_label bd.bd_lbl;
258  emit_call "caml_ml_array_bound_error";
259  def_label bd.bd_frame
260
261let emit_call_bound_errors () =
262  List.iter emit_call_bound_error !bound_error_sites;
263  if !bound_error_call > 0 then begin
264    def_label !bound_error_call;
265    emit_call "caml_ml_array_bound_error"
266  end
267
268(* Names for instructions *)
269
270let instr_for_intop = function
271  | Iadd -> I.add
272  | Isub -> I.sub
273  | Imul -> (fun arg1 arg2 ->  I.imul arg1 (Some arg2))
274  | Iand -> I.and_
275  | Ior -> I.or_
276  | Ixor -> I.xor
277  | Ilsl -> I.sal
278  | Ilsr -> I.shr
279  | Iasr -> I.sar
280  | _ -> fatal_error "Emit_i386: instr_for_intop"
281
282let unary_instr_for_floatop = function
283  | Inegf -> I.fchs ()
284  | Iabsf -> I.fabs ()
285  | _ -> fatal_error "Emit_i386: unary_instr_for_floatop"
286
287let instr_for_floatop = function
288  | Iaddf -> I.fadd
289  | Isubf -> I.fsub
290  | Imulf -> I.fmul
291  | Idivf -> I.fdiv
292  | Ispecific Isubfrev -> I.fsubr
293  | Ispecific Idivfrev -> I.fdivr
294  | _ -> fatal_error "Emit_i386: instr_for_floatop"
295
296let instr_for_floatop_reversed = function
297  | Iaddf -> I.fadd
298  | Isubf -> I.fsubr
299  | Imulf -> I.fmul
300  | Idivf -> I.fdivr
301  | Ispecific Isubfrev -> I.fsub
302  | Ispecific Idivfrev -> I.fdiv
303  | _ -> fatal_error "Emit_i386: instr_for_floatop_reversed"
304
305
306let instr_for_floatop_reversed_pop = function
307  | Iaddf -> I.faddp
308  | Isubf -> I.fsubrp
309  | Imulf -> I.fmulp
310  | Idivf -> I.fdivrp
311  | Ispecific Isubfrev -> I.fsubp
312  | Ispecific Idivfrev -> I.fdivp
313  | _ -> fatal_error "Emit_i386: instr_for_floatop_reversed_pop"
314
315let instr_for_floatarithmem = function
316  | Ifloatadd -> I.fadd
317  | Ifloatsub -> I.fsub
318  | Ifloatsubrev -> I.fsubr
319  | Ifloatmul -> I.fmul
320  | Ifloatdiv -> I.fdiv
321  | Ifloatdivrev -> I.fdivr
322
323let cond = function
324  | Isigned Ceq   -> E   | Isigned Cne   -> NE
325  | Isigned Cle   -> LE  | Isigned Cgt   -> G
326  | Isigned Clt   -> L   | Isigned Cge   -> GE
327  | Iunsigned Ceq -> E   | Iunsigned Cne -> NE
328  | Iunsigned Cle -> BE  | Iunsigned Cgt -> A
329  | Iunsigned Clt -> B   | Iunsigned Cge -> AE
330
331(* Output an = 0 or <> 0 test. *)
332
333let output_test_zero arg =
334  match arg.loc with
335  | Reg.Reg _ -> I.test (reg arg) (reg arg)
336  | _  -> I.cmp (int 0) (reg arg)
337
338(* Deallocate the stack frame before a return or tail call *)
339
340let output_epilogue f =
341  let n = frame_size() - 4 in
342  if n > 0 then
343    begin
344      I.add (int n) esp;
345      cfi_adjust_cfa_offset (-n);
346      f ();
347      (* reset CFA back cause function body may continue *)
348      cfi_adjust_cfa_offset n
349    end
350  else
351    f ()
352
353(* Determine if the given register is the top of the floating-point stack *)
354
355let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false
356
357(* Emit the code for a floating-point comparison *)
358
359let emit_float_test cmp neg arg lbl =
360  let actual_cmp =
361    match (is_tos arg.(0), is_tos arg.(1)) with
362    | (true, true) ->
363        (* both args on top of FP stack *)
364        I.fcompp ();
365        cmp
366    | (true, false) ->
367        (* first arg on top of FP stack *)
368        I.fcomp (reg arg.(1));
369        cmp
370    | (false, true) ->
371        (* second arg on top of FP stack *)
372        I.fcomp (reg arg.(0));
373        Cmm.swap_comparison cmp
374    | (false, false) ->
375        I.fld     (reg arg.(0));
376        I.fcomp   (reg arg.(1));
377        cmp
378  in
379  I.fnstsw ax;
380  match actual_cmp with
381  | Ceq ->
382      if neg then begin
383        I.and_ (int 68) ah;
384        I.xor (int 64) ah;
385        I.jne lbl
386      end else begin
387        I.and_ (int 69) ah;
388        I.cmp (int 64) ah;
389        I.je lbl
390      end
391  | Cne ->
392      if neg then begin
393        I.and_ (int 69) ah;
394        I.cmp (int 64) ah;
395        I.je lbl
396      end else begin
397        I.and_ (int 68) ah;
398        I.xor (int 64) ah;
399        I.jne lbl
400      end
401  | Cle ->
402      I.and_ (int 69) ah;
403      I.dec ah;
404      I.cmp (int 64) ah;
405      if neg
406      then I.jae lbl
407      else I.jb lbl
408  | Cge ->
409      I.and_ (int 5) ah;
410      if neg
411      then I.jne lbl
412      else I.je lbl
413  | Clt ->
414      I.and_ (int 69) ah;
415      I.cmp (int 1) ah;
416      if neg
417      then I.jne lbl
418      else I.je lbl
419  | Cgt ->
420      I.and_ (int 69) ah;
421      if neg
422      then I.jne lbl
423      else I.je lbl
424
425(* Emit a Ifloatspecial instruction *)
426
427let emit_floatspecial = function
428  | "atan"  -> I.fld1 (); I.fpatan ()
429  | "atan2" -> I.fpatan ()
430  | "cos"   -> I.fcos ()
431  | "log"   -> I.fldln2 (); I.fxch st1; I.fyl2x ()
432  | "log10" -> I.fldlg2 (); I.fxch st1; I.fyl2x ()
433  | "sin"   -> I.fsin ()
434  | "sqrt"  -> I.fsqrt ()
435  | "tan"   -> I.fptan (); I.fstp st0
436  | _ -> assert false
437
438(* Floating-point constants *)
439
440let float_constants = ref ([] : (int64 * int) list)
441
442let add_float_constant cst =
443  try
444    List.assoc cst !float_constants
445  with
446    Not_found ->
447      let lbl = new_label() in
448      float_constants := (cst, lbl) :: !float_constants;
449      lbl
450
451let emit_float64_split_directive x =
452  let lo = Int64.logand x 0xFFFF_FFFFL
453  and hi = Int64.shift_right_logical x 32 in
454  D.long (Const (if Arch.big_endian then hi else lo));
455  D.long (Const (if Arch.big_endian then lo else hi))
456
457let emit_float_constant cst lbl =
458  _label (emit_label lbl);
459  emit_float64_split_directive cst
460
461let emit_global_label s =
462  let lbl = Compilenv.make_symbol (Some s) in
463  add_def_symbol lbl;
464  let lbl = emit_symbol lbl in
465  D.global lbl;
466  _label lbl
467
468(* Output the assembly code for an instruction *)
469
470(* Name of current function *)
471let function_name = ref ""
472(* Entry point for tail recursive calls *)
473let tailrec_entry_point = ref 0
474(* Record references to external C functions (for MacOSX) *)
475let external_symbols_direct = ref StringSet.empty
476let external_symbols_indirect = ref StringSet.empty
477
478let emit_instr fallthrough i =
479  emit_debug_info i.dbg;
480  match i.desc with
481  | Lend -> ()
482  | Lop(Imove | Ispill | Ireload) ->
483      let src = i.arg.(0) and dst = i.res.(0) in
484      if src.loc <> dst.loc then begin
485        if src.typ = Float then
486          if is_tos src then
487            I.fstp (reg dst)
488          else if is_tos dst then
489            I.fld (reg src)
490          else begin
491            I.fld (reg src);
492            I.fstp (reg dst)
493          end
494        else
495          I.mov (reg src) (reg dst)
496      end
497  | Lop(Iconst_int n) ->
498      if n = 0n then begin
499        match i.res.(0).loc with
500        | Reg _ -> I.xor (reg i.res.(0)) (reg i.res.(0))
501        | _     -> I.mov (int 0) (reg i.res.(0))
502      end else
503        I.mov (nat n) (reg i.res.(0))
504  | Lop(Iconst_float f) ->
505      begin match f with
506      | 0x0000_0000_0000_0000L ->       (* +0.0 *)
507          I.fldz ()
508      | 0x8000_0000_0000_0000L ->       (* -0.0 *)
509          I.fldz (); I.fchs ()
510      | 0x3FF0_0000_0000_0000L ->       (*  1.0 *)
511          I.fld1 ()
512      | 0xBFF0_0000_0000_0000L ->       (* -1.0 *)
513          I.fld1 (); I.fchs ()
514      | _ ->
515          let lbl = add_float_constant f in
516          I.fld (mem_sym REAL8 (emit_label lbl))
517      end
518  | Lop(Iconst_symbol s) ->
519      add_used_symbol s;
520      I.mov (immsym s) (reg i.res.(0))
521  | Lop(Icall_ind { label_after; }) ->
522      I.call (reg i.arg.(0));
523      record_frame i.live false i.dbg ~label:label_after
524  | Lop(Icall_imm { func; label_after; }) ->
525      add_used_symbol func;
526      emit_call func;
527      record_frame i.live false i.dbg ~label:label_after
528  | Lop(Itailcall_ind { label_after = _; }) ->
529      output_epilogue begin fun () ->
530        I.jmp (reg i.arg.(0))
531      end
532  | Lop(Itailcall_imm { func; label_after = _; }) ->
533      if func = !function_name then
534        I.jmp (label !tailrec_entry_point)
535      else begin
536        output_epilogue begin fun () ->
537          add_used_symbol func;
538          I.jmp (immsym func)
539        end
540      end
541  | Lop(Iextcall { func; alloc; label_after; }) ->
542      add_used_symbol func;
543      if alloc then begin
544        if system <> S_macosx then
545          I.mov (immsym func) eax
546        else begin
547          external_symbols_indirect :=
548            StringSet.add func !external_symbols_indirect;
549          I.mov (mem_sym DWORD (Printf.sprintf "L%s$non_lazy_ptr"
550                              (emit_symbol func))) eax
551        end;
552        emit_call "caml_c_call";
553        record_frame i.live false i.dbg ~label:label_after
554      end else begin
555        if system <> S_macosx then
556          emit_call func
557        else begin
558          external_symbols_direct :=
559            StringSet.add func !external_symbols_direct;
560          I.call (sym (Printf.sprintf "L%s$stub" (emit_symbol func)))
561        end
562      end
563  | Lop(Istackoffset n) ->
564      if n < 0
565      then I.add (int (-n)) esp
566      else I.sub (int n) esp;
567      cfi_adjust_cfa_offset n;
568      stack_offset := !stack_offset + n
569  | Lop(Iload(chunk, addr)) ->
570      let dest = i.res.(0) in
571      begin match chunk with
572      | Word_int | Word_val | Thirtytwo_signed | Thirtytwo_unsigned ->
573          I.mov (addressing addr DWORD i 0) (reg dest)
574      | Byte_unsigned ->
575          I.movzx (addressing addr BYTE i 0) (reg dest)
576      | Byte_signed ->
577          I.movsx (addressing addr BYTE i 0) (reg dest)
578      | Sixteen_unsigned ->
579          I.movzx (addressing addr WORD i 0) (reg dest)
580      | Sixteen_signed ->
581          I.movsx (addressing addr WORD i 0) (reg dest)
582      | Single ->
583          I.fld (addressing addr REAL4 i 0)
584      | Double | Double_u ->
585          I.fld (addressing addr REAL8 i 0)
586      end
587  | Lop(Istore(chunk, addr, _)) ->
588      begin match chunk with
589      | Word_int | Word_val | Thirtytwo_signed | Thirtytwo_unsigned ->
590          I.mov (reg i.arg.(0)) (addressing addr DWORD i 1)
591      | Byte_unsigned | Byte_signed ->
592          I.mov (reg8 i.arg.(0)) (addressing addr BYTE i 1)
593      | Sixteen_unsigned | Sixteen_signed ->
594          I.mov (reg16 i.arg.(0)) (addressing addr WORD i 1)
595      | Single ->
596          if is_tos i.arg.(0) then
597            I.fstp (addressing addr REAL4 i 1)
598          else begin
599            I.fld (reg i.arg.(0));
600            I.fstp (addressing addr REAL4 i 1)
601          end
602      | Double | Double_u ->
603          if is_tos i.arg.(0) then
604            I.fstp (addressing addr REAL8 i 1)
605          else begin
606            I.fld (reg i.arg.(0));
607            I.fstp (addressing addr REAL8 i 1)
608          end
609      end
610  | Lop(Ialloc { words = n; label_after_call_gc; }) ->
611      if !fastcode_flag then begin
612        let lbl_redo = new_label() in
613        def_label lbl_redo;
614        I.mov (sym32 "caml_young_ptr") eax;
615        I.sub (int n) eax;
616        I.mov eax (sym32 "caml_young_ptr");
617        I.cmp (sym32 "caml_young_limit") eax;
618        let lbl_call_gc = new_label() in
619        let lbl_frame = record_frame_label i.live false Debuginfo.none in
620        I.jb (label lbl_call_gc);
621        I.lea (mem32 NONE 4 RAX) (reg i.res.(0));
622        call_gc_sites :=
623          { gc_lbl = lbl_call_gc;
624            gc_return_lbl = lbl_redo;
625            gc_frame = lbl_frame } :: !call_gc_sites
626      end else begin
627        begin match n with
628          8  -> emit_call "caml_alloc1"
629        | 12 -> emit_call "caml_alloc2"
630        | 16 -> emit_call "caml_alloc3"
631        | _  ->
632            I.mov (int n) eax;
633            emit_call "caml_allocN"
634        end;
635        let label =
636          record_frame_label ?label:label_after_call_gc i.live false
637            Debuginfo.none
638        in
639        def_label label;
640        I.lea (mem32 NONE 4 RAX) (reg i.res.(0))
641      end
642  | Lop(Iintop(Icomp cmp)) ->
643      I.cmp (reg i.arg.(1)) (reg i.arg.(0));
644      I.set (cond cmp) al;
645      I.movzx al (reg i.res.(0));
646  | Lop(Iintop_imm(Icomp cmp, n)) ->
647      I.cmp (int n) (reg i.arg.(0));
648      I.set (cond cmp) al;
649      I.movzx al (reg i.res.(0))
650  | Lop(Iintop (Icheckbound { label_after_error; } )) ->
651      let lbl = bound_error_label ?label:label_after_error i.dbg in
652      I.cmp (reg i.arg.(1)) (reg i.arg.(0));
653      I.jbe (label lbl)
654  | Lop(Iintop_imm(Icheckbound { label_after_error; }, n)) ->
655      let lbl = bound_error_label ?label:label_after_error i.dbg in
656      I.cmp (int n) (reg i.arg.(0));
657      I.jbe (label lbl)
658  | Lop(Iintop(Idiv | Imod)) ->
659      I.cdq ();
660      I.idiv (reg i.arg.(1))
661  | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
662      (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %ecx *)
663      instr_for_intop op cl (reg i.res.(0))
664  | Lop(Iintop Imulh) ->
665      I.imul (reg i.arg.(1)) None
666  | Lop(Iintop op) ->
667      (* We have i.arg.(0) = i.res.(0) *)
668      instr_for_intop op (reg i.arg.(1)) (reg i.res.(0))
669  | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
670      I.lea (mem32 NONE n (reg32 i.arg.(0))) (reg i.res.(0))
671  | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
672      I.inc (reg i.res.(0))
673  | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
674      I.dec (reg i.res.(0))
675  | Lop(Iintop_imm(op, n)) ->
676      (* We have i.arg.(0) = i.res.(0) *)
677      instr_for_intop op (int n) (reg i.res.(0))
678  | Lop(Inegf | Iabsf as floatop) ->
679      if not (is_tos i.arg.(0)) then
680        I.fld (reg i.arg.(0));
681      unary_instr_for_floatop floatop
682  | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev)
683        as floatop) ->
684      begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with
685        (true, true) ->
686          (* both operands on top of FP stack *)
687          instr_for_floatop_reversed_pop floatop st0 st1
688      | (true, false) ->
689          (* first operand on stack *)
690          instr_for_floatop floatop (reg i.arg.(1))
691      | (false, true) ->
692          (* second operand on stack *)
693          instr_for_floatop_reversed floatop (reg i.arg.(0))
694      | (false, false) ->
695          (* both operands in memory *)
696          I.fld (reg i.arg.(0));
697          instr_for_floatop floatop (reg i.arg.(1))
698      end
699  | Lop(Ifloatofint) ->
700      begin match i.arg.(0).loc with
701      | Stack _ ->
702          I.fild (reg i.arg.(0))
703      | _ ->
704          I.push (reg i.arg.(0));
705          I.fild (mem32 DWORD 0 RSP);
706          I.add (int 4) esp
707      end
708  | Lop(Iintoffloat) ->
709      if not (is_tos i.arg.(0)) then
710        I.fld (reg i.arg.(0));
711      stack_offset := !stack_offset - 8;
712      I.sub (int 8) esp;
713      cfi_adjust_cfa_offset 8;
714      I.fnstcw (mem32 NONE 4 RSP);
715      I.mov (mem32 WORD 4 RSP) ax;
716      I.mov (int 12) ah;
717      I.mov ax (mem32 WORD 0 RSP);
718      I.fldcw (mem32 NONE 0 RSP);
719      begin match i.res.(0).loc with
720      | Stack _ ->
721          I.fistp (reg i.res.(0))
722      | _ ->
723          I.fistp (mem32 DWORD 0 RSP);
724          I.mov (mem32 DWORD 0 RSP) (reg i.res.(0))
725      end;
726      I.fldcw (mem32 NONE 4 RSP);
727      I.add (int 8) esp;
728      cfi_adjust_cfa_offset (-8);
729      stack_offset := !stack_offset + 8
730  | Lop(Ispecific(Ilea addr)) ->
731      I.lea (addressing addr DWORD i 0) (reg i.res.(0))
732  | Lop(Ispecific(Istore_int(n, addr, _))) ->
733      I.mov (nat n) (addressing addr DWORD i 0)
734  | Lop(Ispecific(Istore_symbol(s, addr, _))) ->
735      add_used_symbol s;
736      I.mov (immsym s) (addressing addr DWORD i 0)
737  | Lop(Ispecific(Ioffset_loc(n, addr))) ->
738      I.add (int n) (addressing addr DWORD i 0)
739  | Lop(Ispecific(Ipush)) ->
740      (* Push arguments in reverse order *)
741      for n = Array.length i.arg - 1 downto 0 do
742        let r = i.arg.(n) in
743        match r with
744          {loc = Reg _; typ = Float} ->
745            I.sub (int 8) esp;
746            cfi_adjust_cfa_offset 8;
747            I.fstp (mem32 REAL8 0 RSP);
748            stack_offset := !stack_offset + 8
749        | {loc = Stack sl; typ = Float} ->
750            let ofs = slot_offset sl 1 in
751            (* Use x87 stack to move from stack to stack,
752               instead of two 32-bit push instructions,
753               which could kill performance on modern CPUs (see #6979).
754            *)
755            I.fld (mem32 REAL8 ofs RSP);
756            I.sub (int 8) esp;
757            cfi_adjust_cfa_offset 8;
758            I.fstp (mem32 REAL8 0 RSP);
759            stack_offset := !stack_offset + 8
760        | _ ->
761            I.push (reg r);
762            cfi_adjust_cfa_offset 4;
763            stack_offset := !stack_offset + 4
764      done
765  | Lop(Ispecific(Ipush_int n)) ->
766      I.push (nat n);
767      cfi_adjust_cfa_offset 4;
768      stack_offset := !stack_offset + 4
769  | Lop(Ispecific(Ipush_symbol s)) ->
770      add_used_symbol s;
771      I.push (immsym s);
772      cfi_adjust_cfa_offset 4;
773      stack_offset := !stack_offset + 4
774  | Lop(Ispecific(Ipush_load addr)) ->
775      I.push (addressing addr DWORD i 0);
776      cfi_adjust_cfa_offset 4;
777      stack_offset := !stack_offset + 4
778  | Lop(Ispecific(Ipush_load_float addr)) ->
779      I.push (addressing (offset_addressing addr 4) DWORD i 0);
780      I.push (addressing addr DWORD i 0);
781      cfi_adjust_cfa_offset 8;
782      stack_offset := !stack_offset + 8
783  | Lop(Ispecific(Ifloatarithmem(double, op, addr))) ->
784      if not (is_tos i.arg.(0)) then
785        I.fld (reg i.arg.(0));
786      instr_for_floatarithmem op
787          (addressing addr
788             (if double then REAL8 else REAL4) i 1)
789  | Lop(Ispecific(Ifloatspecial s)) ->
790      (* Push args on float stack if necessary *)
791      for k = 0 to Array.length i.arg - 1 do
792        if not (is_tos i.arg.(k)) then I.fld (reg i.arg.(k))
793      done;
794      (* Fix-up for binary instrs whose args were swapped *)
795      if Array.length i.arg = 2 && is_tos i.arg.(1) then
796        I.fxch st1;
797      emit_floatspecial s
798  | Lreloadretaddr ->
799      ()
800  | Lreturn ->
801      output_epilogue begin fun () ->
802        I.ret ()
803      end
804  | Llabel lbl ->
805      emit_Llabel fallthrough lbl
806  | Lbranch lbl ->
807      I.jmp (label lbl)
808  | Lcondbranch(tst, lbl) ->
809      let lbl = label lbl in
810      begin match tst with
811      | Itruetest ->
812          output_test_zero i.arg.(0);
813          I.jne lbl;
814      | Ifalsetest ->
815          output_test_zero i.arg.(0);
816          I.je lbl
817      | Iinttest cmp ->
818          I.cmp (reg i.arg.(1)) (reg i.arg.(0));
819          I.j (cond cmp) lbl
820      | Iinttest_imm((Isigned Ceq | Isigned Cne |
821                      Iunsigned Ceq | Iunsigned Cne) as cmp, 0) ->
822          output_test_zero i.arg.(0);
823          I.j (cond cmp) lbl
824      | Iinttest_imm(cmp, n) ->
825          I.cmp (int n) (reg i.arg.(0));
826          I.j (cond cmp) lbl
827      | Ifloattest(cmp, neg) ->
828          emit_float_test cmp neg i.arg lbl
829      | Ioddtest ->
830          I.test (int 1) (reg i.arg.(0));
831          I.jne lbl
832      | Ieventest ->
833          I.test (int 1) (reg i.arg.(0));
834          I.je lbl
835      end
836  | Lcondbranch3(lbl0, lbl1, lbl2) ->
837      I.cmp (int 1) (reg i.arg.(0));
838      begin match lbl0 with
839        None -> ()
840      | Some lbl -> I.jb (label lbl)
841      end;
842      begin match lbl1 with
843        None -> ()
844      | Some lbl -> I.je (label lbl)
845      end;
846      begin match lbl2 with
847        None -> ()
848      | Some lbl -> I.jg (label lbl)
849      end
850  | Lswitch jumptbl ->
851      let lbl = new_label() in
852      I.jmp (mem32 NONE 0 (reg32 i.arg.(0)) ~scale:4 ~sym:(emit_label lbl));
853      D.data ();
854      _label (emit_label lbl);
855      for i = 0 to Array.length jumptbl - 1 do
856        D.long (ConstLabel (emit_label jumptbl.(i)))
857      done;
858      D.text ()
859  | Lsetuptrap lbl ->
860      I.call (label lbl)
861  | Lpushtrap ->
862      if trap_frame_size > 8 then
863        I.sub (int (trap_frame_size - 8)) esp;
864      I.push (sym32 "caml_exception_pointer");
865      cfi_adjust_cfa_offset trap_frame_size;
866      I.mov esp (sym32 "caml_exception_pointer");
867      stack_offset := !stack_offset + trap_frame_size
868  | Lpoptrap ->
869      I.pop (sym32 "caml_exception_pointer");
870      I.add (int (trap_frame_size - 4)) esp;
871      cfi_adjust_cfa_offset (-trap_frame_size);
872      stack_offset := !stack_offset - trap_frame_size
873  | Lraise k  ->
874      begin match k with
875      | Cmm.Raise_withtrace ->
876          emit_call "caml_raise_exn";
877          record_frame Reg.Set.empty true i.dbg
878      | Cmm.Raise_notrace ->
879          I.mov (sym32 "caml_exception_pointer") esp;
880          I.pop (sym32 "caml_exception_pointer");
881          if trap_frame_size > 8 then
882            I.add (int (trap_frame_size - 8)) esp;
883          I.ret ()
884      end
885
886let rec emit_all fallthrough i =
887  match i.desc with
888  |  Lend -> ()
889  | _ ->
890      emit_instr fallthrough i;
891      emit_all
892        (system = S_win32 || Linearize.has_fallthrough i.desc)
893        i.next
894
895(* Emission of external symbol references (for MacOSX) *)
896
897let emit_external_symbol_direct s =
898  _label (Printf.sprintf "L%s$stub" (emit_symbol s));
899  D.indirect_symbol (emit_symbol s);
900  I.hlt (); I.hlt (); I.hlt (); I.hlt () ; I.hlt ()
901
902let emit_external_symbol_indirect s =
903  _label (Printf.sprintf "L%s$non_lazy_ptr" (emit_symbol s));
904  D.indirect_symbol (emit_symbol s);
905  D.long (const 0)
906
907let emit_external_symbols () =
908  D.section [ "__IMPORT"; "__pointers"] None ["non_lazy_symbol_pointers" ];
909  StringSet.iter emit_external_symbol_indirect !external_symbols_indirect;
910  external_symbols_indirect := StringSet.empty;
911  D.section [ "__IMPORT"; "__jump_table"] None
912    [ "symbol_stubs"; "self_modifying_code+pure_instructions"; "5" ];
913  StringSet.iter emit_external_symbol_direct !external_symbols_direct;
914  external_symbols_direct := StringSet.empty;
915  if !Clflags.gprofile then begin
916    _label "Lmcount$stub";
917    D.indirect_symbol "mcount";
918    I.hlt (); I.hlt (); I.hlt () ; I.hlt () ; I.hlt ()
919  end
920
921(* Emission of the profiling prelude *)
922
923let call_mcount mcount =
924  I.push eax;
925  I.mov esp ebp;
926  I.push ecx;
927  I.push edx;
928  I.call (sym mcount);
929  I.pop edx;
930  I.pop ecx;
931  I.pop eax
932
933let emit_profile () =
934  match system with
935  | S_linux_elf | S_gnu -> call_mcount "mcount"
936  | S_bsd_elf -> call_mcount ".mcount"
937  | S_macosx -> call_mcount "Lmcount$stub"
938  | _ -> () (*unsupported yet*)
939
940(* Emission of a function declaration *)
941
942let fundecl fundecl =
943  function_name := fundecl.fun_name;
944  fastcode_flag := fundecl.fun_fast;
945  tailrec_entry_point := new_label();
946  stack_offset := 0;
947  call_gc_sites := [];
948  bound_error_sites := [];
949  bound_error_call := 0;
950  D.text ();
951  add_def_symbol fundecl.fun_name;
952  D.align (if system = S_win32 then 4 else 16);
953  if system = S_macosx
954  && not !Clflags.output_c_object
955  && is_generic_function fundecl.fun_name
956  then (* PR#4690 *)
957    D.private_extern (emit_symbol fundecl.fun_name)
958  else
959    D.global (emit_symbol fundecl.fun_name);
960  D.label (emit_symbol fundecl.fun_name);
961  emit_debug_info fundecl.fun_dbg;
962  cfi_startproc ();
963  if !Clflags.gprofile then emit_profile();
964  let n = frame_size() - 4 in
965  if n > 0 then  begin
966    I.sub (int n) esp;
967    cfi_adjust_cfa_offset n;
968  end;
969  def_label !tailrec_entry_point;
970  emit_all true fundecl.fun_body;
971  List.iter emit_call_gc !call_gc_sites;
972  emit_call_bound_errors ();
973  cfi_endproc ();
974  begin match system with
975  | S_linux_elf | S_bsd_elf | S_gnu ->
976      D.type_ (emit_symbol fundecl.fun_name) "@function";
977      D.size (emit_symbol fundecl.fun_name)
978        (ConstSub (
979            ConstThis,
980            ConstLabel (emit_symbol fundecl.fun_name)))
981  | _ -> ()
982  end
983
984
985(* Emission of data *)
986
987let emit_item = function
988  | Cglobal_symbol s -> D.global (emit_symbol s)
989  | Cdefine_symbol s -> add_def_symbol s; _label (emit_symbol s)
990  | Cint8 n -> D.byte (const n)
991  | Cint16 n -> D.word (const n)
992  | Cint32 n -> D.long (const_nat n)
993  | Cint n -> D.long (const_nat n)
994  | Csingle f -> D.long (Const (Int64.of_int32 (Int32.bits_of_float f)))
995  | Cdouble f -> emit_float64_split_directive (Int64.bits_of_float f)
996  | Csymbol_address s -> add_used_symbol s; D.long (ConstLabel (emit_symbol s))
997  | Cstring s -> D.bytes s
998  | Cskip n -> if n > 0 then D.space n
999  | Calign n -> D.align n
1000
1001let data l =
1002  D.data ();
1003  List.iter emit_item l
1004
1005(* Beginning / end of an assembly file *)
1006
1007let begin_assembly() =
1008  X86_proc.reset_asm_code ();
1009  reset_debug_info();                   (* PR#5603 *)
1010  float_constants := [];
1011  if system = S_win32 then begin
1012    D.mode386 ();
1013    D.model "FLAT";
1014    D.extrn "_caml_young_ptr" DWORD;
1015    D.extrn "_caml_young_limit" DWORD;
1016    D.extrn "_caml_exception_pointer" DWORD;
1017    D.extrn "_caml_extra_params" DWORD;
1018    D.extrn "_caml_call_gc" PROC;
1019    D.extrn "_caml_c_call" PROC;
1020    D.extrn "_caml_allocN" PROC;
1021    D.extrn "_caml_alloc1" PROC;
1022    D.extrn "_caml_alloc2" PROC;
1023    D.extrn "_caml_alloc3" PROC;
1024    D.extrn "_caml_ml_array_bound_error" PROC;
1025    D.extrn "_caml_raise_exn" PROC;
1026  end;
1027
1028  D.data ();
1029  emit_global_label "data_begin";
1030
1031  D.text ();
1032  emit_global_label "code_begin";
1033  if system = S_macosx then I.nop (); (* PR#4690 *)
1034  ()
1035
1036let end_assembly() =
1037  if !float_constants <> [] then begin
1038    D.data ();
1039    List.iter (fun (cst,lbl) -> emit_float_constant cst lbl) !float_constants
1040  end;
1041
1042  D.text ();
1043  if system = S_macosx then I.nop ();
1044  (* suppress "ld warning: atom sorting error" *)
1045
1046  emit_global_label "code_end";
1047
1048  D.data ();
1049  emit_global_label "data_end";
1050  D.long (const 0);
1051
1052  emit_global_label "frametable";
1053
1054  emit_frames
1055    { efa_code_label = (fun l -> D.long (ConstLabel (emit_label l)));
1056      efa_data_label = (fun l -> D.long (ConstLabel (emit_label l)));
1057      efa_16 = (fun n -> D.word (const n));
1058      efa_32 = (fun n -> D.long (const_32 n));
1059      efa_word = (fun n -> D.long (const n));
1060      efa_align = D.align;
1061      efa_label_rel = (fun lbl ofs ->
1062          D.long (ConstAdd (
1063              ConstSub(ConstLabel(emit_label lbl),
1064                       ConstThis),
1065              const_32 ofs)));
1066      efa_def_label = (fun l -> _label (emit_label l));
1067      efa_string = (fun s -> D.bytes (s ^ "\000"))
1068    };
1069
1070  if system = S_macosx then emit_external_symbols ();
1071  if system = S_linux_elf then
1072    (* Mark stack as non-executable, PR#4564 *)
1073    D.section [".note.GNU-stack"] (Some "") ["%progbits"];
1074
1075  if system = S_win32 then begin
1076    D.comment "External functions";
1077    StringSet.iter
1078      (fun s ->
1079         if not (StringSet.mem s !symbols_defined) then
1080           D.extrn (emit_symbol s) PROC)
1081      !symbols_used;
1082    symbols_used := StringSet.empty;
1083    symbols_defined := StringSet.empty;
1084  end;
1085
1086  let asm =
1087    if !Emitaux.create_asm_file then
1088      Some
1089        (
1090         (if X86_proc.masm then X86_masm.generate_asm
1091          else X86_gas.generate_asm) !Emitaux.output_channel
1092        )
1093    else
1094      None
1095  in
1096  X86_proc.generate_code asm
1097