1#2 "asmcomp/i386/emit.mlp" 2(**************************************************************************) 3(* *) 4(* OCaml *) 5(* *) 6(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) 7(* *) 8(* Copyright 1996 Institut National de Recherche en Informatique et *) 9(* en Automatique. *) 10(* *) 11(* All rights reserved. This file is distributed under the terms of *) 12(* the GNU Lesser General Public License version 2.1, with the *) 13(* special exception on linking described in the file LICENSE. *) 14(* *) 15(**************************************************************************) 16 17(* Emission of Intel 386 assembly code *) 18 19open Misc 20open Cmm 21open Arch 22open Proc 23open Reg 24open Mach 25open Linearize 26open Emitaux 27 28open X86_ast 29open X86_proc 30open X86_dsl 31 32let _label s = D.label ~typ:DWORD s 33 34let mem_sym typ ?(ofs = 0) sym = 35 mem32 typ ~scale:0 ?base:None ~sym ofs RAX (*ignored since scale=0*) 36 37(* CFI directives *) 38 39let cfi_startproc () = 40 if Config.asm_cfi_supported then D.cfi_startproc () 41 42let cfi_endproc () = 43 if Config.asm_cfi_supported then D.cfi_endproc () 44 45let cfi_adjust_cfa_offset n = 46 if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n 47 48let emit_debug_info dbg = 49 emit_debug_info_gen dbg D.file D.loc 50 51(* Tradeoff between code size and code speed *) 52 53let fastcode_flag = ref true 54 55let stack_offset = ref 0 56 57(* Layout of the stack frame *) 58 59let frame_size () = (* includes return address *) 60 let sz = 61 !stack_offset + 4 * num_stack_slots.(0) + 8 * num_stack_slots.(1) + 4 62 in Misc.align sz stack_alignment 63 64let slot_offset loc cl = 65 match loc with 66 | Incoming n -> 67 assert (n >= 0); 68 frame_size() + n 69 | Local n -> 70 if cl = 0 71 then !stack_offset + n * 4 72 else !stack_offset + num_stack_slots.(0) * 4 + n * 8 73 | Outgoing n -> 74 assert (n >= 0); 75 n 76 77(* Record symbols used and defined - at the end generate extern for those 78 used but not defined *) 79 80let symbols_defined = ref StringSet.empty 81let symbols_used = ref StringSet.empty 82 83let add_def_symbol s = symbols_defined := StringSet.add s !symbols_defined 84let add_used_symbol s = symbols_used := StringSet.add s !symbols_used 85 86let trap_frame_size = Misc.align 8 stack_alignment 87 88(* Prefixing of symbols with "_" *) 89 90let symbol_prefix = 91 match system with 92 | S_linux_elf -> "" 93 | S_bsd_elf -> "" 94 | S_solaris -> "" 95 | S_beos -> "" 96 | S_gnu -> "" 97 | _ -> "_" (* win32 & others *) 98 99let emit_symbol s = string_of_symbol symbol_prefix s 100 101let immsym s = sym (emit_symbol s) 102 103let emit_call s = I.call (immsym s) 104 105(* Output a label *) 106 107let label_prefix = 108 match system with 109 | S_linux_elf -> ".L" 110 | S_bsd_elf -> ".L" 111 | S_solaris -> ".L" 112 | S_beos -> ".L" 113 | S_gnu -> ".L" 114 | _ -> "L" 115 116let emit_label lbl = 117 Printf.sprintf "%s%d" label_prefix lbl 118 119let label s = sym (emit_label s) 120 121let def_label s = D.label (emit_label s) 122 123let emit_Llabel fallthrough lbl = 124 if not fallthrough && !fastcode_flag then D.align 16 ; 125 def_label lbl 126 127(* Output a pseudo-register *) 128 129let int_reg_name = [| RAX; RBX; RCX; RDX; RSI; RDI; RBP |] 130 131let float_reg_name = [| TOS |] 132 133let register_name r = 134 if r < 100 then Reg32 (int_reg_name.(r)) 135 else Regf (float_reg_name.(r - 100)) 136 137let sym32 ?ofs s = mem_sym ?ofs DWORD (emit_symbol s) 138 139let reg = function 140 | { loc = Reg r } -> register_name r 141 | { loc = Stack(Incoming n | Outgoing n) } when n < 0 -> 142 sym32 "caml_extra_params" ~ofs:(n + 64) 143 | { loc = Stack s; typ = Float } as r -> 144 let ofs = slot_offset s (register_class r) in 145 mem32 REAL8 ofs RSP 146 | { loc = Stack s } as r -> 147 let ofs = slot_offset s (register_class r) in 148 mem32 DWORD ofs RSP 149 | { loc = Unknown } -> 150 fatal_error "Emit_i386.reg" 151 152(* Output a reference to the lower 8 bits or lower 16 bits of a register *) 153 154let reg_low_8_name = Array.map (fun r -> Reg8L r) int_reg_name 155let reg_low_16_name = Array.map (fun r -> Reg16 r) int_reg_name 156 157let reg8 r = 158 match r.loc with 159 | Reg r when r < 4 -> reg_low_8_name.(r) 160 | _ -> fatal_error "Emit_i386.reg8" 161 162let reg16 r = 163 match r.loc with 164 | Reg r when r < 7 -> reg_low_16_name.(r) 165 | _ -> fatal_error "Emit_i386.reg16" 166 167let reg32 = function 168 | { loc = Reg.Reg r } -> int_reg_name.(r) 169 | _ -> assert false 170 171let arg32 i n = reg32 i.arg.(n) 172 173(* Output an addressing mode *) 174 175let addressing addr typ i n = 176 match addr with 177 | Ibased(s, ofs) -> 178 add_used_symbol s; 179 mem_sym typ (emit_symbol s) ~ofs 180 | Iindexed d -> 181 mem32 typ d (arg32 i n) 182 | Iindexed2 d -> 183 mem32 typ ~base:(arg32 i n) d (arg32 i (n+1)) 184 | Iscaled(2, d) -> 185 mem32 typ ~base:(arg32 i n) d (arg32 i n) 186 | Iscaled(scale, d) -> 187 mem32 typ ~scale d (arg32 i n) 188 | Iindexed2scaled(scale, d) -> 189 mem32 typ ~scale ~base:(arg32 i n) d (arg32 i (n+1)) 190 191(* Record live pointers at call points *) 192 193let record_frame_label ?label live raise_ dbg = 194 let lbl = 195 match label with 196 | None -> new_label() 197 | Some label -> label 198 in 199 let live_offset = ref [] in 200 Reg.Set.iter 201 (function 202 | {typ = Val; loc = Reg r} -> 203 live_offset := ((r lsl 1) + 1) :: !live_offset 204 | {typ = Val; loc = Stack s} as reg -> 205 live_offset := slot_offset s (register_class reg) :: !live_offset 206 | {typ = Addr} as r -> 207 Misc.fatal_error ("bad GC root " ^ Reg.name r) 208 | _ -> ()) 209 live; 210 record_frame_descr ~label:lbl ~frame_size:(frame_size()) 211 ~live_offset:!live_offset ~raise_frame:raise_ dbg; 212 lbl 213 214let record_frame ?label live raise_ dbg = 215 let lbl = record_frame_label ?label live raise_ dbg in 216 def_label lbl 217 218(* Record calls to the GC -- we've moved them out of the way *) 219 220type gc_call = 221 { gc_lbl: label; (* Entry label *) 222 gc_return_lbl: label; (* Where to branch after GC *) 223 gc_frame: label } (* Label of frame descriptor *) 224 225let call_gc_sites = ref ([] : gc_call list) 226 227let emit_call_gc gc = 228 def_label gc.gc_lbl; 229 emit_call "caml_call_gc"; 230 def_label gc.gc_frame; 231 I.jmp (label gc.gc_return_lbl) 232 233(* Record calls to caml_ml_array_bound_error. 234 In -g mode, we maintain one call to caml_ml_array_bound_error 235 per bound check site. Without -g, we can share a single call. *) 236 237type bound_error_call = 238 { bd_lbl: label; (* Entry label *) 239 bd_frame: label } (* Label of frame descriptor *) 240 241let bound_error_sites = ref ([] : bound_error_call list) 242let bound_error_call = ref 0 243 244let bound_error_label ?label dbg = 245 if !Clflags.debug then begin 246 let lbl_bound_error = new_label() in 247 let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in 248 bound_error_sites := 249 { bd_lbl = lbl_bound_error; bd_frame = lbl_frame } :: !bound_error_sites; 250 lbl_bound_error 251 end else begin 252 if !bound_error_call = 0 then bound_error_call := new_label(); 253 !bound_error_call 254 end 255 256let emit_call_bound_error bd = 257 def_label bd.bd_lbl; 258 emit_call "caml_ml_array_bound_error"; 259 def_label bd.bd_frame 260 261let emit_call_bound_errors () = 262 List.iter emit_call_bound_error !bound_error_sites; 263 if !bound_error_call > 0 then begin 264 def_label !bound_error_call; 265 emit_call "caml_ml_array_bound_error" 266 end 267 268(* Names for instructions *) 269 270let instr_for_intop = function 271 | Iadd -> I.add 272 | Isub -> I.sub 273 | Imul -> (fun arg1 arg2 -> I.imul arg1 (Some arg2)) 274 | Iand -> I.and_ 275 | Ior -> I.or_ 276 | Ixor -> I.xor 277 | Ilsl -> I.sal 278 | Ilsr -> I.shr 279 | Iasr -> I.sar 280 | _ -> fatal_error "Emit_i386: instr_for_intop" 281 282let unary_instr_for_floatop = function 283 | Inegf -> I.fchs () 284 | Iabsf -> I.fabs () 285 | _ -> fatal_error "Emit_i386: unary_instr_for_floatop" 286 287let instr_for_floatop = function 288 | Iaddf -> I.fadd 289 | Isubf -> I.fsub 290 | Imulf -> I.fmul 291 | Idivf -> I.fdiv 292 | Ispecific Isubfrev -> I.fsubr 293 | Ispecific Idivfrev -> I.fdivr 294 | _ -> fatal_error "Emit_i386: instr_for_floatop" 295 296let instr_for_floatop_reversed = function 297 | Iaddf -> I.fadd 298 | Isubf -> I.fsubr 299 | Imulf -> I.fmul 300 | Idivf -> I.fdivr 301 | Ispecific Isubfrev -> I.fsub 302 | Ispecific Idivfrev -> I.fdiv 303 | _ -> fatal_error "Emit_i386: instr_for_floatop_reversed" 304 305 306let instr_for_floatop_reversed_pop = function 307 | Iaddf -> I.faddp 308 | Isubf -> I.fsubrp 309 | Imulf -> I.fmulp 310 | Idivf -> I.fdivrp 311 | Ispecific Isubfrev -> I.fsubp 312 | Ispecific Idivfrev -> I.fdivp 313 | _ -> fatal_error "Emit_i386: instr_for_floatop_reversed_pop" 314 315let instr_for_floatarithmem = function 316 | Ifloatadd -> I.fadd 317 | Ifloatsub -> I.fsub 318 | Ifloatsubrev -> I.fsubr 319 | Ifloatmul -> I.fmul 320 | Ifloatdiv -> I.fdiv 321 | Ifloatdivrev -> I.fdivr 322 323let cond = function 324 | Isigned Ceq -> E | Isigned Cne -> NE 325 | Isigned Cle -> LE | Isigned Cgt -> G 326 | Isigned Clt -> L | Isigned Cge -> GE 327 | Iunsigned Ceq -> E | Iunsigned Cne -> NE 328 | Iunsigned Cle -> BE | Iunsigned Cgt -> A 329 | Iunsigned Clt -> B | Iunsigned Cge -> AE 330 331(* Output an = 0 or <> 0 test. *) 332 333let output_test_zero arg = 334 match arg.loc with 335 | Reg.Reg _ -> I.test (reg arg) (reg arg) 336 | _ -> I.cmp (int 0) (reg arg) 337 338(* Deallocate the stack frame before a return or tail call *) 339 340let output_epilogue f = 341 let n = frame_size() - 4 in 342 if n > 0 then 343 begin 344 I.add (int n) esp; 345 cfi_adjust_cfa_offset (-n); 346 f (); 347 (* reset CFA back cause function body may continue *) 348 cfi_adjust_cfa_offset n 349 end 350 else 351 f () 352 353(* Determine if the given register is the top of the floating-point stack *) 354 355let is_tos = function { loc = Reg _; typ = Float } -> true | _ -> false 356 357(* Emit the code for a floating-point comparison *) 358 359let emit_float_test cmp neg arg lbl = 360 let actual_cmp = 361 match (is_tos arg.(0), is_tos arg.(1)) with 362 | (true, true) -> 363 (* both args on top of FP stack *) 364 I.fcompp (); 365 cmp 366 | (true, false) -> 367 (* first arg on top of FP stack *) 368 I.fcomp (reg arg.(1)); 369 cmp 370 | (false, true) -> 371 (* second arg on top of FP stack *) 372 I.fcomp (reg arg.(0)); 373 Cmm.swap_comparison cmp 374 | (false, false) -> 375 I.fld (reg arg.(0)); 376 I.fcomp (reg arg.(1)); 377 cmp 378 in 379 I.fnstsw ax; 380 match actual_cmp with 381 | Ceq -> 382 if neg then begin 383 I.and_ (int 68) ah; 384 I.xor (int 64) ah; 385 I.jne lbl 386 end else begin 387 I.and_ (int 69) ah; 388 I.cmp (int 64) ah; 389 I.je lbl 390 end 391 | Cne -> 392 if neg then begin 393 I.and_ (int 69) ah; 394 I.cmp (int 64) ah; 395 I.je lbl 396 end else begin 397 I.and_ (int 68) ah; 398 I.xor (int 64) ah; 399 I.jne lbl 400 end 401 | Cle -> 402 I.and_ (int 69) ah; 403 I.dec ah; 404 I.cmp (int 64) ah; 405 if neg 406 then I.jae lbl 407 else I.jb lbl 408 | Cge -> 409 I.and_ (int 5) ah; 410 if neg 411 then I.jne lbl 412 else I.je lbl 413 | Clt -> 414 I.and_ (int 69) ah; 415 I.cmp (int 1) ah; 416 if neg 417 then I.jne lbl 418 else I.je lbl 419 | Cgt -> 420 I.and_ (int 69) ah; 421 if neg 422 then I.jne lbl 423 else I.je lbl 424 425(* Emit a Ifloatspecial instruction *) 426 427let emit_floatspecial = function 428 | "atan" -> I.fld1 (); I.fpatan () 429 | "atan2" -> I.fpatan () 430 | "cos" -> I.fcos () 431 | "log" -> I.fldln2 (); I.fxch st1; I.fyl2x () 432 | "log10" -> I.fldlg2 (); I.fxch st1; I.fyl2x () 433 | "sin" -> I.fsin () 434 | "sqrt" -> I.fsqrt () 435 | "tan" -> I.fptan (); I.fstp st0 436 | _ -> assert false 437 438(* Floating-point constants *) 439 440let float_constants = ref ([] : (int64 * int) list) 441 442let add_float_constant cst = 443 try 444 List.assoc cst !float_constants 445 with 446 Not_found -> 447 let lbl = new_label() in 448 float_constants := (cst, lbl) :: !float_constants; 449 lbl 450 451let emit_float64_split_directive x = 452 let lo = Int64.logand x 0xFFFF_FFFFL 453 and hi = Int64.shift_right_logical x 32 in 454 D.long (Const (if Arch.big_endian then hi else lo)); 455 D.long (Const (if Arch.big_endian then lo else hi)) 456 457let emit_float_constant cst lbl = 458 _label (emit_label lbl); 459 emit_float64_split_directive cst 460 461let emit_global_label s = 462 let lbl = Compilenv.make_symbol (Some s) in 463 add_def_symbol lbl; 464 let lbl = emit_symbol lbl in 465 D.global lbl; 466 _label lbl 467 468(* Output the assembly code for an instruction *) 469 470(* Name of current function *) 471let function_name = ref "" 472(* Entry point for tail recursive calls *) 473let tailrec_entry_point = ref 0 474(* Record references to external C functions (for MacOSX) *) 475let external_symbols_direct = ref StringSet.empty 476let external_symbols_indirect = ref StringSet.empty 477 478let emit_instr fallthrough i = 479 emit_debug_info i.dbg; 480 match i.desc with 481 | Lend -> () 482 | Lop(Imove | Ispill | Ireload) -> 483 let src = i.arg.(0) and dst = i.res.(0) in 484 if src.loc <> dst.loc then begin 485 if src.typ = Float then 486 if is_tos src then 487 I.fstp (reg dst) 488 else if is_tos dst then 489 I.fld (reg src) 490 else begin 491 I.fld (reg src); 492 I.fstp (reg dst) 493 end 494 else 495 I.mov (reg src) (reg dst) 496 end 497 | Lop(Iconst_int n) -> 498 if n = 0n then begin 499 match i.res.(0).loc with 500 | Reg _ -> I.xor (reg i.res.(0)) (reg i.res.(0)) 501 | _ -> I.mov (int 0) (reg i.res.(0)) 502 end else 503 I.mov (nat n) (reg i.res.(0)) 504 | Lop(Iconst_float f) -> 505 begin match f with 506 | 0x0000_0000_0000_0000L -> (* +0.0 *) 507 I.fldz () 508 | 0x8000_0000_0000_0000L -> (* -0.0 *) 509 I.fldz (); I.fchs () 510 | 0x3FF0_0000_0000_0000L -> (* 1.0 *) 511 I.fld1 () 512 | 0xBFF0_0000_0000_0000L -> (* -1.0 *) 513 I.fld1 (); I.fchs () 514 | _ -> 515 let lbl = add_float_constant f in 516 I.fld (mem_sym REAL8 (emit_label lbl)) 517 end 518 | Lop(Iconst_symbol s) -> 519 add_used_symbol s; 520 I.mov (immsym s) (reg i.res.(0)) 521 | Lop(Icall_ind { label_after; }) -> 522 I.call (reg i.arg.(0)); 523 record_frame i.live false i.dbg ~label:label_after 524 | Lop(Icall_imm { func; label_after; }) -> 525 add_used_symbol func; 526 emit_call func; 527 record_frame i.live false i.dbg ~label:label_after 528 | Lop(Itailcall_ind { label_after = _; }) -> 529 output_epilogue begin fun () -> 530 I.jmp (reg i.arg.(0)) 531 end 532 | Lop(Itailcall_imm { func; label_after = _; }) -> 533 if func = !function_name then 534 I.jmp (label !tailrec_entry_point) 535 else begin 536 output_epilogue begin fun () -> 537 add_used_symbol func; 538 I.jmp (immsym func) 539 end 540 end 541 | Lop(Iextcall { func; alloc; label_after; }) -> 542 add_used_symbol func; 543 if alloc then begin 544 if system <> S_macosx then 545 I.mov (immsym func) eax 546 else begin 547 external_symbols_indirect := 548 StringSet.add func !external_symbols_indirect; 549 I.mov (mem_sym DWORD (Printf.sprintf "L%s$non_lazy_ptr" 550 (emit_symbol func))) eax 551 end; 552 emit_call "caml_c_call"; 553 record_frame i.live false i.dbg ~label:label_after 554 end else begin 555 if system <> S_macosx then 556 emit_call func 557 else begin 558 external_symbols_direct := 559 StringSet.add func !external_symbols_direct; 560 I.call (sym (Printf.sprintf "L%s$stub" (emit_symbol func))) 561 end 562 end 563 | Lop(Istackoffset n) -> 564 if n < 0 565 then I.add (int (-n)) esp 566 else I.sub (int n) esp; 567 cfi_adjust_cfa_offset n; 568 stack_offset := !stack_offset + n 569 | Lop(Iload(chunk, addr)) -> 570 let dest = i.res.(0) in 571 begin match chunk with 572 | Word_int | Word_val | Thirtytwo_signed | Thirtytwo_unsigned -> 573 I.mov (addressing addr DWORD i 0) (reg dest) 574 | Byte_unsigned -> 575 I.movzx (addressing addr BYTE i 0) (reg dest) 576 | Byte_signed -> 577 I.movsx (addressing addr BYTE i 0) (reg dest) 578 | Sixteen_unsigned -> 579 I.movzx (addressing addr WORD i 0) (reg dest) 580 | Sixteen_signed -> 581 I.movsx (addressing addr WORD i 0) (reg dest) 582 | Single -> 583 I.fld (addressing addr REAL4 i 0) 584 | Double | Double_u -> 585 I.fld (addressing addr REAL8 i 0) 586 end 587 | Lop(Istore(chunk, addr, _)) -> 588 begin match chunk with 589 | Word_int | Word_val | Thirtytwo_signed | Thirtytwo_unsigned -> 590 I.mov (reg i.arg.(0)) (addressing addr DWORD i 1) 591 | Byte_unsigned | Byte_signed -> 592 I.mov (reg8 i.arg.(0)) (addressing addr BYTE i 1) 593 | Sixteen_unsigned | Sixteen_signed -> 594 I.mov (reg16 i.arg.(0)) (addressing addr WORD i 1) 595 | Single -> 596 if is_tos i.arg.(0) then 597 I.fstp (addressing addr REAL4 i 1) 598 else begin 599 I.fld (reg i.arg.(0)); 600 I.fstp (addressing addr REAL4 i 1) 601 end 602 | Double | Double_u -> 603 if is_tos i.arg.(0) then 604 I.fstp (addressing addr REAL8 i 1) 605 else begin 606 I.fld (reg i.arg.(0)); 607 I.fstp (addressing addr REAL8 i 1) 608 end 609 end 610 | Lop(Ialloc { words = n; label_after_call_gc; }) -> 611 if !fastcode_flag then begin 612 let lbl_redo = new_label() in 613 def_label lbl_redo; 614 I.mov (sym32 "caml_young_ptr") eax; 615 I.sub (int n) eax; 616 I.mov eax (sym32 "caml_young_ptr"); 617 I.cmp (sym32 "caml_young_limit") eax; 618 let lbl_call_gc = new_label() in 619 let lbl_frame = record_frame_label i.live false Debuginfo.none in 620 I.jb (label lbl_call_gc); 621 I.lea (mem32 NONE 4 RAX) (reg i.res.(0)); 622 call_gc_sites := 623 { gc_lbl = lbl_call_gc; 624 gc_return_lbl = lbl_redo; 625 gc_frame = lbl_frame } :: !call_gc_sites 626 end else begin 627 begin match n with 628 8 -> emit_call "caml_alloc1" 629 | 12 -> emit_call "caml_alloc2" 630 | 16 -> emit_call "caml_alloc3" 631 | _ -> 632 I.mov (int n) eax; 633 emit_call "caml_allocN" 634 end; 635 let label = 636 record_frame_label ?label:label_after_call_gc i.live false 637 Debuginfo.none 638 in 639 def_label label; 640 I.lea (mem32 NONE 4 RAX) (reg i.res.(0)) 641 end 642 | Lop(Iintop(Icomp cmp)) -> 643 I.cmp (reg i.arg.(1)) (reg i.arg.(0)); 644 I.set (cond cmp) al; 645 I.movzx al (reg i.res.(0)); 646 | Lop(Iintop_imm(Icomp cmp, n)) -> 647 I.cmp (int n) (reg i.arg.(0)); 648 I.set (cond cmp) al; 649 I.movzx al (reg i.res.(0)) 650 | Lop(Iintop (Icheckbound { label_after_error; } )) -> 651 let lbl = bound_error_label ?label:label_after_error i.dbg in 652 I.cmp (reg i.arg.(1)) (reg i.arg.(0)); 653 I.jbe (label lbl) 654 | Lop(Iintop_imm(Icheckbound { label_after_error; }, n)) -> 655 let lbl = bound_error_label ?label:label_after_error i.dbg in 656 I.cmp (int n) (reg i.arg.(0)); 657 I.jbe (label lbl) 658 | Lop(Iintop(Idiv | Imod)) -> 659 I.cdq (); 660 I.idiv (reg i.arg.(1)) 661 | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) -> 662 (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %ecx *) 663 instr_for_intop op cl (reg i.res.(0)) 664 | Lop(Iintop Imulh) -> 665 I.imul (reg i.arg.(1)) None 666 | Lop(Iintop op) -> 667 (* We have i.arg.(0) = i.res.(0) *) 668 instr_for_intop op (reg i.arg.(1)) (reg i.res.(0)) 669 | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc -> 670 I.lea (mem32 NONE n (reg32 i.arg.(0))) (reg i.res.(0)) 671 | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) -> 672 I.inc (reg i.res.(0)) 673 | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) -> 674 I.dec (reg i.res.(0)) 675 | Lop(Iintop_imm(op, n)) -> 676 (* We have i.arg.(0) = i.res.(0) *) 677 instr_for_intop op (int n) (reg i.res.(0)) 678 | Lop(Inegf | Iabsf as floatop) -> 679 if not (is_tos i.arg.(0)) then 680 I.fld (reg i.arg.(0)); 681 unary_instr_for_floatop floatop 682 | Lop(Iaddf | Isubf | Imulf | Idivf | Ispecific(Isubfrev | Idivfrev) 683 as floatop) -> 684 begin match (is_tos i.arg.(0), is_tos i.arg.(1)) with 685 (true, true) -> 686 (* both operands on top of FP stack *) 687 instr_for_floatop_reversed_pop floatop st0 st1 688 | (true, false) -> 689 (* first operand on stack *) 690 instr_for_floatop floatop (reg i.arg.(1)) 691 | (false, true) -> 692 (* second operand on stack *) 693 instr_for_floatop_reversed floatop (reg i.arg.(0)) 694 | (false, false) -> 695 (* both operands in memory *) 696 I.fld (reg i.arg.(0)); 697 instr_for_floatop floatop (reg i.arg.(1)) 698 end 699 | Lop(Ifloatofint) -> 700 begin match i.arg.(0).loc with 701 | Stack _ -> 702 I.fild (reg i.arg.(0)) 703 | _ -> 704 I.push (reg i.arg.(0)); 705 I.fild (mem32 DWORD 0 RSP); 706 I.add (int 4) esp 707 end 708 | Lop(Iintoffloat) -> 709 if not (is_tos i.arg.(0)) then 710 I.fld (reg i.arg.(0)); 711 stack_offset := !stack_offset - 8; 712 I.sub (int 8) esp; 713 cfi_adjust_cfa_offset 8; 714 I.fnstcw (mem32 NONE 4 RSP); 715 I.mov (mem32 WORD 4 RSP) ax; 716 I.mov (int 12) ah; 717 I.mov ax (mem32 WORD 0 RSP); 718 I.fldcw (mem32 NONE 0 RSP); 719 begin match i.res.(0).loc with 720 | Stack _ -> 721 I.fistp (reg i.res.(0)) 722 | _ -> 723 I.fistp (mem32 DWORD 0 RSP); 724 I.mov (mem32 DWORD 0 RSP) (reg i.res.(0)) 725 end; 726 I.fldcw (mem32 NONE 4 RSP); 727 I.add (int 8) esp; 728 cfi_adjust_cfa_offset (-8); 729 stack_offset := !stack_offset + 8 730 | Lop(Ispecific(Ilea addr)) -> 731 I.lea (addressing addr DWORD i 0) (reg i.res.(0)) 732 | Lop(Ispecific(Istore_int(n, addr, _))) -> 733 I.mov (nat n) (addressing addr DWORD i 0) 734 | Lop(Ispecific(Istore_symbol(s, addr, _))) -> 735 add_used_symbol s; 736 I.mov (immsym s) (addressing addr DWORD i 0) 737 | Lop(Ispecific(Ioffset_loc(n, addr))) -> 738 I.add (int n) (addressing addr DWORD i 0) 739 | Lop(Ispecific(Ipush)) -> 740 (* Push arguments in reverse order *) 741 for n = Array.length i.arg - 1 downto 0 do 742 let r = i.arg.(n) in 743 match r with 744 {loc = Reg _; typ = Float} -> 745 I.sub (int 8) esp; 746 cfi_adjust_cfa_offset 8; 747 I.fstp (mem32 REAL8 0 RSP); 748 stack_offset := !stack_offset + 8 749 | {loc = Stack sl; typ = Float} -> 750 let ofs = slot_offset sl 1 in 751 (* Use x87 stack to move from stack to stack, 752 instead of two 32-bit push instructions, 753 which could kill performance on modern CPUs (see #6979). 754 *) 755 I.fld (mem32 REAL8 ofs RSP); 756 I.sub (int 8) esp; 757 cfi_adjust_cfa_offset 8; 758 I.fstp (mem32 REAL8 0 RSP); 759 stack_offset := !stack_offset + 8 760 | _ -> 761 I.push (reg r); 762 cfi_adjust_cfa_offset 4; 763 stack_offset := !stack_offset + 4 764 done 765 | Lop(Ispecific(Ipush_int n)) -> 766 I.push (nat n); 767 cfi_adjust_cfa_offset 4; 768 stack_offset := !stack_offset + 4 769 | Lop(Ispecific(Ipush_symbol s)) -> 770 add_used_symbol s; 771 I.push (immsym s); 772 cfi_adjust_cfa_offset 4; 773 stack_offset := !stack_offset + 4 774 | Lop(Ispecific(Ipush_load addr)) -> 775 I.push (addressing addr DWORD i 0); 776 cfi_adjust_cfa_offset 4; 777 stack_offset := !stack_offset + 4 778 | Lop(Ispecific(Ipush_load_float addr)) -> 779 I.push (addressing (offset_addressing addr 4) DWORD i 0); 780 I.push (addressing addr DWORD i 0); 781 cfi_adjust_cfa_offset 8; 782 stack_offset := !stack_offset + 8 783 | Lop(Ispecific(Ifloatarithmem(double, op, addr))) -> 784 if not (is_tos i.arg.(0)) then 785 I.fld (reg i.arg.(0)); 786 instr_for_floatarithmem op 787 (addressing addr 788 (if double then REAL8 else REAL4) i 1) 789 | Lop(Ispecific(Ifloatspecial s)) -> 790 (* Push args on float stack if necessary *) 791 for k = 0 to Array.length i.arg - 1 do 792 if not (is_tos i.arg.(k)) then I.fld (reg i.arg.(k)) 793 done; 794 (* Fix-up for binary instrs whose args were swapped *) 795 if Array.length i.arg = 2 && is_tos i.arg.(1) then 796 I.fxch st1; 797 emit_floatspecial s 798 | Lreloadretaddr -> 799 () 800 | Lreturn -> 801 output_epilogue begin fun () -> 802 I.ret () 803 end 804 | Llabel lbl -> 805 emit_Llabel fallthrough lbl 806 | Lbranch lbl -> 807 I.jmp (label lbl) 808 | Lcondbranch(tst, lbl) -> 809 let lbl = label lbl in 810 begin match tst with 811 | Itruetest -> 812 output_test_zero i.arg.(0); 813 I.jne lbl; 814 | Ifalsetest -> 815 output_test_zero i.arg.(0); 816 I.je lbl 817 | Iinttest cmp -> 818 I.cmp (reg i.arg.(1)) (reg i.arg.(0)); 819 I.j (cond cmp) lbl 820 | Iinttest_imm((Isigned Ceq | Isigned Cne | 821 Iunsigned Ceq | Iunsigned Cne) as cmp, 0) -> 822 output_test_zero i.arg.(0); 823 I.j (cond cmp) lbl 824 | Iinttest_imm(cmp, n) -> 825 I.cmp (int n) (reg i.arg.(0)); 826 I.j (cond cmp) lbl 827 | Ifloattest(cmp, neg) -> 828 emit_float_test cmp neg i.arg lbl 829 | Ioddtest -> 830 I.test (int 1) (reg i.arg.(0)); 831 I.jne lbl 832 | Ieventest -> 833 I.test (int 1) (reg i.arg.(0)); 834 I.je lbl 835 end 836 | Lcondbranch3(lbl0, lbl1, lbl2) -> 837 I.cmp (int 1) (reg i.arg.(0)); 838 begin match lbl0 with 839 None -> () 840 | Some lbl -> I.jb (label lbl) 841 end; 842 begin match lbl1 with 843 None -> () 844 | Some lbl -> I.je (label lbl) 845 end; 846 begin match lbl2 with 847 None -> () 848 | Some lbl -> I.jg (label lbl) 849 end 850 | Lswitch jumptbl -> 851 let lbl = new_label() in 852 I.jmp (mem32 NONE 0 (reg32 i.arg.(0)) ~scale:4 ~sym:(emit_label lbl)); 853 D.data (); 854 _label (emit_label lbl); 855 for i = 0 to Array.length jumptbl - 1 do 856 D.long (ConstLabel (emit_label jumptbl.(i))) 857 done; 858 D.text () 859 | Lsetuptrap lbl -> 860 I.call (label lbl) 861 | Lpushtrap -> 862 if trap_frame_size > 8 then 863 I.sub (int (trap_frame_size - 8)) esp; 864 I.push (sym32 "caml_exception_pointer"); 865 cfi_adjust_cfa_offset trap_frame_size; 866 I.mov esp (sym32 "caml_exception_pointer"); 867 stack_offset := !stack_offset + trap_frame_size 868 | Lpoptrap -> 869 I.pop (sym32 "caml_exception_pointer"); 870 I.add (int (trap_frame_size - 4)) esp; 871 cfi_adjust_cfa_offset (-trap_frame_size); 872 stack_offset := !stack_offset - trap_frame_size 873 | Lraise k -> 874 begin match k with 875 | Cmm.Raise_withtrace -> 876 emit_call "caml_raise_exn"; 877 record_frame Reg.Set.empty true i.dbg 878 | Cmm.Raise_notrace -> 879 I.mov (sym32 "caml_exception_pointer") esp; 880 I.pop (sym32 "caml_exception_pointer"); 881 if trap_frame_size > 8 then 882 I.add (int (trap_frame_size - 8)) esp; 883 I.ret () 884 end 885 886let rec emit_all fallthrough i = 887 match i.desc with 888 | Lend -> () 889 | _ -> 890 emit_instr fallthrough i; 891 emit_all 892 (system = S_win32 || Linearize.has_fallthrough i.desc) 893 i.next 894 895(* Emission of external symbol references (for MacOSX) *) 896 897let emit_external_symbol_direct s = 898 _label (Printf.sprintf "L%s$stub" (emit_symbol s)); 899 D.indirect_symbol (emit_symbol s); 900 I.hlt (); I.hlt (); I.hlt (); I.hlt () ; I.hlt () 901 902let emit_external_symbol_indirect s = 903 _label (Printf.sprintf "L%s$non_lazy_ptr" (emit_symbol s)); 904 D.indirect_symbol (emit_symbol s); 905 D.long (const 0) 906 907let emit_external_symbols () = 908 D.section [ "__IMPORT"; "__pointers"] None ["non_lazy_symbol_pointers" ]; 909 StringSet.iter emit_external_symbol_indirect !external_symbols_indirect; 910 external_symbols_indirect := StringSet.empty; 911 D.section [ "__IMPORT"; "__jump_table"] None 912 [ "symbol_stubs"; "self_modifying_code+pure_instructions"; "5" ]; 913 StringSet.iter emit_external_symbol_direct !external_symbols_direct; 914 external_symbols_direct := StringSet.empty; 915 if !Clflags.gprofile then begin 916 _label "Lmcount$stub"; 917 D.indirect_symbol "mcount"; 918 I.hlt (); I.hlt (); I.hlt () ; I.hlt () ; I.hlt () 919 end 920 921(* Emission of the profiling prelude *) 922 923let call_mcount mcount = 924 I.push eax; 925 I.mov esp ebp; 926 I.push ecx; 927 I.push edx; 928 I.call (sym mcount); 929 I.pop edx; 930 I.pop ecx; 931 I.pop eax 932 933let emit_profile () = 934 match system with 935 | S_linux_elf | S_gnu -> call_mcount "mcount" 936 | S_bsd_elf -> call_mcount ".mcount" 937 | S_macosx -> call_mcount "Lmcount$stub" 938 | _ -> () (*unsupported yet*) 939 940(* Emission of a function declaration *) 941 942let fundecl fundecl = 943 function_name := fundecl.fun_name; 944 fastcode_flag := fundecl.fun_fast; 945 tailrec_entry_point := new_label(); 946 stack_offset := 0; 947 call_gc_sites := []; 948 bound_error_sites := []; 949 bound_error_call := 0; 950 D.text (); 951 add_def_symbol fundecl.fun_name; 952 D.align (if system = S_win32 then 4 else 16); 953 if system = S_macosx 954 && not !Clflags.output_c_object 955 && is_generic_function fundecl.fun_name 956 then (* PR#4690 *) 957 D.private_extern (emit_symbol fundecl.fun_name) 958 else 959 D.global (emit_symbol fundecl.fun_name); 960 D.label (emit_symbol fundecl.fun_name); 961 emit_debug_info fundecl.fun_dbg; 962 cfi_startproc (); 963 if !Clflags.gprofile then emit_profile(); 964 let n = frame_size() - 4 in 965 if n > 0 then begin 966 I.sub (int n) esp; 967 cfi_adjust_cfa_offset n; 968 end; 969 def_label !tailrec_entry_point; 970 emit_all true fundecl.fun_body; 971 List.iter emit_call_gc !call_gc_sites; 972 emit_call_bound_errors (); 973 cfi_endproc (); 974 begin match system with 975 | S_linux_elf | S_bsd_elf | S_gnu -> 976 D.type_ (emit_symbol fundecl.fun_name) "@function"; 977 D.size (emit_symbol fundecl.fun_name) 978 (ConstSub ( 979 ConstThis, 980 ConstLabel (emit_symbol fundecl.fun_name))) 981 | _ -> () 982 end 983 984 985(* Emission of data *) 986 987let emit_item = function 988 | Cglobal_symbol s -> D.global (emit_symbol s) 989 | Cdefine_symbol s -> add_def_symbol s; _label (emit_symbol s) 990 | Cint8 n -> D.byte (const n) 991 | Cint16 n -> D.word (const n) 992 | Cint32 n -> D.long (const_nat n) 993 | Cint n -> D.long (const_nat n) 994 | Csingle f -> D.long (Const (Int64.of_int32 (Int32.bits_of_float f))) 995 | Cdouble f -> emit_float64_split_directive (Int64.bits_of_float f) 996 | Csymbol_address s -> add_used_symbol s; D.long (ConstLabel (emit_symbol s)) 997 | Cstring s -> D.bytes s 998 | Cskip n -> if n > 0 then D.space n 999 | Calign n -> D.align n 1000 1001let data l = 1002 D.data (); 1003 List.iter emit_item l 1004 1005(* Beginning / end of an assembly file *) 1006 1007let begin_assembly() = 1008 X86_proc.reset_asm_code (); 1009 reset_debug_info(); (* PR#5603 *) 1010 float_constants := []; 1011 if system = S_win32 then begin 1012 D.mode386 (); 1013 D.model "FLAT"; 1014 D.extrn "_caml_young_ptr" DWORD; 1015 D.extrn "_caml_young_limit" DWORD; 1016 D.extrn "_caml_exception_pointer" DWORD; 1017 D.extrn "_caml_extra_params" DWORD; 1018 D.extrn "_caml_call_gc" PROC; 1019 D.extrn "_caml_c_call" PROC; 1020 D.extrn "_caml_allocN" PROC; 1021 D.extrn "_caml_alloc1" PROC; 1022 D.extrn "_caml_alloc2" PROC; 1023 D.extrn "_caml_alloc3" PROC; 1024 D.extrn "_caml_ml_array_bound_error" PROC; 1025 D.extrn "_caml_raise_exn" PROC; 1026 end; 1027 1028 D.data (); 1029 emit_global_label "data_begin"; 1030 1031 D.text (); 1032 emit_global_label "code_begin"; 1033 if system = S_macosx then I.nop (); (* PR#4690 *) 1034 () 1035 1036let end_assembly() = 1037 if !float_constants <> [] then begin 1038 D.data (); 1039 List.iter (fun (cst,lbl) -> emit_float_constant cst lbl) !float_constants 1040 end; 1041 1042 D.text (); 1043 if system = S_macosx then I.nop (); 1044 (* suppress "ld warning: atom sorting error" *) 1045 1046 emit_global_label "code_end"; 1047 1048 D.data (); 1049 emit_global_label "data_end"; 1050 D.long (const 0); 1051 1052 emit_global_label "frametable"; 1053 1054 emit_frames 1055 { efa_code_label = (fun l -> D.long (ConstLabel (emit_label l))); 1056 efa_data_label = (fun l -> D.long (ConstLabel (emit_label l))); 1057 efa_16 = (fun n -> D.word (const n)); 1058 efa_32 = (fun n -> D.long (const_32 n)); 1059 efa_word = (fun n -> D.long (const n)); 1060 efa_align = D.align; 1061 efa_label_rel = (fun lbl ofs -> 1062 D.long (ConstAdd ( 1063 ConstSub(ConstLabel(emit_label lbl), 1064 ConstThis), 1065 const_32 ofs))); 1066 efa_def_label = (fun l -> _label (emit_label l)); 1067 efa_string = (fun s -> D.bytes (s ^ "\000")) 1068 }; 1069 1070 if system = S_macosx then emit_external_symbols (); 1071 if system = S_linux_elf then 1072 (* Mark stack as non-executable, PR#4564 *) 1073 D.section [".note.GNU-stack"] (Some "") ["%progbits"]; 1074 1075 if system = S_win32 then begin 1076 D.comment "External functions"; 1077 StringSet.iter 1078 (fun s -> 1079 if not (StringSet.mem s !symbols_defined) then 1080 D.extrn (emit_symbol s) PROC) 1081 !symbols_used; 1082 symbols_used := StringSet.empty; 1083 symbols_defined := StringSet.empty; 1084 end; 1085 1086 let asm = 1087 if !Emitaux.create_asm_file then 1088 Some 1089 ( 1090 (if X86_proc.masm then X86_masm.generate_asm 1091 else X86_gas.generate_asm) !Emitaux.output_channel 1092 ) 1093 else 1094 None 1095 in 1096 X86_proc.generate_code asm 1097