1%%% -*- erlang-indent-level: 2 -*- 2%%% 3%%% Licensed under the Apache License, Version 2.0 (the "License"); 4%%% you may not use this file except in compliance with the License. 5%%% You may obtain a copy of the License at 6%%% 7%%% http://www.apache.org/licenses/LICENSE-2.0 8%%% 9%%% Unless required by applicable law or agreed to in writing, software 10%%% distributed under the License is distributed on an "AS IS" BASIS, 11%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12%%% See the License for the specific language governing permissions and 13%%% limitations under the License. 14%%% 15%%% HiPE/x86 assembler 16%%% 17%%% TODO: 18%%% - Simplify combine_label_maps and mk_data_relocs. 19 20-ifdef(HIPE_AMD64). 21-define(HIPE_X86_ASSEMBLE, hipe_amd64_assemble). 22-define(HIPE_X86_ENCODE, hipe_amd64_encode). 23-define(HIPE_X86_REGISTERS, hipe_amd64_registers). 24-define(HIPE_X86_PP, hipe_amd64_pp). 25-ifdef(AMD64_SIMULATE_NSP). 26-define(X86_SIMULATE_NSP, ?AMD64_SIMULATE_NSP). 27-endif. 28-define(EAX, rax). 29-define(REGArch, reg64). 30-define(RMArch, rm64). 31-define(EA_DISP32_ABSOLUTE, ea_disp32_sindex). 32-else. 33-define(HIPE_X86_ASSEMBLE, hipe_x86_assemble). 34-define(HIPE_X86_ENCODE, hipe_x86_encode). 35-define(HIPE_X86_REGISTERS, hipe_x86_registers). 36-define(HIPE_X86_PP, hipe_x86_pp). 37-define(EAX, eax). 38-define(REGArch, reg32). 39-define(RMArch, rm32). 40-define(EA_DISP32_ABSOLUTE, ea_disp32). 41-endif. 42 43-module(?HIPE_X86_ASSEMBLE). 44-export([assemble/4]). 45 46-define(DEBUG,true). 47 48-include("../main/hipe.hrl"). 49-include("../x86/hipe_x86.hrl"). 50-include("../../kernel/src/hipe_ext_format.hrl"). 51-include("../rtl/hipe_literals.hrl"). 52-include("../misc/hipe_sdi.hrl"). 53-undef(ASSERT). 54-define(ASSERT(G), if G -> [] ; true -> exit({assertion_failed,?MODULE,?LINE,??G}) end). 
%% @doc Top-level entry point: assemble a list of {MFA,Defun} pairs into
%% the binary blob understood by the HiPE loader.  Constants are packed
%% first, then the code is translated (passes 1-2) and encoded (pass 3),
%% and everything is bundled with term_to_binary/1.
assemble(CompiledCode, Closures, Exports, Options) ->
  ?when_option(time, Options, ?start_timer("x86 assembler")),
  print("****************** Assembling *******************\n", [], Options),
  %% Strip each defun down to the parts the assembler needs.
  Code = [{MFA,
           hipe_x86:defun_code(Defun),
           hipe_x86:defun_data(Defun)}
          || {MFA, Defun} <- CompiledCode],
  %% Pack all constant data into a single segment.
  {ConstAlign,ConstSize,ConstMap,RefsFromConsts} =
    hipe_pack_constants:pack_constants(Code),
  %% Passes 1-3: translate symbolic insns, then encode to a binary.
  {CodeSize,CodeBinary,AccRefs,LabelMap,ExportMap} =
    encode(translate(Code, ConstMap, Options), Options),
  print("Total num bytes=~w\n", [CodeSize], Options),
  %% put(code_size, CodeSize),
  %% put(const_size, ConstSize),
  %% ?when_option(verbose, Options,
  %%              ?debug_msg("Constants are ~w bytes\n",[ConstSize])),
  %% Slim down the maps to the external format expected by the loader.
  SC = hipe_pack_constants:slim_constmap(ConstMap),
  DataRelocs = hipe_pack_constants:mk_data_relocs(RefsFromConsts, LabelMap),
  SSE = hipe_pack_constants:slim_sorted_exportmap(ExportMap,Closures,Exports),
  SlimRefs = hipe_pack_constants:slim_refs(AccRefs),
  Bin = term_to_binary([{?VERSION_STRING(),?HIPE_ERTS_CHECKSUM},
                        ConstAlign, ConstSize,
                        SC,
                        DataRelocs, % nee LM, LabelMap
                        SSE,
                        CodeSize,CodeBinary,SlimRefs,
                        0,[] % ColdCodeSize, SlimColdRefs
                       ]),
  %% ?when_option(time, Options, ?stop_timer("x86 assembler")),
  Bin.

%%%
%%% Assembly Pass 1.
%%% Process initial {MFA,Code,Data} list.
%%% Translate each MFA's body, choosing operand & instruction kinds.
%%%
%%% Assembly Pass 2.
%%% Perform short/long form optimisation for jumps.
%%% Build LabelMap for each MFA.
%%%
%%% Result is {MFA,NewCode,CodeSize,LabelMap} list.
%%%

%% @doc Run passes 1 and 2 over every MFA in Code.
translate(Code, ConstMap, Options) ->
  translate_mfas(Code, ConstMap, [], Options).
%% Pass 1+2 driver: translate each function body and run the SDI
%% short/long jump optimisation, accumulating {MFA,Insns,Size,LabelMap}.
translate_mfas([{MFA,Insns,_Data}|Rest], ConstMap, Acc, Options) ->
  {Translated,Size,LabelMap} =
    translate_insns(Insns, {MFA,ConstMap}, hipe_sdi:pass1_init(), 0, [], Options),
  translate_mfas(Rest, ConstMap, [{MFA,Translated,Size,LabelMap}|Acc], Options);
translate_mfas([], _ConstMap, Acc, _Options) ->
  lists:reverse(Acc).

%% Translate one instruction at a time; when the input is exhausted,
%% run SDI pass 2 to obtain the label map and the extra code size
%% contributed by jumps that had to take their long form.
translate_insns([I|Rest], Context, SdiPass1, Address, Acc, Options) ->
  Expansion = translate_insn(I, Context, Options),
  add_insns(Expansion, Rest, Context, SdiPass1, Address, Acc, Options);
translate_insns([], _Context, SdiPass1, Address, Acc, _Options) ->
  {LabelMap,SizeIncr} = hipe_sdi:pass2(SdiPass1),
  {lists:reverse(Acc), Address+SizeIncr, LabelMap}.

%% Feed the (possibly multi-instruction) expansion of one source insn
%% into the SDI pass-1 state, then continue with the remaining insns.
add_insns([I|Rest], Insns, Context, SdiPass1, Address, Acc, Options) ->
  SdiPass1_1 = sdi_step(I, Address, SdiPass1),
  NextAddress = Address + insn_size(I),
  add_insns(Rest, Insns, Context, SdiPass1_1, NextAddress, [I|Acc], Options);
add_insns([], Insns, Context, SdiPass1, Address, Acc, Options) ->
  translate_insns(Insns, Context, SdiPass1, Address, Acc, Options).

%% Record labels and span-dependent (sdi) jumps with the SDI pass.
%% incr is the growth when a 2-byte short jump is widened (jcc: 6-2,
%% jmp: 5-2); lb/ub are the rel8 bounds adjusted for the 2-byte short
%% encoding.  Anything else leaves the state untouched.
sdi_step({'.label',L,_}, Address, SdiPass1) ->
  hipe_sdi:pass1_add_label(SdiPass1, Address, L);
sdi_step({jcc_sdi,{_,{label,L}},_}, Address, SdiPass1) ->
  Info = #sdi_info{incr=(6-2),lb=(-128)+2,ub=127+2},
  hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, Info);
sdi_step({jmp_sdi,{{label,L}},_}, Address, SdiPass1) ->
  Info = #sdi_info{incr=(5-2),lb=(-128)+2,ub=127+2},
  hipe_sdi:pass1_add_sdi(SdiPass1, Address, L, Info);
sdi_step(_, _Address, SdiPass1) ->
  SdiPass1.

%% Size in bytes of a symbolic instruction.  Labels and stack
%% descriptors occupy no code space; sdi jumps are counted at their
%% short (2-byte) size and grown later by SDI pass 2 if needed.
insn_size({'.label',_,_}) -> 0;
insn_size({'.sdesc',_,_}) -> 0;
insn_size({jcc_sdi,_,_}) -> 2;
insn_size({jmp_sdi,_,_}) -> 2;
insn_size({Op,Arg,_Orig}) -> ?HIPE_X86_ENCODE:insn_sizeof(Op, Arg).
%% Pass 1: translate one symbolic x86 instruction into zero or more
%% {Op,Args,OrigInsn} tuples ready for encoding.  Comments disappear,
%% jumps to labels become sdi pseudo-insns (resolved to short/long form
%% in passes 2/3), and fp ops pick the x87 or SSE2 encoding depending
%% on the x87 option.
translate_insn(I, Context, Options) ->
  case I of
    %% xor r,r with identical src and dst just zeroes the register, so
    %% the 32-bit operand form is used regardless of architecture.
    #alu{aluop='xor', src=#x86_temp{reg=Reg}=Src, dst=#x86_temp{reg=Reg}=Dst} ->
      [{'xor', {temp_to_reg32(Dst), temp_to_rm32(Src)}, I}];
    #alu{} ->
      Arg = resolve_alu_args(hipe_x86:alu_src(I), hipe_x86:alu_dst(I), Context),
      [{hipe_x86:alu_op(I), Arg, I}];
    #call{} ->
      translate_call(I);
    #cmovcc{} ->
      {Dst,Src} = resolve_move_args(
                    hipe_x86:cmovcc_src(I), hipe_x86:cmovcc_dst(I),
                    Context),
      CC = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:cmovcc_cc(I))},
      Arg = {CC,Dst,Src},
      [{cmovcc, Arg, I}];
    #cmp{} ->
      Arg = resolve_alu_args(hipe_x86:cmp_src(I), hipe_x86:cmp_dst(I), Context),
      [{cmp, Arg, I}];
    #comment{} ->
      [];
    #fmove{} ->
      %% fmove may turn into cvtsi2sd or movsd, see resolve_sse2_fmove_args.
      {Op,Arg} = resolve_sse2_fmove_args(hipe_x86:fmove_src(I),
                                         hipe_x86:fmove_dst(I)),
      [{Op, Arg, I}];
    #fp_binop{} ->
      case proplists:get_bool(x87, Options) of
        true -> % x87
          Arg = resolve_x87_binop_args(hipe_x86:fp_binop_src(I),
                                       hipe_x86:fp_binop_dst(I)),
          [{hipe_x86:fp_binop_op(I), Arg, I}];
        false -> % sse2
          Arg = resolve_sse2_binop_args(hipe_x86:fp_binop_src(I),
                                        hipe_x86:fp_binop_dst(I)),
          [{resolve_sse2_op(hipe_x86:fp_binop_op(I)), Arg, I}]
      end;
    #fp_unop{} ->
      case proplists:get_bool(x87, Options) of
        true -> % x87
          Arg = resolve_x87_unop_arg(hipe_x86:fp_unop_arg(I)),
          [{hipe_x86:fp_unop_op(I), Arg, I}];
        false -> % sse2
          case hipe_x86:fp_unop_op(I) of
            'fchs' ->
              %% Negation via xorpd with a sign-bit mask constant.
              Arg = resolve_sse2_fchs_arg(hipe_x86:fp_unop_arg(I)),
              [{'xorpd', Arg, I}];
            'fwait' -> % no op on sse2, magic on x87
              []
          end
      end;
    #imul{} ->
      translate_imul(I, Context);
    #jcc{} ->
      Cc = {cc,?HIPE_X86_ENCODE:cc(hipe_x86:jcc_cc(I))},
      Label = translate_label(hipe_x86:jcc_label(I)),
      [{jcc_sdi, {Cc,Label}, I}];
    #jmp_fun{} ->
      %% call and jmp are patched the same, so no need to distinguish
      %% call from tailcall
      PatchTypeExt =
        case hipe_x86:jmp_fun_linkage(I) of
          remote -> ?CALL_REMOTE;
          not_remote -> ?CALL_LOCAL
        end,
      Arg = translate_fun(hipe_x86:jmp_fun_fun(I), PatchTypeExt),
      [{jmp, {Arg}, I}];
    #jmp_label{} ->
      Arg = translate_label(hipe_x86:jmp_label_label(I)),
      [{jmp_sdi, {Arg}, I}];
    #jmp_switch{} ->
      RM32 = resolve_jmp_switch_arg(I, Context),
      [{jmp, {RM32}, I}];
    #label{} ->
      [{'.label', hipe_x86:label_label(I), I}];
    #lea{} ->
      Arg = resolve_lea_args(hipe_x86:lea_mem(I), hipe_x86:lea_temp(I)),
      [{lea, Arg, I}];
    #move{} ->
      Arg = resolve_move_args(hipe_x86:move_src(I), hipe_x86:move_dst(I),
                              Context),
      [{mov, Arg, I}];
    #move64{} ->
      translate_move64(I, Context);
    #movsx{} ->
      Src = resolve_movx_src(hipe_x86:movsx_src(I)),
      [{movsx, {temp_to_regArch(hipe_x86:movsx_dst(I)), Src}, I}];
    #movzx{} ->
      Src = resolve_movx_src(hipe_x86:movzx_src(I)),
      [{movzx, {temp_to_reg32(hipe_x86:movzx_dst(I)), Src}, I}];
    %% pseudo_call: eliminated before assembly
    %% pseudo_jcc: eliminated before assembly
    %% pseudo_tailcall: eliminated before assembly
    %% pseudo_tailcall_prepare: eliminated before assembly
    #pop{} ->
      Arg = translate_dst(hipe_x86:pop_dst(I)),
      [{pop, {Arg}, I}];
    #push{} ->
      Arg = translate_src(hipe_x86:push_src(I), Context),
      [{push, {Arg}, I}];
    #ret{} ->
      translate_ret(I);
    #shift{} ->
      Arg = resolve_shift_args(hipe_x86:shift_src(I), hipe_x86:shift_dst(I), Context),
      [{hipe_x86:shift_op(I), Arg, I}];
    #test{} ->
      Arg = resolve_test_args(hipe_x86:test_src(I), hipe_x86:test_dst(I), Context),
      [{test, Arg, I}]
  end.

-ifdef(X86_SIMULATE_NSP).
-ifdef(HIPE_AMD64).
%% amd64 call with a simulated native stack pointer: instead of a real
%% call insn, emit an explicit sub/lea/mov/jmp sequence that pushes the
%% return address through TempSP and then jumps to the target.
translate_call(I) ->
  WordSize = hipe_amd64_registers:wordsize(),
  RegSP = 2#100, % esp/rsp
  TempSP = hipe_x86:mk_temp(RegSP, untagged),
  FunOrig = hipe_x86:call_fun(I),
  %% If the call target is loaded relative to SP, rebase its offset to
  %% account for the WordSize the sub below takes off SP first.
  Fun =
    case FunOrig of
      #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} ->
        FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}};
      _ -> FunOrig
    end,
  %% Pick a scratch register for the return address that the call
  %% target does not use.  RegTemp0/RegTemp1 are already bound here, so
  %% the case patterns match against their values, not fresh variables.
  RegRA =
    begin
      RegTemp0 = hipe_amd64_registers:temp0(),
      RegTemp1 = hipe_amd64_registers:temp1(),
      case Fun of
        #x86_temp{reg=RegTemp0} -> RegTemp1;
        #x86_mem{base=#x86_temp{reg=RegTemp0}} -> RegTemp1;
        _ -> RegTemp0
      end
    end,
  TempRA = hipe_x86:mk_temp(RegRA, untagged),
  PatchTypeExt =
    case hipe_x86:call_linkage(I) of
      remote -> ?CALL_REMOTE;
      not_remote -> ?CALL_LOCAL
    end,
  JmpArg = translate_fun(Fun, PatchTypeExt),
  I4 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}},
  I3 = {jmp, {JmpArg}, #comment{term=call}},
  Size3 = hipe_amd64_encode:insn_sizeof(jmp, {JmpArg}),
  MovArgs = {mem_to_rmArch(hipe_x86:mk_mem(TempSP,
                                           hipe_x86:mk_imm(0),
                                           untagged)),
             temp_to_regArch(TempRA)},
  I2 = {mov, MovArgs, #comment{term=call}},
  Size2 = hipe_amd64_encode:insn_sizeof(mov, MovArgs),
  %% lea TempRA, [rip + sizeof(mov)+sizeof(jmp)] computes the address
  %% just past the jmp, i.e. the return address.
  I1 = {lea, {temp_to_regArch(TempRA),
              {ea, hipe_amd64_encode:ea_disp32_rip(Size2+Size3)}},
        #comment{term=call}},
  I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I},
  [I0,I1,I2,I3,I4].
-else.
%% x86 call with a simulated native stack pointer.  Same idea as the
%% amd64 variant, but the return address is stored as an absolute
%% pc-relative imm32 (?X86ABSPCREL patch) rather than computed via lea.
translate_call(I) ->
  WordSize = ?HIPE_X86_REGISTERS:wordsize(),
  RegSP = 2#100, % esp/rsp
  TempSP = hipe_x86:mk_temp(RegSP, untagged),
  FunOrig = hipe_x86:call_fun(I),
  %% Rebase an SP-relative call target past the sub below.
  Fun =
    case FunOrig of
      #x86_mem{base=#x86_temp{reg=4}, off=#x86_imm{value=Off}} ->
        FunOrig#x86_mem{off=#x86_imm{value=Off+WordSize}};
      _ -> FunOrig
    end,
  PatchTypeExt =
    case hipe_x86:call_linkage(I) of
      remote -> ?CALL_REMOTE;
      not_remote -> ?CALL_LOCAL
    end,
  JmpArg = translate_fun(Fun, PatchTypeExt),
  I3 = {'.sdesc', hipe_x86:call_sdesc(I), #comment{term=sdesc}},
  I2 = {jmp, {JmpArg}, #comment{term=call}},
  Size2 = ?HIPE_X86_ENCODE:insn_sizeof(jmp, {JmpArg}),
  %% Store the return address (pc + 4-byte imm + jmp size) at [SP].
  I1 = {mov, {mem_to_rmArch(hipe_x86:mk_mem(TempSP,
                                            hipe_x86:mk_imm(0),
                                            untagged)),
              {imm32,{?X86ABSPCREL,4+Size2}}},
        #comment{term=call}},
  I0 = {sub, {temp_to_rmArch(TempSP), {imm8,WordSize}}, I},
  [I0,I1,I2,I3].
-endif.

%% NSP return: load the return address from [SP], pop NPOP bytes plus
%% the return-address slot itself, and jump through the scratch reg.
translate_ret(I) ->
  NPOP = hipe_x86:ret_npop(I) + ?HIPE_X86_REGISTERS:wordsize(),
  RegSP = 2#100, % esp/rsp
  TempSP = hipe_x86:mk_temp(RegSP, untagged),
  RegRA = 2#011, % ebx/rbx
  TempRA = hipe_x86:mk_temp(RegRA, untagged),
  [{mov,
    {temp_to_regArch(TempRA),
     mem_to_rmArch(hipe_x86:mk_mem(TempSP,
                                   hipe_x86:mk_imm(0),
                                   untagged))},
    I},
   {add,
    {temp_to_rmArch(TempSP),
     %% Use the short imm8 form when the pop amount fits.
     case NPOP < 128 of
       true -> {imm8,NPOP};
       false -> {imm32,NPOP}
     end},
    #comment{term=ret}},
   {jmp,
    {temp_to_rmArch(TempRA)},
    #comment{term=ret}}].

-else. % not X86_SIMULATE_NSP

%% Normal call: emit the call insn followed by its stack descriptor.
translate_call(I) ->
  %% call and jmp are patched the same, so no need to distinguish
  %% call from tailcall
  PatchTypeExt =
    case hipe_x86:call_linkage(I) of
      remote -> ?CALL_REMOTE;
      not_remote -> ?CALL_LOCAL
    end,
  Arg = translate_fun(hipe_x86:call_fun(I), PatchTypeExt),
  SDesc = hipe_x86:call_sdesc(I),
  [{call, {Arg}, I}, {'.sdesc', SDesc, #comment{term=sdesc}}].
%% Normal (non-NSP) return: optionally pop NPop argument bytes.
translate_ret(I) ->
  case hipe_x86:ret_npop(I) of
    0 -> [{ret, {}, I}];
    NPop -> [{ret, {{imm16,NPop}}, I}]
  end.

-endif. % X86_SIMULATE_NSP

%% imul: two-operand {Dst,Src} form, or three-operand form when an
%% immediate multiplier is present.
translate_imul(I, Context) ->
  Dst = temp_to_regArch(hipe_x86:imul_temp(I)),
  SrcRM = temp_or_mem_to_rmArch(hipe_x86:imul_src(I)),
  Args =
    case hipe_x86:imul_imm_opt(I) of
      [] -> {Dst,SrcRM};
      ImmOpt -> {Dst,SrcRM,translate_imm(ImmOpt, Context, true)}
    end,
  [{'imul', Args, I}].

%% Convert a temp or a memory operand to an arch-sized r/m operand.
temp_or_mem_to_rmArch(#x86_temp{}=Temp) -> temp_to_rmArch(Temp);
temp_or_mem_to_rmArch(#x86_mem{}=Mem) -> mem_to_rmArch(Mem).

%% Labels stay symbolic here, since their offsets are not yet computable.
translate_label(Label) when is_integer(Label) ->
  {label,Label}.

%% A call/jmp target: a temp, a memory operand, or a patchable rel32
%% referring to an MFA or to a primop.
translate_fun(#x86_temp{}=Temp, _PatchTypeExt) ->
  temp_to_rmArch(Temp);
translate_fun(#x86_mem{}=Mem, _PatchTypeExt) ->
  mem_to_rmArch(Mem);
translate_fun(#x86_mfa{m=M,f=F,a=A}, PatchTypeExt) ->
  {rel32,{PatchTypeExt,{M,F,A}}};
translate_fun(#x86_prim{prim=Prim}, PatchTypeExt) ->
  {rel32,{PatchTypeExt,Prim}}.

%% A source operand may additionally be an immediate.
translate_src(#x86_imm{}=Imm, Context) ->
  translate_imm(Imm, Context, true);
translate_src(Src, _Context) ->
  translate_dst(Src).

%%% MayTrunc8 controls whether negative Imm8s should be truncated
%%% to 8 bits or not. Truncation should always be done, except when
%%% the caller will widen the Imm8 to an Imm32 or Imm64.
%% Translate an immediate operand.  Atoms and symbolic addresses become
%% patchable imm32s (?LOAD_ATOM / ?LOAD_ADDRESS relocations); integers
%% in [-128,127] become imm8 (truncated to 8 bits iff MayTrunc8), the
%% rest imm32.
translate_imm(#x86_imm{value=Imm}, Context, MayTrunc8) ->
  if is_atom(Imm) ->
      {imm32,{?LOAD_ATOM,Imm}};
     is_integer(Imm) ->
      case (Imm =< 127) and (Imm >= -128) of
        true ->
          Imm8 =
            case MayTrunc8 of
              true -> Imm band 16#FF;
              false -> Imm
            end,
          {imm8,Imm8};
        false ->
          {imm32,Imm}
      end;
     true ->
      %% Symbolic value: a constant (looked up in the const map),
      %% a closure, or a C constant.
      Val =
        case Imm of
          {Label,constant} ->
            {MFA,ConstMap} = Context,
            ConstNo = hipe_pack_constants:find_const({MFA,Label}, ConstMap),
            {constant,ConstNo};
          {Label,closure} ->
            {closure,Label};
          {Label,c_const} ->
            {c_const,Label}
        end,
      {imm32,{?LOAD_ADDRESS,Val}}
  end.

%% Destination operand: register, fp memory, plain memory, or fp stack.
translate_dst(Dst) ->
  case Dst of
    #x86_temp{} ->
      temp_to_regArch(Dst);
    #x86_mem{type='double'} ->
      mem_to_rm64fp(Dst);
    #x86_mem{} ->
      mem_to_rmArch(Dst);
    #x86_fpreg{} ->
      fpreg_to_stack(Dst)
  end.

%%%
%%% Assembly Pass 3.
%%% Process final {MFA,Code,CodeSize,LabelMap} list from pass 2.
%%% Translate to a single binary code segment.
%%% Collect relocation patches.
%%% Build ExportMap (MFA-to-address mapping).
%%% Combine LabelMaps to a single one (for mk_data_relocs/2 compatibility).
%%% Return {CombinedCodeSize,BinaryCode,Relocs,CombinedLabelMap,ExportMap}.
%%%

%% Pass 3 driver; see the comment block above for the contract.
encode(Code, Options) ->
  CodeSize = compute_code_size(Code, 0),
  ExportMap = build_export_map(Code, 0, []),
  {AccCode,Relocs} = encode_mfas(Code, 0, [], [], Options),
  CodeBinary = list_to_binary(lists:reverse(AccCode)),
  ?ASSERT(CodeSize =:= byte_size(CodeBinary)),
  CombinedLabelMap = combine_label_maps(Code, 0, gb_trees:empty()),
  {CodeSize,CodeBinary,Relocs,CombinedLabelMap,ExportMap}.

%% Padding needed to bring Address up to 4-byte function alignment.
nr_pad_bytes(Address) -> (4 - (Address rem 4)) rem 4. % XXX: 16 or 32 instead?

align_entry(Address) -> Address + nr_pad_bytes(Address).
%% Total size of all function bodies, each rounded up to entry alignment.
compute_code_size(Code, Size0) ->
  lists:foldl(fun({_MFA,_Insns,CodeSize,_LabelMap}, Size) ->
                  align_entry(Size+CodeSize)
              end, Size0, Code).

%% Collect {Address,M,F,A} for each function entry.  The accumulator is
%% prepended to, so the result is in reverse order of Code.
build_export_map(Code, Address0, ExportMap0) ->
  {_EndAddress, ExportMap} =
    lists:foldl(fun({{M,F,A},_Insns,CodeSize,_LabelMap}, {Address,Acc}) ->
                    {align_entry(Address+CodeSize), [{Address,M,F,A}|Acc]}
                end, {Address0,ExportMap0}, Code),
  ExportMap.

%% Fold the per-function label maps into one gb_tree keyed by
%% {MFA,Label}, with offsets rebased to absolute code addresses.
combine_label_maps(Code, Address0, CLM0) ->
  {_EndAddress, CLM} =
    lists:foldl(fun({MFA,_Insns,CodeSize,LabelMap}, {Address,Acc}) ->
                    Acc1 = merge_label_map(gb_trees:to_list(LabelMap),
                                           MFA, Address, Acc),
                    {align_entry(Address+CodeSize), Acc1}
                end, {Address0,CLM0}, Code),
  CLM.

%% Insert one function's labels into the combined map.
merge_label_map(Labels, MFA, Address, CLM0) ->
  lists:foldl(fun({Label,Offset}, CLM) ->
                  gb_trees:insert({MFA,Label}, Address+Offset, CLM)
              end, CLM0, Labels).

%% Encode each function body in turn, threading the running address,
%% the (reversed) list of code binaries and the relocation list.
encode_mfas([{MFA,Insns,CodeSize,LabelMap}|Rest], Address, AccCode, Relocs, Options) ->
  print("Generating code for:~w\n", [MFA], Options),
  print("Offset | Opcode | Instruction\n", [], Options),
  {Address1,Relocs1,AccCode1} =
    encode_insns(Insns, Address, Address, LabelMap, Relocs, AccCode, Options),
  %% Sanity check: the encoded size must match pass 2's prediction.
  ExpectedAddress = align_entry(Address + CodeSize),
  ?ASSERT(Address1 =:= ExpectedAddress),
  print("Finished.\n\n", [], Options),
  encode_mfas(Rest, Address1, AccCode1, Relocs1, Options);
encode_mfas([], _Address, AccCode, Relocs, _Options) ->
  {AccCode, Relocs}.
%% Encode one function's instructions.  Labels produce no bytes (their
%% addresses are asserted against the LabelMap); stack descriptors turn
%% into ?SDESC relocations; everything else is first resolved by
%% fix_jumps/4 and then encoded to bytes.  At the end the function is
%% padded with nops up to entry alignment.
encode_insns([I|Insns], Address, FunAddress, LabelMap, Relocs, AccCode, Options) ->
  case I of
    {'.label',L,_} ->
      LabelAddress = gb_trees:get(L, LabelMap) + FunAddress,
      ?ASSERT(Address =:= LabelAddress), % sanity check
      print_insn(Address, [], I, Options),
      encode_insns(Insns, Address, FunAddress, LabelMap, Relocs, AccCode, Options);
    {'.sdesc',SDesc,_} ->
      #x86_sdesc{exnlab=ExnLab,fsize=FSize,arity=Arity,live=Live} = SDesc,
      %% NOTE: ExnLab is already bound, so the second case clause is
      %% effectively a catch-all for any non-[] exception label.
      ExnRA =
        case ExnLab of
          [] -> []; % don't cons up a new one
          ExnLab -> gb_trees:get(ExnLab, LabelMap) + FunAddress
        end,
      Reloc = {?SDESC, Address,
               ?STACK_DESC(ExnRA, FSize, Arity, Live)},
      encode_insns(Insns, Address, FunAddress, LabelMap, [Reloc|Relocs], AccCode, Options);
    _ ->
      {Op,Arg,_} = fix_jumps(I, Address, FunAddress, LabelMap),
      {Bytes, NewRelocs} = ?HIPE_X86_ENCODE:insn_encode(Op, Arg, Address),
      print_insn(Address, Bytes, I, Options),
      Segment = list_to_binary(Bytes),
      Size = byte_size(Segment),
      NewAccCode = [Segment|AccCode],
      encode_insns(Insns, Address+Size, FunAddress, LabelMap, NewRelocs++Relocs, NewAccCode, Options)
  end;
encode_insns([], Address, FunAddress, LabelMap, Relocs, AccCode, Options) ->
  case nr_pad_bytes(Address) of
    0 ->
      {Address,Relocs,AccCode};
    NrPadBytes -> % triggers at most once per function body
      Padding = lists:duplicate(NrPadBytes, {nop,{},#comment{term=padding}}),
      encode_insns(Padding, Address, FunAddress, LabelMap, Relocs, AccCode, Options)
  end.
%% Pass 3 fixup: rewrite sdi jump pseudo-insns into concrete short
%% (rel8) or long (rel32) jumps now that label addresses are known.
%% The +2 / +6 / +5 terms are the encoded sizes of short jcc/jmp and
%% long jcc/jmp, since rel offsets are measured from the next insn.
fix_jumps(I, InsnAddress, FunAddress, LabelMap) ->
  case I of
    {jcc_sdi,{CC,{label,L}},OrigI} ->
      LabelAddress = gb_trees:get(L, LabelMap) + FunAddress,
      ShortOffset = LabelAddress - (InsnAddress + 2),
      if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 ->
          {jcc,{CC,{rel8,ShortOffset band 16#FF}},OrigI};
         true ->
          LongOffset = LabelAddress - (InsnAddress + 6),
          {jcc,{CC,{rel32,LongOffset}},OrigI}
      end;
    {jmp_sdi,{{label,L}},OrigI} ->
      LabelAddress = gb_trees:get(L, LabelMap) + FunAddress,
      ShortOffset = LabelAddress - (InsnAddress + 2),
      if is_integer(ShortOffset), ShortOffset >= -128, ShortOffset =< 127 ->
          {jmp,{{rel8,ShortOffset band 16#FF}},OrigI};
         true ->
          LongOffset = LabelAddress - (InsnAddress + 5),
          {jmp,{{rel32,LongOffset}},OrigI}
      end;
    _ -> I
  end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% Operand conversion helpers: wrap temps/fp-regs in the tagged tuples
%%% the encoder expects.  ?REGArch/?RMArch pick the 32- or 64-bit
%%% variant depending on the target architecture.

fpreg_to_stack(#x86_fpreg{reg=Reg}) ->
  {fpst, Reg}.

temp_to_regArch(#x86_temp{reg=Reg}) ->
  {?REGArch, Reg}.

-ifdef(HIPE_AMD64).
temp_to_reg64(#x86_temp{reg=Reg}) ->
  {reg64, Reg}.
-endif.

temp_to_reg32(#x86_temp{reg=Reg}) ->
  {reg32, Reg}.
temp_to_reg16(#x86_temp{reg=Reg}) ->
  {reg16, Reg}.
temp_to_reg8(#x86_temp{reg=Reg}) ->
  {reg8, Reg}.

temp_to_xmm(#x86_temp{reg=Reg}) ->
  {xmm, Reg}.

-ifdef(HIPE_AMD64).
temp_to_rm8(#x86_temp{reg=Reg}) ->
  {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
temp_to_rm64(#x86_temp{reg=Reg}) ->
  {rm64, hipe_amd64_encode:rm_reg(Reg)}.
-else.
temp_to_rm8(#x86_temp{reg=Reg}) ->
  %% Assert that the register has an 8-bit form on 32-bit x86.
  true = ?HIPE_X86_ENCODE:reg_has_8bit(Reg),
  {rm8, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
temp_to_rm16(#x86_temp{reg=Reg}) ->
  {rm16, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
-endif.

temp_to_rm32(#x86_temp{reg=Reg}) ->
  {rm32, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
temp_to_rmArch(#x86_temp{reg=Reg}) ->
  {?RMArch, ?HIPE_X86_ENCODE:rm_reg(Reg)}.
temp_to_rm64fp(#x86_temp{reg=Reg}) ->
  {rm64fp, ?HIPE_X86_ENCODE:rm_reg(Reg)}.

%%% Memory operand conversion: compute an effective address and wrap it
%%% in the operand-size tag the encoder expects.

mem_to_ea(Mem) ->
  EA = mem_to_ea_common(Mem),
  {ea, EA}.

mem_to_rm32(Mem) ->
  EA = mem_to_ea_common(Mem),
  {rm32, ?HIPE_X86_ENCODE:rm_mem(EA)}.

mem_to_rmArch(Mem) ->
  EA = mem_to_ea_common(Mem),
  {?RMArch, ?HIPE_X86_ENCODE:rm_mem(EA)}.

mem_to_rm64fp(Mem) ->
  EA = mem_to_ea_common(Mem),
  {rm64fp, ?HIPE_X86_ENCODE:rm_mem(EA)}.

%%%%%%%%%%%%%%%%%
mem_to_rm8(Mem) ->
  EA = mem_to_ea_common(Mem),
  {rm8, ?HIPE_X86_ENCODE:rm_mem(EA)}.

mem_to_rm16(Mem) ->
  EA = mem_to_ea_common(Mem),
  {rm16, ?HIPE_X86_ENCODE:rm_mem(EA)}.
%%%%%%%%%%%%%%%%%

%% Build the effective address for a memory operand, working around the
%% ModRM/SIB encoding irregularities for base registers whose low 3 bits
%% are 4 (esp/r12: SIB required) or 5 (ebp/r13: disp required).
mem_to_ea_common(#x86_mem{base=[], off=#x86_imm{value=Off}}) ->
  %% No base register: absolute disp32.
  ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE(Off);
mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_temp{reg=Index}}) ->
  %% Base + index register.  If one of them encodes like ebp/r13 it
  %% cannot be the SIB base without a displacement, so roles are
  %% swapped or a zero disp8 is added.
  case Base band 2#111 of
    5 -> % ebp/rbp or r13
      case Index band 2#111 of
        5 -> % ebp/rbp or r13
          SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index),
          SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX),
          ?HIPE_X86_ENCODE:ea_disp8_sib(0, SIB);
        _ ->
          SINDEX = ?HIPE_X86_ENCODE:sindex(0, Base),
          SIB = ?HIPE_X86_ENCODE:sib(Index, SINDEX),
          ?HIPE_X86_ENCODE:ea_sib(SIB)
      end;
    _ ->
      SINDEX = ?HIPE_X86_ENCODE:sindex(0, Index),
      SIB = ?HIPE_X86_ENCODE:sib(Base, SINDEX),
      ?HIPE_X86_ENCODE:ea_sib(SIB)
  end;
mem_to_ea_common(#x86_mem{base=#x86_temp{reg=Base}, off=#x86_imm{value=Off}}) ->
  %% Base + immediate offset; pick the smallest displacement encoding.
  if
    Off =:= 0 ->
      case Base of
        4 -> %esp, use SIB w/o disp8
          SIB = ?HIPE_X86_ENCODE:sib(Base),
          ?HIPE_X86_ENCODE:ea_sib(SIB);
        5 -> %ebp, use disp8 w/o SIB
          ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base);
        12 -> %r12, use SIB w/o disp8
          SIB = ?HIPE_X86_ENCODE:sib(Base),
          ?HIPE_X86_ENCODE:ea_sib(SIB);
        13 -> %r13, use disp8 w/o SIB
          ?HIPE_X86_ENCODE:ea_disp8_base(Off, Base);
        _ -> %neither SIB nor disp8 needed
          ?HIPE_X86_ENCODE:ea_base(Base)
      end;
    Off >= -128, Off =< 127 ->
      Disp8 = Off band 16#FF,
      case Base of
        4 -> %esp, must use SIB
          SIB = ?HIPE_X86_ENCODE:sib(Base),
          ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB);
        12 -> %r12, must use SIB
          SIB = ?HIPE_X86_ENCODE:sib(Base),
          ?HIPE_X86_ENCODE:ea_disp8_sib(Disp8, SIB);
        _ -> %use disp8 w/o SIB
          ?HIPE_X86_ENCODE:ea_disp8_base(Disp8, Base)
      end;
    true ->
      case Base of
        4 -> %esp, must use SIB
          SIB = ?HIPE_X86_ENCODE:sib(Base),
          ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB);
        12 -> %r12, must use SIB
          SIB = ?HIPE_X86_ENCODE:sib(Base),
          ?HIPE_X86_ENCODE:ea_disp32_sib(Off, SIB);
        _ ->
          ?HIPE_X86_ENCODE:ea_disp32_base(Off, Base)
      end
  end.

%% jmp_switch
-ifdef(HIPE_AMD64).
%% amd64: jump table address is in a register; scale the index by 8.
resolve_jmp_switch_arg(I, _Context) ->
  Base = hipe_x86:temp_reg(hipe_x86:jmp_switch_jtab(I)),
  Index = hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I)),
  SINDEX = hipe_amd64_encode:sindex(3, Index),
  SIB = hipe_amd64_encode:sib(Base, SINDEX),
  EA =
    if (Base =:= 5) or (Base =:= 13) ->
        %% rbp/r13 cannot be a SIB base without a displacement.
        hipe_amd64_encode:ea_disp8_sib(0, SIB);
       true ->
        hipe_amd64_encode:ea_sib(SIB)
    end,
  {rm64,hipe_amd64_encode:rm_mem(EA)}.
-else.
%% x86: jump table is a constant patched in as disp32; scale index by 4.
resolve_jmp_switch_arg(I, {MFA,ConstMap}) ->
  ConstNo = hipe_pack_constants:find_const({MFA,hipe_x86:jmp_switch_jtab(I)}, ConstMap),
  Disp32 = {?LOAD_ADDRESS,{constant,ConstNo}},
  SINDEX = ?HIPE_X86_ENCODE:sindex(2, hipe_x86:temp_reg(hipe_x86:jmp_switch_temp(I))),
  EA = ?HIPE_X86_ENCODE:ea_disp32_sindex(Disp32, SINDEX), % this creates a SIB implicitly
  {rm32,?HIPE_X86_ENCODE:rm_mem(EA)}.
-endif.

%% lea reg, mem
resolve_lea_args(Src=#x86_mem{}, Dst=#x86_temp{}) ->
  {temp_to_regArch(Dst),mem_to_ea(Src)}.

%% Map the generic fp binop name to its SSE2 scalar-double mnemonic.
resolve_sse2_op(Op) ->
  case Op of
    fadd -> addsd;
    fdiv -> divsd;
    fmul -> mulsd;
    fsub -> subsd;
    xorpd -> xorpd;
    _ -> exit({?MODULE, unknown_sse2_operator, Op})
  end.
%% SSE2 binop operands.
%% OP xmm, mem64
resolve_sse2_binop_args(Mem=#x86_mem{type=double},
                        Reg=#x86_temp{type=double}) ->
  {temp_to_xmm(Reg),mem_to_rm64fp(Mem)};
%% movsd mem64, xmm (store direction)
resolve_sse2_binop_args(Reg=#x86_temp{type=double},
                        Mem=#x86_mem{type=double}) ->
  {mem_to_rm64fp(Mem),temp_to_xmm(Reg)};
%% OP xmm, xmm
resolve_sse2_binop_args(RegS=#x86_temp{type=double},
                        RegD=#x86_temp{type=double}) ->
  {temp_to_xmm(RegD),temp_to_rm64fp(RegS)}.

%%% fmove: an untagged integer source becomes cvtsi2sd (int->double
%%% conversion); everything else is a plain movsd.
resolve_sse2_fmove_args(Src=#x86_temp{type=untagged},
                        Dst=#x86_temp{type=double}) ->
  {cvtsi2sd, {temp_to_xmm(Dst),temp_to_rmArch(Src)}};
resolve_sse2_fmove_args(Src=#x86_mem{type=untagged},
                        Dst=#x86_temp{type=double}) ->
  {cvtsi2sd, {temp_to_xmm(Dst),mem_to_rmArch(Src)}};
resolve_sse2_fmove_args(Src, Dst) ->
  {movsd, resolve_sse2_binop_args(Src, Dst)}.

%%% fchs via xorpd xmm, [sse2_fnegate_mask]: xor with the sign-bit
%%% mask constant, whose address is patched in at load time.
resolve_sse2_fchs_arg(Dst=#x86_temp{type=double}) ->
  MaskEA = ?HIPE_X86_ENCODE:?EA_DISP32_ABSOLUTE(
             {?LOAD_ADDRESS, {c_const, sse2_fnegate_mask}}),
  {temp_to_xmm(Dst), {rm64fp, {rm_mem, MaskEA}}}.
%% Resolve mov operands; the memory operand's type field selects byte,
%% int16, int32 or word-sized stores/loads.

%% mov mem, imm
resolve_move_args(#x86_imm{value=ImmSrc}, Dst=#x86_mem{type=Type}, Context) ->
  case Type of % to support byte, int16 and int32 stores
    byte ->
      ByteImm = ImmSrc band 255, %to ensure that it is a bytesized imm
      {mem_to_rm8(Dst),{imm8,ByteImm}};
    int16 ->
      {mem_to_rm16(Dst),{imm16,ImmSrc band 16#FFFF}};
    int32 ->
      %% MayTrunc8=false: the immediate is widened to imm32 below.
      {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false),
      {mem_to_rm32(Dst),{imm32,Imm}};
    _ ->
      RMArch = mem_to_rmArch(Dst),
      {_,Imm} = translate_imm(#x86_imm{value=ImmSrc}, Context, false),
      {RMArch,{imm32,Imm}}
  end;
%% mov reg,mem
resolve_move_args(Src=#x86_mem{type=Type}, Dst=#x86_temp{}, _Context) ->
  case Type of
    int32 -> % must be unsigned
      {temp_to_reg32(Dst),mem_to_rm32(Src)};
    _ ->
      {temp_to_regArch(Dst),mem_to_rmArch(Src)}
  end;
%% mov mem,reg
resolve_move_args(Src=#x86_temp{}, Dst=#x86_mem{type=Type}, _Context) ->
  case Type of % to support byte, int16 and int32 stores
    byte ->
      {mem_to_rm8(Dst),temp_to_reg8(Src)};
    int16 ->
      {mem_to_rm16(Dst),temp_to_reg16(Src)};
    int32 ->
      {mem_to_rm32(Dst),temp_to_reg32(Src)};
    tagged -> % tagged, untagged
      {mem_to_rmArch(Dst),temp_to_regArch(Src)};
    untagged -> % tagged, untagged
      {mem_to_rmArch(Dst),temp_to_regArch(Src)}
  end;
%% mov reg,reg
resolve_move_args(Src=#x86_temp{}, Dst=#x86_temp{}, _Context) ->
  {temp_to_regArch(Dst),temp_to_rmArch(Src)};
%% mov reg,imm
resolve_move_args(Src=#x86_imm{value=_ImmSrc}, Dst=#x86_temp{}, Context) ->
  {_,Imm} = translate_imm(Src, Context, false),
  imm_move_args(Dst, Imm).

-ifdef(HIPE_AMD64).
%% On amd64 a non-negative imm32 mov to reg32 zero-extends, which is
%% the short encoding; otherwise use the sign-extended rm64 form.
imm_move_args(Dst, Imm) ->
  if is_number(Imm), Imm >= 0 ->
      {temp_to_reg32(Dst),{imm32,Imm}};
     true ->
      {temp_to_rm64(Dst),{imm32,Imm}}
  end.
-else.
imm_move_args(Dst, Imm) ->
  {temp_to_reg32(Dst),{imm32,Imm}}.
-endif.

-ifdef(HIPE_AMD64).
%% move64 exists only on amd64 (mov reg, imm64); on x86 it is a bug.
translate_move64(I, Context) ->
  Arg = resolve_move64_args(hipe_x86:move64_src(I),
                            hipe_x86:move64_dst(I),
                            Context),
  [{mov, Arg, I}].

%% mov reg,imm64
resolve_move64_args(Src=#x86_imm{}, Dst=#x86_temp{}, Context) ->
  {_,Imm} = translate_imm(Src, Context, false),
  {temp_to_reg64(Dst),{imm64,Imm}}.
-else.
translate_move64(I, _Context) -> exit({?MODULE, I}).
-endif.

%%% mov{s,z}x: the source width is given by the memory operand's type.
resolve_movx_src(Src=#x86_mem{type=Type}) ->
  case Type of
    byte ->
      mem_to_rm8(Src);
    int16 ->
      mem_to_rm16(Src);
    int32 ->
      mem_to_rm32(Src)
  end.

%%% alu/cmp (_not_ test)
%%% NOTE: clause order matters — the {imm, reg=0} (eax) special case
%%% must be tried before the general {imm, temp} case.
resolve_alu_args(Src, Dst, Context) ->
  case {Src,Dst} of
    {#x86_imm{}, #x86_mem{}} ->
      {mem_to_rmArch(Dst), translate_imm(Src, Context, true)};
    {#x86_mem{}, #x86_temp{}} ->
      {temp_to_regArch(Dst), mem_to_rmArch(Src)};
    {#x86_temp{}, #x86_mem{}} ->
      {mem_to_rmArch(Dst), temp_to_regArch(Src)};
    {#x86_temp{}, #x86_temp{}} ->
      {temp_to_regArch(Dst), temp_to_rmArch(Src)};
    {#x86_imm{}, #x86_temp{reg=0}} -> % eax,imm
      %% imm32 to eax has a dedicated short encoding; imm8 does not.
      NewSrc = translate_imm(Src, Context, true),
      NewDst =
        case NewSrc of
          {imm8,_} -> temp_to_rmArch(Dst);
          {imm32,_} -> ?EAX
        end,
      {NewDst, NewSrc};
    {#x86_imm{}, #x86_temp{}} ->
      {temp_to_rmArch(Dst), translate_imm(Src, Context, true)}
  end.

%%% test: pick the narrowest usable instruction form for the immediate.
resolve_test_args(Src, Dst, Context) ->
  case Src of
    %% Since we're using an 8-bit instruction, the immediate is not sign
    %% extended. Thus, we can use immediates up to 255.
    #x86_imm{value=ImmVal}
      when is_integer(ImmVal), ImmVal >= 0, ImmVal =< 255 ->
      Imm = {imm8, ImmVal},
      case Dst of
        #x86_temp{reg=0} -> {al, Imm};
        #x86_temp{} -> resolve_test_imm8_reg(Imm, Dst);
        #x86_mem{} -> {mem_to_rm8(Dst), Imm}
      end;
    #x86_imm{value=ImmVal} when is_integer(ImmVal), ImmVal >= 0 ->
      {case Dst of
         #x86_temp{reg=0} -> eax;
         #x86_temp{} -> temp_to_rm32(Dst);
         #x86_mem{} -> mem_to_rm32(Dst)
       end, {imm32, ImmVal}};
    #x86_imm{} -> % Negative ImmVal; use word-sized instr, imm32
      {_, ImmVal} = translate_imm(Src, Context, false),
      {case Dst of
         #x86_temp{reg=0} -> ?EAX;
         #x86_temp{} -> temp_to_rmArch(Dst);
         #x86_mem{} -> mem_to_rmArch(Dst)
       end, {imm32, ImmVal}};
    #x86_temp{} ->
      NewDst =
        case Dst of
          #x86_temp{} -> temp_to_rmArch(Dst);
          #x86_mem{} -> mem_to_rmArch(Dst)
        end,
      {NewDst, temp_to_regArch(Src)}
  end.

-ifdef(HIPE_AMD64).
%% On amd64 every register has an 8-bit form (via REX).
resolve_test_imm8_reg(Imm, Dst) -> {temp_to_rm8(Dst), Imm}.
-else.
resolve_test_imm8_reg(Imm = {imm8, ImmVal}, Dst = #x86_temp{reg=Reg}) ->
  case ?HIPE_X86_ENCODE:reg_has_8bit(Reg) of
    true -> {temp_to_rm8(Dst), Imm};
    false ->
      %% Register does not exist in 8-bit version; use 16-bit instead
      {temp_to_rm16(Dst), {imm16, ImmVal}}
  end.
-endif.

%%% shifts: count is 1 (dedicated form), an imm8, or cl.
resolve_shift_args(Src, Dst, Context) ->
  RM32 =
    case Dst of
      #x86_temp{} -> temp_to_rmArch(Dst);
      #x86_mem{} -> mem_to_rmArch(Dst)
    end,
  Count =
    case Src of
      #x86_imm{value=1} -> 1;
      #x86_imm{} -> translate_imm(Src, Context, true); % must be imm8
      #x86_temp{reg=1} -> cl % temp must be ecx
    end,
  {RM32, Count}.
%% Resolve the argument of an x87 fp_unop: a memory operand holding a
%% double or an untagged word, an fp stack register, or no operand.
resolve_x87_unop_arg(Arg=#x86_mem{type=Type}) ->
  case Type of
    'double' -> {mem_to_rm64fp(Arg)};
    'untagged' -> {mem_to_rmArch(Arg)};
    _ -> ?EXIT({fmovArgNotSupported,{Arg}})
  end;
resolve_x87_unop_arg(Arg=#x86_fpreg{}) ->
  {fpreg_to_stack(Arg)};
resolve_x87_unop_arg([]) ->
  [].

%% x87_binop mem, st(i)
resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_mem{}) ->
  {mem_to_rm64fp(Dst),fpreg_to_stack(Src)};
%% x87_binop st(0), st(i)
resolve_x87_binop_args(Src=#x86_fpreg{}, Dst=#x86_fpreg{}) ->
  {fpreg_to_stack(Dst),fpreg_to_stack(Src)}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%
%%% Assembly listing support (pp_asm option).
%%%

%% Print only when the pp_asm option is set.
print(String, Arglist, Options) ->
  ?when_option(pp_asm, Options, io:format(String, Arglist)).

%% Print one encoded instruction: as an asm listing line (pp_asm)
%% and/or as a raw comma-separated byte list (pp_cxmon).
print_insn(Address, Bytes, I, Options) ->
  ?when_option(pp_asm, Options, print_insn_2(Address, Bytes, I)),
  ?when_option(pp_cxmon, Options, print_code_list_2(Bytes)).

%% Emit code bytes as "xx,yy,zz," hex pairs (pp_cxmon format).
print_code_list_2([H | Tail]) ->
  print_byte(H),
  io:format(","),
  print_code_list_2(Tail);
print_code_list_2([]) ->
  ok. % was io:format("") -- a needless no-op I/O call

%% One listing line: hex address, raw bytes, pretty-printed insn.
print_insn_2(Address, Bytes, {_,_,OrigI}) ->
  io:format("~8.16b | ", [Address]),
  print_code_list(Bytes, 0),
  ?HIPE_X86_PP:pp_insn(OrigI).

%% Print the byte column, padded to a fixed width of 24 hex digits.
print_code_list([Byte|Rest], Len) ->
  print_byte(Byte),
  print_code_list(Rest, Len+1);
print_code_list([], Len) ->
  fill_spaces(24-(Len*2)),
  io:format(" | ").

%% Two lowercase hex digits per byte.
print_byte(Byte) ->
  io:format("~2.16.0b", [Byte band 16#FF]).

fill_spaces(N) when N > 0 ->
  io:format(" "),
  fill_spaces(N-1);
fill_spaces(0) ->
  [].