%%% -*- erlang-indent-level: 2 -*-
%%%
%%% Licensed under the Apache License, Version 2.0 (the "License");
%%% you may not use this file except in compliance with the License.
%%% You may obtain a copy of the License at
%%%
%%%     http://www.apache.org/licenses/LICENSE-2.0
%%%
%%% Unless required by applicable law or agreed to in writing, software
%%% distributed under the License is distributed on an "AS IS" BASIS,
%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%%% See the License for the specific language governing permissions and
%%% limitations under the License.
%%%
%%% x86 stack frame handling
%%%
%%% - map non-register temps to stack slots
%%% - add explicit stack management code to prologue and epilogue,
%%%   and at calls and tailcalls
%%%
%%% TODO:
%%% - Compute max stack in a pre-pass? (get rid of ref cell updates)
%%% - Merge all_temps and defun_minframe to a single
%%%   pass, for compile-time efficiency reasons.

%% This module is compiled twice: once as hipe_x86_frame and once as
%% hipe_amd64_frame.  The macros select the target-specific module names
%% and the guaranteed leaf-area size.
-ifdef(HIPE_AMD64).
-define(HIPE_X86_FRAME, hipe_amd64_frame).
-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
-define(HIPE_X86_LIVENESS, hipe_amd64_liveness).
-define(LEAF_WORDS, ?AMD64_LEAF_WORDS).
-else.
-define(HIPE_X86_FRAME, hipe_x86_frame).
-define(HIPE_X86_REGISTERS, hipe_x86_registers).
-define(HIPE_X86_LIVENESS, hipe_x86_liveness).
-define(LEAF_WORDS, ?X86_LEAF_WORDS).
-endif.

-module(?HIPE_X86_FRAME).
-export([frame/2]).
-include("../x86/hipe_x86.hrl").
-include("../rtl/hipe_literals.hrl").

%% Pass entry point: rewrite CFG0 so that every pseudo-temp lives in an
%% explicit stack slot and all stack adjustments (prologue, calls,
%% tailcalls, returns) are explicit instructions.
frame(CFG0, _Options) ->
  Formals = fix_formals(hipe_x86_cfg:params(CFG0)),
  Temps0 = all_temps(CFG0, Formals),
  MinFrame = defun_minframe(CFG0),
  Temps = ensure_minframe(MinFrame, Temps0),
  Liveness = ?HIPE_X86_LIVENESS:analyse(CFG0),
  do_body(CFG0, Liveness, Formals, Temps).

%% Drop the register-passed formals; only the remaining (stack-passed)
%% formals occupy slots in the caller's frame.
fix_formals(Formals) ->
  fix_formals(?HIPE_X86_REGISTERS:nr_args(), Formals).
%% fix_formals(N, Formals): drop the first N formals (those passed in
%% registers); tolerates arities smaller than N.
fix_formals(0, Rest) -> Rest;
fix_formals(N, [_|Rest]) -> fix_formals(N-1, Rest);
fix_formals(_, []) -> [].

%% Rewrite every basic block, then prepend frame allocation and stack
%% check code (the prologue).
do_body(CFG0, Liveness, Formals, Temps) ->
  Context = mk_context(Liveness, Formals, Temps),
  CFG1 = do_blocks(CFG0, Context),
  do_prologue(CFG1, Context).

do_blocks(CFG, Context) ->
  hipe_x86_cfg:map_bbs(fun(Lbl, BB) -> do_block(Lbl, BB, Context) end, CFG).

%% Rewrite one basic block.  The simulated SP offset starts at the
%% framesize (frame fully allocated) on entry to every block.
do_block(Label, Block, Context) ->
  Liveness = context_liveness(Context),
  LiveOut = ?HIPE_X86_LIVENESS:liveout(Liveness, Label),
  Code = hipe_bb:code(Block),
  NewCode = do_block(Code, LiveOut, Context, context_framesize(Context), []),
  hipe_bb:code_update(Block, NewCode).

%% Thread the simulated SP offset (FPoff) through the instruction list,
%% accumulating the rewritten instructions in reverse.
do_block([I|Insns], LiveOut, Context, FPoff0, RevCode) ->
  {NewIs, FPoff1} = do_insn(I, LiveOut, Context, FPoff0),
  do_block(Insns, LiveOut, Context, FPoff1, lists:reverse(NewIs, RevCode));
do_block([], _, Context, FPoff, RevCode) ->
  %% Sanity check: at block exit the SP offset must be back at the
  %% framesize, or the per-block simulation went wrong.
  FPoff0 = context_framesize(Context),
  if FPoff =:= FPoff0 -> [];
     true -> exit({?MODULE,do_block,FPoff})
  end,
  lists:reverse(RevCode, []).
%% Rewrite one instruction.  Returns {NewInsns, NewFPoff}, where NewInsns
%% is a (possibly empty) list of replacement instructions and NewFPoff is
%% the simulated SP offset after them: push grows it by one word, calls
%% shrink it by the argument bytes the callee pops, and ret/tailcall
%% reset it to the framesize.
%%
%% Bug fix: the #fmove{} branch used to wrap do_fmove/3 in an extra
%% list ({[do_fmove(...)], FPoff}), but do_fmove/3 already returns a
%% list ([] when the move is a move-to-self, [I] otherwise) — unlike
%% do_alu/do_cmp which return a single insn.  The extra wrap nested a
%% list (or an empty list) into the instruction stream.  It is now used
%% unwrapped, consistent with do_move/do_pseudo_spill_fmove.
do_insn(I, LiveOut, Context, FPoff) ->
  case I of
    #alu{} ->
      {[do_alu(I, Context, FPoff)], FPoff};
    #cmp{} ->
      {[do_cmp(I, Context, FPoff)], FPoff};
    #fp_unop{} ->
      {do_fp_unop(I, Context, FPoff), FPoff};
    #fp_binop{} ->
      {do_fp_binop(I, Context, FPoff), FPoff};
    #fmove{} ->
      {do_fmove(I, Context, FPoff), FPoff};
    #imul{} ->
      {[do_imul(I, Context, FPoff)], FPoff};
    #move{} ->
      {do_move(I, Context, FPoff), FPoff};
    #movsx{} ->
      {[do_movsx(I, Context, FPoff)], FPoff};
    #movzx{} ->
      {[do_movzx(I, Context, FPoff)], FPoff};
    #pseudo_call{} ->
      do_pseudo_call(I, LiveOut, Context, FPoff);
    #pseudo_spill_fmove{} ->
      {do_pseudo_spill_fmove(I, Context, FPoff), FPoff};
    #pseudo_spill_move{} ->
      {do_pseudo_spill_move(I, Context, FPoff), FPoff};
    #pseudo_tailcall{} ->
      {do_pseudo_tailcall(I, Context), context_framesize(Context)};
    #push{} ->
      {[do_push(I, Context, FPoff)], FPoff+word_size()};
    #ret{} ->
      {do_ret(I, Context, FPoff), context_framesize(Context)};
    #shift{} ->
      {[do_shift(I, Context, FPoff)], FPoff};
    #test{} ->
      {[do_test(I, Context, FPoff)], FPoff};
    _ -> % comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare
      {[I], FPoff}
  end.

%%%
%%% Convert any pseudo-temp operand in a binary (alu, cmp, move)
%%% or unary (push) instruction to an explicit x86_mem operand.
%%%

%% Convert both operands of an alu insn; returns the single new insn.
do_alu(I, Context, FPoff) ->
  #alu{src=Src0,dst=Dst0} = I,
  Src = conv_opnd(Src0, FPoff, Context),
  Dst = conv_opnd(Dst0, FPoff, Context),
  I#alu{src=Src,dst=Dst}.

%% Convert both operands of a cmp insn; returns the single new insn.
do_cmp(I, Context, FPoff) ->
  #cmp{src=Src0,dst=Dst0} = I,
  Src = conv_opnd(Src0, FPoff, Context),
  Dst = conv_opnd(Dst0, FPoff, Context),
  I#cmp{src=Src,dst=Dst}.

%% Convert the argument of an fp_unop; returns a singleton insn list.
do_fp_unop(I, Context, FPoff) ->
  #fp_unop{arg=Arg0} = I,
  Arg = conv_opnd(Arg0, FPoff, Context),
  [I#fp_unop{arg=Arg}].
%% Convert both operands of an fp_binop; returns a singleton insn list.
do_fp_binop(I, Context, FPoff) ->
  #fp_binop{src=S0,dst=D0} = I,
  [I#fp_binop{src=conv_opnd(S0, FPoff, Context),
              dst=conv_opnd(D0, FPoff, Context)}].

%% Convert an fmove; a move whose converted source and destination
%% coincide is dropped entirely.  Returns a list of zero or one insns.
do_fmove(I0, Context, FPoff) ->
  #fmove{src=S0,dst=D0} = I0,
  S = conv_opnd(S0, FPoff, Context),
  D = conv_opnd(D0, FPoff, Context),
  if S =:= D -> [];                     % omit move-to-self
     true -> [I0#fmove{src=S,dst=D}]
  end.

%% Expand a pseudo_spill_fmove into two real fmoves through its scratch
%% temp, unless source and destination coincide.
do_pseudo_spill_fmove(I0, Context, FPoff) ->
  #pseudo_spill_fmove{src=S0,temp=T0,dst=D0} = I0,
  S = conv_opnd(S0, FPoff, Context),
  T = conv_opnd(T0, FPoff, Context),
  D = conv_opnd(D0, FPoff, Context),
  if S =:= D -> [];                     % omit move-to-self
     true -> [#fmove{src=S, dst=T}, #fmove{src=T, dst=D}]
  end.

%% Convert the source operand of an imul; returns the single new insn.
do_imul(I, Context, FPoff) ->
  I#imul{src=conv_opnd(I#imul.src, FPoff, Context)}.

%% Convert a move; drops converted move-to-self.  Returns a list of
%% zero or one insns.
do_move(I0, Context, FPoff) ->
  #move{src=S0,dst=D0} = I0,
  S = conv_opnd(S0, FPoff, Context),
  D = conv_opnd(D0, FPoff, Context),
  if S =:= D -> [];                     % omit move-to-self
     true -> [I0#move{src=S,dst=D}]
  end.

%% Expand a pseudo_spill_move into two real moves through its scratch
%% temp, unless source and destination coincide.
do_pseudo_spill_move(I0, Context, FPoff) ->
  #pseudo_spill_move{src=S0,temp=T0,dst=D0} = I0,
  S = conv_opnd(S0, FPoff, Context),
  T = conv_opnd(T0, FPoff, Context),
  D = conv_opnd(D0, FPoff, Context),
  if S =:= D -> [];                     % omit move-to-self
     true -> [#move{src=S, dst=T}, #move{src=T, dst=D}]
  end.

%% Convert both operands of a movsx; returns the single new insn.
do_movsx(I, Context, FPoff) ->
  #movsx{src=S0,dst=D0} = I,
  I#movsx{src=conv_opnd(S0, FPoff, Context),
          dst=conv_opnd(D0, FPoff, Context)}.
%% Convert both operands of a movzx; returns the single new insn.
do_movzx(I, Context, FPoff) ->
  #movzx{src=S0,dst=D0} = I,
  I#movzx{src=conv_opnd(S0, FPoff, Context),
          dst=conv_opnd(D0, FPoff, Context)}.

%% Convert the source operand of a push; returns the single new insn.
do_push(I, Context, FPoff) ->
  I#push{src=conv_opnd(I#push.src, FPoff, Context)}.

%% Convert both operands of a shift; returns the single new insn.
do_shift(I, Context, FPoff) ->
  #shift{src=S0,dst=D0} = I,
  I#shift{src=conv_opnd(S0, FPoff, Context),
          dst=conv_opnd(D0, FPoff, Context)}.

%% Convert both operands of a test; returns the single new insn.
do_test(I, Context, FPoff) ->
  #test{src=S0,dst=D0} = I,
  I#test{src=conv_opnd(S0, FPoff, Context),
         dst=conv_opnd(D0, FPoff, Context)}.

%% Leave non-pseudo operands alone; turn a pseudo-temp into an explicit
%% SP-relative memory operand.
conv_opnd(Opnd, FPoff, Context) ->
  case opnd_is_pseudo(Opnd) of
    true -> conv_pseudo(Opnd, FPoff, Context);
    false -> Opnd
  end.

%% The slot address is the current SP offset plus the temp's fixed
%% frame offset from the context map.
conv_pseudo(Temp, FPoff, Context) ->
  conv_pseudo(Temp, FPoff + context_offset(Context, Temp)).

conv_pseudo(Temp, Off) ->
  hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(Off), hipe_x86:temp_type(Temp)).

%%%
%%% Return - deallocate frame and emit 'ret $N' insn.
%%%

do_ret(_I, Context, FPoff) ->
  %% XXX: this conses up a new ret insn, ignoring the one rtl->x86 made
  RetBytes = word_size() * context_arity(Context),
  adjust_sp(FPoff, [hipe_x86:mk_ret(RetBytes)]).

%% Prepend an 'add $N, %sp' in front of Rest when N is non-zero.
adjust_sp(N, Rest) ->
  case N of
    0 -> Rest;
    _ -> [hipe_x86:mk_alu('add', hipe_x86:mk_imm(N), mk_sp()) | Rest]
  end.

%%%
%%% Recursive calls
%%%

%% Rewrite a pseudo_call: convert the callee operand (it may itself be a
%% spilled temp), build a stack descriptor from the live pseudo-temps,
%% record the worst-case stack need at this point, and shrink the SP
%% offset by the stacked-argument bytes the callee pops on return.
do_pseudo_call(I, LiveOut, Context, FPoff0) ->
  #x86_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_x86:pseudo_call_sdesc(I),
  Fun0 = hipe_x86:pseudo_call_fun(I),
  Fun1 = conv_opnd(Fun0, FPoff0, Context),
  LiveTemps = [Temp || Temp <- LiveOut, temp_is_pseudo(Temp)],
  SDesc = mk_sdesc(ExnLab, Context, LiveTemps),
  ContLab = hipe_x86:pseudo_call_contlab(I),
  Linkage = hipe_x86:pseudo_call_linkage(I),
  CallCode = [hipe_x86:mk_pseudo_call(Fun1, SDesc, ContLab, Linkage)],
  %% +word_size() for our RA and +word_size() for callee's RA should
  %% it need to call inc_stack
  StkArity = erlang:max(0, OrigArity - ?HIPE_X86_REGISTERS:nr_args()),
  context_need_stack(Context, stack_need(FPoff0 + 2*word_size(), StkArity, Fun1)),
  ArgsBytes = word_size() * StkArity,
  {CallCode, FPoff0 - ArgsBytes}.

%% Worst-case stack need at a call site.  Primops and true BIFs only
%% need FPoff itself; any other callee goes through the general case,
%% which reserves room for a leaf callee.
stack_need(FPoff, StkArity, Fun) ->
  case Fun of
    #x86_prim{} -> FPoff;
    #x86_mfa{m=M,f=F,a=A} ->
      case erlang:is_builtin(M, F, A) of
        true -> FPoff;
        false -> stack_need_general(FPoff, StkArity)
      end;
    #x86_temp{} -> stack_need_general(FPoff, StkArity);
    #x86_mem{} -> stack_need_general(FPoff, StkArity)
  end.

%% A general callee may use up to ?LEAF_WORDS words without checking;
%% NOTE(review): the -2 presumably discounts the two RA words already
%% added by the caller, and StkArity words reuse argument slots — confirm.
stack_need_general(FPoff, StkArity) ->
  erlang:max(FPoff, FPoff + (?LEAF_WORDS - 2 - StkArity) * word_size()).

%%%
%%% Create stack descriptors for call sites.
%%%

%% Build an sdesc carrying the frame size (in words), the arity, and the
%% sorted frame slot indices of the live tagged temps.
mk_sdesc(ExnLab, Context, Temps) -> % for normal calls
  Temps0 = only_tagged(Temps),
  Live = mk_live(Context, Temps0),
  Arity = context_arity(Context),
  FSize = context_framesize(Context),
  hipe_x86:mk_sdesc(ExnLab, FSize div word_size(), Arity,
                    list_to_tuple(Live)).

%% Only 'tagged' temps are recorded as live in the descriptor.
only_tagged(Temps)->
  [X || X <- Temps, hipe_x86:temp_type(X) =:= 'tagged'].

mk_live(Context, Temps) ->
  lists:sort([temp_to_slot(Context, Temp) || Temp <- Temps]).

%% Word-indexed frame slot of Temp, counted from the stack pointer with
%% the frame fully allocated (framesize + negative temp offset).
temp_to_slot(Context, Temp) ->
  (context_framesize(Context) + context_offset(Context, Temp))
    div word_size().
%% Minimal descriptor for the inc_stack_0 calls emitted by this pass:
%% no exception label, zero frame, no live slots.
mk_minimal_sdesc(Context) -> % for inc_stack_0 calls
  hipe_x86:mk_sdesc([], 0, context_arity(Context), {}).

%%%
%%% Tailcalls.
%%%

%% Rewrite a pseudo_tailcall: shuffle the stacked arguments (plus our
%% return address) into the caller's argument area, record stack needs,
%% deallocate the remaining frame, and emit the jmp to the callee.
do_pseudo_tailcall(I, Context) -> % always at FPoff=context_framesize(Context)
  Arity = context_arity(Context),
  %% The RA slot is relocated together with the args, just above them.
  Args = hipe_x86:pseudo_tailcall_stkargs(I) ++ [context_ra(Context)],
  Fun0 = hipe_x86:pseudo_tailcall_fun(I),
  {Insns, FPoff1, Fun1} = do_tailcall_args(Args, Context, Fun0),
  context_need_stack(Context, FPoff1),
  %% Bytes left to deallocate before the jmp: old frame top minus the
  %% space now occupied by the shuffled args.
  FPoff2 = FPoff1 + word_size()+word_size()*Arity - word_size()*length(Args),
  %% +word_size() for callee's inc_stack RA
  StkArity = length(hipe_x86:pseudo_tailcall_stkargs(I)),
  context_need_stack(Context, stack_need(FPoff2 + word_size(), StkArity, Fun1)),
  I2 = hipe_x86:mk_jmp_fun(Fun1, hipe_x86:pseudo_tailcall_linkage(I)),
  Insns ++ adjust_sp(FPoff2, [I2]).

%% Generate the argument-shuffling code for a tailcall.  Destination
%% slots run downwards from FrameTop; DangerOff is the lowest one.  A
%% move whose source slot lies at or above DangerOff could be clobbered
%% by another move before being read, so such sources are saved with
%% pushes first (split_conflict); the rest go as direct stores (for
%% non-pseudo sources) or loads/stores via a scratch register.  Returns
%% {Insns, FPoff, Fun1} where Fun1 is the possibly-reloaded callee.
do_tailcall_args(Args, Context, Fun0) ->
  FPoff0 = context_framesize(Context),
  Arity = context_arity(Context),
  FrameTop = word_size() + word_size()*Arity,
  DangerOff = FrameTop - word_size()*length(Args),
  Moves = mk_moves(Args, FrameTop, []),
  {Stores, Simple, Conflict} =
    split_moves(Moves, Context, DangerOff, [], [], []),
  %% sanity check (shouldn't trigger any more)
  if DangerOff < -FPoff0 ->
      exit({?MODULE,do_tailcall_args,DangerOff,-FPoff0});
     true -> []
  end,
  FPoff1 = FPoff0,
  %%
  {Pushes, MoreSimple, FPoff2} = split_conflict(Conflict, FPoff1, [], []),
  %%
  %% A pseudo (spilled) callee must be loaded into a register; if its
  %% slot itself lies in the danger zone it is first saved with a push.
  {PushFun0, FPoff3, LoadFun1, Fun1} =
    case opnd_is_pseudo(Fun0) of
      false ->
        {[], FPoff2, [], Fun0};
      true ->
        Type = hipe_x86:temp_type(Fun0),
        Temp1 = mk_temp1(Type),
        Fun0Off = context_offset(Context, Fun0),
        MEM0 = conv_pseudo(Fun0, FPoff2 + Fun0Off),
        if Fun0Off >= DangerOff ->
            Fun1Off = hipe_x86:mk_imm(0),
            MEM1 = hipe_x86:mk_mem(mk_sp(), Fun1Off, Type),
            {[hipe_x86:mk_push(MEM0)],
             FPoff2 + word_size(),
             [hipe_x86:mk_move(MEM1, Temp1)],
             Temp1};
           true ->
            {[], FPoff2, [hipe_x86:mk_move(MEM0, Temp1)], Temp1}
        end
    end,
  %%
  %% Pick a scratch register that does not alias the callee register.
  RegTemp0 = ?HIPE_X86_REGISTERS:temp0(),
  TempReg =
    case hipe_x86:is_temp(Fun1) of
      true ->
        RegFun1 = hipe_x86:temp_reg(Fun1),
        if RegFun1 =/= RegTemp0 -> RegTemp0;
           true -> ?HIPE_X86_REGISTERS:temp1()
        end;
      false ->
        RegTemp0
    end,
  %%
  {Pushes ++ PushFun0 ++
   store_moves(Stores, FPoff3, LoadFun1 ++
               simple_moves(Simple, FPoff3, TempReg,
                            simple_moves(MoreSimple, FPoff3, TempReg,
                                         []))),
   FPoff3, Fun1}.

%% Pair each argument with its destination offset, walking down from
%% FrameTop one word per argument.
mk_moves([Arg|Args], Off, Moves) ->
  Off1 = Off - word_size(),
  mk_moves(Args, Off1, [{Arg,Off1}|Moves]);
mk_moves([], _, Moves) ->
  Moves.

%% Partition the moves: non-pseudo sources become direct Stores;
%% pseudo sources become {SrcOff,DstOff,Type} triples, classified as
%% Conflict when the source slot may be overwritten (SrcOff >= DangerOff)
%% and Simple otherwise.  Moves-to-self are dropped.
split_moves([Move|Moves], Context, DangerOff, Stores, Simple, Conflict) ->
  {Src,DstOff} = Move,
  case src_is_pseudo(Src) of
    false ->
      split_moves(Moves, Context, DangerOff, [Move|Stores],
                  Simple, Conflict);
    true ->
      SrcOff = context_offset(Context, Src),
      Type = typeof_src(Src),
      if SrcOff =:= DstOff ->
          split_moves(Moves, Context, DangerOff, Stores,
                      Simple, Conflict);
         SrcOff >= DangerOff ->
          split_moves(Moves, Context, DangerOff, Stores,
                      Simple, [{SrcOff,DstOff,Type}|Conflict]);
         true ->
          split_moves(Moves, Context, DangerOff, Stores,
                      [{SrcOff,DstOff,Type}|Simple], Conflict)
      end
  end;
split_moves([], _, _, Stores, Simple, Conflict) ->
  {Stores, Simple, Conflict}.

%% Save each conflicting source with a push (growing the simulated SP
%% offset a word at a time) and redirect the move to read from the new,
%% pushed location (negative offset relative to the original frame).
split_conflict([{SrcOff,DstOff,Type}|Conflict], FPoff, Pushes, Simple) ->
  Push = hipe_x86:mk_push(
           hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(FPoff+SrcOff), Type)),
  split_conflict(Conflict, FPoff+word_size(), [Push|Pushes],
                 [{-(FPoff+word_size()),DstOff,Type}|Simple]);
split_conflict([], FPoff, Pushes, Simple) ->
  {lists:reverse(Pushes), Simple, FPoff}.
%% Perform each {SrcOff,DstOff,Type} move as a load/store pair through
%% the scratch register TempReg, prepending in front of Rest.
simple_moves([{SrcOff,DstOff,Type}|Moves], FPoff, TempReg, Rest) ->
  SP = mk_sp(),
  Scratch = hipe_x86:mk_temp(TempReg, Type),
  Load = hipe_x86:mk_move(
           hipe_x86:mk_mem(SP, hipe_x86:mk_imm(FPoff+SrcOff), Type),
           Scratch),
  Store = hipe_x86:mk_move(
            Scratch,
            hipe_x86:mk_mem(SP, hipe_x86:mk_imm(FPoff+DstOff), Type)),
  simple_moves(Moves, FPoff, TempReg, [Load, Store | Rest]);
simple_moves([], _, _, Rest) ->
  Rest.

%% Store each non-pseudo source directly into its destination slot.
store_moves([{Src,DstOff}|Moves], FPoff, Rest) ->
  Store = hipe_x86:mk_move(
            Src,
            hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(FPoff+DstOff),
                            typeof_src(Src))),
  store_moves(Moves, FPoff, [Store | Rest]);
store_moves([], _, Rest) ->
  Rest.

%%%
%%% Contexts
%%%

%% liveness:     per-block liveness information
%% framesize:    local frame size in bytes
%% arity:        the function's arity
%% map:          temp -> frame offset map
%% ra:           temp standing for the return-address slot
%% ref_maxstack: mutable cell tracking the maximum stack need
-record(context, {liveness, framesize, arity, map, ra, ref_maxstack}).

mk_context(Liveness, Formals, Temps) ->
  RA = hipe_x86:mk_new_temp('untagged'),
  {Map, MinOff} = mk_temp_map(Formals, RA, Temps),
  FrameSize = -MinOff,
  #context{liveness=Liveness,
           framesize=FrameSize, arity=length(Formals),
           map=Map, ra=RA, ref_maxstack=hipe_bifs:ref(FrameSize)}.

%% Raise the recorded maximum stack need to N if N exceeds it.
context_need_stack(#context{ref_maxstack=RM}, N) ->
  case hipe_bifs:ref_get(RM) of
    M when N > M -> hipe_bifs:ref_set(RM, N);
    _ -> []
  end.

context_maxstack(#context{ref_maxstack=RM}) ->
  hipe_bifs:ref_get(RM).

context_arity(#context{arity=A}) -> A.

context_framesize(#context{framesize=FS}) -> FS.

context_liveness(#context{liveness=L}) -> L.

context_offset(#context{map=M}, Temp) -> tmap_lookup(M, Temp).

context_ra(#context{ra=RA}) -> RA.
%% Build the temp -> offset map.  The RA slot sits at offset 0; the
%% stack-passed formals get positive offsets above it (caller frame,
%% highest first); local temps get negative offsets below it.  Returns
%% {Map, MinOff}; -MinOff is the local frame size.
mk_temp_map(Formals, RA, Temps) ->
  {Map, _} = enter_vars(Formals, word_size() * (length(Formals)+1),
                        tmap_bind(tmap_empty(), RA, 0)),
  enter_vars(tset_to_list(Temps), 0, Map).

%% Assign each var the next lower slot: 'double' temps take a
%% float-sized slot, everything else one word.
enter_vars([V|Vs], PrevOff, Map) ->
  Off =
    case hipe_x86:temp_type(V) of
      'double' -> PrevOff - float_size();
      _ -> PrevOff - word_size()
    end,
  enter_vars(Vs, Off, tmap_bind(Map, V, Off));
enter_vars([], Off, Map) ->
  {Map, Off}.

%% Temp maps are gb_trees keyed on the temp term itself.
tmap_empty() ->
  gb_trees:empty().

tmap_bind(Map, Key, Val) ->
  gb_trees:insert(Key, Val, Map).

tmap_lookup(Map, Key) ->
  gb_trees:get(Key, Map).

%%%
%%% do_prologue: prepend stack frame allocation code.
%%%
%%% NewStart:
%%% temp0 = sp - MaxStack
%%% if( temp0 < SP_LIMIT(P) ) goto IncStack else goto AllocFrame
%%% AllocFrame:
%%% sp -= FrameSize
%%% goto OldStart
%%% OldStart:
%%% ...
%%% IncStack:
%%% call inc_stack
%%% goto NewStart

do_prologue(CFG, Context) ->
  do_check_stack(do_alloc_frame(CFG, Context), Context).

%% If a local frame is needed, prepend a block that subtracts the frame
%% size from SP and jumps to the old start block.
do_alloc_frame(CFG, Context) ->
  case context_framesize(Context) of
    0 ->
      CFG;
    FrameSize ->
      OldStartLab = hipe_x86_cfg:start_label(CFG),
      AllocFrameLab = hipe_gensym:get_next_label(x86),
      SP = mk_sp(),
      AllocFrameCode =
        [hipe_x86:mk_alu('sub', hipe_x86:mk_imm(FrameSize), SP),
         hipe_x86:mk_jmp_label(OldStartLab)],
      CFG1 = hipe_x86_cfg:bb_add(CFG, AllocFrameLab,
                                 hipe_bb:mk_bb(AllocFrameCode)),
      hipe_x86_cfg:start_label_update(CFG1, AllocFrameLab)
  end.
%% Wrap the CFG in a stack-overflow check (the NewStart/IncStack blocks
%% of the scheme sketched above do_prologue/2) when the recorded max
%% stack need exceeds the guaranteed area.
do_check_stack(CFG, Context) ->
  MaxStack = context_maxstack(Context),
  Arity = context_arity(Context),
  %% NOTE(review): -1-Arity presumably discounts the RA word and the
  %% stacked-argument words already present — confirm.
  Guaranteed = erlang:max(0, (?LEAF_WORDS - 1 - Arity) * word_size()),
  if MaxStack =< Guaranteed ->
      %% io:format("~w: MaxStack ~w =< Guaranteed ~w :-)\n", [?MODULE,MaxStack,Guaranteed]),
      CFG;
     true ->
      %% io:format("~w: MaxStack ~w > Guaranteed ~w :-(\n", [?MODULE,MaxStack,Guaranteed]),
      AllocFrameLab = hipe_x86_cfg:start_label(CFG),
      NewStartLab = hipe_gensym:get_next_label(x86),
      IncStackLab = hipe_gensym:get_next_label(x86),
      %%
      Type = 'untagged',
      Preg = ?HIPE_X86_REGISTERS:proc_pointer(),
      Pbase = hipe_x86:mk_temp(Preg, Type),
      SP_LIMIT_OFF = hipe_x86:mk_imm(
                       ?HIPE_X86_REGISTERS:sp_limit_offset()),
      Temp0 = mk_temp0(Type),
      SP = mk_sp(),
      %% temp0 := sp - MaxStack; branch to IncStack if it is below the
      %% process's stack limit, else fall into frame allocation.
      NewStartCode =
        %% hopefully this lea is faster than the mov;sub it replaced
        [hipe_x86:mk_lea(
           hipe_x86:mk_mem(SP, hipe_x86:mk_imm(-MaxStack), 'untagged'),
           Temp0),
         hipe_x86:mk_cmp(
           hipe_x86:mk_mem(Pbase, SP_LIMIT_OFF, Type), Temp0),
         hipe_x86:mk_pseudo_jcc('b', IncStackLab, AllocFrameLab, 0.01)],
      IncStackCode =
        [hipe_x86:mk_call(hipe_x86:mk_prim('inc_stack_0'),
                          mk_minimal_sdesc(Context), not_remote),
         hipe_x86:mk_jmp_label(NewStartLab)],
      %%
      CFG1 = hipe_x86_cfg:bb_add(CFG, NewStartLab,
                                 hipe_bb:mk_bb(NewStartCode)),
      CFG2 = hipe_x86_cfg:bb_add(CFG1, IncStackLab,
                                 hipe_bb:mk_bb(IncStackCode)),
      hipe_x86_cfg:start_label_update(CFG2, NewStartLab)
  end.

%%% typeof_src -- what's src's type?

typeof_src(Src) ->
  case Src of
    #x86_imm{} ->
      'untagged';
    #x86_temp{} ->
      hipe_x86:temp_type(Src);
    #x86_mem{} ->
      hipe_x86:mem_type(Src)
  end.

%%% Cons up an '%sp' Temp.

mk_sp() ->
  hipe_x86:mk_temp(?HIPE_X86_REGISTERS:sp(), 'untagged').

%%% Cons up a '%temp0' Temp.

mk_temp0(Type) ->
  hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp0(), Type).
%%% Cons up a '%temp1' Temp.

mk_temp1(Type) ->
  hipe_x86:mk_temp(?HIPE_X86_REGISTERS:temp1(), Type).

%%% Check if an operand is a pseudo-Temp.

src_is_pseudo(Src) ->
  opnd_is_pseudo(Src).

%% An operand is pseudo iff it is a temp that is not precoloured.
opnd_is_pseudo(Opnd) ->
  hipe_x86:is_temp(Opnd) andalso temp_is_pseudo(Opnd).

temp_is_pseudo(Temp) ->
  hipe_x86:is_temp(Temp) andalso
    not ?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp)).

%%%
%%% Build the set of all temps used in a Defun's body.
%%%

%% Every temp defined or used anywhere in the body, minus the formals,
%% restricted to pseudo-temps (the ones that need frame slots).
all_temps(CFG, Formals) ->
  Used = fold_insns(fun find_temps/2, tset_empty(), CFG),
  tset_filter(tset_del_list(Used, Formals),
              fun(T) -> temp_is_pseudo(T) end).

find_temps(I, S) ->
  tset_add_list(tset_add_list(S, hipe_x86_defuse:insn_def(I)),
                hipe_x86_defuse:insn_use(I)).

%% Fold Fun over every instruction of every basic block.
fold_insns(Fun, InitAcc, CFG) ->
  hipe_x86_cfg:fold_bbs(
    fun(_, BB, Acc) -> lists:foldl(Fun, Acc, hipe_bb:code(BB)) end,
    InitAcc, CFG).

%% Temp sets are maps with dummy ([]) values.
-compile({inline, [tset_empty/0, tset_size/1, tset_insert/2,
                   tset_filter/2, tset_to_list/1]}).

tset_empty() ->
  #{}.

tset_size(S) ->
  map_size(S).

tset_insert(S, T) ->
  S#{T => []}.

tset_add_list(S, Ts) ->
  lists:foldl(fun(T, Acc) -> Acc#{T => []} end, S, Ts).

tset_del_list(S, Ts) ->
  maps:without(Ts, S).

tset_filter(S, F) ->
  maps:filter(fun(K, _) -> F(K) end, S).

tset_to_list(S) ->
  maps:keys(S).

%%%
%%% Compute minimum permissible frame size, ignoring spilled temps.
%%% This is done to ensure that we won't have to adjust the frame size
%%% in the middle of a tailcall.
%%%

%% Minimum frame size in words: enough that the largest tailcall's
%% stacked arguments fit inside the slots freed by our own formals.
defun_minframe(CFG) ->
  MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG),
  MyArity = length(fix_formals(hipe_x86_cfg:params(CFG))),
  erlang:max(MaxTailArity - MyArity, 0).

%% Track the maximum number of stacked tailcall arguments seen.
insn_mta(I, MTA) ->
  case I of
    #pseudo_tailcall{arity=Arity} ->
      erlang:max(MTA, Arity - ?HIPE_X86_REGISTERS:nr_args());
    _ -> MTA
  end.

%%%
%%% Ensure that we have enough temps to satisfy the minimum frame size,
%%% if necessary by prepending unused dummy temps.
%%%

ensure_minframe(MinFrame, Temps) ->
  ensure_minframe(MinFrame, tset_size(Temps), Temps).

%% Pad the temp set with fresh unused 'untagged' temps until it has at
%% least MinFrame members (each temp occupies one frame slot).
ensure_minframe(MinFrame, Frame, Temps) ->
  if MinFrame > Frame ->
      Temp = hipe_x86:mk_new_temp('untagged'),
      ensure_minframe(MinFrame, Frame+1, tset_insert(Temps, Temp));
     true -> Temps
  end.

%% Target word size in bytes, as reported by the registers module.
word_size() ->
  ?HIPE_X86_REGISTERS:wordsize().

%% Size in bytes of a 'double' stack slot, from the registers module.
float_size() ->
  ?HIPE_X86_REGISTERS:float_size().