1%%% -*- erlang-indent-level: 2 -*-
2%%%
3%%% Licensed under the Apache License, Version 2.0 (the "License");
4%%% you may not use this file except in compliance with the License.
5%%% You may obtain a copy of the License at
6%%%
7%%%     http://www.apache.org/licenses/LICENSE-2.0
8%%%
9%%% Unless required by applicable law or agreed to in writing, software
10%%% distributed under the License is distributed on an "AS IS" BASIS,
11%%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12%%% See the License for the specific language governing permissions and
13%%% limitations under the License.
14%%%
15%%% x86 stack frame handling
16%%%
17%%% - map non-register temps to stack slots
18%%% - add explicit stack management code to prologue and epilogue,
19%%%   and at calls and tailcalls
20%%%
21%%% TODO:
22%%% - Compute max stack in a pre-pass? (get rid of ref cell updates)
23%%% - Merge all_temps and defun_minframe to a single
24%%%   pass, for compile-time efficiency reasons.
25
%% Target selection. When HIPE_AMD64 is defined this file builds the
%% module named by ?HIPE_X86_FRAME as hipe_amd64_frame and uses the
%% amd64 register/liveness modules; otherwise it builds hipe_x86_frame
%% with the x86 ones. ?LEAF_WORDS picks the matching leaf-words literal.
-ifdef(HIPE_AMD64).
-define(HIPE_X86_FRAME,     hipe_amd64_frame).
-define(HIPE_X86_REGISTERS, hipe_amd64_registers).
-define(HIPE_X86_LIVENESS,  hipe_amd64_liveness).
-define(LEAF_WORDS,	    ?AMD64_LEAF_WORDS).
-else.
-define(HIPE_X86_FRAME,     hipe_x86_frame).
-define(HIPE_X86_REGISTERS, hipe_x86_registers).
-define(HIPE_X86_LIVENESS,  hipe_x86_liveness).
-define(LEAF_WORDS,	    ?X86_LEAF_WORDS).
-endif.
37
38-module(?HIPE_X86_FRAME).
39-export([frame/2]).
40-include("../x86/hipe_x86.hrl").
41-include("../rtl/hipe_literals.hrl").
42
%%% frame(CFG, Options) -> CFG'
%%%
%%% Entry point of the pass. Collects the stack-passed formals and the
%%% pseudo temps of the function, pads the temp set up to the minimum
%%% frame size, runs liveness analysis and hands everything to
%%% do_body/4. Options is ignored.
frame(CFG, _Options) ->
  StackFormals = fix_formals(hipe_x86_cfg:params(CFG)),
  BodyTemps = all_temps(CFG, StackFormals),
  FrameTemps = ensure_minframe(defun_minframe(CFG), BodyTemps),
  do_body(CFG, ?HIPE_X86_LIVENESS:analyse(CFG), StackFormals, FrameTemps).
50
%%% Drop the first nr_args() formals (as reported by the registers
%%% module) and return the remaining ones.
fix_formals(Formals) ->
  NrRegArgs = ?HIPE_X86_REGISTERS:nr_args(),
  fix_formals(NrRegArgs, Formals).
53
%%% fix_formals(N, Formals): drop the first N elements of Formals,
%%% yielding [] when fewer than N elements are available.
fix_formals(_, []) -> [];
fix_formals(0, Formals) -> Formals;
fix_formals(N, [_|Formals]) -> fix_formals(N-1, Formals).
57
%%% Build the per-function context, rewrite every basic block with it,
%%% and finally prepend the prologue code.
do_body(CFG, Liveness, Formals, Temps) ->
  Ctx = mk_context(Liveness, Formals, Temps),
  do_prologue(do_blocks(CFG, Ctx), Ctx).
62
%%% Apply do_block/3 to every basic block of the CFG.
do_blocks(CFG, Context) ->
  Rewrite = fun(Label, Block) -> do_block(Label, Block, Context) end,
  hipe_x86_cfg:map_bbs(Rewrite, CFG).
65
%%% Rewrite the code of one basic block. Every block is processed
%%% starting from an offset equal to the frame size, using the block's
%%% live-out set for any calls it contains.
do_block(Label, Block, Context) ->
  LiveOut = ?HIPE_X86_LIVENESS:liveout(context_liveness(Context), Label),
  EntryOff = context_framesize(Context),
  NewCode = do_block(hipe_bb:code(Block), LiveOut, Context, EntryOff, []),
  hipe_bb:code_update(Block, NewCode).
72
%%% do_block(Insns, LiveOut, Context, FPoff, RevCode) -> Code
%%%
%%% Rewrite the instructions one by one, threading the current offset
%%% and accumulating the output in reverse. At the end of the block the
%%% offset must again equal the frame size; otherwise the pass exits
%%% with {?MODULE,do_block,FPoff}.
do_block([], _LiveOut, Context, FPoff, RevCode) ->
  case context_framesize(Context) of
    FPoff -> lists:reverse(RevCode);
    _ -> exit({?MODULE,do_block,FPoff})
  end;
do_block([Insn|Rest], LiveOut, Context, FPoff, RevCode) ->
  {Expansion, NextFPoff} = do_insn(Insn, LiveOut, Context, FPoff),
  do_block(Rest, LiveOut, Context, NextFPoff,
           lists:reverse(Expansion, RevCode)).
82
%%% do_insn(I, LiveOut, Context, FPoff) -> {NewInsns, NewFPoff}
%%%
%%% Rewrite a single instruction. NewInsns must be a flat list of
%%% instructions (possibly empty), because do_block/5 splices it into
%%% the block with lists:reverse/2. NewFPoff is the offset in effect
%%% after the instruction.
%%%
%%% Helpers that return a single instruction are wrapped in a list
%%% here. Helpers that already return a list (do_fp_unop, do_fp_binop,
%%% do_fmove, do_move, the pseudo_spill moves, do_pseudo_tailcall and
%%% do_ret) are passed through unwrapped.
do_insn(I, LiveOut, Context, FPoff) ->
  case I of
    #alu{} ->
      {[do_alu(I, Context, FPoff)], FPoff};
    #cmp{} ->
      {[do_cmp(I, Context, FPoff)], FPoff};
    #fp_unop{} ->
      {do_fp_unop(I, Context, FPoff), FPoff};
    #fp_binop{} ->
      {do_fp_binop(I, Context, FPoff), FPoff};
    #fmove{} ->
      %% do_fmove/3 already returns a list ([] for a move-to-self);
      %% wrapping it again would put a nested list into the code.
      {do_fmove(I, Context, FPoff), FPoff};
    #imul{} ->
      {[do_imul(I, Context, FPoff)], FPoff};
    #move{} ->
      {do_move(I, Context, FPoff), FPoff};
    #movsx{} ->
      {[do_movsx(I, Context, FPoff)], FPoff};
    #movzx{} ->
      {[do_movzx(I, Context, FPoff)], FPoff};
    #pseudo_call{} ->
      %% returns its own {Insns, FPoff} pair
      do_pseudo_call(I, LiveOut, Context, FPoff);
    #pseudo_spill_fmove{} ->
      {do_pseudo_spill_fmove(I, Context, FPoff), FPoff};
    #pseudo_spill_move{} ->
      {do_pseudo_spill_move(I, Context, FPoff), FPoff};
    #pseudo_tailcall{} ->
      %% the offset is reset to the frame size so that the end-of-block
      %% check in do_block/5 passes
      {do_pseudo_tailcall(I, Context), context_framesize(Context)};
    #push{} ->
      %% a push moves the stack pointer by one word
      {[do_push(I, Context, FPoff)], FPoff+word_size()};
    #ret{} ->
      {do_ret(I, Context, FPoff), context_framesize(Context)};
    #shift{} ->
      {[do_shift(I, Context, FPoff)], FPoff};
    #test{} ->
      {[do_test(I, Context, FPoff)], FPoff};
    _ ->	% comment, jmp, label, pseudo_jcc, pseudo_tailcall_prepare
      {[I], FPoff}
  end.
122
123%%%
124%%% Convert any pseudo-temp operand in a binary (alu, cmp, move)
125%%% or unary (push) instruction to an explicit x86_mem operand.
126%%%
127
%%% Replace pseudo-temp operands of an alu instruction by stack slots.
do_alu(I, Context, FPoff) ->
  #alu{src=OldSrc,dst=OldDst} = I,
  I#alu{src=conv_opnd(OldSrc, FPoff, Context),
        dst=conv_opnd(OldDst, FPoff, Context)}.
133
%%% Replace pseudo-temp operands of a cmp instruction by stack slots.
do_cmp(I, Context, FPoff) ->
  #cmp{src=OldSrc,dst=OldDst} = I,
  I#cmp{src=conv_opnd(OldSrc, FPoff, Context),
        dst=conv_opnd(OldDst, FPoff, Context)}.
139
%%% Convert the argument of an fp_unop; the result is a one-element
%%% instruction list.
do_fp_unop(I, Context, FPoff) ->
  #fp_unop{arg=OldArg} = I,
  NewArg = conv_opnd(OldArg, FPoff, Context),
  [I#fp_unop{arg=NewArg}].
144
%%% Convert both operands of an fp_binop; the result is a one-element
%%% instruction list.
do_fp_binop(I, Context, FPoff) ->
  #fp_binop{src=OldSrc,dst=OldDst} = I,
  [I#fp_binop{src=conv_opnd(OldSrc, FPoff, Context),
              dst=conv_opnd(OldDst, FPoff, Context)}].
150
%%% Convert the operands of an fmove. Returns a list: [] when source
%%% and destination end up identical (move-to-self), otherwise the
%%% single rewritten instruction.
do_fmove(I0, Context, FPoff) ->
  #fmove{src=Src0,dst=Dst0} = I0,
  case {conv_opnd(Src0, FPoff, Context), conv_opnd(Dst0, FPoff, Context)} of
    {Same, Same} -> []; % omit move-to-self
    {Src, Dst} -> [I0#fmove{src=Src,dst=Dst}]
  end.
160
%%% Expand a pseudo_spill_fmove into two fmoves going through the
%%% instruction's temp operand, or into nothing when source and
%%% destination are the same after conversion.
do_pseudo_spill_fmove(I, Context, FPoff) ->
  #pseudo_spill_fmove{src=Src0,temp=Temp0,dst=Dst0} = I,
  Conv = fun(Opnd) -> conv_opnd(Opnd, FPoff, Context) end,
  Src = Conv(Src0),
  Temp = Conv(Temp0),
  Dst = Conv(Dst0),
  if Src =:= Dst -> []; % omit move-to-self
     true -> [#fmove{src=Src, dst=Temp}, #fmove{src=Temp, dst=Dst}]
  end.
170
%%% Convert the source operand of an imul; other fields are untouched.
do_imul(I, Context, FPoff) ->
  #imul{src=OldSrc} = I,
  I#imul{src=conv_opnd(OldSrc, FPoff, Context)}.
175
%%% Convert the operands of a move. Returns a list: [] when source and
%%% destination end up identical (move-to-self), otherwise the single
%%% rewritten instruction.
do_move(I0, Context, FPoff) ->
  #move{src=Src0,dst=Dst0} = I0,
  case {conv_opnd(Src0, FPoff, Context), conv_opnd(Dst0, FPoff, Context)} of
    {Same, Same} -> []; % omit move-to-self
    {Src, Dst} -> [I0#move{src=Src,dst=Dst}]
  end.
185
%%% Expand a pseudo_spill_move into two moves going through the
%%% instruction's temp operand, or into nothing when source and
%%% destination are the same after conversion.
do_pseudo_spill_move(I, Context, FPoff) ->
  #pseudo_spill_move{src=Src0,temp=Temp0,dst=Dst0} = I,
  Conv = fun(Opnd) -> conv_opnd(Opnd, FPoff, Context) end,
  Src = Conv(Src0),
  Temp = Conv(Temp0),
  Dst = Conv(Dst0),
  if Src =:= Dst -> []; % omit move-to-self
     true -> [#move{src=Src, dst=Temp}, #move{src=Temp, dst=Dst}]
  end.
195
%%% Replace pseudo-temp operands of a movsx instruction by stack slots.
do_movsx(I, Context, FPoff) ->
  #movsx{src=OldSrc,dst=OldDst} = I,
  I#movsx{src=conv_opnd(OldSrc, FPoff, Context),
          dst=conv_opnd(OldDst, FPoff, Context)}.
201
%%% Replace pseudo-temp operands of a movzx instruction by stack slots.
do_movzx(I, Context, FPoff) ->
  #movzx{src=OldSrc,dst=OldDst} = I,
  I#movzx{src=conv_opnd(OldSrc, FPoff, Context),
          dst=conv_opnd(OldDst, FPoff, Context)}.
207
%%% Convert the source operand of a push. The offset passed in is the
%%% one in effect before the push; the caller accounts for the pushed
%%% word.
do_push(I, Context, FPoff) ->
  #push{src=OldSrc} = I,
  I#push{src=conv_opnd(OldSrc, FPoff, Context)}.
212
%%% Replace pseudo-temp operands of a shift instruction by stack slots.
do_shift(I, Context, FPoff) ->
  #shift{src=OldSrc,dst=OldDst} = I,
  I#shift{src=conv_opnd(OldSrc, FPoff, Context),
          dst=conv_opnd(OldDst, FPoff, Context)}.
218
%%% Replace pseudo-temp operands of a test instruction by stack slots.
do_test(I, Context, FPoff) ->
  #test{src=OldSrc,dst=OldDst} = I,
  I#test{src=conv_opnd(OldSrc, FPoff, Context),
         dst=conv_opnd(OldDst, FPoff, Context)}.
224
%%% Convert an operand: pseudo temps become stack-pointer-relative
%%% memory operands, anything else is returned unchanged.
conv_opnd(Opnd, FPoff, Context) ->
  case opnd_is_pseudo(Opnd) of
    true -> conv_pseudo(Opnd, FPoff, Context);
    false -> Opnd
  end.
232
%%% Memory operand for a pseudo temp: its offset from the context map
%%% added to the current offset FPoff.
conv_pseudo(Temp, FPoff, Context) ->
  conv_pseudo(Temp, FPoff + context_offset(Context, Temp)).
236
%%% Memory operand Off(%sp) carrying the type of Temp.
conv_pseudo(Temp, Off) ->
  Base = mk_sp(),
  Disp = hipe_x86:mk_imm(Off),
  hipe_x86:mk_mem(Base, Disp, hipe_x86:temp_type(Temp)).
239
240%%%
241%%% Return - deallocate frame and emit 'ret $N' insn.
242%%%
243
%%% Emit the frame deallocation (if any) followed by a fresh ret whose
%%% immediate is word_size() times the context arity.
do_ret(_I, Context, FPoff) ->
  %% XXX: this conses up a new ret insn, ignoring the one rtl->x86 made
  PopBytes = word_size() * context_arity(Context),
  Ret = hipe_x86:mk_ret(PopBytes),
  adjust_sp(FPoff, [Ret]).
247
%%% Prepend 'add $N, %sp' to Rest, unless N is 0 in which case Rest is
%%% returned as is.
adjust_sp(0, Rest) ->
  Rest;
adjust_sp(N, Rest) ->
  [hipe_x86:mk_alu('add', hipe_x86:mk_imm(N), mk_sp()) | Rest].
254
255%%%
256%%% Recursive calls.
257%%%
258
%%% do_pseudo_call(I, LiveOut, Context, FPoff) -> {[Call], NewFPoff}
%%%
%%% Rebuild a pseudo_call with a converted callee operand and a stack
%%% descriptor listing the live pseudo temps, record the stack need of
%%% the call in the context, and lower the offset by the size of the
%%% stack-passed arguments.
do_pseudo_call(I, LiveOut, Context, FPoff0) ->
  #x86_sdesc{exnlab=ExnLab,arity=OrigArity} = hipe_x86:pseudo_call_sdesc(I),
  Callee = conv_opnd(hipe_x86:pseudo_call_fun(I), FPoff0, Context),
  LivePseudos = lists:filter(fun temp_is_pseudo/1, LiveOut),
  NewSDesc = mk_sdesc(ExnLab, Context, LivePseudos),
  Call = hipe_x86:mk_pseudo_call(Callee, NewSDesc,
                                 hipe_x86:pseudo_call_contlab(I),
                                 hipe_x86:pseudo_call_linkage(I)),
  WordSize = word_size(),
  StkArity = erlang:max(0, OrigArity - ?HIPE_X86_REGISTERS:nr_args()),
  %% +word_size() for our RA and +word_size() for callee's RA should
  %% it need to call inc_stack
  context_need_stack(Context,
                     stack_need(FPoff0 + 2*WordSize, StkArity, Callee)),
  {[Call], FPoff0 - WordSize * StkArity}.
274
%%% Stack need for calling Fun at offset FPoff. Primops and MFAs that
%%% erlang:is_builtin/3 reports as builtins need just FPoff; all other
%%% callees (MFAs, temps, memory operands) get the general estimate.
stack_need(FPoff, StkArity, Fun) ->
  NeedsGeneral =
    case Fun of
      #x86_prim{} -> false;
      #x86_mfa{m=M,f=F,a=A} -> not erlang:is_builtin(M, F, A);
      #x86_temp{} -> true;
      #x86_mem{} -> true
    end,
  case NeedsGeneral of
    true -> stack_need_general(FPoff, StkArity);
    false -> FPoff
  end.
286
%%% FPoff plus (?LEAF_WORDS - 2 - StkArity) words, but never less than
%%% FPoff itself.
stack_need_general(FPoff, StkArity) ->
  ExtraWords = ?LEAF_WORDS - 2 - StkArity,
  erlang:max(FPoff, FPoff + ExtraWords * word_size()).
289
290%%%
291%%% Create stack descriptors for call sites.
292%%%
293
%%% Stack descriptor for a normal call: exception label, frame size in
%%% words, context arity, and a tuple of the sorted slot numbers of the
%%% tagged temps in Temps.
mk_sdesc(ExnLab, Context, Temps) ->	% for normal calls
  LiveSlots = list_to_tuple(mk_live(Context, only_tagged(Temps))),
  FrameWords = context_framesize(Context) div word_size(),
  hipe_x86:mk_sdesc(ExnLab, FrameWords, context_arity(Context), LiveSlots).
301
%%% Keep only the temps whose type is 'tagged'.
only_tagged(Temps) ->
  IsTagged = fun(Temp) -> hipe_x86:temp_type(Temp) =:= 'tagged' end,
  lists:filter(IsTagged, Temps).
304
%%% Sorted list of the slot numbers of Temps.
mk_live(Context, Temps) ->
  Slots = lists:map(fun(Temp) -> temp_to_slot(Context, Temp) end, Temps),
  lists:sort(Slots).
307
%%% Slot number of Temp: (frame size + temp offset) in words.
temp_to_slot(Context, Temp) ->
  ByteOff = context_framesize(Context) + context_offset(Context, Temp),
  ByteOff div word_size().
311
%%% Stack descriptor with no exception label, a zero frame size and no
%%% live slots; only the arity comes from the context.
mk_minimal_sdesc(Context) ->		% for inc_stack_0 calls
  Arity = context_arity(Context),
  hipe_x86:mk_sdesc([], 0, Arity, {}).
314
315%%%
316%%% Tailcalls.
317%%%
318
%%% Expand a pseudo_tailcall: shuffle the stack arguments plus the
%%% return address into place, record the stack needs in the context,
%%% adjust the stack pointer and jump to the callee.
do_pseudo_tailcall(I, Context) ->	% always at FPoff=context_framesize(Context)
  StkArgs = hipe_x86:pseudo_tailcall_stkargs(I),
  Args = StkArgs ++ [context_ra(Context)],
  {Insns, FPoff1, Fun1} =
    do_tailcall_args(Args, Context, hipe_x86:pseudo_tailcall_fun(I)),
  context_need_stack(Context, FPoff1),
  WordSize = word_size(),
  FPoff2 = FPoff1 + WordSize + WordSize*context_arity(Context)
    - WordSize*length(Args),
  %% +word_size() for callee's inc_stack RA
  context_need_stack(Context,
                     stack_need(FPoff2 + WordSize, length(StkArgs), Fun1)),
  Jmp = hipe_x86:mk_jmp_fun(Fun1, hipe_x86:pseudo_tailcall_linkage(I)),
  Insns ++ adjust_sp(FPoff2, [Jmp]).
331
%%% do_tailcall_args(Args, Context, Fun0) -> {Insns, FPoff, Fun1}
%%%
%%% Generate the code that moves the tailcall arguments (including the
%%% return address) into the argument area on top of the frame.
%%% Sources at or above DangerOff lie in the area being overwritten, so
%%% they are first pushed and later copied from the pushed copies. A
%%% callee held in a pseudo temp is loaded into %temp1. The returned
%%% offset includes every word pushed here.
do_tailcall_args(Args, Context, Fun0) ->
  FrameSize = context_framesize(Context),
  WordSize = word_size(),
  FrameTop = WordSize + WordSize*context_arity(Context),
  DangerOff = FrameTop - WordSize*length(Args),
  {Stores, Simple, Conflict} =
    split_moves(mk_moves(Args, FrameTop, []), Context, DangerOff, [], [], []),
  %% sanity check (shouldn't trigger any more)
  case DangerOff < -FrameSize of
    true -> exit({?MODULE,do_tailcall_args,DangerOff,-FrameSize});
    false -> ok
  end,
  {Pushes, MoreSimple, FPoff2} = split_conflict(Conflict, FrameSize, [], []),
  {PushFun0, FPoff3, LoadFun1, Fun1} =
    tailcall_fun(Fun0, Context, FPoff2, DangerOff),
  TempReg = tailcall_scratch_reg(Fun1),
  ConflictMoves = simple_moves(MoreSimple, FPoff3, TempReg, []),
  SimpleMoves = simple_moves(Simple, FPoff3, TempReg, ConflictMoves),
  Code = Pushes ++ PushFun0 ++
    store_moves(Stores, FPoff3, LoadFun1 ++ SimpleMoves),
  {Code, FPoff3, Fun1}.

%%% Decide how the callee operand is obtained. Returns
%%% {PushInsns, NewFPoff, LoadInsns, Fun}: a non-pseudo callee is used
%%% as is; a pseudo callee is loaded into %temp1, via a pushed copy if
%%% its slot is at or above DangerOff.
tailcall_fun(Fun0, Context, FPoff, DangerOff) ->
  case opnd_is_pseudo(Fun0) of
    false ->
      {[], FPoff, [], Fun0};
    true ->
      Type = hipe_x86:temp_type(Fun0),
      Temp1 = mk_temp1(Type),
      Fun0Off = context_offset(Context, Fun0),
      Slot = conv_pseudo(Fun0, FPoff + Fun0Off),
      case Fun0Off >= DangerOff of
        true ->
          %% the pushed copy sits at 0(%sp)
          Top = hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(0), Type),
          {[hipe_x86:mk_push(Slot)],
           FPoff + word_size(),
           [hipe_x86:mk_move(Top, Temp1)],
           Temp1};
        false ->
          {[], FPoff, [hipe_x86:mk_move(Slot, Temp1)], Temp1}
      end
  end.

%%% Scratch register for the memory-to-memory argument moves: temp0,
%%% unless the callee itself lives in temp0, in which case temp1.
tailcall_scratch_reg(Fun) ->
  RegTemp0 = ?HIPE_X86_REGISTERS:temp0(),
  case hipe_x86:is_temp(Fun) andalso hipe_x86:temp_reg(Fun) =:= RegTemp0 of
    true -> ?HIPE_X86_REGISTERS:temp1();
    false -> RegTemp0
  end.
388
%%% Pair every argument with its destination offset. The first argument
%%% gets Off - word_size(), each following one a word lower. New pairs
%%% are consed onto Moves, so the last argument ends up first.
mk_moves(Args, Off, Moves) ->
  WordSize = word_size(),
  Step = fun(Arg, {PrevOff, Acc}) ->
             ArgOff = PrevOff - WordSize,
             {ArgOff, [{Arg,ArgOff}|Acc]}
         end,
  {_, Result} = lists:foldl(Step, {Off, Moves}, Args),
  Result.
394
%%% Classify the {Src,DstOff} moves into three lists:
%%%   Stores   - source is not a pseudo temp (kept as {Src,DstOff})
%%%   Conflict - pseudo source whose offset is >= DangerOff
%%%   Simple   - any other pseudo source
%%% Pseudo sources already located at DstOff are dropped. Conflict and
%%% Simple entries have the form {SrcOff,DstOff,Type}.
split_moves(Moves, Context, DangerOff, Stores0, Simple0, Conflict0) ->
  Classify =
    fun({Src,DstOff} = Move, {Stores, Simple, Conflict}) ->
        case src_is_pseudo(Src) of
          false ->
            {[Move|Stores], Simple, Conflict};
          true ->
            SrcOff = context_offset(Context, Src),
            Type = typeof_src(Src),
            if SrcOff =:= DstOff ->
                {Stores, Simple, Conflict};
               SrcOff >= DangerOff ->
                {Stores, Simple, [{SrcOff,DstOff,Type}|Conflict]};
               true ->
                {Stores, [{SrcOff,DstOff,Type}|Simple], Conflict}
            end
        end
    end,
  lists:foldl(Classify, {Stores0, Simple0, Conflict0}, Moves).
417
%%% Turn each conflicting move into a push of its source plus a simple
%%% move whose source is the pushed copy. FPoff grows by one word per
%%% push. Returns {Pushes, SimpleMoves, NewFPoff} with the pushes in
%%% execution order.
split_conflict([], FPoff, Pushes, Simple) ->
  {lists:reverse(Pushes), Simple, FPoff};
split_conflict([{SrcOff,DstOff,Type}|Rest], FPoff, Pushes, Simple) ->
  SrcMem = hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(FPoff+SrcOff), Type),
  NewFPoff = FPoff + word_size(),
  split_conflict(Rest, NewFPoff, [hipe_x86:mk_push(SrcMem)|Pushes],
                 [{-NewFPoff,DstOff,Type}|Simple]).
425
%%% For every {SrcOff,DstOff,Type} emit a load from FPoff+SrcOff(%sp)
%%% into the scratch register followed by a store to FPoff+DstOff(%sp).
%%% Each pair is consed onto the code built so far, starting from Rest.
simple_moves(Moves, FPoff, TempReg, Rest) ->
  SP = mk_sp(),
  Emit =
    fun({SrcOff,DstOff,Type}, Acc) ->
        Temp = hipe_x86:mk_temp(TempReg, Type),
        SrcMem = hipe_x86:mk_mem(SP, hipe_x86:mk_imm(FPoff+SrcOff), Type),
        Load = hipe_x86:mk_move(SrcMem, Temp),
        DstMem = hipe_x86:mk_mem(SP, hipe_x86:mk_imm(FPoff+DstOff), Type),
        Store = hipe_x86:mk_move(Temp, DstMem),
        [Load, Store | Acc]
    end,
  lists:foldl(Emit, Rest, Moves).
436
%%% For every {Src,DstOff} emit a move of Src to FPoff+DstOff(%sp),
%%% consing each store onto the code built so far, starting from Rest.
store_moves(Moves, FPoff, Rest) ->
  SP = mk_sp(),
  Emit =
    fun({Src,DstOff}, Acc) ->
        Type = typeof_src(Src),
        DstMem = hipe_x86:mk_mem(SP, hipe_x86:mk_imm(FPoff+DstOff), Type),
        [hipe_x86:mk_move(Src, DstMem) | Acc]
    end,
  lists:foldl(Emit, Rest, Moves).
445
446%%%
447%%% Contexts
448%%%
449
%% Per-function state of the pass:
%%   liveness     - liveness analysis result, used to get live-out sets
%%   framesize    - frame size in bytes (negated lowest temp offset)
%%   arity        - number of formals handed to mk_context/3
%%   map          - mapping from temps to their offsets
%%   ra           - fresh temp standing for the return address (offset 0)
%%   ref_maxstack - hipe_bifs ref cell with the largest stack need so far
-record(context, {liveness, framesize, arity, map, ra, ref_maxstack}).
451
%%% Create the context: allocate a temp for the return address, lay out
%%% formals and temps, derive the frame size from the lowest offset and
%%% start the max-stack ref cell at that frame size.
mk_context(Liveness, Formals, Temps) ->
  RA = hipe_x86:mk_new_temp('untagged'),
  {Map, MinOff} = mk_temp_map(Formals, RA, Temps),
  FrameSize = -MinOff,
  #context{liveness=Liveness,
           framesize=FrameSize,
           arity=length(Formals),
           map=Map,
           ra=RA,
           ref_maxstack=hipe_bifs:ref(FrameSize)}.
461
%%% Raise the recorded maximum stack need to N if N exceeds the value
%%% currently stored in the ref cell.
context_need_stack(#context{ref_maxstack=RM}, N) ->
  case hipe_bifs:ref_get(RM) of
    Current when N > Current -> hipe_bifs:ref_set(RM, N);
    _ -> []
  end.
467
%%% Current content of the max-stack ref cell.
context_maxstack(#context{} = Context) ->
  hipe_bifs:ref_get(Context#context.ref_maxstack).
470
%%% Accessor for the arity field.
context_arity(#context{} = Context) ->
  Context#context.arity.
473
%%% Accessor for the framesize field.
context_framesize(#context{} = Context) ->
  Context#context.framesize.
476
%%% Accessor for the liveness field.
context_liveness(#context{} = Context) ->
  Context#context.liveness.
479
%%% Offset recorded for Temp in the context's temp map.
context_offset(#context{} = Context, Temp) ->
  tmap_lookup(Context#context.map, Temp).
482
%%% Accessor for the return-address temp.
context_ra(#context{} = Context) ->
  Context#context.ra.
485
%%% Build the temp-to-offset map. RA is bound to offset 0, the formals
%%% are laid out downwards from word_size()*(length(Formals)+1), and
%%% the temps downwards from 0. Returns {Map, LowestTempOffset}.
mk_temp_map(Formals, RA, Temps) ->
  Map0 = tmap_bind(tmap_empty(), RA, 0),
  FormalsTop = word_size() * (length(Formals)+1),
  {Map1, _} = enter_vars(Formals, FormalsTop, Map0),
  enter_vars(tset_to_list(Temps), 0, Map1).
490
%%% Assign each variable the next lower offset, stepping by
%%% float_size() for 'double' temps and word_size() otherwise.
%%% Returns {Map, LastOffset}.
enter_vars([], Off, Map) ->
  {Map, Off};
enter_vars([V|Vs], PrevOff, Map) ->
  Size =
    case hipe_x86:temp_type(V) of
      'double' -> float_size();
      _ -> word_size()
    end,
  Off = PrevOff - Size,
  enter_vars(Vs, Off, tmap_bind(Map, V, Off)).
500
%%% Empty temp map (a gb_tree).
tmap_empty() ->
  gb_trees:empty().
503
%%% Bind Key to Val. Uses gb_trees:insert/3, which requires that Key is
%%% not already present in the map.
tmap_bind(Map, Key, Val) ->
  gb_trees:insert(Key, Val, Map).
506
%%% Value bound to Key. Uses gb_trees:get/2, which assumes that Key is
%%% present in the map.
tmap_lookup(Map, Key) ->
  gb_trees:get(Key, Map).
509
510%%%
511%%% do_prologue: prepend stack frame allocation code.
512%%%
513%%% NewStart:
514%%%	temp0 = sp - MaxStack
515%%%	if( temp0 < SP_LIMIT(P) ) goto IncStack else goto AllocFrame
516%%% AllocFrame:
517%%%	sp -= FrameSize
518%%%	goto OldStart
519%%% OldStart:
520%%%	...
521%%% IncStack:
522%%%	call inc_stack
523%%%	goto NewStart
524
%%% Add the frame allocation block first, then the stack check in front
%%% of it.
do_prologue(CFG, Context) ->
  WithFrame = do_alloc_frame(CFG, Context),
  do_check_stack(WithFrame, Context).
527
%%% For a non-zero frame size, add a new start block that subtracts the
%%% frame size from %sp and jumps to the old start label. A zero frame
%%% size leaves the CFG untouched.
do_alloc_frame(CFG, Context) ->
  FrameSize = context_framesize(Context),
  if FrameSize =:= 0 ->
      CFG;
     true ->
      OldStartLab = hipe_x86_cfg:start_label(CFG),
      AllocFrameLab = hipe_gensym:get_next_label(x86),
      Alloc = hipe_x86:mk_alu('sub', hipe_x86:mk_imm(FrameSize), mk_sp()),
      Block = hipe_bb:mk_bb([Alloc, hipe_x86:mk_jmp_label(OldStartLab)]),
      hipe_x86_cfg:start_label_update(
        hipe_x86_cfg:bb_add(CFG, AllocFrameLab, Block),
        AllocFrameLab)
  end.
543
%%% If the recorded maximum stack need exceeds the amount computed from
%%% ?LEAF_WORDS and the arity, add two blocks in front of the current
%%% start block: NewStart compares sp - MaxStack against the process'
%%% stack limit and branches, and IncStack calls inc_stack_0 and jumps
%%% back to NewStart. Otherwise the CFG is returned unchanged.
do_check_stack(CFG, Context) ->
  MaxStack = context_maxstack(Context),
  Guaranteed =
    erlang:max(0, (?LEAF_WORDS - 1 - context_arity(Context)) * word_size()),
  case MaxStack > Guaranteed of
    false ->
      CFG;
    true ->
      AllocFrameLab = hipe_x86_cfg:start_label(CFG),
      NewStartLab = hipe_gensym:get_next_label(x86),
      IncStackLab = hipe_gensym:get_next_label(x86),
      %%
      Temp0 = mk_temp0('untagged'),
      ProcPtr = hipe_x86:mk_temp(?HIPE_X86_REGISTERS:proc_pointer(),
                                 'untagged'),
      LimitOff = hipe_x86:mk_imm(?HIPE_X86_REGISTERS:sp_limit_offset()),
      Probe = hipe_x86:mk_mem(mk_sp(), hipe_x86:mk_imm(-MaxStack), 'untagged'),
      NewStartCode =
        %% hopefully this lea is faster than the mov;sub it replaced
        [hipe_x86:mk_lea(Probe, Temp0),
         hipe_x86:mk_cmp(hipe_x86:mk_mem(ProcPtr, LimitOff, 'untagged'),
                         Temp0),
         hipe_x86:mk_pseudo_jcc('b', IncStackLab, AllocFrameLab, 0.01)],
      IncStackCode =
        [hipe_x86:mk_call(hipe_x86:mk_prim('inc_stack_0'),
                          mk_minimal_sdesc(Context), not_remote),
         hipe_x86:mk_jmp_label(NewStartLab)],
      %%
      AddBlock = fun({Lab, Code}, Acc) ->
                     hipe_x86_cfg:bb_add(Acc, Lab, hipe_bb:mk_bb(Code))
                 end,
      CFG1 = lists:foldl(AddBlock, CFG,
                         [{NewStartLab, NewStartCode},
                          {IncStackLab, IncStackCode}]),
      hipe_x86_cfg:start_label_update(CFG1, NewStartLab)
  end.
583
584%%% typeof_src -- what's src's type?
585
%%% Type of a move source: temps and memory operands carry their own
%%% type, immediates count as 'untagged'.
typeof_src(Src) ->
  case Src of
    #x86_temp{} -> hipe_x86:temp_type(Src);
    #x86_mem{} -> hipe_x86:mem_type(Src);
    #x86_imm{} -> 'untagged'
  end.
595
596%%% Cons up an '%sp' Temp.
597
%%% Untagged temp for the stack pointer register.
mk_sp() ->
  SPReg = ?HIPE_X86_REGISTERS:sp(),
  hipe_x86:mk_temp(SPReg, 'untagged').
600
601%%% Cons up a '%temp0' Temp.
602
%%% Temp of the given type for the temp0 register.
mk_temp0(Type) ->
  Reg = ?HIPE_X86_REGISTERS:temp0(),
  hipe_x86:mk_temp(Reg, Type).
605
606%%% Cons up a '%temp1' Temp.
607
%%% Temp of the given type for the temp1 register.
mk_temp1(Type) ->
  Reg = ?HIPE_X86_REGISTERS:temp1(),
  hipe_x86:mk_temp(Reg, Type).
610
611%%% Check if an operand is a pseudo-Temp.
612
%%% Alias of opnd_is_pseudo/1 used for move sources.
src_is_pseudo(Src) ->
  opnd_is_pseudo(Src).
615
%%% True iff Opnd is a temp and that temp is a pseudo temp.
opnd_is_pseudo(Opnd) ->
  hipe_x86:is_temp(Opnd) andalso temp_is_pseudo(Opnd).
621
%%% True iff Temp is a temp whose register is not precoloured; false
%%% for anything that is not a temp.
temp_is_pseudo(Temp) ->
  hipe_x86:is_temp(Temp) andalso
    not ?HIPE_X86_REGISTERS:is_precoloured(hipe_x86:temp_reg(Temp)).
629
630
631%%%
632%%% Build the set of all temps used in a Defun's body.
633%%%
634
%%% Set of all pseudo temps defined or used in the CFG, excluding the
%%% given formals.
all_temps(CFG, Formals) ->
  Seen = fold_insns(fun find_temps/2, tset_empty(), CFG),
  tset_filter(tset_del_list(Seen, Formals), fun temp_is_pseudo/1).
640
%%% Add the temps defined and used by instruction I to the set.
find_temps(I, S) ->
  Defs = hipe_x86_defuse:insn_def(I),
  Uses = hipe_x86_defuse:insn_use(I),
  tset_add_list(tset_add_list(S, Defs), Uses).
644
%%% Fold Fun over every instruction of every basic block in the CFG.
fold_insns(Fun, InitAcc, CFG) ->
  FoldBlock = fun(_Label, BB, Acc) ->
                  lists:foldl(Fun, Acc, hipe_bb:code(BB))
              end,
  hipe_x86_cfg:fold_bbs(FoldBlock, InitAcc, CFG).
649
650-compile({inline, [tset_empty/0, tset_size/1, tset_insert/2,
651		   tset_filter/2, tset_to_list/1]}).
652
%%% Empty temp set. Sets are represented as maps from element to [].
tset_empty() ->
  maps:new().
655
%%% Number of elements in the set.
tset_size(S) ->
  maps:size(S).
658
%%% Add element T to the set; a no-op if it is already a member.
tset_insert(S, T) ->
  maps:put(T, [], S).
661
%%% Add every element of the list to the set.
tset_add_list(S, Ts) ->
  lists:foldl(fun(T, Acc) -> Acc#{T => []} end, S, Ts).
665
%%% Remove every element of the list from the set; elements that are
%%% not members are ignored.
tset_del_list(S, Ts) ->
  lists:foldl(fun(T, Acc) -> maps:remove(T, Acc) end, S, Ts).
669
%%% Keep only the elements for which the predicate F returns true.
tset_filter(S, F) ->
  KeepKey = fun(Key, _Val) -> F(Key) end,
  maps:filter(KeepKey, S).
672
%%% List of the set's elements, in the order maps:keys/1 returns them.
%%% mk_temp_map/3 assigns stack offsets in this order.
tset_to_list(S) ->
  maps:keys(S).
675
676%%%
677%%% Compute minimum permissible frame size, ignoring spilled temps.
678%%% This is done to ensure that we won't have to adjust the frame size
679%%% in the middle of a tailcall.
680%%%
681
%%% Minimum frame size, counted in temps: the largest adjusted tailcall
%%% arity found by insn_mta/2 minus the number of stack-passed formals,
%%% but at least 0.
defun_minframe(CFG) ->
  StackFormals = fix_formals(hipe_x86_cfg:params(CFG)),
  MaxTailArity = fold_insns(fun insn_mta/2, 0, CFG),
  erlang:max(MaxTailArity - length(StackFormals), 0).
686
%%% Fold step: for a pseudo_tailcall, take the maximum of the running
%%% value and the call's arity minus nr_args(); other instructions
%%% leave the running value unchanged.
insn_mta(#pseudo_tailcall{arity=Arity}, MTA) ->
  erlang:max(MTA, Arity - ?HIPE_X86_REGISTERS:nr_args());
insn_mta(_, MTA) ->
  MTA.
693
694%%%
%%% Ensure that we have enough temps to satisfy the minimum frame size,
%%% if necessary by adding fresh, otherwise unused dummy temps to the set.
697%%%
698
%%% Pad the temp set until it has at least MinFrame elements.
ensure_minframe(MinFrame, Temps) ->
  Current = tset_size(Temps),
  ensure_minframe(MinFrame, Current, Temps).
701
%%% While the frame count is below MinFrame, insert a fresh untagged
%%% temp into the set and bump the count.
ensure_minframe(MinFrame, Frame, Temps) when MinFrame > Frame ->
  Dummy = hipe_x86:mk_new_temp('untagged'),
  ensure_minframe(MinFrame, Frame+1, tset_insert(Temps, Dummy));
ensure_minframe(_MinFrame, _Frame, Temps) ->
  Temps.
708
%%% Word size as reported by the target's registers module. It is used
%%% throughout this file as the unit for stack offsets.
word_size() ->
  ?HIPE_X86_REGISTERS:wordsize().
711
%%% Float size as reported by the target's registers module; used as
%%% the slot size for temps of type 'double'.
float_size() ->
  ?HIPE_X86_REGISTERS:float_size().
714