1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2018-2020. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20%% Purpose: Prepare for code generation, including register allocation.
21%%
22%% The output of this compiler pass is still in the SSA format, but
23%% it has been annotated and transformed to help the code generator.
24%%
25%% * Some instructions are translated to other instructions closer to
26%% the BEAM instructions. For example, the binary matching
27%% instructions are transformed from the optimization-friendly
%% internal format to instructions more similar to the actual BEAM
29%% instructions.
30%%
31%% * Blocks that will need an instruction for allocating a stack frame
32%% are annotated with a {frame_size,Size} annotation.
33%%
34%% * 'copy' instructions are added for all variables that need
35%% to be saved to the stack frame. Additional 'copy' instructions
36%% can be added as an optimization to reuse y registers (see
37%% the copy_retval sub pass).
38%%
39%% * Each function is annotated with a {register,RegisterMap}
40%% annotation that maps each variable to a BEAM register. The linear
41%% scan algorithm is used to allocate registers.
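%%
%% As a rough sketch of what those annotations can look like (the
%% exact values below are invented for illustration), a function that
%% needs a two-slot stack frame might get a block annotated with
%%
%%     {frame_size, 2}
%%
%% and a function annotation along the lines of
%%
%%     {register, #{ Var1 => {x,0}, Var2 => {y,0}, Var3 => {y,1} }}
%%
%% mapping each variable to the BEAM register chosen for it.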
42%%
%% There are four kinds of registers: x, y, fr (floating point
%% registers), and z. A variable will be allocated to a z register if
%% it is only used by the instruction that immediately follows the
%% instruction defining the variable. The code generator will
%% typically combine those instructions into a test instruction. z
%% registers are also used for some instructions that don't have a
%% return value.
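%%
%% For example (a sketch only; names and the exact argument format
%% are illustrative), a guard comparison typically looks like
%%
%%     Bool = bif:'=:=' A, B
%%     br Bool, ^succeeded, ^failed
%%
%% Since Bool is only used by the branch immediately following its
%% definition, it is given a z register, and the code generator can
%% combine the two instructions into a single is_eq_exact test.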
49%%
50%% References:
51%%
52%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation
53%% in the context of SSA form and register constraints. In Proceedings
54%% of the International Conference on Compiler Construction, pages
55%% 229–246. LNCS 2304, Springer-Verlag, 2002.
56%%
57%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a
58%% linear scan register allocator. In Proceedings of the ACM/USENIX
59%% International Conference on Virtual Execution Environments, pages
60%% 132–141. ACM Press, 2005.
61%%
62%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA
63%% Form. In Proceedings of the International Symposium on Code
64%% Generation and Optimization, pages 170-179. ACM Press, 2010.
65%%
66
67-module(beam_ssa_pre_codegen).
68
69-export([module/2]).
70
71-include("beam_ssa.hrl").
72
73-import(lists, [all/2,any/2,append/1,duplicate/2,
74                foldl/3,last/1,member/2,partition/2,
75                reverse/1,reverse/2,sort/1,splitwith/2,zip/2]).
76
77-spec module(beam_ssa:b_module(), [compile:option()]) ->
78                    {'ok',beam_ssa:b_module()}.
79
80module(#b_module{body=Fs0}=Module, Opts) ->
81    UseBSM3 = not proplists:get_bool(no_bsm3, Opts),
82    Ps = passes(Opts),
83    Fs = functions(Fs0, Ps, UseBSM3),
84    {ok,Module#b_module{body=Fs}}.
85
86functions([F|Fs], Ps, UseBSM3) ->
87    [function(F, Ps, UseBSM3)|functions(Fs, Ps, UseBSM3)];
88functions([], _Ps, _UseBSM3) -> [].
89
90-type b_var() :: beam_ssa:b_var().
91-type var_name() :: beam_ssa:var_name().
92-type instr_number() :: pos_integer().
93-type range() :: {instr_number(),instr_number()}.
94-type reg_num() :: beam_asm:reg_num().
95-type xreg() :: {'x',reg_num()}.
96-type yreg() :: {'y',reg_num()}.
97-type ypool() :: {'y',beam_ssa:label()}.
98-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} |
99                       ypool() | {yreg(),ypool()} | 'z'.
100-type ssa_register() :: beam_ssa_codegen:ssa_register().
101
102-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)).
103-record(st, {ssa :: beam_ssa:block_map(),
104             args :: [b_var()],
105             cnt :: beam_ssa:label(),
106             use_bsm3 :: boolean(),
107             frames=[] :: [beam_ssa:label()],
108             intervals=[] :: [{b_var(),[range()]}],
109             res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
110             regs=#{} :: #{b_var():=ssa_register()},
111             extra_annos=[] :: [{atom(),term()}],
112             location :: term()
113            }).
114-define(PASS(N), {N,fun N/1}).
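%% For example, ?PASS(fix_bs) expands to {fix_bs,fun fix_bs/1}: the
%% name of the sub pass paired with the fun that
%% compile:run_sub_passes/2 applies to the #st{} state in function/3
%% below.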
115
116passes(Opts) ->
117    AddPrecgAnnos = proplists:get_bool(dprecg, Opts),
118    FixTuples = proplists:get_bool(no_put_tuple2, Opts),
119    Ps = [?PASS(assert_no_critical_edges),
120
121          %% Preliminaries.
122          ?PASS(fix_bs),
123          ?PASS(sanitize),
124          ?PASS(match_fail_instructions),
125          case FixTuples of
126              false -> ignore;
127              true -> ?PASS(fix_tuples)
128          end,
129          ?PASS(use_set_tuple_element),
130          ?PASS(place_frames),
131          ?PASS(fix_receives),
132
133          %% Find and reserve Y registers.
134          ?PASS(find_yregs),
135          ?PASS(reserve_yregs),
136
          %% Handle legacy binary match instructions that don't
138          %% accept a Y register as destination.
139          ?PASS(legacy_bs),
140
141          %% Improve reuse of Y registers to potentially
142          %% reduce the size of the stack frame.
143          ?PASS(copy_retval),
144          ?PASS(opt_get_list),
145
146          %% Calculate live intervals.
147          ?PASS(number_instructions),
148          ?PASS(live_intervals),
149          ?PASS(reserve_regs),
150
151          %% If needed for a .precg file, save the live intervals
152          %% so they can be included in an annotation.
153          case AddPrecgAnnos of
154              false -> ignore;
155              true -> ?PASS(save_live_intervals)
156          end,
157
158          %% Allocate registers.
159          ?PASS(linear_scan),
160          ?PASS(frame_size),
161          ?PASS(turn_yregs),
162
163          ?PASS(assert_no_critical_edges)],
164    [P || P <- Ps, P =/= ignore].
165
166function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
167         Ps, UseBSM3) ->
168    try
169        Location = maps:get(location, Anno, none),
170        St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,
171                  cnt=Count0,location=Location},
172        St = compile:run_sub_passes(Ps, St0),
173        #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
174        F1 = add_extra_annos(F0, ExtraAnnos),
175        F = beam_ssa:add_anno(registers, Regs, F1),
176        F#b_function{bs=Blocks,cnt=Count}
177    catch
178        Class:Error:Stack ->
179            #{func_info:={_,Name,Arity}} = Anno,
180            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
181            erlang:raise(Class, Error, Stack)
182    end.
183
184save_live_intervals(#st{intervals=Intervals}=St) ->
185    St#st{extra_annos=[{live_intervals,Intervals}]}.
186
187%% Add extra annotations when a .precg listing file is being produced.
188add_extra_annos(F, Annos) ->
189    foldl(fun({Name,Value}, Acc) ->
190                  beam_ssa:add_anno(Name, Value, Acc)
191          end, F, Annos).
192
%% assert_no_critical_edges(St0) -> St.
%%  The code generator will not work if there are critical edges
%%  (edges from a block with multiple successors to a block with
%%  multiple predecessors). Abort if any critical edges are found.
196
197assert_no_critical_edges(#st{ssa=Blocks}=St) ->
198    F = fun assert_no_ces/3,
199    RPO = beam_ssa:rpo(Blocks),
200    beam_ssa:fold_blocks(F, RPO, Blocks, Blocks),
201    St.
202
203assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
204    %% This block has multiple predecessors. Make sure that none
    %% of the predecessors has more than one successor.
206    true = all(fun({_,P}) ->
207                       length(beam_ssa:successors(P, Blocks)) =:= 1
208               end, Phis),                      %Assertion.
209    Blocks;
210assert_no_ces(_, _, Blocks) -> Blocks.
211
212%% fix_bs(St0) -> St.
213%%  Fix up the binary matching instructions:
214%%
215%%    * Insert bs_save and bs_restore instructions where needed.
216%%
217%%    * Combine bs_match and bs_extract instructions to bs_get
218%%      instructions.
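%%
%%  As a rough sketch (instruction layout simplified), a match step
%%  such as
%%
%%      Match = bs_match `integer`, Ctx, ...
%%      Bool = succeeded Match
%%      br Bool, ^next, ^fail
%%
%%    next:
%%      Val = bs_extract Match
%%
%%  is rewritten so that the bs_extract is dropped and the bs_match in
%%  the predecessor block becomes a bs_get that writes directly to
%%  Val (see bs_combine/3).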
219
220fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
221    F = fun(#b_set{op=bs_start_match,dst=Dst}, A) ->
222                %% Mark the root of the match context list.
223                [{Dst,{context,Dst}}|A];
224           (#b_set{op=bs_match,dst=Dst,args=[_,ParentCtx|_]}, A) ->
                %% Link this match context to the previous match context.
226                [{Dst,ParentCtx}|A];
227           (_, A) ->
228                A
229        end,
230    RPO = beam_ssa:rpo(Blocks),
231    case beam_ssa:fold_instrs(F, RPO, [], Blocks) of
232        [] ->
233            %% No binary matching in this function.
234            St;
235        [_|_]=M ->
236            CtxChain = maps:from_list(M),
237            Linear0 = beam_ssa:linearize(Blocks),
238
239            %% Insert position instructions where needed.
240            {Linear1,Count} = case UseBSM3 of
241                                  true ->
242                                      bs_pos_bsm3(Linear0, CtxChain, Count0);
243                                  false ->
244                                      bs_pos_bsm2(Linear0, CtxChain, Count0)
245                              end,
246
247            %% Rename instructions.
248            Linear = bs_instrs(Linear1, CtxChain, []),
249
250            St#st{ssa=maps:from_list(Linear),cnt=Count}
251    end.
252
253%% Insert bs_get_position and bs_set_position instructions as needed.
254bs_pos_bsm3(Linear0, CtxChain, Count0) ->
255    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
256    Rs = maps:values(Rs0),
257    S0 = sofs:relation(Rs, [{context,save_point}]),
258    S1 = sofs:relation_to_family(S0),
259    S = sofs:to_external(S1),
260
261    {SavePoints,Count1} = make_bs_pos_dict(S, Count0, []),
262
263    {Gets,Count2} = make_bs_getpos_map(Rs, SavePoints, Count1, []),
264    {Sets,Count} = make_bs_setpos_map(maps:to_list(Rs0), SavePoints, Count2, []),
265
266    %% Now insert all saves and restores.
267    {bs_insert_bsm3(Linear0, Gets, Sets), Count}.
268
269make_bs_getpos_map([{Ctx,Save}=Ps|T], SavePoints, Count, Acc) ->
270    SavePoint = get_savepoint(Ps, SavePoints),
271    I = #b_set{op=bs_get_position,dst=SavePoint,args=[Ctx]},
272    make_bs_getpos_map(T, SavePoints, Count+1, [{Save,I}|Acc]);
273make_bs_getpos_map([], _, Count, Acc) ->
274    {maps:from_list(Acc),Count}.
275
276make_bs_setpos_map([{Bef,{Ctx,_}=Ps}|T], SavePoints, Count, Acc) ->
277    Ignored = #b_var{name={'@ssa_ignored',Count}},
278    Args = [Ctx, get_savepoint(Ps, SavePoints)],
279    I = #b_set{op=bs_set_position,dst=Ignored,args=Args},
280    make_bs_setpos_map(T, SavePoints, Count+1, [{Bef,I}|Acc]);
281make_bs_setpos_map([], _, Count, Acc) ->
282    {maps:from_list(Acc),Count}.
283
284get_savepoint({_,_}=Ps, SavePoints) ->
285    Name = {'@ssa_bs_position', map_get(Ps, SavePoints)},
286    #b_var{name=Name}.
287
288make_bs_pos_dict([{Ctx,Pts}|T], Count0, Acc0) ->
289    {Acc, Count} = make_bs_pos_dict_1(Pts, Ctx, Count0, Acc0),
290    make_bs_pos_dict(T, Count, Acc);
291make_bs_pos_dict([], Count, Acc) ->
292    {maps:from_list(Acc), Count}.
293
294make_bs_pos_dict_1([H|T], Ctx, I, Acc) ->
295    make_bs_pos_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
296make_bs_pos_dict_1([], Ctx, I, Acc) ->
297    {[{Ctx,I}|Acc], I}.
298
%% Same as bs_pos_bsm3/3, but inserts the old bs_save and bs_restore
%% instructions instead of the OTP 22 position instructions. This is
%% only used when cross-compiling to older versions.
301bs_pos_bsm2(Linear0, CtxChain, Count0) ->
302    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
303    Rs = maps:values(Rs0),
304    S0 = sofs:relation(Rs, [{context,save_point}]),
305    S1 = sofs:relation_to_family(S0),
306    S = sofs:to_external(S1),
307    Slots = make_save_point_dict(S, []),
308    {Saves,Count1} = make_save_map(Rs, Slots, Count0, []),
309    {Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []),
310
311    %% Now insert all saves and restores.
312    {bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}.
313
314make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) ->
315    Ignored = #b_var{name={'@ssa_ignored',Count}},
316    case make_slot(Ps, Slots) of
317        #b_literal{val=start} ->
318            make_save_map(T, Slots, Count, Acc);
319        Slot ->
320            I = #b_set{op=bs_save,dst=Ignored,args=[Ctx,Slot]},
321            make_save_map(T, Slots, Count+1, [{Save,I}|Acc])
322    end;
323make_save_map([], _, Count, Acc) ->
324    {maps:from_list(Acc),Count}.
325
326make_restore_map([{Bef,{Ctx,_}=Ps}|T], Slots, Count, Acc) ->
327    Ignored = #b_var{name={'@ssa_ignored',Count}},
328    I = #b_set{op=bs_restore,dst=Ignored,args=[Ctx,make_slot(Ps, Slots)]},
329    make_restore_map(T, Slots, Count+1, [{Bef,I}|Acc]);
330make_restore_map([], _, Count, Acc) ->
331    {maps:from_list(Acc),Count}.
332
333make_slot({Same,Same}, _Slots) ->
334    #b_literal{val=start};
335make_slot({_,_}=Ps, Slots) ->
336    #b_literal{val=map_get(Ps, Slots)}.
337
338make_save_point_dict([{Ctx,Pts}|T], Acc0) ->
339    Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0),
340    make_save_point_dict(T, Acc);
341make_save_point_dict([], Acc) ->
342    maps:from_list(Acc).
343
344make_save_point_dict_1([Ctx|T], Ctx, I, Acc) ->
345    %% Special {atom,start} save point. Does not need a
346    %% bs_save instruction.
347    make_save_point_dict_1(T, Ctx, I, Acc);
348make_save_point_dict_1([H|T], Ctx, I, Acc) ->
349    make_save_point_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
350make_save_point_dict_1([], Ctx, I, Acc) ->
351    [{Ctx,I}|Acc].
352
353bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) ->
354    InPos = maps:get(L, D0, #{}),
355    {SuccPos, FailPos, Rs} = bs_restores_is(Is, CtxChain, InPos, InPos, Rs0),
356
357    D = bs_update_successors(Last, SuccPos, FailPos, D0),
358    bs_restores(Bs, CtxChain, D, Rs);
359bs_restores([], _, _, Rs) -> Rs.
360
361bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) ->
362    join_positions([{Succ,SPos},{Fail,FPos}], D);
363bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, FPos, D) ->
364    SPos = FPos,                                %Assertion.
365    Update = [{L,SPos} || {_,L} <- List] ++ [{Fail,SPos}],
366    join_positions(Update, D);
367bs_update_successors(#b_ret{}, SPos, FPos, D) ->
368    SPos = FPos,                                %Assertion.
369    D.
370
371join_positions([{L,MapPos0}|T], D) ->
372    case D of
373        #{L:=MapPos0} ->
374            %% Same map.
375            join_positions(T, D);
376        #{L:=MapPos1} ->
377            %% Different maps.
378            MapPos = join_positions_1(MapPos0, MapPos1),
379            join_positions(T, D#{L:=MapPos});
380        #{} ->
381            join_positions(T, D#{L=>MapPos0})
382    end;
383join_positions([], D) -> D.
384
385join_positions_1(LHS, RHS) ->
386    if
387        map_size(LHS) < map_size(RHS) ->
388            join_positions_2(maps:keys(LHS), RHS, LHS);
389        true ->
390            join_positions_2(maps:keys(RHS), LHS, RHS)
391    end.
392
393join_positions_2([V | Vs], Bigger, Smaller) ->
394    case {Bigger, Smaller} of
395        {#{ V := Same }, #{ V := Same }} ->
396            join_positions_2(Vs, Bigger, Smaller);
397        {#{ V := _ }, #{ V := _ }} ->
398            join_positions_2(Vs, Bigger, Smaller#{ V := unknown });
399        {#{}, #{ V := _ }} ->
400            join_positions_2(Vs, Bigger, maps:remove(V, Smaller))
401    end;
402join_positions_2([], _Bigger, Smaller) ->
403    Smaller.
404
405%%
406%% Updates the restore and position maps according to the given instructions.
407%%
408%% Note that positions may be updated even when a match fails; if a match
409%% requires a restore, the position at the fail block will be the position
410%% we've *restored to* and not the one we entered the current block with.
411%%
412
413bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is],
414               CtxChain, SPos0, _FPos, Rs) ->
415    %% Match instructions leave the position unchanged on failure, so
416    %% FPos must be the SPos we entered the *instruction* with, and not the
417    %% *block*.
418    %%
419    %% This is important when we have multiple matches in a single block where
420    %% all but the last are guaranteed to succeed; the upcoming fail block must
421    %% restore to the position of the next-to-last match, not the position we
422    %% entered the current block with.
423    FPos = SPos0,
424    SPos = SPos0#{Start=>Start},
425    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
426bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is],
427               CtxChain, SPos0, _FPos, Rs0) ->
428    Start = bs_subst_ctx(NewPos, CtxChain),
429    [_,FromPos|_] = Args,
430    case SPos0 of
431        #{Start:=FromPos} ->
432            %% Same position, no restore needed.
433            SPos = case bs_match_type(I) of
434                       plain ->
435                            %% Update position to new position.
436                            SPos0#{Start:=NewPos};
437                        _ ->
438                            %% Position will not change (test_unit
439                            %% instruction or no instruction at
440                            %% all).
441                            SPos0
442                   end,
443            FPos = SPos0,
444            bs_restores_is(Is, CtxChain, SPos, FPos, Rs0);
445        #{Start:=_} ->
446            %% Different positions, might need a restore instruction.
447            case bs_match_type(I) of
448                none ->
449                    %% This is a tail test that will be optimized away.
450                    %% There's no need to do a restore, and all
451                    %% positions are unchanged.
452                    FPos = SPos0,
453                    bs_restores_is(Is, CtxChain, SPos0, FPos, Rs0);
454                test_unit ->
455                    %% This match instruction will be replaced by
456                    %% a test_unit instruction. We will need a
457                    %% restore. The new position will be the position
458                    %% restored to (NOT NewPos).
459                    SPos = SPos0#{Start:=FromPos},
460                    FPos = SPos,
461                    Rs = Rs0#{NewPos=>{Start,FromPos}},
462                    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
463                plain ->
464                    %% Match or skip. Position will be changed.
465                    SPos = SPos0#{Start:=NewPos},
466                    FPos = SPos0#{Start:=FromPos},
467                    Rs = Rs0#{NewPos=>{Start,FromPos}},
468                    bs_restores_is(Is, CtxChain, SPos, FPos, Rs)
469            end
470    end;
471bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
472               CtxChain, SPos, _FPos, Rs) ->
473    Start = bs_subst_ctx(FromPos, CtxChain),
474
475    #{Start:=FromPos} = SPos,                   %Assertion.
476    FPos = SPos,
477
478    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
479bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is],
480               CtxChain, SPos0, _FPos, Rs0) ->
481    {SPos1, Rs} = bs_restore_args(Args, SPos0, CtxChain, Dst, Rs0),
482
483    SPos = bs_invalidate_pos(Args, SPos1, CtxChain),
484    FPos = SPos,
485
486    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
487bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
488               CtxChain, SPos0, _FPos, Rs0)
489  when Op =:= bs_test_tail;
490       Op =:= bs_get_tail ->
491    {SPos, Rs} = bs_restore_args(Args, SPos0, CtxChain, Dst, Rs0),
492    FPos = SPos,
493
494    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
495bs_restores_is([#b_set{op={succeeded,guard},args=[Arg]}],
496               CtxChain, SPos, FPos0, Rs) ->
497    %% If we're branching on a match operation, the positions will be different
498    %% depending on whether it succeeds.
499    Ctx = bs_subst_ctx(Arg, CtxChain),
500    FPos = case SPos of
501               #{ Ctx := _ } -> FPos0;
502               #{} -> SPos
503           end,
504    {SPos, FPos, Rs};
505bs_restores_is([_ | Is], CtxChain, SPos, _FPos, Rs) ->
506    FPos = SPos,
507    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
508bs_restores_is([], _CtxChain, SPos, _FPos, Rs) ->
509    FPos = SPos,
510    {SPos, FPos, Rs}.
511
512bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
513                             #b_literal{val=binary},_Flags,
514                             #b_literal{val=all},#b_literal{val=U}]}) ->
515    case U of
516        1 -> none;
517        _ -> test_unit
518    end;
519bs_match_type(_) ->
520    plain.
521
522%% Call instructions leave the match position in an undefined state,
523%% requiring us to invalidate each affected argument.
524bs_invalidate_pos([#b_var{}=Arg|Args], Pos0, CtxChain) ->
525    Start = bs_subst_ctx(Arg, CtxChain),
526    case Pos0 of
527        #{Start:=_} ->
528            Pos = Pos0#{Start:=unknown},
529            bs_invalidate_pos(Args, Pos, CtxChain);
530        #{} ->
531            %% Not a match context.
532            bs_invalidate_pos(Args, Pos0, CtxChain)
533    end;
534bs_invalidate_pos([_|Args], Pos, CtxChain) ->
535    bs_invalidate_pos(Args, Pos, CtxChain);
536bs_invalidate_pos([], Pos, _CtxChain) ->
537    Pos.
538
539bs_restore_args([#b_var{}=Arg|Args], Pos0, CtxChain, Dst, Rs0) ->
540    Start = bs_subst_ctx(Arg, CtxChain),
541    case Pos0 of
542        #{Start:=Arg} ->
543            %% Same position, no restore needed.
544            bs_restore_args(Args, Pos0, CtxChain, Dst, Rs0);
545        #{Start:=_} ->
546            %% Different positions, need a restore instruction.
547            Pos = Pos0#{Start:=Arg},
548            Rs = Rs0#{Dst=>{Start,Arg}},
549            bs_restore_args(Args, Pos, CtxChain, Dst, Rs);
550        #{} ->
551            %% Not a match context.
552            bs_restore_args(Args, Pos0, CtxChain, Dst, Rs0)
553    end;
554bs_restore_args([_|Args], Pos, CtxChain, Dst, Rs) ->
555    bs_restore_args(Args, Pos, CtxChain, Dst, Rs);
556bs_restore_args([], Pos, _CtxChain, _Dst, Rs) ->
557    {Pos, Rs}.
558
559%% Insert all bs_save and bs_restore instructions.
560
561bs_insert_bsm3(Blocks, Saves, Restores) ->
562    bs_insert_1(Blocks, [], Saves, Restores, fun(I) -> I end).
563
564bs_insert_bsm2(Blocks, Saves, Restores, Slots) ->
565    %% The old instructions require bs_start_match to be annotated with the
566    %% number of position slots it needs.
567    bs_insert_1(Blocks, [], Saves, Restores,
568                fun(#b_set{op=bs_start_match,dst=Dst}=I0) ->
569                        NumSlots = case Slots of
570                                       #{Dst:=NumSlots0} -> NumSlots0;
571                                       #{} -> 0
572                                   end,
573                        beam_ssa:add_anno(num_slots, NumSlots, I0);
574                   (I) ->
575                        I
576                end).
577
578bs_insert_1([{L,#b_blk{is=Is0}=Blk} | Bs], Deferred0, Saves, Restores, XFrm) ->
579    Is1 = bs_insert_deferred(Is0, Deferred0),
580    {Is, Deferred} = bs_insert_is(Is1, Saves, Restores, XFrm, []),
581    [{L,Blk#b_blk{is=Is}} | bs_insert_1(Bs, Deferred, Saves, Restores, XFrm)];
582bs_insert_1([], [], _, _, _) ->
583    [].
584
585bs_insert_deferred([#b_set{op=bs_extract}=I | Is], Deferred) ->
586    [I | bs_insert_deferred(Is, Deferred)];
587bs_insert_deferred(Is, Deferred) ->
588    Deferred ++ Is.
589
590bs_insert_is([#b_set{dst=Dst}=I0|Is], Saves, Restores, XFrm, Acc0) ->
591    I = XFrm(I0),
592    Pre = case Restores of
593              #{Dst:=R} -> [R];
594              #{} -> []
595          end,
596    Post = case Saves of
597               #{Dst:=S} -> [S];
598               #{} -> []
599           end,
600    Acc = [I | Pre] ++ Acc0,
601    case Is of
602        [#b_set{op={succeeded,_},args=[Dst]}] ->
603            %% Defer the save sequence to the success block.
604            {reverse(Acc, Is), Post};
605        _ ->
606            bs_insert_is(Is, Saves, Restores, XFrm, Post ++ Acc)
607    end;
608bs_insert_is([], _, _, _, Acc) ->
609    {reverse(Acc), []}.
610
611%% Translate bs_match instructions to bs_get, bs_match_string,
612%% or bs_skip. Also rename match context variables to use the
613%% variable assigned to by the start_match instruction.
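%%
%% For example (sketch), after context substitution the instructions
%%
%%     _1 = bs_match `skip`, Ctx, `integer`, ...
%%     _2 = bs_match `string`, Ctx, `<<"abc">>`
%%
%% are rewritten to
%%
%%     _1 = bs_skip `integer`, Ctx, ...
%%     _2 = bs_match_string Ctx, `<<"abc">>`
%%
%% mirroring the clauses of bs_instrs_is/3 below.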
614
615bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
616    case bs_instrs_is(Is0, CtxChain, []) of
617        [#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is] ->
618            %% Drop this instruction. Rewrite the corresponding
619            %% bs_match instruction in the previous block to
620            %% a bs_get instruction.
621            Acc = bs_combine(Dst, Ctx, Acc0),
622            bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]);
623        Is ->
624            bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc0])
625    end;
626bs_instrs([], _, Acc) ->
627    reverse(Acc).
628
629bs_instrs_is([#b_set{op={succeeded,_}}=I|Is], CtxChain, Acc) ->
630    %% This instruction refers to a specific operation, so we must not
631    %% substitute the context argument.
632    bs_instrs_is(Is, CtxChain, [I | Acc]);
633bs_instrs_is([#b_set{op=Op,args=Args0}=I0|Is], CtxChain, Acc) ->
634    Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
635    I1 = I0#b_set{args=Args},
636    I = case {Op,Args} of
637            {bs_match,[#b_literal{val=skip},Ctx,Type|As]} ->
638                I1#b_set{op=bs_skip,args=[Type,Ctx|As]};
639            {bs_match,[#b_literal{val=string},Ctx|As]} ->
640                I1#b_set{op=bs_match_string,args=[Ctx|As]};
641            {_,_} ->
642                I1
643        end,
644    bs_instrs_is(Is, CtxChain, [I|Acc]);
645bs_instrs_is([], _, Acc) ->
646    reverse(Acc).
647
648%% Combine a bs_match instruction with the destination register
649%% taken from a bs_extract instruction.
650
651bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) ->
652    [#b_set{}=Succeeded,
653     #b_set{op=bs_match,args=[Type,_|As]}=BsMatch|Is1] = reverse(Is0),
654    Is = reverse(Is1, [BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]},
655                       Succeeded#b_set{args=[Dst]}]),
656    [{L,Blk#b_blk{is=Is}}|Acc].
657
658bs_subst_ctx(#b_var{}=Var, CtxChain) ->
659    case CtxChain of
660        #{Var:={context,Ctx}} ->
661            Ctx;
662        #{Var:=ParentCtx} ->
663            bs_subst_ctx(ParentCtx, CtxChain);
664        #{} ->
665            %% Not a match context variable.
666            Var
667    end;
668bs_subst_ctx(Other, _CtxChain) ->
669    Other.
670
671%% legacy_bs(St0) -> St.
672%%  Binary matching instructions in OTP 21 and earlier don't support
673%%  a Y register as destination. If St#st.use_bsm3 is false,
674%%  we will need to rewrite those instructions so that the result
675%%  is first put in an X register and then moved to a Y register
%%  if the operation succeeds.
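%%
%%  Sketch of the rewrite (variable names invented): assuming Dst has
%%  been reserved a Y register,
%%
%%      Dst = bs_get ...
%%      Bool = succeeded Dst
%%      br Bool, ^succ, ^fail
%%
%%  becomes
%%
%%      Tmp = bs_get ...
%%      Bool = succeeded Tmp
%%      br Bool, ^succ, ^fail
%%
%%  with a 'Dst = copy Tmp' instruction inserted first in the
%%  success block.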
677
678legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) ->
679    IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]),
680    Linear0 = beam_ssa:linearize(Blocks0),
681    {Linear,Count} = legacy_bs(Linear0, IsYreg, Count0, #{}, []),
682    Blocks = maps:from_list(Linear),
683    St#st{ssa=Blocks,cnt=Count};
684legacy_bs(#st{use_bsm3=true}=St) -> St.
685
686legacy_bs([{L,Blk}|Bs], IsYreg, Count0, Copies0, Acc) ->
687    #b_blk{is=Is0,last=Last} = Blk,
688    Is1 = case Copies0 of
689              #{L:=Copy} -> [Copy|Is0];
690              #{} -> Is0
691          end,
692    {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
693    legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]);
694legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
695    {Acc,Count}.
696
697legacy_bs_is([#b_set{op=Op,dst=Dst}=I0,
698              #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0],
699             Last, IsYreg, Count0, Copies0, Acc) ->
700    NeedsFix = is_map_key(Dst, IsYreg) andalso
701        case Op of
702            bs_get -> true;
703            bs_init -> true;
704            _ -> false
705        end,
706    case NeedsFix of
707        true ->
708            TempDst = #b_var{name={'@bs_temp_dst',Count0}},
709            Count = Count0 + 1,
710            I = I0#b_set{dst=TempDst},
711            SuccI = SuccI0#b_set{args=[TempDst]},
712            Copy = #b_set{op=copy,dst=Dst,args=[TempDst]},
713            #b_br{bool=SuccDst,succ=SuccL} = Last,
714            Copies = Copies0#{SuccL=>Copy},
715            legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]);
716        false ->
717            legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc])
718    end;
719legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
720    legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
721legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
722    {reverse(Acc),Count,Copies}.
723
724%% sanitize(St0) -> St.
725%%  Remove constructs that can cause problems later:
726%%
727%%  * Unreachable blocks may cause problems for determination of
728%%  dominators.
729%%
730%%  * Some instructions (such as get_hd) don't accept literal
%%  arguments. Evaluate such instructions at compile time and remove them.
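%%
%%  Two sketches of what this means in practice:
%%
%%  * 'X = get_hd `[a,b]`' is evaluated at compile time; X is replaced
%%    by the literal `a` and the instruction is removed.
%%
%%  * get_map_element does not accept a literal map, so the literal is
%%    first bound to a fresh variable:
%%
%%        Map = copy `#{}`
%%        Val = get_map_element Map, Key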
732
733sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
734    Ls = beam_ssa:rpo(Blocks0),
735    {Blocks,Count} = sanitize(Ls, Count0, Blocks0, #{}),
736    St#st{ssa=Blocks,cnt=Count}.
737
738sanitize([L|Ls], Count0, Blocks0, Values0) ->
739    #b_blk{is=Is0,last=Last0} = Blk0 = map_get(L, Blocks0),
740    case sanitize_is(Is0, Last0, Count0, Values0, false, []) of
741        no_change ->
742            sanitize(Ls, Count0, Blocks0, Values0);
743        {Is,Last,Count,Values} ->
744            Blk = Blk0#b_blk{is=Is,last=Last},
745            Blocks = Blocks0#{L:=Blk},
746            sanitize(Ls, Count, Blocks, Values)
747    end;
748sanitize([], Count, Blocks0, Values) ->
749    Blocks = if
750                 map_size(Values) =:= 0 ->
751                     Blocks0;
752                 true ->
753                     RPO = beam_ssa:rpo(Blocks0),
754                     beam_ssa:rename_vars(Values, RPO, Blocks0)
755             end,
756
757    %% Unreachable blocks can cause problems for the dominator calculations.
758    Ls = beam_ssa:rpo(Blocks),
759    Reachable = gb_sets:from_list(Ls),
760    {case map_size(Blocks) =:= gb_sets:size(Reachable) of
761         true -> Blocks;
762         false -> remove_unreachable(Ls, Blocks, Reachable, [])
763     end,Count}.
764
765sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
766            Last, Count0, Values, Changed, Acc) ->
767    case sanitize_args(Args0, Values) of
768        [#b_literal{}=Map,Key] ->
769            %% Bind the literal map to a variable.
770            {MapVar,Count} = new_var('@ssa_map', Count0),
771            I = I0#b_set{args=[MapVar,Key]},
772            Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
773            sanitize_is(Is, Last, Count, Values, true, [I,Copy|Acc]);
774        [_,_]=Args0 ->
775            sanitize_is(Is, Last, Count0, Values, Changed, [I0|Acc]);
776        [_,_]=Args ->
777            I = I0#b_set{args=Args},
778            sanitize_is(Is, Last, Count0, Values, true, [I|Acc])
779    end;
780sanitize_is([#b_set{op={succeeded,guard},dst=Dst,args=[Arg0]}=I0],
781            #b_br{bool=Dst}=Last, Count, Values, _Changed, Acc0) ->
782    %% We no longer need to distinguish between guard and body checks, so we'll
783    %% rewrite this as a plain 'succeeded'.
784    case sanitize_arg(Arg0, Values) of
785        #b_var{}=Arg ->
786            case Acc0 of
787                [#b_set{op=call,
788                        args=[#b_remote{mod=#b_literal{val=erlang},
789                                        name=#b_literal{val=error},
790                                        arity=1},_],
791                        dst=Arg0}|Acc] ->
792                    %% This erlang:error/1 is the result from a
793                    %% sanitized bs_add or bs_init instruction. Calls
                    %% to erlang:error/1 in a receive are not allowed, so
795                    %% we will have to rewrite this instruction
796                    %% sequence to an unconditional branch to the
797                    %% failure label.
798                    Fail = Last#b_br.fail,
799                    Br = #b_br{bool=#b_literal{val=true},succ=Fail,fail=Fail},
800                    {reverse(Acc), Br, Count, Values};
801                _ ->
802                    I = I0#b_set{op=succeeded,args=[Arg]},
803                    {reverse(Acc0, [I]), Last, Count, Values}
804            end;
805        #b_literal{} ->
806            Value = #b_literal{val=true},
807            {reverse(Acc0), Last, Count, Values#{ Dst => Value }}
808    end;
809sanitize_is([#b_set{op={succeeded,body},dst=Dst,args=[Arg0]}=I0],
810            #b_br{bool=Dst}=Last, Count, Values, _Changed, Acc) ->
811    %% We no longer need to distinguish between guard and body checks, so we'll
812    %% rewrite this as a plain 'succeeded'.
813    case sanitize_arg(Arg0, Values) of
814        #b_var{}=Arg ->
815            I = I0#b_set{op=succeeded,args=[Arg]},
816            {reverse(Acc, [I]), Last, Count, Values};
817        #b_literal{} ->
818            Value = #b_literal{val=true},
819            {reverse(Acc), Last, Count, Values#{ Dst => Value }}
820    end;
821sanitize_is([#b_set{op={succeeded,Kind},args=[Arg0]} | Is],
822            Last, Count, Values, _Changed, Acc) ->
823    %% We're no longer branching on this instruction and can safely remove it.
824    [] = Is, #b_br{succ=Same,fail=Same} = Last, %Assertion.
825    if
826        Same =:= ?EXCEPTION_BLOCK ->
827            %% The checked instruction always fails at runtime and we're not
828            %% in a try/catch; rewrite the terminator to a return.
829            body = Kind,                        %Assertion.
830            Arg = sanitize_arg(Arg0, Values),
831            sanitize_is(Is, #b_ret{arg=Arg}, Count, Values, true, Acc);
832        Same =/= ?EXCEPTION_BLOCK ->
833            %% We either always succeed, or always fail to somewhere other than
834            %% the exception block.
835            true = Kind =:= guard orelse Kind =:= body, %Assertion.
836            sanitize_is(Is, Last, Count, Values, true, Acc)
837    end;
838sanitize_is([#b_set{op=bs_test_tail}=I], Last, Count, Values, Changed, Acc) ->
839    case Last of
840        #b_br{succ=Same,fail=Same} ->
841            sanitize_is([], Last, Count, Values, true, Acc);
842        _ ->
843            do_sanitize_is(I, [], Last, Count, Values, Changed, Acc)
844    end;
845sanitize_is([#b_set{}=I|Is], Last, Count, Values, Changed, Acc) ->
846    do_sanitize_is(I, Is,  Last, Count, Values, Changed, Acc);
847sanitize_is([], Last, Count, Values, Changed, Acc) ->
848    case Changed of
849        true ->
850            {reverse(Acc), Last, Count, Values};
851        false ->
852            no_change
853    end.
854
855do_sanitize_is(#b_set{op=Op,dst=Dst,args=Args0}=I0,
856               Is, Last, Count, Values, Changed0, Acc) ->
857    Args = sanitize_args(Args0, Values),
858    case sanitize_instr(Op, Args, I0) of
859        {value,Value0} ->
860            Value = #b_literal{val=Value0},
861            sanitize_is(Is, Last, Count, Values#{Dst=>Value}, true, Acc);
862        {ok,I} ->
863            sanitize_is(Is, Last, Count, Values, true, [I|Acc]);
864        ok ->
865            I = I0#b_set{args=Args},
866            Changed = Changed0 orelse Args =/= Args0,
867            sanitize_is(Is, Last, Count, Values, Changed, [I|Acc])
868    end.
869
870sanitize_args(Args, Values) ->
871    [sanitize_arg(Arg, Values) || Arg <- Args].
872
873sanitize_arg(#b_var{}=Var, Values) ->
874    case Values of
875        #{Var:=New} -> New;
876        #{} -> Var
877    end;
878sanitize_arg(Arg, _Values) ->
879    Arg.
880
881
882sanitize_instr(phi, PhiArgs, _I) ->
883    case phi_all_same_literal(PhiArgs) of
884        true ->
885            %% (Can only happen when some optimizations have been
886            %% turned off.)
887            %%
888            %% This phi node always produces the same literal value.
            %% We must do constant propagation of the value to ensure
890            %% that we can sanitize any instructions that don't accept
891            %% literals (such as `get_hd`). This is necessary for
892            %% correctness, because beam_ssa_codegen:prefer_xregs/2
893            %% does constant propagation and could propagate a literal
            %% into an instruction that doesn't accept literals.
895            [{#b_literal{val=Val},_}|_] = PhiArgs,
896            {value,Val};
897        false ->
898            ok
899    end;
900sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
901    case erl_bifs:is_pure(erlang, Bif, 1) of
902        false ->
903            ok;
904        true ->
905            try
906                {value,erlang:Bif(Lit)}
907            catch
908                error:_ ->
909                    ok
910            end
911    end;
912sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) ->
913    true = erl_bifs:is_pure(erlang, Bif, 2),    %Assertion.
914    try
915        {value,erlang:Bif(Lit1, Lit2)}
916    catch
917        error:_ ->
918            ok
919    end;
920sanitize_instr(bs_match, Args, I) ->
    %% Matching of floats is never changed to a bs_skip even when the
922    %% value is never used, because the match can always fail (for example,
923    %% if it is a NaN).
924    [#b_literal{val=float}|_] = Args,           %Assertion.
925    {ok,I#b_set{op=bs_get}};
926sanitize_instr(get_hd, [#b_literal{val=[Hd|_]}], _I) ->
927    {value,Hd};
928sanitize_instr(get_tl, [#b_literal{val=[_|Tl]}], _I) ->
929    {value,Tl};
930sanitize_instr(get_tuple_element, [#b_literal{val=T},
931                                   #b_literal{val=I}], _I)
932  when I < tuple_size(T) ->
933    {value,element(I+1, T)};
934sanitize_instr(is_nonempty_list, [#b_literal{val=Lit}], _I) ->
935    {value,case Lit of
936               [_|_] -> true;
937               _ -> false
938           end};
939sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
940                                 #b_literal{val=Arity},
941                                 #b_literal{val=Tag}], _I)
942  when is_integer(Arity), is_atom(Tag) ->
943    if
944        tuple_size(Tuple) =:= Arity, element(1, Tuple) =:= Tag ->
945            {value,true};
946        true ->
947            {value,false}
948    end;
949sanitize_instr(bs_add, [Arg1,Arg2,_|_], I0) ->
950    case all(fun(#b_literal{val=Size}) -> is_integer(Size) andalso Size >= 0;
951                (#b_var{}) -> true
952             end, [Arg1,Arg2]) of
953        true -> ok;
954        false -> {ok,sanitize_badarg(I0)}
955    end;
956sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) ->
957    if
958        is_integer(Sz), Sz >= 0 -> ok;
959        true -> {ok,sanitize_badarg(I0)}
960    end;
961sanitize_instr(bs_init, [#b_literal{},_,#b_literal{val=Sz}|_], I0) ->
962    if
963        is_integer(Sz), Sz >= 0 -> ok;
964        true -> {ok,sanitize_badarg(I0)}
965    end;
966sanitize_instr(_, _, _) ->
967    ok.
968
969sanitize_badarg(I) ->
970    Func = #b_remote{mod=#b_literal{val=erlang},
971                     name=#b_literal{val=error},arity=1},
972    I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}.
973
974remove_unreachable([L|Ls], Blocks, Reachable, Acc) ->
975    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
976    case split_phis(Is0) of
977        {[_|_]=Phis,Rest} ->
978            Is = [prune_phi(Phi, Reachable) || Phi <- Phis] ++ Rest,
979            Blk = Blk0#b_blk{is=Is},
980            remove_unreachable(Ls, Blocks, Reachable, [{L,Blk}|Acc]);
981        {[],_} ->
982            remove_unreachable(Ls, Blocks, Reachable, [{L,Blk0}|Acc])
983    end;
984remove_unreachable([], _Blocks, _, Acc) ->
985    maps:from_list(Acc).
986
987prune_phi(#b_set{args=Args0}=Phi, Reachable) ->
988    Args = [A || {_,Pred}=A <- Args0,
989                 gb_sets:is_element(Pred, Reachable)],
990    Phi#b_set{args=Args}.
991
992phi_all_same_literal([{#b_literal{}=Arg, _From} | Phis]) ->
993    phi_all_same_literal_1(Phis, Arg);
994phi_all_same_literal([_|_]) ->
995    false.
996
997phi_all_same_literal_1([{Arg, _From} | Phis], Arg) ->
998    phi_all_same_literal_1(Phis, Arg);
999phi_all_same_literal_1([], _Arg) ->
1000    true;
1001phi_all_same_literal_1(_Phis, _Arg) ->
1002    false.
1003
1004%%% Rewrite certain calls to erlang:error/{1,2} to specialized
1005%%% instructions:
1006%%%
1007%%% erlang:error({badmatch,Value})       => badmatch Value
1008%%% erlang:error({case_clause,Value})    => case_end Value
1009%%% erlang:error({try_clause,Value})     => try_case_end Value
1010%%% erlang:error(if_clause)              => if_end
1011%%% erlang:error(function_clause, Args)  => jump FuncInfoLabel
1012%%%
1013%%% In SSA code, we represent those instructions as a 'match_fail'
1014%%% instruction with the name of the BEAM instruction as the first
1015%%% argument.
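%%%
%%% For example (sketch), compiling
%%%
%%%     f({ok,V}) -> V.
%%%
%%% produces a call to erlang:error(function_clause, [Arg]) for
%%% arguments that match no clause; this pass rewrites that call to a
%%% match_fail instruction carrying the original arguments, letting
%%% the code generator emit a jump to the func_info label instead of
%%% a call.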
1016
1017match_fail_instructions(#st{ssa=Blocks0,args=Args,location=Location}=St) ->
1018    Ls = maps:to_list(Blocks0),
1019    Info = {length(Args),Location},
1020    Blocks = match_fail_instrs_1(Ls, Info, Blocks0),
1021    St#st{ssa=Blocks}.
1022
1023match_fail_instrs_1([{L,#b_blk{is=Is0}=Blk}|Bs], Arity, Blocks0) ->
1024    case match_fail_instrs_blk(Is0, Arity, []) of
1025        none ->
1026            match_fail_instrs_1(Bs, Arity, Blocks0);
1027        Is ->
1028            Blocks = Blocks0#{L:=Blk#b_blk{is=Is}},
1029            match_fail_instrs_1(Bs, Arity, Blocks)
1030    end;
1031match_fail_instrs_1([], _Arity, Blocks) -> Blocks.
1032
1033match_fail_instrs_blk([#b_set{op=put_tuple,dst=Dst,
1034                              args=[#b_literal{val=Tag},Val]},
1035                       #b_set{op=call,
1036                              args=[#b_remote{mod=#b_literal{val=erlang},
1037                                              name=#b_literal{val=error}},
1038                                    Dst]}=Call|Is],
1039                      _Arity, Acc) ->
1040    match_fail_instr(Call, Tag, Val, Is, Acc);
1041match_fail_instrs_blk([#b_set{op=call,
1042                              args=[#b_remote{mod=#b_literal{val=erlang},
1043                                              name=#b_literal{val=error}},
1044                                    #b_literal{val={Tag,Val}}]}=Call|Is],
1045                      _Arity, Acc) ->
1046    match_fail_instr(Call, Tag, #b_literal{val=Val}, Is, Acc);
1047match_fail_instrs_blk([#b_set{op=call,
1048                              args=[#b_remote{mod=#b_literal{val=erlang},
1049                                              name=#b_literal{val=error}},
1050                                    #b_literal{val=if_clause}]}=Call|Is],
1051                      _Arity, Acc) ->
1052    I = Call#b_set{op=match_fail,args=[#b_literal{val=if_end}]},
1053    reverse(Acc, [I|Is]);
1054match_fail_instrs_blk([#b_set{op=call,anno=Anno,
1055                              args=[#b_remote{mod=#b_literal{val=erlang},
1056                                              name=#b_literal{val=error}},
1057                                    #b_literal{val=function_clause},
1058                                    Stk]}=Call],
1059                      {Arity,Location}, Acc) ->
1060    case match_fail_stk(Stk, Acc, [], []) of
1061        {[_|_]=Vars,Is} when length(Vars) =:= Arity ->
1062            case maps:get(location, Anno, none) of
1063                Location ->
1064                    I = Call#b_set{op=match_fail,
1065                                   args=[#b_literal{val=function_clause}|Vars]},
1066                    Is ++ [I];
1067                _ ->
1068                    %% erlang:error/2 has a different location than the
1069                    %% func_info instruction at the beginning of the function
1070                    %% (probably because of inlining). Keep the original call.
1071                    reverse(Acc, [Call])
1072            end;
1073        _ ->
1074            %% Either the stacktrace could not be picked apart (for example,
1075            %% if the call to erlang:error/2 was handwritten) or the number
1076            %% of arguments in the stacktrace was different from the arity
1077            %% of the host function (because it is the implementation of a
1078            %% fun). Keep the original call.
1079            reverse(Acc, [Call])
1080    end;
1081match_fail_instrs_blk([I|Is], Arity, Acc) ->
1082    match_fail_instrs_blk(Is, Arity, [I|Acc]);
1083match_fail_instrs_blk(_, _, _) ->
1084    none.
1085
1086match_fail_instr(Call, Tag, Val, Is, Acc) ->
1087    Op = case Tag of
1088             badmatch -> Tag;
1089             case_clause -> case_end;
1090             try_clause -> try_case_end;
1091             _ -> none
1092         end,
1093    case Op of
1094        none ->
1095            none;
1096        _ ->
1097            I = Call#b_set{op=match_fail,args=[#b_literal{val=Op},Val]},
1098            reverse(Acc, [I|Is])
1099    end.
1100
1101match_fail_stk(#b_var{}=V, [#b_set{op=put_list,dst=V,args=[H,T]}|Is], IAcc, VAcc) ->
1102    match_fail_stk(T, Is, IAcc, [H|VAcc]);
1103match_fail_stk(#b_literal{val=[H|T]}, Is, IAcc, VAcc) ->
1104    match_fail_stk(#b_literal{val=T}, Is, IAcc, [#b_literal{val=H}|VAcc]);
1105match_fail_stk(#b_literal{val=[]}, [], IAcc, VAcc) ->
1106    {reverse(VAcc),IAcc};
1107match_fail_stk(T, [#b_set{op=Op}=I|Is], IAcc, VAcc)
1108  when Op =:= bs_get_tail; Op =:= bs_set_position ->
1109    match_fail_stk(T, Is, [I|IAcc], VAcc);
1110match_fail_stk(_, _, _, _) -> none.
1111
1112%%%
1113%%% Fix tuples.
1114%%%
1115
1116%% fix_tuples(St0) -> St.
1117%%  If compatibility with a previous version of Erlang has been
%%  requested, tuple creation must be split into two instructions to
%%  mirror the way tuples are created in BEAM prior to OTP 22.
1120%%  Each put_tuple instruction is split into put_tuple_arity followed
1121%%  by put_tuple_elements.
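%%
%%  For example (sketch), building the tuple {ok,X} goes from
%%
%%      Dst = put_tuple `ok`, X
%%
%%  to the pre-OTP 22 sequence
%%
%%      Dst = put_tuple_arity `2`
%%      _ = put_tuple_elements `ok`, X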
1122
1123fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
1124    F = fun (#b_set{op=put_tuple,args=Args}=Put, C0) ->
1125                Arity = #b_literal{val=length(Args)},
1126                {Ignore,C} = new_var('@ssa_ignore', C0),
1127                {[Put#b_set{op=put_tuple_arity,args=[Arity]},
1128                  #b_set{dst=Ignore,op=put_tuple_elements,args=Args}],C};
1129           (I, C) -> {[I],C}
1130        end,
1131    RPO = beam_ssa:rpo(Blocks0),
1132    {Blocks,Count} = beam_ssa:flatmapfold_instrs(F, RPO, Count0, Blocks0),
1133    St#st{ssa=Blocks,cnt=Count}.
1134
1135%%%
1136%%% Introduce the set_tuple_element instructions to make
1137%%% multiple-field record updates faster.
1138%%%
1139%%% The expansion of record field updates, when more than one field is
1140%%% updated, but not a majority of the fields, will create a sequence of
1141%%% calls to `erlang:setelement(Index, Value, Tuple)` where Tuple in the
1142%%% first call is the original record tuple, and in the subsequent calls
1143%%% Tuple is the result of the previous call. Furthermore, all Index
1144%%% values are constant positive integers, and the first call to
1145%%% `setelement` will have the greatest index. Thus all the following
%%% calls do not actually need to test at run-time whether Tuple is a
%%% tuple, nor whether the index is within the tuple bounds.
1148%%%
1149%%% Since this optimization introduces destructive updates, it used to
1150%%% be done as the very last Core Erlang pass before going to
%%% lower-level code. However, it turns out that this kind of destructive
%%% update is also awkward in SSA code and can prevent or complicate
1153%%% type analysis and aggressive optimizations.
1154%%%
%%% NOTE: Because there are no write barriers in the system, this kind of
1156%%% optimization can only be done when we are sure that garbage
1157%%% collection will not be triggered between the creation of the tuple
1158%%% and the destructive updates - otherwise we might insert pointers
1159%%% from an older generation to a newer.
1160%%%
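%%% For example (sketch), updating two fields of a five-field record,
%%%
%%%     update(R, X, Y) -> R#r{c=X, e=Y}.
%%%
%%% expands to roughly
%%%
%%%     R1 = setelement(6, R, Y),
%%%     setelement(4, R1, X)
%%%
%%% and this pass turns the second setelement call into a destructive
%%% set_tuple_element on the tuple built by the first one.
%%%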
1161
1162use_set_tuple_element(#st{ssa=Blocks0}=St) ->
1163    Uses = count_uses(Blocks0),
1164    RPO = reverse(beam_ssa:rpo(Blocks0)),
1165    Blocks = use_ste_1(RPO, Uses, Blocks0),
1166    St#st{ssa=Blocks}.
1167
1168use_ste_1([L|Ls], Uses, Blocks) ->
1169    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
1170    case use_ste_is(Is0, Uses) of
1171        Is0 ->
1172            use_ste_1(Ls, Uses, Blocks);
1173        Is ->
1174            Blk = Blk0#b_blk{is=Is},
1175            use_ste_1(Ls, Uses, Blocks#{L:=Blk})
1176    end;
1177use_ste_1([], _, Blocks) -> Blocks.
1178
1179%%% Optimize within a single block.
1180
1181use_ste_is([#b_set{}=I|Is0], Uses) ->
1182    Is = use_ste_is(Is0, Uses),
1183    case extract_ste(I) of
1184        none ->
1185            [I|Is];
1186        Extracted ->
1187            use_ste_call(Extracted, I, Is, Uses)
1188    end;
1189use_ste_is([], _Uses) -> [].
1190
1191use_ste_call({Dst0,Pos0,_Var0,_Val0}, Call1, Is0, Uses) ->
1192    case get_ste_call(Is0, []) of
1193        {Prefix,{Dst1,Pos1,Dst0,Val1},Call2,Is}
1194          when Pos1 > 0, Pos0 > Pos1 ->
1195            case is_single_use(Dst0, Uses) of
1196                true ->
1197                    Call = Call1#b_set{dst=Dst1},
1198                    Args = [Val1,Dst1,#b_literal{val=Pos1-1}],
1199                    Dsetel = Call2#b_set{op=set_tuple_element,
1200                                         dst=Dst0,
1201                                         args=Args},
1202                    [Call|Prefix] ++ [Dsetel|Is];
1203                false ->
1204                    [Call1|Is0]
1205            end;
1206        _ ->
1207            [Call1|Is0]
1208    end.
1209
1210get_ste_call([#b_set{op=get_tuple_element}=I|Is], Acc) ->
1211    get_ste_call(Is, [I|Acc]);
1212get_ste_call([#b_set{op=call}=I|Is], Acc) ->
1213    case extract_ste(I) of
1214        none ->
1215            none;
1216        Extracted ->
1217            {reverse(Acc),Extracted,I,Is}
1218    end;
1219get_ste_call(_, _) -> none.
1220
1221extract_ste(#b_set{op=call,dst=Dst,
1222                   args=[#b_remote{mod=#b_literal{val=M},
1223                                  name=#b_literal{val=F}}|Args]}) ->
1224    case {M,F,Args} of
1225        {erlang,setelement,[#b_literal{val=Pos},Tuple,Val]} ->
1226            {Dst,Pos,Tuple,Val};
1227        {_,_,_} ->
1228            none
1229    end;
1230extract_ste(#b_set{}) -> none.
1231
1232%% Count how many times each variable is used.
1233
1234count_uses(Blocks) ->
1235    count_uses_blk(maps:values(Blocks), #{}).
1236
1237count_uses_blk([#b_blk{is=Is,last=Last}|Bs], CountMap0) ->
1238    F = fun(I, CountMap) ->
1239                foldl(fun(Var, Acc) ->
1240                              case Acc of
1241                                  #{Var:=2} -> Acc;
1242                                  #{Var:=C} -> Acc#{Var:=C+1};
1243                                  #{} ->       Acc#{Var=>1}
1244                              end
1245                      end, CountMap, beam_ssa:used(I))
1246        end,
1247    CountMap = F(Last, foldl(F, CountMap0, Is)),
1248    count_uses_blk(Bs, CountMap);
1249count_uses_blk([], CountMap) -> CountMap.
1250
1251is_single_use(V, Uses) ->
1252    case Uses of
1253        #{V:=1} -> true;
1254        #{} -> false
1255    end.
1256
1257%%%
1258%%% Find out where frames should be placed.
1259%%%
1260
1261%% place_frames(St0) -> St.
1262%%   Return a list of the labels for the blocks that need stack frame
1263%%   allocation instructions.
1264%%
%%   This function attempts to place stack frames as tightly as possible
1266%%   around the code, to avoid building stack frames for code paths
1267%%   that don't need one.
1268%%
1269%%   Stack frames are placed in blocks that dominate all of their
1270%%   descendants. That guarantees that the deallocation instructions
1271%%   cannot be reached from other execution paths that didn't set up
1272%%   a stack frame or set up a stack frame with a different size.
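%%
%%   For example (a sketch; assuming some local function g/1), in
%%
%%       f(X) ->
%%           case X of
%%               a -> g(X) + 1;
%%               b -> 42
%%           end.
%%
%%   only the first clause contains a body call and therefore needs a
%%   stack frame. The frame can typically be placed in the block that
%%   makes the call to g/1, so the path returning 42 never allocates
%%   one.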
1273
1274place_frames(#st{ssa=Blocks}=St) ->
1275    Ls = beam_ssa:rpo(Blocks),
1276    {Doms,_} = beam_ssa:dominators(Ls, Blocks),
1277    Tried = gb_sets:empty(),
1278    Frames0 = [],
1279    {Frames,_} = place_frames_1(Ls, Blocks, Doms, Tried, Frames0),
1280    St#st{frames=Frames}.
1281
1282place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
1283    Blk = map_get(L, Blocks),
1284    case need_frame(Blk) of
1285        true ->
1286            %% This block needs a frame. Try to place it here.
1287            {Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0),
1288
1289            %% Successfully placed. Try to place more frames in descendants
1290            %% that are not dominated by this block.
1291            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
1292        false ->
1293            try
1294                place_frames_1(Ls, Blocks, Doms, Tried0, Frames0)
1295            catch
1296                throw:{need_frame,For,Tried1}=Reason ->
                    %% A descendant block needs a stack frame. Try to
1298                    %% place it here.
1299                    case is_dominated_by(For, L, Doms) of
1300                        true ->
1301                            %% Try to place a frame here.
1302                            {Frames,Tried} = do_place_frame(L, Blocks, Doms,
1303                                                            Tried1, Frames0),
1304                            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
1305                        false ->
1306                            %% Wrong place. This block does not dominate
1307                            %% the block that needs the frame. Pass it on
1308                            %% to our ancestors.
1309                            throw(Reason)
1310                    end
1311            end
1312    end;
1313place_frames_1([], _, _, Tried, Frames) ->
1314    {Frames,Tried}.
1315
1316%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}.
1317%%  Try to place a frame in this block. This function returns
%%  successfully if it either succeeds at placing a frame in this
1319%%  block, if an ancestor that dominates this block has already placed
1320%%  a frame, or if we have already tried to put a frame in this block.
1321%%
%%  A {need_frame,Label,Tried} exception will be thrown if this block
%%  is not suitable for having a stack frame (i.e. it does not dominate
1324%%  all of its descendants). The exception means that an ancestor will have to
1325%%  place the frame needed by this block.
1326
1327do_place_frame(L, Blocks, Doms, Tried0, Frames) ->
1328    case gb_sets:is_element(L, Tried0) of
1329        true ->
1330            %% We have already tried to put a frame in this block.
1331            {Frames,Tried0};
1332        false ->
1333            %% Try to place a frame in this block.
1334            Tried = gb_sets:insert(L, Tried0),
1335            case place_frame_here(L, Blocks, Doms, Frames) of
1336                yes ->
1337                    %% We need a frame and it is safe to place it here.
1338                    {[L|Frames],Tried};
1339                no ->
1340                    %% An ancestor has a frame. Not needed.
1341                    {Frames,Tried};
1342                ancestor ->
1343                    %% This block does not dominate all of its
1344                    %% descendants. We must place the frame in
1345                    %% an ancestor.
1346                    throw({need_frame,L,Tried})
1347            end
1348    end.
1349
1350%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor.
1351%%  Determine whether a frame should be placed in block Label.
1352
1353place_frame_here(L, Blocks, Doms, Frames) ->
1354    B0 = any(fun(DomBy) ->
1355                     is_dominated_by(L, DomBy, Doms)
1356             end, Frames),
1357    case B0 of
1358        true ->
1359            %% This block is dominated by an ancestor block that
1360            %% defines a frame. Not needed/allowed to put a frame
1361            %% here.
1362            no;
1363        false ->
1364            %% No frame in any ancestor. We need a frame.
1365            %% Now check whether the frame can be placed here.
            %% If this block dominates all of its descendants
            %% and the predecessors of any phi nodes, it can be
            %% placed here.
1369            Descendants = beam_ssa:rpo([L], Blocks),
1370            PhiPredecessors = phi_predecessors(L, Blocks),
1371            MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants),
1372            Dominates = all(fun(?EXCEPTION_BLOCK) ->
1373                                    %% This block defines no variables and calls
1374                                    %% erlang:error(badarg). It does not matter
1375                                    %% whether L dominates ?EXCEPTION_BLOCK or not;
1376                                    %% it is still safe to put the frame in L.
1377                                    true;
1378                               (Bl) ->
1379                                    is_dominated_by(Bl, L, Doms)
1380                            end, MustDominate),
1381
1382            %% Also, this block must not be a loop header.
1383            IsLoopHeader = is_loop_header(L, Blocks),
1384            case Dominates andalso not IsLoopHeader of
1385                true -> yes;
1386                false -> ancestor
1387            end
1388    end.
1389
1390%% phi_predecessors(Label, Blocks) ->
1391%%  Return all predecessors referenced in phi nodes.
1392
1393phi_predecessors(L, Blocks) ->
1394    #b_blk{is=Is} = map_get(L, Blocks),
1395    [P || #b_set{op=phi,args=Args} <- Is, {_,P} <- Args].
1396
1397%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
1398%%  Test whether block Label is dominated by block DominatedBy.
1399
1400is_dominated_by(L, DomBy, Doms) ->
1401    DominatedBy = map_get(L, Doms),
1402    member(DomBy, DominatedBy).
1403
1404%% need_frame(#b_blk{}) -> true|false.
1405%%  Test whether any of the instructions in the block requires a stack frame.
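%%
%%  As an illustrative sketch (hypothetical SSA fragments, not actual
%%  compiler output), a block ending in a tail call such as:
%%
%%    _1 = call local literal bar/0
%%    ret _1
%%
%%  does not need a stack frame, while a body call whose value is not
%%  the returned value does:
%%
%%    _1 = call local literal bar/0
%%    ret literal ok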
1406
1407need_frame(#b_blk{is=Is,last=#b_ret{arg=Ret}}) ->
1408    need_frame_1(Is, {return,Ret});
1409need_frame(#b_blk{is=Is}) ->
1410    need_frame_1(Is, body).
1411
1412need_frame_1([#b_set{op=old_make_fun,dst=Fun}|Is], {return,Ret}=Context) ->
1413    case need_frame_1(Is, Context) of
1414        true ->
1415            true;
1416        false ->
1417            %% Since old_make_fun clobbers X registers, a stack frame is
            %% needed if any of the following instructions uses any
            %% variable other than the one holding the reference to
1420            %% the created fun.
1421            Defs = ordsets:from_list([Dst || #b_set{dst=Dst} <- Is]),
1422            Blk = #b_blk{is=Is,last=#b_ret{arg=Ret}},
1423            Used = ordsets:subtract(beam_ssa:used(Blk), Defs),
1424            case Used of
1425                [] -> false;
1426                [Fun] -> false;
1427                [_|_] -> true
1428            end
    end;
1430need_frame_1([#b_set{op=new_try_tag}|_], _) ->
1431    true;
1432need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) ->
1433    if
1434        Val =:= Ret -> need_frame_1(Is, tail);
1435        true -> need_frame_1(Is, body)
1436    end;
1437need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
1438    case Func of
1439        #b_remote{mod=#b_literal{val=Mod},
1440                  name=#b_literal{val=Name},
1441                  arity=Arity} when is_atom(Mod), is_atom(Name) ->
1442            Context =:= body orelse
1443                Is =/= [] orelse
1444                is_trap_bif(Mod, Name, Arity);
1445        #b_remote{} ->
1446            %% This is an apply(), which always needs a frame.
1447            true;
1448        #b_local{} ->
1449            Context =:= body orelse Is =/= [];
1450        _ ->
            %% A fun call always needs a frame.
1452            true
1453    end;
1454need_frame_1([I|Is], Context) ->
1455    beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
1456need_frame_1([], _) -> false.
1457
1458%% is_trap_bif(Mod, Name, Arity) -> true|false.
1459%%   Test whether we need a stack frame for this BIF.
1460
1461is_trap_bif(erlang, '!', 2) -> true;
1462is_trap_bif(erlang, link, 1) -> true;
1463is_trap_bif(erlang, unlink, 1) -> true;
1464is_trap_bif(erlang, monitor_node, 2) -> true;
1465is_trap_bif(erlang, group_leader, 2) -> true;
1466is_trap_bif(erlang, exit, 2) -> true;
1467is_trap_bif(_, _, _) -> false.
1468
1469%%%
1470%%% Fix variables used in matching in receive.
1471%%%
1472%%% The loop_rec/2 instruction may return a reference to a
1473%%% message outside of any heap or heap fragment. If the message
%%% does not match, we must not store any reference to the message
%%% (or any part of it) on the stack; if we do, the message will be
%%% corrupted if a garbage collection happens.
%%%
%%% Here we make sure to introduce copies of variables that are
%%% matched out and subsequently used after the remove_message/0
%%% instructions. That ensures that only X registers are used
%%% during matching.
1482%%%
1483%%% Depending on where variables are defined and used, they must
1484%%% be handled in two different ways.
1485%%%
1486%%% Variables that are always defined in the receive (before branching
1487%%% out into the different clauses of the receive) and used after the
1488%%% receive must be handled in the following way: Before each
1489%%% remove_message instruction, each such variable must be copied, and
1490%%% all variables must be consolidated using a phi node in the
1491%%% common exit block for the receive.
1492%%%
1493%%% Variables that are matched out and used in the same clause
1494%%% need copy instructions before the remove_message instruction
1495%%% in that clause.
1496%%%
1497
1498fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) ->
1499    {Blocks,Count} = fix_receives_1(maps:to_list(Blocks0),
1500                                    Blocks0, Count0),
1501    St#st{ssa=Blocks,cnt=Count}.
1502
1503fix_receives_1([{L,Blk}|Ls], Blocks0, Count0) ->
1504    case Blk of
1505        #b_blk{is=[#b_set{op=peek_message}|_]} ->
1506            Rm = find_rm_blocks(L, Blocks0),
1507            LoopExit = find_loop_exit(Rm, Blocks0),
1508            RPO = beam_ssa:rpo([L], Blocks0),
1509            Defs0 = beam_ssa:def(RPO, Blocks0),
1510            CommonUsed = recv_common(Defs0, LoopExit, Blocks0),
1511            {Blocks1,Count1} = recv_crit_edges(Rm, LoopExit, Blocks0, Count0),
1512            {Blocks2,Count2} = recv_fix_common(CommonUsed, LoopExit, Rm,
1513                                               Blocks1, Count1),
1514            Defs = ordsets:subtract(Defs0, CommonUsed),
1515            {Blocks,Count} = fix_receive(Rm, Defs, Blocks2, Count2),
1516            fix_receives_1(Ls, Blocks, Count);
1517        #b_blk{} ->
1518            fix_receives_1(Ls, Blocks0, Count0)
1519    end;
1520fix_receives_1([], Blocks, Count) ->
1521    {Blocks,Count}.
1522
1523recv_common(_Defs, none, _Blocks) ->
1524    %% There is no common exit block because receive is used
1525    %% in the tail position of a function.
1526    [];
1527recv_common(Defs, Exit, Blocks) ->
1528    RPO = beam_ssa:rpo([Exit], Blocks),
1529    {ExitDefs,ExitUnused} = beam_ssa:def_unused(RPO, Defs, Blocks),
1530    Def = ordsets:subtract(Defs, ExitDefs),
1531    ordsets:subtract(Def, ExitUnused).
1532
1533%% recv_crit_edges([RemoveMessageLabel], LoopExit,
1534%%                 Blocks0, Count0) -> {Blocks,Count}.
1535%%
1536%%  Adds dummy blocks on all conditional jumps to the exit block so that
1537%%  recv_fix_common/5 can insert phi nodes without having to worry about
1538%%  critical edges.
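%%
%%  Sketch (illustrative labels only): if block 10 ends with
%%
%%    br Bool, label 20, label 11
%%
%%  where 20 is the exit block, a new empty block (with a fresh label,
%%  say 12) that only branches to block 20 is inserted, and the
%%  terminator of block 10 is rerouted to it:
%%
%%    br Bool, label 12, label 11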
1539
1540recv_crit_edges(_Rms, none, Blocks0, Count0) ->
1541    {Blocks0, Count0};
1542recv_crit_edges(Rms, Exit, Blocks0, Count0) ->
1543    Ls = beam_ssa:rpo(Rms, Blocks0),
1544    rce_insert_edges(Ls, Exit, Count0, Blocks0).
1545
1546rce_insert_edges([L | Ls], Exit, Count0, Blocks0) ->
1547    Successors = beam_ssa:successors(map_get(L, Blocks0)),
1548    case member(Exit, Successors) of
1549        true when Successors =/= [Exit] ->
1550            {Blocks, Count} = rce_insert_edge(L, Exit, Count0, Blocks0),
1551            rce_insert_edges(Ls, Exit, Count, Blocks);
1552        _ ->
1553            rce_insert_edges(Ls, Exit, Count0, Blocks0)
1554    end;
1555rce_insert_edges([], _Exit, Count, Blocks) ->
1556    {Blocks, Count}.
1557
1558rce_insert_edge(L, Exit, Count, Blocks0) ->
1559    #b_blk{last=Last0} = FromBlk0 = map_get(L, Blocks0),
1560
1561    ToExit = #b_br{bool=#b_literal{val=true},succ=Exit,fail=Exit},
1562
1563    FromBlk = FromBlk0#b_blk{last=rce_reroute_terminator(Last0, Exit, Count)},
1564    EdgeBlk = #b_blk{anno=#{},is=[],last=ToExit},
1565
1566    Blocks = Blocks0#{ Count => EdgeBlk, L => FromBlk },
1567    {Blocks, Count + 1}.
1568
1569rce_reroute_terminator(#b_br{succ=Exit}=Last, Exit, New) ->
1570    rce_reroute_terminator(Last#b_br{succ=New}, Exit, New);
1571rce_reroute_terminator(#b_br{fail=Exit}=Last, Exit, New) ->
1572    rce_reroute_terminator(Last#b_br{fail=New}, Exit, New);
1573rce_reroute_terminator(#b_br{}=Last, _Exit, _New) ->
1574    Last;
1575rce_reroute_terminator(#b_switch{fail=Exit}=Last, Exit, New) ->
1576    rce_reroute_terminator(Last#b_switch{fail=New}, Exit, New);
1577rce_reroute_terminator(#b_switch{list=List0}=Last, Exit, New) ->
1578    List = [if
1579                Lbl =:= Exit -> {Arg, New};
1580                Lbl =/= Exit -> {Arg, Lbl}
1581            end || {Arg, Lbl} <- List0],
1582    Last#b_switch{list=List}.
1583
1584%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
1585%%                 Blocks0, Count0) -> {Blocks,Count}.
%%  Handle variables always defined in a receive and used
1587%%  in the exit block following the receive.
1588
1589recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) ->
1590    {Msg,Count1} = new_var('@recv', Count0),
1591    RPO = beam_ssa:rpo([Exit], Blocks0),
1592    Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, RPO, Blocks0),
1593    N = length(Rm),
1594    {MsgVars,Count} = new_vars(duplicate(N, '@recv'), Count1),
1595    PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1),
1596    Phi = #b_set{op=phi,dst=Msg,args=PhiArgs},
1597    ExitBlk0 = map_get(Exit, Blocks1),
1598    ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]},
1599    Blocks2 = Blocks1#{Exit:=ExitBlk},
1600    Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2),
1601    recv_fix_common(T, Exit, Rm, Blocks, Count);
1602recv_fix_common([], _, _, Blocks, Count) ->
1603    {Blocks,Count}.
1604
1605recv_fix_common_1([V|Vs], [Rm|Rms], Msg, Blocks0) ->
1606    Ren = #{Msg=>V},
1607    RPO = beam_ssa:rpo([Rm], Blocks0),
1608    Blocks1 = beam_ssa:rename_vars(Ren, RPO, Blocks0),
1609    #b_blk{is=Is0} = Blk0 = map_get(Rm, Blocks1),
1610    Copy = #b_set{op=copy,dst=V,args=[Msg]},
1611    Is = insert_after_phis(Is0, [Copy]),
1612    Blk = Blk0#b_blk{is=Is},
1613    Blocks = Blocks1#{Rm:=Blk},
1614    recv_fix_common_1(Vs, Rms, Msg, Blocks);
1615recv_fix_common_1([], [], _Msg, Blocks) -> Blocks.
1616
1617fix_exit_phi_args([V|Vs], [Rm|Rms], Exit, Blocks) ->
1618    Path = beam_ssa:rpo([Rm], Blocks),
1619    Preds = exit_predecessors(Path, Exit, Blocks),
1620    [{V,Pred} || Pred <- Preds] ++ fix_exit_phi_args(Vs, Rms, Exit, Blocks);
1621fix_exit_phi_args([], [], _, _) -> [].
1622
1623exit_predecessors([L|Ls], Exit, Blocks) ->
1624    Blk = map_get(L, Blocks),
1625    case member(Exit, beam_ssa:successors(Blk)) of
1626        true ->
1627            [L|exit_predecessors(Ls, Exit, Blocks)];
1628        false ->
1629            exit_predecessors(Ls, Exit, Blocks)
1630    end;
1631exit_predecessors([], _Exit, _Blocks) -> [].
1632
1633%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
1634%%  Add a copy instruction for all variables that are matched out and
1635%%  later used within a clause of the receive.
1636
1637fix_receive([L|Ls], Defs, Blocks0, Count0) ->
1638    RPO = beam_ssa:rpo([L], Blocks0),
1639    {RmDefs,Unused} = beam_ssa:def_unused(RPO, Defs, Blocks0),
1640    Def = ordsets:subtract(Defs, RmDefs),
1641    Used = ordsets:subtract(Def, Unused),
1642    {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Used], Count0),
1643    Ren = zip(Used, NewVars),
1644    Blocks1 = beam_ssa:rename_vars(Ren, RPO, Blocks0),
1645    #b_blk{is=Is0} = Blk1 = map_get(L, Blocks1),
1646    CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
1647    Is = insert_after_phis(Is0, CopyIs),
1648    Blk = Blk1#b_blk{is=Is},
1649    Blocks = Blocks1#{L:=Blk},
1650    fix_receive(Ls, Defs, Blocks, Count);
1651fix_receive([], _Defs, Blocks, Count) ->
1652    {Blocks,Count}.
1653
1654%% find_loop_exit([Label], Blocks) -> Label | none.
1655%%  Given the list of all blocks with the remove_message instructions
1656%%  for this receive, find the block to which control is transferred
1657%%  when the receive loop is exited (if any).
1658
1659find_loop_exit([_,_|_]=RmBlocks, Blocks) ->
1660    %% We used to only analyze the path from two of the remove_message
1661    %% blocks. That would fail to find a common block if one or both
1662    %% of the blocks happened to raise an exception. To be sure that
1663    %% we always find a common block if there is one (shared by at
1664    %% least two clauses), we must analyze the path from all
1665    %% remove_message blocks.
1666    RPO = beam_ssa:rpo(Blocks),
1667    {Dominators,_} = beam_ssa:dominators(RPO, Blocks),
1668    RmSet = sets:from_list(RmBlocks, [{version, 2}]),
1669    RmRPO = beam_ssa:rpo(RmBlocks, Blocks),
1670    find_loop_exit_1(RmRPO, RmSet, Dominators, Blocks);
1671find_loop_exit(_, _) ->
1672    %% There is (at most) a single clause. There is no common
1673    %% loop exit block.
1674    none.
1675
1676find_loop_exit_1([?EXCEPTION_BLOCK|Ls], RmSet, Dominators, Blocks) ->
1677    %% ?EXCEPTION_BLOCK is a marker and not an actual block, so it is not
1678    %% the block we are looking for.
1679    find_loop_exit_1(Ls, RmSet, Dominators, Blocks);
1680find_loop_exit_1([L|Ls0], RmSet, Dominators, Blocks) ->
1681    DomBy = map_get(L, Dominators),
1682    case any(fun(E) -> sets:is_element(E, RmSet) end, DomBy) of
1683        true ->
1684            %% This block is dominated by one of the remove_message blocks,
1685            %% which means that the block is part of only one clause.
1686            %% It is not the block we are looking for.
1687            find_loop_exit_1(Ls0, RmSet, Dominators, Blocks);
1688        false ->
1689            %% This block is the first block that is not dominated by
1690            %% any of the blocks with remove_message instructions.
1691            case map_get(L, Blocks) of
1692                #b_blk{is=[#b_set{op=landingpad}|_]} ->
1693                    %% This is the landing pad reached when an
1694                    %% exception is caught. It is not the block
1695                    %% we are looking for. Furthermore, none of the
1696                    %% blocks reachable from this block can be
1697                    %% the exit block we are looking for.
1698                    Ls = Ls0 -- beam_ssa:rpo([L], Blocks),
1699                    find_loop_exit_1(Ls, RmSet, Dominators, Blocks);
1700                #b_blk{} ->
1701                    %% This block is not dominated by any of the receive
1702                    %% clauses and is not the landing pad for an exception.
1703                    %% It is the common exit block we are looking for.
1704                    L
1705            end
1706    end;
1707find_loop_exit_1([], _, _, _) ->
    %% None of the clauses transfers control to a common block after the
    %% receive statement. That means that the receive statement is at the
    %% end of a function (or that all clauses raise exceptions).
1711    none.
1712
1713%% find_rm_blocks(StartLabel, Blocks) -> [Label].
1714%%  Find all blocks that start with remove_message within the receive
1715%%  loop whose peek_message label is StartLabel.
1716
1717find_rm_blocks(L, Blocks) ->
1718    Seen = gb_sets:singleton(L),
1719    Blk = map_get(L, Blocks),
1720    Succ = beam_ssa:successors(Blk),
1721    find_rm_blocks_1(Succ, Seen, Blocks).
1722
1723find_rm_blocks_1([L|Ls], Seen0, Blocks) ->
1724    case gb_sets:is_member(L, Seen0) of
1725        true ->
1726            find_rm_blocks_1(Ls, Seen0, Blocks);
1727        false ->
1728            Seen = gb_sets:insert(L, Seen0),
1729            Blk = map_get(L, Blocks),
1730            case find_rm_act(Blk#b_blk.is) of
1731                prune ->
1732                    %% Looping back. Don't look at any successors.
1733                    find_rm_blocks_1(Ls, Seen, Blocks);
1734                continue ->
1735                    %% Neutral block. Do nothing here, but look at
1736                    %% all successors.
1737                    Succ = beam_ssa:successors(Blk),
1738                    find_rm_blocks_1(Succ++Ls, Seen, Blocks);
1739                found ->
1740                    %% Found remove_message instruction.
1741                    [L|find_rm_blocks_1(Ls, Seen, Blocks)]
1742            end
1743    end;
1744find_rm_blocks_1([], _, _) -> [].
1745
1746find_rm_act([#b_set{op=Op}|Is]) ->
1747    case Op of
1748        remove_message -> found;
1749        peek_message -> prune;
1750        recv_next -> prune;
1751        wait_timeout -> prune;
1752        _ -> find_rm_act(Is)
1753    end;
1754find_rm_act([]) ->
1755    continue.
1756
1757%%%
1758%%% Find out which variables need to be stored in Y registers.
1759%%%
1760
1761-record(dk, {d :: ordsets:ordset(var_name()),
1762             k :: sets:set(var_name())
1763            }).
1764
1765%% find_yregs(St0) -> St.
1766%%  Find all variables that must be stored in Y registers. Annotate
1767%%  the blocks that allocate frames with the set of Y registers
1768%%  used within that stack frame.
1769%%
%%  Basically, we follow all execution paths starting from a block
%%  that allocates a frame, keeping track of all defined variables
%%  and all variables killed by an instruction that clobbers X
%%  registers. For every use of a variable, we check whether it is in
%%  the set of killed variables; if it is, it must be stored in a Y
%%  register.
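%%
%%  Sketch (hypothetical SSA fragment):
%%
%%    A = bif:element literal 1, T
%%    _1 = call local literal bar/0
%%    B = bif:hd A
%%    ret B
%%
%%  The call clobbers all X registers, so A, which is defined before
%%  the call and used after it, is killed and must be stored in a Y
%%  register. B is defined after the call and (with no later
%%  clobbering instruction before its use) can stay in an X register.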
1776
1777find_yregs(#st{frames=[]}=St) ->
1778    St;
1779find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) ->
1780    FrameDefs = find_defs(Frames, Blocks0, [V || #b_var{}=V <- Args]),
1781    Blocks = find_yregs_1(FrameDefs, Blocks0),
1782    St#st{ssa=Blocks}.
1783
1784find_yregs_1([{F,Defs}|Fs], Blocks0) ->
1785    DK = #dk{d=Defs,k=sets:new([{version, 2}])},
1786    D0 = #{F=>DK},
1787    Ls = beam_ssa:rpo([F], Blocks0),
1788    Yregs0 = sets:new([{version, 2}]),
1789    Yregs = find_yregs_2(Ls, Blocks0, D0, Yregs0),
1790    Blk0 = map_get(F, Blocks0),
1791    Blk = beam_ssa:add_anno(yregs, Yregs, Blk0),
1792    Blocks = Blocks0#{F:=Blk},
1793    find_yregs_1(Fs, Blocks);
1794find_yregs_1([], Blocks) -> Blocks.
1795
1796find_yregs_2([L|Ls], Blocks0, D0, Yregs0) ->
1797    Blk0 = map_get(L, Blocks0),
1798    #b_blk{is=Is,last=Last} = Blk0,
1799    Ys0 = map_get(L, D0),
1800    {Yregs1,Ys} = find_yregs_is(Is, Ys0, Yregs0),
1801    Yregs = find_yregs_terminator(Last, Ys, Yregs1),
1802    Successors = beam_ssa:successors(Blk0),
1803    D = find_update_succ(Successors, Ys, D0),
1804    find_yregs_2(Ls, Blocks0, D, Yregs);
1805find_yregs_2([], _Blocks, _D, Yregs) -> Yregs.
1806
1807find_defs(Frames, Blocks, Defs) ->
1808    Seen = gb_sets:empty(),
1809    FramesSet = gb_sets:from_list(Frames),
1810    {FrameDefs,_} = find_defs_1([0], Blocks, FramesSet, Seen, Defs, []),
1811    FrameDefs.
1812
1813find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
1814    case gb_sets:is_member(L, Frames) of
1815        true ->
1816            OrderedDefs = ordsets:from_list(Defs0),
1817            find_defs_1(Ls, Blocks, Frames, Seen0, Defs0,
1818                        [{L,OrderedDefs}|Acc0]);
1819        false ->
1820            case gb_sets:is_member(L, Seen0) of
1821                true ->
1822                    find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0);
1823                false ->
1824                    Seen1 = gb_sets:insert(L, Seen0),
1825                    {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0),
1826                    #b_blk{is=Is} = Blk = map_get(L, Blocks),
1827                    Defs = find_defs_is(Is, Defs0),
1828                    Successors = beam_ssa:successors(Blk),
1829                    find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc)
1830            end
1831    end;
1832find_defs_1([], _, _, Seen, _, Acc) ->
1833    {Acc,Seen}.
1834
1835find_defs_is([#b_set{dst=Dst}|Is], Acc) ->
1836    find_defs_is(Is, [Dst|Acc]);
1837find_defs_is([], Acc) -> Acc.
1838
1839find_update_succ([S|Ss], #dk{d=Defs0,k=Killed0}=DK0, D0) ->
1840    case D0 of
1841        #{S:=#dk{d=Defs1,k=Killed1}} ->
1842            Defs = ordsets:intersection(Defs0, Defs1),
1843            Killed = sets:union(Killed0, Killed1),
1844            DK = #dk{d=Defs,k=Killed},
1845            D = D0#{S:=DK},
1846            find_update_succ(Ss, DK0, D);
1847        #{} ->
1848            D = D0#{S=>DK0},
1849            find_update_succ(Ss, DK0, D)
1850    end;
1851find_update_succ([], _, D) -> D.
1852
1853find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) ->
1854    Yregs1 = intersect_used(I, Killed0),
1855    Yregs = sets:union(Yregs0, Yregs1),
1856    case beam_ssa:clobbers_xregs(I) of
1857        false ->
1858            Defs = ordsets:add_element(Dst, Defs0),
1859            find_yregs_is(Is, Ys#dk{d=Defs}, Yregs);
1860        true ->
1861            Killed = sets:union(sets:from_list(Defs0, [{version, 2}]), Killed0),
1862            Defs = [Dst],
1863            find_yregs_is(Is, Ys#dk{d=Defs,k=Killed}, Yregs)
1864    end;
1865find_yregs_is([], Ys, Yregs) -> {Yregs,Ys}.
1866
1867find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs0) ->
1868    Yregs = intersect_used(Terminator, Killed),
1869    sets:union(Yregs0, Yregs).
1870
1871intersect_used(#b_br{bool=#b_var{}=V}, Set) ->
1872    intersect_used_keep_singleton(V, Set);
1873intersect_used(#b_ret{arg=#b_var{}=V}, Set) ->
1874    intersect_used_keep_singleton(V, Set);
1875intersect_used(#b_set{op=phi,args=Args}, Set) ->
1876    sets:from_list([V || {#b_var{}=V,_} <- Args, sets:is_element(V, Set)], [{version, 2}]);
1877intersect_used(#b_set{args=Args}, Set) ->
1878    sets:from_list(intersect_used_keep(used_args(Args), Set), [{version, 2}]);
1879intersect_used(#b_switch{arg=#b_var{}=V}, Set) ->
1880    intersect_used_keep_singleton(V, Set);
1881intersect_used(_, _) -> sets:new([{version, 2}]).
1882
1883intersect_used_keep_singleton(V, Set) ->
1884    case sets:is_element(V, Set) of
1885        true -> sets:from_list([V], [{version, 2}]);
1886        false -> sets:new([{version, 2}])
1887    end.
1888
1889intersect_used_keep(Vs, Set) ->
1890    [V || V <- Vs, sets:is_element(V, Set)].
1891
1892used_args([#b_var{}=V|As]) ->
1893    [V|used_args(As)];
1894used_args([#b_remote{mod=Mod,name=Name}|As]) ->
1895    used_args([Mod,Name|As]);
1896used_args([_|As]) ->
1897    used_args(As);
1898used_args([]) -> [].
1899
1900%%%
%%% Try to reduce the size of the stack frame by adding explicit
1902%%% 'copy' instructions for return values from 'call' and 'old_make_fun' that
1903%%% need to be saved in Y registers. Here is an example to show
1904%%% how that's useful. First, here is the Erlang code:
1905%%%
1906%%% f(Pid) ->
1907%%%    Res = foo(42),
1908%%%    _ = node(Pid),
1909%%%    bar(),
1910%%%    Res.
1911%%%
1912%%% Compiled to SSA format, the main part of the code looks like this:
1913%%%
1914%%% 0:
1915%%%   Res = call local literal foo/1, literal 42
1916%%%   @ssa_bool:1 = succeeded:body Res
1917%%%   br @ssa_bool:1, label 2, label 1
1918%%% 2:
1919%%%   _1 = bif:node Pid
1920%%%   @ssa_bool:2 = succeeded:body _1
1921%%%   br @ssa_bool:2, label 3, label 1
1922%%% 3:
1923%%%   _2 = call local literal bar/0
1924%%%   @ssa_bool:3 = succeeded:body _2
1925%%%   br @ssa_bool:3, label 4, label 1
1926%%% 4:
1927%%%   ret Res
1928%%%
1929%%% It can be seen that the variables Pid and Res must be saved in Y
1930%%% registers in order to survive the function calls. A previous sub
1931%%% pass has inserted a 'copy' instruction to save the value of the
1932%%% variable Pid:
1933%%%
1934%%% 0:
1935%%%   Pid:4 = copy Pid
1936%%%   Res = call local literal foo/1, literal 42
1937%%%   @ssa_bool:1 = succeeded:body Res
1938%%%   br @ssa_bool:1, label 2, label 1
1939%%% 2:
1940%%%   _1 = bif:node Pid:4
1941%%%   @ssa_bool:2 = succeeded:body _1
1942%%%   br @ssa_bool:2, label 3, label 1
1943%%% 3:
1944%%%   _2 = call local literal bar/0
1945%%%   @ssa_bool:3 = succeeded:body _2
1946%%%   br @ssa_bool:3, label 4, label 1
1947%%% 4:
1948%%%   ret Res
1949%%%
1950%%% The Res and Pid:4 variables must be assigned to different Y registers
1951%%% because they are live at the same time. copy_retval() inserts a
1952%%% 'copy' instruction to copy Res to a new variable:
1953%%%
1954%%% 0:
1955%%%   Pid:4 = copy Pid
1956%%%   Res:6 = call local literal foo/1, literal 42
1957%%%   @ssa_bool:1 = succeeded:body Res:6
1958%%%   br @ssa_bool:1, label 2, label 1
1959%%% 2:
1960%%%   _1 = bif:node Pid:4
1961%%%   @ssa_bool:2 = succeeded:body _1
1962%%%   br @ssa_bool:2, label 3, label 1
1963%%% 3:
1964%%%   Res = copy Res:6
1965%%%   _2 = call local literal bar/0
1966%%%   @ssa_bool:3 = succeeded:body _2
1967%%%   br @ssa_bool:3, label 4, label 1
1968%%% 4:
1969%%%   ret Res
1970%%%
1971%%% The new variable Res:6 is used to capture the return value from the call.
1972%%% The variables Pid:4 and Res are no longer live at the same time, so they
1973%%% can be assigned to the same Y register.
1974%%%
1975
1976copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St) ->
1977    {Blocks,Count} = copy_retval_1(Frames, Blocks0, Count0),
1978    St#st{ssa=Blocks,cnt=Count}.
1979
1980copy_retval_1([F|Fs], Blocks0, Count0) ->
1981    #b_blk{anno=#{yregs:=Yregs0},is=Is} = map_get(F, Blocks0),
1982    Yregs = collect_yregs(Is, Yregs0),
1983    Ls = beam_ssa:rpo([F], Blocks0),
1984    {Blocks,Count} = copy_retval_2(Ls, Yregs, none, Blocks0, Count0),
1985    copy_retval_1(Fs, Blocks, Count);
1986copy_retval_1([], Blocks, Count) ->
1987    {Blocks,Count}.
1988
1989collect_yregs([#b_set{op=copy,dst=Y,args=[#b_var{}=X]}|Is],
1990              Yregs0) ->
1991    true = sets:is_element(X, Yregs0),        %Assertion.
1992    Yregs = sets:add_element(Y, sets:del_element(X, Yregs0)),
1993    collect_yregs(Is, Yregs);
1994collect_yregs([#b_set{}|Is], Yregs) ->
1995    collect_yregs(Is, Yregs);
1996collect_yregs([], Yregs) -> Yregs.
1997
1998copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
1999    #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
2000    RC = case {Last,Ls} of
2001             {#b_br{succ=Succ,fail=?EXCEPTION_BLOCK},[Succ|_]} ->
2002                 true;
2003             {_,_} ->
2004                 false
2005         end,
2006    case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of
2007        {Is,Count} ->
2008            case Copy0 =:= none andalso Count0 =:= Count of
2009                true ->
2010                    copy_retval_2(Ls, Yregs, none, Blocks0, Count0);
2011                false ->
2012                    Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
2013                    copy_retval_2(Ls, Yregs, none, Blocks, Count)
2014            end;
2015        {Is,Count,Copy} ->
2016            Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
2017            copy_retval_2(Ls, Yregs, Copy, Blocks, Count)
2018    end;
2019copy_retval_2([], _Yregs, none, Blocks, Count) ->
2020    {Blocks,Count}.
2021
2022copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
2023           Copy, Count, Acc) ->
2024    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
2025    {reverse(Acc, [I|acc_copy([], Copy)]),Count};
2026copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
2027  when Op =:= call; Op =:= old_make_fun ->
2028    {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
2029    {reverse(Acc, [I]),Count};
2030copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
2031    {reverse(Acc, acc_copy(Is, Copy)),Count};
2032copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
2033    {reverse(Acc, acc_copy(Is, Copy)),Count};
2034copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs,
2035           Copy0, Count0, Acc0) when Op =:= call; Op =:= old_make_fun ->
2036    {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
2037    case sets:is_element(Dst, Yregs) of
2038        true ->
2039            {NewVar,Count} = new_var(RetName, Count1),
2040            Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
2041            I = I1#b_set{dst=NewVar},
2042            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
2043        false ->
2044            copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
2045    end;
2046copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
2047    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
2048    case beam_ssa:clobbers_xregs(I) of
2049        true ->
2050            copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]);
2051        false ->
2052            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc])
    end;
2054copy_retval_is([], RC, _, Copy, Count, Acc) ->
2055    case {Copy,RC} of
2056        {none,_} ->
2057            {reverse(Acc),Count};
2058        {#b_set{},true} ->
2059            {reverse(Acc),Count,Copy};
2060        {#b_set{},false} ->
2061            {reverse(Acc, [Copy]),Count}
2062    end.
2063
2064%%
2065%% Consider this code:
2066%%
2067%%   P = ...
2068%%   Q = ...
2069%%   ...
2070%%   A = call foo/0
2071%%   A1 = copy A
2072%%   B = call bar/2, P, Q
2073%%
2074%% If the P or Q variables are no longer used after this code, one of
2075%% their Y registers can't be reused for A. To allow one of the Y registers to
2076%% be reused we will need to insert 'copy' instructions for arguments
2077%% that are in Y registers:
2078%%
2079%%   P = ...
2080%%   Q = ...
2081%%   ...
2082%%   A1 = call foo/0
2083%%   Q1 = copy Q
2084%%   P1 = copy P
2085%%   A = copy A1
2086%%   B = call bar/2, P1, Q1
2087%%
2088%% Note that copies of the arguments are done in reverse order to help the
2089%% reserve_xregs/3 function place the copies into the X registers they will
2090%% need to be in.
2091%%
2092%% For this example, P1 needs to be in x0 and Q1 needs to be in x1. If we
%% copied the arguments in order, the registers would be assigned like
2094%% this:
2095%%
2096%%   x0/A1 = call foo/0
2097%%   x1/P1 = copy P
2098%%   x2/Q1 = copy Q
2099%%   A = copy A1
2100%%   B = call bar/2, P1, Q1
2101%%
2102%% That is, both P1 and Q1 would be misplaced and would have to be
2103%% moved to their correct registers before the call. However, with the
2104%% copies in reverse order and with a little help from
%% reserve_xregs/3, at least the Q1 variable can be placed in
2106%% the correct register:
2107%%
2108%%   x0/A1 = call foo/0
2109%%   x1/Q1 = copy Q
2110%%   x2/P1 = copy P
2111%%   A = copy A1
2112%%   B = call bar/2, P1, Q1
2113%%
2114%% In general, all but the first argument can be placed in their correct registers.
2115%%
2116
2117place_retval_copy(I, _Yregs, none, Count, Acc) ->
2118    %% There is no copy of a previous return value, so there is nothing
2119    %% to gain by copying the function arguments.
2120    {I,Count,Acc};
2121place_retval_copy(#b_set{args=[F|Args0]}=I0, Yregs0, RetCopy, Count0, Acc0) ->
2122    %% Copy function arguments, but make sure that we don't make an extra
2123    %% copy of the previous return value.
2124    #b_set{dst=Avoid} = RetCopy,
2125    Yregs = sets:del_element(Avoid, Yregs0),
2126    {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Acc0, Count0),
2127    I = I0#b_set{args=[F|Args]},
2128
2129    %% Place the copy instruction for the previous return value after the
2130    %% copy instruction for the arguments.
2131    Acc = [RetCopy|Acc1],
2132    {I,Count,Acc}.
2133
2134copy_func_args(Args, Yregs, Acc, Count) ->
2135    copy_func_args_1(reverse(Args), Yregs, Acc, [], Count).
2136
2137copy_func_args_1([#b_var{name=AName}=A|As], Yregs, InstrAcc, ArgAcc, Count0) ->
2138    case sets:is_element(A, Yregs) of
2139        true ->
2140            {NewVar,Count} = new_var(AName, Count0),
2141            Copy = #b_set{op=copy,dst=NewVar,args=[A]},
2142            copy_func_args_1(As, Yregs, [Copy|InstrAcc], [NewVar|ArgAcc], Count);
2143        false ->
2144            copy_func_args_1(As, Yregs, InstrAcc, [A|ArgAcc], Count0)
2145    end;
2146copy_func_args_1([A|As], Yregs, InstrAcc, ArgAcc, Count) ->
2147    copy_func_args_1(As, Yregs, InstrAcc, [A|ArgAcc], Count);
2148copy_func_args_1([], _Yregs, InstrAcc, ArgAcc, Count) ->
2149    {ArgAcc,InstrAcc,Count}.
2150
2151acc_copy(Acc, none) -> Acc;
2152acc_copy(Acc, #b_set{}=Copy) -> [Copy|Acc].
2153
2154copy_sub_args(Args, none) ->
2155    Args;
2156copy_sub_args(Args, #b_set{dst=Dst,args=[Src]}) ->
2157    [sub_arg(A, Dst, Src) || A <- Args].
2158
2159sub_arg(Old, Old, New) -> New;
2160sub_arg(Old, _, _) -> Old.
2161
2162%%%
2163%%% Consider:
2164%%%
2165%%%   x1/Hd = get_hd x0/Cons
2166%%%   y0/Tl = get_tl x0/Cons
2167%%%
2168%%% Register x0 can't be reused for Hd. If Hd needs to be in x0,
2169%%% a 'move' instruction must be inserted.
2170%%%
2171%%% If we swap get_hd and get_tl when Tl is in a Y register,
2172%%% x0 can be used for Hd if Cons is not used again:
2173%%%
2174%%%   y0/Tl = get_tl x0/Cons
2175%%%   x0/Hd = get_hd x0/Cons
2176%%%
2177
2178opt_get_list(#st{ssa=Blocks,res=Res}=St) ->
2179    ResMap = maps:from_list(Res),
2180    Ls = beam_ssa:rpo(Blocks),
2181    St#st{ssa=opt_get_list_1(Ls, ResMap, Blocks)}.
2182
2183opt_get_list_1([L|Ls], Res, Blocks0) ->
2184    #b_blk{is=Is0} = Blk = map_get(L, Blocks0),
2185    case opt_get_list_is(Is0, Res, [], false) of
2186        no ->
2187            opt_get_list_1(Ls, Res, Blocks0);
2188        {yes,Is} ->
2189            Blocks = Blocks0#{L:=Blk#b_blk{is=Is}},
2190            opt_get_list_1(Ls, Res, Blocks)
2191    end;
2192opt_get_list_1([], _, Blocks) -> Blocks.
2193
2194opt_get_list_is([#b_set{op=get_hd,dst=Hd,
2195                        args=[Cons]}=GetHd,
2196                 #b_set{op=get_tl,dst=Tl,
2197                        args=[Cons]}=GetTl|Is],
2198                Res, Acc, Changed) ->
2199    %% Note that when this pass is run, only Y registers have
2200    %% reservations. The absence of an entry for a variable therefore
2201    %% means that the variable will be in an X register.
2202    case Res of
2203        #{Hd:={y,_}} ->
2204            %% Hd will be in a Y register. Don't swap.
2205            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed);
2206        #{Tl:={y,_}} ->
2207            %% Tl will be in a Y register. Swap.
2208            opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true);
2209        #{} ->
2210            %% Both are in X registers. Nothing to do.
2211            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed)
2212    end;
2213opt_get_list_is([I|Is], Res, Acc, Changed) ->
2214    opt_get_list_is(Is, Res, [I|Acc], Changed);
2215opt_get_list_is([], _Res, Acc, Changed) ->
2216    case Changed of
2217        true ->
2218            {yes,reverse(Acc)};
2219        false ->
2220            no
2221    end.
2222
2223%%%
2224%%% Number instructions in the order they are executed.
2225%%%
2226
2227%% number_instructions(St0) -> St.
2228%%  Number instructions in the order they are executed. Use a step
2229%%  size of 2. Don't number phi instructions. All phi variables in
2230%%  a block will be live one unit before the first non-phi instruction
2231%%  in the block.
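%%
%%  Sketch (illustrative numbering): a block with two non-phi
%%  instructions and a terminator is numbered 1, 3, and 5; the next
%%  block then starts at 7. Any phi instructions in a block are left
%%  unnumbered, and their variables are considered live one unit
%%  before the first non-phi instruction (see first_number/2).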
2232
2233number_instructions(#st{ssa=Blocks0}=St) ->
2234    Ls = beam_ssa:rpo(Blocks0),
2235    St#st{ssa=number_is_1(Ls, 1, Blocks0)}.
2236
2237number_is_1([L|Ls], N0, Blocks0) ->
2238    #b_blk{is=Is0,last=Last0} = Bl0 = map_get(L, Blocks0),
2239    {Is,N1} = number_is_2(Is0, N0, []),
2240    Last = beam_ssa:add_anno(n, N1, Last0),
2241    N = N1 + 2,
2242    Bl = Bl0#b_blk{is=Is,last=Last},
2243    Blocks = Blocks0#{L:=Bl},
2244    number_is_1(Ls, N, Blocks);
2245number_is_1([], _, Blocks) -> Blocks.
2246
2247number_is_2([#b_set{op=phi}=I|Is], N, Acc) ->
2248    number_is_2(Is, N, [I|Acc]);
2249number_is_2([I0|Is], N, Acc) ->
2250    I = beam_ssa:add_anno(n, N, I0),
2251    number_is_2(Is, N+2, [I|Acc]);
2252number_is_2([], N, Acc) ->
2253    {reverse(Acc),N}.
2254
2255%%%
2256%%% Calculate live intervals for all variables in this function.
2257%%%
2258%%% This code uses the algorithm and terminology from [3].
2259%%%
2260%%% For each variable, we calculate its live interval. The live
2261%%% interval is a list of ranges, where a range is a tuple
2262%%% {Def,LastUse}.  Def is the instruction number for the instruction
%%% that first defines the variable and LastUse is the instruction
%%% number of the last instruction that uses it.
2265%%%
%%% We traverse the blocks in post order so that we will see the last
%%% use of a variable before we see its definition.
2268%%%
2269
2270live_intervals(#st{args=Args,ssa=Blocks}=St) ->
2271    PO = reverse(beam_ssa:rpo(Blocks)),
2272    Intervals0 = live_interval_blk(PO, Blocks, #{}, #{}),
2273    Intervals1 = add_ranges([{V,{0,1}} || #b_var{}=V <- Args], Intervals0),
2274    Intervals = maps:to_list(Intervals1),
2275    St#st{intervals=Intervals}.
2276
2277live_interval_blk([L|Ls], Blocks, LiveMap0, Intervals0) ->
2278    Blk = map_get(L, Blocks),
2279    Successors = beam_ssa:successors(Blk),
2280    Live1 = live_in_successors(Successors, L, Blocks, LiveMap0),
2281
2282    %% Add default ranges for all variables that are live in the
2283    %% successors.
2284    #b_blk{is=Is,last=Last} = Blk,
2285    FirstNumber = first_number(Is, Last),
2286    DefaultRange = {FirstNumber,1+beam_ssa:get_anno(n, Last)},
2287    Ranges0 = [{V,DefaultRange} || V <- Live1],
2288
2289    case {Is,Last} of
2290        {[],#b_br{bool=#b_literal{val=true}}} ->
2291            %% Optimize the interval calculation for blocks without variables.
2292            Intervals = add_ranges(Ranges0, Intervals0),
2293            LiveMap = LiveMap0#{L => Live1},
2294            live_interval_blk(Ls, Blocks, LiveMap, Intervals);
2295        {_,_} ->
2296            %% Update the ranges. Variables whose last use is in this
2297            %% block will be added, and variables that are defined
2298            %% in this block will have their starting instruction
2299            %% number updated.
2300            %%
2301            %% We use a gb_tree instead of a map because conversion to and
2302            %% from an orddict is faster.
2303            Ranges1 = gb_trees:from_orddict(Ranges0),
2304            Ranges2 = live_interval_last(Last, FirstNumber, Ranges1),
2305            Ranges3 = live_interval_blk_is(Is, FirstNumber, Ranges2),
2306            Ranges = gb_trees:to_list(Ranges3),
2307
2308            %% Update the interval for each variable.
2309            Intervals = add_ranges(Ranges, Intervals0),
2310
2311            %% Update what is live at the beginning of this block and
2312            %% store it.
2313            Live = [V || {V,{From,_To}} <- Ranges,
2314                         From =< FirstNumber],
2315            LiveMap = LiveMap0#{L => Live},
2316            live_interval_blk(Ls, Blocks, LiveMap, Intervals)
2317    end;
2318live_interval_blk([], _Blocks, _LiveMap, Intervals) ->
2319    Intervals.
2320
2321live_interval_last(I, FirstNumber, Ranges) ->
2322    N = beam_ssa:get_anno(n, I),
2323    Used = beam_ssa:used(I),
2324    update_used(Used, FirstNumber, N, Ranges).
2325
2326live_interval_blk_is([#b_set{op=phi,dst=Dst}|Is], FirstNumber, Acc0) ->
2327    Acc = live_interval_blk_is(Is, FirstNumber, Acc0),
2328    case gb_trees:is_defined(Dst, Acc) of
2329        true ->
2330            %% The value in the tree already has the correct starting value.
2331            update_def(Dst, FirstNumber, Acc);
2332        false ->
            %% Unused phi node -- can only happen if optimization passes
2334            %% have been turned off.
2335            gb_trees:insert(Dst, {FirstNumber,FirstNumber}, Acc)
2336    end;
2337live_interval_blk_is([#b_set{args=Args,dst=Dst}=I|Is], FirstNumber, Acc0) ->
2338    Acc1 = live_interval_blk_is(Is, FirstNumber, Acc0),
2339    N = beam_ssa:get_anno(n, I),
2340    Used = used_args(Args),
2341    Acc = update_used(Used, FirstNumber, N, Acc1),
2342    update_def(Dst, N, Acc);
2343live_interval_blk_is([], _FirstNumber, Acc) ->
2344    Acc.
2345
2346update_def(V, N, Ranges) ->
2347    case gb_trees:lookup(V, Ranges) of
2348        {value,{_From,To}} ->
2349            gb_trees:update(V, {N,To}, Ranges);
2350        none ->
2351            %% The variable is defined but never used.
2352            gb_trees:insert(V, {N,N}, Ranges)
2353    end.
2354
2355update_used([V|Vs], First, N, Ranges) ->
2356    case gb_trees:is_defined(V, Ranges) of
2357        true ->
2358            %% Already up to date. (A later use has already been stored.)
2359            update_used(Vs, First, N, Ranges);
2360        false ->
2361            %% The last use of this variable. (But the first time we
2362            %% see it because we visit instructions in PO.)
2363            update_used(Vs, First, N, gb_trees:insert(V, {First,N}, Ranges))
2364    end;
2365update_used([], _First, _N, Ranges) -> Ranges.
2366
2367add_ranges([{V,{A,N}=Range}|T], Map) ->
2368    case Map of
2369        #{V := [{N,Z}|Ranges]} ->
2370            %% Coalesce two adjacent ranges.
2371            add_ranges(T, Map#{V := [{A,Z}|Ranges]});
2372        #{V := [{A,N}|_]} ->
2373            %% Ignore repeated range (probably from arguments).
2374            add_ranges(T, Map);
2375        #{V := Ranges} ->
2376            %% This range is not adjacent to any other range.
2377            add_ranges(T, Map#{V := [Range|Ranges]});
2378        #{} ->
2379            %% The last use of this variable is in the current block.
2380            add_ranges(T, Map#{V => [Range]})
2381    end;
2382add_ranges([], Map) -> Map.
2383
%% first_number([#b_set{}], Last) -> InstructionNumber.
2385%%  Return the number for the first instruction for the block.
%%  Note that this number is one less than the number of the first
%%  non-phi instruction in the block.
2388
2389first_number([#b_set{op=phi}|Is], Last) ->
2390    first_number(Is, Last);
2391first_number([I|_], _) ->
2392    beam_ssa:get_anno(n, I) - 1;
2393first_number([], Last) ->
2394    beam_ssa:get_anno(n, Last) - 1.
2395
2396live_in_successors(Ls, Pred, Blocks, LiveMap) ->
2397    live_in_successors(Ls, Pred, Blocks, LiveMap, []).
2398
2399live_in_successors([L|Ls], Pred, Blocks, LiveMap, Live0) ->
2400    Live1 = ordsets:union(Live0, get_live(L, LiveMap)),
2401    #b_blk{is=Is} = map_get(L, Blocks),
2402    Live = live_in_phis(Is, Pred, Live1),
2403    live_in_successors(Ls, Pred, Blocks, LiveMap, Live);
2404live_in_successors([], _, _, _, Live) -> Live.
2405
2406get_live(L, LiveMap) ->
2407    case LiveMap of
2408        #{L:=Live} -> Live;
2409        #{} -> []
2410    end.
2411
2412live_in_phis([#b_set{op=phi,dst=Killed,args=Args}|Is],
2413                 Pred, Live0) ->
2414    Used = [V || {#b_var{}=V,L} <- Args, L =:= Pred],
2415    Live1 = ordsets:union(Used, Live0),
2416    Live = ordsets:del_element(Killed, Live1),
2417    live_in_phis(Is, Pred, Live);
2418live_in_phis(_, _, Live) -> Live.
2419
2420%%%
2421%%% Reserve Y registers.
2422%%%
2423
2424%% reserve_yregs(St0) -> St.
2425%%  In each block that allocates a stack frame, insert instructions
2426%%  that copy variables that must be in Y registers (given by
2427%%  the `yregs` annotation) to new variables.
2428%%
2429%%  Also allocate specific Y registers for try and catch tags.
2430%%  The outermost try/catch tag is placed in y0, any directly
%%  nested tag in y1, and so on. Note that this is the reverse of
%%  the order required by BEAM; it will be corrected later by
%%  turn_yregs().
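%%
%%  Sketch (illustrative): for a function with an outer try whose
%%  protected body contains a nested try, the outer tag is reserved
%%  slot 0 and the nested tag slot 1 at this point; turn_yregs()
%%  later reverses the numbering so that the order matches what BEAM
%%  requires.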
2434
2435reserve_yregs(#st{frames=Frames}=St0) ->
2436    foldl(fun reserve_yregs_1/2, St0, Frames).
2437
2438reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
2439    Blk = map_get(L, Blocks0),
2440    Yregs = ordsets:from_list(sets:to_list(beam_ssa:get_anno(yregs, Blk))),
2441    RPO = beam_ssa:rpo([L], Blocks0),
2442    {Def,Unused} = beam_ssa:def_unused(RPO, Yregs, Blocks0),
2443    UsedYregs = ordsets:subtract(Yregs, Unused),
2444    DefBefore = ordsets:subtract(UsedYregs, Def),
2445    {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, RPO, Blocks0, Count0),
2446    InsideVars = ordsets:subtract(UsedYregs, DefBefore),
2447    ResTryTags0 = reserve_try_tags(L, Blocks),
2448    ResTryTags = [{V,{Reg,Count}} || {V,Reg} <- ResTryTags0],
2449    Vars = BeforeVars ++ InsideVars,
2450    Res = [{V,{y,Count}} || V <- Vars] ++ ResTryTags ++ Res0,
2451    St#st{res=Res,ssa=Blocks,cnt=Count+1}.
2452
2453reserve_try_tags(L, Blocks) ->
2454    Seen = gb_sets:empty(),
2455    {Res0,_} = reserve_try_tags_1([L], Blocks, Seen, #{}),
2456    Res1 = [maps:to_list(M) || {_,M} <- maps:to_list(Res0)],
2457    Res = [{V,{y,Y}} || {V,Y} <- append(Res1)],
2458    ordsets:from_list(Res).
2459
2460reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
2461    case gb_sets:is_element(L, Seen0) of
2462        true ->
2463            reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
2464        false ->
2465            Seen1 = gb_sets:insert(L, Seen0),
2466            #b_blk{is=Is} = Blk = map_get(L, Blocks),
2467            Active0 = get_active(L, ActMap0),
2468            Active = reserve_try_tags_is(Is, Active0),
2469            Successors = beam_ssa:successors(Blk),
2470            ActMap1 = update_act_map(Successors, Active, ActMap0),
2471            {ActMap,Seen} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1),
2472            reserve_try_tags_1(Successors, Blocks, Seen,ActMap)
2473    end;
2474reserve_try_tags_1([], _Blocks, Seen, ActMap) ->
2475    {ActMap,Seen}.
2476
2477get_active(L, ActMap) ->
2478    case ActMap of
2479        #{L:=Active} -> Active;
2480        #{} -> #{}
2481    end.
2482
2483reserve_try_tags_is([#b_set{op=new_try_tag,dst=V}|Is], Active) ->
2484    N = map_size(Active),
2485    reserve_try_tags_is(Is, Active#{V=>N});
2486reserve_try_tags_is([#b_set{op=kill_try_tag,args=[Tag]}|Is], Active) ->
2487    reserve_try_tags_is(Is, maps:remove(Tag, Active));
2488reserve_try_tags_is([_|Is], Active) ->
2489    reserve_try_tags_is(Is, Active);
2490reserve_try_tags_is([], Active) -> Active.
2491
2492update_act_map([L|Ls], Active0, ActMap0) ->
2493    case ActMap0 of
2494        #{L:=Active1} ->
2495            ActMap = ActMap0#{L=>maps:merge(Active0, Active1)},
2496            update_act_map(Ls, Active0, ActMap);
2497        #{} ->
2498            ActMap = ActMap0#{L=>Active0},
2499            update_act_map(Ls, Active0, ActMap)
2500    end;
2501update_act_map([], _, ActMap) -> ActMap.
2502
2503rename_vars([], _, _, Blocks, Count) ->
2504    {[],Blocks,Count};
2505rename_vars(Vs, L, RPO, Blocks0, Count0) ->
2506    {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Vs], Count0),
2507    Ren = zip(Vs, NewVars),
2508    Blocks1 = beam_ssa:rename_vars(Ren, RPO, Blocks0),
2509    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks1),
2510    CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
2511    Is = insert_after_phis(Is0, CopyIs),
2512    Blk = Blk0#b_blk{is=Is},
2513    Blocks = Blocks1#{L:=Blk},
2514    {NewVars,Blocks,Count}.
2515
2516insert_after_phis([#b_set{op=phi}=I|Is], InsertIs) ->
2517    [I|insert_after_phis(Is, InsertIs)];
2518insert_after_phis(Is, InsertIs) ->
2519    InsertIs ++ Is.
2520
2521%% frame_size(St0) -> St.
2522%%  Calculate the frame size for each block that allocates a frame.
2523%%  Annotate the block with the frame size. Also annotate all
2524%%  return instructions with {deallocate,FrameSize} to simplify
2525%%  code generation.
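%%
%%  Sketch (illustrative): if the variables defined under a frame
%%  block end up in {y,0}, {y,1}, and {y,2}, the block is annotated
%%  with {frame_size,3} and every reachable #b_ret{} is annotated
%%  with {deallocate,3}.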
2526
2527frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
2528    Blocks = foldl(fun(L, Blks) ->
2529                           frame_size_1(L, Regs, Blks)
2530                   end, Blocks0, Frames),
2531    St#st{ssa=Blocks}.
2532
2533frame_size_1(L, Regs, Blocks0) ->
2534    RPO = beam_ssa:rpo([L], Blocks0),
2535    Def = beam_ssa:def(RPO, Blocks0),
2536    Yregs0 = [map_get(V, Regs) || V <- Def, is_yreg(map_get(V, Regs))],
2537    Yregs = ordsets:from_list(Yregs0),
2538    FrameSize = length(ordsets:from_list(Yregs)),
2539    if
2540        FrameSize =/= 0 ->
2541            [{y,0}|_] = Yregs,                  %Assertion.
2542            {y,Last} = last(Yregs),
2543            Last = FrameSize - 1,               %Assertion.
2544            ok;
2545        true ->
2546            ok
2547    end,
2548    Blk0 = map_get(L, Blocks0),
2549    Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0),
2550
2551    %% Insert an annotation for frame deallocation on
2552    %% each #b_ret{}.
2553    Blocks = Blocks0#{L:=Blk},
2554    Reachable = beam_ssa:rpo([L], Blocks),
2555    frame_deallocate(Reachable, FrameSize, Blocks).
2556
2557frame_deallocate([L|Ls], Size, Blocks0) ->
2558    Blk0 = map_get(L, Blocks0),
2559    Blk = case Blk0 of
2560              #b_blk{last=#b_ret{}=Ret0} ->
2561                  Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
2562                  Blk0#b_blk{last=Ret};
2563              #b_blk{} ->
2564                  Blk0
2565          end,
2566    Blocks = Blocks0#{L:=Blk},
2567    frame_deallocate(Ls, Size, Blocks);
2568frame_deallocate([], _, Blocks) -> Blocks.
2569
2570
2571%% turn_yregs(St0) -> St.
2572%%  Renumber y registers so that {y,0} becomes {y,FrameSize-1},
2573%%  {y,FrameSize-1} becomes {y,0} and so on. This is to make nested
2574%%  catches work. The register allocator (linear_scan()) has given
2575%%  a lower number to the outermost catch.
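%%
%%  Sketch (illustrative): with a frame size of 3, a variable in
%%  {y,0} is renumbered to {y,2}, one in {y,1} stays in {y,1}, and
%%  one in {y,2} is renumbered to {y,0}.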
2576
2577turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
2578    Regs1 = foldl(fun(L, A) ->
2579                          Blk = map_get(L, Blocks),
2580                          FrameSize = beam_ssa:get_anno(frame_size, Blk),
2581                          RPO = beam_ssa:rpo([L], Blocks),
2582                          Def = beam_ssa:def(RPO, Blocks),
2583                          [turn_yregs_1(Def, FrameSize, Regs0)|A]
2584                  end, [], Frames),
2585    Regs = maps:merge(Regs0, maps:from_list(append(Regs1))),
2586    St#st{regs=Regs}.
2587
2588turn_yregs_1(Def, FrameSize, Regs) ->
2589    Yregs0 = [{map_get(V, Regs),V} || V <- Def, is_yreg(map_get(V, Regs))],
2590    Yregs1 = rel2fam(Yregs0),
2591    FrameSize = length(Yregs1),
2592    Yregs2 = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- Yregs1],
2593    R0 = sofs:family(Yregs2),
2594    R1 = sofs:family_to_relation(R0),
2595    R = sofs:converse(R1),
2596    sofs:to_external(R).
2597
2598%%%
2599%%% Reserving registers before register allocation.
2600%%%
2601
2602%% reserve_regs(St0) -> St.
2603%%  Reserve registers prior to register allocation. Y registers
2604%%  have already been reserved. This function will reserve z,
2605%%  fr, and specific x registers.
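%%
%%  As an illustrative sketch (hypothetical SSA fragment), in a block
%%  such as:
%%
%%    Size = bif:tuple_size Tuple
%%    Bool = bif:'=:=' Size, literal 3
%%    br Bool, label 4, label 5
%%
%%  the Size variable is reserved as a z register (provided that it
%%  is short-lived), allowing the code generator to combine the two
%%  instructions into a test_arity instruction.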
2606
2607reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
2608    %% Reserve x0, x1, and so on for the function arguments.
2609    Res1 = reserve_arg_regs(Args, 0, Res0),
2610    RPO = beam_ssa:rpo(Blocks),
2611
2612    %% Reserve Z registers (dummy registers) for instructions with no
2613    %% return values (e.g. remove_message) or pseudo-return values
2614    %% (e.g. landingpad).
2615    Res2 = reserve_zregs(RPO, Blocks, Intervals, Res1),
2616
2617    %% Reserve float registers.
2618    Res3 = reserve_fregs(RPO, Blocks, Res2),
2619
2620    %% Reserve all remaining unreserved variables as X registers.
2621    Res = maps:from_list(Res3),
2622    St#st{res=reserve_xregs(RPO, Blocks, Res)}.
2623
2624reserve_arg_regs([#b_var{}=Arg|Is], N, Acc) ->
2625    reserve_arg_regs(Is, N+1, [{Arg,{x,N}}|Acc]);
2626reserve_arg_regs([], _, Acc) -> Acc.
2627
2628reserve_zregs(RPO, Blocks, Intervals, Res) ->
2629    ShortLived0 = [V || {V,[{Start,End}]} <- Intervals, Start+2 =:= End],
2630    ShortLived = sets:from_list(ShortLived0, [{version, 2}]),
2631    F = fun(_, #b_blk{is=Is,last=Last}, A) ->
2632                reserve_zreg(Is, Last, ShortLived, A)
2633        end,
2634    beam_ssa:fold_blocks(F, RPO, Res, Blocks).
2635
2636reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
2637              #b_set{op={bif,'=:='},args=[Dst,Val],dst=Bool}],
2638             Last, ShortLived, A) ->
2639    case {Val,Last} of
2640        {#b_literal{val=Arity},#b_br{bool=Bool}} when Arity bsr 32 =:= 0 ->
2641            %% These two instructions can be combined to a test_arity
2642            %% instruction provided that the arity variable is short-lived.
2643            reserve_test_zreg(Dst, ShortLived, A);
2644        {_,_} ->
2645            %% Either the arity is too big, or the boolean value is not
2646            %% used in a conditional branch.
2647            A
2648    end;
2649reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
2650             #b_switch{arg=Dst}, ShortLived, A) ->
2651    reserve_test_zreg(Dst, ShortLived, A);
2652reserve_zreg([#b_set{op=Op,dst=Dst}], #b_br{bool=Dst}, ShortLived, A) ->
2653    case use_zreg(Op) of
2654        yes -> [{Dst,z} | A];
2655        no -> A;
2656        maybe -> reserve_test_zreg(Dst, ShortLived, A)
2657    end;
2658reserve_zreg([#b_set{op=Op,dst=Dst} | Is], Last, ShortLived, A) ->
2659    case use_zreg(Op) of
2660        yes -> reserve_zreg(Is, Last, ShortLived, [{Dst,z} | A]);
2661        _Other -> reserve_zreg(Is, Last, ShortLived, A)
2662    end;
2663reserve_zreg([], _, _, A) -> A.
2664
2665use_zreg(bs_match_string) -> yes;
2666use_zreg(bs_save) -> yes;
2667use_zreg(bs_restore) -> yes;
2668use_zreg(bs_set_position) -> yes;
2669use_zreg(kill_try_tag) -> yes;
2670use_zreg(landingpad) -> yes;
2671use_zreg(put_tuple_elements) -> yes;
2672use_zreg(recv_marker_bind) -> yes;
2673use_zreg(recv_marker_clear) -> yes;
2674use_zreg(remove_message) -> yes;
2675use_zreg(set_tuple_element) -> yes;
2676use_zreg(succeeded) -> yes;
2677use_zreg(wait_timeout) -> yes;
2678%% There's no way we can combine these into a test instruction, so we must
2679%% avoid using a z register if their result is used directly in a branch.
2680use_zreg(call) -> no;
2681use_zreg({bif,is_map_key}) -> no;
2682use_zreg({bif,is_record}) -> no;
2683use_zreg({bif,map_get}) -> no;
2684use_zreg({bif,'xor'}) -> no;
2685use_zreg(get_hd) -> no;
2686use_zreg(get_tl) -> no;
2687use_zreg(get_tuple_element) -> no;
2688%% Assume the instruction can use a z register, provided it's the last in its
2689%% block and that the result is only used in the terminator.
2690use_zreg(_) -> maybe.
2691
2692%% If V is defined just before a branch, we may be able to combine it into a
2693%% test instruction.
2694reserve_test_zreg(#b_var{}=V, ShortLived, A) ->
2695    case sets:is_element(V, ShortLived) of
2696        true -> [{V,z}|A];
2697        false -> A
2698    end.
2699
2700reserve_fregs(RPO, Blocks, Res) ->
2701    F = fun(_, #b_blk{is=Is}, A) ->
2702                reserve_freg(Is, A)
2703        end,
2704    beam_ssa:fold_blocks(F, RPO, Res, Blocks).
2705
2706reserve_freg([#b_set{op={float,Op},dst=V}|Is], Res) ->
2707    case Op of
2708        get ->
2709            reserve_freg(Is, Res);
2710        _ ->
2711            reserve_freg(Is, [{V,fr}|Res])
2712    end;
2713reserve_freg([_|Is], Res) ->
2714    reserve_freg(Is, Res);
2715reserve_freg([], Res) -> Res.
2716
2717%% reserve_xregs(St0) -> St.
2718%%  Reserve all remaining variables as X registers.
2719%%
2720%%  If a variable will need to be in a specific X register for a
2721%%  'call' or 'old_make_fun' (and there is nothing that will kill it
2722%%  between the definition and use), reserve the register using a
%%  {prefer,{x,X}} annotation. That annotation means that the linear
2724%%  scan algorithm will place the variable in the preferred register,
2725%%  unless that register is already occupied.
2726%%
2727%%  All remaining variables are reserved as X registers. Linear scan
2728%%  will allocate the lowest free X register for the variable.
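%%
%%  Sketch (hypothetical SSA fragment): in
%%
%%    A = bif:hd List
%%    _1 = call local literal f/1, A
%%
%%  nothing clobbers the X registers between the definition of A and
%%  the call, and A is the first argument of the call, so A can be
%%  reserved with a {prefer,{x,0}} annotation.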

reserve_xregs(RPO, Blocks, Res) ->
    Ls = reverse(RPO),
    reserve_xregs(Ls, Blocks, #{}, Res).

reserve_xregs([L|Ls], Blocks, XsMap0, Res0) ->
    #b_blk{anno=Anno,is=Is0,last=Last} = map_get(L, Blocks),

    %% Calculate mapping from variable name to the preferred
    %% register.
    Xs0 = reserve_terminator(L, Is0, Last, Blocks, XsMap0, Res0),

    %% We need to figure out where the code generator will
    %% place instructions that will do a garbage collection.
    %% Insert 'gc' markers as pseudo-instructions in the
    %% instruction sequence.
    Is1 = reverse(Is0),
    Is2 = res_place_gc_instrs(Is1, []),
    Is = res_place_allocate(Anno, Is2),

    %% Add register hints for variables that are defined
    %% in the (reversed) instruction sequence.
    {Res,Xs} = reserve_xregs_is(Is, Res0, Xs0, []),

    XsMap = XsMap0#{L=>Xs},
    reserve_xregs(Ls, Blocks, XsMap, Res);
reserve_xregs([], _, _, Res) -> Res.

%% Insert explicit 'gc' markers at the points where there will
%% be a garbage collection. (Note that the instruction
%% sequence passed to this function is reversed.)

res_place_gc_instrs([#b_set{op=phi}=I|Is], Acc) ->
    res_place_gc_instrs(Is, [I|Acc]);
res_place_gc_instrs([#b_set{op=Op}=I|Is], Acc)
  when Op =:= call; Op =:= old_make_fun ->
    case Acc of
        [] ->
            res_place_gc_instrs(Is, [I|Acc]);
        [GC|_] when GC =:= gc; GC =:= test_heap ->
            res_place_gc_instrs(Is, [I,gc|Acc]);
        [_|_] ->
            res_place_gc_instrs(Is, [I,gc|Acc])
    end;
res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) ->
    case beam_ssa_codegen:classify_heap_need(Op, Args) of
        neutral ->
            case Acc0 of
                [test_heap|Acc] ->
                    res_place_gc_instrs(Is, [test_heap,I|Acc]);
                Acc ->
                    res_place_gc_instrs(Is, [I|Acc])
            end;
        {put,_} ->
            res_place_gc_instrs(Is, res_place_test_heap(I, Acc0));
        {put_fun,_} ->
            res_place_gc_instrs(Is, res_place_test_heap(I, Acc0));
        put_float ->
            res_place_gc_instrs(Is, res_place_test_heap(I, Acc0));
        gc ->
            res_place_gc_instrs(Is, [gc,I|Acc0])
    end;
res_place_gc_instrs([], Acc) ->
    %% Reverse and replace 'test_heap' markers with 'gc'.
    %% (The distinction is no longer useful.)
    res_place_gc_instrs_rev(Acc, []).

res_place_test_heap(I, Acc) ->
    case Acc of
        [test_heap|Tail] ->
            %% Merge with the 'test_heap' marker that is already there.
            [test_heap,I|Tail];
        _ ->
            [test_heap,I|Acc]
    end.

res_place_gc_instrs_rev([test_heap|Is], [gc|_]=Acc) ->
    res_place_gc_instrs_rev(Is, Acc);
res_place_gc_instrs_rev([test_heap|Is], Acc) ->
    res_place_gc_instrs_rev(Is, [gc|Acc]);
res_place_gc_instrs_rev([gc|Is], [gc|_]=Acc) ->
    res_place_gc_instrs_rev(Is, Acc);
res_place_gc_instrs_rev([I|Is], Acc) ->
    res_place_gc_instrs_rev(Is, [I|Acc]);
res_place_gc_instrs_rev([], Acc) -> Acc.

res_place_allocate(#{yregs:=_}, Is) ->
    %% There will be an 'allocate' instruction inserted here.
    Is ++ [gc];
res_place_allocate(#{}, Is) -> Is.
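
%% Example (illustrative only; the instructions are hypothetical and it
%% is assumed that 'put_list' is classified as a {put,_} heap need).
%% For a block containing, in program order, a 'put_list' followed by a
%% 'call', the reversed sequence [Call,PutList] is rewritten to
%% [Call,PutList,gc]. Read in program order, a 'gc' marker has been
%% placed just before the 'put_list', where the code generator will
%% emit a test_heap instruction. No marker is placed after the 'call'
%% since it is the last instruction in the block.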

reserve_xregs_is([gc|Is], Res, Xs0, Used) ->
    %% At this point, the code generator will place an instruction
    %% that does a garbage collection. We must prune the remembered
    %% registers.
    Xs = res_xregs_prune(Xs0, Used, Res),
    reserve_xregs_is(Is, Res, Xs, Used);
reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
    Res = reserve_xreg(Dst, Xs0, Res0),
    Used1 = ordsets:union(Used0, beam_ssa:used(I)),
    Used = ordsets:del_element(Dst, Used1),
    case Op of
        call ->
            Xs = reserve_call_args(tl(Args)),
            reserve_xregs_is(Is, Res, Xs, Used);
        old_make_fun ->
            Xs = reserve_call_args(tl(Args)),
            reserve_xregs_is(Is, Res, Xs, Used);
        _ ->
            reserve_xregs_is(Is, Res, Xs0, Used)
    end;
reserve_xregs_is([], Res, Xs, _Used) ->
    {Res,Xs}.

%% Pick up register hints from the successors of this block.
reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},
                   Blocks, XsMap, Res) when Succ =/= Fail,
                                            Fail =/= ?EXCEPTION_BLOCK ->
    #{Succ:=SuccBlk,Fail:=FailBlk} = Blocks,
    case {SuccBlk,FailBlk} of
        {#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}},
         #b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} ->
            %% Both branches ultimately transfer to the same
            %% block (via two blocks with no instructions).
            %% Pick up register hints from the phi nodes
            %% in the common block.
            #{PhiL:=#b_blk{is=PhiIs}} = Blocks,
            Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}),
            res_xregs_from_phi(PhiIs, Fail, Res, Xs);
        {_,_} when Is =/= [] ->
            case last(Is) of
                #b_set{op=succeeded,args=[Arg]} ->
                    %% We know that Arg will not be used at the failure
                    %% label, so we can pick up register hints from the
                    %% success label.
                    Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
                    case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of
                        #{Arg:=Reg} -> #{Arg=>Reg};
                        #{} -> #{}
                    end;
                _ ->
                    %% Register hints from the success block may not
                    %% be safe at the failure block, and vice versa.
                    #{}
            end;
        {_,_} ->
            %% Register hints from the success block may not
            %% be safe at the failure block, and vice versa.
            #{}
    end;
reserve_terminator(L, Is, #b_br{bool=Bool,succ=Succ,fail=Fail},
                   Blocks, XsMap, Res) ->
    case {Bool, Fail} of
        {_, ?EXCEPTION_BLOCK} ->
            %% We know that no variables are used from ?EXCEPTION_BLOCK, so any
            %% register hints from the successor block are safe to use.
            reserve_terminator_1(L, Succ, Is, Blocks, XsMap, Res);
        {#b_literal{val=true}, _} ->
            %% We only have one successor, so its hints are safe to use.
            reserve_terminator_1(L, Succ, Is, Blocks, XsMap, Res);
        {_, _} ->
            %% Register hints from the success block may not
            %% be safe at the failure block, and vice versa.
            #{}
    end;
reserve_terminator(_, _, _, _, _, _) ->
    #{}.

reserve_terminator_1(L, Succ, _Is, Blocks, XsMap, Res) ->
    case {Blocks, XsMap} of
        {#{ Succ := #b_blk{is=[#b_set{op=phi}|_]=PhiIs}}, #{}} ->
            res_xregs_from_phi(PhiIs, L, Res, #{});
        {#{}, #{ Succ := Xs }} ->
            Xs;
        {#{}, #{}} ->
            #{}
    end.

%% Pick up a reservation from a phi node.
res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is],
                   Pred, Res, Acc) ->
    case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of
        [] ->
            %% The value of the phi node for this predecessor
            %% is a literal. Nothing to do here.
            res_xregs_from_phi(Is, Pred, Res, Acc);
        [V] ->
            case Res of
                #{Dst:={prefer,Reg}} ->
                    %% Try placing V in the same register as for
                    %% the phi node.
                    res_xregs_from_phi(Is, Pred, Res, Acc#{V=>Reg});
                #{Dst:=_} ->
                    res_xregs_from_phi(Is, Pred, Res, Acc)
            end
    end;
res_xregs_from_phi(_, _, _, Acc) -> Acc.
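
%% Example (illustrative only; Dst, V, L1, and L2 are hypothetical).
%% For a phi node
%%
%%     Dst = phi { V, ^L1 }, { literal 42, ^L2 }
%%
%% where Res maps Dst to {prefer,{x,0}}, calling this function with
%% Pred = L1 adds the hint V => {x,0}, while Pred = L2 adds nothing
%% because the value for that predecessor is a literal.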

reserve_call_args(Args) ->
    reserve_call_args(Args, 0, #{}).

reserve_call_args([#b_var{}=Var|As], X, Xs) ->
    reserve_call_args(As, X+1, Xs#{Var=>{x,X}});
reserve_call_args([#b_literal{}|As], X, Xs) ->
    reserve_call_args(As, X+1, Xs);
reserve_call_args([], _, Xs) -> Xs.
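
%% Example (illustrative only; A and B are hypothetical #b_var{} terms):
%%
%%     reserve_call_args([A,#b_literal{val=foo},B]) =:=
%%         #{A => {x,0}, B => {x,2}}
%%
%% A literal argument consumes an argument position but needs no
%% register hint.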

reserve_xreg(V, Xs, Res) ->
    case Res of
        #{V:=_} ->
            %% Already reserved (but not as an X register).
            Res;
        #{} ->
            case Xs of
                #{V:=X} ->
                    %% Add a hint that this specific X register is
                    %% preferred, unless it is already in use.
                    Res#{V=>{prefer,X}};
                #{} ->
                    %% Reserve as an X register in general.
                    Res#{V=>x}
            end
    end.
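
%% Example (illustrative only; V is a hypothetical variable):
%%
%%     reserve_xreg(V, #{V => {x,1}}, #{}) =:= #{V => {prefer,{x,1}}}
%%     reserve_xreg(V, #{}, #{})           =:= #{V => x}
%%
%% A variable that is already present in Res (for example reserved as
%% a y or z register) is left as is.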

%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
%%  Prune the list of preferred registers, to make sure that
%%  there are no "holes" (uninitialized X registers) when
%%  invoking the garbage collector.

res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 ->
    %% The number of safe registers is the number of the X registers
    %% used after this point. The actual number of safe registers may
    %% be higher than this number, but this is a conservative safe
    %% estimate.
    NumSafe = foldl(fun(V, N) ->
                            case Res of
                                #{V:={x,_}} -> N + 1;
                                #{V:=_} -> N;
                                #{} -> N + 1
                            end
                    end, 0, Used),

    %% Remove unsafe registers from the list of potential
    %% preferred registers.
    maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs);
res_xregs_prune(Xs, _Used, _Res) -> Xs.
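
%% Example (illustrative only; V, A, and B are hypothetical variables).
%% With Xs = #{V => {x,2}}, Used = [A,B], and Res = #{A => {x,0}, B => z},
%% only A counts towards the number of safe X registers (NumSafe = 1),
%% so the hint for V is pruned ({x,2} is not below NumSafe) and #{} is
%% returned.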

%%%
%%% Register allocation using linear scan.
%%%

-record(i,
        {sort=1 :: instr_number(),
         reg=none :: i_reg(),
         pool=x :: pool_id(),
         var=#b_var{} :: b_var(),
         rs=[] :: [range()]
        }).

-record(l,
        {cur=#i{} :: interval(),
         unhandled_res=[] :: [interval()],
         unhandled_any=[] :: [interval()],
         active=[] :: [interval()],
         inactive=[] :: [interval()],
         free=#{} :: #{var_name()=>pool(),
                       {'next',pool_id()}:=reg_num()},
         regs=[] :: [{b_var(),ssa_register()}]
        }).

-type interval() :: #i{}.
-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'.
-type pool_id() :: 'fr' | 'x' | 'z' | instr_number().
-type pool() :: ordsets:ordset(ssa_register()).

linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
    St = St0#st{intervals=[],res=[]},
    Free = init_free(maps:to_list(Res)),
    Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
    Intervals = sort(Intervals1),
    IsReserved = fun(#i{reg=Reg}) ->
                         case Reg of
                             none -> false;
                             {prefer,{_,_}} -> false;
                             {_,_} -> true
                         end
                 end,
    {UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
    L = #l{unhandled_res=UnhandledRes,
           unhandled_any=Unhandled,free=Free},
    #l{regs=Regs} = do_linear(L),
    St#st{regs=maps:from_list(Regs)}.

init_interval({V,[{Start,_}|_]=Rs}, Res) ->
    Info = map_get(V, Res),
    Pool = case Info of
               {prefer,{x,_}} -> x;
               x -> x;
               {x,_} -> x;
               {y,Uniq} -> Uniq;
               {{y,_},Uniq} -> Uniq;
               z -> z;
               fr -> fr
           end,
    Reg = case Info of
              {prefer,{x,_}} -> Info;
              {x,_} -> Info;
              {{y,_}=Y,_} -> Y;
              _ -> none
          end,
    #i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}.

init_free(Res) ->
    Free0 = rel2fam([{x,{x,0}}|init_free_1(Res)]),
    #{x:=Xs0} = Free1 = maps:from_list(Free0),
    Xs = init_xregs(Xs0),
    Free = Free1#{x:=Xs},
    Next = maps:fold(fun(K, V, A) -> [{{next,K},length(V)}|A] end, [], Free),
    maps:merge(Free, maps:from_list(Next)).

init_free_1([{_,{prefer,{x,_}=Reg}}|Res]) ->
    [{x,Reg}|init_free_1(Res)];
init_free_1([{_,{x,_}=Reg}|Res]) ->
    [{x,Reg}|init_free_1(Res)];
init_free_1([{_,{y,Uniq}}|Res]) ->
    [{Uniq,{y,0}}|init_free_1(Res)];
init_free_1([{_,{{y,_}=Reg,Uniq}}|Res]) ->
    [{Uniq,Reg}|init_free_1(Res)];
init_free_1([{_,z}|Res]) ->
    [{z,{z,0}}|init_free_1(Res)];
init_free_1([{_,fr}|Res]) ->
    [{fr,{fr,0}}|init_free_1(Res)];
init_free_1([{_,x}|Res]) ->
    init_free_1(Res);
init_free_1([]) -> [].

%% Make sure that the pool of xregs is contiguous.
init_xregs([{x,N},{x,M}|Is]) when N+1 =:= M ->
    [{x,N}|init_xregs([{x,M}|Is])];
init_xregs([{x,N}|[{x,_}|_]=Is]) ->
    [{x,N}|init_xregs([{x,N+1}|Is])];
init_xregs([{x,_}]=Is) -> Is.
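
%% Example (illustrative only):
%%
%%     init_xregs([{x,0},{x,3}]) =:= [{x,0},{x,1},{x,2},{x,3}]
%%
%% filling in the gap so that the free pool also contains {x,1}
%% and {x,2}.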

do_linear(L0) ->
    case set_next_current(L0) of
        done ->
            L0;
        L1 ->
            L2 = expire_active(L1),
            L3 = check_inactive(L2),
            Available = collect_available(L3),
            L4 = select_register(Available, L3),
            L = make_cur_active(L4),
            do_linear(L)
    end.

set_next_current(#l{unhandled_res=[Cur1|T1],
                    unhandled_any=[Cur2|T2]}=L) ->
    case {Cur1,Cur2} of
        {#i{sort=N1},#i{sort=N2}} when N1 < N2 ->
            L#l{cur=Cur1,unhandled_res=T1};
        {_,_} ->
            L#l{cur=Cur2,unhandled_any=T2}
    end;
set_next_current(#l{unhandled_res=[],
                    unhandled_any=[Cur|T]}=L) ->
    L#l{cur=Cur,unhandled_any=T};
set_next_current(#l{unhandled_res=[Cur|T],
                    unhandled_any=[]}=L) ->
    L#l{cur=Cur,unhandled_res=T};
set_next_current(#l{unhandled_res=[],unhandled_any=[]}) ->
    done.

expire_active(#l{cur=#i{sort=CurBegin},active=Act0}=L0) ->
    {Act,L} = expire_active(Act0, CurBegin, L0, []),
    L#l{active=Act}.

expire_active([#i{reg=Reg,rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
    {_,_} = Reg,                                %Assertion.
    case overlap_status(Rs0, CurBegin) of
        ends_before_cur ->
            L = free_reg(I, L0),
            expire_active(Is, CurBegin, L, Acc);
        overlapping ->
            expire_active(Is, CurBegin, L0, [I|Acc]);
        not_overlapping ->
            Rs = strip_before_current(Rs0, CurBegin),
            L1 = free_reg(I, L0),
            L = L1#l{inactive=[I#i{rs=Rs}|L1#l.inactive]},
            expire_active(Is, CurBegin, L, Acc)
    end;
expire_active([], _CurBegin, L, Acc) ->
    {Acc,L}.

check_inactive(#l{cur=#i{sort=CurBegin},inactive=InAct0}=L0) ->
    {InAct,L} = check_inactive(InAct0, CurBegin, L0, []),
    L#l{inactive=InAct}.

check_inactive([#i{rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
    case overlap_status(Rs0, CurBegin) of
        ends_before_cur ->
            check_inactive(Is, CurBegin, L0, Acc);
        not_overlapping ->
            check_inactive(Is, CurBegin, L0, [I|Acc]);
        overlapping ->
            Rs = strip_before_current(Rs0, CurBegin),
            L1 = L0#l{active=[I#i{rs=Rs}|L0#l.active]},
            L = reserve_reg(I, L1),
            check_inactive(Is, CurBegin, L, Acc)
    end;
check_inactive([], _CurBegin, L, Acc) ->
    {Acc,L}.

strip_before_current([{_,E}|Rs], CurBegin) when E =< CurBegin ->
    strip_before_current(Rs, CurBegin);
strip_before_current(Rs, _CurBegin) -> Rs.

collect_available(#l{cur=#i{reg={prefer,{_,_}=Prefer}}=I}=L) ->
    %% Use the preferred register if it is available.
    Avail = collect_available(L#l{cur=I#i{reg=none}}),
    case member(Prefer, Avail) of
        true -> [Prefer];
        false -> Avail
    end;
collect_available(#l{cur=#i{reg={_,_}=ReservedReg}}) ->
    %% Return the already reserved register.
    [ReservedReg];
collect_available(#l{unhandled_res=Unhandled,cur=Cur}=L) ->
    Free = get_pool(Cur, L),

    %% Note that since the live intervals are constructed from
    %% SSA form, there cannot be any overlap of the current interval
    %% with any inactive interval. See [3], page 175. Therefore we
    %% only have to check the unhandled intervals for overlap with
    %% the current interval. As a further optimization, we only need
    %% to check the intervals that have reserved registers.
    collect_available(Unhandled, Cur, Free).

collect_available([#i{pool=Pool1}|Is], #i{pool=Pool2}=Cur, Free)
  when Pool1 =/= Pool2 ->
    %% Wrong pool. Ignore this interval.
    collect_available(Is, Cur, Free);
collect_available([#i{reg={_,_}=Reg}=I|Is], Cur, Free0) ->
    case overlaps(I, Cur) of
        true ->
            Free = ordsets:del_element(Reg, Free0),
            collect_available(Is, Cur, Free);
        false ->
            collect_available(Is, Cur, Free0)
    end;
collect_available([], _, Free) -> Free.

select_register([{_,_}=Reg|_], #l{cur=Cur0,regs=Regs}=L) ->
    Cur = Cur0#i{reg=Reg},
    reserve_reg(Cur, L#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]});
select_register([], #l{cur=Cur0,regs=Regs}=L0) ->
    %% Allocate a new register in the pool.
    {Reg,L1} = get_next_free(Cur0, L0),
    Cur = Cur0#i{reg=Reg},
    L = L1#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]},
    reserve_reg(Cur, L).

make_cur_active(#l{cur=Cur,active=Act}=L) ->
    L#l{active=[Cur|Act]}.

overlaps(#i{rs=Rs1}, #i{rs=Rs2}) ->
    are_overlapping(Rs1, Rs2).

overlap_status([{S,E}], CurBegin) ->
    if
        E =< CurBegin -> ends_before_cur;
        CurBegin < S -> not_overlapping;
        true -> overlapping
    end;
overlap_status([{S,E}|Rs], CurBegin) ->
    if
        E =< CurBegin ->
            overlap_status(Rs, CurBegin);
        S =< CurBegin ->
            overlapping;
        true ->
            not_overlapping
    end.
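
%% Example (illustrative only). With CurBegin = 4:
%%
%%     overlap_status([{1,3}], 4) =:= ends_before_cur
%%     overlap_status([{2,6}], 4) =:= overlapping
%%     overlap_status([{6,9}], 4) =:= not_overlapping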

reserve_reg(#i{reg={_,_}=Reg}=I, L) ->
    FreeRegs0 = get_pool(I, L),
    FreeRegs = ordsets:del_element(Reg, FreeRegs0),
    update_pool(I, FreeRegs, L).

free_reg(#i{reg={_,_}=Reg}=I, L) ->
    FreeRegs0 = get_pool(I, L),
    FreeRegs = ordsets:add_element(Reg, FreeRegs0),
    update_pool(I, FreeRegs, L).

get_pool(#i{pool=Pool}, #l{free=Free}) ->
    map_get(Pool, Free).

update_pool(#i{pool=Pool}, New, #l{free=Free0}=L) ->
    Free = Free0#{Pool:=New},
    L#l{free=Free}.

get_next_free(#i{pool=Pool}, #l{free=Free0}=L0) ->
    K = {next,Pool},
    N = map_get(K, Free0),
    Free = Free0#{K:=N+1},
    L = L0#l{free=Free},
    if
        is_integer(Pool) -> {{y,N},L};
        is_atom(Pool)    -> {{Pool,N},L}
    end.

%%%
%%% Interval utilities.
%%%

are_overlapping([R|Rs1], Rs2) ->
    case are_overlapping_1(R, Rs2) of
        true ->
            true;
        false ->
            are_overlapping(Rs1, Rs2)
    end;
are_overlapping([], _) -> false.

are_overlapping_1({_S1,E1}, [{S2,_E2}|_]) when E1 < S2 ->
    false;
are_overlapping_1({S1,E1}=R, [{S2,E2}|Rs]) ->
    (S2 < E1 andalso E2 > S1) orelse are_overlapping_1(R, Rs);
are_overlapping_1({_,_}, []) -> false.
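
%% Example (illustrative only):
%%
%%     are_overlapping([{1,4},{10,12}], [{5,8}]) =:= false
%%     are_overlapping([{1,6}], [{5,8}])         =:= true
%%
%% The end position is treated as exclusive, so {1,4} and {4,8} do
%% not overlap.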

%%%
%%% Utilities.
%%%

%% is_loop_header(L, Blocks) -> false|true.
%%  Check whether the block is a loop header.

is_loop_header(L, Blocks) ->
    case map_get(L, Blocks) of
        #b_blk{is=[I|_]} -> beam_ssa:is_loop_header(I);
        #b_blk{} -> false
    end.

rel2fam(S0) ->
    S1 = sofs:relation(S0),
    S = sofs:rel2fam(S1),
    sofs:to_external(S).
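
%% Example (illustrative only): rel2fam/1 turns a relation into an
%% ordered family, grouping the values for each key into a sorted set:
%%
%%     rel2fam([{x,{x,0}},{z,{z,0}},{x,{x,2}}]) =:=
%%         [{x,[{x,0},{x,2}]},{z,[{z,0}]}]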

split_phis(Is) ->
    splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is).

is_yreg({y,_}) -> true;
is_yreg({x,_}) -> false;
is_yreg({z,_}) -> false;
is_yreg({fr,_}) -> false.

new_vars([Base|Vs0], Count0) ->
    {V,Count1} = new_var(Base, Count0),
    {Vs,Count} = new_vars(Vs0, Count1),
    {[V|Vs],Count};
new_vars([], Count) -> {[],Count}.

new_var({Base,Int}, Count) ->
    true = is_integer(Int),                     %Assertion.
    {#b_var{name={Base,Count}},Count+1};
new_var(Base, Count) ->
    {#b_var{name={Base,Count}},Count+1}.