1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2018-2020. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20
21%%%
22%%% This is a collection of various optimizations that don't need a separate
23%%% pass by themselves and/or are mutually beneficial to other passes.
24%%%
25%%% The optimizations are applied in "phases," each with a list of sub-passes
26%%% to run. These sub-passes are applied on all functions in a module before
27%%% moving on to the next phase, which lets us gather module-level information
28%%% in one phase and then apply it in the next without having to risk working
29%%% with incomplete information.
30%%%
31%%% Each sub-pass operates on a #st{} record and a func_info_db(), where the
32%%% former is just a #b_function{} whose blocks can be represented either in
33%%% linear or map form, and the latter is a map with information about all
34%%% functions in the module (see beam_ssa_opt.hrl for more details).
35%%%
36
37-module(beam_ssa_opt).
38-export([module/2]).
39
40-include("beam_ssa_opt.hrl").
41
42-import(lists, [all/2,append/1,duplicate/2,foldl/3,keyfind/3,member/2,
43                reverse/1,reverse/2,
44                splitwith/2,sort/1,takewhile/2,unzip/1]).
45
46-define(DEFAULT_REPETITIONS, 2).
47
48-spec module(beam_ssa:b_module(), [compile:option()]) ->
49                    {'ok',beam_ssa:b_module()}.
50
51-record(st, {ssa :: [{beam_ssa:label(),beam_ssa:b_blk()}] |
52                    beam_ssa:block_map(),
53             args :: [beam_ssa:b_var()],
54             cnt :: beam_ssa:label(),
55             anno :: beam_ssa:anno()}).
56-type st_map() :: #{ func_id() => #st{} }.
57
58module(Module, Opts) ->
59    FuncDb0 = case proplists:get_value(no_module_opt, Opts, false) of
60                  false -> build_func_db(Module);
61                  true -> #{}
62              end,
63
64    %% Passes that perform module-level optimizations are often aided by
65    %% optimizing callers before callees and vice versa, so we optimize all
66    %% functions in call order, flipping it as required.
67    StMap0 = build_st_map(Module),
68    Order = get_call_order_po(StMap0, FuncDb0),
69
70    Phases =
71        [{Order, prologue_passes(Opts)}] ++
72        repeat(Opts, repeated_passes(Opts), Order) ++
73        [{Order, epilogue_passes(Opts)}],
74
75    {StMap, _FuncDb} = foldl(fun({FuncIds, Ps}, {StMap, FuncDb}) ->
76                                     phase(FuncIds, Ps, StMap, FuncDb)
77                             end, {StMap0, FuncDb0}, Phases),
78
79    {ok, finish(Module, StMap)}.
80
81phase([FuncId | Ids], Ps, StMap, FuncDb0) ->
82    try compile:run_sub_passes(Ps, {map_get(FuncId, StMap), FuncDb0}) of
83        {St, FuncDb} ->
84            phase(Ids, Ps, StMap#{ FuncId => St }, FuncDb)
85    catch
86        Class:Error:Stack ->
87            #b_local{name=#b_literal{val=Name},arity=Arity} = FuncId,
88            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
89            erlang:raise(Class, Error, Stack)
90    end;
91phase([], _Ps, StMap, FuncDb) ->
92    {StMap, FuncDb}.
93
94%% Repeats the given passes, alternating the order between runs to make the
95%% type pass more efficient.
96repeat(Opts, Ps, OrderA) ->
97    Repeat = proplists:get_value(ssa_opt_repeat, Opts, ?DEFAULT_REPETITIONS),
98    OrderB = reverse(OrderA),
99    repeat_1(Repeat, Ps, OrderA, OrderB).
100
101repeat_1(0, _Opts, _OrderA, _OrderB) ->
102    [];
103repeat_1(N, Ps, OrderA, OrderB) when N > 0, N rem 2 =:= 0 ->
104    [{OrderA, Ps} | repeat_1(N - 1, Ps, OrderA, OrderB)];
105repeat_1(N, Ps, OrderA, OrderB) when N > 0, N rem 2 =:= 1 ->
106    [{OrderB, Ps} | repeat_1(N - 1, Ps, OrderA, OrderB)].
107
108%%
109
110get_func_id(F) ->
111    {_Mod, Name, Arity} = beam_ssa:get_anno(func_info, F),
112    #b_local{name=#b_literal{val=Name}, arity=Arity}.
113
114-spec build_st_map(#b_module{}) -> st_map().
115build_st_map(#b_module{body=Fs}) ->
116    build_st_map_1(Fs, #{}).
117
118build_st_map_1([F | Fs], Map) ->
119    #b_function{anno=Anno,args=Args,cnt=Counter,bs=Bs} = F,
120    St = #st{anno=Anno,args=Args,cnt=Counter,ssa=Bs},
121    build_st_map_1(Fs, Map#{ get_func_id(F) => St });
122build_st_map_1([], Map) ->
123    Map.
124
125-spec finish(#b_module{}, st_map()) -> #b_module{}.
126finish(#b_module{body=Fs0}=Module, StMap) ->
127    Module#b_module{body=finish_1(Fs0, StMap)}.
128
129finish_1([F0 | Fs], StMap) ->
130    #st{anno=Anno,cnt=Counter,ssa=Blocks} = map_get(get_func_id(F0), StMap),
131    F = F0#b_function{anno=Anno,bs=Blocks,cnt=Counter},
132    [F | finish_1(Fs, StMap)];
133finish_1([], _StMap) ->
134    [].
135
136%%
137
138-define(PASS(N), {N,fun N/1}).
139
140prologue_passes(Opts) ->
141    Ps = [?PASS(ssa_opt_split_blocks),
142          ?PASS(ssa_opt_coalesce_phis),
143          ?PASS(ssa_opt_tail_phis),
144          ?PASS(ssa_opt_element),
145          ?PASS(ssa_opt_linearize),
146          ?PASS(ssa_opt_tuple_size),
147          ?PASS(ssa_opt_record),
148          ?PASS(ssa_opt_cse),                   %Helps the first type pass.
149          ?PASS(ssa_opt_type_start)],
150    passes_1(Ps, Opts).
151
152%% These passes all benefit from each other (in roughly this order), so they
153%% are repeated as required.
154repeated_passes(Opts) ->
155    Ps = [?PASS(ssa_opt_live),
156          ?PASS(ssa_opt_bs_puts),
157          ?PASS(ssa_opt_dead),
158          ?PASS(ssa_opt_cse),
159          ?PASS(ssa_opt_tail_phis),
160          ?PASS(ssa_opt_type_continue)],        %Must run after ssa_opt_dead to
161                                                %clean up phi nodes.
162    passes_1(Ps, Opts).
163
164epilogue_passes(Opts) ->
165    Ps = [?PASS(ssa_opt_type_finish),
166          ?PASS(ssa_opt_float),
167          ?PASS(ssa_opt_sw),
168
169          %% Run live one more time to clean up after the float and sw
170          %% passes.
171          ?PASS(ssa_opt_live),
172          ?PASS(ssa_opt_bsm),
173          ?PASS(ssa_opt_bsm_units),
174          ?PASS(ssa_opt_bsm_shortcut),
175          ?PASS(ssa_opt_blockify),
176          ?PASS(ssa_opt_sink),
177          ?PASS(ssa_opt_merge_blocks),
178          ?PASS(ssa_opt_get_tuple_element),
179          ?PASS(ssa_opt_trim_unreachable)],
180    passes_1(Ps, Opts).
181
182passes_1(Ps, Opts0) ->
183    Negations = [{list_to_atom("no_"++atom_to_list(N)),N} ||
184                    {N,_} <- Ps],
185    Opts = proplists:substitute_negations(Negations, Opts0),
186    [case proplists:get_value(Name, Opts, true) of
187         true ->
188             P;
189         false ->
190             {NoName,Name} = keyfind(Name, 2, Negations),
191             {NoName,fun(S) -> S end}
192     end || {Name,_}=P <- Ps].
193
194%% Builds a function information map with basic information about incoming and
195%% outgoing local calls, as well as whether the function is exported.
196-spec build_func_db(#b_module{}) -> func_info_db().
197build_func_db(#b_module{body=Fs,exports=Exports}) ->
198    try
199        fdb_1(Fs, gb_sets:from_list(Exports), #{})
200    catch
201        %% All module-level optimizations are invalid when a NIF can override a
202        %% function, so we have to bail out.
203        throw:load_nif -> #{}
204    end.
205
206fdb_1([#b_function{ args=Args,bs=Bs }=F | Fs], Exports, FuncDb0) ->
207    Id = get_func_id(F),
208
209    #b_local{name=#b_literal{val=Name}, arity=Arity} = Id,
210    Exported = gb_sets:is_element({Name, Arity}, Exports),
211    ArgTypes = duplicate(length(Args), #{}),
212
213    FuncDb1 = case FuncDb0 of
214                  %% We may have an entry already if someone's called us.
215                  #{ Id := Info } ->
216                      FuncDb0#{ Id := Info#func_info{ exported=Exported,
217                                                      arg_types=ArgTypes }};
218                  #{} ->
219                      FuncDb0#{ Id => #func_info{ exported=Exported,
220                                                  arg_types=ArgTypes }}
221              end,
222
223    FuncDb = beam_ssa:fold_rpo(fun(_L, #b_blk{is=Is}, FuncDb) ->
224                                       fdb_is(Is, Id, FuncDb)
225                               end, FuncDb1, Bs),
226
227    fdb_1(Fs, Exports, FuncDb);
228fdb_1([], _Exports, FuncDb) ->
229    FuncDb.
230
231fdb_is([#b_set{op=call,
232               args=[#b_local{}=Callee | _]} | Is],
233       Caller, FuncDb) ->
234    fdb_is(Is, Caller, fdb_update(Caller, Callee, FuncDb));
235fdb_is([#b_set{op=call,
236               args=[#b_remote{mod=#b_literal{val=erlang},
237                               name=#b_literal{val=load_nif}},
238                     _Path, _LoadInfo]} | _Is], _Caller, _FuncDb) ->
239    throw(load_nif);
240fdb_is([_ | Is], Caller, FuncDb) ->
241    fdb_is(Is, Caller, FuncDb);
242fdb_is([], _Caller, FuncDb) ->
243    FuncDb.
244
245fdb_update(Caller, Callee, FuncDb) ->
246    CallerVertex = maps:get(Caller, FuncDb, #func_info{}),
247    CalleeVertex = maps:get(Callee, FuncDb, #func_info{}),
248
249    Calls = ordsets:add_element(Callee, CallerVertex#func_info.out),
250    CalledBy = ordsets:add_element(Caller, CalleeVertex#func_info.in),
251
252    FuncDb#{ Caller => CallerVertex#func_info{out=Calls},
253             Callee => CalleeVertex#func_info{in=CalledBy} }.
254
255%% Returns the post-order of all local calls in this module. That is,
256%% called functions will be ordered before the functions calling them.
257%%
258%% Functions where module-level optimization is disabled are added last in
259%% arbitrary order.
260
261get_call_order_po(StMap, FuncDb) ->
262    Order = gco_po(FuncDb),
263    Order ++ maps:fold(fun(K, _V, Acc) ->
264                               case is_map_key(K, FuncDb) of
265                                   false -> [K | Acc];
266                                   true -> Acc
267                               end
268                       end, [], StMap).
269
270gco_po(FuncDb) ->
271    All = sort(maps:keys(FuncDb)),
272    {RPO,_} = gco_rpo(All, FuncDb, cerl_sets:new(), []),
273    reverse(RPO).
274
275gco_rpo([Id|Ids], FuncDb, Seen0, Acc0) ->
276    case cerl_sets:is_element(Id, Seen0) of
277        true ->
278            gco_rpo(Ids, FuncDb, Seen0, Acc0);
279        false ->
280            #func_info{out=Successors} = map_get(Id, FuncDb),
281            Seen1 = cerl_sets:add_element(Id, Seen0),
282            {Acc,Seen} = gco_rpo(Successors, FuncDb, Seen1, Acc0),
283            gco_rpo(Ids, FuncDb, Seen, [Id|Acc])
284    end;
285gco_rpo([], _, Seen, Acc) ->
286    {Acc,Seen}.
287
288%%%
289%%% Trivial sub passes.
290%%%
291
292ssa_opt_dead({#st{ssa=Linear}=St, FuncDb}) ->
293    {St#st{ssa=beam_ssa_dead:opt(Linear)}, FuncDb}.
294
295ssa_opt_linearize({#st{ssa=Blocks}=St, FuncDb}) ->
296    {St#st{ssa=beam_ssa:linearize(Blocks)}, FuncDb}.
297
298ssa_opt_type_start({#st{ssa=Linear0,args=Args,anno=Anno}=St0, FuncDb0}) ->
299    {Linear, FuncDb} = beam_ssa_type:opt_start(Linear0, Args, Anno, FuncDb0),
300    {St0#st{ssa=Linear}, FuncDb}.
301
302ssa_opt_type_continue({#st{ssa=Linear0,args=Args,anno=Anno}=St0, FuncDb0}) ->
303    {Linear, FuncDb} = beam_ssa_type:opt_continue(Linear0, Args, Anno, FuncDb0),
304    {St0#st{ssa=Linear}, FuncDb}.
305
306ssa_opt_type_finish({#st{args=Args,anno=Anno0}=St0, FuncDb0}) ->
307    {Anno, FuncDb} = beam_ssa_type:opt_finish(Args, Anno0, FuncDb0),
308    {St0#st{anno=Anno}, FuncDb}.
309
310ssa_opt_blockify({#st{ssa=Linear}=St, FuncDb}) ->
311    {St#st{ssa=maps:from_list(Linear)}, FuncDb}.
312
313ssa_opt_trim_unreachable({#st{ssa=Blocks}=St, FuncDb}) ->
314    {St#st{ssa=beam_ssa:trim_unreachable(Blocks)}, FuncDb}.
315
316%%%
317%%% Split blocks before certain instructions to enable more optimizations.
318%%%
319%%% Splitting before element/2 enables the optimization that swaps
320%%% element/2 instructions.
321%%%
322%%% Splitting before call and make_fun instructions gives more opportunities
323%%% for sinking get_tuple_element instructions.
324%%%
325
326ssa_opt_split_blocks({#st{ssa=Blocks0,cnt=Count0}=St, FuncDb}) ->
327    P = fun(#b_set{op={bif,element}}) -> true;
328           (#b_set{op=call}) -> true;
329           (#b_set{op=make_fun}) -> true;
330           (_) -> false
331        end,
332    {Blocks,Count} = beam_ssa:split_blocks(P, Blocks0, Count0),
333    {St#st{ssa=Blocks,cnt=Count}, FuncDb}.
334
335%%%
336%%% Coalesce phi nodes.
337%%%
338%%% Nested cases can led to code such as this:
339%%%
340%%%     10:
341%%%       _1 = phi {literal value1, label 8}, {Var, label 9}
342%%%       br 11
343%%%
344%%%     11:
345%%%       _2 = phi {_1, label 10}, {literal false, label 3}
346%%%
347%%% The phi nodes can be coalesced like this:
348%%%
349%%%     11:
350%%%       _2 = phi {literal value1, label 8}, {Var, label 9}, {literal false, label 3}
351%%%
352%%% Coalescing can help other optimizations, and can in some cases reduce register
353%%% shuffling (if the phi variables for two phi nodes happens to be allocated to
354%%% different registers).
355%%%
356
357ssa_opt_coalesce_phis({#st{ssa=Blocks0}=St, FuncDb}) ->
358    Ls = beam_ssa:rpo(Blocks0),
359    Blocks = c_phis_1(Ls, Blocks0),
360    {St#st{ssa=Blocks}, FuncDb}.
361
362c_phis_1([L|Ls], Blocks0) ->
363    case map_get(L, Blocks0) of
364        #b_blk{is=[#b_set{op=phi}|_]}=Blk ->
365            Blocks = c_phis_2(L, Blk, Blocks0),
366            c_phis_1(Ls, Blocks);
367        #b_blk{} ->
368            c_phis_1(Ls, Blocks0)
369    end;
370c_phis_1([], Blocks) -> Blocks.
371
372c_phis_2(L, #b_blk{is=Is0}=Blk0, Blocks0) ->
373    case c_phis_args(Is0, Blocks0) of
374        none ->
375            Blocks0;
376        {_,_,Preds}=Info ->
377            Is = c_rewrite_phis(Is0, Info),
378            Blk = Blk0#b_blk{is=Is},
379            Blocks = Blocks0#{L:=Blk},
380            c_fix_branches(Preds, L, Blocks)
381    end.
382
383c_phis_args([#b_set{op=phi,args=Args0}|Is], Blocks) ->
384    case c_phis_args_1(Args0, Blocks) of
385        none ->
386            c_phis_args(Is, Blocks);
387        Res ->
388            Res
389    end;
390c_phis_args(_, _Blocks) -> none.
391
392c_phis_args_1([{Var,Pred}|As], Blocks) ->
393    case c_get_pred_vars(Var, Pred, Blocks) of
394        none ->
395            c_phis_args_1(As, Blocks);
396        Result ->
397            Result
398    end;
399c_phis_args_1([], _Blocks) -> none.
400
401c_get_pred_vars(Var, Pred, Blocks) ->
402    case map_get(Pred, Blocks) of
403        #b_blk{is=[#b_set{op=phi,dst=Var,args=Args}]} ->
404            {Var,Pred,Args};
405        #b_blk{} ->
406            none
407    end.
408
409c_rewrite_phis([#b_set{op=phi,args=Args0}=I|Is], Info) ->
410    Args = c_rewrite_phi(Args0, Info),
411    [I#b_set{args=Args}|c_rewrite_phis(Is, Info)];
412c_rewrite_phis(Is, _Info) -> Is.
413
414c_rewrite_phi([{Var,Pred}|As], {Var,Pred,Values}) ->
415    Values ++ As;
416c_rewrite_phi([{Value,Pred}|As], {_,Pred,Values}) ->
417    [{Value,P} || {_,P} <- Values] ++ As;
418c_rewrite_phi([A|As], Info) ->
419    [A|c_rewrite_phi(As, Info)];
420c_rewrite_phi([], _Info) -> [].
421
422c_fix_branches([{_,Pred}|As], L, Blocks0) ->
423    #b_blk{last=Last0} = Blk0 = map_get(Pred, Blocks0),
424    #b_br{bool=#b_literal{val=true}} = Last0,   %Assertion.
425    Last = Last0#b_br{bool=#b_literal{val=true},succ=L,fail=L},
426    Blk = Blk0#b_blk{last=Last},
427    Blocks = Blocks0#{Pred:=Blk},
428    c_fix_branches(As, L, Blocks);
429c_fix_branches([], _, Blocks) -> Blocks.
430
431%%%
432%%% Eliminate phi nodes in the tail of a function.
433%%%
434%%% Try to eliminate short blocks that starts with a phi node
435%%% and end in a return. For example:
436%%%
437%%%    Result = phi { Res1, 4 }, { literal true, 5 }
438%%%    Ret = put_tuple literal ok, Result
439%%%    ret Ret
440%%%
441%%% The code in this block can be inserted at the end blocks 4 and
442%%% 5. Thus, the following code can be inserted into block 4:
443%%%
444%%%    Ret:1 = put_tuple literal ok, Res1
445%%%    ret Ret:1
446%%%
447%%% And the following code into block 5:
448%%%
449%%%    Ret:2 = put_tuple literal ok, literal true
450%%%    ret Ret:2
451%%%
452%%% Which can be further simplified to:
453%%%
454%%%    ret literal {ok, true}
455%%%
456%%% This transformation may lead to more code improvements:
457%%%
458%%%   - Stack trimming
459%%%   - Fewer test_heap instructions
460%%%   - Smaller stack frames
461%%%
462
463ssa_opt_tail_phis({#st{ssa=SSA0,cnt=Count0}=St, FuncDb}) ->
464    {SSA,Count} = opt_tail_phis(SSA0, Count0),
465    {St#st{ssa=SSA,cnt=Count}, FuncDb}.
466
467opt_tail_phis(Blocks, Count) when is_map(Blocks) ->
468    opt_tail_phis(maps:values(Blocks), Blocks, Count);
469opt_tail_phis(Linear0, Count0) when is_list(Linear0) ->
470    Blocks0 = maps:from_list(Linear0),
471    {Blocks,Count} = opt_tail_phis(Blocks0, Count0),
472    {beam_ssa:linearize(Blocks),Count}.
473
474opt_tail_phis([#b_blk{is=Is0,last=Last}|Bs], Blocks0, Count0) ->
475    case {Is0,Last} of
476        {[#b_set{op=phi,args=[_,_|_]}|_],#b_ret{arg=#b_var{}}=Ret} ->
477            {Phis,Is} = splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is0),
478            case suitable_tail_ops(Is) of
479                true ->
480                    {Blocks,Count} = opt_tail_phi(Phis, Is, Ret,
481                                                  Blocks0, Count0),
482                    opt_tail_phis(Bs, Blocks, Count);
483                false ->
484                    opt_tail_phis(Bs, Blocks0, Count0)
485            end;
486        {_,_} ->
487            opt_tail_phis(Bs, Blocks0, Count0)
488    end;
489opt_tail_phis([], Blocks, Count) ->
490    {Blocks,Count}.
491
492opt_tail_phi(Phis0, Is, Ret, Blocks0, Count0) ->
493    Phis = rel2fam(reduce_phis(Phis0)),
494    {Blocks,Count,Cost} =
495        foldl(fun(PhiArg, Acc) ->
496                      opt_tail_phi_arg(PhiArg, Is, Ret, Acc)
497              end, {Blocks0,Count0,0}, Phis),
498    MaxCost = length(Phis) * 3 + 2,
499    if
500        Cost =< MaxCost ->
501            %% The transformation would cause at most a slight
502            %% increase in code size if no more optimizations
503            %% can be applied.
504            {Blocks,Count};
505        true ->
506            %% The code size would be increased too much.
507            {Blocks0,Count0}
508    end.
509
510reduce_phis([#b_set{dst=PhiDst,args=PhiArgs}|Is]) ->
511    [{L,{PhiDst,Val}} || {Val,L} <- PhiArgs] ++ reduce_phis(Is);
512reduce_phis([]) -> [].
513
514opt_tail_phi_arg({PredL,Sub0}, Is0, Ret0, {Blocks0,Count0,Cost0}) ->
515    Blk0 = map_get(PredL, Blocks0),
516    #b_blk{is=IsPrefix,last=#b_br{succ=Next,fail=Next}} = Blk0,
517    case is_exit_bif(IsPrefix) of
518        false ->
519            Sub1 = maps:from_list(Sub0),
520            {Is1,Count,Sub} = new_names(Is0, Sub1, Count0, []),
521            Is2 = [sub(I, Sub) || I <- Is1],
522            Cost = build_cost(Is2, Cost0),
523            Is = IsPrefix ++ Is2,
524            Ret = sub(Ret0, Sub),
525            Blk = Blk0#b_blk{is=Is,last=Ret},
526            Blocks = Blocks0#{PredL:=Blk},
527            {Blocks,Count,Cost};
528        true ->
529            %% The block ends in a call to a function that
530            %% will cause an exception.
531            {Blocks0,Count0,Cost0+3}
532    end.
533
534is_exit_bif([#b_set{op=call,
535                    args=[#b_remote{mod=#b_literal{val=Mod},
536                                    name=#b_literal{val=Name}}|Args]}]) ->
537    erl_bifs:is_exit_bif(Mod, Name, length(Args));
538is_exit_bif(_) -> false.
539
540new_names([#b_set{dst=Dst}=I|Is], Sub0, Count0, Acc) ->
541    {NewDst,Count} = new_var(Dst, Count0),
542    Sub = Sub0#{Dst=>NewDst},
543    new_names(Is, Sub, Count, [I#b_set{dst=NewDst}|Acc]);
544new_names([], Sub, Count, Acc) ->
545    {reverse(Acc),Count,Sub}.
546
547suitable_tail_ops(Is) ->
548    all(fun(#b_set{op=Op}) ->
549                is_suitable_tail_op(Op)
550        end, Is).
551
552is_suitable_tail_op({bif,_}) -> true;
553is_suitable_tail_op(put_list) -> true;
554is_suitable_tail_op(put_tuple) -> true;
555is_suitable_tail_op(_) -> false.
556
557build_cost([#b_set{op=put_list,args=Args}|Is], Cost) ->
558    case are_all_literals(Args) of
559        true ->
560            build_cost(Is, Cost);
561        false ->
562            build_cost(Is, Cost + 1)
563    end;
564build_cost([#b_set{op=put_tuple,args=Args}|Is], Cost) ->
565    case are_all_literals(Args) of
566        true ->
567            build_cost(Is, Cost);
568        false ->
569            build_cost(Is, Cost + length(Args) + 1)
570    end;
571build_cost([#b_set{op={bif,_},args=Args}|Is], Cost) ->
572    case are_all_literals(Args) of
573        true ->
574            build_cost(Is, Cost);
575        false ->
576            build_cost(Is, Cost + 1)
577    end;
578build_cost([], Cost) -> Cost.
579
580are_all_literals(Args) ->
581    all(fun(#b_literal{}) -> true;
582                (_) -> false
583             end, Args).
584
585%%%
586%%% Order element/2 calls.
587%%%
588%%% Order an unbroken chain of element/2 calls for the same tuple
589%%% with the same failure label so that the highest element is
590%%% retrieved first. That will allow the other element/2 calls to
591%%% be replaced with get_tuple_element/3 instructions.
592%%%
593
594ssa_opt_element({#st{ssa=Blocks}=St, FuncDb}) ->
595    %% Collect the information about element instructions in this
596    %% function.
597    GetEls = collect_element_calls(beam_ssa:linearize(Blocks)),
598
599    %% Collect the element instructions into chains. The
600    %% element calls in each chain are ordered in reverse
601    %% execution order.
602    Chains = collect_chains(GetEls, []),
603
604    %% For each chain, swap the first element call with the
605    %% element call with the highest index.
606    {St#st{ssa=swap_element_calls(Chains, Blocks)}, FuncDb}.
607
608collect_element_calls([{L,#b_blk{is=Is0,last=Last}}|Bs]) ->
609    case {Is0,Last} of
610        {[#b_set{op={bif,element},dst=Element,
611                 args=[#b_literal{val=N},#b_var{}=Tuple]},
612          #b_set{op=succeeded,dst=Bool,args=[Element]}],
613         #b_br{bool=Bool,succ=Succ,fail=Fail}} ->
614            Info = {L,Succ,{Tuple,Fail},N},
615            [Info|collect_element_calls(Bs)];
616        {_,_} ->
617            collect_element_calls(Bs)
618    end;
619collect_element_calls([]) -> [].
620
621collect_chains([{This,_,V,_}=El|Els], [{_,This,V,_}|_]=Chain) ->
622    %% Add to the previous chain.
623    collect_chains(Els, [El|Chain]);
624collect_chains([El|Els], [_,_|_]=Chain) ->
625    %% Save the previous chain and start a new chain.
626    [Chain|collect_chains(Els, [El])];
627collect_chains([El|Els], _Chain) ->
628    %% The previous chain is too short; discard it and start a new.
629    collect_chains(Els, [El]);
630collect_chains([], [_,_|_]=Chain) ->
631    %% Save the last chain.
632    [Chain];
633collect_chains([], _) ->  [].
634
635swap_element_calls([[{L,_,_,N}|_]=Chain|Chains], Blocks0) ->
636    Blocks = swap_element_calls_1(Chain, {N,L}, Blocks0),
637    swap_element_calls(Chains, Blocks);
638swap_element_calls([], Blocks) -> Blocks.
639
640swap_element_calls_1([{L1,_,_,N1}], {N2,L2}, Blocks) when N2 > N1 ->
641    %% We have reached the end of the chain, and the first
642    %% element instrution to be executed. Its index is lower
643    %% than the maximum index found while traversing the chain,
644    %% so we will need to swap the instructions.
645    #{L1:=Blk1,L2:=Blk2} = Blocks,
646    [#b_set{dst=Dst1}=GetEl1,Succ1] = Blk1#b_blk.is,
647    [#b_set{dst=Dst2}=GetEl2,Succ2] = Blk2#b_blk.is,
648    Is1 = [GetEl2,Succ1#b_set{args=[Dst2]}],
649    Is2 = [GetEl1,Succ2#b_set{args=[Dst1]}],
650    Blocks#{L1:=Blk1#b_blk{is=Is1},L2:=Blk2#b_blk{is=Is2}};
651swap_element_calls_1([{L,_,_,N1}|Els], {N2,_}, Blocks) when N1 > N2 ->
652    swap_element_calls_1(Els, {N2,L}, Blocks);
653swap_element_calls_1([_|Els], Highest, Blocks) ->
654    swap_element_calls_1(Els, Highest, Blocks);
655swap_element_calls_1([], _, Blocks) ->
656    %% Nothing to do. The element call with highest index
657    %% is already the first one to be executed.
658    Blocks.
659
660%%%
661%%% Record optimization.
662%%%
663%%% Replace tuple matching with an is_tagged_tuple instruction
664%%% when applicable.
665%%%
666
667ssa_opt_record({#st{ssa=Linear}=St, FuncDb}) ->
668    Blocks = maps:from_list(Linear),
669    {St#st{ssa=record_opt(Linear, Blocks)}, FuncDb}.
670
671record_opt([{L,#b_blk{is=Is0,last=Last}=Blk0}|Bs], Blocks) ->
672    Is = record_opt_is(Is0, Last, Blocks),
673    Blk = Blk0#b_blk{is=Is},
674    [{L,Blk}|record_opt(Bs, Blocks)];
675record_opt([], _Blocks) -> [].
676
677record_opt_is([#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}=Set],
678              Last, Blocks) ->
679    case is_tagged_tuple(Tuple, Bool, Last, Blocks) of
680        {yes,Size,Tag} ->
681            Args = [Tuple,Size,Tag],
682            [Set#b_set{op=is_tagged_tuple,args=Args}];
683        no ->
684            [Set]
685    end;
686record_opt_is([I|Is]=Is0, #b_br{bool=Bool}=Last, Blocks) ->
687    case is_tagged_tuple_1(Is0, Last, Blocks) of
688        {yes,_Fail,Tuple,Arity,Tag} ->
689            Args = [Tuple,Arity,Tag],
690            [I#b_set{op=is_tagged_tuple,dst=Bool,args=Args}];
691        no ->
692            [I|record_opt_is(Is, Last, Blocks)]
693    end;
694record_opt_is([I|Is], Last, Blocks) ->
695    [I|record_opt_is(Is, Last, Blocks)];
696record_opt_is([], _Last, _Blocks) -> [].
697
698is_tagged_tuple(#b_var{}=Tuple, Bool,
699                #b_br{bool=Bool,succ=Succ,fail=Fail},
700                Blocks) ->
701    #b_blk{is=Is,last=Last} = map_get(Succ, Blocks),
702    case is_tagged_tuple_1(Is, Last, Blocks) of
703        {yes,Fail,Tuple,Arity,Tag} ->
704            {yes,Arity,Tag};
705        _ ->
706            no
707    end;
708is_tagged_tuple(_, _, _, _) -> no.
709
710is_tagged_tuple_1(Is, Last, Blocks) ->
711    case {Is,Last} of
712        {[#b_set{op={bif,tuple_size},dst=ArityVar,
713                 args=[#b_var{}=Tuple]},
714          #b_set{op={bif,'=:='},
715                 dst=Bool,
716                 args=[ArityVar, #b_literal{val=ArityVal}=Arity]}],
717         #b_br{bool=Bool,succ=Succ,fail=Fail}}
718          when is_integer(ArityVal) ->
719            SuccBlk = map_get(Succ, Blocks),
720            case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of
721                no ->
722                    no;
723                {yes,Tag} ->
724                    {yes,Fail,Tuple,Arity,Tag}
725            end;
726        _ ->
727            no
728    end.
729
730is_tagged_tuple_2(#b_blk{is=Is,
731                         last=#b_br{bool=#b_var{}=Bool,fail=Fail}},
732                  Tuple, Fail) ->
733    is_tagged_tuple_3(Is, Bool, Tuple);
734is_tagged_tuple_2(#b_blk{}, _, _) -> no.
735
736is_tagged_tuple_3([#b_set{op=get_tuple_element,
737                          dst=TagVar,
738                          args=[#b_var{}=Tuple,#b_literal{val=0}]}|Is],
739                  Bool, Tuple) ->
740    is_tagged_tuple_4(Is, Bool, TagVar);
741is_tagged_tuple_3([_|Is], Bool, Tuple) ->
742    is_tagged_tuple_3(Is, Bool, Tuple);
743is_tagged_tuple_3([], _, _) -> no.
744
745is_tagged_tuple_4([#b_set{op={bif,'=:='},dst=Bool,
746                          args=[#b_var{}=TagVar,
747                                #b_literal{val=TagVal}=Tag]}],
748                 Bool, TagVar) when is_atom(TagVal) ->
749    {yes,Tag};
750is_tagged_tuple_4([_|Is], Bool, TagVar) ->
751    is_tagged_tuple_4(Is, Bool, TagVar);
752is_tagged_tuple_4([], _, _) -> no.
753
754%%%
755%%% Common subexpression elimination (CSE).
756%%%
757%%% Eliminate repeated evaluation of identical expressions. To avoid
758%%% increasing the size of the stack frame, we don't eliminate
759%%% subexpressions across instructions that clobber the X registers.
760%%%
761
762ssa_opt_cse({#st{ssa=Linear}=St, FuncDb}) ->
763    M = #{0=>#{}},
764    {St#st{ssa=cse(Linear, #{}, M)}, FuncDb}.
765
766cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
767    Es0 = map_get(L, M0),
768    {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
769    Last = sub(Last0, Sub),
770    M = cse_successors(Is1, Blk, Es, M0),
771    Is = reverse(Is1),
772    [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
773cse([], _, _) -> [].
774
775cse_successors([#b_set{op=succeeded,args=[Src]},Bif|_], Blk, EsSucc, M0) ->
776    case cse_suitable(Bif) of
777        true ->
778            %% The previous instruction only has a valid value at the success branch.
779            %% We must remove the substitution for Src from the failure branch.
780            #b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk,
781            M = cse_successors_1([Succ], EsSucc, M0),
782            EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc),
783            cse_successors_1([Fail], EsFail, M);
784        false ->
785            %% There can't be any replacement for Src in EsSucc. No need for
786            %% any special handling.
787            cse_successors_1(beam_ssa:successors(Blk), EsSucc, M0)
788    end;
789cse_successors(_Is, Blk, Es, M) ->
790    cse_successors_1(beam_ssa:successors(Blk), Es, M).
791
792cse_successors_1([L|Ls], Es0, M) ->
793    case M of
794        #{L:=Es1} when map_size(Es1) =:= 0 ->
795            %% The map is already empty. No need to do anything
796            %% since the intersection will be empty.
797            cse_successors_1(Ls, Es0, M);
798        #{L:=Es1} ->
799            %% Calculate the intersection of the two maps.
800            %% Both keys and values must match.
801            Es = maps:filter(fun(Key, Value) ->
802                                     case Es1 of
803                                         #{Key:=Value} -> true;
804                                         #{} -> false
805                                     end
806                             end, Es0),
807            cse_successors_1(Ls, Es0, M#{L:=Es});
808        #{} ->
809            cse_successors_1(Ls, Es0, M#{L=>Es0})
810    end;
811cse_successors_1([], _, M) -> M.
812
813cse_is([#b_set{op=succeeded,dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) ->
814    I = sub(I0, Sub0),
815    case I of
816        #b_set{args=[Src]} ->
817            cse_is(Is, Es, Sub0, [I|Acc]);
818        #b_set{} ->
819            %% The previous instruction has been eliminated. Eliminate the
820            %% 'succeeded' instruction too.
821            Sub = Sub0#{Bool=>#b_literal{val=true}},
822            cse_is(Is, Es, Sub, Acc)
823    end;
824cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
825    I = sub(I0, Sub0),
826    case beam_ssa:clobbers_xregs(I) of
827        true ->
828            %% Retaining the expressions map across calls and other
829            %% clobbering instructions would work, but it would cause
830            %% the common subexpressions to be saved to Y registers,
831            %% which would probably increase the size of the stack
832            %% frame.
833            cse_is(Is, #{}, Sub0, [I|Acc]);
834        false ->
835            case cse_expr(I) of
836                none ->
837                    %% Not suitable for CSE.
838                    cse_is(Is, Es0, Sub0, [I|Acc]);
839                {ok,ExprKey} ->
840                    case Es0 of
841                        #{ExprKey:=Src} ->
842                            Sub = Sub0#{Dst=>Src},
843                            cse_is(Is, Es0, Sub, Acc);
844                        #{} ->
845                            Es = Es0#{ExprKey=>Dst},
846                            cse_is(Is, Es, Sub0, [I|Acc])
847                    end
848            end
849    end;
850cse_is([], Es, Sub, Acc) ->
851    {Acc,Es,Sub}.
852
853cse_expr(#b_set{op=Op,args=Args}=I) ->
854    case cse_suitable(I) of
855        true -> {ok,{Op,Args}};
856        false -> none
857    end.
858
859cse_suitable(#b_set{op=get_hd}) -> true;
860cse_suitable(#b_set{op=get_tl}) -> true;
861cse_suitable(#b_set{op=put_list}) -> true;
862cse_suitable(#b_set{op=get_tuple_element}) -> true;
863cse_suitable(#b_set{op=put_tuple}) -> true;
864cse_suitable(#b_set{op={bif,tuple_size}}) ->
865    %% Doing CSE for tuple_size/1 can prevent the
866    %% creation of test_arity and select_tuple_arity
867    %% instructions. That could decrease performance
868    %% and beam_validator could fail to understand
869    %% that tuple operations that follow are safe.
870    false;
871cse_suitable(#b_set{anno=Anno,op={bif,Name},args=Args}) ->
872    %% Doing CSE for floating point operators is unsafe.
873    %% Doing CSE for comparison operators would prevent
874    %% creation of 'test' instructions.
875    Arity = length(Args),
876    not (is_map_key(float_op, Anno) orelse
877         erl_internal:new_type_test(Name, Arity) orelse
878         erl_internal:comp_op(Name, Arity) orelse
879         erl_internal:bool_op(Name, Arity));
880cse_suitable(#b_set{}) -> false.
881
882%%%
883%%% Using floating point instructions.
884%%%
885%%% Use the special floating points version of arithmetic
886%%% instructions, if the operands are known to be floats or the result
887%%% of the operation will be a float.
888%%%
889%%% The float instructions were never used in guards before, so we
890%%% will take special care to keep not using them in guards.  Using
891%%% them in guards would require a new version of the 'fconv'
892%%% instruction that would take a failure label.  Since it is unlikely
893%%% that using float instructions in guards would be benefical, why
894%%% bother implementing a new instruction?  Also, implementing float
895%%% instructions in guards in HiPE could turn out to be a lot of work.
896%%%
897
898-record(fs,
899        {s=undefined :: 'undefined' | 'cleared',
900         regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()},
901         fail=none :: 'none' | beam_ssa:label(),
902         non_guards :: gb_sets:set(beam_ssa:label()),
903         bs :: beam_ssa:block_map()
904        }).
905
906ssa_opt_float({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
907    NonGuards = non_guards(Linear0),
908    Blocks = maps:from_list(Linear0),
909    Fs = #fs{non_guards=NonGuards,bs=Blocks},
910    {Linear,Count} = float_opt(Linear0, Count0, Fs),
911    {St#st{ssa=Linear,cnt=Count}, FuncDb}.
912
913float_blk_is_in_guard(#b_blk{last=#b_br{fail=F}}, #fs{non_guards=NonGuards}) ->
914    not gb_sets:is_member(F, NonGuards);
915float_blk_is_in_guard(#b_blk{}, #fs{}) ->
916    false.
917
918float_opt([{L,Blk}|Bs0], Count0, Fs) ->
919    case float_blk_is_in_guard(Blk, Fs) of
920        true ->
921            %% This block is inside a guard. Don't do
922            %% any floating point optimizations.
923            {Bs,Count} = float_opt(Bs0, Count0, Fs),
924            {[{L,Blk}|Bs],Count};
925        false ->
926            %% This block is not inside a guard.
927            %% We can do the optimization.
928            float_opt_1(L, Blk, Bs0, Count0, Fs)
929    end;
930float_opt([], Count, _Fs) ->
931    {[],Count}.
932
933float_opt_1(L, #b_blk{is=Is0}=Blk0, Bs0, Count0, Fs0) ->
934    case float_opt_is(Is0, Fs0, Count0, []) of
935        {Is1,Fs1,Count1} ->
936            Fs2 = float_fail_label(Blk0, Fs1),
937            Fail = Fs2#fs.fail,
938            {Flush,Blk,Fs,Count2} = float_maybe_flush(Blk0, Fs2, Count1),
939            Split = float_split_conv(Is1, Blk),
940            {Blks0,Count3} = float_number(Split, L, Count2),
941            {Blks,Count4} = float_conv(Blks0, Fail, Count3),
942            {Bs,Count} = float_opt(Bs0, Count4, Fs),
943            {Blks++Flush++Bs,Count};
944        none ->
945            {Bs,Count} = float_opt(Bs0, Count0, Fs0),
946            {[{L,Blk0}|Bs],Count}
947    end.
948
949%% Split {float,convert} instructions into individual blocks.
950float_split_conv(Is0, Blk) ->
951    Br = #b_br{bool=#b_literal{val=true},succ=0,fail=0},
952    case splitwith(fun(#b_set{op=Op}) ->
953                           Op =/= {float,convert}
954                   end, Is0) of
955        {Is,[]} ->
956            [Blk#b_blk{is=Is}];
957        {[_|_]=Is1,[#b_set{op={float,convert}}=Conv|Is2]} ->
958            [#b_blk{is=Is1,last=Br},
959             #b_blk{is=[Conv],last=Br}|float_split_conv(Is2, Blk)];
960        {[],[#b_set{op={float,convert}}=Conv|Is1]} ->
961            [#b_blk{is=[Conv],last=Br}|float_split_conv(Is1, Blk)]
962    end.
963
964%% Number the blocks that were split.
965float_number([B|Bs0], FirstL, Count0) ->
966    {Bs,Count} = float_number(Bs0, Count0),
967    {[{FirstL,B}|Bs],Count}.
968
969float_number([B|Bs0], Count0) ->
970    {Bs,Count} = float_number(Bs0, Count0+1),
971    {[{Count0,B}|Bs],Count};
972float_number([], Count) ->
973    {[],Count}.
974
975%% Insert 'succeeded' instructions after each {float,convert}
976%% instruction.
977float_conv([{L,#b_blk{is=Is0}=Blk0}|Bs0], Fail, Count0) ->
978    case Is0 of
979        [#b_set{op={float,convert}}=Conv] ->
980            {Bool0,Count1} = new_reg('@ssa_bool', Count0),
981            Bool = #b_var{name=Bool0},
982            Succeeded = #b_set{op=succeeded,dst=Bool,
983                               args=[Conv#b_set.dst]},
984            Is = [Conv,Succeeded],
985            [{NextL,_}|_] = Bs0,
986            Br = #b_br{bool=Bool,succ=NextL,fail=Fail},
987            Blk = Blk0#b_blk{is=Is,last=Br},
988            {Bs,Count} = float_conv(Bs0, Fail, Count1),
989            {[{L,Blk}|Bs],Count};
990        [_|_] ->
991            case Bs0 of
992                [{NextL,_}|_] ->
993                    Br = #b_br{bool=#b_literal{val=true},
994                               succ=NextL,fail=NextL},
995                    Blk = Blk0#b_blk{last=Br},
996                    {Bs,Count} = float_conv(Bs0, Fail, Count0),
997                    {[{L,Blk}|Bs],Count};
998                [] ->
999                    {[{L,Blk0}],Count0}
1000            end
1001    end.
1002
1003float_maybe_flush(Blk0, #fs{s=cleared,fail=Fail,bs=Blocks}=Fs0, Count0) ->
1004    #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0,
1005
1006    %% If the success block starts with a floating point operation, we can
1007    %% defer flushing to that block as long as it isn't a guard.
1008    #b_blk{is=Is} = SuccBlk = map_get(Succ, Blocks),
1009    SuccIsGuard = float_blk_is_in_guard(SuccBlk, Fs0),
1010
1011    case Is of
1012        [#b_set{anno=#{float_op:=_}}|_] when not SuccIsGuard ->
1013            %% No flush needed.
1014            {[],Blk0,Fs0,Count0};
1015        _ ->
1016            %% Flush needed.
1017            {Bool0,Count1} = new_reg('@ssa_bool', Count0),
1018            Bool = #b_var{name=Bool0},
1019
1020            %% Allocate block numbers.
1021            CheckL = Count1,              %For checkerror.
1022            FlushL = Count1 + 1,          %For flushing of float regs.
1023            Count = Count1 + 2,
1024            Blk = Blk0#b_blk{last=Br#b_br{succ=CheckL}},
1025
1026            %% Build the block with the checkerror instruction.
1027            CheckIs = [#b_set{op={float,checkerror},dst=Bool}],
1028            CheckBr = #b_br{bool=Bool,succ=FlushL,fail=Fail},
1029            CheckBlk = #b_blk{is=CheckIs,last=CheckBr},
1030
1031            %% Build the block that flushes all registers.
1032            FlushIs = float_flush_regs(Fs0),
1033            FlushBr = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
1034            FlushBlk = #b_blk{is=FlushIs,last=FlushBr},
1035
1036            %% Update state and blocks.
1037            Fs = Fs0#fs{s=undefined,regs=#{},fail=none},
1038            FlushBs = [{CheckL,CheckBlk},{FlushL,FlushBlk}],
1039            {FlushBs,Blk,Fs,Count}
1040    end;
1041float_maybe_flush(Blk, Fs, Count) ->
1042    {[],Blk,Fs,Count}.
1043
1044float_opt_is([#b_set{op=succeeded,args=[Src]}=I0],
1045             #fs{regs=Rs}=Fs, Count, Acc) ->
1046    case Rs of
1047        #{Src:=Fr} ->
1048            I = I0#b_set{args=[Fr]},
1049            {reverse(Acc, [I]),Fs,Count};
1050        #{} ->
1051            {reverse(Acc, [I0]),Fs,Count}
1052    end;
1053float_opt_is([#b_set{anno=Anno0}=I0|Is0], Fs0, Count0, Acc) ->
1054    case Anno0 of
1055        #{float_op:=FTypes} ->
1056            Anno = maps:remove(float_op, Anno0),
1057            I1 = I0#b_set{anno=Anno},
1058            {Is,Fs,Count} = float_make_op(I1, FTypes, Fs0, Count0),
1059            float_opt_is(Is0, Fs, Count, reverse(Is, Acc));
1060        #{} ->
1061            float_opt_is(Is0, Fs0#fs{regs=#{}}, Count0, [I0|Acc])
1062    end;
1063float_opt_is([], Fs, _Count, _Acc) ->
1064    #fs{s=undefined} = Fs,                      %Assertion.
1065    none.
1066
1067float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0}=I0,
1068              Ts, #fs{s=S,regs=Rs0}=Fs, Count0) ->
1069    {As1,Rs1,Count1} = float_load(As0, Ts, Rs0, Count0, []),
1070    {As,Is0} = unzip(As1),
1071    {Fr,Count2} = new_reg('@fr', Count1),
1072    FrDst = #b_var{name=Fr},
1073    I = I0#b_set{op={float,Op},dst=FrDst,args=As},
1074    Rs = Rs1#{Dst=>FrDst},
1075    Is = append(Is0) ++ [I],
1076    case S of
1077        undefined ->
1078            {Ignore,Count} = new_reg('@ssa_ignore', Count2),
1079            C = #b_set{op={float,clearerror},dst=#b_var{name=Ignore}},
1080            {[C|Is],Fs#fs{s=cleared,regs=Rs},Count};
1081        cleared ->
1082            {Is,Fs#fs{regs=Rs},Count2}
1083    end.
1084
1085float_load([A|As], [T|Ts], Rs0, Count0, Acc) ->
1086    {Load,Rs,Count} = float_reg_arg(A, T, Rs0, Count0),
1087    float_load(As, Ts, Rs, Count, [Load|Acc]);
1088float_load([], [], Rs, Count, Acc) ->
1089    {reverse(Acc),Rs,Count}.
1090
1091float_reg_arg(A, T, Rs, Count0) ->
1092    case Rs of
1093        #{A:=Fr} ->
1094            {{Fr,[]},Rs,Count0};
1095        #{} ->
1096            {Fr,Count} = new_float_copy_reg(Count0),
1097            Dst = #b_var{name=Fr},
1098            I = float_load_reg(T, A, Dst),
1099            {{Dst,[I]},Rs#{A=>Dst},Count}
1100    end.
1101
1102float_load_reg(convert, #b_var{}=Src, Dst) ->
1103    #b_set{op={float,convert},dst=Dst,args=[Src]};
1104float_load_reg(convert, #b_literal{val=Val}=Src, Dst) ->
1105    try float(Val) of
1106        F ->
1107            #b_set{op={float,put},dst=Dst,args=[#b_literal{val=F}]}
1108    catch
1109        error:_ ->
1110            %% Let the exception happen at runtime.
1111            #b_set{op={float,convert},dst=Dst,args=[Src]}
1112    end;
1113float_load_reg(float, Src, Dst) ->
1114    #b_set{op={float,put},dst=Dst,args=[Src]}.
1115
1116new_float_copy_reg(Count) ->
1117    new_reg('@fr_copy', Count).
1118
1119new_reg(Base, Count) ->
1120    Fr = {Base,Count},
1121    {Fr,Count+1}.
1122
1123float_fail_label(#b_blk{last=Last}, Fs) ->
1124    case Last of
1125        #b_br{bool=#b_var{},fail=Fail} ->
1126            Fs#fs{fail=Fail};
1127        _ ->
1128            Fs
1129    end.
1130
1131float_flush_regs(#fs{regs=Rs}) ->
1132    maps:fold(fun(_, #b_var{name={'@fr_copy',_}}, Acc) ->
1133                      Acc;
1134                 (Dst, Fr, Acc) ->
1135                      [#b_set{op={float,get},dst=Dst,args=[Fr]}|Acc]
1136              end, [], Rs).
1137
1138%%%
1139%%% Live optimization.
1140%%%
1141%%% Optimize instructions whose values are not used. They could be
1142%%% removed if they have no side effects, or in a few cases replaced
1143%%% with a cheaper instructions
1144%%%
1145
1146ssa_opt_live({#st{ssa=Linear0}=St, FuncDb}) ->
1147    RevLinear = reverse(Linear0),
1148    Blocks0 = maps:from_list(RevLinear),
1149    Blocks = live_opt(RevLinear, #{}, Blocks0),
1150    Linear = beam_ssa:linearize(Blocks),
1151    {St#st{ssa=Linear}, FuncDb}.
1152
1153live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
1154    Blk1 = beam_ssa_share:block(Blk0, Blocks),
1155    Successors = beam_ssa:successors(Blk1),
1156    Live0 = live_opt_succ(Successors, L, LiveMap0, gb_sets:empty()),
1157    {Blk,Live} = live_opt_blk(Blk1, Live0),
1158    LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
1159    live_opt(Bs, LiveMap, Blocks#{L:=Blk});
1160live_opt([], _, Acc) -> Acc.
1161
1162live_opt_succ([S|Ss], L, LiveMap, Live0) ->
1163    Key = {S,L},
1164    case LiveMap of
1165        #{Key:=Live} ->
1166            %% The successor has a phi node, and the value for
1167            %% this block in the phi node is a variable.
1168            live_opt_succ(Ss, L, LiveMap, gb_sets:union(Live, Live0));
1169        #{S:=Live} ->
1170            %% No phi node in the successor, or the value for
1171            %% this block in the phi node is a literal.
1172            live_opt_succ(Ss, L, LiveMap, gb_sets:union(Live, Live0));
1173        #{} ->
1174            %% A peek_message block which has not been processed yet.
1175            live_opt_succ(Ss, L, LiveMap, Live0)
1176    end;
1177live_opt_succ([], _, _, Acc) -> Acc.
1178
1179live_opt_phis(Is, L, Live0, LiveMap0) ->
1180    LiveMap = LiveMap0#{L=>Live0},
1181    Phis = takewhile(fun(#b_set{op=Op}) -> Op =:= phi end, Is),
1182    case Phis of
1183        [] ->
1184            LiveMap;
1185        [_|_] ->
1186            PhiArgs = append([Args || #b_set{args=Args} <- Phis]),
1187            case [{P,V} || {#b_var{}=V,P} <- PhiArgs] of
1188                [_|_]=PhiVars ->
1189                    PhiLive0 = rel2fam(PhiVars),
1190                    PhiLive = [{{L,P},gb_sets:union(gb_sets:from_list(Vs), Live0)} ||
1191                                  {P,Vs} <- PhiLive0],
1192                    maps:merge(LiveMap, maps:from_list(PhiLive));
1193                [] ->
1194                    %% There were only literals in the phi node(s).
1195                    LiveMap
1196            end
1197    end.
1198
1199live_opt_blk(#b_blk{is=Is0,last=Last}=Blk, Live0) ->
1200    Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(Last))),
1201    {Is,Live} = live_opt_is(reverse(Is0), Live1, []),
1202    {Blk#b_blk{is=Is},Live}.
1203
1204live_opt_is([#b_set{op=phi,dst=Dst}=I|Is], Live, Acc) ->
1205    case gb_sets:is_member(Dst, Live) of
1206        true ->
1207            live_opt_is(Is, Live, [I|Acc]);
1208        false ->
1209            live_opt_is(Is, Live, Acc)
1210    end;
1211live_opt_is([#b_set{op=succeeded,dst=SuccDst=SuccDstVar,
1212                    args=[Dst]}=SuccI,
1213             #b_set{dst=Dst}=I|Is], Live0, Acc) ->
1214    case gb_sets:is_member(Dst, Live0) of
1215        true ->
1216            Live1 = gb_sets:add(Dst, Live0),
1217            Live = gb_sets:delete_any(SuccDst, Live1),
1218            live_opt_is([I|Is], Live, [SuccI|Acc]);
1219        false ->
1220            case live_opt_unused(I) of
1221                {replace,NewI0} ->
1222                    NewI = NewI0#b_set{dst=SuccDstVar},
1223                    live_opt_is([NewI|Is], Live0, Acc);
1224                keep ->
1225                    case gb_sets:is_member(SuccDst, Live0) of
1226                        true ->
1227                            Live1 = gb_sets:add(Dst, Live0),
1228                            Live = gb_sets:delete(SuccDst, Live1),
1229                            live_opt_is([I|Is], Live, [SuccI|Acc]);
1230                        false ->
1231                            live_opt_is([I|Is], Live0, Acc)
1232                    end
1233            end
1234    end;
1235live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
1236    case gb_sets:is_member(Dst, Live0) of
1237        true ->
1238            Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
1239            Live = gb_sets:delete(Dst, Live1),
1240            live_opt_is(Is, Live, [I|Acc]);
1241        false ->
1242            case beam_ssa:no_side_effect(I) of
1243                true ->
1244                    live_opt_is(Is, Live0, Acc);
1245                false ->
1246                    Live = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
1247                    live_opt_is(Is, Live, [I|Acc])
1248            end
1249    end;
1250live_opt_is([], Live, Acc) ->
1251    {Acc,Live}.
1252
1253live_opt_unused(#b_set{op=get_map_element}=Set) ->
1254    {replace,Set#b_set{op=has_map_field}};
1255live_opt_unused(_) -> keep.
1256
1257%%%
1258%%% Optimize binary matching.
1259%%%
1260%%% * If the value of segment is never extracted, rewrite
1261%%%   to a bs_skip instruction.
1262%%%
1263%%% * Coalesce adjacent bs_skip instructions and skip instructions
1264%%%   with bs_test_tail.
1265%%%
1266
1267ssa_opt_bsm({#st{ssa=Linear}=St, FuncDb}) ->
1268    Extracted0 = bsm_extracted(Linear),
1269    Extracted = cerl_sets:from_list(Extracted0),
1270    {St#st{ssa=bsm_skip(Linear, Extracted)}, FuncDb}.
1271
1272bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs0], Extracted) ->
1273    Bs = bsm_skip(Bs0, Extracted),
1274    Is = bsm_skip_is(Is0, Extracted),
1275    coalesce_skips({L,Blk#b_blk{is=Is}}, Bs);
1276bsm_skip([], _) -> [].
1277
1278bsm_skip_is([I0|Is], Extracted) ->
1279    case I0 of
1280        #b_set{op=bs_match,
1281               dst=Ctx,
1282               args=[#b_literal{val=T}=Type,PrevCtx|Args0]}
1283          when T =/= string, T =/= skip ->
1284            I = case cerl_sets:is_element(Ctx, Extracted) of
1285                    true ->
1286                        I0;
1287                    false ->
1288                        %% The value is never extracted.
1289                        Args = [#b_literal{val=skip},PrevCtx,Type|Args0],
1290                        I0#b_set{args=Args}
1291                end,
1292            [I|Is];
1293        #b_set{} ->
1294            [I0|bsm_skip_is(Is, Extracted)]
1295    end;
1296bsm_skip_is([], _) -> [].
1297
1298bsm_extracted([{_,#b_blk{is=Is}}|Bs]) ->
1299    case Is of
1300        [#b_set{op=bs_extract,args=[Ctx]}|_] ->
1301            [Ctx|bsm_extracted(Bs)];
1302        _ ->
1303            bsm_extracted(Bs)
1304    end;
1305bsm_extracted([]) -> [].
1306
1307coalesce_skips({L,#b_blk{is=[#b_set{op=bs_extract}=Extract|Is0],
1308                         last=Last0}=Blk0}, Bs0) ->
1309    case coalesce_skips_is(Is0, Last0, Bs0) of
1310        not_possible ->
1311            [{L,Blk0}|Bs0];
1312        {Is,Last,Bs} ->
1313            Blk = Blk0#b_blk{is=[Extract|Is],last=Last},
1314            [{L,Blk}|Bs]
1315    end;
1316coalesce_skips({L,#b_blk{is=Is0,last=Last0}=Blk0}, Bs0) ->
1317    case coalesce_skips_is(Is0, Last0, Bs0) of
1318        not_possible ->
1319            [{L,Blk0}|Bs0];
1320        {Is,Last,Bs} ->
1321            Blk = Blk0#b_blk{is=Is,last=Last},
1322            [{L,Blk}|Bs]
1323    end.
1324
1325coalesce_skips_is([#b_set{op=bs_match,
1326                          args=[#b_literal{val=skip},
1327                                Ctx0,Type,Flags,
1328                                #b_literal{val=Size0},
1329                                #b_literal{val=Unit0}]}=Skip0,
1330                   #b_set{op=succeeded}],
1331                  #b_br{succ=L2,fail=Fail}=Br0,
1332                  Bs0) when is_integer(Size0) ->
1333    case Bs0 of
1334        [{L2,#b_blk{is=[#b_set{op=bs_match,
1335                               dst=SkipDst,
1336                               args=[#b_literal{val=skip},_,_,_,
1337                                     #b_literal{val=Size1},
1338                                     #b_literal{val=Unit1}]},
1339                        #b_set{op=succeeded}=Succeeded],
1340                    last=#b_br{fail=Fail}=Br}}|Bs] when is_integer(Size1) ->
1341            SkipBits = Size0 * Unit0 + Size1 * Unit1,
1342            Skip = Skip0#b_set{dst=SkipDst,
1343                               args=[#b_literal{val=skip},Ctx0,
1344                                     Type,Flags,
1345                                     #b_literal{val=SkipBits},
1346                                     #b_literal{val=1}]},
1347            Is = [Skip,Succeeded],
1348            {Is,Br,Bs};
1349        [{L2,#b_blk{is=[#b_set{op=bs_test_tail,
1350                               args=[_Ctx,#b_literal{val=TailSkip}]}],
1351                    last=#b_br{succ=NextSucc,fail=Fail}}}|Bs] ->
1352            SkipBits = Size0 * Unit0,
1353            TestTail = Skip0#b_set{op=bs_test_tail,
1354                                   args=[Ctx0,#b_literal{val=SkipBits+TailSkip}]},
1355            Br = Br0#b_br{bool=TestTail#b_set.dst,succ=NextSucc},
1356            Is = [TestTail],
1357            {Is,Br,Bs};
1358        _ ->
1359            not_possible
1360    end;
1361coalesce_skips_is(_, _, _) ->
1362    not_possible.
1363
1364%%%
1365%%% Short-cutting binary matching instructions.
1366%%%
1367
1368ssa_opt_bsm_shortcut({#st{ssa=Linear}=St, FuncDb}) ->
1369    Positions = bsm_positions(Linear, #{}),
1370    case map_size(Positions) of
1371        0 ->
1372            %% No binary matching instructions.
1373            {St, FuncDb};
1374        _ ->
1375            {St#st{ssa=bsm_shortcut(Linear, Positions)}, FuncDb}
1376    end.
1377
1378bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
1379    PosMap = bsm_positions_is(Is, PosMap0),
1380    case {Is,Last} of
1381        {[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}],
1382         #b_br{bool=Bool,fail=Fail}} ->
1383            Bits = Bits0 + map_get(Ctx, PosMap0),
1384            bsm_positions(Bs, PosMap#{L=>{Bits,Fail}});
1385        {_,_} ->
1386            bsm_positions(Bs, PosMap)
1387    end;
1388bsm_positions([], PosMap) -> PosMap.
1389
1390bsm_positions_is([#b_set{op=bs_start_match,dst=New}|Is], PosMap0) ->
1391    PosMap = PosMap0#{New=>0},
1392    bsm_positions_is(Is, PosMap);
1393bsm_positions_is([#b_set{op=bs_match,dst=New,args=Args}|Is], PosMap0) ->
1394    [_,Old|_] = Args,
1395    #{Old:=Bits0} = PosMap0,
1396    Bits = bsm_update_bits(Args, Bits0),
1397    PosMap = PosMap0#{New=>Bits},
1398    bsm_positions_is(Is, PosMap);
1399bsm_positions_is([_|Is], PosMap) ->
1400    bsm_positions_is(Is, PosMap);
1401bsm_positions_is([], PosMap) -> PosMap.
1402
1403bsm_update_bits([#b_literal{val=string},_,#b_literal{val=String}], Bits) ->
1404    Bits + bit_size(String);
1405bsm_update_bits([#b_literal{val=utf8}|_], Bits) ->
1406    Bits + 8;
1407bsm_update_bits([#b_literal{val=utf16}|_], Bits) ->
1408    Bits + 16;
1409bsm_update_bits([#b_literal{val=utf32}|_], Bits) ->
1410    Bits + 32;
1411bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits)
1412  when is_integer(Sz) ->
1413    Bits + Sz*U;
1414bsm_update_bits(_, Bits) -> Bits.
1415
1416bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) ->
1417    case {Is,Last0} of
1418        {[#b_set{op=bs_match,dst=New,args=[_,Old|_]},
1419          #b_set{op=succeeded,dst=Bool,args=[New]}],
1420         #b_br{bool=Bool,fail=Fail}} ->
1421            case PosMap of
1422                #{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits ->
1423                    Last = Last0#b_br{fail=NextFail},
1424                    [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)];
1425                #{} ->
1426                    [{L,Blk}|bsm_shortcut(Bs, PosMap)]
1427            end;
1428        {_,_} ->
1429            [{L,Blk}|bsm_shortcut(Bs, PosMap)]
1430    end;
1431bsm_shortcut([], _PosMap) -> [].
1432
1433%%%
1434%%% Eliminate redundant bs_test_unit2 instructions.
1435%%%
1436
1437ssa_opt_bsm_units({#st{ssa=Linear}=St, FuncDb}) ->
1438    {St#st{ssa=bsm_units(Linear, #{})}, FuncDb}.
1439
1440bsm_units([{L,#b_blk{last=#b_br{succ=Succ,fail=Fail}}=Block0} | Bs], UnitMaps0) ->
1441    UnitsIn = maps:get(L, UnitMaps0, #{}),
1442    {Block, UnitsOut} = bsm_units_skip(Block0, UnitsIn),
1443    UnitMaps1 = bsm_units_join(Succ, UnitsOut, UnitMaps0),
1444    UnitMaps = bsm_units_join(Fail, UnitsIn, UnitMaps1),
1445    [{L, Block} | bsm_units(Bs, UnitMaps)];
1446bsm_units([{L,#b_blk{last=#b_switch{fail=Fail,list=Switch}}=Block} | Bs], UnitMaps0) ->
1447    UnitsIn = maps:get(L, UnitMaps0, #{}),
1448    Labels = [Fail | [Lbl || {_Arg, Lbl} <- Switch]],
1449    UnitMaps = foldl(fun(Lbl, UnitMaps) ->
1450                             bsm_units_join(Lbl, UnitsIn, UnitMaps)
1451                     end, UnitMaps0, Labels),
1452    [{L, Block} | bsm_units(Bs, UnitMaps)];
1453bsm_units([{L, Block} | Bs], UnitMaps) ->
1454    [{L, Block} | bsm_units(Bs, UnitMaps)];
1455bsm_units([], _UnitMaps) ->
1456    [].
1457
1458bsm_units_skip(Block, Units) ->
1459    bsm_units_skip_1(Block#b_blk.is, Block, Units).
1460
1461bsm_units_skip_1([#b_set{op=bs_start_match,dst=New}|_], Block, Units) ->
1462    %% We bail early since there can't be more than one match per block.
1463    {Block, Units#{ New => 1 }};
1464bsm_units_skip_1([#b_set{op=bs_match,
1465                         dst=New,
1466                         args=[#b_literal{val=skip},
1467                               Ctx,
1468                               #b_literal{val=binary},
1469                               _Flags,
1470                               #b_literal{val=all},
1471                               #b_literal{val=OpUnit}]}=Skip | Test],
1472                 Block0, Units) ->
1473    [#b_set{op=succeeded,dst=Bool,args=[New]}] = Test, %Assertion.
1474    #b_br{bool=Bool} = Last0 = Block0#b_blk.last, %Assertion.
1475    CtxUnit = map_get(Ctx, Units),
1476    if
1477        CtxUnit rem OpUnit =:= 0 ->
1478            Is = takewhile(fun(I) -> I =/= Skip end, Block0#b_blk.is),
1479            Last = Last0#b_br{bool=#b_literal{val=true}},
1480            Block = Block0#b_blk{is=Is,last=Last},
1481            {Block, Units#{ New => CtxUnit }};
1482        CtxUnit rem OpUnit =/= 0 ->
1483            {Block0, Units#{ New => OpUnit, Ctx => OpUnit }}
1484    end;
1485bsm_units_skip_1([#b_set{op=bs_match,dst=New,args=Args}|_], Block, Units) ->
1486    [_,Ctx|_] = Args,
1487    CtxUnit = map_get(Ctx, Units),
1488    OpUnit = bsm_op_unit(Args),
1489    {Block, Units#{ New => gcd(OpUnit, CtxUnit) }};
1490bsm_units_skip_1([_I | Is], Block, Units) ->
1491    bsm_units_skip_1(Is, Block, Units);
1492bsm_units_skip_1([], Block, Units) ->
1493    {Block, Units}.
1494
1495bsm_op_unit([_,_,_,Size,#b_literal{val=U}]) ->
1496    case Size of
1497        #b_literal{val=Sz} when is_integer(Sz) -> Sz*U;
1498        _ -> U
1499    end;
1500bsm_op_unit([#b_literal{val=string},_,#b_literal{val=String}]) ->
1501    bit_size(String);
1502bsm_op_unit([#b_literal{val=utf8}|_]) ->
1503    8;
1504bsm_op_unit([#b_literal{val=utf16}|_]) ->
1505    16;
1506bsm_op_unit([#b_literal{val=utf32}|_]) ->
1507    32;
1508bsm_op_unit(_) ->
1509    1.
1510
1511%% Several paths can lead to the same match instruction and the inferred units
1512%% may differ between them, so we can only keep the information that is common
1513%% to all paths.
1514bsm_units_join(Lbl, MapA, UnitMaps0) when is_map_key(Lbl, UnitMaps0) ->
1515    MapB = map_get(Lbl, UnitMaps0),
1516    Merged = if
1517                 map_size(MapB) =< map_size(MapA) ->
1518                     bsm_units_join_1(maps:keys(MapB), MapA, MapB);
1519                 map_size(MapB) > map_size(MapA) ->
1520                     bsm_units_join_1(maps:keys(MapA), MapB, MapA)
1521             end,
1522    UnitMaps0#{Lbl := Merged};
1523bsm_units_join(Lbl, MapA, UnitMaps0) when MapA =/= #{} ->
1524    UnitMaps0#{Lbl => MapA};
1525bsm_units_join(_Lbl, _MapA, UnitMaps0) ->
1526    UnitMaps0.
1527
1528bsm_units_join_1([Key | Keys], Left, Right) when is_map_key(Key, Left) ->
1529    UnitA = map_get(Key, Left),
1530    UnitB = map_get(Key, Right),
1531    bsm_units_join_1(Keys, Left, Right#{Key := gcd(UnitA, UnitB)});
1532bsm_units_join_1([Key | Keys], Left, Right) ->
1533    bsm_units_join_1(Keys, Left, maps:remove(Key, Right));
1534bsm_units_join_1([], _MapA, Right) ->
1535    Right.
1536
1537%%%
1538%%% Optimize binary construction.
1539%%%
1540%%% If an integer segment or a float segment has a literal size and
1541%%% a literal value, convert to a binary segment. Coalesce adjacent
1542%%% literal binary segments. Literal binary segments will be converted
1543%%% to bs_put_string instructions in later pass.
1544%%%
1545
1546ssa_opt_bs_puts({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
1547    {Linear,Count} = opt_bs_puts(Linear0, Count0, []),
1548    {St#st{ssa=Linear,cnt=Count}, FuncDb}.
1549
1550opt_bs_puts([{L,#b_blk{is=Is}=Blk0}|Bs], Count0, Acc0) ->
1551    case Is of
1552        [#b_set{op=bs_put}=I0] ->
1553            case opt_bs_put(L, I0, Blk0, Count0, Acc0) of
1554                not_possible ->
1555                    opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0]);
1556                {Count,Acc1} ->
1557                    Acc = opt_bs_puts_merge(Acc1),
1558                    opt_bs_puts(Bs, Count, Acc)
1559            end;
1560        _ ->
1561            opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0])
1562    end;
1563opt_bs_puts([], Count, Acc) ->
1564    {reverse(Acc),Count}.
1565
1566opt_bs_puts_merge([{L1,#b_blk{is=Is}=Blk0},{L2,#b_blk{is=AccIs}}=BAcc|Acc]) ->
1567    case {AccIs,Is} of
1568        {[#b_set{op=bs_put,
1569                 args=[#b_literal{val=binary},
1570                       #b_literal{},
1571                       #b_literal{val=Bin0},
1572                       #b_literal{val=all},
1573                       #b_literal{val=1}]}],
1574         [#b_set{op=bs_put,
1575                 args=[#b_literal{val=binary},
1576                       #b_literal{},
1577                       #b_literal{val=Bin1},
1578                       #b_literal{val=all},
1579                       #b_literal{val=1}]}=I0]} ->
1580            %% Coalesce the two segments to one.
1581            Bin = <<Bin0/bitstring,Bin1/bitstring>>,
1582            I = I0#b_set{args=bs_put_args(binary, Bin, all)},
1583            Blk = Blk0#b_blk{is=[I]},
1584            [{L2,Blk}|Acc];
1585        {_,_} ->
1586            [{L1,Blk0},BAcc|Acc]
1587    end.
1588
1589opt_bs_put(L, I0, #b_blk{last=Br0}=Blk0, Count0, Acc) ->
1590    case opt_bs_put(I0) of
1591        [Bin] when is_bitstring(Bin) ->
1592            Args = bs_put_args(binary, Bin, all),
1593            I = I0#b_set{args=Args},
1594            Blk = Blk0#b_blk{is=[I]},
1595            {Count0,[{L,Blk}|Acc]};
1596        [{int,Int,Size},Bin] when is_bitstring(Bin) ->
1597            %% Construct a bs_put_integer instruction following
1598            %% by a bs_put_binary instruction.
1599            IntArgs = bs_put_args(integer, Int, Size),
1600            BinArgs = bs_put_args(binary, Bin, all),
1601            {BinL,BinVarNum} = {Count0,Count0+1},
1602            Count = Count0 + 2,
1603            BinVar = #b_var{name={'@ssa_bool',BinVarNum}},
1604            BinI = I0#b_set{dst=BinVar,args=BinArgs},
1605            BinBlk = Blk0#b_blk{is=[BinI],last=Br0#b_br{bool=BinVar}},
1606            IntI = I0#b_set{args=IntArgs},
1607            IntBlk = Blk0#b_blk{is=[IntI],last=Br0#b_br{succ=BinL}},
1608            {Count,[{BinL,BinBlk},{L,IntBlk}|Acc]};
1609        not_possible ->
1610            not_possible
1611    end.
1612
1613opt_bs_put(#b_set{args=[#b_literal{val=binary},_,#b_literal{val=Val},
1614                        #b_literal{val=all},#b_literal{val=Unit}]})
1615  when is_bitstring(Val) ->
1616    if
1617        bit_size(Val) rem Unit =:= 0 ->
1618            [Val];
1619        true ->
1620            not_possible
1621    end;
1622opt_bs_put(#b_set{args=[#b_literal{val=Type},#b_literal{val=Flags},
1623                        #b_literal{val=Val},#b_literal{val=Size},
1624                        #b_literal{val=Unit}]}=I0) when is_integer(Size) ->
1625    EffectiveSize = Size * Unit,
1626    if
1627        EffectiveSize > 0 ->
1628            case {Type,opt_bs_put_endian(Flags)} of
1629                {integer,big} when is_integer(Val) ->
1630                    if
1631                        EffectiveSize < 64 ->
1632                            [<<Val:EffectiveSize>>];
1633                        true ->
1634                            opt_bs_put_split_int(Val, EffectiveSize)
1635                    end;
1636                {integer,little} when is_integer(Val), EffectiveSize < 128 ->
1637                    %% To avoid an explosion in code size, we only try
1638                    %% to optimize relatively small fields.
1639                    <<Int:EffectiveSize>> = <<Val:EffectiveSize/little>>,
1640                    Args = bs_put_args(Type, Int, EffectiveSize),
1641                    I = I0#b_set{args=Args},
1642                    opt_bs_put(I);
1643                {binary,_} when is_bitstring(Val) ->
1644                    <<Bitstring:EffectiveSize/bits,_/bits>> = Val,
1645                    [Bitstring];
1646                {float,Endian} ->
1647                    try
1648                        [opt_bs_put_float(Val, EffectiveSize, Endian)]
1649                    catch error:_ ->
1650                            not_possible
1651                    end;
1652                {_,_} ->
1653                    not_possible
1654            end;
1655        true ->
1656            not_possible
1657    end;
1658opt_bs_put(#b_set{}) -> not_possible.
1659
1660opt_bs_put_float(N, Sz, Endian) ->
1661    case Endian of
1662        big -> <<N:Sz/big-float-unit:1>>;
1663        little -> <<N:Sz/little-float-unit:1>>
1664    end.
1665
1666bs_put_args(Type, Val, Size) ->
1667    [#b_literal{val=Type},
1668     #b_literal{val=[unsigned,big]},
1669     #b_literal{val=Val},
1670     #b_literal{val=Size},
1671     #b_literal{val=1}].
1672
1673opt_bs_put_endian([big=E|_]) -> E;
1674opt_bs_put_endian([little=E|_]) -> E;
1675opt_bs_put_endian([native=E|_]) -> E;
1676opt_bs_put_endian([_|Fs]) -> opt_bs_put_endian(Fs).
1677
1678opt_bs_put_split_int(Int, Size) ->
1679    Pos = opt_bs_put_split_int_1(Int, 0, Size - 1),
1680    UpperSize = Size - Pos,
1681    if
1682        Pos =:= 0 ->
1683            %% Value is 0 or -1 -- keep the original instruction.
1684            not_possible;
1685        UpperSize < 64 ->
1686            %% No or few leading zeroes or ones.
1687            [<<Int:Size>>];
1688        true ->
1689            %% There are 64 or more leading ones or zeroes in
1690            %% the resulting binary. Split into two separate
1691            %% segments to avoid an explosion in code size.
1692            [{int,Int bsr Pos,UpperSize},<<Int:Pos>>]
1693    end.
1694
1695opt_bs_put_split_int_1(_Int, L, R) when L > R ->
1696    8 * ((L + 7) div 8);
1697opt_bs_put_split_int_1(Int, L, R) ->
1698    Mid = (L + R) div 2,
1699    case Int bsr Mid of
1700        Upper when Upper =:= 0; Upper =:= -1 ->
1701            opt_bs_put_split_int_1(Int, L, Mid - 1);
1702        _ ->
1703            opt_bs_put_split_int_1(Int, Mid + 1, R)
1704    end.
1705
1706%%%
1707%%% Optimize expressions such as "tuple_size(Var) =:= 2".
1708%%%
1709%%% Consider this code:
1710%%%
1711%%% 0:
1712%%%   .
1713%%%   .
1714%%%   .
1715%%%   Size = bif:tuple_size Var
1716%%%   BoolVar1 = succeeded Size
1717%%%   br BoolVar1, label 4, label 3
1718%%%
1719%%% 4:
1720%%%   BoolVar2 = bif:'=:=' Size, literal 2
1721%%%   br BoolVar2, label 6, label 3
1722%%%
1723%%% 6: ...   %% OK
1724%%%
1725%%% 3: ...   %% Not a tuple of size 2
1726%%%
1727%%% The BEAM code will look this:
1728%%%
1729%%%   {bif,tuple_size,{f,3},[{x,0}],{x,0}}.
1730%%%   {test,is_eq_exact,{f,3},[{x,0},{integer,2}]}.
1731%%%
1732%%% Better BEAM code will be produced if we transform the
1733%%% code like this:
1734%%%
1735%%% 0:
1736%%%   .
1737%%%   .
1738%%%   .
1739%%%   br label 10
1740%%%
1741%%% 10:
1742%%%   NewBoolVar = bif:is_tuple Var
1743%%%   br NewBoolVar, label 11, label 3
1744%%%
1745%%% 11:
1746%%%   Size = bif:tuple_size Var
1747%%%   br label 4
1748%%%
1749%%% 4:
1750%%%   BoolVar2 = bif:'=:=' Size, literal 2
1751%%%   br BoolVar2, label 6, label 3
1752%%%
1753%%% (The key part of the transformation is the removal of
1754%%% the 'succeeded' instruction to signal to the code generator
1755%%% that the call to tuple_size/1 can't fail.)
1756%%%
1757%%% The BEAM code will look like:
1758%%%
1759%%%   {test,is_tuple,{f,3},[{x,0}]}.
1760%%%   {test_arity,{f,3},[{x,0},2]}.
1761%%%
1762%%% Those two instructions will be combined into a single
1763%%% is_tuple_of_arity instruction by the loader.
1764%%%
1765
1766ssa_opt_tuple_size({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
1767    %% This optimization is only safe in guards, as prefixing tuple_size with
1768    %% an is_tuple check prevents it from throwing an exception.
1769    NonGuards = non_guards(Linear0),
1770    {Linear,Count} = opt_tup_size(Linear0, NonGuards, Count0, []),
1771    {St#st{ssa=Linear,cnt=Count}, FuncDb}.
1772
1773opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], NonGuards, Count0, Acc0) ->
1774    case {Is,Last} of
1775        {[#b_set{op={bif,'=:='},dst=Bool,args=[#b_var{}=Tup,#b_literal{val=Arity}]}],
1776         #b_br{bool=Bool}} when is_integer(Arity), Arity >= 0 ->
1777            {Acc,Count} = opt_tup_size_1(Tup, L, NonGuards, Count0, Acc0),
1778            opt_tup_size(Bs, NonGuards, Count, [{L,Blk}|Acc]);
1779        {_,_} ->
1780            opt_tup_size(Bs, NonGuards, Count0, [{L,Blk}|Acc0])
1781    end;
1782opt_tup_size([], _NonGuards, Count, Acc) ->
1783    {reverse(Acc),Count}.
1784
1785opt_tup_size_1(Size, EqL, NonGuards, Count0, [{L,Blk0}|Acc]) ->
1786    #b_blk{is=Is0,last=Last} = Blk0,
1787    case Last of
1788        #b_br{bool=Bool,succ=EqL,fail=Fail} ->
1789            case gb_sets:is_member(Fail, NonGuards) of
1790                true ->
1791                    {[{L,Blk0}|Acc],Count0};
1792                false ->
1793                    case opt_tup_size_is(Is0, Bool, Size, []) of
1794                        none ->
1795                            {[{L,Blk0}|Acc],Count0};
1796                        {PreIs,TupleSizeIs,Tuple} ->
1797                            opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
1798                                           Tuple, Fail, Count0, Acc)
1799                    end
1800            end;
1801        _ ->
1802            {[{L,Blk0}|Acc],Count0}
1803    end;
1804opt_tup_size_1(_, _, _, Count, Acc) ->
1805    {Acc,Count}.
1806
1807opt_tup_size_2(PreIs, TupleSizeIs, PreL, EqL, Tuple, Fail, Count0, Acc) ->
1808    IsTupleL = Count0,
1809    TupleSizeL = Count0 + 1,
1810    Bool = #b_var{name={'@ssa_bool',Count0+2}},
1811    Count = Count0 + 3,
1812
1813    True = #b_literal{val=true},
1814    PreBr = #b_br{bool=True,succ=IsTupleL,fail=IsTupleL},
1815    PreBlk = #b_blk{is=PreIs,last=PreBr},
1816
1817    IsTupleIs = [#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}],
1818    IsTupleBr = #b_br{bool=Bool,succ=TupleSizeL,fail=Fail},
1819    IsTupleBlk = #b_blk{is=IsTupleIs,last=IsTupleBr},
1820
1821    TupleSizeBr = #b_br{bool=True,succ=EqL,fail=EqL},
1822    TupleSizeBlk = #b_blk{is=TupleSizeIs,last=TupleSizeBr},
1823    {[{TupleSizeL,TupleSizeBlk},
1824      {IsTupleL,IsTupleBlk},
1825      {PreL,PreBlk}|Acc],Count}.
1826
1827opt_tup_size_is([#b_set{op={bif,tuple_size},dst=Size,args=[Tuple]}=I,
1828                 #b_set{op=succeeded,dst=Bool,args=[Size]}],
1829                Bool, Size, Acc) ->
1830    {reverse(Acc),[I],Tuple};
1831opt_tup_size_is([I|Is], Bool, Size, Acc) ->
1832    opt_tup_size_is(Is, Bool, Size, [I|Acc]);
1833opt_tup_size_is([], _, _, _Acc) -> none.
1834
1835%%%
1836%%% Optimize #b_switch{} instructions.
1837%%%
1838%%% If the argument for a #b_switch{} comes from a phi node with all
1839%%% literals, any values in the switch list which are not in the phi
1840%%% node can be removed.
1841%%%
1842%%% If the values in the phi node and switch list are the same,
1843%%% the failure label can't be reached and be eliminated.
1844%%%
1845%%% A #b_switch{} with only one value can be rewritten to
1846%%% a #b_br{}. A switch that only verifies that the argument
1847%%% is 'true' or 'false' can be rewritten to a is_boolean test.
1848%%%
1849
1850ssa_opt_sw({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
1851    {Linear,Count} = opt_sw(Linear0, Count0, []),
1852    {St#st{ssa=Linear,cnt=Count}, FuncDb}.
1853
1854opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Sw0}=Blk0}|Bs], Count0, Acc) ->
1855    %% Ensure that no label in the switch list is the same
1856    %% as the failure label.
1857    #b_switch{fail=Fail,list=List0} = Sw0,
1858    List = [{Val,Lbl} || {Val,Lbl} <- List0, Lbl =/= Fail],
1859    Sw1 = beam_ssa:normalize(Sw0#b_switch{list=List}),
1860    case Sw1 of
1861        #b_switch{arg=Arg,fail=Fail,list=[{Lit,Lbl}]} ->
1862            %% Rewrite a single value switch to a br.
1863            Bool = #b_var{name={'@ssa_bool',Count0}},
1864            Count = Count0 + 1,
1865            IsEq = #b_set{op={bif,'=:='},dst=Bool,args=[Arg,Lit]},
1866            Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
1867            Blk = Blk0#b_blk{is=Is++[IsEq],last=Br},
1868            opt_sw(Bs, Count, [{L,Blk}|Acc]);
1869        #b_switch{arg=Arg,fail=Fail,
1870                  list=[{#b_literal{val=B1},Lbl},{#b_literal{val=B2},Lbl}]}
1871          when B1 =:= not B2 ->
1872            %% Replace with is_boolean test.
1873            Bool = #b_var{name={'@ssa_bool',Count0}},
1874            Count = Count0 + 1,
1875            IsBool = #b_set{op={bif,is_boolean},dst=Bool,args=[Arg]},
1876            Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
1877            Blk = Blk0#b_blk{is=Is++[IsBool],last=Br},
1878            opt_sw(Bs, Count, [{L,Blk}|Acc]);
1879        Sw0 ->
1880            opt_sw(Bs, Count0, [{L,Blk0}|Acc]);
1881        Sw ->
1882            Blk = Blk0#b_blk{last=Sw},
1883            opt_sw(Bs, Count0, [{L,Blk}|Acc])
1884    end;
1885opt_sw([{L,#b_blk{}=Blk}|Bs], Count, Acc) ->
1886    opt_sw(Bs, Count, [{L,Blk}|Acc]);
1887opt_sw([], Count, Acc) ->
1888    {reverse(Acc),Count}.
1889
1890%%%
1891%%% Merge blocks.
1892%%%
1893
1894ssa_opt_merge_blocks({#st{ssa=Blocks}=St, FuncDb}) ->
1895    Preds = beam_ssa:predecessors(Blocks),
1896    Merged = merge_blocks_1(beam_ssa:rpo(Blocks), Preds, Blocks),
1897    {St#st{ssa=Merged}, FuncDb}.
1898
1899merge_blocks_1([L|Ls], Preds0, Blocks0) ->
1900    case Preds0 of
1901        #{L:=[P]} ->
1902            #{P:=Blk0,L:=Blk1} = Blocks0,
1903            case is_merge_allowed(L, Blk0, Blk1) of
1904                true ->
1905                    #b_blk{is=Is0} = Blk0,
1906                    #b_blk{is=Is1} = Blk1,
1907                    verify_merge_is(Is1),
1908                    Is = Is0 ++ Is1,
1909                    Blk = Blk1#b_blk{is=Is},
1910                    Blocks1 = maps:remove(L, Blocks0),
1911                    Blocks2 = Blocks1#{P:=Blk},
1912                    Successors = beam_ssa:successors(Blk),
1913                    Blocks = beam_ssa:update_phi_labels(Successors, L, P, Blocks2),
1914                    Preds = merge_update_preds(Successors, L, P, Preds0),
1915                    merge_blocks_1(Ls, Preds, Blocks);
1916                false ->
1917                    merge_blocks_1(Ls, Preds0, Blocks0)
1918            end;
1919        #{} ->
1920            merge_blocks_1(Ls, Preds0, Blocks0)
1921    end;
1922merge_blocks_1([], _Preds, Blocks) -> Blocks.
1923
1924merge_update_preds([L|Ls], From, To, Preds0) ->
1925    Ps = [rename_label(P, From, To) || P <- map_get(L, Preds0)],
1926    Preds = Preds0#{L:=Ps},
1927    merge_update_preds(Ls, From, To, Preds);
1928merge_update_preds([], _, _, Preds) -> Preds.
1929
1930rename_label(From, From, To) -> To;
1931rename_label(Lbl, _, _) -> Lbl.
1932
1933verify_merge_is([#b_set{op=Op}|_]) ->
1934    %% The merged block has only one predecessor, so it should not have any phi
1935    %% nodes.
1936    true = Op =/= phi;                          %Assertion.
1937verify_merge_is(_) ->
1938    ok.
1939
1940is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=peek_message}|_]}) ->
1941    false;
1942is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{is=Is}) ->
1943    %% The predecessor block must have exactly one successor (L) for
1944    %% the merge to be safe.
1945    case beam_ssa:successors(Blk) of
1946        [L] ->
1947            case Is of
1948                [#b_set{op=phi,args=[_]}|_] ->
1949                    %% The type optimizer pass must have been
1950                    %% turned off, since it would have removed this
1951                    %% redundant phi node. Refuse to merge the blocks
1952                    %% to ensure that this phi node remains at the
1953                    %% beginning of a block.
1954                    false;
1955                _ ->
1956                    true
1957            end;
1958        [_|_] ->
1959            false
1960    end;
1961is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) ->
1962    false.
1963
1964%%%
1965%%% When a tuple is matched, the pattern matching compiler generates a
1966%%% get_tuple_element instruction for every tuple element that will
1967%%% ever be used in the rest of the function. That often forces the
1968%%% extracted tuple elements to be stored in Y registers until it's
1969%%% time to use them. It could also mean that there could be execution
1970%%% paths that will never use the extracted elements.
1971%%%
1972%%% This optimization will sink get_tuple_element instructions, that
1973%%% is, move them forward in the execution stream to the last possible
1974%%% block there they will still dominate all uses. That may reduce the
1975%%% size of stack frames, reduce register shuffling, and avoid
1976%%% extracting tuple elements on execution paths that never use the
1977%%% extracted values.
1978%%%
1979
1980ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) ->
1981    Linear = beam_ssa:linearize(Blocks0),
1982
1983    %% Create a map with all variables that define get_tuple_element
1984    %% instructions. The variable name map to the block it is defined in.
1985    case def_blocks(Linear) of
1986        [] ->
1987            %% No get_tuple_element instructions, so there is nothing to do.
1988            {St, FuncDb};
1989        [_|_]=Defs0 ->
1990            Defs = maps:from_list(Defs0),
1991            {do_ssa_opt_sink(Linear, Defs, St), FuncDb}
1992    end.
1993
1994do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) ->
1995    %% Now find all the blocks that use variables defined by get_tuple_element
1996    %% instructions.
1997    Used = used_blocks(Linear, Defs, []),
1998
1999    %% Calculate dominators.
2000    {Dom,Numbering} = beam_ssa:dominators(Blocks0),
2001
2002    %% It is not safe to move get_tuple_element instructions to blocks
2003    %% that begin with certain instructions. It is also unsafe to move
2004    %% the instructions into any part of a receive. To avoid such
2005    %% unsafe moves, pretend that the unsuitable blocks are not
2006    %% dominators.
2007    Unsuitable = unsuitable(Linear, Blocks0),
2008
2009    %% Calculate new positions for get_tuple_element instructions. The new
2010    %% position is a block that dominates all uses of the variable.
2011    DefLoc = new_def_locations(Used, Defs, Dom, Numbering, Unsuitable),
2012
2013    %% Now move all suitable get_tuple_element instructions to their
2014    %% new blocks.
2015    Blocks = foldl(fun({V,To}, A) ->
2016                           From = map_get(V, Defs),
2017                           move_defs(V, From, To, A)
2018                   end, Blocks0, DefLoc),
2019    St#st{ssa=Blocks}.
2020
2021def_blocks([{L,#b_blk{is=Is}}|Bs]) ->
2022    def_blocks_is(Is, L, def_blocks(Bs));
2023def_blocks([]) -> [].
2024
2025def_blocks_is([#b_set{op=get_tuple_element,dst=Dst}|Is], L, Acc) ->
2026    def_blocks_is(Is, L, [{Dst,L}|Acc]);
2027def_blocks_is([_|Is], L, Acc) ->
2028    def_blocks_is(Is, L, Acc);
2029def_blocks_is([], _, Acc) -> Acc.
2030
2031used_blocks([{L,Blk}|Bs], Def, Acc0) ->
2032    Used = beam_ssa:used(Blk),
2033    Acc = [{V,L} || V <- Used, maps:is_key(V, Def)] ++ Acc0,
2034    used_blocks(Bs, Def, Acc);
2035used_blocks([], _Def, Acc) ->
2036    rel2fam(Acc).
2037
2038%% unsuitable(Linear, Blocks) -> Unsuitable.
2039%%  Return an ordset of block labels for the blocks that are not
2040%%  suitable for sinking of get_tuple_element instructions.
2041
2042unsuitable(Linear, Blocks) ->
2043    Predecessors = beam_ssa:predecessors(Blocks),
2044    Unsuitable0 = unsuitable_1(Linear),
2045    Unsuitable1 = unsuitable_recv(Linear, Blocks, Predecessors),
2046    gb_sets:from_list(Unsuitable0 ++ Unsuitable1).
2047
2048unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs]) ->
2049    Unsuitable = case Op of
2050                     bs_extract -> true;
2051                     bs_put -> true;
2052                     {float,_} -> true;
2053                     landingpad -> true;
2054                     peek_message -> true;
2055                     wait_timeout -> true;
2056                     _ -> false
2057                 end,
2058    case Unsuitable of
2059        true ->
2060            [L|unsuitable_1(Bs)];
2061        false ->
2062            unsuitable_1(Bs)
2063    end;
2064unsuitable_1([{_,#b_blk{}}|Bs]) ->
2065    unsuitable_1(Bs);
2066unsuitable_1([]) -> [].
2067
2068unsuitable_recv([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs], Blocks, Predecessors) ->
2069    Ls = case Op of
2070             remove_message ->
2071                 unsuitable_loop(L, Blocks, Predecessors);
2072             recv_next ->
2073                 unsuitable_loop(L, Blocks, Predecessors);
2074             _ ->
2075                 []
2076         end,
2077    Ls ++ unsuitable_recv(Bs, Blocks, Predecessors);
2078unsuitable_recv([_|Bs], Blocks, Predecessors) ->
2079    unsuitable_recv(Bs, Blocks, Predecessors);
2080unsuitable_recv([], _, _) -> [].
2081
2082unsuitable_loop(L, Blocks, Predecessors) ->
2083    unsuitable_loop(L, Blocks, Predecessors, []).
2084
2085unsuitable_loop(L, Blocks, Predecessors, Acc) ->
2086    Ps = map_get(L, Predecessors),
2087    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc).
2088
2089unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) ->
2090    case map_get(P, Blocks) of
2091        #b_blk{is=[#b_set{op=peek_message}|_]} ->
2092            unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0);
2093        #b_blk{} ->
2094            case ordsets:is_element(P, Acc0) of
2095                false ->
2096                    Acc1 = ordsets:add_element(P, Acc0),
2097                    Acc = unsuitable_loop(P, Blocks, Predecessors, Acc1),
2098                    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc);
2099                true ->
2100                    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0)
2101            end
2102    end;
2103unsuitable_loop_1([], _, _, Acc) -> Acc.
2104
2105%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs,
2106%%                   Dominators, Numbering, Unsuitable) ->
2107%%  [{Variable,NewDefinitionBlock}]
2108%%
2109%%  Calculate new locations for get_tuple_element instructions. For
2110%%  each variable, the new location is a block that dominates all uses
2111%%  of the variable and as near to the uses of as possible.
2112
2113new_def_locations([{V,UsedIn}|Vs], Defs, Dom, Numbering, Unsuitable) ->
2114    DefIn = map_get(V, Defs),
2115    Common = common_dominator(UsedIn, Dom, Numbering, Unsuitable),
2116    case member(Common, map_get(DefIn, Dom)) of
2117        true ->
2118            %% The common dominator is either DefIn or an
2119            %% ancestor of DefIn.
2120            new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable);
2121        false ->
2122            %% We have found a suitable descendant of DefIn,
2123            %% to which the get_tuple_element instruction can
2124            %% be sunk.
2125            [{V,Common}|new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable)]
2126    end;
2127new_def_locations([], _, _, _, _) -> [].
2128
2129common_dominator(Ls0, Dom, Numbering, Unsuitable) ->
2130    [Common|_] = beam_ssa:common_dominators(Ls0, Dom, Numbering),
2131    case gb_sets:is_member(Common, Unsuitable) of
2132        true ->
2133            %% It is not allowed to place the instruction here. Try
2134            %% to find another suitable dominating block by going up
2135            %% one step in the dominator tree.
2136            [Common,OneUp|_] = map_get(Common, Dom),
2137            common_dominator([OneUp], Dom, Numbering, Unsuitable);
2138        false ->
2139            Common
2140    end.
2141
2142%% Move get_tuple_element instructions to their new locations.
2143
2144move_defs(V, From, To, Blocks) ->
2145    #{From:=FromBlk0,To:=ToBlk0} = Blocks,
2146    {Def,FromBlk} = remove_def(V, FromBlk0),
2147    try insert_def(V, Def, ToBlk0) of
2148        ToBlk ->
2149            %%io:format("~p: ~p => ~p\n", [V,From,To]),
2150            Blocks#{From:=FromBlk,To:=ToBlk}
2151    catch
2152        throw:not_possible ->
2153            Blocks
2154    end.
2155
2156remove_def(V, #b_blk{is=Is0}=Blk) ->
2157    {Def,Is} = remove_def_is(Is0, V, []),
2158    {Def,Blk#b_blk{is=Is}}.
2159
2160remove_def_is([#b_set{dst=Dst}=Def|Is], Dst, Acc) ->
2161    {Def,reverse(Acc, Is)};
2162remove_def_is([I|Is], Dst, Acc) ->
2163    remove_def_is(Is, Dst, [I|Acc]).
2164
2165insert_def(V, Def, #b_blk{is=Is0}=Blk) ->
2166    Is = insert_def_is(Is0, V, Def),
2167    Blk#b_blk{is=Is}.
2168
2169insert_def_is([#b_set{op=phi}=I|Is], V, Def) ->
2170    case member(V, beam_ssa:used(I)) of
2171        true ->
2172            throw(not_possible);
2173        false ->
2174            [I|insert_def_is(Is, V, Def)]
2175    end;
2176insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) ->
2177    Action0 = case Op of
2178                  call -> beyond;
2179                  'catch_end' -> beyond;
2180                  timeout -> beyond;
2181                  _ -> here
2182              end,
2183    Action = case Is of
2184                 [#b_set{op=succeeded}|_] -> here;
2185                 _ -> Action0
2186             end,
2187    case Action of
2188        beyond ->
2189            case member(V, beam_ssa:used(I)) of
2190                true ->
2191                    %% The variable is used by this instruction. We must
2192                    %% place the definition before this instruction.
2193                    [Def|Is0];
2194                false ->
2195                    %% Place it beyond the current instruction.
2196                    [I|insert_def_is(Is, V, Def)]
2197            end;
2198        here ->
2199            [Def|Is0]
2200    end;
2201insert_def_is([], _V, Def) ->
2202    [Def].
2203
2204%%%
2205%%% Order consecutive get_tuple_element instructions in ascending
2206%%% position order. This will give the loader more opportunities
2207%%% for combining get_tuple_element instructions.
2208%%%
2209
2210ssa_opt_get_tuple_element({#st{ssa=Blocks0}=St, FuncDb}) ->
2211    Blocks = opt_get_tuple_element(maps:to_list(Blocks0), Blocks0),
2212    {St#st{ssa=Blocks}, FuncDb}.
2213
2214opt_get_tuple_element([{L,#b_blk{is=Is0}=Blk0}|Bs], Blocks) ->
2215    case opt_get_tuple_element_is(Is0, false, []) of
2216        {yes,Is} ->
2217            Blk = Blk0#b_blk{is=Is},
2218            opt_get_tuple_element(Bs, Blocks#{L:=Blk});
2219        no ->
2220            opt_get_tuple_element(Bs, Blocks)
2221    end;
2222opt_get_tuple_element([], Blocks) -> Blocks.
2223
2224opt_get_tuple_element_is([#b_set{op=get_tuple_element,
2225                                 args=[#b_var{}=Src,_]}=I0|Is0],
2226                         _AnyChange, Acc) ->
2227    {GetIs0,Is} = collect_get_tuple_element(Is0, Src, [I0]),
2228    GetIs1 = sort([{Pos,I} || #b_set{args=[_,Pos]}=I <- GetIs0]),
2229    GetIs = [I || {_,I} <- GetIs1],
2230    opt_get_tuple_element_is(Is, true, reverse(GetIs, Acc));
2231opt_get_tuple_element_is([I|Is], AnyChange, Acc) ->
2232    opt_get_tuple_element_is(Is, AnyChange, [I|Acc]);
2233opt_get_tuple_element_is([], AnyChange, Acc) ->
2234    case AnyChange of
2235        true -> {yes,reverse(Acc)};
2236        false -> no
2237    end.
2238
2239collect_get_tuple_element([#b_set{op=get_tuple_element,
2240                                  args=[Src,_]}=I|Is], Src, Acc) ->
2241    collect_get_tuple_element(Is, Src, [I|Acc]);
2242collect_get_tuple_element(Is, _Src, Acc) ->
2243    {Acc,Is}.
2244
2245%%%
2246%%% Common utilities.
2247%%%
2248
2249gcd(A, B) ->
2250    case A rem B of
2251        0 -> B;
2252        X -> gcd(B, X)
2253    end.
2254
2255non_guards(Linear) ->
2256    gb_sets:from_list(non_guards_1(Linear)).
2257
2258non_guards_1([{L,#b_blk{is=Is}}|Bs]) ->
2259    case Is of
2260        [#b_set{op=landingpad}|_] ->
2261            [L | non_guards_1(Bs)];
2262        _ ->
2263            non_guards_1(Bs)
2264    end;
2265non_guards_1([]) ->
2266    [?BADARG_BLOCK].
2267
2268rel2fam(S0) ->
2269    S1 = sofs:relation(S0),
2270    S = sofs:rel2fam(S1),
2271    sofs:to_external(S).
2272
2273sub(I, Sub) ->
2274    beam_ssa:normalize(sub_1(I, Sub)).
2275
2276sub_1(#b_set{op=phi,args=Args}=I, Sub) ->
2277    I#b_set{args=[{sub_arg(A, Sub),P} || {A,P} <- Args]};
2278sub_1(#b_set{args=Args}=I, Sub) ->
2279    I#b_set{args=[sub_arg(A, Sub) || A <- Args]};
2280sub_1(#b_br{bool=#b_var{}=Old}=Br, Sub) ->
2281    New = sub_arg(Old, Sub),
2282    Br#b_br{bool=New};
2283sub_1(#b_switch{arg=#b_var{}=Old}=Sw, Sub) ->
2284    New = sub_arg(Old, Sub),
2285    Sw#b_switch{arg=New};
2286sub_1(#b_ret{arg=#b_var{}=Old}=Ret, Sub) ->
2287    New = sub_arg(Old, Sub),
2288    Ret#b_ret{arg=New};
2289sub_1(Last, _) -> Last.
2290
2291sub_arg(#b_remote{mod=Mod,name=Name}=Rem, Sub) ->
2292    Rem#b_remote{mod=sub_arg(Mod, Sub),name=sub_arg(Name, Sub)};
2293sub_arg(Old, Sub) ->
2294    case Sub of
2295        #{Old:=New} -> New;
2296        #{} -> Old
2297    end.
2298
2299new_var(#b_var{name={Base,N}}, Count) ->
2300    true = is_integer(N),                       %Assertion.
2301    {#b_var{name={Base,Count}},Count+1};
2302new_var(#b_var{name=Base}, Count) ->
2303    {#b_var{name={Base,Count}},Count+1}.
2304