1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2018-2020. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20
21%%%
22%%% This is a collection of various optimizations that don't need a separate
23%%% pass by themselves and/or are mutually beneficial to other passes.
24%%%
25%%% The optimizations are applied in "phases," each with a list of sub-passes
26%%% to run. These sub-passes are applied on all functions in a module before
27%%% moving on to the next phase, which lets us gather module-level information
28%%% in one phase and then apply it in the next without having to risk working
29%%% with incomplete information.
30%%%
31%%% Each sub-pass operates on a #opt_st{} record and a func_info_db(), where
32%%% the former is just a #b_function{} whose blocks can be represented either
33%%% in linear or map form, and the latter is a map with information about all
34%%% functions in the module (see beam_ssa_opt.hrl for more details).
35%%%
36
37-module(beam_ssa_opt).
38-export([module/2]).
39
40-include("beam_ssa_opt.hrl").
41
42-import(lists, [all/2,append/1,duplicate/2,flatten/1,foldl/3,
43                keyfind/3,last/1,mapfoldl/3,member/2,
44                partition/2,reverse/1,reverse/2,
45                splitwith/2,sort/1,takewhile/2,unzip/1]).
46
47-define(MAX_REPETITIONS, 16).
48
-spec module(beam_ssa:b_module(), [compile:option()]) ->
                    {'ok',beam_ssa:b_module()}.

module(Module, Opts) ->
    %% Module-level optimization can be disabled with the no_module_opt
    %% option; in that case we start out with an empty function database.
    FuncDb = case proplists:get_value(no_module_opt, Opts, false) of
                 true -> #{};
                 false -> build_func_db(Module)
             end,

    %% Passes that perform module-level optimizations are often aided by
    %% optimizing callers before callees and vice versa, so we optimize all
    %% functions in call order, alternating the order every time.
    StMap0 = build_st_map(Module),
    Order = get_call_order_po(StMap0, FuncDb),

    Phases = [{once, Order, prologue_passes(Opts)},
              {module, module_passes(Opts)},
              {fixpoint, Order, repeated_passes(Opts)},
              {once, Order, epilogue_passes(Opts)}],

    {ok, finish(Module, run_phases(Phases, StMap0, FuncDb))}.
71
%% Run each phase in order. A `module` phase runs its sub-passes on the
%% whole module at once; `once` and `fixpoint` phases run their
%% sub-passes function by function in the given order.
run_phases([{module, Passes} | Rest], StMap0, FuncDb0) ->
    {StMap, FuncDb} = compile:run_sub_passes(Passes, {StMap0, FuncDb0}),
    run_phases(Rest, StMap, FuncDb);
run_phases([{once, Ids0, Passes} | Rest], StMap0, FuncDb0) ->
    %% Earlier phases may have removed functions; skip them.
    Ids = skip_removed(Ids0, StMap0),
    {StMap, FuncDb} = phase(Ids, Passes, StMap0, FuncDb0),
    run_phases(Rest, StMap, FuncDb);
run_phases([{fixpoint, Ids0, Passes} | Rest], StMap0, FuncDb0) ->
    Ids = skip_removed(Ids0, StMap0),
    RevIds = reverse(Ids),
    %% The fixpoint loop alternates between the forward and the
    %% reversed call order on every iteration.
    {StMap, FuncDb} = fixpoint(RevIds, {Ids, RevIds}, Passes,
                               StMap0, FuncDb0, ?MAX_REPETITIONS),
    run_phases(Rest, StMap, FuncDb);
run_phases([], StMap, _FuncDb) ->
    StMap.
88
%% Keep only the function ids that still have an entry in the state map
%% (functions can be removed by earlier optimization phases).
skip_removed(FuncIds, StMap) ->
    [Id || Id <- FuncIds, is_map_key(Id, StMap)].
91
%% fixpoint(FuncIds, Order, Passes, StMap0, FuncDb0, N) -> {StMap,FuncDb}.
%%  Run the given passes until a fixpoint is reached, but at most N
%%  times. Order is a pair of the two traversal orders (forward and
%%  reversed call order); it is swapped on every iteration so that
%%  callers and callees take turns being optimized first.
fixpoint(_FuncIds, _Order, _Passes, StMap, FuncDb, 0) ->
    %% Too many repetitions. Give up and return what we have.
    {StMap, FuncDb};
fixpoint(FuncIds0, Order0, Passes, StMap0, FuncDb0, N) ->
    {StMap, FuncDb} = phase(FuncIds0, Passes, StMap0, FuncDb0),
    Repeat = changed(FuncIds0, FuncDb0, FuncDb, StMap0, StMap),
    case sets:is_empty(Repeat) of
        true ->
            %% No change. Fixpoint reached.
            {StMap, FuncDb};
        false ->
            %% Repeat the optimizations for functions whose code has
            %% changed or for which there is potentially updated type
            %% information.
            {OrderA, OrderB} = Order0,
            Order = {OrderB, OrderA},
            %% Only re-optimize functions flagged in Repeat, keeping
            %% the relative order of OrderA (next iteration's order).
            FuncIds = [Id || Id <- OrderA, sets:is_element(Id, Repeat)],
            fixpoint(FuncIds, Order, Passes, StMap, FuncDb, N - 1)
    end.
112
%% Run the sub-passes on each function in turn, accumulating the
%% updated per-function state and the shared function database. On a
%% crash, print which function failed before rethrowing.
phase([Id | Ids], Passes, StMap, FuncDb0) ->
    try compile:run_sub_passes(Passes, {map_get(Id, StMap), FuncDb0}) of
        {St, FuncDb} ->
            phase(Ids, Passes, StMap#{Id => St}, FuncDb)
    catch
        Class:Reason:Stack ->
            #b_local{name=#b_literal{val=Name},arity=Arity} = Id,
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Reason, Stack)
    end;
phase([], _Passes, StMap, FuncDb) ->
    {StMap, FuncDb}.
125
%% changed(PrevIds, FuncDb0, FuncDb, StMap0, StMap) -> set of func ids.
%%  Return the set of functions that may benefit from being optimized
%%  again: those reachable from a type change and those whose SSA code
%%  was altered by the previous run.
changed(PrevIds, FuncDb0, FuncDb, StMap0, StMap) ->
    %% Find all functions in FuncDb that can be reached by changes
    %% of argument and/or return types. Those are the functions that
    %% may gain from running the optimization passes again.
    %%
    %% Note that we examine all functions in FuncDb, not only functions
    %% optimized in the previous run, because the argument types can
    %% have been updated for functions not included in the previous run.

    F = fun(Id, A) ->
                case sets:is_element(Id, A) of
                    true ->
                        %% Already scheduled; no need to compare types.
                        A;
                    false ->
                        {#func_info{arg_types=ATs0,succ_types=ST0},
                         #func_info{arg_types=ATs1,succ_types=ST1}} =
                            {map_get(Id, FuncDb0),map_get(Id, FuncDb)},

                        %% If the argument types have changed for this
                        %% function, re-optimize this function and all
                        %% functions it calls directly or indirectly.
                        %%
                        %% If the return type has changed, re-optimize
                        %% this function and all functions that call
                        %% this function directly or indirectly.
                        Opts = case ATs0 =:= ATs1 of
                                    true -> [];
                                    false -> [called]
                                end ++
                            case ST0 =:= ST1 of
                                true -> [];
                                false -> [callers]
                            end,
                        case Opts of
                            [] -> A;
                            [_|_] -> add_changed([Id], Opts, FuncDb, A)
                        end
                end
        end,
    Ids = foldl(F, sets:new([{version, 2}]), maps:keys(FuncDb)),

    %% From all functions that were optimized in the previous run,
    %% find the functions that had any change in the SSA code. Those
    %% functions might gain from being optimized again. (For example,
    %% when beam_ssa_dead has shortcut branches, the types for some
    %% variables could become narrower, giving beam_ssa_type new
    %% opportunities for optimization.)
    %%
    %% Note that the functions examined could be functions with module-level
    %% optimization turned off (and thus not included in FuncDb).

    foldl(fun(Id, A) ->
                  case sets:is_element(Id, A) of
                      true ->
                          %% Already scheduled for another optimization.
                          %% No need to compare the SSA code.
                          A;
                      false ->
                          %% Compare the SSA code before and after optimization.
                          case {map_get(Id, StMap0),map_get(Id, StMap)} of
                              {Same,Same} -> A;
                              {_,_} -> sets:add_element(Id, A)
                          end
                  end
          end, Ids, PrevIds).
191
%% add_changed(Ids, Opts, FuncDb, Seen) -> Seen.
%%  Add the given functions to the set, and transitively their callers
%%  (when `callers` is in Opts) and/or callees (when `called` is).
add_changed([Id | Ids], Opts, FuncDb, Seen0) when is_map_key(Id, FuncDb) ->
    case sets:is_element(Id, Seen0) of
        true ->
            add_changed(Ids, Opts, FuncDb, Seen0);
        false ->
            Seen1 = sets:add_element(Id, Seen0),
            #func_info{in=In,out=Out} = map_get(Id, FuncDb),
            Seen2 = case member(callers, Opts) of
                        true -> add_changed(In, Opts, FuncDb, Seen1);
                        false -> Seen1
                    end,
            Seen = case member(called, Opts) of
                       true -> add_changed(Out, Opts, FuncDb, Seen2);
                       false -> Seen2
                   end,
            add_changed(Ids, Opts, FuncDb, Seen)
    end;
add_changed([_ | Ids], Opts, FuncDb, Seen) ->
    %% This function is exempt from module-level optimization and will not
    %% provide any more information.
    add_changed(Ids, Opts, FuncDb, Seen);
add_changed([], _Opts, _FuncDb, Seen) ->
    Seen.
214
215%%
216
%% Construct the #b_local{} identifier for a function from its
%% func_info annotation ({Module,Name,Arity}; the module is implicit).
get_func_id(Function) ->
    {_, Name, Arity} = beam_ssa:get_anno(func_info, Function),
    #b_local{name=#b_literal{val=Name},arity=Arity}.
220
%% Build the map from function id to #opt_st{} state.
-spec build_st_map(#b_module{}) -> st_map().
build_st_map(#b_module{body=Functions}) ->
    build_st_map_1(Functions, #{}).
224
%% Fold every #b_function{} into an #opt_st{} entry keyed by its id.
build_st_map_1(Fs, Map) ->
    foldl(fun(#b_function{anno=Anno,args=Args,cnt=Counter,bs=Bs}=F, Acc) ->
                  St = #opt_st{anno=Anno,args=Args,cnt=Counter,ssa=Bs},
                  Acc#{ get_func_id(F) => St }
          end, Map, Fs).
231
%% Write the optimized per-function states back into the module body.
-spec finish(#b_module{}, st_map()) -> #b_module{}.
finish(#b_module{body=Body0}=Module, StMap) ->
    Module#b_module{body=finish_1(Body0, StMap)}.
235
%% Rebuild each function from its optimized state; functions without
%% an entry in the state map have been removed and are dropped.
finish_1([F0 | Fs], StMap) ->
    Id = get_func_id(F0),
    case StMap of
        #{ Id := #opt_st{anno=Anno,cnt=Counter,ssa=Blocks} } ->
            [F0#b_function{anno=Anno,bs=Blocks,cnt=Counter} |
             finish_1(Fs, StMap)];
        #{} ->
            finish_1(Fs, StMap)
    end;
finish_1([], _StMap) ->
    [].
247
248%%
249
250-define(PASS(N), {N,fun N/1}).
251
%% Passes run once before the module-level and repeated passes.
%% The order of the passes is significant.
prologue_passes(Opts) ->
    passes_1([?PASS(ssa_opt_split_blocks),
              ?PASS(ssa_opt_coalesce_phis),
              ?PASS(ssa_opt_tail_phis),
              ?PASS(ssa_opt_element),
              ?PASS(ssa_opt_linearize),
              ?PASS(ssa_opt_tuple_size),
              ?PASS(ssa_opt_record),
              %% cse and live help the first type pass.
              ?PASS(ssa_opt_cse),
              ?PASS(ssa_opt_live)],
             Opts).
263
%% Passes that operate on the whole module at once.
module_passes(Opts) ->
    passes_1([{ssa_opt_bc_size,
               fun({StMap, FuncDb}) ->
                       {beam_ssa_bc_size:opt(StMap), FuncDb}
               end},
              {ssa_opt_type_start,
               fun({StMap, FuncDb}) ->
                       beam_ssa_type:opt_start(StMap, FuncDb)
               end}],
             Opts).
274
%% These passes all benefit from each other (in roughly this order), so they
%% are repeated as required.
repeated_passes(Opts) ->
    passes_1([?PASS(ssa_opt_live),
              ?PASS(ssa_opt_ne),
              ?PASS(ssa_opt_bs_puts),
              ?PASS(ssa_opt_dead),
              ?PASS(ssa_opt_cse),
              ?PASS(ssa_opt_tail_phis),
              ?PASS(ssa_opt_sink),
              ?PASS(ssa_opt_tuple_size),
              ?PASS(ssa_opt_record),
              ?PASS(ssa_opt_try),
              %% Must run after ssa_opt_dead to clean up phi nodes.
              ?PASS(ssa_opt_type_continue)],
             Opts).
291
%% Passes run once after the fixpoint phase has finished.
epilogue_passes(Opts) ->
    passes_1([?PASS(ssa_opt_type_finish),
              ?PASS(ssa_opt_float),
              ?PASS(ssa_opt_sw),

              %% Run live one more time to clean up after the previous
              %% epilogue passes.
              ?PASS(ssa_opt_live),
              ?PASS(ssa_opt_bsm),
              ?PASS(ssa_opt_bsm_shortcut),
              ?PASS(ssa_opt_sink),
              ?PASS(ssa_opt_blockify),
              ?PASS(ssa_opt_merge_blocks),
              ?PASS(ssa_opt_get_tuple_element),
              ?PASS(ssa_opt_tail_calls),
              ?PASS(ssa_opt_trim_unreachable),
              ?PASS(ssa_opt_unfold_literals)],
             Opts).
310
%% passes_1(Passes, Options) -> Passes.
%%  Honor no_<pass name> options: any pass disabled by such an option
%%  is replaced by an identity pass with the negated name, so the pass
%%  list keeps its length and reporting stays informative.
passes_1(Ps, Opts0) ->
    Negations = [{list_to_atom("no_" ++ atom_to_list(Name)), Name} ||
                    {Name, _} <- Ps],
    Opts = proplists:substitute_negations(Negations, Opts0),
    [begin
         case proplists:get_value(Name, Opts, true) of
             true ->
                 P;
             false ->
                 {NoName, Name} = keyfind(Name, 2, Negations),
                 {NoName, fun(St) -> St end}
         end
     end || {Name, _}=P <- Ps].
322
%% Builds a function information map with basic information about incoming and
%% outgoing local calls, as well as whether the function is exported.
-spec build_func_db(#b_module{}) -> func_info_db().
build_func_db(#b_module{body=Fs,attributes=Attr,exports=Exports0}) ->
    Exports = fdb_exports(Attr, Exports0),
    try
        fdb_fs(Fs, Exports, #{})
    catch
        throw:load_nif ->
            %% All module-level optimizations are invalid when a NIF
            %% can override a function, so we have to bail out.
            #{}
    end.
335
%% Gather the exported functions as a gb_set, also treating on_load
%% functions as exported so they are not optimized away when unused.
fdb_exports([{on_load, L} | Attrs], Exports) ->
    fdb_exports(Attrs, lists:flatten(L) ++ Exports);
fdb_exports([_ | Attrs], Exports) ->
    fdb_exports(Attrs, Exports);
fdb_exports([], Exports) ->
    gb_sets:from_list(Exports).
344
%% Populate the function database: one #func_info{} per function with
%% its exported status and initial argument types, plus the call edges
%% found in its blocks.
fdb_fs([#b_function{args=Args,bs=Bs}=F | Fs], Exports, FuncDb0) ->
    Id = get_func_id(F),

    #b_local{name=#b_literal{val=Name},arity=Arity} = Id,
    Exported = gb_sets:is_element({Name, Arity}, Exports),
    ArgTypes = duplicate(length(Args), #{}),

    %% A callee may already have an entry created by fdb_update/3 when
    %% one of its callers was scanned; update it rather than replace it.
    FuncDb1 = case FuncDb0 of
                  #{ Id := Info } ->
                      FuncDb0#{ Id := Info#func_info{exported=Exported,
                                                     arg_types=ArgTypes} };
                  #{} ->
                      FuncDb0#{ Id => #func_info{exported=Exported,
                                                 arg_types=ArgTypes} }
              end,

    RPO = beam_ssa:rpo(Bs),
    FuncDb = beam_ssa:fold_blocks(fun(_Lbl, #b_blk{is=Is}, Acc) ->
                                          fdb_is(Is, Id, Acc)
                                  end, RPO, FuncDb1, Bs),

    fdb_fs(Fs, Exports, FuncDb);
fdb_fs([], _Exports, FuncDb) ->
    FuncDb.
370
%% fdb_is(Instructions, CallerId, FuncDb) -> FuncDb.
%%  Scan the instructions of one block and record local call edges in
%%  the function database. Throws `load_nif` if erlang:load_nif/2 is
%%  called, since a NIF may override any function in the module.
fdb_is([#b_set{op=call,
               args=[#b_local{}=Callee | _]} | Is],
       Caller, FuncDb) ->
    %% Direct call to a local function: record the caller/callee edge.
    fdb_is(Is, Caller, fdb_update(Caller, Callee, FuncDb));
fdb_is([#b_set{op=call,
               args=[#b_remote{mod=#b_literal{val=erlang},
                               name=#b_literal{val=load_nif}},
                     _Path, _LoadInfo]} | _Is], _Caller, _FuncDb) ->
    throw(load_nif);
fdb_is([#b_set{op=MakeFun,
               args=[#b_local{}=Callee | _]} | Is],
       Caller, FuncDb) when MakeFun =:= make_fun;
                            MakeFun =:= old_make_fun ->
    %% The make_fun instruction's type depends on the return type of the
    %% function in question, so we treat this as a function call.
    fdb_is(Is, Caller, fdb_update(Caller, Callee, FuncDb));
fdb_is([_ | Is], Caller, FuncDb) ->
    fdb_is(Is, Caller, FuncDb);
fdb_is([], _Caller, FuncDb) ->
    FuncDb.
391
%% Record a call edge Caller -> Callee in the function database,
%% creating empty entries for functions not seen before.
fdb_update(Caller, Callee, FuncDb) ->
    CallerInfo = maps:get(Caller, FuncDb, #func_info{}),
    CalleeInfo = maps:get(Callee, FuncDb, #func_info{}),

    Out = ordsets:add_element(Callee, CallerInfo#func_info.out),
    In = ordsets:add_element(Caller, CalleeInfo#func_info.in),

    FuncDb#{ Caller => CallerInfo#func_info{out=Out},
             Callee => CalleeInfo#func_info{in=In} }.
401
%% Returns the post-order of all local calls in this module. That is,
%% called functions will be ordered before the functions calling them.
%%
%% Functions where module-level optimization is disabled are added last in
%% arbitrary order.

get_call_order_po(StMap, FuncDb) ->
    Order = gco_po(FuncDb),
    Untracked = maps:fold(fun(K, _V, Acc) ->
                                  case is_map_key(K, FuncDb) of
                                      true -> Acc;
                                      false -> [K | Acc]
                                  end
                          end, [], StMap),
    Order ++ Untracked.
416
%% Post-order of the call graph: compute a reverse post-order by DFS
%% and reverse it.
gco_po(FuncDb) ->
    Roots = sort(maps:keys(FuncDb)),
    {RPO, _Seen} = gco_rpo(Roots, FuncDb, sets:new([{version, 2}]), []),
    reverse(RPO).
421
%% Depth-first traversal of the call graph, accumulating ids so that a
%% function is added after all of its (transitive) callees.
gco_rpo([Id | Ids], FuncDb, Seen0, Acc0) ->
    case sets:is_element(Id, Seen0) of
        true ->
            gco_rpo(Ids, FuncDb, Seen0, Acc0);
        false ->
            #func_info{out=Callees} = map_get(Id, FuncDb),
            Seen1 = sets:add_element(Id, Seen0),
            {Acc, Seen} = gco_rpo(Callees, FuncDb, Seen1, Acc0),
            gco_rpo(Ids, FuncDb, Seen, [Id | Acc])
    end;
gco_rpo([], _FuncDb, Seen, Acc) ->
    {Acc, Seen}.
434
435%%%
436%%% Trivial sub passes.
437%%%
438
%% Remove dead code (delegates to beam_ssa_dead).
ssa_opt_dead({#opt_st{ssa=Code}=St0, FuncDb}) ->
    {St0#opt_st{ssa=beam_ssa_dead:opt(Code)}, FuncDb}.
441
%% Convert the blocks from map form to linear form.
ssa_opt_linearize({#opt_st{ssa=BlockMap}=St0, FuncDb}) ->
    {St0#opt_st{ssa=beam_ssa:linearize(BlockMap)}, FuncDb}.
444
%% Continue type optimization within the fixpoint loop.
ssa_opt_type_continue({#opt_st{ssa=Code0,args=Args,anno=Anno}=St0, FuncDb0}) ->
    {Code, FuncDb} = beam_ssa_type:opt_continue(Code0, Args, Anno, FuncDb0),
    {St0#opt_st{ssa=Code}, FuncDb}.
448
%% Finish type optimization, updating the function annotation.
ssa_opt_type_finish({#opt_st{args=Args,anno=Anno0}=St0, FuncDb0}) ->
    {Anno, FuncDb} = beam_ssa_type:opt_finish(Args, Anno0, FuncDb0),
    {St0#opt_st{anno=Anno}, FuncDb}.
452
%% Convert the blocks from linear form to map form.
ssa_opt_blockify({#opt_st{ssa=Linear}=St0, FuncDb}) ->
    {St0#opt_st{ssa=maps:from_list(Linear)}, FuncDb}.
455
%% Remove blocks that cannot be reached from block 0.
ssa_opt_trim_unreachable({#opt_st{ssa=BlockMap}=St0, FuncDb}) ->
    {St0#opt_st{ssa=beam_ssa:trim_unreachable(BlockMap)}, FuncDb}.
458
%% Merge blocks that are connected by unconditional branches.
ssa_opt_merge_blocks({#opt_st{ssa=Blocks0}=St0, FuncDb}) ->
    Labels = beam_ssa:rpo(Blocks0),
    {St0#opt_st{ssa=beam_ssa:merge_blocks(Labels, Blocks0)}, FuncDb}.
463
464%%%
465%%% Split blocks before certain instructions to enable more optimizations.
466%%%
467%%% Splitting before element/2 enables the optimization that swaps
468%%% element/2 instructions.
469%%%
470%%% Splitting before call and make_fun instructions gives more opportunities
471%%% for sinking get_tuple_element instructions.
472%%%
473
%% Split blocks before element/2, calls, and make_fun instructions to
%% give later passes (element swapping, get_tuple_element sinking)
%% more opportunities.
ssa_opt_split_blocks({#opt_st{ssa=Blocks0,cnt=Count0}=St, FuncDb}) ->
    SplitOps = [{bif,element},call,bs_init_writable,make_fun,old_make_fun],
    Pred = fun(#b_set{op=Op}) -> member(Op, SplitOps);
              (_) -> false
           end,
    RPO = beam_ssa:rpo(Blocks0),
    {Blocks, Count} = beam_ssa:split_blocks(RPO, Pred, Blocks0, Count0),
    {St#opt_st{ssa=Blocks,cnt=Count}, FuncDb}.
485
486%%%
487%%% Coalesce phi nodes.
488%%%
%%% Nested cases can lead to code such as this:
490%%%
491%%%     10:
492%%%       _1 = phi {literal value1, label 8}, {Var, label 9}
493%%%       br 11
494%%%
495%%%     11:
496%%%       _2 = phi {_1, label 10}, {literal false, label 3}
497%%%
498%%% The phi nodes can be coalesced like this:
499%%%
500%%%     11:
501%%%       _2 = phi {literal value1, label 8}, {Var, label 9}, {literal false, label 3}
502%%%
503%%% Coalescing can help other optimizations, and can in some cases reduce register
504%%% shuffling (if the phi variables for two phi nodes happens to be allocated to
505%%% different registers).
506%%%
507
%% Entry point for phi coalescing: visit every block in reverse
%% post-order and coalesce phi nodes where possible.
ssa_opt_coalesce_phis({#opt_st{ssa=Blocks0}=St, FuncDb}) ->
    Labels = beam_ssa:rpo(Blocks0),
    {St#opt_st{ssa=c_phis_1(Labels, Blocks0)}, FuncDb}.
512
%% Visit each label; only blocks that begin with a phi node are
%% candidates for coalescing.
c_phis_1([L | Ls], Blocks0) ->
    case map_get(L, Blocks0) of
        #b_blk{is=[#b_set{op=phi} | _]}=Blk ->
            c_phis_1(Ls, c_phis_2(L, Blk, Blocks0));
        #b_blk{} ->
            c_phis_1(Ls, Blocks0)
    end;
c_phis_1([], Blocks) -> Blocks.
522
%% Try to coalesce the phi nodes in block L with a phi node in one of
%% its predecessors; if successful, rewrite the phis and redirect the
%% predecessor's predecessors to L.
c_phis_2(L, #b_blk{is=Is0}=Blk0, Blocks0) ->
    case c_phis_args(Is0, Blocks0) of
        none ->
            Blocks0;
        {_, _, Preds}=Info ->
            Is = c_rewrite_phis(Is0, Info),
            Blocks = Blocks0#{L := Blk0#b_blk{is=Is}},
            c_fix_branches(Preds, L, Blocks)
    end.
533
%% Look through the phi nodes of the block for an argument that comes
%% from a predecessor consisting of a single phi node.
c_phis_args([#b_set{op=phi,args=Args} | Is], Blocks) ->
    case c_phis_args_1(Args, Blocks) of
        none -> c_phis_args(Is, Blocks);
        Found -> Found
    end;
c_phis_args(_, _Blocks) -> none.
542
%% Examine each {Value,Predecessor} pair of a phi node.
c_phis_args_1([{Var, Pred} | Rest], Blocks) ->
    case c_get_pred_vars(Var, Pred, Blocks) of
        none -> c_phis_args_1(Rest, Blocks);
        Found -> Found
    end;
c_phis_args_1([], _Blocks) -> none.
551
%% A predecessor can be coalesced only if it consists of exactly one
%% instruction: a phi node that defines the variable flowing in here.
c_get_pred_vars(Var, Pred, Blocks) ->
    case map_get(Pred, Blocks) of
        #b_blk{is=[#b_set{op=phi,dst=Var,args=Args}]} ->
            {Var, Pred, Args};
        #b_blk{} ->
            none
    end.
559
%% Rewrite the arguments of every leading phi node in the block.
c_rewrite_phis([#b_set{op=phi,args=Args0}=I | Is], Info) ->
    [I#b_set{args=c_rewrite_phi(Args0, Info)} | c_rewrite_phis(Is, Info)];
c_rewrite_phis(Is, _Info) -> Is.
564
%% Rewrite one phi node's argument list given {Var,Pred,Values}, the
%% coalesced predecessor phi.
c_rewrite_phi([{Var, Pred} | Rest], {Var, Pred, Values}) ->
    %% This phi's value is exactly the predecessor phi's destination:
    %% splice in the predecessor's sources.
    Values ++ Rest;
c_rewrite_phi([{Value, Pred} | Rest], {_, Pred, Values}) ->
    %% A different value from the coalesced predecessor: replicate it
    %% for each of the predecessor's own predecessors.
    [{Value, P} || {_, P} <- Values] ++ Rest;
c_rewrite_phi([Arg | Rest], Info) ->
    [Arg | c_rewrite_phi(Rest, Info)];
c_rewrite_phi([], _Info) -> [].
572
%% Redirect every predecessor of the coalesced phi to branch directly
%% to block L instead.
c_fix_branches([{_, Pred} | Rest], L, Blocks0) ->
    #b_blk{last=Last0}=Blk0 = map_get(Pred, Blocks0),
    #b_br{bool=#b_literal{val=true}} = Last0,   %Assertion.
    Last = Last0#b_br{bool=#b_literal{val=true},succ=L,fail=L},
    Blocks = Blocks0#{Pred := Blk0#b_blk{last=Last}},
    c_fix_branches(Rest, L, Blocks);
c_fix_branches([], _L, Blocks) -> Blocks.
581
582%%%
583%%% Eliminate phi nodes in the tail of a function.
584%%%
%%% Try to eliminate short blocks that start with a phi node
586%%% and end in a return. For example:
587%%%
588%%%    Result = phi { Res1, 4 }, { literal true, 5 }
589%%%    Ret = put_tuple literal ok, Result
590%%%    ret Ret
591%%%
592%%% The code in this block can be inserted at the end blocks 4 and
593%%% 5. Thus, the following code can be inserted into block 4:
594%%%
595%%%    Ret:1 = put_tuple literal ok, Res1
596%%%    ret Ret:1
597%%%
598%%% And the following code into block 5:
599%%%
600%%%    Ret:2 = put_tuple literal ok, literal true
601%%%    ret Ret:2
602%%%
603%%% Which can be further simplified to:
604%%%
605%%%    ret literal {ok, true}
606%%%
607%%% This transformation may lead to more code improvements:
608%%%
609%%%   - Stack trimming
610%%%   - Fewer test_heap instructions
611%%%   - Smaller stack frames
612%%%
613
%% Entry point for eliminating phi nodes in the tail of a function.
ssa_opt_tail_phis({#opt_st{ssa=SSA0,cnt=Count0}=St, FuncDb}) ->
    {SSA, Count} = opt_tail_phis(SSA0, Count0),
    {St#opt_st{ssa=SSA,cnt=Count}, FuncDb}.
617
%% Accept the blocks in either map or linear form; linear input is
%% converted to a map and linearized again afterwards.
opt_tail_phis(Blocks, Count) when is_map(Blocks) ->
    opt_tail_phis(maps:values(Blocks), Blocks, Count);
opt_tail_phis(Linear, Count0) when is_list(Linear) ->
    {Blocks, Count} = opt_tail_phis(maps:from_list(Linear), Count0),
    {beam_ssa:linearize(Blocks), Count}.
624
%% Find blocks that start with a phi node (with at least two sources),
%% end in a return of a variable, and only contain operations that may
%% be duplicated into the predecessors.
opt_tail_phis([#b_blk{is=Is0,last=Last} | Bs], Blocks0, Count0) ->
    case {Is0, Last} of
        {[#b_set{op=phi,args=[_,_|_]} | _], #b_ret{arg=#b_var{}}=Ret} ->
            {Phis, Rest} = splitwith(fun(#b_set{op=Op}) -> Op =:= phi end,
                                     Is0),
            case suitable_tail_ops(Rest) of
                true ->
                    {Blocks, Count} = opt_tail_phi(Phis, Rest, Ret,
                                                   Blocks0, Count0),
                    opt_tail_phis(Bs, Blocks, Count);
                false ->
                    opt_tail_phis(Bs, Blocks0, Count0)
            end;
        {_, _} ->
            opt_tail_phis(Bs, Blocks0, Count0)
    end;
opt_tail_phis([], Blocks, Count) ->
    {Blocks, Count}.
642
%% Duplicate the tail code into each predecessor, but keep the
%% original blocks if the estimated code-size cost is too high.
opt_tail_phi(Phis0, Is, Ret, Blocks0, Count0) ->
    Phis = rel2fam(reduce_phis(Phis0)),
    {Blocks, Count, Cost} =
        foldl(fun(PhiArg, Acc) ->
                      opt_tail_phi_arg(PhiArg, Is, Ret, Acc)
              end, {Blocks0, Count0, 0}, Phis),
    MaxCost = length(Phis) * 3 + 2,
    case Cost =< MaxCost of
        true ->
            %% The transformation would cause at most a slight
            %% increase in code size if no more optimizations
            %% can be applied.
            {Blocks, Count};
        false ->
            %% The code size would be increased too much.
            {Blocks0, Count0}
    end.
660
%% Flatten the phi nodes into {PredLabel,{PhiDst,Value}} pairs.
reduce_phis([#b_set{dst=Dst,args=Args} | Is]) ->
    [{Lbl, {Dst, Val}} || {Val, Lbl} <- Args] ++ reduce_phis(Is);
reduce_phis([]) -> [].
664
%% Append the (renamed and substituted) tail code to one predecessor
%% block and replace its branch with the return, accumulating the
%% estimated extra code size.
opt_tail_phi_arg({PredL, Sub0}, Is0, Ret0, {Blocks0, Count0, Cost0}) ->
    #b_blk{is=IsPrefix,last=#b_br{succ=Next,fail=Next}}=Blk0 =
        map_get(PredL, Blocks0),
    {Is1, Count, Sub} = new_names(Is0, maps:from_list(Sub0), Count0, []),
    Is2 = [sub(I, Sub) || I <- Is1],
    Cost = build_cost(Is2, Cost0),
    Blk = Blk0#b_blk{is=IsPrefix ++ Is2,last=sub(Ret0, Sub)},
    {Blocks0#{PredL := Blk}, Count, Cost}.
677
%% Give every instruction a fresh destination variable, extending the
%% substitution map so that later uses are renamed accordingly.
new_names([#b_set{dst=Dst}=I | Is], Sub0, Count0, Acc) ->
    {NewDst, Count} = new_var(Dst, Count0),
    new_names(Is, Sub0#{Dst => NewDst}, Count, [I#b_set{dst=NewDst} | Acc]);
new_names([], Sub, Count, Acc) ->
    {reverse(Acc), Count, Sub}.
684
%% All instructions must be side-effect free to be movable into the
%% predecessor blocks.
suitable_tail_ops(Is) ->
    all(fun(#b_set{op=Op}) -> is_suitable_tail_op(Op) end, Is).
689
%% Only guard BIFs and term-building instructions may be duplicated
%% into predecessor blocks.
is_suitable_tail_op({bif, _}) -> true;
is_suitable_tail_op(Op) ->
    Op =:= put_list orelse Op =:= put_tuple.
694
%% Estimate the code-size cost of the duplicated instructions. An
%% instruction with all-literal arguments is free (it will be turned
%% into a literal); otherwise a put_tuple costs one word per argument
%% plus one, and everything else costs one.
build_cost([#b_set{op=put_list,args=Args} | Is], Cost0) ->
    Cost = case are_all_literals(Args) of
               true -> Cost0;
               false -> Cost0 + 1
           end,
    build_cost(Is, Cost);
build_cost([#b_set{op=put_tuple,args=Args} | Is], Cost0) ->
    Cost = case are_all_literals(Args) of
               true -> Cost0;
               false -> Cost0 + length(Args) + 1
           end,
    build_cost(Is, Cost);
build_cost([#b_set{op={bif,_},args=Args} | Is], Cost0) ->
    Cost = case are_all_literals(Args) of
               true -> Cost0;
               false -> Cost0 + 1
           end,
    build_cost(Is, Cost);
build_cost([], Cost) -> Cost.
717
%% True if every argument is a literal.
are_all_literals(Args) ->
    not lists:any(fun(#b_literal{}) -> false;
                     (_) -> true
                  end, Args).
722
723%%%
724%%% Order element/2 calls.
725%%%
726%%% Order an unbroken chain of element/2 calls for the same tuple
727%%% with the same failure label so that the highest element is
728%%% retrieved first. That will allow the other element/2 calls to
729%%% be replaced with get_tuple_element/3 instructions.
730%%%
731
%% Order an unbroken chain of element/2 calls so that the call with
%% the highest index is executed first, allowing the others to become
%% get_tuple_element instructions.
ssa_opt_element({#opt_st{ssa=Blocks}=St, FuncDb}) ->
    %% Collect the information about element instructions in this
    %% function.
    GetEls = collect_element_calls(beam_ssa:linearize(Blocks)),

    %% Collect the element instructions into chains; each chain is in
    %% reverse execution order.
    Chains = collect_chains(GetEls, []),

    %% For each chain, swap the first element call with the element
    %% call that has the highest index.
    {St#opt_st{ssa=swap_element_calls(Chains, Blocks)}, FuncDb}.
745
%% Find blocks consisting of a guarded element/2 call, producing
%% {Label,SuccLabel,{Tuple,FailLabel},Index} tuples.
collect_element_calls([{L, #b_blk{is=Is,last=Last}} | Bs]) ->
    case {Is, Last} of
        {[#b_set{op={bif,element},dst=Element,
                 args=[#b_literal{val=N},#b_var{}=Tuple]},
          #b_set{op={succeeded,guard},dst=Bool,args=[Element]}],
         #b_br{bool=Bool,succ=Succ,fail=Fail}} ->
            [{L, Succ, {Tuple, Fail}, N} | collect_element_calls(Bs)];
        {_, _} ->
            collect_element_calls(Bs)
    end;
collect_element_calls([]) -> [].
758
%% Group consecutive element calls on the same tuple (with the same
%% failure label) into chains; chains shorter than two are discarded.
collect_chains([{Lbl, _, TupleFail, _}=Call | Calls],
               [{_, Lbl, TupleFail, _} | _]=Chain) ->
    %% This block is the successor of the chain head and tests the
    %% same tuple with the same failure label: extend the chain.
    collect_chains(Calls, [Call | Chain]);
collect_chains([Call | Calls], [_, _ | _]=Chain) ->
    %% Save the previous chain (it has at least two links) and start a
    %% new one.
    [Chain | collect_chains(Calls, [Call])];
collect_chains([Call | Calls], _Short) ->
    %% The previous chain is too short; discard it and start a new one.
    collect_chains(Calls, [Call]);
collect_chains([], [_, _ | _]=Chain) ->
    %% Save the last chain.
    [Chain];
collect_chains([], _Short) ->
    [].
772
%% For each chain, seed the "highest index" tracker with the chain
%% head (the last call to be executed) and process the chain.
swap_element_calls([[{L, _, _, N} | _]=Chain | Chains], Blocks0) ->
    Blocks = swap_element_calls_1(Chain, {N, L}, Blocks0),
    swap_element_calls(Chains, Blocks);
swap_element_calls([], Blocks) -> Blocks.
777
%% swap_element_calls_1(Chain, {HighestIndex,HighestLabel}, Blocks) -> Blocks.
%%  Traverse the chain (which is in reverse execution order), tracking
%%  the element call with the highest index seen so far together with
%%  the label of the block that contains it. When the first-executed
%%  call is reached, swap it with the highest-index call if necessary.
swap_element_calls_1([{L1,_,_,N1}], {N2,L2}, Blocks) when N2 > N1 ->
    %% We have reached the end of the chain, and the first
    %% element instruction to be executed. Its index is lower
    %% than the maximum index found while traversing the chain,
    %% so we will need to swap the instructions.
    #{L1:=Blk1,L2:=Blk2} = Blocks,
    [#b_set{dst=Dst1}=GetEl1,Succ1] = Blk1#b_blk.is,
    [#b_set{dst=Dst2}=GetEl2,Succ2] = Blk2#b_blk.is,
    %% Swap the element calls, re-pointing each block's succeeded
    %% instruction at the element call that now precedes it.
    Is1 = [GetEl2,Succ1#b_set{args=[Dst2]}],
    Is2 = [GetEl1,Succ2#b_set{args=[Dst1]}],
    Blocks#{L1:=Blk1#b_blk{is=Is1},L2:=Blk2#b_blk{is=Is2}};
swap_element_calls_1([{L,_,_,N1}|Els], {N2,_}, Blocks) when N1 > N2 ->
    %% This call has a higher index than any seen so far; record its
    %% index *and* label as the new maximum. (The original code passed
    %% {N2,L} here, pairing the stale index with the new label, which
    %% broke the invariant and could make the final clause swap with
    %% the wrong block.)
    swap_element_calls_1(Els, {N1,L}, Blocks);
swap_element_calls_1([_|Els], Highest, Blocks) ->
    swap_element_calls_1(Els, Highest, Blocks);
swap_element_calls_1([], _, Blocks) ->
    %% Nothing to do. The element call with highest index
    %% is already the first one to be executed.
    Blocks.
797
798%%%
799%%% Record optimization.
800%%%
801%%% Replace tuple matching with an is_tagged_tuple instruction
802%%% when applicable.
803%%%
804
%% Replace tuple matching with is_tagged_tuple instructions where
%% applicable.
ssa_opt_record({#opt_st{ssa=Linear}=St, FuncDb}) ->
    BlockMap = maps:from_list(Linear),
    {St#opt_st{ssa=record_opt(Linear, BlockMap)}, FuncDb}.
808
%% Apply the record optimization to the instructions of every block.
record_opt([{L, #b_blk{is=Is0,last=Last}=Blk0} | Bs], Blocks) ->
    Blk = Blk0#b_blk{is=record_opt_is(Is0, Last, Blocks)},
    [{L, Blk} | record_opt(Bs, Blocks)];
record_opt([], _Blocks) -> [].
814
%% record_opt_is(Instructions, Terminator, Blocks) -> Instructions.
%%  Rewrite an is_tuple test (or a tuple_size + '=:=' pair) that is
%%  followed by a tag check into a single is_tagged_tuple instruction.
record_opt_is([#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}=Set],
              Last, Blocks) ->
    %% The block ends with an is_tuple test; look at the success block
    %% for a matching size and tag test.
    case is_tagged_tuple(Tuple, Bool, Last, Blocks) of
        {yes,Size,Tag} ->
            Args = [Tuple,Size,Tag],
            [Set#b_set{op=is_tagged_tuple,args=Args}];
        no ->
            [Set]
    end;
record_opt_is([I|Is]=Is0, #b_br{bool=Bool}=Last, Blocks) ->
    %% The size/tag tests may also appear directly, without a
    %% preceding is_tuple test.
    case is_tagged_tuple_1(Is0, Last, Blocks) of
        {yes,_Fail,Tuple,Arity,Tag} ->
            Args = [Tuple,Arity,Tag],
            [I#b_set{op=is_tagged_tuple,dst=Bool,args=Args}];
        no ->
            [I|record_opt_is(Is, Last, Blocks)]
    end;
record_opt_is([I|Is], Last, Blocks) ->
    [I|record_opt_is(Is, Last, Blocks)];
record_opt_is([], _Last, _Blocks) -> [].
835
%% is_tagged_tuple(Tuple, Bool, Last, Blocks) -> {yes,Arity,Tag} | no.
%%  Given that Bool is the result of an is_tuple test on Tuple and
%%  controls the branch Last, succeed if the success block tests the
%%  tuple's size and tag with the same failure label.
is_tagged_tuple(#b_var{}=Tuple, Bool,
                #b_br{bool=Bool,succ=Succ,fail=Fail},
                Blocks) ->
    #b_blk{is=Is,last=Last} = map_get(Succ, Blocks),
    case is_tagged_tuple_1(Is, Last, Blocks) of
        {yes,Fail,Tuple,Arity,Tag} ->
            %% Note that Fail and Tuple are already bound here, so
            %% this clause only matches when the same tuple is tested
            %% and both tests branch to the same failure label.
            {yes,Arity,Tag};
        _ ->
            no
    end;
is_tagged_tuple(_, _, _, _) -> no.
847
%% is_tagged_tuple_1(Is, Last, Blocks) -> {yes,Fail,Tuple,Arity,Tag} | no.
%%  Recognize a block consisting of exactly a tuple_size call followed
%%  by an '=:=' comparison against an integer literal, whose success
%%  block goes on to test the tuple's first element.
is_tagged_tuple_1(Is, Last, Blocks) ->
    case {Is,Last} of
        {[#b_set{op={bif,tuple_size},dst=ArityVar,
                 args=[#b_var{}=Tuple]},
          #b_set{op={bif,'=:='},
                 dst=Bool,
                 args=[ArityVar, #b_literal{val=ArityVal}=Arity]}],
         #b_br{bool=Bool,succ=Succ,fail=Fail}}
          when is_integer(ArityVal) ->
            SuccBlk = map_get(Succ, Blocks),
            case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of
                no ->
                    no;
                {yes,Tag} ->
                    {yes,Fail,Tuple,Arity,Tag}
            end;
        _ ->
            no
    end.
867
%% Continue the record test recognition in the arity test's success
%% block. The block must branch on a variable and fail to the same
%% label as the arity test (note that Fail occurs twice in the head).
is_tagged_tuple_2(#b_blk{is=Is,
                         last=#b_br{bool=#b_var{}=Bool,fail=Fail}},
                  Tuple, Fail) ->
    is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_2(#b_blk{}, _, _) -> no.
873
%% Look for the extraction of the first element (position 0) of the
%% same tuple that was tested by tuple_size (Tuple is already bound).
is_tagged_tuple_3([#b_set{op=get_tuple_element,
                          dst=TagVar,
                          args=[#b_var{}=Tuple,#b_literal{val=0}]}|Is],
                  Bool, Tuple) ->
    is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_3([_|Is], Bool, Tuple) ->
    is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_3([], _, _) -> no.
882
%% Finally, the extracted tag must be compared against an atom
%% literal, and the comparison result must be the variable that the
%% block branches on (Bool and TagVar are already bound).
is_tagged_tuple_4([#b_set{op={bif,'=:='},dst=Bool,
                          args=[#b_var{}=TagVar,
                                #b_literal{val=TagVal}=Tag]}],
                 Bool, TagVar) when is_atom(TagVal) ->
    {yes,Tag};
is_tagged_tuple_4([_|Is], Bool, TagVar) ->
    is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_4([], _, _) -> no.
891
892%%%
893%%% Common subexpression elimination (CSE).
894%%%
895%%% Eliminate repeated evaluation of identical expressions. To avoid
896%%% increasing the size of the stack frame, we don't eliminate
897%%% subexpressions across instructions that clobber the X registers.
898%%%
899
%% Entry point of the CSE sub-pass. The entry block and the exception
%% block start out with empty expression maps.
ssa_opt_cse({#opt_st{ssa=Linear}=St, FuncDb}) ->
    Exprs0 = #{0 => #{}, ?EXCEPTION_BLOCK => #{}},
    {St#opt_st{ssa=cse(Linear, #{}, Exprs0)}, FuncDb}.
903
%% Do CSE of a single function, one block at a time. Sub is the
%% current variable substitution map; M maps block labels to the
%% expression map valid on entry to that block.
cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
    Es0 = map_get(L, M0),
    {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
    Last = sub(Last0, Sub),
    %% Note that Is1 is in reverse order, so cse_successors/4 sees
    %% the block's final instructions first.
    M = cse_successors(Is1, Blk, Es, M0),
    Is = reverse(Is1),
    [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
cse([], _, _) -> [].
912
%% Propagate the expression map EsSucc to the successors of Blk,
%% intersecting it with whatever is already recorded for each
%% successor. The instruction list is in reverse order, so the head
%% is the last instruction of the block.
cse_successors([#b_set{op={succeeded,_},args=[Src]},Bif|_], Blk, EsSucc, M0) ->
    case cse_suitable(Bif) of
        true ->
            %% The previous instruction only has a valid value at the success branch.
            %% We must remove the substitution for Src from the failure branch.
            #b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk,
            M = cse_successors_1([Succ], EsSucc, M0),
            EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc),
            cse_successors_1([Fail], EsFail, M);
        false ->
            %% There can't be any replacement for Src in EsSucc. No need for
            %% any special handling.
            cse_successors_1(beam_ssa:successors(Blk), EsSucc, M0)
    end;
cse_successors(_Is, Blk, Es, M) ->
    cse_successors_1(beam_ssa:successors(Blk), Es, M).
929
%% Fold the expression map Es0 into the entry expression map of each
%% listed successor label.
cse_successors_1([L|Ls], Es0, M) ->
    case M of
        #{L:=Es1} when map_size(Es1) =:= 0 ->
            %% The map is already empty. No need to do anything
            %% since the intersection will be empty.
            cse_successors_1(Ls, Es0, M);
        #{L:=Es1} ->
            %% An earlier predecessor has been seen; keep only the
            %% expressions common to both paths.
            Es = cse_intersection(Es0, Es1),
            cse_successors_1(Ls, Es0, M#{L:=Es});
        #{} ->
            %% First predecessor seen for this successor.
            cse_successors_1(Ls, Es0, M#{L=>Es0})
    end;
cse_successors_1([], _, M) -> M.
943
944%% Calculate the intersection of the two maps. Both keys and values
945%% must match.
%% Calculate the intersection of the two expression maps. Both keys
%% and values must match. Iterate over the smaller map for speed.
cse_intersection(M1, M2) ->
    case map_size(M1) < map_size(M2) of
        true ->
            cse_intersection_1(maps:to_list(M1), M2, M1);
        false ->
            cse_intersection_1(maps:to_list(M2), M1, M2)
    end.
953
%% Remove from Result every pair that is not present (with the same
%% key AND the same value) in the map M.
cse_intersection_1([{Key,Value}|KVs], M, Result) ->
    case maps:find(Key, M) of
        {ok,Value} ->
            %% Value is already bound: key and value both match.
            cse_intersection_1(KVs, M, Result);
        _ ->
            cse_intersection_1(KVs, M, maps:remove(Key, Result))
    end;
cse_intersection_1([], _, Result) -> Result.
962
%% Do CSE of the instructions in one block. Returns the processed
%% instructions in reverse order, the updated expression map, and the
%% updated substitution map.
cse_is([#b_set{op={succeeded,_},dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) ->
    I = sub(I0, Sub0),
    case I of
        #b_set{args=[Src]} ->
            %% The checked instruction was kept; keep the 'succeeded'
            %% instruction as well.
            cse_is(Is, Es, Sub0, [I|Acc]);
        #b_set{} ->
            %% The previous instruction has been eliminated. Eliminate the
            %% 'succeeded' instruction too.
            Sub = Sub0#{Bool=>#b_literal{val=true}},
            cse_is(Is, Es, Sub, Acc)
    end;
cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
    I = sub(I0, Sub0),
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% Retaining the expressions map across calls and other
            %% clobbering instructions would work, but it would cause
            %% the common subexpressions to be saved to Y registers,
            %% which would probably increase the size of the stack
            %% frame.
            cse_is(Is, #{}, Sub0, [I|Acc]);
        false ->
            case cse_expr(I) of
                none ->
                    %% Not suitable for CSE.
                    cse_is(Is, Es0, Sub0, [I|Acc]);
                {ok,ExprKey} ->
                    case Es0 of
                        #{ExprKey:=Src} ->
                            %% The same expression has been seen
                            %% before; reuse its result.
                            Sub = Sub0#{Dst=>Src},
                            cse_is(Is, Es0, Sub, Acc);
                        #{} ->
                            %% First occurrence; remember it.
                            Es1 = Es0#{ExprKey=>Dst},
                            Es = cse_add_inferred_exprs(I, Es1),
                            cse_is(Is, Es, Sub0, [I|Acc])
                    end
            end
    end;
cse_is([], Es, Sub, Acc) ->
    {Acc,Es,Sub}.
1003
%% Add expressions whose values can be inferred from the given
%% instruction to the expression map. For example, after
%% put_list(Hd, Tl) we know what get_hd and get_tl of the result
%% will return.
cse_add_inferred_exprs(#b_set{op=put_list,dst=List,args=[Hd,Tl]}, Es) ->
    Es#{{get_hd,[List]} => Hd,
        {get_tl,[List]} => Tl};
cse_add_inferred_exprs(#b_set{op=put_tuple,dst=Tuple,args=[E1,E2|_]}, Es) ->
    %% Adding tuple elements beyond the first two does not seem to be
    %% worthwhile (at least not in the sample used by scripts/diffable).
    Es#{{get_tuple_element,[Tuple,#b_literal{val=0}]} => E1,
        {get_tuple_element,[Tuple,#b_literal{val=1}]} => E2};
cse_add_inferred_exprs(#b_set{op={bif,element},dst=E,
                              args=[#b_literal{val=N},Tuple]}, Es)
  when is_integer(N) ->
    %% element/2 is 1-based; get_tuple_element is 0-based.
    Es#{{get_tuple_element,[Tuple,#b_literal{val=N-1}]} => E};
cse_add_inferred_exprs(#b_set{op={bif,hd},dst=Hd,args=[List]}, Es) ->
    Es#{{get_hd,[List]} => Hd};
cse_add_inferred_exprs(#b_set{op={bif,tl},dst=Tl,args=[List]}, Es) ->
    Es#{{get_tl,[List]} => Tl};
cse_add_inferred_exprs(#b_set{op={bif,map_get},dst=Value,args=[Key,Map]}, Es) ->
    Es#{{get_map_element,[Map,Key]} => Value};
cse_add_inferred_exprs(#b_set{op=put_map,dst=Map,args=[_Kind,_SrcMap|Args]}, Es) ->
    %% Bug fix: the first two arguments of a put_map instruction are
    %% the update kind and the source map; only the Key/Value pairs
    %% that follow describe elements of the result. Pairing from the
    %% head of the full args list would record a bogus
    %% {get_map_element,[Map,KindLiteral]} => SrcMap entry.
    cse_add_map_get(Args, Map, Es);
cse_add_inferred_exprs(_, Es) -> Es.
1025
%% Record a get_map_element expression for every Key/Value pair
%% stored by a put_map instruction with destination Map.
cse_add_map_get([Key,Value|Rest], Map, Es) ->
    Expr = {get_map_element,[Map,Key]},
    cse_add_map_get(Rest, Map, Es#{Expr => Value});
cse_add_map_get([], _Map, Es) ->
    Es.
1030
%% Turn an instruction into its expression key {Op,Args}, provided
%% that the instruction is suitable for CSE.
cse_expr(#b_set{op=Op,args=Args}=I) ->
    case cse_suitable(I) of
        false -> none;
        true -> {ok,{Op,Args}}
    end.
1036
%% Decide whether an instruction may serve as a CSE key. Only pure
%% data constructing/accessing operations and a restricted set of
%% BIFs qualify.
cse_suitable(#b_set{op=get_hd}) -> true;
cse_suitable(#b_set{op=get_tl}) -> true;
cse_suitable(#b_set{op=put_list}) -> true;
cse_suitable(#b_set{op=get_tuple_element}) -> true;
cse_suitable(#b_set{op=put_tuple}) -> true;
cse_suitable(#b_set{op=get_map_element}) -> true;
cse_suitable(#b_set{op=put_map}) -> true;
cse_suitable(#b_set{op={bif,tuple_size}}) ->
    %% Doing CSE for tuple_size/1 can prevent the
    %% creation of test_arity and select_tuple_arity
    %% instructions. That could decrease performance
    %% and beam_validator could fail to understand
    %% that tuple operations that follow are safe.
    false;
cse_suitable(#b_set{anno=Anno,op={bif,Name},args=Args}) ->
    %% Doing CSE for floating point operators is unsafe.
    %% Doing CSE for comparison operators would prevent
    %% creation of 'test' instructions.
    Arity = length(Args),
    not (is_map_key(float_op, Anno) orelse
         erl_internal:new_type_test(Name, Arity) orelse
         erl_internal:comp_op(Name, Arity) orelse
         erl_internal:bool_op(Name, Arity));
cse_suitable(#b_set{}) -> false.
1061
1062%%%
1063%%% Using floating point instructions.
1064%%%
1065%%% Use the special floating points version of arithmetic
1066%%% instructions, if the operands are known to be floats or the result
1067%%% of the operation will be a float.
1068%%%
1069%%% The float instructions were never used in guards before, so we
1070%%% will take special care to keep not using them in guards.  Using
1071%%% them in guards would require a new version of the 'fconv'
1072%%% instruction that would take a failure label.  Since it is unlikely
%%% that using float instructions in guards would be beneficial, why
1074%%% bother implementing a new instruction?
1075%%%
1076
%% State for the floating point sub-pass.
-record(fs,
        {regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()}, %Variable -> float register.
         non_guards :: gb_sets:set(beam_ssa:label()),       %Labels of blocks not in guards.
         bs :: beam_ssa:block_map()                         %Block map of the function.
        }).
1082
%% Entry point of the floating point sub-pass: rewrite annotated
%% arithmetic instructions to their float versions.
ssa_opt_float({#opt_st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
    Fs = #fs{non_guards=non_guards(Linear0),
             bs=maps:from_list(Linear0)},
    {Linear,Count} = float_opt(Linear0, Count0, Fs),
    {St#opt_st{ssa=Linear,cnt=Count}, FuncDb}.
1089
1090%% The fconv instruction doesn't support jumping to a fail label, so we have to
1091%% skip this optimization if the fail block is a guard.
1092%%
1093%% We also skip the optimization in blocks that always fail, as it's both
1094%% difficult and pointless to rewrite them to use float ops.
%% A block is a candidate for the float optimization only if it ends
%% in a two-way branch on a variable whose fail label is not part of
%% a guard (fconv cannot branch to a guard's fail label).
float_can_optimize_blk(#b_blk{last=Last}, #fs{non_guards=NonGuards}) ->
    case Last of
        #b_br{bool=#b_var{},fail=F} ->
            gb_sets:is_member(F, NonGuards);
        _ ->
            false
    end.
1100
%% Walk the linear block list, attempting the float rewrite only for
%% blocks that are safe to optimize (see float_can_optimize_blk/2).
%% Count is the shared label/variable counter.
float_opt([{L,Blk}|Bs0], Count0, Fs) ->
    case float_can_optimize_blk(Blk, Fs) of
        true ->
            float_opt_1(L, Blk, Bs0, Count0, Fs);
        false ->
            {Bs,Count} = float_opt(Bs0, Count0, Fs),
            {[{L,Blk}|Bs],Count}
    end;
float_opt([], Count, _Fs) ->
    {[],Count}.
1111
%% Attempt to rewrite the instructions of one block to float
%% instructions. The counter is threaded through every step that
%% creates new blocks or variables, so the order of the calls below
%% must not be changed.
float_opt_1(L, #b_blk{is=Is0}=Blk0, Bs0, Count0, Fs0) ->
    case float_opt_is(Is0, Fs0, Count0, []) of
        {Is1,Fs1,Count1} ->
            %% The block was rewritten. Possibly insert a register
            %% flush block, then split out the conversions.
            {Flush,Blk,Fs,Count2} = float_maybe_flush(Blk0, Fs1, Count1),
            {Blks,Count3} = float_fixup_conv(L, Is1, Blk, Count2),
            {Bs,Count} = float_opt(Bs0, Count3, Fs),
            {Blks++Flush++Bs,Count};
        none ->
            %% Nothing to rewrite in this block.
            {Bs,Count} = float_opt(Bs0, Count0, Fs0),
            {[{L,Blk0}|Bs],Count}
    end.
1123
1124%% Split out {float,convert} instructions into separate blocks, number
%% the blocks, and add {succeeded,body} in each {float,convert} block.
%% Split the {float,convert} instructions in Is into blocks of their
%% own, number the new blocks, and give each conversion a 'succeeded'
%% test that branches to the fail label of the original block.
float_fixup_conv(L, Is, Blk, Count0) ->
    Split = float_split_conv(Is, Blk),
    {Blks,Count} = float_number(Split, L, Count0),
    %% The fail label is taken from the original block's branch.
    #b_blk{last=#b_br{bool=#b_var{},fail=Fail}} = Blk,
    float_conv(Blks, Fail, Count).
1131
1132%% Split {float,convert} instructions into individual blocks.
%% Split {float,convert} instructions into individual blocks.
float_split_conv(Is0, Blk) ->
    %% The branch targets (succ=0/fail=0) are placeholders that are
    %% filled in by float_number/2.
    Br = #b_br{bool=#b_literal{val=true},succ=0,fail=0},

    %% Note that there may be other instructions such as
    %% remove_message before the floating point instructions;
    %% therefore, it is essential that we don't reorder instructions.
    case splitwith(fun(#b_set{op=Op}) ->
                           Op =/= {float,convert}
                   end, Is0) of
        {Is,[]} ->
            %% No (more) conversions; everything left fits in one block.
            [Blk#b_blk{is=Is}];
        {[_|_]=Is1,[#b_set{op={float,convert}}=Conv|Is2]} ->
            %% Instructions before the conversion get a block of their
            %% own, and so does the conversion itself.
            [#b_blk{is=Is1,last=Br},
             #b_blk{is=[Conv],last=Br}|float_split_conv(Is2, Blk)];
        {[],[#b_set{op={float,convert}}=Conv|Is1]} ->
            [#b_blk{is=[Conv],last=Br}|float_split_conv(Is1, Blk)]
    end.
1150
1151%% Number and chain the blocks that were split.
%% The first of the split blocks keeps the label FirstL of the
%% original block; the rest get fresh labels from the counter.
float_number(Bs0, FirstL, Count0) ->
    {[{_,FirstBlk}|Bs],Count} = float_number(Bs0, Count0),
    {[{FirstL,FirstBlk}|Bs],Count}.
1155
float_number([B], Count) ->
    %% The final block keeps its original terminator and is given the
    %% label Count. NOTE(review): Count is returned unchanged here,
    %% so the label given to this block equals the returned counter
    %% value -- confirm that callers treat the returned count
    %% correctly as the next free number.
    {[{Count,B}],Count};
float_number([B|Bs0], Count0) ->
    Next = Count0 + 1,
    {Bs,Count} = float_number(Bs0, Next),
    %% Chain this block to the next one with an unconditional branch.
    Br = #b_br{bool=#b_literal{val=true},succ=Next,fail=Next},
    {[{Count0,B#b_blk{last=Br}}|Bs],Count}.
1163
1164%% Insert 'succeeded' instructions after each {float,convert}
1165%% instruction.
float_conv([{L,#b_blk{is=Is0,last=Last}=Blk0}|Bs0], Fail, Count0) ->
    case Is0 of
        [#b_set{op={float,convert}}=Conv] ->
            %% A conversion block (created by float_split_conv/2):
            %% add a {succeeded,body} test and branch to Fail (the
            %% fail label of the original block) if it fails.
            {Bool,Count1} = new_var('@ssa_bool', Count0),
            Succeeded = #b_set{op={succeeded,body},dst=Bool,
                               args=[Conv#b_set.dst]},
            Is = [Conv,Succeeded],
            Br = Last#b_br{bool=Bool,fail=Fail},
            Blk = Blk0#b_blk{is=Is,last=Br},
            {Bs,Count} = float_conv(Bs0, Fail, Count1),
            {[{L,Blk}|Bs],Count};
        [_|_] ->
            %% Not a conversion block; leave it unchanged.
            {Bs,Count} = float_conv(Bs0, Fail, Count0),
            {[{L,Blk0}|Bs],Count}
    end;
float_conv([], _, Count) ->
    {[],Count}.
1183
%% Insert a block that flushes all float registers back to ordinary
%% variables after Blk0, unless the success block will also be
%% optimized (in which case flushing can safely be deferred).
float_maybe_flush(Blk0, Fs0, Count0) ->
    #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0,

    %% If the success block has an optimizable floating point instruction,
    %% it is safe to defer flushing.
    case float_safe_to_skip_flush(Succ, Fs0) of
        true ->
            %% No flush needed.
            {[],Blk0,Fs0,Count0};
        false ->
            %% Flush needed. Allocate block numbers.
            FlushL = Count0,              %For flushing of float regs.
            Count = Count0 + 1,
            Blk = Blk0#b_blk{last=Br#b_br{succ=FlushL}},

            %% Build the block that flushes all registers.
            FlushIs = float_flush_regs(Fs0),
            FlushBr = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
            FlushBlk = #b_blk{is=FlushIs,last=FlushBr},

            %% Update state record and blocks.
            Fs = Fs0#fs{regs=#{}},
            FlushBs = [{FlushL,FlushBlk}],
            {FlushBs,Blk,Fs,Count}
    end.
1209
%% Flushing may be deferred only if the successor block is itself
%% optimizable and starts with floating point work.
float_safe_to_skip_flush(L, #fs{bs=Blocks}=Fs) ->
    #b_blk{is=Is} = Blk = map_get(L, Blocks),
    case float_can_optimize_blk(Blk, Fs) of
        true -> float_optimizable_is(Is);
        false -> false
    end.
1213
%% Check whether the block starts (ignoring get_tuple_element
%% instructions) with an instruction annotated as a float operation.
float_optimizable_is([#b_set{anno=#{float_op:=_}}|_]) ->
    true;
float_optimizable_is([#b_set{op=get_tuple_element}|Is]) ->
    %% The tuple sinking optimization can sink get_tuple_element instruction
    %% into a sequence of floating point operations.
    float_optimizable_is(Is);
float_optimizable_is(_) ->
    false.
1222
%% Rewrite the instructions of one block, turning instructions
%% annotated with float_op into float instructions. Returns 'none'
%% unless the block ends in a 'succeeded' test whose checked value
%% was rewritten.
float_opt_is([#b_set{op={succeeded,_},args=[Src]}=I0],
             #fs{regs=Rs}=Fs, Count, Acc) ->
    case Rs of
        #{Src:=Fr} ->
            %% The checked instruction was rewritten; make the
            %% 'succeeded' test check the float register instead.
            I = I0#b_set{args=[Fr]},
            {reverse(Acc, [I]),Fs,Count};
        #{} ->
            none
    end;
float_opt_is([#b_set{anno=Anno0}=I0|Is0], Fs0, Count0, Acc) ->
    case Anno0 of
        #{float_op:=FTypes} ->
            %% Annotated as a float operation; rewrite it. The
            %% annotation is consumed here.
            Anno = maps:remove(float_op, Anno0),
            I1 = I0#b_set{anno=Anno},
            {Is,Fs,Count} = float_make_op(I1, FTypes, Fs0, Count0),
            float_opt_is(Is0, Fs, Count, reverse(Is, Acc));
        #{} ->
            float_opt_is(Is0, Fs0, Count0, [I0|Acc])
    end;
float_opt_is([], _Fs, _Count, _Acc) ->
    none.
1244
%% Rewrite one arithmetic BIF call to its float version, first
%% loading the operands into float registers. Ts gives, for each
%% operand, whether it is known to be a float or may need conversion
%% (see float_load_reg/3).
float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0,anno=Anno}=I0,
              Ts, #fs{regs=Rs0}=Fs, Count0) ->
    {As1,Rs1,Count1} = float_load(As0, Ts, Anno, Rs0, Count0, []),
    {As,Is0} = unzip(As1),
    {FrDst,Count2} = new_var('@fr', Count1),
    I = I0#b_set{op={float,Op},dst=FrDst,args=As},
    %% Remember that the value of Dst now lives in FrDst.
    Rs = Rs1#{Dst=>FrDst},
    Is = append(Is0) ++ [I],
    {Is,Fs#fs{regs=Rs},Count2}.
1254
%% Load each operand into a float register, accumulating
%% {Register,LoadInstructions} pairs.
float_load([A|As], [T|Ts], Anno, Rs0, Count0, Acc) ->
    {Load,Rs,Count} = float_reg_arg(A, T, Anno, Rs0, Count0),
    float_load(As, Ts, Anno, Rs, Count, [Load|Acc]);
float_load([], [], _Anno, Rs, Count, Acc) ->
    {reverse(Acc),Rs,Count}.
1260
%% Return {{FloatReg,LoadIs},Rs,Count} for one operand, reusing an
%% existing float register when the value is already loaded.
float_reg_arg(A, T, Anno, Rs, Count0) ->
    case Rs of
        #{A:=Fr} ->
            %% Already in a float register; no load instruction needed.
            {{Fr,[]},Rs,Count0};
        #{} ->
            {Dst,Count} = new_var('@fr_copy', Count0),
            I0 = float_load_reg(T, A, Dst),
            I = I0#b_set{anno=Anno},
            {{Dst,[I]},Rs#{A=>Dst},Count}
    end.
1271
%% Create the instruction that loads one operand into a float
%% register: {float,convert} for values that may need conversion,
%% {float,put} for values known to be floats.
float_load_reg(convert, #b_var{}=Src, Dst) ->
    #b_set{op={float,convert},dst=Dst,args=[Src]};
float_load_reg(convert, #b_literal{val=Val}=Src, Dst) ->
    %% Convert the literal at compile time if possible.
    try float(Val) of
        F ->
            #b_set{op={float,put},dst=Dst,args=[#b_literal{val=F}]}
    catch
        error:_ ->
            %% Let the exception happen at runtime.
            #b_set{op={float,convert},dst=Dst,args=[Src]}
    end;
float_load_reg(float, Src, Dst) ->
    #b_set{op={float,put},dst=Dst,args=[Src]}.
1285
%% Emit {float,get} instructions copying each float register back to
%% its original variable. Registers that only hold operand copies
%% ('@fr_copy' variables created by float_reg_arg/5) need no flush.
float_flush_regs(#fs{regs=Rs}) ->
    maps:fold(fun(_, #b_var{name={'@fr_copy',_}}, Acc) ->
                      Acc;
                 (Dst, Fr, Acc) ->
                      [#b_set{op={float,get},dst=Dst,args=[Fr]}|Acc]
              end, [], Rs).
1292
1293%%%
1294%%% Live optimization.
1295%%%
1296%%% Optimize instructions whose values are not used. They could be
1297%%% removed if they have no side effects, or in a few cases replaced
%%% with cheaper instructions.
1299%%%
1300
%% Entry point of the live optimization. The blocks are processed in
%% reverse of the linear order so that liveness information flows
%% from each block to its predecessors.
ssa_opt_live({#opt_st{ssa=Linear0}=St, FuncDb}) ->
    Reversed = reverse(Linear0),
    Blocks = live_opt(Reversed, #{}, maps:from_list(Reversed)),
    {St#opt_st{ssa=beam_ssa:linearize(Blocks)}, FuncDb}.
1307
%% Process the blocks in reverse of the linear order. LiveMap maps a
%% block label (or a {PhiBlock,Pred} pair for phi values) to the set
%% of variables live on entry to that block.
live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
    %% Share identical instruction sequences before the live
    %% optimization proper.
    Blk1 = beam_ssa_share:block(Blk0, Blocks),
    Successors = beam_ssa:successors(Blk1),
    Live0 = live_opt_succ(Successors, L, LiveMap0, sets:new([{version, 2}])),
    {Blk,Live} = live_opt_blk(Blk1, Live0),
    LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
    live_opt(Bs, LiveMap, Blocks#{L:=Blk});
live_opt([], _, Acc) -> Acc.
1316
%% Union the live-in sets of all successors S of block L, starting
%% from the accumulator Live0.
live_opt_succ([S|Ss], L, LiveMap, Live0) ->
    case LiveMap of
        #{{S,L}:=Live} ->
            %% The successor has a phi node, and the value for
            %% this block in the phi node is a variable.
            live_opt_succ(Ss, L, LiveMap, sets:union(Live0, Live));
        #{S:=Live} ->
            %% No phi node in the successor, or the value for
            %% this block in the phi node is a literal.
            live_opt_succ(Ss, L, LiveMap, sets:union(Live0, Live));
        #{} ->
            %% A peek_message block which has not been processed yet.
            live_opt_succ(Ss, L, LiveMap, Live0)
    end;
live_opt_succ([], _, _, Acc) -> Acc.
1332
%% Record the live-in set for block L and, for each predecessor P
%% that supplies a variable to one of L's phi nodes, record the live
%% set extended with those variables under the key {L,P} (looked up
%% as {S,L} in live_opt_succ/4).
live_opt_phis(Is, L, Live0, LiveMap0) ->
    LiveMap = LiveMap0#{L=>Live0},
    Phis = takewhile(fun(#b_set{op=Op}) -> Op =:= phi end, Is),
    case Phis of
        [] ->
            LiveMap;
        [_|_] ->
            PhiArgs = append([Args || #b_set{args=Args} <- Phis]),
            case [{P,V} || {#b_var{}=V,P} <- PhiArgs] of
                [_|_]=PhiVars ->
                    %% Group the phi variables by predecessor label.
                    PhiLive0 = rel2fam(PhiVars),
                    PhiLive = [{{L,P},list_set_union(Vs, Live0)} ||
                                  {P,Vs} <- PhiLive0],
                    maps:merge(LiveMap, maps:from_list(PhiLive));
                [] ->
                    %% There were only literals in the phi node(s).
                    LiveMap
            end
    end.
1352
%% Optimize the instructions of one block given the variables live
%% after it; return the updated block and the live-in set.
live_opt_blk(#b_blk{is=Is0,last=Last}=Blk, LiveOut0) ->
    LiveOut = list_set_union(beam_ssa:used(Last), LiveOut0),
    {Is,LiveIn} = live_opt_is(reverse(Is0), LiveOut, []),
    {Blk#b_blk{is=Is},LiveIn}.
1357
%% Traverse the instructions of a block in reverse order, dropping
%% instructions whose results are unused (when they have no side
%% effects) and updating the live set as we go.
live_opt_is([#b_set{op=phi,dst=Dst}=I|Is], Live0, Acc) ->
    %% A phi node has no side effects; keep it only if its value is
    %% used.
    Live = sets:del_element(Dst, Live0),
    case sets:is_element(Dst, Live0) of
        true ->
            live_opt_is(Is, Live, [I|Acc]);
        false ->
            live_opt_is(Is, Live, Acc)
    end;
live_opt_is([#b_set{op={succeeded,guard},dst=SuccDst,args=[Dst]}=SuccI,
             #b_set{op=Op,dst=Dst}=I0|Is],
            Live0, Acc) ->
    case {sets:is_element(SuccDst, Live0),
          sets:is_element(Dst, Live0)} of
        {true, true} ->
            %% Both the result and its success test are used.
            Live = sets:del_element(SuccDst, Live0),
            live_opt_is([I0|Is], Live, [SuccI|Acc]);
        {true, false} ->
            %% The result of the instruction before {succeeded,guard} is
            %% unused. Attempt to perform a strength reduction.
            case Op of
                {bif,'not'} ->
                    I = I0#b_set{op={bif,is_boolean},dst=SuccDst},
                    live_opt_is([I|Is], Live0, Acc);
                {bif,tuple_size} ->
                    I = I0#b_set{op={bif,is_tuple},dst=SuccDst},
                    live_opt_is([I|Is], Live0, Acc);
                bs_start_match ->
                    [#b_literal{val=new},Bin] = I0#b_set.args,
                    I = I0#b_set{op={bif,is_bitstring},args=[Bin],dst=SuccDst},
                    live_opt_is([I|Is], Live0, Acc);
                get_map_element ->
                    I = I0#b_set{op=has_map_field,dst=SuccDst},
                    live_opt_is([I|Is], Live0, Acc);
                _ ->
                    %% No reduction known. Keep both instructions;
                    %% marking Dst live ensures the checked
                    %% instruction is retained.
                    Live1 = sets:del_element(SuccDst, Live0),
                    Live = sets:add_element(Dst, Live1),
                    live_opt_is([I0|Is], Live, [SuccI|Acc])
            end;
        {false, true} ->
            %% Only the result is used; drop the 'succeeded' test.
            live_opt_is([I0|Is], Live0, Acc);
        {false, false} ->
            %% Neither is used; drop the 'succeeded' test and
            %% re-examine the checked instruction.
            live_opt_is(Is, Live0, Acc)
    end;
live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
    case sets:is_element(Dst, Live0) of
        true ->
            Live1 = list_set_union(beam_ssa:used(I), Live0),
            Live = sets:del_element(Dst, Live1),
            live_opt_is(Is, Live, [I|Acc]);
        false ->
            case beam_ssa:no_side_effect(I) of
                true ->
                    live_opt_is(Is, Live0, Acc);
                false ->
                    %% The result is unused, but the instruction must
                    %% be kept for its side effects.
                    Live = list_set_union(beam_ssa:used(I), Live0),
                    live_opt_is(Is, Live, [I|Acc])
            end
    end;
live_opt_is([], Live, Acc) ->
    {Acc,Live}.
1418
1419%%%
1420%%% try/catch optimization.
1421%%%
%%% Attempts to rewrite try/catches as guards when we know the exception won't
1423%%% be inspected in any way, and removes try/catches whose expressions will
1424%%% never throw.
1425%%%
1426
%% Entry point of the try/catch sub-pass: first do strength reduction
%% of try/catch into plain code where the exception is ignored, then
%% remove try/catch constructs whose protected code cannot throw.
%% Note that reduce_try/2 returns the blocks in reverse order, which
%% is what trim_try/4 expects.
ssa_opt_try({#opt_st{ssa=Linear0}=St, FuncDb}) ->
    Empty = sets:new([{version, 2}]),
    Reduced = reduce_try(Linear0, []),
    {St#opt_st{ssa=trim_try(Reduced, Empty, Empty, [])}, FuncDb}.
1434
1435%% Does a strength reduction of try/catch and catch.
1436%%
1437%% In try/catch constructs where the expression is restricted
1438%% (essentially a guard expression) and the error reason is ignored
1439%% in the catch part, such as:
1440%%
1441%%   try
1442%%      <RestrictedExpression>
1443%%   catch
1444%%      _:_ ->
1445%%        ...
1446%%   end
1447%%
1448%% the try/catch can be eliminated by simply removing the `new_try_tag`,
1449%% `landingpad`, and `kill_try_tag` instructions.
reduce_try([{L,#b_blk{is=[#b_set{op=new_try_tag}],
                      last=Last}=Blk0} | Bs0], Acc) ->
    #b_br{succ=Succ,fail=Fail} = Last,
    Ws = sets:from_list([Succ,Fail], [{version, 2}]),
    %% do_reduce_try/2 throws 'not_possible' as soon as it finds
    %% anything that requires the try/catch to be kept.
    try do_reduce_try(Bs0, Ws) of
        Bs ->
            %% Safe: drop the new_try_tag and fall through directly
            %% to the success block.
            Blk = Blk0#b_blk{is=[],
                             last=#b_br{bool=#b_literal{val=true},
                                        succ=Succ,fail=Succ}},
            reduce_try(Bs, [{L, Blk} | Acc])
    catch
        throw:not_possible ->
            reduce_try(Bs0, [{L, Blk0} | Acc])
    end;
reduce_try([{L, Blk} | Bs], Acc) ->
    reduce_try(Bs, [{L, Blk} | Acc]);
reduce_try([], Acc) ->
    %% Note that the accumulated blocks are in reverse order.
    Acc.
1468
%% Check all blocks reachable from the new_try_tag instruction. Ws is
%% the work list of labels still to be analyzed; throws 'not_possible'
%% if any reachable block is unsafe outside try/catch.
do_reduce_try([{L, Blk} | Bs]=Bs0, Ws0) ->
    case sets:is_element(L, Ws0) of
        false ->
            %% This block is not reachable from the block with the
            %% `new_try_tag` instruction. Retain it. There is no
            %% need to check it for safety.
            case sets:is_empty(Ws0) of
                true -> Bs0;
                false -> [{L, Blk} | do_reduce_try(Bs, Ws0)]
            end;
        true ->
            Ws1 = sets:del_element(L, Ws0),
            #b_blk{is=Is0} = Blk,
            case reduce_try_is(Is0, []) of
                {safe,Is} ->
                    %% This block does not execute any instructions
                    %% that would require a try. Analyze successors.
                    Successors = beam_ssa:successors(Blk),
                    Ws = list_set_union(Successors, Ws1),
                    [{L, Blk#b_blk{is=Is}} | do_reduce_try(Bs, Ws)];
                unsafe ->
                    %% There is something unsafe in the block, for
                    %% example a `call` instruction or an `extract`
                    %% instruction.
                    throw(not_possible);
                {done,Is} ->
                    %% This block kills the try tag (either after successful
                    %% execution or at the landing pad). Don't analyze
                    %% successors.
                    [{L, Blk#b_blk{is=Is}} | do_reduce_try(Bs, Ws1)]
            end
    end;
do_reduce_try([], Ws) ->
    true = sets:is_empty(Ws),                   %Assertion.
    [].
1504
%% Check (and rewrite) the instructions of one block reached from the
%% try. Returns {done,Is} when the try tag is killed, {safe,Is} when
%% every instruction is side-effect free, or 'unsafe'.
reduce_try_is([#b_set{op=kill_try_tag}|Is], Acc) ->
    %% Remove this kill_try_tag instruction. If there was a landingpad
    %% instruction in this block, it has already been removed. Preserve
    %% all other instructions in the block.
    {done,reverse(Acc, Is)};
reduce_try_is([#b_set{op=extract}|_], _Acc) ->
    %% The error reason is accessed.
    unsafe;
reduce_try_is([#b_set{op=landingpad}|Is], Acc) ->
    %% Drop the landingpad instruction; the try/catch is going away.
    reduce_try_is(Is, Acc);
reduce_try_is([#b_set{op={succeeded,body}}=I0|Is], Acc) ->
    %% If we reached this point, it means that the previous instruction
    %% has no side effects. We must now convert the flavor of the
    %% succeeded to the `guard`, since the try/catch will be removed.
    I = I0#b_set{op={succeeded,guard}},
    reduce_try_is(Is, [I|Acc]);
reduce_try_is([#b_set{op=Op}=I|Is], Acc) ->
    IsSafe = case Op of
                 phi -> true;
                 _ -> beam_ssa:no_side_effect(I)
             end,
    case IsSafe of
        true -> reduce_try_is(Is, [I|Acc]);
        false -> unsafe
    end;
reduce_try_is([], Acc) ->
    {safe,reverse(Acc)}.
1532
1533%% Removes try/catch expressions whose expressions will never throw.
1534%%
1535%% We walk backwards through all blocks, maintaining a set of potentially
1536%% unreachable landing pads, removing them from the set whenever we see a
1537%% branch to that block. When we encounter a `new_try_tag` instruction that
1538%% references a block in the unreachable set, we'll remove the try/catch.
%% Walk the blocks (in reverse order, as produced by reduce_try/2),
%% maintaining the set of potentially unreachable landing pads and
%% the set of try tags whose try/catch has been removed.
trim_try([{L, #b_blk{is=[#b_set{op=landingpad} | _]}=Blk}| Bs],
         Unreachable0, Killed, Acc) ->
    %% Provisionally mark this landing pad as unreachable; any branch
    %% to it seen later in this backwards walk removes it again.
    Unreachable1 = sets:add_element(L, Unreachable0),
    %% Use version-2 sets for consistency with the sets created in
    %% ssa_opt_try/1, avoiding mixed-representation set operations.
    Successors = sets:from_list(beam_ssa:successors(Blk), [{version, 2}]),
    Unreachable = sets:subtract(Unreachable1, Successors),
    trim_try(Bs, Unreachable, Killed, [{L, Blk} | Acc]);
trim_try([{L, #b_blk{last=#b_ret{}}=Blk} | Bs], Unreachable, Killed, Acc) ->
    %% Nothing to update and nothing to optimize.
    trim_try(Bs, Unreachable, Killed, [{L,Blk}|Acc]);
trim_try([{L, Blk0} | Bs], Unreachable0, Killed0, Acc) ->
    case sets:is_empty(Unreachable0) of
        true ->
            %% Nothing to update and nothing to optimize.
            trim_try(Bs, Unreachable0, Killed0, [{L,Blk0}|Acc]);
        false ->
            #b_blk{is=Is0,last=Last0} = Blk0,
            case reverse(Is0) of
                [#b_set{op=new_try_tag,dst=Tag}|Is] ->
                    #b_br{succ=SuccLbl,fail=PadLbl} = Last0,
                    Unreachable = sets:del_element(PadLbl, Unreachable0),
                    case sets:is_element(PadLbl, Unreachable0) of
                        true ->
                            %% The landing pad can't be reached in any way,
                            %% remove the entire try/catch.
                            Blk = Blk0#b_blk{is=reverse(Is),
                                             last=#b_br{bool=#b_literal{val=true},
                                                        succ=SuccLbl,fail=SuccLbl}},
                            Killed = sets:add_element(Tag, Killed0),
                            trim_try(Bs, Unreachable, Killed, [{L,Blk}|Acc]);
                        false ->
                            trim_try(Bs, Unreachable, Killed0, [{L,Blk0}|Acc])
                    end;
                _ ->
                    %% Update the set of unreachable landing_pad blocks.
                    Successors = sets:from_list(beam_ssa:successors(Blk0),
                                                [{version, 2}]),
                    Unreachable = sets:subtract(Unreachable0, Successors),
                    trim_try(Bs, Unreachable, Killed0, [{L,Blk0}|Acc])
            end
    end;
trim_try([], _Unreachable, Killed, Acc0) ->
    case sets:is_empty(Killed) of
        true ->
            Acc0;
        false ->
            %% Remove all `kill_try_tag` instructions referencing removed
            %% try/catches.
            [{L, Blk#b_blk{is=trim_try_is(Is0, Killed)}} ||
                {L, #b_blk{is=Is0}=Blk} <- Acc0]
    end.
1588
%% trim_try_is(Is, Killed) -> Is'
%%  Remove `kill_try_tag` and `catch_end` instructions that reference
%%  try tags in the `Killed` set (tags whose try/catch construct was
%%  removed by the caller).
trim_try_is([#b_set{op=phi,dst=CatchEndVal}=Phi,
             #b_set{op=catch_end,dst=Dst,args=[Tag,CatchEndVal]}=Catch | Is],
            Killed) ->
    case sets:is_element(Tag, Killed) of
        %% The try tag is gone: drop the catch_end and let the phi
        %% write directly to the variable catch_end used to define.
        true -> [Phi#b_set{dst=Dst} | trim_try_is(Is, Killed)];
        false -> [Phi, Catch | trim_try_is(Is, Killed)]
    end;
trim_try_is([#b_set{op=kill_try_tag,args=[Tag]}=I | Is], Killed) ->
    case sets:is_element(Tag, Killed) of
        %% The try tag is gone: the kill instruction is unnecessary.
        true -> trim_try_is(Is, Killed);
        false -> [I | trim_try_is(Is, Killed)]
    end;
trim_try_is([I | Is], Killed) ->
    [I | trim_try_is(Is, Killed)];
trim_try_is([], _Killed) ->
    [].
1605
1606%%%
1607%%% Optimize binary matching.
1608%%%
1609%%% * If the value of segment is never extracted, rewrite
1610%%%   to a bs_skip instruction.
1611%%%
1612%%% * Coalesce adjacent bs_skip instructions and skip instructions
1613%%%   with bs_test_tail.
1614%%%
1615
%% ssa_opt_bsm(StAndFuncDb) -> {St',FuncDb}
%%  Rewrite bs_match instructions whose value is never extracted
%%  into skip instructions (see bsm_skip/2).
ssa_opt_bsm({#opt_st{ssa=Linear}=St0, FuncDb}) ->
    Extracted = sets:from_list(bsm_extracted(Linear), [{version, 2}]),
    St = St0#opt_st{ssa=bsm_skip(Linear, Extracted)},
    {St, FuncDb}.
1620
%% Rewrite unused matches to skips in every block, then attempt to
%% coalesce adjacent skip instructions.
bsm_skip([{Lbl,#b_blk{is=Is}=Blk}|Rest0], Extracted) ->
    %% Process the tail of the list first so that coalescing can look
    %% ahead into blocks that have already been rewritten.
    Rest = bsm_skip(Rest0, Extracted),
    coalesce_skips({Lbl,Blk#b_blk{is=bsm_skip_is(Is, Extracted)}}, Rest);
bsm_skip([], _Extracted) -> [].
1626
%% bsm_skip_is(Is, Extracted) -> Is'
%%  If the destination of a bs_match instruction is never passed to a
%%  bs_extract instruction (not a member of `Extracted`), rewrite the
%%  match into a `skip` so that no value needs to be built.
bsm_skip_is([I0|Is], Extracted) ->
    case I0 of
        #b_set{op=bs_match,
               dst=Ctx,
               args=[#b_literal{val=T}=Type,PrevCtx|Args0]}
          when T =/= float, T =/= string, T =/= skip ->
            %% Note that it is never safe to skip matching
            %% of floats, even if the size is known to be correct.
            I = case sets:is_element(Ctx, Extracted) of
                    true ->
                        I0;
                    false ->
                        %% The value is never extracted.
                        Args = [#b_literal{val=skip},PrevCtx,Type|Args0],
                        I0#b_set{args=Args}
                end,
            %% Stop scanning this block; the remaining instructions
            %% are returned unchanged.
            [I|Is];
        #b_set{} ->
            [I0|bsm_skip_is(Is, Extracted)]
    end;
bsm_skip_is([], _) -> [].
1648
%% Collect the match results whose value really is extracted: each
%% block that starts with a bs_extract instruction names (in its
%% argument) the bs_match destination being read.
bsm_extracted(Linear) ->
    [Ctx || {_,#b_blk{is=[#b_set{op=bs_extract,args=[Ctx]}|_]}} <- Linear].
1657
%% Try to coalesce the skip instructions of this block with the ones
%% in the immediately following block. A leading bs_extract
%% instruction, if present, is kept in front of the coalesced
%% instruction sequence.
coalesce_skips({Lbl,#b_blk{is=Is0,last=Last0}=Blk0}, Rest0) ->
    {Leading,Body} =
        case Is0 of
            [#b_set{op=bs_extract}=Extract|Tail] -> {[Extract],Tail};
            _ -> {[],Is0}
        end,
    case coalesce_skips_is(Body, Last0, Rest0) of
        not_possible ->
            [{Lbl,Blk0}|Rest0];
        {Is,Last,Rest} ->
            [{Lbl,Blk0#b_blk{is=Leading++Is,last=Last}}|Rest]
    end.
1675
%% coalesce_skips_is(Is, Last, Bs) -> {Is',Last',Bs'} | not_possible
%%  Combine a skip with a literal integer size with the skip or
%%  bs_test_tail instruction in the immediately following block,
%%  provided that both blocks branch to the same failure label.
coalesce_skips_is([#b_set{op=bs_match,
                          args=[#b_literal{val=skip},
                                Ctx0,Type,Flags,
                                #b_literal{val=Size0},
                                #b_literal{val=Unit0}]}=Skip0,
                   #b_set{op={succeeded,guard}}],
                  #b_br{succ=L2,fail=Fail}=Br0,
                  Bs0) when is_integer(Size0) ->
    case Bs0 of
        [{L2,#b_blk{is=[#b_set{op=bs_match,
                               dst=SkipDst,
                               args=[#b_literal{val=skip},_,_,_,
                                     #b_literal{val=Size1},
                                     #b_literal{val=Unit1}]},
                        #b_set{op={succeeded,guard}}=Succeeded],
                    last=#b_br{fail=Fail}=Br}}|Bs] when is_integer(Size1) ->
            %% Two adjacent skips with the same failure label. Replace
            %% them with a single skip of the total number of bits
            %% (with unit 1).
            SkipBits = Size0 * Unit0 + Size1 * Unit1,
            Skip = Skip0#b_set{dst=SkipDst,
                               args=[#b_literal{val=skip},Ctx0,
                                     Type,Flags,
                                     #b_literal{val=SkipBits},
                                     #b_literal{val=1}]},
            Is = [Skip,Succeeded],
            {Is,Br,Bs};
        [{L2,#b_blk{is=[#b_set{op=bs_test_tail,
                               args=[_Ctx,#b_literal{val=TailSkip}]}],
                    last=#b_br{succ=NextSucc,fail=Fail}}}|Bs] ->
            %% A skip followed by a tail test with the same failure
            %% label. Fold the skipped bits into the tail test.
            SkipBits = Size0 * Unit0,
            TestTail = Skip0#b_set{op=bs_test_tail,
                                   args=[Ctx0,#b_literal{val=SkipBits+TailSkip}]},
            Br = Br0#b_br{bool=TestTail#b_set.dst,succ=NextSucc},
            Is = [TestTail],
            {Is,Br,Bs};
        _ ->
            not_possible
    end;
coalesce_skips_is(_, _, _) ->
    not_possible.
1714
1715%%%
1716%%% Short-cutting binary matching instructions.
1717%%%
1718
%% ssa_opt_bsm_shortcut(StAndFuncDb) -> {St',FuncDb}
%%  Retarget failing bs_match branches directly to the final failure
%%  label when the intermediate tail test is certain to fail too.
ssa_opt_bsm_shortcut({#opt_st{ssa=Linear}=St, FuncDb}) ->
    case bsm_positions(Linear, #{}) of
        Positions when map_size(Positions) =:= 0 ->
            %% This function does no binary matching at all.
            {St, FuncDb};
        Positions ->
            {St#opt_st{ssa=bsm_shortcut(Linear, Positions)}, FuncDb}
    end.
1728
%% bsm_positions(Linear, PosMap0) -> PosMap
%%  Map every match context to the number of bits matched so far
%%  (a lower bound for UTF segments; see bsm_update_bits/2), and map
%%  every block consisting of a single bs_test_tail to the total
%%  number of bits it implies together with its failure label.
bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
    PosMap = bsm_positions_is(Is, PosMap0),
    case {Is,Last} of
        {[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}],
         #b_br{bool=Bool,fail=Fail}} ->
            Bits = Bits0 + map_get(Ctx, PosMap0),
            bsm_positions(Bs, PosMap#{L=>{Bits,Fail}});
        {_,_} ->
            bsm_positions(Bs, PosMap)
    end;
bsm_positions([], PosMap) -> PosMap.
1740
%% Track the bit position for every match context defined by the
%% instructions in a block.
bsm_positions_is([#b_set{op=bs_start_match,dst=Ctx}|Is], PosMap) ->
    %% A freshly created match context starts at bit position 0.
    bsm_positions_is(Is, PosMap#{Ctx=>0});
bsm_positions_is([#b_set{op=bs_match,dst=NewCtx,args=Args}|Is], PosMap) ->
    [_,OldCtx|_] = Args,
    #{OldCtx:=Bits0} = PosMap,
    %% The new context is the old position advanced by this segment.
    bsm_positions_is(Is, PosMap#{NewCtx=>bsm_update_bits(Args, Bits0)});
bsm_positions_is([_|Is], PosMap) ->
    bsm_positions_is(Is, PosMap);
bsm_positions_is([], PosMap) -> PosMap.
1753
%% Advance the bit position by the number of bits this segment will
%% consume (for UTF segments, the smallest possible number of bits).
bsm_update_bits([#b_literal{val=string},_,#b_literal{val=Str}], Bits) ->
    Bits + bit_size(Str);
bsm_update_bits([#b_literal{val=Utf}|_], Bits)
  when Utf =:= utf8; Utf =:= utf16; Utf =:= utf32 ->
    Bits + case Utf of
               utf8 -> 8;
               utf16 -> 16;
               utf32 -> 32
           end;
bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits)
  when is_integer(Sz) ->
    Bits + Sz*U;
bsm_update_bits(_, Bits) -> Bits.
1766
%% bsm_shortcut(Linear, PosMap) -> Linear'
%%  If a bs_match instruction fails and the failure label leads to a
%%  block containing only a bs_test_tail that is bound to fail as
%%  well (because the match has already consumed more bits than the
%%  test demands), branch directly to the tail test's failure label.
bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) ->
    case {Is,Last0} of
        {[#b_set{op=bs_match,dst=New,args=[_,Old|_]},
          #b_set{op={succeeded,guard},dst=Bool,args=[New]}],
         #b_br{bool=Bool,fail=Fail}} ->
            case PosMap of
                #{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits ->
                    %% The tail test at Fail cannot succeed; shortcut.
                    Last = Last0#b_br{fail=NextFail},
                    [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)];
                #{} ->
                    [{L,Blk}|bsm_shortcut(Bs, PosMap)]
            end;
        {_,_} ->
            [{L,Blk}|bsm_shortcut(Bs, PosMap)]
    end;
bsm_shortcut([], _PosMap) -> [].
1783
1784%%%
1785%%% Optimize binary construction.
1786%%%
1787%%% If an integer segment or a float segment has a literal size and
1788%%% a literal value, convert to a binary segment. Coalesce adjacent
1789%%% literal binary segments. Literal binary segments will be converted
%%% to bs_put_string instructions in a later pass.
1791%%%
1792
%% ssa_opt_bs_puts(StAndFuncDb) -> {St',FuncDb}
%%  Evaluate bs_put instructions with literal arguments at compile
%%  time and coalesce adjacent literal binary segments.
ssa_opt_bs_puts({#opt_st{ssa=Linear0,cnt=Count0}=St0, FuncDb}) ->
    {Linear,Count} = opt_bs_puts(Linear0, Count0, []),
    St = St0#opt_st{ssa=Linear,cnt=Count},
    {St, FuncDb}.
1796
%% opt_bs_puts(Linear, Count, Acc) -> {Linear',Count'}
%%  Optimize every block that consists of exactly one bs_put
%%  instruction followed by a succeeded test. Acc holds the
%%  already-processed blocks in reverse order.
opt_bs_puts([{L,#b_blk{is=Is}=Blk0}|Bs], Count0, Acc0) ->
    case Is of
        [#b_set{op=bs_put},#b_set{op={succeeded,_}}]=Is ->
            case opt_bs_put(L, Is, Blk0, Count0, Acc0) of
                not_possible ->
                    opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0]);
                {Count,Acc1} ->
                    %% The bs_put was rewritten; also try to merge it
                    %% with a preceding literal binary segment.
                    Acc = opt_bs_puts_merge(Acc1),
                    opt_bs_puts(Bs, Count, Acc)
            end;
        _ ->
            opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0])
    end;
opt_bs_puts([], Count, Acc) ->
    {reverse(Acc),Count}.
1812
%% opt_bs_puts_merge(Acc) -> Acc'
%%  Acc holds processed blocks in reverse order. Try to merge the
%%  most recently added block (L1) with the block before it (L2)
%%  when both put a literal binary segment with size `all` and
%%  unit 1. The merged block keeps the earlier label L2 (so its
%%  predecessors are unaffected) but uses L1's block record, and
%%  therefore L1's terminator.
opt_bs_puts_merge([{L1,#b_blk{is=Is}=Blk0},{L2,#b_blk{is=AccIs}}=BAcc|Acc]) ->
    case {AccIs,Is} of
        {[#b_set{op=bs_put,
                 args=[#b_literal{val=binary},
                       #b_literal{},
                       #b_literal{val=Bin0},
                       #b_literal{val=all},
                       #b_literal{val=1}]},
          #b_set{op={succeeded,_}}],
         [#b_set{op=bs_put,
                 args=[#b_literal{val=binary},
                       #b_literal{},
                       #b_literal{val=Bin1},
                       #b_literal{val=all},
                       #b_literal{val=1}]}=I0,
          #b_set{op={succeeded,_}}=Succeeded]} ->
            %% Coalesce the two segments to one.
            Bin = <<Bin0/bitstring,Bin1/bitstring>>,
            I = I0#b_set{args=bs_put_args(binary, Bin, all)},
            Blk = Blk0#b_blk{is=[I,Succeeded]},
            [{L2,Blk}|Acc];
        {_,_} ->
            [{L1,Blk0},BAcc|Acc]
    end.
1837
%% opt_bs_put(L, Is, Blk, Count, Acc) -> {Count',Acc'} | not_possible
%%  Try to replace a bs_put of literal arguments with literal
%%  segment data (see opt_bs_put/1). When the value is split into an
%%  integer part and a binary part, a new block is created for the
%%  binary part, consuming three counter values (one block label and
%%  two variable names).
opt_bs_put(L, [I0,Succeeded], #b_blk{last=Br0}=Blk0, Count0, Acc) ->
    case opt_bs_put(I0) of
        [Bin] when is_bitstring(Bin) ->
            %% The entire segment was evaluated to a literal bitstring.
            Args = bs_put_args(binary, Bin, all),
            I = I0#b_set{args=Args},
            Blk = Blk0#b_blk{is=[I,Succeeded]},
            {Count0,[{L,Blk}|Acc]};
        [{int,Int,Size},Bin] when is_bitstring(Bin) ->
            %% Construct a bs_put_integer instruction followed
            %% by a bs_put_binary instruction.
            IntArgs = bs_put_args(integer, Int, Size),
            BinArgs = bs_put_args(binary, Bin, all),

            {BinL,BinVarNum,BinBoolNum} = {Count0,Count0+1,Count0+2},
            Count = Count0 + 3,
            BinVar = #b_var{name={'@ssa_bs_put',BinVarNum}},
            BinBool = #b_var{name={'@ssa_bool',BinBoolNum}},

            %% The new block putting the binary part branches exactly
            %% as the original block did.
            BinI = I0#b_set{dst=BinVar,args=BinArgs},
            BinSucceeded = Succeeded#b_set{dst=BinBool,args=[BinVar]},
            BinBlk = Blk0#b_blk{is=[BinI,BinSucceeded],
                                last=Br0#b_br{bool=BinBool}},

            %% The original block now puts the integer part and falls
            %% through (on success) to the new binary block.
            IntI = I0#b_set{args=IntArgs},
            IntBlk = Blk0#b_blk{is=[IntI,Succeeded],last=Br0#b_br{succ=BinL}},

            {Count,[{BinL,BinBlk},{L,IntBlk}|Acc]};
        not_possible ->
            not_possible
    end.
1868
%% opt_bs_put(I) -> [Segment] | not_possible
%%  Evaluate a bs_put instruction whose arguments are all literals.
%%  Returns a list of one or two segments, where each segment is
%%  either a literal bitstring or {int,Value,Size} (an integer
%%  segment to be emitted as an instruction).
opt_bs_put(#b_set{args=[#b_literal{val=binary},_,#b_literal{val=Val},
                        #b_literal{val=all},#b_literal{val=Unit}]})
  when is_bitstring(Val) ->
    %% A literal binary of size `all` is usable as-is, provided that
    %% its size is a whole number of units.
    if
        bit_size(Val) rem Unit =:= 0 ->
            [Val];
        true ->
            not_possible
    end;
opt_bs_put(#b_set{args=[#b_literal{val=Type},#b_literal{val=Flags},
                        #b_literal{val=Val},#b_literal{val=Size},
                        #b_literal{val=Unit}]}=I0) when is_integer(Size) ->
    EffectiveSize = Size * Unit,
    if
        EffectiveSize > 0 ->
            case {Type,opt_bs_put_endian(Flags)} of
                {integer,big} when is_integer(Val) ->
                    if
                        EffectiveSize < 64 ->
                            [<<Val:EffectiveSize>>];
                        true ->
                            %% Wide field: split it to limit code size.
                            opt_bs_put_split_int(Val, EffectiveSize)
                    end;
                {integer,little} when is_integer(Val), EffectiveSize < 128 ->
                    %% To avoid an explosion in code size, we only try
                    %% to optimize relatively small fields.
                    <<Int:EffectiveSize>> = <<Val:EffectiveSize/little>>,
                    Args = bs_put_args(Type, Int, EffectiveSize),
                    I = I0#b_set{args=Args},
                    %% Retry with the byte order already swapped and
                    %% default (big-endian) flags.
                    opt_bs_put(I);
                {binary,_} when is_bitstring(Val) ->
                    case Val of
                        <<Bitstring:EffectiveSize/bits,_/bits>> ->
                            [Bitstring];
                        _ ->
                            %% Specified size exceeds size of bitstring.
                            not_possible
                    end;
                {float,Endian} ->
                    %% Construction fails with an exception for
                    %% non-numeric values; treat that as not possible.
                    try
                        [opt_bs_put_float(Val, EffectiveSize, Endian)]
                    catch error:_ ->
                            not_possible
                    end;
                {_,_} ->
                    not_possible
            end;
        true ->
            not_possible
    end;
opt_bs_put(#b_set{}) -> not_possible.
1920
%% Build the bits of a literal float segment with the given size and
%% endianness. Raises an exception (handled by the caller) when the
%% value is not a number.
opt_bs_put_float(Val, Bits, Endian) ->
    case Endian of
        little -> <<Val:Bits/little-float-unit:1>>;
        big -> <<Val:Bits/big-float-unit:1>>
    end.
1926
%% Build the argument list for a bs_put instruction: type, flags
%% (always unsigned/big for literal values), value, size, and unit 1.
bs_put_args(Type, Val, Size) ->
    [#b_literal{val=V} || V <- [Type,[unsigned,big],Val,Size,1]].
1933
%% Return the first endianness flag (big, little, or native) found
%% in the flag list.
opt_bs_put_endian([Flag|Flags]) ->
    case Flag of
        big -> Flag;
        little -> Flag;
        native -> Flag;
        _ -> opt_bs_put_endian(Flags)
    end.
1938
%% opt_bs_put_split_int(Int, Size) -> [Segment] | not_possible
%%  Convert a wide (>= 64 bits) big-endian integer segment into
%%  literal data. Pos is the byte-aligned position above which all
%%  bits equal the sign bit (see opt_bs_put_split_int_1/3).
opt_bs_put_split_int(Int, Size) ->
    Pos = opt_bs_put_split_int_1(Int, 0, Size - 1),
    UpperSize = Size - Pos,
    if
        Pos =:= 0 ->
            %% Value is 0 or -1 -- keep the original instruction.
            not_possible;
        UpperSize < 64 ->
            %% No or few leading zeroes or ones.
            [<<Int:Size>>];
        true ->
            %% There are 64 or more leading ones or zeroes in
            %% the resulting binary. Split into two separate
            %% segments to avoid an explosion in code size.
            [{int,Int bsr Pos,UpperSize},<<Int:Pos>>]
    end.
1955
%% Binary search for the lowest position above which all bits of Int
%% equal the sign bit, rounded up to a whole number of bytes.
opt_bs_put_split_int_1(_Int, L, R) when L > R ->
    8 * ((L + 7) div 8);
opt_bs_put_split_int_1(Int, L, R) ->
    Mid = (L + R) div 2,
    case Int bsr Mid of
        Upper when Upper =:= 0; Upper =:= -1 ->
            %% Everything from Mid and up is sign bits; search lower.
            opt_bs_put_split_int_1(Int, L, Mid - 1);
        _ ->
            opt_bs_put_split_int_1(Int, Mid + 1, R)
    end.
1966
1967%%%
1968%%% Optimize expressions such as "tuple_size(Var) =:= 2".
1969%%%
1970%%% Consider this code:
1971%%%
1972%%% 0:
1973%%%   .
1974%%%   .
1975%%%   .
1976%%%   Size = bif:tuple_size Var
1977%%%   BoolVar1 = succeeded:guard Size
1978%%%   br BoolVar1, label 4, label 3
1979%%%
1980%%% 4:
1981%%%   BoolVar2 = bif:'=:=' Size, literal 2
1982%%%   br BoolVar2, label 6, label 3
1983%%%
1984%%% 6: ...   %% OK
1985%%%
1986%%% 3: ...   %% Not a tuple of size 2
1987%%%
%%% The BEAM code will look like this:
1989%%%
1990%%%   {bif,tuple_size,{f,3},[{x,0}],{x,0}}.
1991%%%   {test,is_eq_exact,{f,3},[{x,0},{integer,2}]}.
1992%%%
1993%%% Better BEAM code will be produced if we transform the
1994%%% code like this:
1995%%%
1996%%% 0:
1997%%%   .
1998%%%   .
1999%%%   .
2000%%%   br label 10
2001%%%
2002%%% 10:
2003%%%   NewBoolVar = bif:is_tuple Var
2004%%%   br NewBoolVar, label 11, label 3
2005%%%
2006%%% 11:
2007%%%   Size = bif:tuple_size Var
2008%%%   br label 4
2009%%%
2010%%% 4:
2011%%%   BoolVar2 = bif:'=:=' Size, literal 2
2012%%%   br BoolVar2, label 6, label 3
2013%%%
2014%%% (The key part of the transformation is the removal of
2015%%% the 'succeeded' instruction to signal to the code generator
2016%%% that the call to tuple_size/1 can't fail.)
2017%%%
2018%%% The BEAM code will look like:
2019%%%
2020%%%   {test,is_tuple,{f,3},[{x,0}]}.
2021%%%   {test_arity,{f,3},[{x,0},2]}.
2022%%%
2023%%% Those two instructions will be combined into a single
2024%%% is_tuple_of_arity instruction by the loader.
2025%%%
2026
%% ssa_opt_tuple_size(StAndFuncDb) -> {St',FuncDb}
%%  Rewrite `tuple_size(T) =:= N` tests in guard context into an
%%  is_tuple test followed by an infallible tuple_size call (see the
%%  comment above for the full rationale).
ssa_opt_tuple_size({#opt_st{ssa=Linear0,cnt=Count0}=St0, FuncDb}) ->
    %% Only safe in guards: adding the is_tuple test prevents
    %% tuple_size from throwing an exception.
    {Linear,Count} = opt_tup_size(Linear0, non_guards(Linear0), Count0, []),
    {St0#opt_st{ssa=Linear,cnt=Count}, FuncDb}.
2033
%% opt_tup_size(Linear, NonGuards, Count, Acc) -> {Linear',Count'}
%%  Look for a block that compares a variable against a non-negative
%%  integer literal and branches on the result. The preceding block
%%  (at the head of Acc) may contain the tuple_size call to rewrite.
opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], NonGuards, Count0, Acc0) ->
    case {Is,Last} of
        {[#b_set{op={bif,'=:='},dst=Bool,args=[#b_var{}=Tup,#b_literal{val=Arity}]}],
         #b_br{bool=Bool}} when is_integer(Arity), Arity >= 0 ->
            {Acc,Count} = opt_tup_size_1(Tup, L, NonGuards, Count0, Acc0),
            opt_tup_size(Bs, NonGuards, Count, [{L,Blk}|Acc]);
        {_,_} ->
            opt_tup_size(Bs, NonGuards, Count0, [{L,Blk}|Acc0])
    end;
opt_tup_size([], _NonGuards, Count, Acc) ->
    {reverse(Acc),Count}.
2045
%% opt_tup_size_1(Size, EqL, NonGuards, Count, Acc) -> {Acc',Count'}
%%  Inspect the block preceding the comparison block EqL (the head
%%  of Acc). The rewrite is done only when that block branches to
%%  EqL on success, the failure label is not in the NonGuards set,
%%  and the block ends with `Size = tuple_size(Tuple)` followed by a
%%  succeeded test.
opt_tup_size_1(Size, EqL, NonGuards, Count0, [{L,Blk0}|Acc]) ->
    #b_blk{is=Is0,last=Last} = Blk0,
    case Last of
        #b_br{bool=Bool,succ=EqL,fail=Fail} ->
            case gb_sets:is_member(Fail, NonGuards) of
                true ->
                    %% Not in guard context; unsafe to rewrite.
                    {[{L,Blk0}|Acc],Count0};
                false ->
                    case opt_tup_size_is(Is0, Bool, Size, []) of
                        none ->
                            {[{L,Blk0}|Acc],Count0};
                        {PreIs,TupleSizeIs,Tuple} ->
                            opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
                                           Tuple, Fail, Count0, Acc)
                    end
            end;
        _ ->
            {[{L,Blk0}|Acc],Count0}
    end;
opt_tup_size_1(_, _, _, Count, Acc) ->
    {Acc,Count}.
2067
%% Create the replacement blocks: PreBlk (the instructions before
%% the tuple_size call, falling through), IsTupleBlk (the new
%% is_tuple test, failing to Fail), and TupleSizeBlk (the now
%% infallible tuple_size call, falling through to the comparison
%% block EqL). Consumes three counter values: two block labels and
%% one boolean variable.
opt_tup_size_2(PreIs, TupleSizeIs, PreL, EqL, Tuple, Fail, Count0, Acc) ->
    IsTupleL = Count0,
    TupleSizeL = Count0 + 1,
    Bool = #b_var{name={'@ssa_bool',Count0+2}},
    Count = Count0 + 3,

    True = #b_literal{val=true},
    PreBr = #b_br{bool=True,succ=IsTupleL,fail=IsTupleL},
    PreBlk = #b_blk{is=PreIs,last=PreBr},

    IsTupleIs = [#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}],
    IsTupleBr = #b_br{bool=Bool,succ=TupleSizeL,fail=Fail},
    IsTupleBlk = #b_blk{is=IsTupleIs,last=IsTupleBr},

    TupleSizeBr = #b_br{bool=True,succ=EqL,fail=EqL},
    TupleSizeBlk = #b_blk{is=TupleSizeIs,last=TupleSizeBr},
    {[{TupleSizeL,TupleSizeBlk},
      {IsTupleL,IsTupleBlk},
      {PreL,PreBlk}|Acc],Count}.
2087
%% opt_tup_size_is(Is, Bool, Size, Acc) ->
%%     {PreIs,TupleSizeIs,Tuple} | none
%%  Check whether the block ends with `Size = tuple_size(Tuple)`
%%  followed by a succeeded test defining `Bool`. Note that `Bool`
%%  and `Size` are already bound, so the head patterns only match
%%  those exact variables.
opt_tup_size_is([#b_set{op={bif,tuple_size},dst=Size,args=[Tuple]}=I,
                 #b_set{op={succeeded,_},dst=Bool,args=[Size]}],
                Bool, Size, Acc) ->
    {reverse(Acc),[I],Tuple};
opt_tup_size_is([I|Is], Bool, Size, Acc) ->
    opt_tup_size_is(Is, Bool, Size, [I|Acc]);
opt_tup_size_is([], _, _, _Acc) -> none.
2095
2096%%%
2097%%% Optimize #b_switch{} instructions.
2098%%%
2099%%% A #b_switch{} with only one value can be rewritten to
2100%%% a #b_br{}. A switch that only verifies that the argument
2101%%% is 'true' or 'false' can be rewritten to an is_boolean test.
%%%
2103
%% ssa_opt_sw(StAndFuncDb) -> {St',FuncDb}
%%  Simplify switch terminators (see opt_sw/3).
ssa_opt_sw({#opt_st{ssa=Linear0,cnt=Count0}=St0, FuncDb}) ->
    {Linear,Count} = opt_sw(Linear0, Count0, []),
    St = St0#opt_st{ssa=Linear,cnt=Count},
    {St, FuncDb}.
2107
%% opt_sw(Linear, Count, Acc) -> {Linear',Count'}
%%  Rewrite a switch with a single value to an '=:=' comparison and
%%  a branch, and a two-value true/false switch with a common target
%%  to an is_boolean test. Each rewrite consumes one counter value
%%  for the new boolean variable.
opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Sw0}=Blk0}|Bs], Count0, Acc) ->
    case Sw0 of
        #b_switch{arg=Arg,fail=Fail,list=[{Lit,Lbl}]} ->
            %% Rewrite a single value switch to a br.
            {Bool,Count} = new_var('@ssa_bool', Count0),
            IsEq = #b_set{op={bif,'=:='},dst=Bool,args=[Arg,Lit]},
            Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
            Blk = Blk0#b_blk{is=Is++[IsEq],last=Br},
            opt_sw(Bs, Count, [{L,Blk}|Acc]);
        #b_switch{arg=Arg,fail=Fail,
                  list=[{#b_literal{val=B1},Lbl},{#b_literal{val=B2},Lbl}]}
          when B1 =:= not B2 ->
            %% Replace with is_boolean test.
            {Bool,Count} = new_var('@ssa_bool', Count0),
            IsBool = #b_set{op={bif,is_boolean},dst=Bool,args=[Arg]},
            Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
            Blk = Blk0#b_blk{is=Is++[IsBool],last=Br},
            opt_sw(Bs, Count, [{L,Blk}|Acc]);
        _ ->
            opt_sw(Bs, Count0, [{L,Blk0}|Acc])
    end;
opt_sw([{L,#b_blk{}=Blk}|Bs], Count, Acc) ->
    opt_sw(Bs, Count, [{L,Blk}|Acc]);
opt_sw([], Count, Acc) ->
    {reverse(Acc),Count}.
2133
2134%%% Try to replace `=/=` with `=:=` and `/=` with `==`. For example,
2135%%% this code:
2136%%%
2137%%%    Bool = bif:'=/=' Anything, AnyValue
2138%%%    br Bool, ^Succ, ^Fail
2139%%%
2140%%% can be rewritten like this:
2141%%%
2142%%%    Bool = bif:'=:=' Anything, AnyValue
2143%%%    br Bool, ^Fail, ^Succ
2144%%%
%%% The transformation is only safe if the only use of Bool is in the
2146%%% terminator. We therefore need to verify that there is only one
2147%%% use.
2148%%%
2149%%% This transformation is not an optimization in itself, but it opens
2150%%% up for other optimizations in beam_ssa_type and beam_ssa_dead.
2151%%%
2152
%% ssa_opt_ne(StAndFuncDb) -> {St',FuncDb}
%%  Replace '=/=' with '=:=' and '/=' with '=='. The use map is
%%  constructed lazily (see opt_ne_single_use/2) because many
%%  functions contain no such tests at all.
ssa_opt_ne({#opt_st{ssa=Linear0}=St0, FuncDb}) ->
    {St0#opt_st{ssa=opt_ne(Linear0, {uses,Linear0})}, FuncDb}.
2156
%% opt_ne(Linear, Uses) -> Linear'
%%  Rewrite a block ending in a '=/=' ('/=') test that is branched
%%  on into a '=:=' ('==') test with the branch edges swapped. The
%%  rewrite is only performed when the boolean has no other uses
%%  (see opt_ne_replace/3).
opt_ne([{L,#b_blk{is=[_|_]=Is0,last=#b_br{bool=#b_var{}=Bool}}=Blk0}|Bs], Uses0) ->
    case last(Is0) of
        #b_set{op={bif,'=/='},dst=Bool}=I0 ->
            I = I0#b_set{op={bif,'=:='}},
            {Blk,Uses} = opt_ne_replace(I, Blk0, Uses0),
            [{L,Blk}|opt_ne(Bs, Uses)];
        #b_set{op={bif,'/='},dst=Bool}=I0 ->
            I = I0#b_set{op={bif,'=='}},
            {Blk,Uses} = opt_ne_replace(I, Blk0, Uses0),
            [{L,Blk}|opt_ne(Bs, Uses)];
        _ ->
            [{L,Blk0}|opt_ne(Bs, Uses0)]
    end;
opt_ne([{L,Blk}|Bs], Uses) ->
    [{L,Blk}|opt_ne(Bs, Uses)];
opt_ne([], _Uses) -> [].
2173
%% Install the negated comparison and swap the branch targets, but
%% only when the comparison's boolean is used solely by this block's
%% terminator.
opt_ne_replace(#b_set{dst=Bool}=NewI,
               #b_blk{is=Is0,last=#b_br{succ=Succ,fail=Fail}=Br0}=Blk0,
               Uses0) ->
    {IsSingle,Uses} = opt_ne_single_use(Bool, Uses0),
    case IsSingle of
        true ->
            Br = Br0#b_br{succ=Fail,fail=Succ},
            {Blk0#b_blk{is=replace_last(Is0, NewI),last=Br},Uses};
        false ->
            %% The variable is used more than once. Not safe.
            {Blk0,Uses}
    end.
2186
%% Replace the final element of a non-empty list.
replace_last([_], New) -> [New];
replace_last([Keep|Rest], New) -> [Keep|replace_last(Rest, New)].
2189
%% Determine whether Var has exactly one use. The use map is built
%% on demand the first time this function is called with the
%% {uses,Linear} placeholder; subsequent calls reuse the map.
opt_ne_single_use(Var, {uses,Linear}) ->
    Blocks = maps:from_list(Linear),
    Uses = beam_ssa:uses(beam_ssa:rpo(Blocks), Blocks),
    opt_ne_single_use(Var, Uses);
opt_ne_single_use(Var, Uses) when is_map(Uses) ->
    Single = case Uses of
                 #{Var:=[_]} -> true;
                 #{Var:=[_|_]} -> false
             end,
    {Single,Uses}.
2200
2201%%%
2202%%% When a tuple is matched, the pattern matching compiler generates a
2203%%% get_tuple_element instruction for every tuple element that will
2204%%% ever be used in the rest of the function. That often forces the
2205%%% extracted tuple elements to be stored in Y registers until it's
2206%%% time to use them. It could also mean that there could be execution
2207%%% paths that will never use the extracted elements.
2208%%%
2209%%% This optimization will sink get_tuple_element instructions, that
2210%%% is, move them forward in the execution stream to the last possible
%%% block where they will still dominate all uses. That may reduce the
2212%%% size of stack frames, reduce register shuffling, and avoid
2213%%% extracting tuple elements on execution paths that never use the
2214%%% extracted values.
2215%%%
2216%%% However, there is one caveat to be aware of. Sinking tuple elements
2217%%% will keep the entire tuple alive longer. In rare circumstance, that
2218%%% can be a problem. Here is an example:
2219%%%
2220%%%    mapfoldl(F, Acc0, [Hd|Tail]) ->
2221%%%        {R,Acc1} = F(Hd, Acc0),
2222%%%        {Rs,Acc2} = mapfoldl(F, Acc1, Tail),
2223%%%        {[R|Rs],Acc2};
2224%%%    mapfoldl(F, Acc, []) ->
2225%%%        {[],Acc}.
2226%%%
2227%%% Sinking get_tuple_element instructions will generate code similar
2228%%% to this example:
2229%%%
2230%%%    slow_mapfoldl(F, Acc0, [Hd|Tail]) ->
2231%%%        Res1 = F(Hd, Acc0),
2232%%%        Res2 = slow_mapfoldl(F, element(2, Res1), Tail),
2233%%%        {[element(1, Res1)|element(1, Res2)],element(2, Res2)};
2234%%%    slow_mapfoldl(F, Acc, []) ->
2235%%%        {[],Acc}.
2236%%%
2237%%% Here the entire tuple bound to `Res1` will be kept alive until
2238%%% `slow_mapfoldl/3` returns. That is, every intermediate accumulator
2239%%% will be kept alive.
2240%%%
2241%%% In this case, not sinking is clearly superior:
2242%%%
2243%%%    fast_mapfoldl(F, Acc0, [Hd|Tail]) ->
2244%%%        Res1 = F(Hd, Acc0),
2245%%%        R = element(1, Res1),
2246%%%        Res2 = fast_mapfoldl(F, element(2, Res1), Tail),
2247%%%        {[R|element(1, Res2)],element(2, Res2)};
2248%%%    fast_mapfoldl(F, Acc, []) ->
2249%%%        {[],Acc}.
2250%%%
2251%%% The `slow_mapfoldl/3` and `fast_mapfoldl/3` use the same number of
2252%%% stack slots.
2253%%%
%%% To avoid producing code similar to `slow_mapfoldl/3`, use a
2255%%% heuristic to only sink when sinking would reduce the number of
2256%%% stack slots, or if there can't possibly be a recursive call
2257%%% involved. This is a heuristic because it is difficult to exactly
2258%%% predict the number of stack slots that will be needed for a given
2259%%% piece of code.
2260
%% ssa_opt_sink(StAndFuncDb) -> {St',FuncDb}
%%  Sink get_tuple_element instructions closer to their uses. Start
%%  by collecting every get_tuple_element definition: a mapping from
%%  destination variable to the defining block and the source tuple.
ssa_opt_sink({#opt_st{ssa=Linear}=St, FuncDb}) ->
    case def_blocks(Linear) of
        [] ->
            %% No get_tuple_element instructions; nothing to do.
            {St, FuncDb};
        [_|_]=Defs ->
            {do_ssa_opt_sink(maps:from_list(Defs), St), FuncDb}
    end.
2273
%% do_ssa_opt_sink(Defs, St) -> St'
%%  Defs maps the destination of each get_tuple_element instruction
%%  to {DefiningBlockLabel,SourceTuple}.
do_ssa_opt_sink(Defs, #opt_st{ssa=Linear}=St) ->
    %% Find all the blocks that use variables defined by
    %% get_tuple_element instructions.
    Used = used_blocks(Linear, Defs, []),

    %% Calculate dominators.
    Blocks0 = maps:from_list(Linear),
    RPO = beam_ssa:rpo(Blocks0),
    Preds = beam_ssa:predecessors(Blocks0),
    {Dom, Numbering} = beam_ssa:dominators_from_predecessors(RPO, Preds),

    %% It is not safe to move get_tuple_element instructions to blocks
    %% that begin with certain instructions. It is also unsafe to move
    %% the instructions into any part of a receive.
    Unsuitable = unsuitable(Linear, Blocks0),

    %% Calculate new positions for get_tuple_element instructions. The new
    %% position is a block that dominates all uses of the variable.
    DefLocs0 = new_def_locations(Used, Defs, Dom, Numbering, Unsuitable),

    %% Avoid keeping tuples alive if only one element is accessed later and
    %% if there is the chance of a recursive call being made. This is an
    %% important precaution to avoid that lists:mapfoldl/3 keeps all previous
    %% versions of the accumulator alive until the end of the input list.
    Ps = partition_deflocs(DefLocs0, Defs, Blocks0),
    DefLocs1 = filter_deflocs(Ps, Preds, Blocks0),
    DefLocs = sort(DefLocs1),

    %% Now move all suitable get_tuple_element instructions to their
    %% new blocks.
    Blocks = foldl(fun({V,{From,To}}, A) ->
                           move_defs(V, From, To, A)
                   end, Blocks0, DefLocs),

    St#opt_st{ssa=beam_ssa:linearize(Blocks)}.
2309
%% Collect all get_tuple_element definitions as a list of
%% {Dst,{BlockLabel,SourceTuple}} pairs.
def_blocks(Linear) ->
    lists:foldr(fun({L,#b_blk{is=Is}}, Acc) ->
                        def_blocks_is(Is, L, Acc)
                end, [], Linear).
2313
%% Prepend a {Dst,{Lbl,Tuple}} entry for every get_tuple_element
%% instruction in the given instruction sequence.
def_blocks_is(Is, L, Acc0) ->
    foldl(fun(#b_set{op=get_tuple_element,args=[Tuple,_],dst=Dst}, Acc) ->
                  [{Dst,{L,Tuple}}|Acc];
             (_, Acc) ->
                  Acc
          end, Acc0, Is).
2319
%% For every variable in DefMap, collect the labels of the blocks
%% that use it, grouped per variable (see rel2fam/1).
used_blocks([{Lbl,Blk}|Rest], DefMap, Acc) ->
    UsedHere = [{V,Lbl} || V <- beam_ssa:used(Blk), maps:is_key(V, DefMap)],
    used_blocks(Rest, DefMap, UsedHere ++ Acc);
used_blocks([], _DefMap, Acc) ->
    rel2fam(Acc).
2326
2327%% Partition sinks for get_tuple_element instructions in the same
2328%% clause extracting from the same tuple. Sort each partition in
2329%% execution order.
%% Number every block in reverse post-order, then group the sinks by
%% source tuple with each group sorted by the RPO number of the
%% destination block (i.e. in execution order).
partition_deflocs(DefLoc, _Defs, Blocks) ->
    {BlkNums0,_} = mapfoldl(fun(L, N) -> {{L,N},N+1} end, 0, beam_ssa:rpo(Blocks)),
    BlkNums = maps:from_list(BlkNums0),
    S = [{Tuple,{map_get(To, BlkNums),{V,{From,To}}}} ||
            {V,Tuple,{From,To}} <- DefLoc],
    F = rel2fam(S),
    partition_deflocs_1(F, Blocks).
2337
%% For each tuple, strip the block-number sort keys, split the sinks
%% into per-clause partitions, and tag every partition with its
%% source tuple.
partition_deflocs_1([{Tuple,Numbered}|Rest], Blocks) ->
    Stripped = [DefLoc || {_SortKey,DefLoc} <- Numbered],
    Tagged = [{Tuple,Part} || Part <- partition_dl(Stripped, Blocks)],
    Tagged ++ partition_deflocs_1(Rest, Blocks);
partition_deflocs_1([], _Blocks) -> [].
2343
%% Split the sinks into partitions such that every sink in a
%% partition has a destination block reachable (in reverse
%% post-order) from the destination of the partition's first sink.
partition_dl([_]=DefLoc, _Blocks) ->
    [DefLoc];
partition_dl([{_,{_,First}}|_]=DefLoc0, Blocks) ->
    RPO = beam_ssa:rpo([First], Blocks),
    {P,DefLoc} = partition_dl_1(DefLoc0, RPO, []),
    [P|partition_dl(DefLoc, Blocks)];
partition_dl([], _Blocks) -> [].
2351
%% Walk the sinks (in execution order) and the RPO label list in
%% lockstep, collecting sinks whose destination appears in the RPO
%% sequence. Several sinks can target the same block, so the label
%% list is only advanced when the heads stop matching. Returns
%% {Partition,RemainingSinks}.
partition_dl_1([{_,{_,L}}=DL|DLs], [L|_]=Ls, Acc) ->
    partition_dl_1(DLs, Ls, [DL|Acc]);
partition_dl_1([_|_]=DLs, [_|Ls], Acc) ->
    partition_dl_1(DLs, Ls, Acc);
partition_dl_1([], _, Acc) ->
    {reverse(Acc),[]};
partition_dl_1([_|_]=DLs, [], Acc) ->
    {reverse(Acc),DLs}.
2360
%% filter_deflocs(TaggedPartitions, Preds, Blocks) -> [{Var,{From,To}}]
%%  Keep only the sinks that are not potentially harmful (see the
%%  heuristic described above) and that actually move an instruction
%%  (From =/= To).
filter_deflocs([{Tuple,DefLoc0}|DLs], Preds, Blocks) ->
    %% Input is a list of sinks of get_tuple_element instructions in
    %% execution order from the same tuple in the same clause.
    [{_,{_,First}}|_] = DefLoc0,
    Paths = find_paths_to_check(DefLoc0, First),
    WillGC0 = ordsets:from_list([FromTo || {{_,_}=FromTo,_} <- Paths]),
    WillGC1 = [{{From,To},will_gc(From, To, Preds, Blocks, true)} ||
                  {From,To} <- WillGC0],
    WillGC = maps:from_list(WillGC1),

    %% Separate sinks that will force the reference to the tuple to be
    %% saved on the stack from sinks that don't force.
    {DefLocGC0,DefLocNoGC} =
        partition(fun({{_,_}=FromTo,_}) ->
                          map_get(FromTo, WillGC)
                  end, Paths),

    %% Avoid potentially harmful sinks.
    DefLocGC = filter_gc_deflocs(DefLocGC0, Tuple, First, Preds, Blocks),

    %% Construct the complete list of sink operations.
    DefLoc1 = DefLocGC ++ DefLocNoGC,
    [DL || {_,{_,{From,To}}=DL} <- DefLoc1, From =/= To] ++
        filter_deflocs(DLs, Preds, Blocks);
filter_deflocs([], _, _) -> [].
2386
%% Use a heuristic to avoid harmful sinking in lists:mapfoldl/3 and
%% similar functions.
%% filter_gc_deflocs(DefLocGC, Tuple, First, Preds, Blocks) -> DefLocGC'.
%%  Decide whether the sinks that force the tuple to be saved on the
%%  stack should be kept or discarded (see the comment above).
filter_gc_deflocs(DefLocGC, Tuple, First, Preds, Blocks) ->
    case DefLocGC of
        [] ->
            [];
        [{_,{_,{From,To}}}] ->
            %% There is only one get_tuple_element instruction that
            %% can be sunk. That means that we may not gain any slots
            %% by sinking it.
            case is_on_stack(First, Tuple, Blocks) of
                true ->
                    %% The tuple itself must be stored in a stack slot
                    %% (because it will be used later) in addition to
                    %% the tuple element being extracted. It is
                    %% probably a win to sink this instruction.
                    DefLocGC;
                false ->
                    case will_gc(From, To, Preds, Blocks, false) of
                        false ->
                            %% There is no risk for recursive calls,
                            %% so it should be safe to
                            %% sink. Theoretically, we shouldn't win
                            %% any stack slots, but in practice it
                            %% seems that sinking makes it more likely
                            %% that the stack slot for a dying value
                            %% can be immediately reused for another
                            %% value.
                            DefLocGC;
                        true ->
                            %% This function could be involved in a
                            %% recursive call. Since there is no
                            %% obvious reduction in the number of
                            %% stack slots, we will not sink.
                            []
                    end
            end;
        [_,_|_] ->
            %% More than one get_tuple_element instruction can be
            %% sunk. Sinking will almost certainly reduce the number
            %% of stack slots.
            DefLocGC
    end.
2430
%% Pair every sink with the {First,To} path that must be checked for
%% a potential GC before the sink's destination block is reached.
find_paths_to_check(Moves, First) ->
    lists:map(fun({_,{_,To}}=Move) -> {{First,To},Move} end, Moves).
2434
%% will_gc(From, To, Preds, Blocks, All) -> true|false.
%%  Determine whether a GC can occur on any path from block From to
%%  block To. See will_gc_is/2 for the meaning of All.
will_gc(From, To, Preds, Blocks, All) ->
    will_gc_1(beam_ssa:between(From, To, Preds, Blocks),
              To, Blocks, All, #{From => false}).
2438
%% will_gc_1(Labels, To, Blocks, All, WillGC) -> true|false.
%%  Walk the blocks between From and To, propagating through the
%%  WillGC map whether a GC can occur on the way to each label.
will_gc_1([To|_], To, _Blocks, _All, WillGC) ->
    %% Reached the destination block; return its GC status.
    map_get(To, WillGC);
will_gc_1([L|Ls], To, Blocks, All, WillGC0) ->
    #b_blk{is=Is} = Blk = map_get(L, Blocks),
    %% A GC can happen here if one could already happen on the way
    %% in, or if an instruction in this block can cause one.
    GC = map_get(L, WillGC0) orelse will_gc_is(Is, All),
    WillGC = gc_update_successors(Blk, GC, WillGC0),
    will_gc_1(Ls, To, Blocks, All, WillGC).
2446
%% will_gc_is(Is, All) -> true|false.
%%  With All =:= true, test whether any instruction in Is clobbers
%%  the X registers (and thus can trigger a GC). With All =:= false,
%%  only consider calls that could be part of a recursive call.
will_gc_is([#b_set{op=call,args=Args}|Is], false) ->
    case Args of
        [#b_remote{mod=#b_literal{val=erlang}}|_] ->
            %% Assume that remote calls to the erlang module can't cause a recursive
            %% call.
            will_gc_is(Is, false);
        [_|_] ->
            true
    end;
will_gc_is([_|Is], false) ->
    %% Instructions that clobber X registers may cause a GC, but will not cause
    %% a recursive call.
    will_gc_is(Is, false);
will_gc_is([I|Is], All) ->
    %% All =:= true: any instruction that clobbers the X registers
    %% counts as a potential GC.
    beam_ssa:clobbers_xregs(I) orelse will_gc_is(Is, All);
will_gc_is([], _All) -> false.
2463
%% is_on_stack(From, Var, Blocks) -> true|false.
%%  Test whether Var is used after a point where a GC may occur,
%%  which would force it to be stored in a stack slot.
is_on_stack(From, Var, Blocks) ->
    Labels = beam_ssa:rpo([From], Blocks),
    is_on_stack(Labels, Var, Blocks, #{From => false}).
2466
%% Walk the blocks in reverse post-order, tracking in WillGC whether
%% a GC may have occurred on the way to each block. As soon as Var
%% is seen to be used after a potential GC, the answer is known and
%% returned through a throw.
is_on_stack([L|Ls], Var, Blocks, WillGC0) ->
    #b_blk{is=Is} = Blk = map_get(L, Blocks),
    GC0 = map_get(L, WillGC0),
    try is_on_stack_is(Is, Var, GC0) of
        GC ->
            WillGC = gc_update_successors(Blk, GC, WillGC0),
            is_on_stack(Ls, Var, Blocks, WillGC)
    catch
        throw:{done,GC} ->
            %% Var was used; GC tells whether a GC could have
            %% occurred before that use.
            GC
    end;
is_on_stack([], _Var, _, _) -> false.
2479
%% is_on_stack_is(Is, Var, GC) -> GC' (throws {done,GC} when Var is used).
%%  Scan the instructions in one block. get_tuple_element
%%  instructions are skipped, since they are the instructions being
%%  considered for sinking.
is_on_stack_is([#b_set{op=get_tuple_element}|Is], Var, GC) ->
    is_on_stack_is(Is, Var, GC);
is_on_stack_is([I|Is], Var, GC0) ->
    case GC0 andalso member(Var, beam_ssa:used(I)) of
        true ->
            %% Var is used after a potential GC; we are done.
            throw({done,GC0});
        false ->
            GC = GC0 orelse beam_ssa:clobbers_xregs(I),
            is_on_stack_is(Is, Var, GC)
    end;
is_on_stack_is([], _, GC) -> GC.
2491
%% Propagate the GC status GC of Blk to all of its successors. A
%% successor already marked `true` stays `true`; otherwise it becomes
%% `true` as soon as any predecessor seen so far can GC.
gc_update_successors(Blk, GC, WillGC) ->
    foldl(fun(L, Acc) ->
                  Acc#{L => maps:get(L, Acc, false) orelse GC}
          end, WillGC, beam_ssa:successors(Blk)).
2500
2501%% unsuitable(Linear, Blocks) -> Unsuitable.
2502%%  Return an ordset of block labels for the blocks that are not
2503%%  suitable for sinking of get_tuple_element instructions.
2504
%% unsuitable(Linear, Blocks) -> Unsuitable.
%%  Return a gb_set of block labels for the blocks that are not
%%  suitable for sinking of get_tuple_element instructions.
unsuitable(Linear, Blocks) ->
    Preds = beam_ssa:predecessors(Blocks),
    gb_sets:from_list(unsuitable_1(Linear) ++
                          unsuitable_recv(Linear, Blocks, Preds)).
2510
%% Collect the labels of blocks whose first instruction makes them
%% unsuitable as a sink destination.
unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}=First|_]}}|Bs]) ->
    case is_unsuitable_op(Op, First) of
        true -> [L|unsuitable_1(Bs)];
        false -> unsuitable_1(Bs)
    end;
unsuitable_1([{_,#b_blk{}}|Bs]) ->
    unsuitable_1(Bs);
unsuitable_1([]) -> [].

%% Test whether a block starting with this instruction is unsuitable.
is_unsuitable_op(bs_extract, _) -> true;
is_unsuitable_op(bs_put, _) -> true;
is_unsuitable_op({float,_}, _) -> true;
is_unsuitable_op(landingpad, _) -> true;
is_unsuitable_op(_, I) -> beam_ssa:is_loop_header(I).
2528
%% Mark the blocks of receive loops as unsuitable for sinking.
unsuitable_recv([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs], Blocks, Predecessors)
  when Op =:= remove_message; Op =:= recv_next ->
    unsuitable_loop(L, Blocks, Predecessors) ++
        unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([_|Bs], Blocks, Predecessors) ->
    unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([], _, _) -> [].
2542
%% unsuitable_loop(L, Blocks, Predecessors) -> [Label].
%%  Collect the labels of the blocks in the loop containing block L,
%%  by following predecessors until loop headers are reached.
unsuitable_loop(L, Blocks, Predecessors) ->
    unsuitable_loop(L, Blocks, Predecessors, []).
2545
%% Continue the loop traversal from the predecessors of block L.
unsuitable_loop(L, Blocks, Predecessors, Acc) ->
    unsuitable_loop_1(map_get(L, Predecessors), Blocks, Predecessors, Acc).
2549
%% Visit each predecessor once, stopping the upward traversal at loop
%% headers and at blocks that are already in the accumulated ordset.
unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) ->
    Acc = case is_loop_header(P, Blocks) orelse
              ordsets:is_element(P, Acc0) of
              true ->
                  Acc0;
              false ->
                  unsuitable_loop(P, Blocks, Predecessors,
                                  ordsets:add_element(P, Acc0))
          end,
    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc);
unsuitable_loop_1([], _, _, Acc) -> Acc.
2565
%% Test whether the block with label L starts with a loop header
%% instruction. An empty block is never a loop header.
is_loop_header(L, Blocks) ->
    case map_get(L, Blocks) of
        #b_blk{is=[First|_]} ->
            beam_ssa:is_loop_header(First);
        #b_blk{is=[]} ->
            false
    end.
2573
2574%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs,
2575%%                   Dominators, Numbering, Unsuitable) ->
2576%%  [{Variable,NewDefinitionBlock}]
2577%%
%%  Calculate new locations for get_tuple_element instructions. For
%%  each variable, the new location is a block that dominates all uses
%%  of the variable and is as near to those uses as possible.
2581
new_def_locations([{V,UsedIn}|Vs], Defs, Dom, Numbering, Unsuitable) ->
    %% Find the nearest suitable block that dominates all blocks in
    %% which V is used.
    {DefIn,Tuple} = map_get(V, Defs),
    Common = common_dominator(UsedIn, Dom, Numbering, Unsuitable),
    Sink = case member(Common, map_get(DefIn, Dom)) of
               true ->
                   %% The common dominator is either DefIn or an
                   %% ancestor of DefIn. We'll need to consider all
                   %% get_tuple_element instructions so we will add
                   %% a dummy sink.
                   {V,Tuple,{DefIn,DefIn}};
               false ->
                   %% We have found a suitable descendant of DefIn,
                   %% to which the get_tuple_element instruction can
                   %% be sunk.
                   {V,Tuple,{DefIn,Common}}
           end,
    [Sink|new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable)];
new_def_locations([], _, _, _, _) -> [].
2600
%% common_dominator(Labels, Dominators, Numbering, Unsuitable) -> Label.
%%  Find the nearest common dominator of all blocks in Labels that
%%  is not a member of the Unsuitable set.
common_dominator(Ls0, Dom, Numbering, Unsuitable) ->
    [Common|_] = beam_ssa:common_dominators(Ls0, Dom, Numbering),
    case gb_sets:is_member(Common, Unsuitable) of
        true ->
            %% It is not allowed to place the instruction here. Try
            %% to find another suitable dominating block by going up
            %% one step in the dominator tree.
            [Common,OneUp|_] = map_get(Common, Dom),
            common_dominator([OneUp], Dom, Numbering, Unsuitable);
        false ->
            Common
    end.
2613
2614%% Move get_tuple_element instructions to their new locations.
2615
%% move_defs(V, From, To, Blocks) -> Blocks'.
%%  Move the instruction defining V from block From to block To. If
%%  no safe insertion point exists in To, the blocks are returned
%%  unchanged.
move_defs(V, From, To, Blocks) ->
    FromBlk0 = map_get(From, Blocks),
    ToBlk0 = map_get(To, Blocks),
    {Def,FromBlk} = remove_def(V, FromBlk0),
    try insert_def(V, Def, ToBlk0) of
        ToBlk ->
            Blocks#{From:=FromBlk,To:=ToBlk}
    catch
        throw:not_possible ->
            Blocks
    end.
2626
%% Remove the instruction that defines V from the block, returning
%% both the removed instruction and the updated block.
remove_def(V, #b_blk{is=AllIs}=Blk) ->
    {Definition,RemainingIs} = remove_def_is(AllIs, V, []),
    {Definition,Blk#b_blk{is=RemainingIs}}.
2630
%% Find and extract the instruction whose destination is Dst,
%% returning it along with the remaining instructions (prefixed by
%% the already-seen instructions in Acc, in their original order).
remove_def_is(Is0, Dst, Acc) ->
    {Before,[Def|After]} =
        splitwith(fun(#b_set{dst=D}) -> D =/= Dst end, Is0),
    {Def,reverse(Acc, Before ++ After)}.
2635
%% Insert the instruction Def into the block, as late as safely
%% possible. Throws `not_possible` (from insert_def_is/3) if Def
%% cannot be placed in this block.
insert_def(V, Def, #b_blk{is=Is0}=Blk) ->
    Blk#b_blk{is=insert_def_is(Is0, V, Def)}.
2639
%% insert_def_is(Is, V, Def) -> Is'.
%%  Insert Def into the instruction sequence, as late as possible but
%%  always before any instruction that uses V. Throws `not_possible`
%%  if Def would have to be placed among phi instructions.
insert_def_is([#b_set{op=phi}=I|Is], V, Def) ->
    case member(V, beam_ssa:used(I)) of
        true ->
            %% V is used by a phi instruction; Def cannot be placed
            %% before the phi instructions.
            throw(not_possible);
        false ->
            [I|insert_def_is(Is, V, Def)]
    end;
insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) ->
    %% Only attempt to place Def beyond a few known-safe operations;
    %% for any other instruction, insert before it.
    Action0 = case Op of
                  call -> beyond;
                  'catch_end' -> beyond;
                  wait_timeout -> beyond;
                  _ -> here
              end,
    %% Never insert between an instruction and the {succeeded,_}
    %% instruction that must immediately follow it.
    Action = case Is of
                 [#b_set{op={succeeded,_}}|_] -> here;
                 _ -> Action0
             end,
    case Action of
        beyond ->
            case member(V, beam_ssa:used(I)) of
                true ->
                    %% The variable is used by this instruction. We must
                    %% place the definition before this instruction.
                    [Def|Is0];
                false ->
                    %% Place it beyond the current instruction.
                    [I|insert_def_is(Is, V, Def)]
            end;
        here ->
            [Def|Is0]
    end;
insert_def_is([], _V, Def) ->
    [Def].
2674
2675%%%
2676%%% Order consecutive get_tuple_element instructions in ascending
2677%%% position order. This will give the loader more opportunities
2678%%% for combining get_tuple_element instructions.
2679%%%
2680
%% Sub-pass entry point: reorder consecutive get_tuple_element
%% instructions in every block (see the comment above).
ssa_opt_get_tuple_element({#opt_st{ssa=Blocks0}=St0, FuncDb}) ->
    St = St0#opt_st{ssa=opt_get_tuple_element(maps:to_list(Blocks0),
                                              Blocks0)},
    {St, FuncDb}.
2684
%% Rewrite each block whose get_tuple_element instructions could be
%% reordered; leave all other blocks untouched.
opt_get_tuple_element([{L,#b_blk{is=Is0}=Blk0}|Bs], Blocks0) ->
    Blocks = case opt_get_tuple_element_is(Is0, false, []) of
                 {yes,Is} ->
                     Blocks0#{L := Blk0#b_blk{is=Is}};
                 no ->
                     Blocks0
             end,
    opt_get_tuple_element(Bs, Blocks);
opt_get_tuple_element([], Blocks) -> Blocks.
2694
%% opt_get_tuple_element_is(Is, AnyChange, Acc) -> {yes,Is'} | no.
%%  Sort each run of consecutive get_tuple_element instructions that
%%  extract from the same tuple in ascending position order.
opt_get_tuple_element_is([#b_set{op=get_tuple_element,
                                 args=[#b_var{}=Src,_]}=I0|Is0],
                         _AnyChange, Acc) ->
    %% Collect the run of get_tuple_element instructions extracting
    %% from Src and sort it on the position argument.
    {GetIs0,Is} = collect_get_tuple_element(Is0, Src, [I0]),
    GetIs1 = sort([{Pos,I} || #b_set{args=[_,Pos]}=I <- GetIs0]),
    GetIs = [I || {_,I} <- GetIs1],
    opt_get_tuple_element_is(Is, true, reverse(GetIs, Acc));
opt_get_tuple_element_is([I|Is], AnyChange, Acc) ->
    opt_get_tuple_element_is(Is, AnyChange, [I|Acc]);
opt_get_tuple_element_is([], AnyChange, Acc) ->
    case AnyChange of
        true -> {yes,reverse(Acc)};
        false -> no
    end.
2709
%% Collect the leading run of get_tuple_element instructions that
%% extract from Src, prepending them (in reverse) to Acc and
%% returning the remaining instructions.
collect_get_tuple_element(Is0, Src, Acc) ->
    {Run,Rest} =
        splitwith(fun(#b_set{op=get_tuple_element,args=[S,_]}) ->
                          S =:= Src;
                     (_) ->
                          false
                  end, Is0),
    {reverse(Run, Acc),Rest}.
2715
2716%%%
2717%%% Unfold literals to avoid unnecessary move instructions in call
2718%%% instructions.
2719%%%
2720%%% Consider the following example:
2721%%%
2722%%%     -module(whatever).
2723%%%     -export([foo/0]).
2724%%%     foo() ->
2725%%%         foobar(1, 2, 3).
2726%%%     foobar(A, B, C) ->
2727%%%         foobar(A, B, C, []).
2728%%%     foobar(A, B, C, D) -> ...
2729%%%
2730%%% The type optimization pass will find out that A, B, and C have constant
2731%%% values and do constant folding, rewriting foobar/3 to:
2732%%%
2733%%%     foobar(A, B, C) ->
2734%%%         foobar(1, 2, 3, []).
2735%%%
2736%%% That will result in three extra `move` instructions.
2737%%%
2738%%% This optimization sub pass will undo the constant folding
2739%%% optimization, rewriting code to use the original variable instead
2740%%% of the constant if the original variable is known to be in an x
2741%%% register.
2742%%%
2743%%% This optimization sub pass will also undo constant folding of the
2744%%% list of arguments in the call to error/2 in the last clause of a
2745%%% function. For example:
2746%%%
2747%%%     bar(X, Y) ->
2748%%%         error(function_clause, [X,42]).
2749%%%
2750%%% will be rewritten to:
2751%%%
2752%%%     bar(X, Y) ->
2753%%%         error(function_clause, [X,Y]).
2754%%%
2755
%% Sub-pass entry point: undo constant folding of function arguments
%% where using the argument register avoids a `move` instruction.
ssa_opt_unfold_literals({St,FuncDb}) ->
    #opt_st{ssa=Blocks0,args=Args,anno=Anno,cnt=Count0} = St,
    %% Gather the argument variables whose values are known literals.
    ParamInfo = maps:get(parameter_info, Anno, #{}),
    LitMap = collect_arg_literals(Args, ParamInfo, 0, #{}),
    if
        map_size(LitMap) =:= 0 ->
            %% None of the arguments for this function are known
            %% literals. Nothing to do.
            {St,FuncDb};
        true ->
            %% Block 0 is entered with all argument registers intact.
            {Blocks,Count} = unfold_literals(beam_ssa:rpo(Blocks0),
                                             LitMap, #{0 => true},
                                             Count0, Blocks0),
            {St#opt_st{ssa=Blocks,cnt=Count},FuncDb}
    end.
2771
%% collect_arg_literals(Args, Info, X, Acc) -> #{Value => [{X,Var}]}.
%%  Map each known literal value to the argument variables (and their
%%  X register numbers) that hold it on entry to the function.
collect_arg_literals([V|Vs], Info, X, Acc0) ->
    VarInfo = maps:get(V, Info, []),
    %% Missing info yields type `any`, which has no singleton value.
    Type = proplists:get_value(type, VarInfo, any),
    Acc = case beam_types:get_singleton_value(Type) of
              {ok,Val} ->
                  Entry = {X,V},
                  maps:update_with(Val,
                                   fun(Vars) -> [Entry|Vars] end,
                                   [Entry], Acc0);
              error ->
                  Acc0
          end,
    collect_arg_literals(Vs, Info, X + 1, Acc);
collect_arg_literals([], _Info, _X, Acc) -> Acc.
2788
%% unfold_literals(Labels, LitMap, SafeMap, Count, Blocks) ->
%%     {Blocks',Count'}.
%%  For each block in reverse post-order, replace literals with the
%%  argument variable known to hold the same value, as long as all X
%%  registers are still intact on every path to the block.
unfold_literals([L|Ls], LitMap, SafeMap0, Count0, Blocks0) ->
    {Blocks,Safe,Count} =
        case map_get(L, SafeMap0) of
            false ->
                %% Before reaching this block, an instruction that
                %% clobbers x registers has been executed.  *If* we
                %% would use an argument variable instead of literal,
                %% it would force the value to be saved to a y
                %% register. This is not what we want.
                {Blocks0,false,Count0};
            true ->
                %% All x registers live when entering the function
                %% are still live. Using the variable instead of
                %% the substituted value will eliminate a `move`
                %% instruction.
                #b_blk{is=Is0} = Blk = map_get(L, Blocks0),
                {Is,Safe0,Count1} = unfold_lit_is(Is0, LitMap, Count0, []),
                {Blocks0#{L:=Blk#b_blk{is=Is}},Safe0,Count1}
        end,
    %% Propagate safeness to successors.
    Successors = beam_ssa:successors(L, Blocks),
    SafeMap = unfold_update_succ(Successors, Safe, SafeMap0),
    unfold_literals(Ls, LitMap, SafeMap, Count,Blocks);
unfold_literals([], _, _, Count, Blocks) ->
    {Blocks,Count}.
2814
%% Mark each successor as safe only if this path is safe AND every
%% previously seen path to the successor was safe.
unfold_update_succ([S|Ss], Safe, SafeMap0) ->
    SafeMap = maps:update_with(S, fun(Prev) -> Prev and Safe end,
                               Safe, SafeMap0),
    unfold_update_succ(Ss, Safe, SafeMap);
unfold_update_succ([], _Safe, SafeMap) -> SafeMap.
2820
%% unfold_lit_is(Is, LitMap, Count, Acc) -> {Is',Safe,Count'}.
%%  Unfold literals in the instructions of one block. Safe tells
%%  whether the X registers are still intact at the end of the block.
unfold_lit_is([#b_set{op=call,
                      args=[#b_remote{mod=#b_literal{val=erlang},
                                      name=#b_literal{val=error},
                                      arity=2},
                            #b_literal{val=function_clause},
                            ArgumentList]}=I0|Is], LitMap, Count0, Acc0) ->
    %% This is a call to error/2 that raises a function_clause
    %% exception in the final clause of a function. Try to undo
    %% constant folding in the list of arguments (the second argument
    %% for error/2).
    case unfold_arg_list(Acc0, ArgumentList, LitMap, Count0, 0, []) of
        {[FinalPutList|_]=Acc,Count} ->
            %% Acc now contains the possibly rewritten code that
            %% creates the argument list. All that remains is to
            %% rewrite the call to error/2 itself so that it will
            %% refer to rewritten argument list. This is essential
            %% when all arguments have known literal values as in this
            %% example:
            %%
            %%     foo(X, Y) -> error(function_clause, [0,1]).
            %%
            #b_set{op=put_list,dst=ListVar} = FinalPutList,
            #b_set{args=[ErlangError,Fc,_]} = I0,
            I = I0#b_set{args=[ErlangError,Fc,ListVar]},
            {reverse(Acc, [I|Is]),false,Count};
        {[],_} ->
            %% Handle code such as:
            %%
            %% bar(KnownValue, Stk) -> error(function_clause, Stk).
            {reverse(Acc0, [I0|Is]),false,Count0}
    end;
unfold_lit_is([#b_set{op=Op,args=Args0}=I0|Is], LitMap, Count, Acc) ->
    %% Using a register instead of a literal is a clear win only for
    %% `call` and `old_make_fun` instructions. Substituting into other
    %% instructions is unlikely to be an improvement.
    Unfold = case Op of
                 call -> true;
                 old_make_fun -> true;
                 _ -> false
             end,
    I = case Unfold of
            true ->
                Args = unfold_call_args(Args0, LitMap, -1),
                I0#b_set{args=Args};
            false ->
                I0
        end,
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% This instruction clobbers x register. Don't do
            %% any substitutions in rest of this block or in any
            %% of its successors.
            {reverse(Acc, [I|Is]),false,Count};
        false ->
            unfold_lit_is(Is, LitMap, Count, [I|Acc])
    end;
unfold_lit_is([], _LitMap, Count, Acc) ->
    {reverse(Acc),true,Count}.
2879
2880%% unfold_arg_list(Is, ArgumentList, LitMap, Count0, X, Acc) ->
2881%%     {UpdatedAcc, Count}.
2882%%
2883%%  Unfold the arguments in the argument list (second argument for error/2).
2884%%
2885%%  Note that Is is the reversed list of instructions before the
2886%%  call to error/2. Because of the way the list is built in reverse,
2887%%  it means that the first put_list instruction found will add the first
2888%%  argument (x0) to the list, the second the second argument (x1), and
2889%%  so on.
2890
unfold_arg_list(Is, #b_literal{val=[Hd|Tl]}, LitMap, Count0, X, Acc) ->
    %% Handle the case that the entire argument list (the second argument
    %% for error/2) is a literal. Materialize the first cons cell as a
    %% put_list instruction and retry.
    {PutListDst,Count} = new_var('@put_list', Count0),
    PutList = #b_set{op=put_list,dst=PutListDst,
                     args=[#b_literal{val=Hd},#b_literal{val=Tl}]},
    unfold_arg_list([PutList|Is], PutListDst, LitMap, Count, X, Acc);
unfold_arg_list([#b_set{op=put_list,dst=List,
                         args=[Hd0,#b_literal{val=[Hd|Tl]}]}=I0|Is0],
                 List, LitMap, Count0, X, Acc) ->
    %% The rest of the argument list is a literal list. Materialize
    %% its first cons cell so the unfolding can continue into it.
    {PutListDst,Count} = new_var('@put_list', Count0),
    PutList = #b_set{op=put_list,dst=PutListDst,
                     args=[#b_literal{val=Hd},#b_literal{val=Tl}]},
    I = I0#b_set{args=[Hd0,PutListDst]},
    unfold_arg_list([I,PutList|Is0], List, LitMap, Count, X, Acc);
unfold_arg_list([#b_set{op=put_list,dst=List,args=[Hd0,Tl]}=I0|Is],
                 List, LitMap, Count, X, Acc) ->
    %% Unfold the head of the list; the X'th put_list (counting from
    %% the call backwards) builds the cell for argument register X.
    Hd = unfold_arg(Hd0, LitMap, X),
    I = I0#b_set{args=[Hd,Tl]},
    unfold_arg_list(Is, Tl, LitMap, Count, X + 1, [I|Acc]);
unfold_arg_list([I|Is], List, LitMap, Count, X, Acc) ->
    %% Some other instruction, such as bs_get_tail.
    unfold_arg_list(Is, List, LitMap, Count, X, [I|Acc]);
unfold_arg_list([], _, _, Count, _, Acc) ->
    {reverse(Acc),Count}.
2918
%% Unfold each call argument, numbering the arguments consecutively
%% starting from X0 (the callee itself is numbered -1).
unfold_call_args(Args, LitMap, X0) ->
    {Unfolded,_} = mapfoldl(fun(Arg, X) ->
                                    {unfold_arg(Arg, LitMap, X),X + 1}
                            end, X0, Args),
    Unfolded.
2923
%% Replace a literal argument with the variable known to hold the
%% same value in the same X register; otherwise keep it unchanged.
unfold_arg(#b_literal{val=Val}=Lit, LitMap, X) ->
    %% Only use the variable when it lives in the exact X register
    %% that this argument will be passed in; using a variable in the
    %% wrong register would still require a `move` instruction.
    Vars = maps:get(Val, LitMap, []),
    case keyfind(X, 1, Vars) of
        {X,Var} -> Var;
        false -> Lit
    end;
unfold_arg(Expr, _LitMap, _X) -> Expr.
2939
2940%%%
2941%%% Optimize tail calls created as the result of optimizations.
2942%%%
2943%%% Consider the following example of a tail call in Erlang code:
2944%%%
2945%%%    bar() ->
2946%%%        foo().
2947%%%
2948%%% The SSA code for the call will look like this:
2949%%%
2950%%%      @ssa_ret = call (`foo`/0)
2951%%%      ret @ssa_ret
2952%%%
2953%%% Sometimes optimizations create new tail calls. Consider this
2954%%% slight variation of the example:
2955%%%
2956%%%    bar() ->
2957%%%        {_,_} = foo().
2958%%%
2959%%%    foo() -> {a,b}.
2960%%%
2961%%% If beam_ssa_type can figure out that `foo/0` always returns a tuple
2962%%% of size two, the test for a tuple is no longer needed and the call
2963%%% to `foo/0` will become a tail call. However, the resulting SSA
2964%%% code will look like this:
2965%%%
2966%%%      @ssa_ret = call (`foo`/0)
2967%%%      @ssa_bool = succeeded:body @ssa_ret
2968%%%      br @ssa_bool, ^999, ^1
2969%%%
2970%%%    999:
2971%%%      ret @ssa_ret
2972%%%
%%% The beam_ssa_codegen pass will not recognize this code as a tail
%%% call and will generate an unnecessary stack frame. It may also
%%% generate unnecessary `kill` instructions.
2976%%%
2977%%% To avoid those extra instructions, this optimization will
2978%%% eliminate the `succeeded:body` and `br` instructions and insert
2979%%% the `ret` in the same block as the call:
2980%%%
2981%%%      @ssa_ret = call (`foo`/0)
2982%%%      ret @ssa_ret
2983%%%
2984%%% Finally, consider this example:
2985%%%
2986%%%    bar() ->
2987%%%        foo_ok(),
2988%%%        ok.
2989%%%
2990%%%    foo_ok() -> ok.
2991%%%
2992%%% The SSA code for the call to `foo_ok/0` will look like:
2993%%%
2994%%%      %% Result type: `ok`
2995%%%      @ssa_ignored = call (`foo_ok`/0)
2996%%%      @ssa_bool = succeeded:body @ssa_ignored
2997%%%      br @ssa_bool, ^999, ^1
2998%%%
2999%%%    999:
3000%%%      ret `ok`
3001%%%
3002%%% Since the call to `foo_ok/0` has an annotation indicating that the
3003%%% call will always return the atom `ok`, the code can be simplified
3004%%% like this:
3005%%%
3006%%%      @ssa_ignored = call (`foo_ok`/0)
3007%%%      ret @ssa_ignored
3008%%%
%%% The beam_jump pass does the same optimization, but it does it too
%%% late to avoid creating an unnecessary stack frame or unnecessary
%%% `kill` instructions.
3012%%%
3013
%% Sub-pass entry point: rewrite calls that are followed only by a
%% `succeeded:body` test and a `ret` into direct tail calls.
ssa_opt_tail_calls({#opt_st{ssa=Blocks0}=St, FuncDb}) ->
    Blocks = opt_tail_calls(beam_ssa:rpo(Blocks0), Blocks0),
    {St#opt_st{ssa=Blocks},FuncDb}.
3018
%% Try the tail-call rewrite on every block in turn.
opt_tail_calls(Ls, Blocks) ->
    lists:foldl(fun opt_tail_calls_1/2, Blocks, Ls).

%% Rewrite one block into a tail call if possible; otherwise return
%% the blocks unchanged.
opt_tail_calls_1(L, Blocks0) ->
    #b_blk{is=Is0,last=Last} = Blk0 = map_get(L, Blocks0),
    %% A candidate block ends with a two-way branch whose success
    %% label targets an empty block with a `ret` terminator.
    case is_potential_tail_call(Last, Blocks0) of
        {yes,Bool,Ret} ->
            %% `Ret` is the value returned from the target block
            %% (either a variable or a literal). Check that this
            %% block ends with a `call` instruction returning the
            %% same value as `Ret`, followed by a `succeeded:body`
            %% instruction defining `Bool`.
            case is_tail_call_is(Is0, Bool, Ret, []) of
                {yes,Is,Var} ->
                    %% A tail call. `Is` is the block's instructions
                    %% with `succeeded:body` removed, and `Var` is
                    %% the destination variable of the call. Return
                    %% `Var` directly from this block.
                    Blocks0#{L := Blk0#b_blk{is=Is,last=#b_ret{arg=Var}}};
                no ->
                    %% The block does not end with a call, or the
                    %% call does not return the same value as `Ret`.
                    Blocks0
            end;
        no ->
            Blocks0
    end.
3051
%% is_potential_tail_call(Terminator, Blocks) -> {yes,Bool,Ret} | no.
%%  A `succeeded:body` instruction must be followed by a two-way
%%  branch, and to be a potential tail call, the success label must
%%  target an empty block ending in a `ret` terminator.
is_potential_tail_call(#b_br{bool=#b_var{}=Bool,succ=Succ}, Blocks) ->
    case maps:get(Succ, Blocks, none) of
        #b_blk{is=[],last=#b_ret{arg=Arg}} ->
            {yes,Bool,Arg};
        _ ->
            %% The target block is not empty or does not end in `ret`.
            no
    end;
is_potential_tail_call(_, _) ->
    %% Not a two-way branch on a variable.
    no.
3065
%% is_tail_call_is(Is, Bool, Ret, Acc) -> {yes,Is',Var} | no.
%%  Test whether the block ends with a `call` followed by the
%%  `succeeded:body` instruction defining Bool, where the call's
%%  result is known to be the same value as Ret.
is_tail_call_is([#b_set{op=call,dst=Dst}=Call,
                 #b_set{op={succeeded,body},dst=Bool}],
                Bool, Ret, Acc) ->
    IsTailCall =
        case Ret of
            #b_literal{val=Val} ->
                %% The return value for this function is a literal.
                %% Now test whether it is the same literal that the
                %% `call` instruction returns.
                Type = beam_ssa:get_anno(result_type, Call, any),
                case beam_types:get_singleton_value(Type) of
                    {ok,Val} ->
                        %% Same value.
                        true;
                    {ok,_} ->
                        %% Wrong value.
                        false;
                    error ->
                        %% The type for the return value is not a singleton value.
                        false
                end;
            #b_var{} ->
                %% It is a tail call if the variable set by the `call` instruction
                %% is the same variable as the argument for the `ret` terminator.
                Ret =:= Dst
        end,
    case IsTailCall of
        true ->
            %% Return the instructions in the block with `succeeded:body` removed.
            Is = reverse(Acc, [Call]),
            {yes,Is,Dst};
        false ->
            no
    end;
is_tail_call_is([I|Is], Bool, Ret, Acc) ->
    is_tail_call_is(Is, Bool, Ret, [I|Acc]);
is_tail_call_is([], _Bool, _Ret, _Acc) -> no.
3103
3104%%%
3105%%% Common utilities.
3106%%%
3107
%% Add all elements of a list to a (version 2) set. The empty and
%% singleton cases are handled directly to avoid building a
%% temporary set.
list_set_union(List, Set) ->
    case List of
        [] ->
            Set;
        [Single] ->
            sets:add_element(Single, Set);
        [_|_] ->
            sets:union(sets:from_list(List, [{version, 2}]), Set)
    end.
3114
%% non_guards(Linear) -> gb_set of labels.
%%  Return the labels of all blocks that cannot be part of a guard
%%  (landingpad blocks and the exception block).
non_guards(Linear) ->
    gb_sets:from_list(non_guards_1(Linear)).
3117
%% Collect the labels of all blocks that begin with a landingpad
%% instruction, always including the exception block.
non_guards_1(Linear) ->
    [L || {L,#b_blk{is=[#b_set{op=landingpad}|_]}} <- Linear] ++
        [?EXCEPTION_BLOCK].
3127
%% Convert a relation (a list of {Key,Value} pairs) into a sorted
%% family: [{Key,[Value]}] with sorted, duplicate-free value lists.
rel2fam(Rel) ->
    sofs:to_external(sofs:rel2fam(sofs:relation(Rel))).
3132
%% sub(I, Sub) -> I'.
%%  Substitute variables in the instruction or terminator I according
%%  to the map Sub, and normalize the result.
sub(I, Sub) ->
    beam_ssa:normalize(sub_1(I, Sub)).
3135
%% Apply the substitution map to all variable operands of an
%% instruction or terminator.
sub_1(#b_set{op=phi,args=Args}=I, Sub) ->
    %% Phi arguments are {Value,PredecessorLabel} pairs; substitute
    %% only into the value part.
    I#b_set{args=[{sub_arg(Arg, Sub),Pred} || {Arg,Pred} <- Args]};
sub_1(#b_set{args=Args}=I, Sub) ->
    I#b_set{args=[sub_arg(Arg, Sub) || Arg <- Args]};
sub_1(#b_br{bool=#b_var{}=Bool}=Br, Sub) ->
    Br#b_br{bool=sub_arg(Bool, Sub)};
sub_1(#b_switch{arg=#b_var{}=Arg}=Sw, Sub) ->
    Sw#b_switch{arg=sub_arg(Arg, Sub)};
sub_1(#b_ret{arg=#b_var{}=Arg}=Ret, Sub) ->
    Ret#b_ret{arg=sub_arg(Arg, Sub)};
sub_1(Last, _) -> Last.
3150
%% Substitute a single argument. For remote call targets, substitute
%% into both the module and the name.
sub_arg(#b_remote{mod=Mod,name=Name}=Rem, Sub) ->
    Rem#b_remote{mod=sub_arg(Mod, Sub),name=sub_arg(Name, Sub)};
sub_arg(Old, Sub) ->
    maps:get(Old, Sub, Old).
3158
%% new_var(Base, Count) -> {Var,Count+1}.
%%  Create a fresh variable derived from a base variable or atom,
%%  numbered with the current counter.
new_var(#b_var{name={Prefix,Seq}}, Count) ->
    %% Assertion: an already-numbered variable must have an integer
    %% sequence number.
    true = is_integer(Seq),
    {#b_var{name={Prefix,Count}},Count+1};
new_var(#b_var{name=Prefix}, Count) ->
    {#b_var{name={Prefix,Count}},Count+1};
new_var(Prefix, Count) when is_atom(Prefix) ->
    {#b_var{name={Prefix,Count}},Count+1}.
3166