1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2018-2020. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20%% Dead code is code that is executed but has no effect. This
21%% optimization pass either removes dead code or jumps around it,
22%% potentially making it unreachable so that it can be dropped
23%% the next time beam_ssa:linearize/1 is called.
24%%
25
26-module(beam_ssa_dead).
27-export([opt/1]).
28
29-include("beam_ssa.hrl").
30-import(lists, [append/1,keymember/3,last/1,member/2,
31                reverse/1,sort/1,takewhile/2]).
32
33-type used_vars() :: #{beam_ssa:label():=cerl_sets:set(beam_ssa:var_name())}.
34
35-type basic_type_test() :: atom() | {'is_tagged_tuple',pos_integer(),atom()}.
36-type type_test() :: basic_type_test() | {'not',basic_type_test()}.
37-type op_name() :: atom().
38-type basic_rel_op() :: {op_name(),beam_ssa:b_var(),beam_ssa:value()} |
39                         {basic_type_test(),beam_ssa:value()}.
40-type rel_op() :: {op_name(),beam_ssa:b_var(),beam_ssa:value()} |
41                  {type_test(),beam_ssa:value()}.
42
43-record(st,
44        {bs :: beam_ssa:block_map(),
45         us :: used_vars(),
46         skippable :: #{beam_ssa:label():='true'},
47         rel_op=none :: 'none' | rel_op(),
48         target=any :: 'any' | 'one_way' | beam_ssa:label()
49        }).
50
51-spec opt([{Label0,Block0}]) -> [{Label,Block}] when
52      Label0 :: beam_ssa:label(),
53      Block0 :: beam_ssa:b_blk(),
54      Label :: beam_ssa:label(),
55      Block :: beam_ssa:b_blk().
56
57opt(Linear) ->
58    {Used,Skippable} = used_vars(Linear),
59    Blocks0 = maps:from_list(Linear),
60    St0 = #st{bs=Blocks0,us=Used,skippable=Skippable},
61    St = shortcut_opt(St0),
62    #st{bs=Blocks} = combine_eqs(St#st{us=#{}}),
63    beam_ssa:linearize(Blocks).
64
65%%%
66%%% Shortcut br/switch targets.
67%%%
68%%% A br/switch may branch to another br/switch that in turn always
69%%% branches to another target. Rewrite br/switch to refer to the
70%%% ultimate targets directly. That will save execution time, but
71%%% could also reduce the size of the code if some of the original
72%%% targets become unreachable and be deleted.
73%%%
74%%% When rewriting branches, we must be careful not to skip instructions
75%%% that have side effects or that bind variables that will be used
76%%% at the new target.
77%%%
78%%% We must also avoid branching to phi nodes.  The reason is
79%%% twofold. First, we might create a critical edge which is strictly
80%%% forbidden. Second, there will be a branch from a block that is not
81%%% listed in the list of predecessors in the phi node.  Those
82%%% limitations could probably be overcome, but it is not clear how
83%%% much that would improve the code.
84%%%
85
86shortcut_opt(#st{bs=Blocks}=St) ->
87    %% Processing the blocks in reverse post order seems to give more
88    %% opportunities for optimizations compared to post order. (Based on
89    %% running scripts/diffable with both PO and RPO and looking at
90    %% the diff.)
91    %%
92    %% Unfortunately, processing the blocks in reverse post order
93    %% potentially makes the time complexity quadratic, instead of
94    %% linear for post order processing. We avoid drastic slowdowns by
95    %% limiting how far we search forward to a common block that
96    %% both the success and failure label will reach (see the comment
97    %% in the first clause of shortcut_2/5).
98
99    Ls = beam_ssa:rpo(Blocks),
100    shortcut_opt(Ls, St).
101
102shortcut_opt([L|Ls], #st{bs=Blocks0}=St) ->
103    #b_blk{is=Is,last=Last0} = Blk0 = get_block(L, St),
104    case shortcut_terminator(Last0, Is, L, St) of
105        Last0 ->
106            %% No change. No need to update the block.
107            shortcut_opt(Ls, St);
108        Last ->
109            %% The terminator was simplified in some way.
110            %% Update the block.
111            Blk = Blk0#b_blk{last=Last},
112            Blocks = Blocks0#{L=>Blk},
113            shortcut_opt(Ls, St#st{bs=Blocks})
114    end;
115shortcut_opt([], St) -> St.
116
117shortcut_terminator(#b_br{bool=#b_literal{val=true},succ=Succ0},
118                    _Is, From, St0) ->
119    St = St0#st{rel_op=none},
120    shortcut(Succ0, From, #{}, St);
121shortcut_terminator(#b_br{bool=#b_var{}=Bool,succ=Succ0,fail=Fail0}=Br,
122                    Is, From, St0) ->
123    St = St0#st{target=one_way},
124    RelOp = get_rel_op(Bool, Is),
125
126    %% The boolean in a `br` is seldom used by the successors. By
127    %% not binding its value unless it is actually used we might be able
128    %% to skip some work in shortcut/4 and sub/2.
129    SuccBs = bind_var_if_used(Succ0, Bool, #b_literal{val=true}, St),
130    BrSucc = shortcut(Succ0, From, SuccBs, St#st{rel_op=RelOp}),
131    FailBs = bind_var_if_used(Fail0, Bool, #b_literal{val=false}, St),
132    BrFail = shortcut(Fail0, From, FailBs, St#st{rel_op=invert_op(RelOp)}),
133
134    case {BrSucc,BrFail} of
135        {#b_br{bool=#b_literal{val=true},succ=Succ},
136         #b_br{bool=#b_literal{val=true},succ=Fail}}
137          when Succ =/= Succ0; Fail =/= Fail0 ->
138            %% One or both of the targets were cut short.
139            beam_ssa:normalize(Br#b_br{succ=Succ,fail=Fail});
140        {_,_} ->
141            %% No change.
142            Br
143    end;
144shortcut_terminator(#b_switch{arg=Bool,fail=Fail0,list=List0}=Sw,
145                    _Is, From, St) ->
146    Fail = shortcut_sw_fail(Fail0, List0, Bool, From, St),
147    List = shortcut_sw_list(List0, Bool, From, St),
148    beam_ssa:normalize(Sw#b_switch{fail=Fail,list=List});
149shortcut_terminator(Last, _Is, _From, _St) ->
150    Last.
151
152shortcut_sw_fail(Fail0, List, Bool, From, St0) ->
153    case sort(List) of
154        [{#b_literal{val=false},_},
155         {#b_literal{val=true},_}] ->
156            RelOp = {{'not',is_boolean},Bool},
157            St = St0#st{rel_op=RelOp,target=one_way},
158            #b_br{bool=#b_literal{val=true},succ=Fail} =
159                shortcut(Fail0, From, #{}, St),
160            Fail;
161        _ ->
162            Fail0
163    end.
164
165shortcut_sw_list([{Lit,L0}|T], Bool, From, St0) ->
166    RelOp = {'=:=',Bool,Lit},
167    St = St0#st{rel_op=RelOp},
168    #b_br{bool=#b_literal{val=true},succ=L} =
169        shortcut(L0, From, bind_var(Bool, Lit, #{}), St#st{target=one_way}),
170    [{Lit,L}|shortcut_sw_list(T, Bool, From, St0)];
171shortcut_sw_list([], _, _, _) -> [].
172
173shortcut(L, _From, Bs, #st{rel_op=none,target=one_way}) when map_size(Bs) =:= 0 ->
174    %% There is no way that we can find a suitable branch, because there is no
175    %% relational operator stored, there are no bindings, and the block L can't
176    %% have any phi nodes from which we could pick bindings because when the target
177    %% is `one_way`, it implies the From block has a two-way `br` terminator.
178    #b_br{bool=#b_literal{val=true},succ=L,fail=L};
179shortcut(L, From, Bs, St) ->
180    shortcut_1(L, From, Bs, cerl_sets:new(), St).
181
182shortcut_1(L, From, Bs0, UnsetVars0, St) ->
183    case shortcut_2(L, From, Bs0, UnsetVars0, St) of
184        none ->
185            %% No more shortcuts found. Package up the previous
186            %% label in an unconditional branch.
187            #b_br{bool=#b_literal{val=true},succ=L,fail=L};
188        {#b_br{bool=#b_var{}}=Br,_,_} ->
189            %% This is a two-way branch. We can't do any better.
190            Br;
191        {#b_br{bool=#b_literal{val=true},succ=Succ},Bs,UnsetVars} ->
192            %% This is a safe `br`, but try to find a better one.
193            shortcut_1(Succ, L, Bs, UnsetVars, St)
194    end.
195
196%% Try to shortcut this block, branching to a successor.
197shortcut_2(L, From, Bs, UnsetVars, St) ->
198    case cerl_sets:size(UnsetVars) of
199        SetSize when SetSize > 128 ->
200            %% This is an heuristic to limit the search for a forced label
201            %% before it drastically slows down the compiler. Experiments
202            %% with scripts/diffable showed that limits larger than 31 did not
203            %% find any more opportunities for optimization.
204            none;
205        _SetSize ->
206            shortcut_3(L, From, Bs, UnsetVars, St)
207    end.
208
209shortcut_3(L, From, Bs0, UnsetVars0, St) ->
210    #b_blk{is=Is,last=Last} = get_block(L, St),
211    case eval_is(Is, From, Bs0, St) of
212        none ->
213            %% It is not safe to avoid this block because it
214            %% has instructions with potential side effects.
215            none;
216        Bs ->
217            %% The instructions in the block (if any) don't
218            %% have any side effects and can be skipped.
219            %% Evaluate the terminator.
220            case eval_terminator(Last, Bs, St) of
221                none ->
222                    %% The terminator is not suitable (could be
223                    %% because it is a switch that can't be simplified
224                    %% or it is a ret instruction).
225                    none;
226                #b_br{}=Br ->
227                    %% We have a potentially suitable br.
228                    %% Now update the set of variables that will never
229                    %% be set if this block will be skipped.
230                    case update_unset_vars(L, Is, Br, UnsetVars0, St) of
231                        unsafe ->
232                            %% It is unsafe to use this br,
233                            %% because it refers to a variable defined
234                            %% in this block.
235                            shortcut_unsafe_br(Br, L, Bs, UnsetVars0, St);
236                        UnsetVars ->
237                            %% Continue checking whether this br is
238                            %% suitable.
239                            shortcut_test_br(Br, L, Bs, UnsetVars, St)
240                    end
241            end
242    end.
243
244shortcut_test_br(Br, From, Bs, UnsetVars, St) ->
245    case is_br_safe(UnsetVars, Br, St) of
246        false ->
247            shortcut_unsafe_br(Br, From, Bs, UnsetVars, St);
248        true ->
249            shortcut_safe_br(Br, From, Bs, UnsetVars, St)
250    end.
251
252shortcut_unsafe_br(Br, From, Bs, UnsetVars, #st{target=Target}=St) ->
253    %% Branching using this `br` is unsafe, either because it
254    %% is an unconditional branch to a phi node, or because
255    %% one or more of the variables that are not set will be
256    %% used. Try to follow branches of this `br`, to find a
257    %% safe `br`.
258    case Br of
259        #b_br{bool=#b_literal{val=true},succ=L} ->
260            case Target of
261                L ->
262                    %% We have reached the forced target, and it
263                    %% is unsafe. Give up.
264                    none;
265                _ ->
266                    %% Try following this branch to see whether it
267                    %% leads to a safe `br`.
268                    shortcut_2(L, From, Bs, UnsetVars, St)
269            end;
270        #b_br{bool=#b_var{},succ=Succ,fail=Fail} ->
271            case {Succ,Fail} of
272                {L,Target} ->
273                    %% The failure label is the forced target.
274                    %% Try following the success label to see
275                    %% whether it also ultimately ends up at the
276                    %% forced target.
277                    shortcut_2(L, From, Bs, UnsetVars, St);
278                {Target,L} ->
279                    %% The success label is the forced target.
280                    %% Try following the failure label to see
281                    %% whether it also ultimately ends up at the
282                    %% forced target.
283                    shortcut_2(L, From, Bs, UnsetVars, St);
284                {_,_} ->
285                    case Target of
286                        any ->
287                            %% This two-way branch is unsafe. Try
288                            %% reducing it to a one-way branch.
289                            shortcut_two_way(Br, From, Bs, UnsetVars, St);
290                        one_way ->
291                            %% This two-way branch is unsafe. Try
292                            %% reducing it to a one-way branch.
293                            shortcut_two_way(Br, From, Bs, UnsetVars, St);
294                        _ when is_integer(Target) ->
295                            %% This two-way branch is unsafe, and
296                            %% there already is a forced target.
297                            %% Give up.
298                            none
299                    end
300            end
301    end.
302
303shortcut_safe_br(Br, From, Bs, UnsetVars, #st{target=Target}=St) ->
304    %% This `br` instruction is safe. It does not branch to a phi
305    %% node, and all variables that will be used are guaranteed to be
306    %% defined.
307    case Br of
308        #b_br{bool=#b_literal{val=true},succ=L} ->
309            %% This is a one-way branch.
310            case Target of
311                any ->
312                    %% No forced target. Success!
313                    {Br,Bs,UnsetVars};
314                one_way ->
315                    %% The target must be a one-way branch, which this
316                    %% `br` is. Success!
317                    {Br,Bs,UnsetVars};
318                L when is_integer(Target) ->
319                    %% The forced target is L. Success!
320                    {Br,Bs,UnsetVars};
321                _ when is_integer(Target) ->
322                    %% Wrong forced target. Try following this branch
323                    %% to see if it ultimately ends up at the forced
324                    %% target.
325                    shortcut_2(L, From, Bs, UnsetVars, St)
326            end;
327        #b_br{bool=#b_var{}} ->
328            %% This is a two-way branch.
329            if
330                Target =:= any; Target =:= one_way ->
331                    %% No specific forced target. Try to reduce the
332                    %% two-way branch to an one-way branch.
333                    case shortcut_two_way(Br, From, Bs, UnsetVars, St) of
334                        none when Target =:= any ->
335                            %% This `br` can't be reduced to a one-way
336                            %% branch. Return the `br` as-is.
337                            {Br,Bs,UnsetVars};
338                        none when Target =:= one_way ->
339                            %% This `br` can't be reduced to a one-way
340                            %% branch. The caller wants a one-way
341                            %% branch.  Give up.
342                            none;
343                        {_,_,_}=Res ->
344                            %% This `br` was successfully reduced to a
345                            %% one-way branch.
346                            Res
347                    end;
348                is_integer(Target) ->
349                    %% There is a forced target, which can't
350                    %% be reached because this `br` is a two-way
351                    %% branch. Give up.
352                    none
353            end
354    end.
355
356update_unset_vars(L, Is, Br, UnsetVars, #st{skippable=Skippable}) ->
357    case is_map_key(L, Skippable) of
358        true ->
359            %% None of the variables used in this block are used in
360            %% the successors. Thus, there is no need to add the
361            %% variables to the set of unset variables.
362            case Br of
363                #b_br{bool=#b_var{}=Bool} ->
364                    case keymember(Bool, #b_set.dst, Is) of
365                        true ->
366                            %% Bool is a variable defined in this
367                            %% block. Using the br instruction from
368                            %% this block (and skipping the body of
369                            %% the block) is unsafe.
370                            unsafe;
371                        false ->
372                            %% Bool is either a variable not defined
373                            %% in this block or a literal. Adding it
374                            %% to the UnsetVars set would not change
375                            %% the outcome of the tests in
376                            %% is_br_safe/2.
377                            UnsetVars
378                    end;
379                #b_br{} ->
380                    UnsetVars
381            end;
382        false ->
383            %% Some variables defined in this block are used by
384            %% successors. We must update the set of unset variables.
385            SetInThisBlock = [V || #b_set{dst=V} <- Is],
386            cerl_sets:union(UnsetVars, cerl_sets:from_list(SetInThisBlock))
387    end.
388
389shortcut_two_way(#b_br{succ=Succ,fail=Fail}, From, Bs0, UnsetVars0, St0) ->
390    case shortcut_2(Succ, From, Bs0, UnsetVars0, St0#st{target=Fail}) of
391        {#b_br{bool=#b_literal{},succ=Fail},_,_}=Res ->
392            Res;
393        none ->
394            St = St0#st{target=Succ},
395            case shortcut_2(Fail, From, Bs0, UnsetVars0, St) of
396                {#b_br{bool=#b_literal{},succ=Succ},_,_}=Res ->
397                    Res;
398                none ->
399                    none
400            end
401    end.
402
403get_block(L, St) ->
404    #st{bs=#{L:=Blk}} = St,
405    Blk.
406
407is_br_safe(UnsetVars, Br, #st{us=Us}=St) ->
408    %% Check that none of the unset variables will be used.
409    case Br of
410        #b_br{bool=#b_var{}=V,succ=Succ,fail=Fail} ->
411            #{Succ:=Used0,Fail:=Used1} = Us,
412
413            %% A two-way branch never branches to a phi node, so there
414            %% is no need to check for phi nodes here.
415            not cerl_sets:is_element(V, UnsetVars) andalso
416                cerl_sets:is_disjoint(Used0, UnsetVars) andalso
417                cerl_sets:is_disjoint(Used1, UnsetVars);
418        #b_br{succ=Same,fail=Same} ->
419            %% An unconditional branch must not jump to
420            %% a phi node.
421            not is_forbidden(Same, St) andalso
422                cerl_sets:is_disjoint(map_get(Same, Us), UnsetVars)
423    end.
424
425is_forbidden(L, St) ->
426    case get_block(L, St) of
427        #b_blk{is=[#b_set{op=phi}|_]} ->
428            true;
429        #b_blk{is=[#b_set{}=I|_]} ->
430            beam_ssa:is_loop_header(I);
431        #b_blk{} -> false
432    end.
433
434
435%% Evaluate the instructions in the block.
436%% Return the updated bindings, or 'none' if there is
437%% any instruction with potential side effects.
438
439eval_is([#b_set{op=phi,dst=Dst,args=Args}|Is], From, Bs0, St) ->
440    Val = get_phi_arg(Args, From),
441    Bs = bind_var(Dst, Val, Bs0),
442    eval_is(Is, From, Bs, St);
443eval_is([#b_set{op={succeeded,guard},dst=Dst,args=[Var]}], _From, Bs, _St) ->
444    case Bs of
445        #{Var:=#b_literal{}} ->
446            bind_var(Dst, #b_literal{val=true}, Bs);
447        #{} ->
448            Bs
449    end;
450eval_is([#b_set{op={bif,_},dst=Dst}=I0|Is], From, Bs, St) ->
451    I = sub(I0, Bs),
452    case eval_bif(I, St) of
453        #b_literal{}=Val ->
454            eval_is(Is, From, bind_var(Dst, Val, Bs), St);
455        none ->
456            eval_is(Is, From, Bs, St)
457    end;
458eval_is([#b_set{op=Op,dst=Dst}=I|Is], From, Bs, St)
459  when Op =:= is_tagged_tuple; Op =:= is_nonempty_list ->
460    #b_set{args=Args} = sub(I, Bs),
461    case eval_rel_op(Op, Args, St) of
462        #b_literal{}=Val ->
463            eval_is(Is, From, bind_var(Dst, Val, Bs), St);
464        none ->
465            eval_is(Is, From, Bs, St)
466    end;
467eval_is([#b_set{}=I|Is], From, Bs, St) ->
468    case beam_ssa:no_side_effect(I) of
469        true ->
470            %% This instruction has no side effects. It can
471            %% safely be omitted.
472            eval_is(Is, From, Bs, St);
473        false ->
474            %% This instruction may have some side effect.
475            %% It is not safe to avoid this instruction.
476            none
477    end;
478eval_is([], _From, Bs, _St) -> Bs.
479
480get_phi_arg([{Val,From}|_], From) -> Val;
481get_phi_arg([_|As], From) -> get_phi_arg(As, From).
482
483eval_terminator(#b_br{bool=#b_var{}=Bool}=Br, Bs, _St) ->
484    case get_value(Bool, Bs) of
485        #b_literal{val=Val}=Lit ->
486            case is_boolean(Val) of
487                true ->
488                    beam_ssa:normalize(Br#b_br{bool=Lit});
489                false ->
490                    %% Non-boolean literal. This means that this `br`
491                    %% terminator will never actually be reached with
492                    %% these bindings. (There must be a previous two-way
493                    %% branch that branches the other way when Bool
494                    %% is bound to a non-boolean literal.)
495                    none
496            end;
497        #b_var{}=Var ->
498            beam_ssa:normalize(Br#b_br{bool=Var})
499    end;
500eval_terminator(#b_br{bool=#b_literal{}}=Br, _Bs, _St) ->
501    beam_ssa:normalize(Br);
502eval_terminator(#b_switch{arg=Arg,fail=Fail,list=List}=Sw, Bs, St) ->
503    case get_value(Arg, Bs) of
504        #b_literal{}=Val ->
505            %% Literal argument. Simplify to a `br`.
506            beam_ssa:normalize(Sw#b_switch{arg=Val});
507        #b_var{} ->
508            %% Try optimizing the switch.
509            case eval_switch(List, Arg, St, Fail) of
510                none ->
511                    none;
512                To when is_integer(To) ->
513                    %% Either one of the values in the switch
514                    %% matched a previous value in a '=:=' test, or
515                    %% none of the values matched a previous test.
516                    #b_br{bool=#b_literal{val=true},succ=To,fail=To}
517            end
518    end;
519eval_terminator(#b_ret{}, _Bs, _St) ->
520    none.
521
522eval_switch(List, Arg, #st{rel_op={_,Arg,_}=PrevOp}, Fail) ->
523    %% There is a previous relational operator testing the same variable.
524    %% Optimization may be possible.
525    eval_switch_1(List, Arg, PrevOp, Fail);
526eval_switch(_, _, _, _) ->
527    %% There is either no previous relational operator, or it tests
528    %% a different variable. Nothing to optimize.
529    none.
530
531eval_switch_1([{Lit,Lbl}|T], Arg, PrevOp, Fail) ->
532    RelOp = {'=:=',Arg,Lit},
533    case will_succeed(PrevOp, RelOp) of
534        yes ->
535            %% Success. This branch will always be taken.
536            Lbl;
537        no ->
538            %% This branch will never be taken.
539            eval_switch_1(T, Arg, PrevOp, Fail);
540        maybe ->
541            %% This label could be reached.
542            eval_switch_1(T, Arg, PrevOp, none)
543    end;
544eval_switch_1([], _Arg, _PrevOp, Fail) ->
545    %% Fail is now either the failure label or 'none'.
546    Fail.
547
548bind_var_if_used(L, Var, Val, #st{us=Us}) ->
549    case cerl_sets:is_element(Var, map_get(L, Us)) of
550        true -> #{Var=>Val};
551        false -> #{}
552    end.
553
554bind_var(Var, Val0, Bs) ->
555    Val = get_value(Val0, Bs),
556    Bs#{Var=>Val}.
557
558get_value(#b_var{}=Var, Bs) ->
559    case Bs of
560        #{Var:=Val} -> get_value(Val, Bs);
561        #{} -> Var
562    end;
563get_value(#b_literal{}=Lit, _Bs) -> Lit.
564
565eval_bif(#b_set{op={bif,Bif},args=Args}, St) ->
566    Arity = length(Args),
567    case erl_bifs:is_pure(erlang, Bif, Arity) of
568        false ->
569            none;
570        true ->
571            case get_lit_args(Args) of
572                none ->
573                    %% Not literal arguments. Try to evaluate
574                    %% it based on a previous relational operator.
575                    eval_rel_op({bif,Bif}, Args, St);
576                LitArgs ->
577                    try apply(erlang, Bif, LitArgs) of
578                        Val -> #b_literal{val=Val}
579                    catch
580                        error:_ -> none
581                    end
582            end
583    end.
584
585get_lit_args([#b_literal{val=Lit1}]) ->
586    [Lit1];
587get_lit_args([#b_literal{val=Lit1},
588              #b_literal{val=Lit2}]) ->
589    [Lit1,Lit2];
590get_lit_args([#b_literal{val=Lit1},
591              #b_literal{val=Lit2},
592              #b_literal{val=Lit3}]) ->
593    [Lit1,Lit2,Lit3];
594get_lit_args(_) -> none.
595
596%%%
597%%% Handling of relational operators.
598%%%
599
600get_rel_op(Bool, [_|_]=Is) ->
601    case last(Is) of
602        #b_set{op=Op,dst=Bool,args=Args} ->
603            normalize_op(Op, Args);
604        #b_set{} ->
605            none
606    end;
607get_rel_op(_, []) -> none.
608
609%% normalize_op(Instruction) -> {Normalized,FailLabel} | error
610%%    Normalized = {Operator,Variable,Variable|Literal} |
611%%                 {TypeTest,Variable}
612%%    Operation = '<' | '=<' | '=:=' | '=/=' | '>=' | '>'
613%%    TypeTest = is_atom | is_integer ...
614%%    Variable = #b_var{}
615%%    Literal = #b_literal{}
616%%
617%%  Normalize a relational operator to facilitate further
618%%  comparisons between operators. Always make the register
619%%  operand the first operand. If there are two registers,
620%%  order the registers in lexical order.
621%%
622%%  For example, this instruction:
623%%
624%%    #b_set{op={bif,=<},args=[#b_literal{}, #b_var{}}
625%%
626%%  will be normalized to:
627%%
628%%    {'=<',#b_var{},#b_literal{}}
629
630-spec normalize_op(Op, Args) -> NormalizedOp | 'none' when
631      Op :: beam_ssa:op(),
632      Args :: [beam_ssa:value()],
633      NormalizedOp :: basic_rel_op().
634
635normalize_op(is_tagged_tuple, [Arg,#b_literal{val=Size},#b_literal{val=Tag}])
636  when is_integer(Size), is_atom(Tag) ->
637    {{is_tagged_tuple,Size,Tag},Arg};
638normalize_op(is_nonempty_list, [Arg]) ->
639    {is_nonempty_list,Arg};
640normalize_op({bif,Bif}, [Arg]) ->
641    case erl_internal:new_type_test(Bif, 1) of
642        true -> {Bif,Arg};
643        false -> none
644    end;
645normalize_op({bif,Bif}, [_,_]=Args) ->
646    case erl_internal:comp_op(Bif, 2) of
647        true ->
648            normalize_op_1(Bif, Args);
649        false ->
650            none
651    end;
652normalize_op(_, _) -> none.
653
654normalize_op_1(Bif, Args) ->
655    case Args of
656        [#b_literal{}=Arg1,#b_var{}=Arg2] ->
657            {turn_op(Bif),Arg2,Arg1};
658        [#b_var{}=Arg1,#b_literal{}=Arg2] ->
659            {Bif,Arg1,Arg2};
660        [#b_var{}=A,#b_var{}=B] ->
661            if A < B -> {Bif,A,B};
662               true -> {turn_op(Bif),B,A}
663            end;
664        [#b_literal{},#b_literal{}] ->
665            none
666    end.
667
668-spec invert_op(basic_rel_op() | 'none') -> rel_op() | 'none'.
669
670invert_op({Op,Arg1,Arg2}) ->
671    {invert_op_1(Op),Arg1,Arg2};
672invert_op({TypeTest,Arg}) ->
673    {{'not',TypeTest},Arg};
674invert_op(none) -> none.
675
676invert_op_1('>=') -> '<';
677invert_op_1('<') -> '>=';
678invert_op_1('=<') -> '>';
679invert_op_1('>') -> '=<';
680invert_op_1('=:=') -> '=/=';
681invert_op_1('=/=') -> '=:=';
682invert_op_1('==') -> '/=';
683invert_op_1('/=') -> '=='.
684
685turn_op('<') -> '>';
686turn_op('=<') -> '>=';
687turn_op('>') -> '<';
688turn_op('>=') -> '=<';
689turn_op('=:='=Op) -> Op;
690turn_op('=/='=Op) -> Op;
691turn_op('=='=Op) -> Op;
692turn_op('/='=Op) -> Op.
693
694eval_rel_op(_Bif, _Args, #st{rel_op=none}) ->
695    none;
696eval_rel_op(Bif, Args, #st{rel_op=Prev}) ->
697    case normalize_op(Bif, Args) of
698        none ->
699            none;
700        RelOp ->
701            case will_succeed(Prev, RelOp) of
702                yes -> #b_literal{val=true};
703                no -> #b_literal{val=false};
704                maybe -> none
705            end
706    end.
707
708%% will_succeed(PrevCondition, Condition) -> yes | no | maybe
709%%  PrevCondition is a condition known to be true. This function
710%%  will tell whether Condition will succeed.
711
712will_succeed({_,_,_}=Same, {_,_,_}=Same) ->
713    %% Repeated test.
714    yes;
715will_succeed({Op1,Var,#b_literal{val=A}}, {Op2,Var,#b_literal{val=B}}) ->
716    will_succeed_1(Op1, A, Op2, B);
717will_succeed({Op1,Var,#b_var{}=A}, {Op2,Var,#b_var{}=B}) ->
718    will_succeed_vars(Op1, A, Op2, B);
719will_succeed({'=:=',Var,#b_literal{val=A}}, {TypeTest,Var}) ->
720    eval_type_test(TypeTest, A);
721will_succeed({_,_}=Same, {_,_}=Same) ->
722    %% Repeated type test.
723    yes;
724will_succeed({Test1,Var}, {Test2,Var}) ->
725    will_succeed_test(Test1, Test2);
726will_succeed({{'not',is_boolean},Var}, {'=:=',Var,#b_literal{val=Lit}})
727  when is_boolean(Lit) ->
728    no;
729will_succeed({_,_}, {_,_}) ->
730    maybe;
731will_succeed({_,_}, {_,_,_}) ->
732    maybe;
733will_succeed({_,_,_}, {_,_}) ->
734    maybe;
735will_succeed({_,_,_}, {_,_,_}) ->
736    maybe.
737
738will_succeed_test({'not',Test1}, Test2) ->
739    case Test1 =:= Test2 of
740        true -> no;
741        false -> maybe
742    end;
743will_succeed_test(is_tuple, {is_tagged_tuple,_,_}) ->
744    maybe;
745will_succeed_test({is_tagged_tuple,_,_}, is_tuple) ->
746    yes;
747will_succeed_test(is_list, is_nonempty_list) ->
748    maybe;
749will_succeed_test(is_nonempty_list, is_list) ->
750    yes;
751will_succeed_test(_T1, _T2) ->
752    maybe.
753
754will_succeed_1('=:=', A, '<', B) ->
755    if
756	B =< A -> no;
757	true -> yes
758    end;
759will_succeed_1('=:=', A, '=<', B) ->
760    if
761	B < A -> no;
762	true -> yes
763    end;
764will_succeed_1('=:=', A, '=:=', B) when A =/= B ->
765    no;
766will_succeed_1('=:=', A, '=/=', B) ->
767    if
768	A =:= B -> no;
769	true -> yes
770    end;
771will_succeed_1('=:=', A, '>=', B) ->
772    if
773	B > A -> no;
774	true -> yes
775    end;
776will_succeed_1('=:=', A, '>', B) ->
777    if
778	B >= A -> no;
779	true -> yes
780    end;
781
782will_succeed_1('=/=', A, '=:=', B) when A =:= B -> no;
783
784will_succeed_1('<', A, '=:=', B)  when B >= A -> no;
785will_succeed_1('<', A, '<',   B)  when B >= A -> yes;
786will_succeed_1('<', A, '=<',  B)  when B >= A -> yes;
787will_succeed_1('<', A, '>=',  B)  when B >= A -> no;
788will_succeed_1('<', A, '>',   B)  when B >= A -> no;
789
790will_succeed_1('=<', A, '=:=', B) when B > A  -> no;
791will_succeed_1('=<', A, '<',   B) when B > A  -> yes;
792will_succeed_1('=<', A, '=<',  B) when B >= A -> yes;
793will_succeed_1('=<', A, '>=',  B) when B > A  -> no;
794will_succeed_1('=<', A, '>',   B) when B >= A -> no;
795
796will_succeed_1('>=', A, '=:=', B) when B < A  -> no;
797will_succeed_1('>=', A, '<',   B) when B =< A -> no;
798will_succeed_1('>=', A, '=<',  B) when B < A  -> no;
799will_succeed_1('>=', A, '>=',  B) when B =< A -> yes;
800will_succeed_1('>=', A, '>',   B) when B < A  -> yes;
801
802will_succeed_1('>', A, '=:=', B)  when B =< A -> no;
803will_succeed_1('>', A, '<',   B)  when B =< A -> no;
804will_succeed_1('>', A, '=<',  B)  when B =< A -> no;
805will_succeed_1('>', A, '>=',  B)  when B =< A -> yes;
806will_succeed_1('>', A, '>',   B)  when B =< A -> yes;
807
808will_succeed_1('==', A, '==', B) ->
809    if
810	A == B -> yes;
811	true -> no
812    end;
813will_succeed_1('==', A, '/=', B) ->
814    if
815	A == B -> no;
816	true -> yes
817    end;
818will_succeed_1('/=', A, '/=', B) when A == B -> yes;
819will_succeed_1('/=', A, '==', B) when A == B -> no;
820
821will_succeed_1(_, _, _, _) -> maybe.
822
823will_succeed_vars('=/=', Val, '=:=', Val) -> no;
824will_succeed_vars('=:=', Val, '=/=', Val) -> no;
825will_succeed_vars('=:=', Val, '>=',  Val) -> yes;
826will_succeed_vars('=:=', Val, '=<',  Val) -> yes;
827
828will_succeed_vars('/=', Val1, '==', Val2) when Val1 == Val2 -> no;
829will_succeed_vars('==', Val1, '/=', Val2) when Val1 == Val2 -> no;
830
831will_succeed_vars(_, _, _, _) -> maybe.
832
833eval_type_test(Test, Arg) ->
834    case eval_type_test_1(Test, Arg) of
835        true -> yes;
836        false -> no
837    end.
838
839eval_type_test_1(is_nonempty_list, Arg) ->
840    case Arg of
841        [_|_] -> true;
842        _ -> false
843    end;
844eval_type_test_1({is_tagged_tuple,Sz,Tag}, Arg) ->
845    if
846        tuple_size(Arg) =:= Sz, element(1, Arg) =:= Tag ->
847            true;
848        true ->
849            false
850    end;
851eval_type_test_1(Test, Arg) ->
852    erlang:Test(Arg).
853
854%%%
855%%% Combine bif:'=:=' and switch instructions
856%%% to switch instructions.
857%%%
858%%% Consider this code:
859%%%
860%%%     0:
861%%%       @ssa_bool = bif:'=:=' Var, literal 1
862%%%       br @ssa_bool, label 2, label 3
863%%%
864%%%     2:
865%%%       ret literal a
866%%%
867%%%     3:
868%%%       @ssa_bool:7 = bif:'=:=' Var, literal 2
869%%%       br @ssa_bool:7, label 4, label 999
870%%%
871%%%     4:
872%%%       ret literal b
873%%%
874%%%     999:
875%%%       .
876%%%       .
877%%%       .
878%%%
879%%% The two bif:'=:=' instructions can be combined
880%%% to a switch:
881%%%
882%%%     0:
883%%%       switch Var, label 999, [ { literal 1, label 2 },
884%%%                                { literal 2, label 3 } ]
885%%%
886%%%     2:
887%%%       ret literal a
888%%%
889%%%     4:
890%%%       ret literal b
891%%%
892%%%     999:
893%%%       .
894%%%       .
895%%%       .
896%%%
897
898combine_eqs(#st{bs=Blocks}=St) ->
899    Ls = reverse(beam_ssa:rpo(Blocks)),
900    combine_eqs_1(Ls, St).
901
902combine_eqs_1([L|Ls], #st{bs=Blocks0}=St0) ->
903    case comb_get_sw(L, St0) of
904        none ->
905            combine_eqs_1(Ls, St0);
906        {_,Arg,_,Fail0,List0} ->
907            case comb_get_sw(Fail0, St0) of
908                {true,Arg,Fail1,Fail,List1} ->
909                    %% Another switch/br with the same arguments was
910                    %% found. Try combining them.
911                    case combine_lists(Fail1, List0, List1, Blocks0) of
912                        none ->
913                            %% Different types of literals in the lists,
914                            %% or the success cases in the first switch
915                            %% could branch to the second switch
916                            %% (increasing code size and repeating tests).
917                            combine_eqs_1(Ls, St0);
918                        List ->
919                            %% Everything OK! Combine the lists.
920                            Sw0 = #b_switch{arg=Arg,fail=Fail,list=List},
921                            Sw = beam_ssa:normalize(Sw0),
922                            Blk0 = map_get(L, Blocks0),
923                            Blk = Blk0#b_blk{last=Sw},
924                            Blocks = Blocks0#{L:=Blk},
925                            St = St0#st{bs=Blocks},
926                            combine_eqs_1(Ls, St)
927                    end;
928                {true,_OtherArg,_,_,_} ->
929                    %% The other switch/br uses a different Arg.
930                    combine_eqs_1(Ls, St0);
931                {false,_,_,_,_} ->
932                    %% Not safe: Bindings of variables that will be used
933                    %% or execution of instructions with potential
934                    %% side effects will be skipped.
935                    combine_eqs_1(Ls, St0);
936                none ->
937                    %% No switch/br at this label.
938                    combine_eqs_1(Ls, St0)
939            end
940    end;
941combine_eqs_1([], St) -> St.
942
943comb_get_sw(L, #st{bs=Blocks,skippable=Skippable}) ->
944    #b_blk{is=Is,last=Last} = map_get(L, Blocks),
945    Safe0 = is_map_key(L, Skippable),
946    case Last of
947        #b_ret{} ->
948            none;
949        #b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail} ->
950            case comb_is(Is, Bool, Safe0) of
951                {none,_} ->
952                    none;
953                {#b_set{op={bif,'=:='},args=[#b_var{}=Arg,#b_literal{}=Lit]},Safe} ->
954                    {Safe,Arg,L,Fail,[{Lit,Succ}]};
955                {#b_set{},_} ->
956                    none
957            end;
958        #b_br{} ->
959            none;
960        #b_switch{arg=#b_var{}=Arg,fail=Fail,list=List} ->
961            {none,Safe} = comb_is(Is, none, Safe0),
962            {Safe,Arg,L,Fail,List}
963    end.
964
965comb_is([#b_set{dst=#b_var{}=Bool}=I], Bool, Safe) ->
966    {I,Safe};
967comb_is([#b_set{}=I|Is], Bool, Safe0) ->
968    Safe = Safe0 andalso beam_ssa:no_side_effect(I),
969    comb_is(Is, Bool, Safe);
970comb_is([], _Bool, Safe) ->
971    {none,Safe}.
972
973%% combine_list(Fail, List1, List2, Blocks) -> List|none.
974%%  Try to combine two switch lists, returning the combined
975%%  list or 'none' if not possible.
976%%
977%%  The values in the two lists must be all of the same type.
978%%
979%%  The code reached from the labels in the first list must
980%%  not reach the failure label (if they do, tests could
981%%  be repeated).
982%%
983
984combine_lists(Fail, L1, L2, Blocks) ->
985    Ls = beam_ssa:rpo([Lbl || {_,Lbl} <- L1], Blocks),
986    case member(Fail, Ls) of
987        true ->
988            %% One or more of labels in the first list
989            %% could reach the failure label. That
990            %% means that the second switch/br instruction
991            %% will be retained, increasing code size and
992            %% potentially also execution time.
993            none;
994        false ->
995            %% The combined switch will replace both original
996            %% br/switch instructions, leading to a reduction in code
997            %% size and potentially also in execution time.
998            combine_lists_1(L1, L2)
999    end.
1000
1001combine_lists_1(List0, List1) ->
1002    case are_lists_compatible(List0, List1) of
1003        true ->
1004            First = maps:from_list(List0),
1005            List0 ++ [{Val,Lbl} || {Val,Lbl} <- List1,
1006                                   not is_map_key(Val, First)];
1007        false ->
1008            none
1009    end.
1010
1011are_lists_compatible([{#b_literal{val=Val1},_}|_],
1012                     [{#b_literal{val=Val2},_}|_]) ->
1013    case lit_type(Val1) of
1014        none -> false;
1015        Type -> Type =:= lit_type(Val2)
1016    end.
1017
1018lit_type(Val) ->
1019    if
1020        is_atom(Val) -> atom;
1021        is_float(Val) -> float;
1022        is_integer(Val) -> integer;
1023        true -> none
1024    end.
1025
1026%%%
1027%%% Calculate used variables for each block.
1028%%%
1029
1030used_vars(Linear) ->
1031    used_vars(reverse(Linear), #{}, #{}).
1032
1033used_vars([{L,#b_blk{is=Is}=Blk}|Bs], UsedVars0, Skip0) ->
1034    %% Calculate the variables used by each block and its
1035    %% successors. This information is used by
1036    %% shortcut_opt/1.
1037
1038    Successors = beam_ssa:successors(Blk),
1039    Used0 = used_vars_succ(Successors, L, UsedVars0, cerl_sets:new()),
1040    Used = used_vars_blk(Blk, Used0),
1041    UsedVars = used_vars_phis(Is, L, Used, UsedVars0),
1042
1043    %% combine_eqs/1 needs different variable usage information than
1044    %% shortcut_opt/1. The Skip map will have an entry for each block
1045    %% that can be skipped (does not bind any variable used in
1046    %% successor). This information is also useful for speeding up
1047    %% shortcut_opt/1.
1048
1049    Defined0 = [Def || #b_set{dst=Def} <- Is],
1050    Defined = cerl_sets:from_list(Defined0),
1051    MaySkip = cerl_sets:is_disjoint(Defined, Used0),
1052    case MaySkip of
1053        true ->
1054            Skip = Skip0#{L=>true},
1055            used_vars(Bs, UsedVars, Skip);
1056        false ->
1057            used_vars(Bs, UsedVars, Skip0)
1058    end;
1059used_vars([], UsedVars, Skip) ->
1060    {UsedVars,Skip}.
1061
1062used_vars_succ([S|Ss], L, LiveMap, Live0) ->
1063    Key = {S,L},
1064    case LiveMap of
1065        #{Key:=Live} ->
1066            %% The successor has a phi node, and the value for
1067            %% this block in the phi node is a variable.
1068            used_vars_succ(Ss, L, LiveMap, cerl_sets:union(Live, Live0));
1069        #{S:=Live} ->
1070            %% No phi node in the successor, or the value for
1071            %% this block in the phi node is a literal.
1072            used_vars_succ(Ss, L, LiveMap, cerl_sets:union(Live, Live0));
1073        #{} ->
1074            %% A peek_message block which has not been processed yet.
1075            used_vars_succ(Ss, L, LiveMap, Live0)
1076    end;
1077used_vars_succ([], _, _, Acc) -> Acc.
1078
1079used_vars_phis(Is, L, Live0, UsedVars0) ->
1080    UsedVars = UsedVars0#{L=>Live0},
1081    Phis = takewhile(fun(#b_set{op=Op}) -> Op =:= phi end, Is),
1082    case Phis of
1083        [] ->
1084            UsedVars;
1085        [_|_] ->
1086            PhiArgs = append([Args || #b_set{args=Args} <- Phis]),
1087            case [{P,V} || {#b_var{}=V,P} <- PhiArgs] of
1088                [_|_]=PhiVars ->
1089                    PhiLive0 = rel2fam(PhiVars),
1090                    PhiLive = [{{L,P},cerl_sets:union(cerl_sets:from_list(Vs), Live0)} ||
1091                                  {P,Vs} <- PhiLive0],
1092                    maps:merge(UsedVars, maps:from_list(PhiLive));
1093                [] ->
1094                    %% There were only literals in the phi node(s).
1095                    UsedVars
1096            end
1097    end.
1098
1099used_vars_blk(#b_blk{is=Is,last=Last}, Used0) ->
1100    Used = cerl_sets:union(Used0, cerl_sets:from_list(beam_ssa:used(Last))),
1101    used_vars_is(reverse(Is), Used).
1102
1103used_vars_is([#b_set{op=phi}|Is], Used) ->
1104    used_vars_is(Is, Used);
1105used_vars_is([#b_set{dst=Dst}=I|Is], Used0) ->
1106    Used1 = cerl_sets:union(Used0, cerl_sets:from_list(beam_ssa:used(I))),
1107    Used = cerl_sets:del_element(Dst, Used1),
1108    used_vars_is(Is, Used);
1109used_vars_is([], Used) ->
1110    Used.
1111
1112%%%
1113%%% Common utilities.
1114%%%
1115
1116sub(#b_set{args=Args}=I, Sub) when map_size(Sub) =/= 0 ->
1117    I#b_set{args=[sub_arg(A, Sub) || A <- Args]};
1118sub(I, _Sub) -> I.
1119
1120sub_arg(#b_var{}=Old, Sub) ->
1121    case Sub of
1122        #{Old:=New} -> New;
1123        #{} -> Old
1124    end;
1125sub_arg(Old, _Sub) -> Old.
1126
1127rel2fam(S0) ->
1128    S1 = sofs:relation(S0),
1129    S = sofs:rel2fam(S1),
1130    sofs:to_external(S).
1131