1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 1999-2020. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20%%% Purpose : Optimise jumps and remove unreachable code.
21
22-module(beam_jump).
23
24-export([module/2,
25	 remove_unused_labels/1]).
26
27%%% The following optimisations are done:
28%%%
29%%% (1) This code with two identical instruction sequences
30%%%
31%%%     L1: <Instruction sequence>
32%%%     L2:
33%%%          . . .
34%%%     L3: <Instruction sequence>
35%%%     L4:
36%%%
37%%%     can be replaced with
38%%%
39%%%     L1: jump L3
40%%%     L2:
41%%%          . . .
42%%%     L3: <Instruction sequence>
43%%%     L4
44%%%
45%%%     Note: The instruction sequence must end with an instruction
46%%%     such as a jump that never transfers control to the instruction
47%%%     following it.
48%%%
49%%% (2) Short sequences starting with a label and ending in case_end, if_end,
50%%%     and badmatch, and function calls that cause an exit (such as calls
51%%%     to exit/1) are moved to the end of the function, but only if the
52%%%     the block is not entered via a fallthrough. The purpose of this move
53%%%     is to allow further optimizations at the place from which the
54%%%     code was moved (a jump around the block could be replaced with a
55%%%     fallthrough).
56%%%
57%%% (3) Any unreachable code is removed.  Unreachable code is code
58%%%     after jump, call_last and other instructions which never
59%%%     transfer control to the following instruction.  Code is
60%%%     unreachable up to the next *referenced* label.  Note that the
61%%%     optimisations below might generate more possibilities for
62%%%     removing unreachable code.
63%%%
64%%% (4) This code:
65%%%	L1:	jump L2
66%%%          . . .
67%%%     L2: ...
68%%%
69%%%    will be changed to
70%%%
71%%%    jump L2
72%%%          . . .
73%%%    L2: ...
74%%%
75%%%    and all preceding uses of L1 renamed to L2.
76%%%    If the jump is unreachable, it will be removed according to (1).
77%%%
78%%% (5) In
79%%%
80%%%	 jump L1
81%%%      L1:
82%%%
83%%%	 the jump (but not the label) will be removed.
84%%%
85%%% (6) If test instructions are used to skip a single jump instruction,
86%%%      the test is inverted and the jump is eliminated (provided that
87%%%      the test can be inverted).  Example:
88%%%
89%%%      is_eq L1 {x,1} {x,2}
90%%%      jump L2
91%%%      L1:
92%%%
93%%%      will be changed to
94%%%
95%%%      is_ne L2 {x,1} {x,2}
96%%%      L1:
97%%%
98%%%      Because there may be backward references to the label L1
99%%%      (for instance from the wait_timeout/1 instruction), we will
100%%%      always keep the label. (beam_clean will remove any unused
101%%%      labels.)
102%%%
103%%% (7)  Replace a jump to a return instruction with a return instruction.
104%%%      Similarly, replace a jump to deallocate + return with those
105%%%      instructions.
106%%%
107%%% Note: This modules depends on (almost) all branches and jumps only
108%%% going forward, so that we can remove instructions (including definition
109%%% of labels) after any label that has not been referenced by the code
110%%% preceeding the labels. Regarding the few instructions that have backward
111%%% references to labels, we assume that they only transfer control back
112%%% to an instruction that has already been executed. That is, code such as
113%%%
114%%%         jump L_entry
115%%%
116%%%      L_again:
117%%%           .
118%%%           .
119%%%           .
120%%%      L_entry:
121%%%           .
122%%%           .
123%%%           .
124%%%         jump L_again;
125%%%
126%%% is NOT allowed (and such code is never generated by the code generator).
127%%%
128%%% Terminology note: The optimisation done here is called unreachable-code
129%%% removal, NOT dead-code elimination.  Dead code elimination means the
130%%% removal of instructions that are executed, but have no visible effect
131%%% on the program state.
132%%%
133
134-import(lists, [foldl/3,keymember/3,mapfoldl/3,reverse/1,reverse/2]).
135
136-type instruction() :: beam_utils:instruction().
137
138-include("beam_types.hrl").
139
140-spec module(beam_utils:module_code(), [compile:option()]) ->
141                    {'ok',beam_utils:module_code()}.
142
143module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) ->
144    {Fs,Lc} = mapfoldl(fun function/2, Lc0, Fs0),
145    {ok,{Mod,Exp,Attr,Fs,Lc}}.
146
147%% function(Function) -> Function'
148%%  Optimize jumps and branches.
149%%
150%%  NOTE: This function assumes that there are no labels inside blocks.
151function({function,Name,Arity,CLabel,Asm0}, Lc0) ->
152    try
153        Asm1 = eliminate_moves(Asm0),
154        {Asm2,Lc} = insert_labels(Asm1, Lc0, []),
155        Asm3 = share(Asm2),
156        Asm4 = move(Asm3),
157        Asm5 = opt(Asm4, CLabel),
158        Asm6 = unshare(Asm5),
159        Asm = remove_unused_labels(Asm6),
160        {{function,Name,Arity,CLabel,Asm},Lc}
161    catch
162        Class:Error:Stack ->
163	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
164	    erlang:raise(Class, Error, Stack)
165    end.
166
167%%%
168%%% Scan instructions in execution order and remove redundant 'move'
169%%% instructions. 'move' instructions are redundant if we know that
170%%% the register already contains the value being assigned, as in the
171%%% following code:
172%%%
173%%%           select_val Register FailLabel [... Literal => L1...]
174%%%                      .
175%%%                      .
176%%%                      .
177%%%   L1:     move Literal Register
178%%%
179
180eliminate_moves(Is) ->
181    eliminate_moves(Is, #{}, []).
182
183eliminate_moves([{select,select_val,Reg,{f,Fail},List}=I|Is], D0, Acc) ->
184    D1 = add_unsafe_label(Fail, D0),
185    D = update_value_dict(List, Reg, D1),
186    eliminate_moves(Is, D, [I|Acc]);
187eliminate_moves([{test,is_eq_exact,_,[Reg,Val]}=I,
188                 {block,BlkIs0}|Is], D0, Acc) ->
189    D = update_unsafe_labels(I, D0),
190    RegVal = {Reg,Val},
191    BlkIs = eliminate_moves_blk(BlkIs0, RegVal),
192    eliminate_moves([{block,BlkIs}|Is], D, [I|Acc]);
193eliminate_moves([{test,is_nonempty_list,Fail,[Reg]}=I|Is], D0, Acc) ->
194    case is_proper_list(Reg, Acc) of
195        true ->
196            D = update_value_dict([nil,Fail], Reg, D0),
197            eliminate_moves(Is, D, [I|Acc]);
198        false ->
199            D = update_unsafe_labels(I, D0),
200            eliminate_moves(Is, D, [I|Acc])
201    end;
202eliminate_moves([{label,Lbl},{block,BlkIs0}=Blk|Is], D, Acc0) ->
203    Acc = [{label,Lbl}|Acc0],
204    case {no_fallthrough(Acc0),D} of
205        {true,#{Lbl:={_,_}=RegVal}} ->
206            BlkIs = eliminate_moves_blk(BlkIs0, RegVal),
207            eliminate_moves([{block,BlkIs}|Is], D, Acc);
208        {_,_} ->
209            eliminate_moves([Blk|Is], D, Acc)
210    end;
211eliminate_moves([{call,_,_}=I|Is], D, Acc) ->
212    eliminate_moves_call(Is, D, [I | Acc]);
213eliminate_moves([{call_ext,_,_}=I|Is], D, Acc) ->
214    eliminate_moves_call(Is, D, [I | Acc]);
215eliminate_moves([{block,[]}|Is], D, Acc) ->
216    %% Empty blocks can prevent further jump optimizations.
217    eliminate_moves(Is, D, Acc);
218eliminate_moves([I|Is], D0, Acc) ->
219    D = update_unsafe_labels(I, D0),
220    eliminate_moves(Is, D, [I|Acc]);
221eliminate_moves([], _, Acc) -> reverse(Acc).
222
223eliminate_moves_call([{'%',{var_info,{x,0},Info}}=Anno,
224                      {block,BlkIs0}=Blk | Is], D, Acc0) ->
225    Acc = [Anno | Acc0],
226    RetType = proplists:get_value(type, Info, none),
227    case beam_types:get_singleton_value(RetType) of
228        {ok, Value} ->
229            RegVal = {{x,0}, value_to_literal(Value)},
230            BlkIs = eliminate_moves_blk(BlkIs0, RegVal),
231            eliminate_moves([{block,BlkIs}|Is], D, Acc);
232        error ->
233            eliminate_moves(Is, D, [Blk | Acc])
234    end;
235eliminate_moves_call(Is, D, Acc) ->
236    eliminate_moves(Is, D, Acc).
237
238eliminate_moves_blk([{set,[Dst],[_],move}|_]=Is, {_,Dst}) ->
239    Is;
240eliminate_moves_blk([{set,[Dst],[Lit],move}|Is], {Dst,Lit}) ->
241    %% Remove redundant 'move' instruction.
242    Is;
243eliminate_moves_blk([{set,[Dst],[_],move}|_]=Is, {Dst,_}) ->
244    Is;
245eliminate_moves_blk([{set,[_],[_],move}=I|Is], {_,_}=RegVal) ->
246    [I|eliminate_moves_blk(Is, RegVal)];
247eliminate_moves_blk(Is, _) -> Is.
248
249no_fallthrough([{'%',_} | Is]) ->
250    no_fallthrough(Is);
251no_fallthrough([I|_]) ->
252    is_unreachable_after(I).
253
254is_proper_list(Reg, [{'%',{var_info,Reg,Info}}|_]) ->
255    case proplists:get_value(type, Info) of
256        #t_list{terminator=nil} ->
257            true;
258        _ ->
259            %% Unknown type or not a proper list.
260            false
261    end;
262is_proper_list(Reg, [{'%',{var_info,_,_}}|Is]) ->
263    is_proper_list(Reg, Is);
264is_proper_list(_, _) -> false.
265
266value_to_literal([]) -> nil;
267value_to_literal(A) when is_atom(A) -> {atom,A};
268value_to_literal(F) when is_float(F) -> {float,F};
269value_to_literal(I) when is_integer(I) -> {integer,I};
270value_to_literal(Other) -> {literal,Other}.
271
272update_value_dict([Lit,{f,Lbl}|T], Reg, D0) ->
273    D = case D0 of
274            #{Lbl:=unsafe} -> D0;
275            #{Lbl:={Reg,Lit}} -> D0;
276            #{Lbl:=_} -> D0#{Lbl:=unsafe};
277            #{} -> D0#{Lbl=>{Reg,Lit}}
278        end,
279    update_value_dict(T, Reg, D);
280update_value_dict([], _, D) -> D.
281
282add_unsafe_label(L, D) ->
283    D#{L=>unsafe}.
284
285update_unsafe_labels(I, D) ->
286    Ls = instr_labels(I),
287    update_unsafe_labels_1(Ls, D).
288
289update_unsafe_labels_1([L|Ls], D) ->
290    update_unsafe_labels_1(Ls, D#{L=>unsafe});
291update_unsafe_labels_1([], D) -> D.
292
293%%%
294%%% It seems to be useful to insert extra labels after certain
295%%% test instructions. This used to be done by beam_dead.
296%%%
297
298insert_labels([{test,Op,_,_}=I|Is], Lc, Acc) ->
299    Useful = case Op of
300                 is_lt -> true;
301                 is_ge -> true;
302                 is_eq_exact -> true;
303                 is_ne_exact -> true;
304                 _ -> false
305             end,
306    case Useful of
307	false -> insert_labels(Is, Lc, [I|Acc]);
308	true -> insert_labels(Is, Lc+1, [{label,Lc},I|Acc])
309    end;
310insert_labels([I|Is], Lc, Acc) ->
311    insert_labels(Is, Lc, [I|Acc]);
312insert_labels([], Lc, Acc) ->
313    {reverse(Acc),Lc}.
314
315%%%
316%%% (1) We try to share the code for identical code segments by replacing all
317%%% occurrences except the last with jumps to the last occurrence.
318%%%
319%%% We must not share code that raises an exception from outside a
320%%% try/catch block with code inside a try/catch block and vice versa,
321%%% because beam_validator will probably flag it as unsafe
322%%% (ambiguous_catch_try_state). The same goes for a plain catch.
323%%%
324
325share(Is0) ->
326    Is1 = eliminate_fallthroughs(Is0, []),
327    Is2 = find_fixpoint(fun(Is) ->
328                                share_1(Is)
329                        end, Is1),
330    reverse(Is2).
331
332share_1(Is) ->
333    Safe = classify_labels(Is),
334    share_1(Is, Safe, #{}, #{}, [], []).
335
336%% Note that we examine the instructions in reverse execution order.
337share_1([{label,L}=Lbl|Is], Safe, Dict0, Lbls0, [_|_]=Seq0, Acc) ->
338    Seq = maybe_add_scope(Seq0, L, Safe),
339
340    %% If there are try/catch or catch instructions in this function,
341    %% any line instructions in the sequence now include a scope.
342    case Dict0 of
343        #{Seq := Label} ->
344            %% This sequence of instructions has been seen previously.
345            %% The scope identifiers added to line instructions ensure
346            %% that two sequence will not be equal unless sharing is
347            %% also safe.
348            Lbls = Lbls0#{L => Label},
349            share_1(Is, Safe, Dict0, Lbls, [],
350                    [[Lbl,{jump,{f,Label}}]|Acc]);
351        #{} ->
352            %% This is first time we have seen this sequence of instructions.
353            %% Find out whether it is safe to share the sequence.
354            case (map_size(Safe) =:= 0 orelse
355                  is_shareable(Seq)) andalso
356                unambigous_exit_call(Seq)
357            of
358                true ->
359                    %% Either this function does not contain any try/catch
360                    %% instructions, in which case it is always safe to share
361                    %% exception-raising instructions such as if_end and
362                    %% case_end, or it this sequence does not include
363                    %% any problematic instructions.
364                    Dict = Dict0#{Seq => L},
365                    share_1(Is, Safe, Dict, Lbls0, [], [[Lbl|Seq]|Acc]);
366                false ->
367                    %% The sequence includes an inappropriate instruction
368                    %% that may case beam_validator to complain about
369                    %% an ambiguous try/catch state.
370                    share_1(Is, Safe, Dict0, Lbls0, [], [[Lbl|Seq]|Acc])
371            end
372    end;
373share_1([{func_info,_,_,_}|_]=Is0, _Safe, _, Lbls, [], Acc0) ->
374    %% Replace jumps to jumps with a jump to the final destination
375    %% (jump threading). This optimization is done in the main
376    %% optimization pass of this module, but we do it here too because
377    %% it can give more opportunities for sharing code.
378    F = case Lbls =:= #{} of
379            true ->
380                fun lists:reverse/2;
381            false ->
382                fun(Is, Acc) ->
383                        beam_utils:replace_labels(Is, Acc, Lbls,
384                                                  fun(Old) -> Old end)
385                end
386        end,
387    foldl(F, Is0, Acc0);
388share_1([{'catch',_,_}=I|Is], Safe, Dict, _Lbls0, Seq, Acc) ->
389    %% Disable the jump threading optimization because it may be unsafe.
390    share_1(Is, Safe, Dict, #{}, [I|Seq], Acc);
391share_1([{'try',_,_}=I|Is], Safe, Dict, _Lbls, Seq, Acc) ->
392    %% Disable the jump threading optimization because it may be unsafe.
393    share_1(Is, Safe, Dict, #{}, [I|Seq], Acc);
394share_1([{jump,{f,To}}=I,{label,From}=Lbl|Is], Safe, Dict0, Lbls0, _Seq, Acc) ->
395    Lbls = Lbls0#{From => To},
396    share_1(Is, Safe, Dict0, Lbls, [], [[Lbl,I]|Acc]);
397share_1([I|Is], Safe, Dict, Lbls, Seq, Acc) ->
398    case is_unreachable_after(I) of
399	false ->
400	    share_1(Is, Safe, Dict, Lbls, [I|Seq], Acc);
401	true ->
402	    share_1(Is, Safe, Dict, Lbls, [I], Acc)
403    end.
404
405unambigous_exit_call([{call_ext,A,{extfunc,M,F,A}}|Is]) ->
406    case erl_bifs:is_exit_bif(M, F, A) of
407        true ->
408            %% beam_validator requires that the size of the stack
409            %% frame is unambigously known when a function is called.
410            %%
411            %% That means that we must be careful when sharing function
412            %% calls.
413            %%
414            %% In practice, it seems that only exit BIFs can
415            %% potentially be shared in an unsafe way, and only in
416            %% rare circumstances. (See the undecided_allocation_1/1
417            %% function in beam_jump_SUITE.)
418            %%
419            %% To ensure that the frame size is unambigous, only allow
420            %% sharing of a call to exit BIFs if the call is followed
421            %% by an instruction that indicates the size of the stack
422            %% frame (that is almost always the case in real-world
423            %% code).
424            case Is of
425                [{deallocate,_}|_] -> true;
426                [return] -> true;
427                _ -> false
428            end;
429        false ->
430            true
431    end;
432unambigous_exit_call([_|Is]) ->
433    unambigous_exit_call(Is);
434unambigous_exit_call([]) -> true.
435
436%% If the label has a scope set, assign it to any line instruction
437%% in the sequence.
438maybe_add_scope(Seq, L, Safe) ->
439    case Safe of
440        #{L := Scope} -> add_scope(Seq, Scope);
441        #{} -> Seq
442    end.
443
444add_scope([{line,Loc}=I|Is], Scope) ->
445    case keymember(scope, 1, Loc) of
446        false ->
447            [{line,[{scope,Scope}|Loc]}|add_scope(Is, Scope)];
448        true ->
449            [I|add_scope(Is, Scope)]
450    end;
451add_scope([I|Is], Scope) ->
452    [I|add_scope(Is, Scope)];
453add_scope([], _Scope) -> [].
454
455is_shareable([build_stacktrace|_]) -> false;
456is_shareable([{case_end,_}|_]) -> false;
457is_shareable([{'catch',_,_}|_]) -> false;
458is_shareable([{catch_end,_}|_]) -> false;
459is_shareable([if_end|_]) -> false;
460is_shareable([{'try',_,_}|_]) -> false;
461is_shareable([{try_case,_}|_]) -> false;
462is_shareable([{try_end,_}|_]) -> false;
463is_shareable([_|Is]) -> is_shareable(Is);
464is_shareable([]) -> true.
465
466%%
467%% Classify labels according to where the instructions that branch to
468%% the labels are located. Each label is assigned a set of scope
469%% identifers (but usually just one). If two labels have different
470%% scope identfiers, sharing a sequence that raises an exception
471%% between the labels may not be safe, because one label is inside a
472%% try/catch, and the other label is outside. Note that we don't care
473%% where the labels themselves are located, only from where the
474%% branches to them are located.
475%%
476%% If there is more than one scope in the function (that is, if there
477%% try/catch or catch in the function), the scope identifiers will be
478%% added to the line instructions. Recording the scope in the line
479%% instructions makes beam_jump idempotent, ensuring that beam_jump
480%% will not do any unsafe optimizations when when compiling from a .S
481%% file.
482%%
483
484classify_labels(Is) ->
485    classify_labels(Is, 0, #{}).
486
487classify_labels([{'catch',_,_}|Is], Scope, Safe) ->
488    classify_labels(Is, Scope+1, Safe);
489classify_labels([{catch_end,_}|Is], Scope, Safe) ->
490    classify_labels(Is, Scope+1, Safe);
491classify_labels([{'try',_,_}|Is], Scope, Safe) ->
492    classify_labels(Is, Scope+1, Safe);
493classify_labels([{'try_end',_}|Is], Scope, Safe) ->
494    classify_labels(Is, Scope+1, Safe);
495classify_labels([{'try_case',_}|Is], Scope, Safe) ->
496    classify_labels(Is, Scope+1, Safe);
497classify_labels([{'try_case_end',_}|Is], Scope, Safe) ->
498    classify_labels(Is, Scope+1, Safe);
499classify_labels([I|Is], Scope, Safe0) ->
500    Labels = instr_labels(I),
501    Safe = foldl(fun(L, A) ->
502                         case A of
503                             #{L := [Scope]} -> A;
504                             #{L := Other} -> A#{L => ordsets:add_element(Scope, Other)};
505                             #{} -> A#{L => [Scope]}
506                         end
507                 end, Safe0, Labels),
508    classify_labels(Is, Scope, Safe);
509classify_labels([], Scope, Safe) ->
510    case Scope of
511        0 ->
512            %% No try/catch or catch in this function. We don't
513            %% need the collected information.
514            #{};
515        _ ->
516            Safe
517    end.
518
519%% Eliminate all fallthroughs. Return the result reversed.
520
521eliminate_fallthroughs([{label,L}=Lbl|Is], [I|_]=Acc) ->
522    case is_unreachable_after(I) of
523	false ->
524	    %% Eliminate fallthrough.
525	    eliminate_fallthroughs(Is, [Lbl,{jump,{f,L}}|Acc]);
526	true ->
527	    eliminate_fallthroughs(Is, [Lbl|Acc])
528    end;
529eliminate_fallthroughs([I|Is], Acc) ->
530    eliminate_fallthroughs(Is, [I|Acc]);
531eliminate_fallthroughs([], Acc) -> Acc.
532
533%%%
534%%% (2) Move short code sequences ending in an instruction that causes an exit
535%%% to the end of the function.
536%%%
537%%% Implementation note: Since share/1 eliminated fallthroughs to labels,
538%%% we don't have to test whether instructions before labels may fail through.
539%%%
540move(Is) ->
541    move_1(Is, [], []).
542
543move_1([I|Is], Ends, Acc0) ->
544    case is_exit_instruction(I) of
545	false ->
546	    move_1(Is, Ends, [I|Acc0]);
547	true ->
548	    case extract_seq(Acc0, [I]) of
549		no ->
550		    move_1(Is, Ends, [I|Acc0]);
551		{yes,End,Acc} ->
552		    move_1(Is, [End|Ends], Acc)
553	    end
554    end;
555move_1([], Ends, Acc) -> reverse(Acc, lists:append(reverse(Ends))).
556
557extract_seq([{line,_}=Line|Is], Acc) ->
558    extract_seq(Is, [Line|Acc]);
559extract_seq([{block,_}=Bl|Is], Acc) ->
560    extract_seq_1(Is, [Bl|Acc]);
561extract_seq([{label,_}|_]=Is, Acc) ->
562    extract_seq_1(Is, Acc);
563extract_seq(_, _) -> no.
564
565extract_seq_1([{line,_}=Line|Is], Acc) ->
566    extract_seq_1(Is, [Line|Acc]);
567extract_seq_1([{label,_},{func_info,_,_,_}|_], _) ->
568    no;
569extract_seq_1([{label,Lbl},{jump,{f,Lbl}}|_], _) ->
570    %% Don't move a sequence which have a fallthrough entering it.
571    no;
572extract_seq_1([{label,_}=Lbl|Is], Acc) ->
573    {yes,[Lbl|Acc],Is};
574extract_seq_1(_, _) -> no.
575
576%%%
577%%% (3) (4) (5) (6) Jump and unreachable code optimizations.
578%%%
579
580-record(st,
581	{
582	  entry :: beam_asm:label(), %Entry label (must not be moved).
583	  replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace.
584	  labels :: sets:set()         %Set of referenced labels.
585	}).
586
587opt(Is0, CLabel) ->
588    find_fixpoint(fun(Is) ->
589			  Lbls = initial_labels(Is),
590			  St = #st{entry=CLabel,replace=#{},labels=Lbls},
591			  opt(Is, [], St)
592		  end, Is0).
593
594find_fixpoint(OptFun, Is0) ->
595    case OptFun(Is0) of
596	Is0 -> Is0;
597	Is -> find_fixpoint(OptFun, Is)
598    end.
599
600opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
601    %% We have
602    %%    Test Label Ops
603    %%    jump Label
604    %% The test instruction is not needed if the test is pure
605    %% (it modifies neither registers nor bit syntax state).
606    case beam_utils:is_pure_test(I) of
607	false ->
608	    %% Test is not pure; we must keep it.
609	    opt(Is, [I|Acc], label_used(Lbl, St));
610	true ->
611	    %% The test is pure and its failure label is the same
612	    %% as in the jump that follows -- thus it is not needed.
613	    opt(Is, Acc, St)
614    end;
615opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
616    case is_label_defined(Is, L) of
617	false ->
618	    opt(Is0, [I|Acc], label_used(Lbl, St));
619	true ->
620	    case invert_test(Test0) of
621		not_possible ->
622		    opt(Is0, [I|Acc], label_used(Lbl, St));
623		Test ->
624		    %% Invert the test and remove the jump.
625		    opt([{test,Test,To,Ops}|Is], Acc, St)
626	    end
627    end;
628opt([{test,_,{f,_}=Lbl,_}=I|Is], Acc, St) ->
629    opt(Is, [I|Acc], label_used(Lbl, St));
630opt([{test,_,{f,_}=Lbl,_,_,_}=I|Is], Acc, St) ->
631    opt(Is, [I|Acc], label_used(Lbl, St));
632opt([{select,_,_R,Fail,Vls}=I|Is], Acc, St) ->
633    skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St));
634opt([{label,From}=I,{label,To}|Is], Acc, #st{replace=Replace}=St) ->
635    opt([I|Is], Acc, St#st{replace=Replace#{To => From}});
636opt([{jump,{f,_}=X}|[{label,_},{jump,X}|_]=Is], Acc, St) ->
637    opt(Is, Acc, St);
638opt([{jump,{f,Lbl}}|[{label,Lbl}|_]=Is], Acc, St) ->
639    opt(Is, Acc, St);
640opt([{jump,{f,L}=Lbl}=I|Is], Acc0, St0) ->
641    %% Replace all labels before this jump instruction into the
642    %% location of the jump's target.
643    {Acc,St} = collect_labels(Acc0, L, St0),
644    skip_unreachable(Is, [I|Acc], label_used(Lbl, St));
645%% Optimization: quickly handle some common instructions that don't
646%% have any failure labels and where is_unreachable_after(I) =:= false.
647opt([{block,_}=I|Is], Acc, St) ->
648    opt(Is, [I|Acc], St);
649opt([{call,_,_}=I|Is], Acc, St) ->
650    opt(Is, [I|Acc], St);
651opt([{deallocate,_}=I|Is], Acc, St) ->
652    opt(Is, [I|Acc], St);
653%% All other instructions.
654opt([I|Is], Acc, #st{labels=Used0}=St0) ->
655    Used = ulbl(I, Used0),
656    St = St0#st{labels=Used},
657    case is_unreachable_after(I) of
658	true  -> skip_unreachable(Is, [I|Acc], St);
659	false -> opt(Is, [I|Acc], St)
660    end;
661opt([], Acc, #st{replace=Replace0}) when Replace0 =/= #{} ->
662    Replace = normalize_replace(maps:to_list(Replace0), Replace0, []),
663    beam_utils:replace_labels(Acc, [], Replace, fun(Old) -> Old end);
664opt([], Acc, #st{replace=Replace}) when Replace =:= #{} ->
665    reverse(Acc).
666
667normalize_replace([{From,To0}|Rest], Replace, Acc) ->
668    case Replace of
669        #{To0 := To} ->
670            normalize_replace([{From,To}|Rest], Replace, Acc);
671        _ ->
672            normalize_replace(Rest, Replace, [{From,To0}|Acc])
673    end;
674normalize_replace([], _Replace, Acc) ->
675    maps:from_list(Acc).
676
677collect_labels(Is, Label, #st{entry=Entry,replace=Replace} = St) ->
678    collect_labels_1(Is, Label, Entry, Replace, St).
679
680collect_labels_1([{label,Entry}|_]=Is, _Label, Entry, Acc, St) ->
681    %% Never move the entry label.
682    {Is,St#st{replace=Acc}};
683collect_labels_1([{label,L}|Is], Label, Entry, Acc, St) ->
684    collect_labels_1(Is, Label, Entry, Acc#{L => Label}, St);
685collect_labels_1(Is, _Label, _Entry, Acc, St) ->
686    {Is,St#st{replace=Acc}}.
687
688%% label_defined(Is, Label) -> true | false.
689%%  Test whether the label Label is defined at the start of the instruction
690%%  sequence, possibly preceeded by other label definitions.
691%%
692is_label_defined([{label,L}|_], L) -> true;
693is_label_defined([{label,_}|Is], L) -> is_label_defined(Is, L);
694is_label_defined(_, _) -> false.
695
696%% invert_test(Test0) -> not_possible | Test
697
698invert_test(is_ge) ->       is_lt;
699invert_test(is_lt) ->       is_ge;
700invert_test(is_eq) ->       is_ne;
701invert_test(is_ne) ->       is_eq;
702invert_test(is_eq_exact) -> is_ne_exact;
703invert_test(is_ne_exact) -> is_eq_exact;
704invert_test(_) ->           not_possible.
705
706%% skip_unreachable([Instruction], St).
707%%  Remove all instructions (including definitions of labels
708%%  that have not been referenced yet) up to the next
709%%  referenced label, then call opt/3 to optimize the rest
710%%  of the instruction sequence.
711%%
712skip_unreachable([{label,L}|_Is]=Is0, [{jump,{f,L}}|Acc], St) ->
713    opt(Is0, Acc, St);
714skip_unreachable([{label,L}|Is]=Is0, Acc, St) ->
715    case is_label_used(L, St) of
716	true  -> opt(Is0, Acc, St);
717	false -> skip_unreachable(Is, Acc, St)
718    end;
719skip_unreachable([_|Is], Acc, St) ->
720    skip_unreachable(Is, Acc, St);
721skip_unreachable([], Acc, St) ->
722    opt([], Acc, St).
723
724%% Add one or more label to the set of used labels.
725
726label_used({f,L}, St) -> St#st{labels=sets:add_element(L,St#st.labels)};
727label_used([H|T], St0) -> label_used(T, label_used(H, St0));
728label_used([], St) -> St;
729label_used(_Other, St) -> St.
730
731%% Test if label is used.
732
733is_label_used(L, St) ->
734    sets:is_element(L, St#st.labels).
735
736%% is_unreachable_after(Instruction) -> boolean()
737%%  Test whether the code after Instruction is unreachable.
738
739-spec is_unreachable_after(instruction()) -> boolean().
740
741is_unreachable_after({func_info,_M,_F,_A}) -> true;
742is_unreachable_after(return) -> true;
743is_unreachable_after({jump,_Lbl}) -> true;
744is_unreachable_after({select,_What,_R,_Lbl,_Cases}) -> true;
745is_unreachable_after({loop_rec_end,_}) -> true;
746is_unreachable_after({wait,_}) -> true;
747is_unreachable_after(I) -> is_exit_instruction(I).
748
749%% is_exit_instruction(Instruction) -> boolean()
750%%  Test whether the instruction Instruction always
751%%  causes an exit/failure.
752
753-spec is_exit_instruction(instruction()) -> boolean().
754
755is_exit_instruction(if_end) -> true;
756is_exit_instruction({case_end,_}) -> true;
757is_exit_instruction({try_case_end,_}) -> true;
758is_exit_instruction({badmatch,_}) -> true;
759is_exit_instruction(_) -> false.
760
761%% remove_unused_labels(Instructions0) -> Instructions
762%%  Remove all unused labels. Also remove unreachable
763%%  instructions following labels that are removed.
764
765-spec remove_unused_labels([instruction()]) -> [instruction()].
766
767remove_unused_labels(Is) ->
768    Used0 = initial_labels(Is),
769    Used = foldl(fun ulbl/2, Used0, Is),
770    rem_unused(Is, Used, []).
771
772rem_unused([{label,Lbl}=I|Is0], Used, [Prev|_]=Acc) ->
773    case sets:is_element(Lbl, Used) of
774	false ->
775	    Is = case is_unreachable_after(Prev) of
776		     true -> drop_upto_label(Is0);
777		     false -> Is0
778		 end,
779	    rem_unused(Is, Used, Acc);
780	true ->
781	    rem_unused(Is0, Used, [I|Acc])
782    end;
783rem_unused([I|Is], Used, Acc) ->
784    rem_unused(Is, Used, [I|Acc]);
785rem_unused([], _, Acc) -> reverse(Acc).
786
787initial_labels(Is) ->
788    initial_labels(Is, []).
789
790initial_labels([{line,_}|Is], Acc) ->
791    initial_labels(Is, Acc);
792initial_labels([{label,Lbl}|Is], Acc) ->
793    initial_labels(Is, [Lbl|Acc]);
794initial_labels([{func_info,_,_,_},{label,Lbl}|_], Acc) ->
795    sets:from_list([Lbl|Acc], [{version, 2}]).
796
797drop_upto_label([{label,_}|_]=Is) -> Is;
798drop_upto_label([_|Is]) -> drop_upto_label(Is);
799drop_upto_label([]) -> [].
800
801%% unshare([Instruction]) -> [Instruction].
802%%  Replace a jump to a return sequence (a `return` instruction
803%%  optionally preced by a `deallocate` instruction) with the return
804%%  sequence. This always saves execution time and may also save code
805%%  space (depending on the architecture). Eliminating `jump`
806%%  instructions also gives beam_trim more opportunities to trim the
807%%  stack.
808
809unshare(Is) ->
810    Short = unshare_collect_short(Is, #{}),
811    unshare_short(Is, Short).
812
813unshare_collect_short([{label,L},return|Is], Map) ->
814    unshare_collect_short(Is, Map#{L=>[return]});
815unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) ->
816    %% `deallocate` and `return` are combined into one instruction by
817    %% the loader.
818    unshare_collect_short(Is, Map#{L=>[D,return]});
819unshare_collect_short([_|Is], Map) ->
820    unshare_collect_short(Is, Map);
821unshare_collect_short([], Map) -> Map.
822
823unshare_short([{jump,{f,F}}=I|Is], Map) ->
824    case Map of
825        #{F:=Seq} ->
826            Seq ++ unshare_short(Is, Map);
827        #{} ->
828            [I|unshare_short(Is, Map)]
829    end;
830unshare_short([I|Is], Map) ->
831    [I|unshare_short(Is, Map)];
832unshare_short([], _Map) -> [].
833
834%% ulbl(Instruction, UsedCerlSet) -> UsedCerlSet'
835%%  Update the cerl_set UsedCerlSet with any function-local labels
836%%  (i.e. not with labels in call instructions) referenced by
837%%  the instruction Instruction.
838%%
839%%  NOTE: This function does NOT look for labels inside blocks.
840
841ulbl(I, Used) ->
842    case instr_labels(I) of
843        [] ->
844            Used;
845        [Lbl] ->
846            sets:add_element(Lbl, Used);
847        [_|_]=L ->
848            ulbl_list(L, Used)
849    end.
850
851ulbl_list([L|Ls], Used) ->
852    ulbl_list(Ls, sets:add_element(L, Used));
853ulbl_list([], Used) -> Used.
854
855-spec instr_labels(Instruction) -> Labels when
856      Instruction :: instruction(),
857      Labels :: [beam_asm:label()].
858
859instr_labels({test,_,Fail,_}) ->
860    do_instr_labels(Fail);
861instr_labels({test,_,Fail,_,_,_}) ->
862    do_instr_labels(Fail);
863instr_labels({select,_,_,Fail,Vls}) ->
864    do_instr_labels_list(Vls, do_instr_labels(Fail));
865instr_labels({'try',_,Lbl}) ->
866    do_instr_labels(Lbl);
867instr_labels({'catch',_,Lbl}) ->
868    do_instr_labels(Lbl);
869instr_labels({jump,Lbl}) ->
870    do_instr_labels(Lbl);
871instr_labels({loop_rec,Lbl,_}) ->
872    do_instr_labels(Lbl);
873instr_labels({loop_rec_end,Lbl}) ->
874    do_instr_labels(Lbl);
875instr_labels({wait,Lbl}) ->
876    do_instr_labels(Lbl);
877instr_labels({wait_timeout,Lbl,_To}) ->
878    do_instr_labels(Lbl);
879instr_labels({bif,_Name,Lbl,_As,_R}) ->
880    do_instr_labels(Lbl);
881instr_labels({gc_bif,_Name,Lbl,_Live,_As,_R}) ->
882    do_instr_labels(Lbl);
883instr_labels({bs_init,Lbl,_,_,_,_}) ->
884    do_instr_labels(Lbl);
885instr_labels({bs_put,Lbl,_,_}) ->
886    do_instr_labels(Lbl);
887instr_labels({put_map,Lbl,_Op,_Src,_Dst,_Live,_List}) ->
888    do_instr_labels(Lbl);
889instr_labels({get_map_elements,Lbl,_Src,_List}) ->
890    do_instr_labels(Lbl);
891instr_labels({bs_start_match4,Fail,_,_,_}) ->
892    case Fail of
893        {f,L} -> [L];
894        {atom,_} -> []
895    end;
896instr_labels(_) ->
897    [].
898
899do_instr_labels({f,0}) -> [];
900do_instr_labels({f,F}) -> [F].
901
902do_instr_labels_list([{f,F}|T], Acc) ->
903    do_instr_labels_list(T, [F|Acc]);
904do_instr_labels_list([_|T], Acc) ->
905    do_instr_labels_list(T, Acc);
906do_instr_labels_list([], Acc) -> Acc.
907