1%% ``Licensed under the Apache License, Version 2.0 (the "License");
2%% you may not use this file except in compliance with the License.
3%% You may obtain a copy of the License at
4%%
5%%     http://www.apache.org/licenses/LICENSE-2.0
6%%
7%% Unless required by applicable law or agreed to in writing, software
8%% distributed under the License is distributed on an "AS IS" BASIS,
9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10%% See the License for the specific language governing permissions and
11%% limitations under the License.
12%%
13%% The Initial Developer of the Original Code is Ericsson Utvecklings AB.
14%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
15%% AB. All Rights Reserved.''
16%%
17%%     $Id: beam_jump.erl,v 1.1 2008/12/17 09:53:41 mikpe Exp $
18%%
19%%% Purpose : Optimise jumps and remove unreachable code.
20
21-module(beam_jump).
22
23-export([module/2,module_labels/1,
24	 is_unreachable_after/1,remove_unused_labels/1]).
25
26%%% The following optimisations are done:
27%%%
28%%% (1) This code with two identical instruction sequences
29%%%
30%%%     L1: <Instruction sequence>
31%%%     L2:
32%%%          . . .
33%%%     L3: <Instruction sequence>
34%%%     L4:
35%%%
36%%%     can be replaced with
37%%%
38%%%     L1: jump L3
39%%%     L2:
40%%%          . . .
41%%%     L3: <Instruction sequence>
42%%%     L4
43%%%
44%%%     Note: The instruction sequence must end with an instruction
45%%%     such as a jump that never transfers control to the instruction
46%%%     following it.
47%%%
48%%% (2) case_end, if_end, and badmatch, and function calls that cause an
49%%%     exit (such as calls to exit/1) are moved to the end of the function.
50%%%     The purpose is to allow further optimizations at the place from
51%%%     which the code was moved.
52%%%
53%%% (3) Any unreachable code is removed.  Unreachable code is code after
54%%%     jump, call_last and other instructions which never transfer control
55%%%     to the following instruction.  Code is unreachable up to the next
56%%%     *referenced* label.  Note that the optimisations below might
57%%%     generate more possibilities for removing unreachable code.
58%%%
59%%% (4) This code:
60%%%	L1:	jump L2
61%%%          . . .
62%%%     L2: ...
63%%%
64%%%    will be changed to
65%%%
66%%%    jump L2
67%%%          . . .
68%%%    L1:
69%%%    L2: ...
70%%%
71%%%    If the jump is unreachable, it will be removed according to (1).
72%%%
73%%% (5) In
74%%%
75%%%	 jump L1
76%%%      L1:
77%%%
78%%%	 the jump will be removed.
79%%%
80%%% (6) If test instructions are used to skip a single jump instruction,
81%%%      the test is inverted and the jump is eliminated (provided that
82%%%      the test can be inverted).  Example:
83%%%
84%%%      is_eq L1 {x,1} {x,2}
85%%%      jump L2
86%%%      L1:
87%%%
88%%%      will be changed to
89%%%
90%%%      is_ne L2 {x,1} {x,2}
91%%%
92%%%      (The label L1 will be retained if there were previous references to it.)
93%%%
94%%% (7) Some redundant uses of is_boolean/1 is optimized away.
95%%%
96%%% Terminology note: The optimisation done here is called unreachable-code
97%%% elimination, NOT dead-code elimination.  Dead code elimination
98%%% means the removal of instructions that are executed, but have no visible
99%%% effect on the program state.
100%%%
101
102-import(lists, [reverse/1,reverse/2,map/2,mapfoldl/3,foldl/3,
103		last/1,foreach/2,member/2]).
104
105module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
106    Fs = map(fun function/1, Fs0),
107    {ok,{Mod,Exp,Attr,Fs,Lc}}.
108
109module_labels({Mod,Exp,Attr,Fs,Lc}) ->
110    {Mod,Exp,Attr,map(fun function_labels/1, Fs),Lc}.
111
112function_labels({function,Name,Arity,CLabel,Asm0}) ->
113    Asm = remove_unused_labels(Asm0),
114    {function,Name,Arity,CLabel,Asm}.
115
116function({function,Name,Arity,CLabel,Asm0}) ->
117    Asm1 = share(Asm0),
118    Asm2 = bopt(Asm1),
119    Asm3 = move(Asm2),
120    Asm4 = opt(Asm3, CLabel),
121    Asm = remove_unused_labels(Asm4),
122    {function,Name,Arity,CLabel,Asm}.
123
124%%%
125%%% (1) We try to share the code for identical code segments by replacing all
126%%% occurrences except the last with jumps to the last occurrence.
127%%%
128
129share(Is) ->
130    share_1(reverse(Is), gb_trees:empty(), [], []).
131
132share_1([{label,_}=Lbl|Is], Dict, [], Acc) ->
133    share_1(Is, Dict, [], [Lbl|Acc]);
134share_1([{label,L}=Lbl|Is], Dict0, Seq, Acc) ->
135    case is_unreachable_after(last(Seq)) of
136	false ->
137	    share_1(Is, Dict0, [], [Lbl|Seq ++ Acc]);
138	true ->
139	    case gb_trees:lookup(Seq, Dict0) of
140		none ->
141		    Dict = gb_trees:insert(Seq, L, Dict0),
142		    share_1(Is, Dict, [], [Lbl|Seq ++ Acc]);
143		{value,Label} ->
144		    share_1(Is, Dict0, [], [Lbl,{jump,{f,Label}}|Acc])
145	    end
146    end;
147share_1([{func_info,_,_,_}=I|Is], _, [], Acc) ->
148    Is++[I|Acc];
149share_1([I|Is], Dict, Seq, Acc) ->
150    case is_unreachable_after(I) of
151	false ->
152	    share_1(Is, Dict, [I|Seq], Acc);
153	true ->
154	    share_1(Is, Dict, [I], Acc)
155    end.
156
157%%%
158%%% (2) Move short code sequences ending in an instruction that causes an exit
159%%% to the end of the function.
160%%%
161
162move(Is) ->
163    move_1(Is, [], []).
164
165move_1([I|Is], End, Acc) ->
166    case is_exit_instruction(I) of
167	false -> move_1(Is, End, [I|Acc]);
168	true -> move_2(I, Is, End, Acc)
169    end;
170move_1([], End, Acc) ->
171    reverse(Acc, reverse(End)).
172
173move_2(Exit, Is, End, [{block,_},{label,_},{func_info,_,_,_}|_]=Acc) ->
174    move_1(Is, End, [Exit|Acc]);
175move_2(Exit, Is, End, [{kill,_Y}|Acc]) ->
176    move_2(Exit, Is, End, Acc);
177move_2(Exit, Is, End, [{block,_}=Blk,{label,_}=Lbl,Dead|More]=Acc) ->
178    case is_unreachable_after(Dead) of
179	false ->
180	    move_1(Is, End, [Exit|Acc]);
181	true ->
182	    move_1([Dead|Is], [Exit,Blk,Lbl|End], More)
183    end;
184move_2(Exit, Is, End, [{label,_}=Lbl,Dead|More]=Acc) ->
185    case is_unreachable_after(Dead) of
186	false ->
187	    move_1(Is, End, [Exit|Acc]);
188	true ->
189	    move_1([Dead|Is], [Exit,Lbl|End], More)
190    end;
191move_2(Exit, Is, End, Acc) ->
192    move_1(Is, End, [Exit|Acc]).
193
194%%%
195%%% (7) Remove redundant is_boolean tests.
196%%%
197
198bopt(Is) ->
199    bopt_1(Is, []).
200
201bopt_1([{test,is_boolean,_,_}=I|Is], Acc0) ->
202    case opt_is_bool(I, Acc0) of
203	no -> bopt_1(Is, [I|Acc0]);
204	yes -> bopt_1(Is, Acc0);
205	{yes,Acc} -> bopt_1(Is, Acc)
206    end;
207bopt_1([I|Is], Acc) -> bopt_1(Is, [I|Acc]);
208bopt_1([], Acc) -> reverse(Acc).
209
210opt_is_bool({test,is_boolean,{f,Lbl},[Reg]}, Acc) ->
211    opt_is_bool_1(Acc, Reg, Lbl).
212
213opt_is_bool_1([{test,is_eq_exact,{f,Lbl},[Reg,{atom,true}]}|_], Reg, Lbl) ->
214    %% Instruction not needed in this context.
215    yes;
216opt_is_bool_1([{test,is_ne_exact,{f,Lbl},[Reg,{atom,true}]}|Acc], Reg, Lbl) ->
217    %% Rewrite to shorter test.
218    {yes,[{test,is_eq_exact,{f,Lbl},[Reg,{atom,false}]}|Acc]};
219opt_is_bool_1([{test,_,{f,Lbl},_}=Test|Acc0], Reg, Lbl) ->
220    case opt_is_bool_1(Acc0, Reg, Lbl) of
221	{yes,Acc} -> {yes,[Test|Acc]};
222	Other -> Other
223    end;
224opt_is_bool_1(_, _, _) -> no.
225
226%%%
227%%% (3) (4) (5) (6) Jump and unreachable code optimizations.
228%%%
229
230-record(st, {fc,				%Label for function class errors.
231	     entry,				%Entry label (must not be moved).
232	     mlbl,				%Moved labels.
233	     labels				%Set of referenced labels.
234	    }).
235
236opt([{label,Fc}|_]=Is, CLabel) ->
237    Lbls = initial_labels(Is),
238    St = #st{fc=Fc,entry=CLabel,mlbl=dict:new(),labels=Lbls},
239    opt(Is, [], St).
240
241opt([{test,Test0,{f,Lnum}=Lbl,Ops}=I|Is0], Acc, St) ->
242    case Is0 of
243	[{jump,To}|[{label,Lnum}|Is2]=Is1] ->
244	    case invert_test(Test0) of
245		not_possible ->
246		    opt(Is0, [I|Acc], label_used(Lbl, St));
247		Test ->
248		    Is = case is_label_used(Lnum, St) of
249			     true -> Is1;
250			     false -> Is2
251			 end,
252		    opt([{test,Test,To,Ops}|Is], Acc, label_used(To, St))
253	    end;
254	_Other ->
255	    opt(Is0, [I|Acc], label_used(Lbl, St))
256    end;
257opt([{select_val,_R,Fail,{list,Vls}}=I|Is], Acc, St) ->
258    skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St));
259opt([{select_tuple_arity,_R,Fail,{list,Vls}}=I|Is], Acc, St) ->
260    skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St));
261opt([{'try',_R,Lbl}=I|Is], Acc, St) ->
262    opt(Is, [I|Acc], label_used(Lbl, St));
263opt([{'catch',_R,Lbl}=I|Is], Acc, St) ->
264    opt(Is, [I|Acc], label_used(Lbl, St));
265opt([{label,L}=I|Is], Acc, #st{entry=L}=St) ->
266    %% NEVER move the entry label.
267    opt(Is, [I|Acc], St);
268opt([{label,L1},{jump,{f,L2}}=I|Is], [Prev|Acc], St0) ->
269    St = St0#st{mlbl=dict:append(L2, L1, St0#st.mlbl)},
270    opt([Prev,I|Is], Acc, label_used({f,L2}, St));
271opt([{label,Lbl}=I|Is], Acc, #st{mlbl=Mlbl}=St0) ->
272    case dict:find(Lbl, Mlbl) of
273	{ok,Lbls} ->
274	    %% Essential to remove the list of labels from the dictionary,
275	    %% since we will rescan the inserted labels.  We MUST rescan.
276	    St = St0#st{mlbl=dict:erase(Lbl, Mlbl)},
277	    insert_labels([Lbl|Lbls], Is, Acc, St);
278	error -> opt(Is, [I|Acc], St0)
279    end;
280opt([{jump,{f,Lbl}},{label,Lbl}=I|Is], Acc, St) ->
281    opt([I|Is], Acc, St);
282opt([{jump,Lbl}=I|Is], Acc, St) ->
283    skip_unreachable(Is, [I|Acc], label_used(Lbl, St));
284opt([{loop_rec,Lbl,_R}=I|Is], Acc, St) ->
285    opt(Is, [I|Acc], label_used(Lbl, St));
286opt([{bif,_Name,Lbl,_As,_R}=I|Is], Acc, St) ->
287    opt(Is, [I|Acc], label_used(Lbl, St));
288opt([{bs_put_integer,Lbl,_Bits,_Unit,_Fl,_Val}=I|Is], Acc, St) ->
289    opt(Is, [I|Acc], label_used(Lbl, St));
290opt([{bs_put_binary,Lbl,_Bits,_Unit,_Fl,_Val}=I|Is], Acc, St) ->
291    opt(Is, [I|Acc], label_used(Lbl, St));
292opt([{bs_put_float,Lbl,_Bits,_Unit,_Fl,_Val}=I|Is], Acc, St) ->
293    opt(Is, [I|Acc], label_used(Lbl, St));
294opt([{bs_final,Lbl,_R}=I|Is], Acc, St) ->
295    opt(Is, [I|Acc], label_used(Lbl, St));
296opt([{bs_init2,Lbl,_,_,_,_,_}=I|Is], Acc, St) ->
297    opt(Is, [I|Acc], label_used(Lbl, St));
298opt([{bs_add,Lbl,_,_}=I|Is], Acc, St) ->
299    opt(Is, [I|Acc], label_used(Lbl, St));
300opt([{bs_bits_to_bytes,Lbl,_,_}=I|Is], Acc, St) ->
301    opt(Is, [I|Acc], label_used(Lbl, St));
302opt([I|Is], Acc, St) ->
303    case is_unreachable_after(I) of
304	true  -> skip_unreachable(Is, [I|Acc], St);
305	false -> opt(Is, [I|Acc], St)
306    end;
307opt([], Acc, #st{fc=Fc,mlbl=Mlbl}) ->
308    Code = reverse(Acc),
309    case dict:find(Fc, Mlbl) of
310	{ok,Lbls} -> insert_fc_labels(Lbls, Mlbl, Code);
311	error -> Code
312    end.
313
314insert_fc_labels([L|Ls], Mlbl, Acc0) ->
315    Acc = [{label,L}|Acc0],
316    case dict:find(L, Mlbl) of
317	error ->
318	    insert_fc_labels(Ls, Mlbl, Acc);
319	{ok,Lbls} ->
320	    insert_fc_labels(Lbls++Ls, Mlbl, Acc)
321    end;
322insert_fc_labels([], _, Acc) -> Acc.
323
324%% invert_test(Test0) -> not_possible | Test
325
326invert_test(is_ge) ->       is_lt;
327invert_test(is_lt) ->       is_ge;
328invert_test(is_eq) ->       is_ne;
329invert_test(is_ne) ->       is_eq;
330invert_test(is_eq_exact) -> is_ne_exact;
331invert_test(is_ne_exact) -> is_eq_exact;
332invert_test(_) ->           not_possible.
333
334insert_labels([L|Ls], Is, [{jump,{f,L}}|Acc], St) ->
335    insert_labels(Ls, [{label,L}|Is], Acc, St);
336insert_labels([L|Ls], Is, Acc, St) ->
337    insert_labels(Ls, [{label,L}|Is], Acc, St);
338insert_labels([], Is, Acc, St) ->
339    opt(Is, Acc, St).
340
341%% Skip unreachable code up to the next referenced label.
342
343skip_unreachable([{label,L}|Is], [{jump,{f,L}}|Acc], St) ->
344    opt([{label,L}|Is], Acc, St);
345skip_unreachable([{label,L}|Is], Acc, St) ->
346    case is_label_used(L, St) of
347	true  -> opt([{label,L}|Is], Acc, St);
348	false -> skip_unreachable(Is, Acc, St)
349    end;
350skip_unreachable([_|Is], Acc, St) ->
351    skip_unreachable(Is, Acc, St);
352skip_unreachable([], Acc, St) ->
353    opt([], Acc, St).
354
355%% Add one or more label to the set of used labels.
356
357label_used({f,0}, St) -> St;
358label_used({f,L}, St) -> St#st{labels=gb_sets:add(L, St#st.labels)};
359label_used([H|T], St0) -> label_used(T, label_used(H, St0));
360label_used([], St) -> St;
361label_used(_Other, St) -> St.
362
363%% Test if label is used.
364
365is_label_used(L, St) ->
366    gb_sets:is_member(L, St#st.labels).
367
368%% is_unreachable_after(Instruction) -> true|false
369%%  Test whether the code after Instruction is unreachable.
370
371is_unreachable_after({func_info,_M,_F,_A}) -> true;
372is_unreachable_after(return) -> true;
373is_unreachable_after({call_ext_last,_Ar,_ExtFunc,_D}) -> true;
374is_unreachable_after({call_ext_only,_Ar,_ExtFunc}) -> true;
375is_unreachable_after({call_last,_Ar,_Lbl,_D}) -> true;
376is_unreachable_after({call_only,_Ar,_Lbl}) -> true;
377is_unreachable_after({apply_last,_Ar,_N}) -> true;
378is_unreachable_after({jump,_Lbl}) -> true;
379is_unreachable_after({select_val,_R,_Lbl,_Cases}) -> true;
380is_unreachable_after({select_tuple_arity,_R,_Lbl,_Cases}) -> true;
381is_unreachable_after({loop_rec_end,_}) -> true;
382is_unreachable_after({wait,_}) -> true;
383is_unreachable_after(I) -> is_exit_instruction(I).
384
385%% is_exit_instruction(Instruction) -> true|false
386%%  Test whether the instruction Instruction always
387%%  causes an exit/failure.
388
389is_exit_instruction({call_ext,_,{extfunc,M,F,A}}) ->
390    is_exit_instruction_1(M, F, A);
391is_exit_instruction({call_ext_last,_,{extfunc,M,F,A},_}) ->
392    is_exit_instruction_1(M, F, A);
393is_exit_instruction({call_ext_only,_,{extfunc,M,F,A}}) ->
394    is_exit_instruction_1(M, F, A);
395is_exit_instruction(if_end) -> true;
396is_exit_instruction({case_end,_}) -> true;
397is_exit_instruction({try_case_end,_}) -> true;
398is_exit_instruction({badmatch,_}) -> true;
399is_exit_instruction(_) -> false.
400
401is_exit_instruction_1(erlang, exit, 1) -> true;
402is_exit_instruction_1(erlang, throw, 1) -> true;
403is_exit_instruction_1(erlang, error, 1) -> true;
404is_exit_instruction_1(erlang, error, 2) -> true;
405is_exit_instruction_1(erlang, fault, 1) -> true;
406is_exit_instruction_1(erlang, fault, 2) -> true;
407is_exit_instruction_1(_, _, _) -> false.
408
409%% remove_unused_labels(Instructions0) -> Instructions
410%%  Remove all unused labels.
411
412remove_unused_labels(Is) ->
413    Used0 = initial_labels(Is),
414    Used = foldl(fun ulbl/2, Used0, Is),
415    rem_unused(Is, Used, []).
416
417rem_unused([{label,Lbl}=I|Is], Used, Acc) ->
418    case gb_sets:is_member(Lbl, Used) of
419	false -> rem_unused(Is, Used, Acc);
420	true -> rem_unused(Is, Used, [I|Acc])
421    end;
422rem_unused([I|Is], Used, Acc) ->
423    rem_unused(Is, Used, [I|Acc]);
424rem_unused([], _, Acc) -> reverse(Acc).
425
426initial_labels(Is) ->
427    initial_labels(Is, []).
428
429initial_labels([{label,Lbl}|Is], Acc) ->
430    initial_labels(Is, [Lbl|Acc]);
431initial_labels([{func_info,_,_,_},{label,Lbl}|_], Acc) ->
432    gb_sets:from_list([Lbl|Acc]).
433
434ulbl({test,_,Fail,_}, Used) ->
435    mark_used(Fail, Used);
436ulbl({select_val,_,Fail,{list,Vls}}, Used) ->
437    mark_used_list(Vls, mark_used(Fail, Used));
438ulbl({select_tuple_arity,_,Fail,{list,Vls}}, Used) ->
439    mark_used_list(Vls, mark_used(Fail, Used));
440ulbl({'try',_,Lbl}, Used) ->
441    mark_used(Lbl, Used);
442ulbl({'catch',_,Lbl}, Used) ->
443    mark_used(Lbl, Used);
444ulbl({jump,Lbl}, Used) ->
445    mark_used(Lbl, Used);
446ulbl({loop_rec,Lbl,_}, Used) ->
447    mark_used(Lbl, Used);
448ulbl({loop_rec_end,Lbl}, Used) ->
449    mark_used(Lbl, Used);
450ulbl({wait,Lbl}, Used) ->
451    mark_used(Lbl, Used);
452ulbl({wait_timeout,Lbl,_To}, Used) ->
453    mark_used(Lbl, Used);
454ulbl({bif,_Name,Lbl,_As,_R}, Used) ->
455    mark_used(Lbl, Used);
456ulbl({bs_init2,Lbl,_,_,_,_,_}, Used) ->
457    mark_used(Lbl, Used);
458ulbl({bs_put_integer,Lbl,_Bits,_Unit,_Fl,_Val}, Used) ->
459    mark_used(Lbl, Used);
460ulbl({bs_put_float,Lbl,_Bits,_Unit,_Fl,_Val}, Used) ->
461    mark_used(Lbl, Used);
462ulbl({bs_put_binary,Lbl,_Bits,_Unit,_Fl,_Val}, Used) ->
463    mark_used(Lbl, Used);
464ulbl({bs_final,Lbl,_}, Used) ->
465    mark_used(Lbl, Used);
466ulbl({bs_add,Lbl,_,_}, Used) ->
467    mark_used(Lbl, Used);
468ulbl({bs_bits_to_bytes,Lbl,_,_}, Used) ->
469    mark_used(Lbl, Used);
470ulbl(_, Used) -> Used.
471
472mark_used({f,0}, Used) -> Used;
473mark_used({f,L}, Used) -> gb_sets:add(L, Used);
474mark_used(_, Used) -> Used.
475
476mark_used_list([H|T], Used) ->
477    mark_used_list(T, mark_used(H, Used));
478mark_used_list([], Used) -> Used.
479