1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 1999-2020. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20%%% Purpose : Optimise jumps and remove unreachable code. 21 22-module(beam_jump). 23 24-export([module/2, 25 remove_unused_labels/1]). 26 27%%% The following optimisations are done: 28%%% 29%%% (1) This code with two identical instruction sequences 30%%% 31%%% L1: <Instruction sequence> 32%%% L2: 33%%% . . . 34%%% L3: <Instruction sequence> 35%%% L4: 36%%% 37%%% can be replaced with 38%%% 39%%% L1: jump L3 40%%% L2: 41%%% . . . 42%%% L3: <Instruction sequence> 43%%% L4 44%%% 45%%% Note: The instruction sequence must end with an instruction 46%%% such as a jump that never transfers control to the instruction 47%%% following it. 48%%% 49%%% (2) Short sequences starting with a label and ending in case_end, if_end, 50%%% and badmatch, and function calls that cause an exit (such as calls 51%%% to exit/1) are moved to the end of the function, but only if the 52%%% the block is not entered via a fallthrough. The purpose of this move 53%%% is to allow further optimizations at the place from which the 54%%% code was moved (a jump around the block could be replaced with a 55%%% fallthrough). 56%%% 57%%% (3) Any unreachable code is removed. Unreachable code is code 58%%% after jump, call_last and other instructions which never 59%%% transfer control to the following instruction. Code is 60%%% unreachable up to the next *referenced* label. Note that the 61%%% optimisations below might generate more possibilities for 62%%% removing unreachable code. 63%%% 64%%% (4) This code: 65%%% L1: jump L2 66%%% . . . 67%%% L2: ... 68%%% 69%%% will be changed to 70%%% 71%%% jump L2 72%%% . . . 73%%% L2: ... 74%%% 75%%% and all preceding uses of L1 renamed to L2. 76%%% If the jump is unreachable, it will be removed according to (1). 77%%% 78%%% (5) In 79%%% 80%%% jump L1 81%%% L1: 82%%% 83%%% the jump (but not the label) will be removed. 84%%% 85%%% (6) If test instructions are used to skip a single jump instruction, 86%%% the test is inverted and the jump is eliminated (provided that 87%%% the test can be inverted). Example: 88%%% 89%%% is_eq L1 {x,1} {x,2} 90%%% jump L2 91%%% L1: 92%%% 93%%% will be changed to 94%%% 95%%% is_ne L2 {x,1} {x,2} 96%%% L1: 97%%% 98%%% Because there may be backward references to the label L1 99%%% (for instance from the wait_timeout/1 instruction), we will 100%%% always keep the label. (beam_clean will remove any unused 101%%% labels.) 102%%% 103%%% (7) Replace a jump to a return instruction with a return instruction. 104%%% Similarly, replace a jump to deallocate + return with those 105%%% instructions. 106%%% 107%%% Note: This modules depends on (almost) all branches and jumps only 108%%% going forward, so that we can remove instructions (including definition 109%%% of labels) after any label that has not been referenced by the code 110%%% preceeding the labels. Regarding the few instructions that have backward 111%%% references to labels, we assume that they only transfer control back 112%%% to an instruction that has already been executed. That is, code such as 113%%% 114%%% jump L_entry 115%%% 116%%% L_again: 117%%% . 118%%% . 119%%% . 120%%% L_entry: 121%%% . 122%%% . 123%%% . 124%%% jump L_again; 125%%% 126%%% is NOT allowed (and such code is never generated by the code generator). 127%%% 128%%% Terminology note: The optimisation done here is called unreachable-code 129%%% removal, NOT dead-code elimination. Dead code elimination means the 130%%% removal of instructions that are executed, but have no visible effect 131%%% on the program state. 132%%% 133 134-import(lists, [foldl/3,keymember/3,mapfoldl/3,reverse/1,reverse/2]). 135 136-type instruction() :: beam_utils:instruction(). 137 138-include("beam_types.hrl"). 139 140-spec module(beam_utils:module_code(), [compile:option()]) -> 141 {'ok',beam_utils:module_code()}. 142 143module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) -> 144 {Fs,Lc} = mapfoldl(fun function/2, Lc0, Fs0), 145 {ok,{Mod,Exp,Attr,Fs,Lc}}. 146 147%% function(Function) -> Function' 148%% Optimize jumps and branches. 149%% 150%% NOTE: This function assumes that there are no labels inside blocks. 151function({function,Name,Arity,CLabel,Asm0}, Lc0) -> 152 try 153 Asm1 = eliminate_moves(Asm0), 154 {Asm2,Lc} = insert_labels(Asm1, Lc0, []), 155 Asm3 = share(Asm2), 156 Asm4 = move(Asm3), 157 Asm5 = opt(Asm4, CLabel), 158 Asm6 = unshare(Asm5), 159 Asm = remove_unused_labels(Asm6), 160 {{function,Name,Arity,CLabel,Asm},Lc} 161 catch 162 Class:Error:Stack -> 163 io:fwrite("Function: ~w/~w\n", [Name,Arity]), 164 erlang:raise(Class, Error, Stack) 165 end. 166 167%%% 168%%% Scan instructions in execution order and remove redundant 'move' 169%%% instructions. 'move' instructions are redundant if we know that 170%%% the register already contains the value being assigned, as in the 171%%% following code: 172%%% 173%%% select_val Register FailLabel [... Literal => L1...] 174%%% . 175%%% . 176%%% . 177%%% L1: move Literal Register 178%%% 179 180eliminate_moves(Is) -> 181 eliminate_moves(Is, #{}, []). 182 183eliminate_moves([{select,select_val,Reg,{f,Fail},List}=I|Is], D0, Acc) -> 184 D1 = add_unsafe_label(Fail, D0), 185 D = update_value_dict(List, Reg, D1), 186 eliminate_moves(Is, D, [I|Acc]); 187eliminate_moves([{test,is_eq_exact,_,[Reg,Val]}=I, 188 {block,BlkIs0}|Is], D0, Acc) -> 189 D = update_unsafe_labels(I, D0), 190 RegVal = {Reg,Val}, 191 BlkIs = eliminate_moves_blk(BlkIs0, RegVal), 192 eliminate_moves([{block,BlkIs}|Is], D, [I|Acc]); 193eliminate_moves([{test,is_nonempty_list,Fail,[Reg]}=I|Is], D0, Acc) -> 194 case is_proper_list(Reg, Acc) of 195 true -> 196 D = update_value_dict([nil,Fail], Reg, D0), 197 eliminate_moves(Is, D, [I|Acc]); 198 false -> 199 D = update_unsafe_labels(I, D0), 200 eliminate_moves(Is, D, [I|Acc]) 201 end; 202eliminate_moves([{label,Lbl},{block,BlkIs0}=Blk|Is], D, Acc0) -> 203 Acc = [{label,Lbl}|Acc0], 204 case {no_fallthrough(Acc0),D} of 205 {true,#{Lbl:={_,_}=RegVal}} -> 206 BlkIs = eliminate_moves_blk(BlkIs0, RegVal), 207 eliminate_moves([{block,BlkIs}|Is], D, Acc); 208 {_,_} -> 209 eliminate_moves([Blk|Is], D, Acc) 210 end; 211eliminate_moves([{call,_,_}=I|Is], D, Acc) -> 212 eliminate_moves_call(Is, D, [I | Acc]); 213eliminate_moves([{call_ext,_,_}=I|Is], D, Acc) -> 214 eliminate_moves_call(Is, D, [I | Acc]); 215eliminate_moves([{block,[]}|Is], D, Acc) -> 216 %% Empty blocks can prevent further jump optimizations. 217 eliminate_moves(Is, D, Acc); 218eliminate_moves([I|Is], D0, Acc) -> 219 D = update_unsafe_labels(I, D0), 220 eliminate_moves(Is, D, [I|Acc]); 221eliminate_moves([], _, Acc) -> reverse(Acc). 222 223eliminate_moves_call([{'%',{var_info,{x,0},Info}}=Anno, 224 {block,BlkIs0}=Blk | Is], D, Acc0) -> 225 Acc = [Anno | Acc0], 226 RetType = proplists:get_value(type, Info, none), 227 case beam_types:get_singleton_value(RetType) of 228 {ok, Value} -> 229 RegVal = {{x,0}, value_to_literal(Value)}, 230 BlkIs = eliminate_moves_blk(BlkIs0, RegVal), 231 eliminate_moves([{block,BlkIs}|Is], D, Acc); 232 error -> 233 eliminate_moves(Is, D, [Blk | Acc]) 234 end; 235eliminate_moves_call(Is, D, Acc) -> 236 eliminate_moves(Is, D, Acc). 237 238eliminate_moves_blk([{set,[Dst],[_],move}|_]=Is, {_,Dst}) -> 239 Is; 240eliminate_moves_blk([{set,[Dst],[Lit],move}|Is], {Dst,Lit}) -> 241 %% Remove redundant 'move' instruction. 242 Is; 243eliminate_moves_blk([{set,[Dst],[_],move}|_]=Is, {Dst,_}) -> 244 Is; 245eliminate_moves_blk([{set,[_],[_],move}=I|Is], {_,_}=RegVal) -> 246 [I|eliminate_moves_blk(Is, RegVal)]; 247eliminate_moves_blk(Is, _) -> Is. 248 249no_fallthrough([{'%',_} | Is]) -> 250 no_fallthrough(Is); 251no_fallthrough([I|_]) -> 252 is_unreachable_after(I). 253 254is_proper_list(Reg, [{'%',{var_info,Reg,Info}}|_]) -> 255 case proplists:get_value(type, Info) of 256 #t_list{terminator=nil} -> 257 true; 258 _ -> 259 %% Unknown type or not a proper list. 260 false 261 end; 262is_proper_list(Reg, [{'%',{var_info,_,_}}|Is]) -> 263 is_proper_list(Reg, Is); 264is_proper_list(_, _) -> false. 265 266value_to_literal([]) -> nil; 267value_to_literal(A) when is_atom(A) -> {atom,A}; 268value_to_literal(F) when is_float(F) -> {float,F}; 269value_to_literal(I) when is_integer(I) -> {integer,I}; 270value_to_literal(Other) -> {literal,Other}. 271 272update_value_dict([Lit,{f,Lbl}|T], Reg, D0) -> 273 D = case D0 of 274 #{Lbl:=unsafe} -> D0; 275 #{Lbl:={Reg,Lit}} -> D0; 276 #{Lbl:=_} -> D0#{Lbl:=unsafe}; 277 #{} -> D0#{Lbl=>{Reg,Lit}} 278 end, 279 update_value_dict(T, Reg, D); 280update_value_dict([], _, D) -> D. 281 282add_unsafe_label(L, D) -> 283 D#{L=>unsafe}. 284 285update_unsafe_labels(I, D) -> 286 Ls = instr_labels(I), 287 update_unsafe_labels_1(Ls, D). 288 289update_unsafe_labels_1([L|Ls], D) -> 290 update_unsafe_labels_1(Ls, D#{L=>unsafe}); 291update_unsafe_labels_1([], D) -> D. 292 293%%% 294%%% It seems to be useful to insert extra labels after certain 295%%% test instructions. This used to be done by beam_dead. 296%%% 297 298insert_labels([{test,Op,_,_}=I|Is], Lc, Acc) -> 299 Useful = case Op of 300 is_lt -> true; 301 is_ge -> true; 302 is_eq_exact -> true; 303 is_ne_exact -> true; 304 _ -> false 305 end, 306 case Useful of 307 false -> insert_labels(Is, Lc, [I|Acc]); 308 true -> insert_labels(Is, Lc+1, [{label,Lc},I|Acc]) 309 end; 310insert_labels([I|Is], Lc, Acc) -> 311 insert_labels(Is, Lc, [I|Acc]); 312insert_labels([], Lc, Acc) -> 313 {reverse(Acc),Lc}. 314 315%%% 316%%% (1) We try to share the code for identical code segments by replacing all 317%%% occurrences except the last with jumps to the last occurrence. 318%%% 319%%% We must not share code that raises an exception from outside a 320%%% try/catch block with code inside a try/catch block and vice versa, 321%%% because beam_validator will probably flag it as unsafe 322%%% (ambiguous_catch_try_state). The same goes for a plain catch. 323%%% 324 325share(Is0) -> 326 Is1 = eliminate_fallthroughs(Is0, []), 327 Is2 = find_fixpoint(fun(Is) -> 328 share_1(Is) 329 end, Is1), 330 reverse(Is2). 331 332share_1(Is) -> 333 Safe = classify_labels(Is), 334 share_1(Is, Safe, #{}, #{}, [], []). 335 336%% Note that we examine the instructions in reverse execution order. 337share_1([{label,L}=Lbl|Is], Safe, Dict0, Lbls0, [_|_]=Seq0, Acc) -> 338 Seq = maybe_add_scope(Seq0, L, Safe), 339 340 %% If there are try/catch or catch instructions in this function, 341 %% any line instructions in the sequence now include a scope. 342 case Dict0 of 343 #{Seq := Label} -> 344 %% This sequence of instructions has been seen previously. 345 %% The scope identifiers added to line instructions ensure 346 %% that two sequence will not be equal unless sharing is 347 %% also safe. 348 Lbls = Lbls0#{L => Label}, 349 share_1(Is, Safe, Dict0, Lbls, [], 350 [[Lbl,{jump,{f,Label}}]|Acc]); 351 #{} -> 352 %% This is first time we have seen this sequence of instructions. 353 %% Find out whether it is safe to share the sequence. 354 case (map_size(Safe) =:= 0 orelse 355 is_shareable(Seq)) andalso 356 unambigous_exit_call(Seq) 357 of 358 true -> 359 %% Either this function does not contain any try/catch 360 %% instructions, in which case it is always safe to share 361 %% exception-raising instructions such as if_end and 362 %% case_end, or it this sequence does not include 363 %% any problematic instructions. 364 Dict = Dict0#{Seq => L}, 365 share_1(Is, Safe, Dict, Lbls0, [], [[Lbl|Seq]|Acc]); 366 false -> 367 %% The sequence includes an inappropriate instruction 368 %% that may case beam_validator to complain about 369 %% an ambiguous try/catch state. 370 share_1(Is, Safe, Dict0, Lbls0, [], [[Lbl|Seq]|Acc]) 371 end 372 end; 373share_1([{func_info,_,_,_}|_]=Is0, _Safe, _, Lbls, [], Acc0) -> 374 %% Replace jumps to jumps with a jump to the final destination 375 %% (jump threading). This optimization is done in the main 376 %% optimization pass of this module, but we do it here too because 377 %% it can give more opportunities for sharing code. 378 F = case Lbls =:= #{} of 379 true -> 380 fun lists:reverse/2; 381 false -> 382 fun(Is, Acc) -> 383 beam_utils:replace_labels(Is, Acc, Lbls, 384 fun(Old) -> Old end) 385 end 386 end, 387 foldl(F, Is0, Acc0); 388share_1([{'catch',_,_}=I|Is], Safe, Dict, _Lbls0, Seq, Acc) -> 389 %% Disable the jump threading optimization because it may be unsafe. 390 share_1(Is, Safe, Dict, #{}, [I|Seq], Acc); 391share_1([{'try',_,_}=I|Is], Safe, Dict, _Lbls, Seq, Acc) -> 392 %% Disable the jump threading optimization because it may be unsafe. 393 share_1(Is, Safe, Dict, #{}, [I|Seq], Acc); 394share_1([{jump,{f,To}}=I,{label,From}=Lbl|Is], Safe, Dict0, Lbls0, _Seq, Acc) -> 395 Lbls = Lbls0#{From => To}, 396 share_1(Is, Safe, Dict0, Lbls, [], [[Lbl,I]|Acc]); 397share_1([I|Is], Safe, Dict, Lbls, Seq, Acc) -> 398 case is_unreachable_after(I) of 399 false -> 400 share_1(Is, Safe, Dict, Lbls, [I|Seq], Acc); 401 true -> 402 share_1(Is, Safe, Dict, Lbls, [I], Acc) 403 end. 404 405unambigous_exit_call([{call_ext,A,{extfunc,M,F,A}}|Is]) -> 406 case erl_bifs:is_exit_bif(M, F, A) of 407 true -> 408 %% beam_validator requires that the size of the stack 409 %% frame is unambigously known when a function is called. 410 %% 411 %% That means that we must be careful when sharing function 412 %% calls. 413 %% 414 %% In practice, it seems that only exit BIFs can 415 %% potentially be shared in an unsafe way, and only in 416 %% rare circumstances. (See the undecided_allocation_1/1 417 %% function in beam_jump_SUITE.) 418 %% 419 %% To ensure that the frame size is unambigous, only allow 420 %% sharing of a call to exit BIFs if the call is followed 421 %% by an instruction that indicates the size of the stack 422 %% frame (that is almost always the case in real-world 423 %% code). 424 case Is of 425 [{deallocate,_}|_] -> true; 426 [return] -> true; 427 _ -> false 428 end; 429 false -> 430 true 431 end; 432unambigous_exit_call([_|Is]) -> 433 unambigous_exit_call(Is); 434unambigous_exit_call([]) -> true. 435 436%% If the label has a scope set, assign it to any line instruction 437%% in the sequence. 438maybe_add_scope(Seq, L, Safe) -> 439 case Safe of 440 #{L := Scope} -> add_scope(Seq, Scope); 441 #{} -> Seq 442 end. 443 444add_scope([{line,Loc}=I|Is], Scope) -> 445 case keymember(scope, 1, Loc) of 446 false -> 447 [{line,[{scope,Scope}|Loc]}|add_scope(Is, Scope)]; 448 true -> 449 [I|add_scope(Is, Scope)] 450 end; 451add_scope([I|Is], Scope) -> 452 [I|add_scope(Is, Scope)]; 453add_scope([], _Scope) -> []. 454 455is_shareable([build_stacktrace|_]) -> false; 456is_shareable([{case_end,_}|_]) -> false; 457is_shareable([{'catch',_,_}|_]) -> false; 458is_shareable([{catch_end,_}|_]) -> false; 459is_shareable([if_end|_]) -> false; 460is_shareable([{'try',_,_}|_]) -> false; 461is_shareable([{try_case,_}|_]) -> false; 462is_shareable([{try_end,_}|_]) -> false; 463is_shareable([_|Is]) -> is_shareable(Is); 464is_shareable([]) -> true. 465 466%% 467%% Classify labels according to where the instructions that branch to 468%% the labels are located. Each label is assigned a set of scope 469%% identifers (but usually just one). If two labels have different 470%% scope identfiers, sharing a sequence that raises an exception 471%% between the labels may not be safe, because one label is inside a 472%% try/catch, and the other label is outside. Note that we don't care 473%% where the labels themselves are located, only from where the 474%% branches to them are located. 475%% 476%% If there is more than one scope in the function (that is, if there 477%% try/catch or catch in the function), the scope identifiers will be 478%% added to the line instructions. Recording the scope in the line 479%% instructions makes beam_jump idempotent, ensuring that beam_jump 480%% will not do any unsafe optimizations when when compiling from a .S 481%% file. 482%% 483 484classify_labels(Is) -> 485 classify_labels(Is, 0, #{}). 486 487classify_labels([{'catch',_,_}|Is], Scope, Safe) -> 488 classify_labels(Is, Scope+1, Safe); 489classify_labels([{catch_end,_}|Is], Scope, Safe) -> 490 classify_labels(Is, Scope+1, Safe); 491classify_labels([{'try',_,_}|Is], Scope, Safe) -> 492 classify_labels(Is, Scope+1, Safe); 493classify_labels([{'try_end',_}|Is], Scope, Safe) -> 494 classify_labels(Is, Scope+1, Safe); 495classify_labels([{'try_case',_}|Is], Scope, Safe) -> 496 classify_labels(Is, Scope+1, Safe); 497classify_labels([{'try_case_end',_}|Is], Scope, Safe) -> 498 classify_labels(Is, Scope+1, Safe); 499classify_labels([I|Is], Scope, Safe0) -> 500 Labels = instr_labels(I), 501 Safe = foldl(fun(L, A) -> 502 case A of 503 #{L := [Scope]} -> A; 504 #{L := Other} -> A#{L => ordsets:add_element(Scope, Other)}; 505 #{} -> A#{L => [Scope]} 506 end 507 end, Safe0, Labels), 508 classify_labels(Is, Scope, Safe); 509classify_labels([], Scope, Safe) -> 510 case Scope of 511 0 -> 512 %% No try/catch or catch in this function. We don't 513 %% need the collected information. 514 #{}; 515 _ -> 516 Safe 517 end. 518 519%% Eliminate all fallthroughs. Return the result reversed. 520 521eliminate_fallthroughs([{label,L}=Lbl|Is], [I|_]=Acc) -> 522 case is_unreachable_after(I) of 523 false -> 524 %% Eliminate fallthrough. 525 eliminate_fallthroughs(Is, [Lbl,{jump,{f,L}}|Acc]); 526 true -> 527 eliminate_fallthroughs(Is, [Lbl|Acc]) 528 end; 529eliminate_fallthroughs([I|Is], Acc) -> 530 eliminate_fallthroughs(Is, [I|Acc]); 531eliminate_fallthroughs([], Acc) -> Acc. 532 533%%% 534%%% (2) Move short code sequences ending in an instruction that causes an exit 535%%% to the end of the function. 536%%% 537%%% Implementation note: Since share/1 eliminated fallthroughs to labels, 538%%% we don't have to test whether instructions before labels may fail through. 539%%% 540move(Is) -> 541 move_1(Is, [], []). 542 543move_1([I|Is], Ends, Acc0) -> 544 case is_exit_instruction(I) of 545 false -> 546 move_1(Is, Ends, [I|Acc0]); 547 true -> 548 case extract_seq(Acc0, [I]) of 549 no -> 550 move_1(Is, Ends, [I|Acc0]); 551 {yes,End,Acc} -> 552 move_1(Is, [End|Ends], Acc) 553 end 554 end; 555move_1([], Ends, Acc) -> reverse(Acc, lists:append(reverse(Ends))). 556 557extract_seq([{line,_}=Line|Is], Acc) -> 558 extract_seq(Is, [Line|Acc]); 559extract_seq([{block,_}=Bl|Is], Acc) -> 560 extract_seq_1(Is, [Bl|Acc]); 561extract_seq([{label,_}|_]=Is, Acc) -> 562 extract_seq_1(Is, Acc); 563extract_seq(_, _) -> no. 564 565extract_seq_1([{line,_}=Line|Is], Acc) -> 566 extract_seq_1(Is, [Line|Acc]); 567extract_seq_1([{label,_},{func_info,_,_,_}|_], _) -> 568 no; 569extract_seq_1([{label,Lbl},{jump,{f,Lbl}}|_], _) -> 570 %% Don't move a sequence which have a fallthrough entering it. 571 no; 572extract_seq_1([{label,_}=Lbl|Is], Acc) -> 573 {yes,[Lbl|Acc],Is}; 574extract_seq_1(_, _) -> no. 575 576%%% 577%%% (3) (4) (5) (6) Jump and unreachable code optimizations. 578%%% 579 580-record(st, 581 { 582 entry :: beam_asm:label(), %Entry label (must not be moved). 583 replace :: #{beam_asm:label() := beam_asm:label()}, %Labels to replace. 584 labels :: sets:set() %Set of referenced labels. 585 }). 586 587opt(Is0, CLabel) -> 588 find_fixpoint(fun(Is) -> 589 Lbls = initial_labels(Is), 590 St = #st{entry=CLabel,replace=#{},labels=Lbls}, 591 opt(Is, [], St) 592 end, Is0). 593 594find_fixpoint(OptFun, Is0) -> 595 case OptFun(Is0) of 596 Is0 -> Is0; 597 Is -> find_fixpoint(OptFun, Is) 598 end. 599 600opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) -> 601 %% We have 602 %% Test Label Ops 603 %% jump Label 604 %% The test instruction is not needed if the test is pure 605 %% (it modifies neither registers nor bit syntax state). 606 case beam_utils:is_pure_test(I) of 607 false -> 608 %% Test is not pure; we must keep it. 609 opt(Is, [I|Acc], label_used(Lbl, St)); 610 true -> 611 %% The test is pure and its failure label is the same 612 %% as in the jump that follows -- thus it is not needed. 613 opt(Is, Acc, St) 614 end; 615opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) -> 616 case is_label_defined(Is, L) of 617 false -> 618 opt(Is0, [I|Acc], label_used(Lbl, St)); 619 true -> 620 case invert_test(Test0) of 621 not_possible -> 622 opt(Is0, [I|Acc], label_used(Lbl, St)); 623 Test -> 624 %% Invert the test and remove the jump. 625 opt([{test,Test,To,Ops}|Is], Acc, St) 626 end 627 end; 628opt([{test,_,{f,_}=Lbl,_}=I|Is], Acc, St) -> 629 opt(Is, [I|Acc], label_used(Lbl, St)); 630opt([{test,_,{f,_}=Lbl,_,_,_}=I|Is], Acc, St) -> 631 opt(Is, [I|Acc], label_used(Lbl, St)); 632opt([{select,_,_R,Fail,Vls}=I|Is], Acc, St) -> 633 skip_unreachable(Is, [I|Acc], label_used([Fail|Vls], St)); 634opt([{label,From}=I,{label,To}|Is], Acc, #st{replace=Replace}=St) -> 635 opt([I|Is], Acc, St#st{replace=Replace#{To => From}}); 636opt([{jump,{f,_}=X}|[{label,_},{jump,X}|_]=Is], Acc, St) -> 637 opt(Is, Acc, St); 638opt([{jump,{f,Lbl}}|[{label,Lbl}|_]=Is], Acc, St) -> 639 opt(Is, Acc, St); 640opt([{jump,{f,L}=Lbl}=I|Is], Acc0, St0) -> 641 %% Replace all labels before this jump instruction into the 642 %% location of the jump's target. 643 {Acc,St} = collect_labels(Acc0, L, St0), 644 skip_unreachable(Is, [I|Acc], label_used(Lbl, St)); 645%% Optimization: quickly handle some common instructions that don't 646%% have any failure labels and where is_unreachable_after(I) =:= false. 647opt([{block,_}=I|Is], Acc, St) -> 648 opt(Is, [I|Acc], St); 649opt([{call,_,_}=I|Is], Acc, St) -> 650 opt(Is, [I|Acc], St); 651opt([{deallocate,_}=I|Is], Acc, St) -> 652 opt(Is, [I|Acc], St); 653%% All other instructions. 654opt([I|Is], Acc, #st{labels=Used0}=St0) -> 655 Used = ulbl(I, Used0), 656 St = St0#st{labels=Used}, 657 case is_unreachable_after(I) of 658 true -> skip_unreachable(Is, [I|Acc], St); 659 false -> opt(Is, [I|Acc], St) 660 end; 661opt([], Acc, #st{replace=Replace0}) when Replace0 =/= #{} -> 662 Replace = normalize_replace(maps:to_list(Replace0), Replace0, []), 663 beam_utils:replace_labels(Acc, [], Replace, fun(Old) -> Old end); 664opt([], Acc, #st{replace=Replace}) when Replace =:= #{} -> 665 reverse(Acc). 666 667normalize_replace([{From,To0}|Rest], Replace, Acc) -> 668 case Replace of 669 #{To0 := To} -> 670 normalize_replace([{From,To}|Rest], Replace, Acc); 671 _ -> 672 normalize_replace(Rest, Replace, [{From,To0}|Acc]) 673 end; 674normalize_replace([], _Replace, Acc) -> 675 maps:from_list(Acc). 676 677collect_labels(Is, Label, #st{entry=Entry,replace=Replace} = St) -> 678 collect_labels_1(Is, Label, Entry, Replace, St). 679 680collect_labels_1([{label,Entry}|_]=Is, _Label, Entry, Acc, St) -> 681 %% Never move the entry label. 682 {Is,St#st{replace=Acc}}; 683collect_labels_1([{label,L}|Is], Label, Entry, Acc, St) -> 684 collect_labels_1(Is, Label, Entry, Acc#{L => Label}, St); 685collect_labels_1(Is, _Label, _Entry, Acc, St) -> 686 {Is,St#st{replace=Acc}}. 687 688%% label_defined(Is, Label) -> true | false. 689%% Test whether the label Label is defined at the start of the instruction 690%% sequence, possibly preceeded by other label definitions. 691%% 692is_label_defined([{label,L}|_], L) -> true; 693is_label_defined([{label,_}|Is], L) -> is_label_defined(Is, L); 694is_label_defined(_, _) -> false. 695 696%% invert_test(Test0) -> not_possible | Test 697 698invert_test(is_ge) -> is_lt; 699invert_test(is_lt) -> is_ge; 700invert_test(is_eq) -> is_ne; 701invert_test(is_ne) -> is_eq; 702invert_test(is_eq_exact) -> is_ne_exact; 703invert_test(is_ne_exact) -> is_eq_exact; 704invert_test(_) -> not_possible. 705 706%% skip_unreachable([Instruction], St). 707%% Remove all instructions (including definitions of labels 708%% that have not been referenced yet) up to the next 709%% referenced label, then call opt/3 to optimize the rest 710%% of the instruction sequence. 711%% 712skip_unreachable([{label,L}|_Is]=Is0, [{jump,{f,L}}|Acc], St) -> 713 opt(Is0, Acc, St); 714skip_unreachable([{label,L}|Is]=Is0, Acc, St) -> 715 case is_label_used(L, St) of 716 true -> opt(Is0, Acc, St); 717 false -> skip_unreachable(Is, Acc, St) 718 end; 719skip_unreachable([_|Is], Acc, St) -> 720 skip_unreachable(Is, Acc, St); 721skip_unreachable([], Acc, St) -> 722 opt([], Acc, St). 723 724%% Add one or more label to the set of used labels. 725 726label_used({f,L}, St) -> St#st{labels=sets:add_element(L,St#st.labels)}; 727label_used([H|T], St0) -> label_used(T, label_used(H, St0)); 728label_used([], St) -> St; 729label_used(_Other, St) -> St. 730 731%% Test if label is used. 732 733is_label_used(L, St) -> 734 sets:is_element(L, St#st.labels). 735 736%% is_unreachable_after(Instruction) -> boolean() 737%% Test whether the code after Instruction is unreachable. 738 739-spec is_unreachable_after(instruction()) -> boolean(). 740 741is_unreachable_after({func_info,_M,_F,_A}) -> true; 742is_unreachable_after(return) -> true; 743is_unreachable_after({jump,_Lbl}) -> true; 744is_unreachable_after({select,_What,_R,_Lbl,_Cases}) -> true; 745is_unreachable_after({loop_rec_end,_}) -> true; 746is_unreachable_after({wait,_}) -> true; 747is_unreachable_after(I) -> is_exit_instruction(I). 748 749%% is_exit_instruction(Instruction) -> boolean() 750%% Test whether the instruction Instruction always 751%% causes an exit/failure. 752 753-spec is_exit_instruction(instruction()) -> boolean(). 754 755is_exit_instruction(if_end) -> true; 756is_exit_instruction({case_end,_}) -> true; 757is_exit_instruction({try_case_end,_}) -> true; 758is_exit_instruction({badmatch,_}) -> true; 759is_exit_instruction(_) -> false. 760 761%% remove_unused_labels(Instructions0) -> Instructions 762%% Remove all unused labels. Also remove unreachable 763%% instructions following labels that are removed. 764 765-spec remove_unused_labels([instruction()]) -> [instruction()]. 766 767remove_unused_labels(Is) -> 768 Used0 = initial_labels(Is), 769 Used = foldl(fun ulbl/2, Used0, Is), 770 rem_unused(Is, Used, []). 771 772rem_unused([{label,Lbl}=I|Is0], Used, [Prev|_]=Acc) -> 773 case sets:is_element(Lbl, Used) of 774 false -> 775 Is = case is_unreachable_after(Prev) of 776 true -> drop_upto_label(Is0); 777 false -> Is0 778 end, 779 rem_unused(Is, Used, Acc); 780 true -> 781 rem_unused(Is0, Used, [I|Acc]) 782 end; 783rem_unused([I|Is], Used, Acc) -> 784 rem_unused(Is, Used, [I|Acc]); 785rem_unused([], _, Acc) -> reverse(Acc). 786 787initial_labels(Is) -> 788 initial_labels(Is, []). 789 790initial_labels([{line,_}|Is], Acc) -> 791 initial_labels(Is, Acc); 792initial_labels([{label,Lbl}|Is], Acc) -> 793 initial_labels(Is, [Lbl|Acc]); 794initial_labels([{func_info,_,_,_},{label,Lbl}|_], Acc) -> 795 sets:from_list([Lbl|Acc], [{version, 2}]). 796 797drop_upto_label([{label,_}|_]=Is) -> Is; 798drop_upto_label([_|Is]) -> drop_upto_label(Is); 799drop_upto_label([]) -> []. 800 801%% unshare([Instruction]) -> [Instruction]. 802%% Replace a jump to a return sequence (a `return` instruction 803%% optionally preced by a `deallocate` instruction) with the return 804%% sequence. This always saves execution time and may also save code 805%% space (depending on the architecture). Eliminating `jump` 806%% instructions also gives beam_trim more opportunities to trim the 807%% stack. 808 809unshare(Is) -> 810 Short = unshare_collect_short(Is, #{}), 811 unshare_short(Is, Short). 812 813unshare_collect_short([{label,L},return|Is], Map) -> 814 unshare_collect_short(Is, Map#{L=>[return]}); 815unshare_collect_short([{label,L},{deallocate,_}=D,return|Is], Map) -> 816 %% `deallocate` and `return` are combined into one instruction by 817 %% the loader. 818 unshare_collect_short(Is, Map#{L=>[D,return]}); 819unshare_collect_short([_|Is], Map) -> 820 unshare_collect_short(Is, Map); 821unshare_collect_short([], Map) -> Map. 822 823unshare_short([{jump,{f,F}}=I|Is], Map) -> 824 case Map of 825 #{F:=Seq} -> 826 Seq ++ unshare_short(Is, Map); 827 #{} -> 828 [I|unshare_short(Is, Map)] 829 end; 830unshare_short([I|Is], Map) -> 831 [I|unshare_short(Is, Map)]; 832unshare_short([], _Map) -> []. 833 834%% ulbl(Instruction, UsedCerlSet) -> UsedCerlSet' 835%% Update the cerl_set UsedCerlSet with any function-local labels 836%% (i.e. not with labels in call instructions) referenced by 837%% the instruction Instruction. 838%% 839%% NOTE: This function does NOT look for labels inside blocks. 840 841ulbl(I, Used) -> 842 case instr_labels(I) of 843 [] -> 844 Used; 845 [Lbl] -> 846 sets:add_element(Lbl, Used); 847 [_|_]=L -> 848 ulbl_list(L, Used) 849 end. 850 851ulbl_list([L|Ls], Used) -> 852 ulbl_list(Ls, sets:add_element(L, Used)); 853ulbl_list([], Used) -> Used. 854 855-spec instr_labels(Instruction) -> Labels when 856 Instruction :: instruction(), 857 Labels :: [beam_asm:label()]. 858 859instr_labels({test,_,Fail,_}) -> 860 do_instr_labels(Fail); 861instr_labels({test,_,Fail,_,_,_}) -> 862 do_instr_labels(Fail); 863instr_labels({select,_,_,Fail,Vls}) -> 864 do_instr_labels_list(Vls, do_instr_labels(Fail)); 865instr_labels({'try',_,Lbl}) -> 866 do_instr_labels(Lbl); 867instr_labels({'catch',_,Lbl}) -> 868 do_instr_labels(Lbl); 869instr_labels({jump,Lbl}) -> 870 do_instr_labels(Lbl); 871instr_labels({loop_rec,Lbl,_}) -> 872 do_instr_labels(Lbl); 873instr_labels({loop_rec_end,Lbl}) -> 874 do_instr_labels(Lbl); 875instr_labels({wait,Lbl}) -> 876 do_instr_labels(Lbl); 877instr_labels({wait_timeout,Lbl,_To}) -> 878 do_instr_labels(Lbl); 879instr_labels({bif,_Name,Lbl,_As,_R}) -> 880 do_instr_labels(Lbl); 881instr_labels({gc_bif,_Name,Lbl,_Live,_As,_R}) -> 882 do_instr_labels(Lbl); 883instr_labels({bs_init,Lbl,_,_,_,_}) -> 884 do_instr_labels(Lbl); 885instr_labels({bs_put,Lbl,_,_}) -> 886 do_instr_labels(Lbl); 887instr_labels({put_map,Lbl,_Op,_Src,_Dst,_Live,_List}) -> 888 do_instr_labels(Lbl); 889instr_labels({get_map_elements,Lbl,_Src,_List}) -> 890 do_instr_labels(Lbl); 891instr_labels({bs_start_match4,Fail,_,_,_}) -> 892 case Fail of 893 {f,L} -> [L]; 894 {atom,_} -> [] 895 end; 896instr_labels(_) -> 897 []. 898 899do_instr_labels({f,0}) -> []; 900do_instr_labels({f,F}) -> [F]. 901 902do_instr_labels_list([{f,F}|T], Acc) -> 903 do_instr_labels_list(T, [F|Acc]); 904do_instr_labels_list([_|T], Acc) -> 905 do_instr_labels_list(T, Acc); 906do_instr_labels_list([], Acc) -> Acc. 907