1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2018-2020. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20 21%%% 22%%% This is a collection of various optimizations that don't need a separate 23%%% pass by themselves and/or are mutually beneficial to other passes. 24%%% 25%%% The optimizations are applied in "phases," each with a list of sub-passes 26%%% to run. These sub-passes are applied on all functions in a module before 27%%% moving on to the next phase, which lets us gather module-level information 28%%% in one phase and then apply it in the next without having to risk working 29%%% with incomplete information. 30%%% 31%%% Each sub-pass operates on a #opt_st{} record and a func_info_db(), where 32%%% the former is just a #b_function{} whose blocks can be represented either 33%%% in linear or map form, and the latter is a map with information about all 34%%% functions in the module (see beam_ssa_opt.hrl for more details). 35%%% 36 37-module(beam_ssa_opt). 38-export([module/2]). 39 40-include("beam_ssa_opt.hrl"). 41 42-import(lists, [all/2,append/1,duplicate/2,flatten/1,foldl/3, 43 keyfind/3,last/1,mapfoldl/3,member/2, 44 partition/2,reverse/1,reverse/2, 45 splitwith/2,sort/1,takewhile/2,unzip/1]). 46 47-define(MAX_REPETITIONS, 16). 48 49-spec module(beam_ssa:b_module(), [compile:option()]) -> 50 {'ok',beam_ssa:b_module()}. 
%% Entry point for this compiler pass. Builds the function information
%% database (unless module-level optimization is disabled with the
%% 'no_module_opt' option) and then runs all optimization phases over
%% the module.
module(Module, Opts) ->
    FuncDb = case proplists:get_value(no_module_opt, Opts, false) of
                 false -> build_func_db(Module);
                 true -> #{}
             end,

    %% Passes that perform module-level optimizations are often aided by
    %% optimizing callers before callees and vice versa, so we optimize all
    %% functions in call order, alternating the order every time.
    StMap0 = build_st_map(Module),
    Order = get_call_order_po(StMap0, FuncDb),

    Phases = [{once, Order, prologue_passes(Opts)},
              {module, module_passes(Opts)},
              {fixpoint, Order, repeated_passes(Opts)},
              {once, Order, epilogue_passes(Opts)}],

    StMap = run_phases(Phases, StMap0, FuncDb),
    {ok, finish(Module, StMap)}.

%% Run the phases in order. A 'module' phase runs its sub-passes on the
%% whole module at once; a 'once' phase runs its sub-passes a single
%% time for each function; a 'fixpoint' phase repeats its sub-passes
%% until nothing changes (or ?MAX_REPETITIONS is reached).
run_phases([{module, Passes} | Phases], StMap0, FuncDb0) ->
    {StMap, FuncDb} = compile:run_sub_passes(Passes, {StMap0, FuncDb0}),
    run_phases(Phases, StMap, FuncDb);
run_phases([{once, FuncIds0, Passes} | Phases], StMap0, FuncDb0) ->
    FuncIds = skip_removed(FuncIds0, StMap0),
    {StMap, FuncDb} = phase(FuncIds, Passes, StMap0, FuncDb0),
    run_phases(Phases, StMap, FuncDb);
run_phases([{fixpoint, FuncIds0, Passes} | Phases], StMap0, FuncDb0) ->
    FuncIds = skip_removed(FuncIds0, StMap0),
    RevFuncIds = reverse(FuncIds),
    Order = {FuncIds, RevFuncIds},
    {StMap, FuncDb} = fixpoint(RevFuncIds, Order, Passes,
                               StMap0, FuncDb0, ?MAX_REPETITIONS),
    run_phases(Phases, StMap, FuncDb);
run_phases([], StMap, _FuncDb) ->
    StMap.

%% Filter out functions that previous phases have removed from the module.
skip_removed(FuncIds, StMap) ->
    [F || F <- FuncIds, is_map_key(F, StMap)].

%% Run the given passes until a fixpoint is reached.
fixpoint(_FuncIds, _Order, _Passes, StMap, FuncDb, 0) ->
    %% Too many repetitions. Give up and return what we have.
    {StMap, FuncDb};
fixpoint(FuncIds0, Order0, Passes, StMap0, FuncDb0, N) ->
    {StMap, FuncDb} = phase(FuncIds0, Passes, StMap0, FuncDb0),
    Repeat = changed(FuncIds0, FuncDb0, FuncDb, StMap0, StMap),
    case sets:is_empty(Repeat) of
        true ->
            %% No change. Fixpoint reached.
            {StMap, FuncDb};
        false ->
            %% Repeat the optimizations for functions whose code has
            %% changed or for which there is potentially updated type
            %% information.
            {OrderA, OrderB} = Order0,
            Order = {OrderB, OrderA},
            FuncIds = [Id || Id <- OrderA, sets:is_element(Id, Repeat)],
            fixpoint(FuncIds, Order, Passes, StMap, FuncDb, N - 1)
    end.

%% Run all sub-passes on each of the given functions, threading the
%% function information database through the whole run. Should a
%% sub-pass crash, print the name of the function being optimized
%% before re-raising to make compiler crashes easier to diagnose.
phase([FuncId | Ids], Ps, StMap, FuncDb0) ->
    try compile:run_sub_passes(Ps, {map_get(FuncId, StMap), FuncDb0}) of
        {St, FuncDb} ->
            phase(Ids, Ps, StMap#{ FuncId => St }, FuncDb)
    catch
        Class:Error:Stack ->
            #b_local{name=#b_literal{val=Name},arity=Arity} = FuncId,
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Error, Stack)
    end;
phase([], _Ps, StMap, FuncDb) ->
    {StMap, FuncDb}.

%% Return the set of functions that may benefit from being optimized
%% again, either because type information changed or because their SSA
%% code changed in the previous run.
changed(PrevIds, FuncDb0, FuncDb, StMap0, StMap) ->
    %% Find all functions in FuncDb that can be reached by changes
    %% of argument and/or return types. Those are the functions that
    %% may gain from running the optimization passes again.
    %%
    %% Note that we examine all functions in FuncDb, not only functions
    %% optimized in the previous run, because the argument types can
    %% have been updated for functions not included in the previous run.

    F = fun(Id, A) ->
                case sets:is_element(Id, A) of
                    true ->
                        A;
                    false ->
                        {#func_info{arg_types=ATs0,succ_types=ST0},
                         #func_info{arg_types=ATs1,succ_types=ST1}} =
                            {map_get(Id, FuncDb0),map_get(Id, FuncDb)},

                        %% If the argument types have changed for this
                        %% function, re-optimize this function and all
                        %% functions it calls directly or indirectly.
                        %%
                        %% If the return type has changed, re-optimize
                        %% this function and all functions that call
                        %% this function directly or indirectly.
                        Opts = case ATs0 =:= ATs1 of
                                   true -> [];
                                   false -> [called]
                               end ++
                            case ST0 =:= ST1 of
                                true -> [];
                                false -> [callers]
                            end,
                        case Opts of
                            [] -> A;
                            [_|_] -> add_changed([Id], Opts, FuncDb, A)
                        end
                end
        end,
    Ids = foldl(F, sets:new([{version, 2}]), maps:keys(FuncDb)),

    %% From all functions that were optimized in the previous run,
    %% find the functions that had any change in the SSA code. Those
    %% functions might gain from being optimized again. (For example,
    %% when beam_ssa_dead has shortcut branches, the types for some
    %% variables could become narrower, giving beam_ssa_type new
    %% opportunities for optimization.)
    %%
    %% Note that the functions examined could be functions with module-level
    %% optimization turned off (and thus not included in FuncDb).

    foldl(fun(Id, A) ->
                  case sets:is_element(Id, A) of
                      true ->
                          %% Already scheduled for another optimization.
                          %% No need to compare the SSA code.
                          A;
                      false ->
                          %% Compare the SSA code before and after optimization.
                          case {map_get(Id, StMap0),map_get(Id, StMap)} of
                              {Same,Same} -> A;
                              {_,_} -> sets:add_element(Id, A)
                          end
                  end
          end, Ids, PrevIds).

%% Add the given functions to the changed set S0, together with their
%% callers ('callers' in Opts) and/or callees ('called' in Opts),
%% transitively.
add_changed([Id|Ids], Opts, FuncDb, S0) when is_map_key(Id, FuncDb) ->
    case sets:is_element(Id, S0) of
        true ->
            add_changed(Ids, Opts, FuncDb, S0);
        false ->
            S1 = sets:add_element(Id, S0),
            #func_info{in=In,out=Out} = map_get(Id, FuncDb),
            S2 = case member(callers, Opts) of
                     true -> add_changed(In, Opts, FuncDb, S1);
                     false -> S1
                 end,
            S = case member(called, Opts) of
                    true -> add_changed(Out, Opts, FuncDb, S2);
                    false -> S2
                end,
            add_changed(Ids, Opts, FuncDb, S)
    end;
add_changed([_|Ids], Opts, FuncDb, S) ->
    %% This function is exempt from module-level optimization and will not
    %% provide any more information.
    add_changed(Ids, Opts, FuncDb, S);
add_changed([], _, _, S) -> S.

%%

%% Return the #b_local{} identifier for a function, taken from its
%% 'func_info' annotation.
get_func_id(F) ->
    {_Mod, Name, Arity} = beam_ssa:get_anno(func_info, F),
    #b_local{name=#b_literal{val=Name}, arity=Arity}.

%% Build the map from function identifiers to the #opt_st{} records
%% that the sub-passes operate on.
-spec build_st_map(#b_module{}) -> st_map().
build_st_map(#b_module{body=Fs}) ->
    build_st_map_1(Fs, #{}).

build_st_map_1([F | Fs], Map) ->
    #b_function{anno=Anno,args=Args,cnt=Counter,bs=Bs} = F,
    St = #opt_st{anno=Anno,args=Args,cnt=Counter,ssa=Bs},
    build_st_map_1(Fs, Map#{ get_func_id(F) => St });
build_st_map_1([], Map) ->
    Map.

%% Write the optimized code back into the module. Functions that are no
%% longer present in the state map have been removed by the
%% optimizations and are dropped from the module body.
-spec finish(#b_module{}, st_map()) -> #b_module{}.
finish(#b_module{body=Fs0}=Module, StMap) ->
    Module#b_module{body=finish_1(Fs0, StMap)}.

finish_1([F0 | Fs], StMap) ->
    FuncId = get_func_id(F0),
    case StMap of
        #{ FuncId := #opt_st{anno=Anno,cnt=Counter,ssa=Blocks} } ->
            F = F0#b_function{anno=Anno,bs=Blocks,cnt=Counter},
            [F | finish_1(Fs, StMap)];
        #{} ->
            finish_1(Fs, StMap)
    end;
finish_1([], _StMap) ->
    [].

%%

-define(PASS(N), {N,fun N/1}).

%% Sub-passes run once, before the module-level passes.
prologue_passes(Opts) ->
    Ps = [?PASS(ssa_opt_split_blocks),
          ?PASS(ssa_opt_coalesce_phis),
          ?PASS(ssa_opt_tail_phis),
          ?PASS(ssa_opt_element),
          ?PASS(ssa_opt_linearize),
          ?PASS(ssa_opt_tuple_size),
          ?PASS(ssa_opt_record),
          ?PASS(ssa_opt_cse),                   % Helps the first type pass.
          ?PASS(ssa_opt_live)],                 % ...
    passes_1(Ps, Opts).

%% Passes that operate on the whole module at once.
module_passes(Opts) ->
    Ps0 = [{ssa_opt_bc_size,
            fun({StMap, FuncDb}) ->
                    {beam_ssa_bc_size:opt(StMap), FuncDb}
            end},
           {ssa_opt_type_start,
            fun({StMap, FuncDb}) ->
                    beam_ssa_type:opt_start(StMap, FuncDb)
            end}],
    passes_1(Ps0, Opts).

%% These passes all benefit from each other (in roughly this order), so they
%% are repeated as required.
repeated_passes(Opts) ->
    Passes = [?PASS(ssa_opt_live),
              ?PASS(ssa_opt_ne),
              ?PASS(ssa_opt_bs_puts),
              ?PASS(ssa_opt_dead),
              ?PASS(ssa_opt_cse),
              ?PASS(ssa_opt_tail_phis),
              ?PASS(ssa_opt_sink),
              ?PASS(ssa_opt_tuple_size),
              ?PASS(ssa_opt_record),
              ?PASS(ssa_opt_try),

              %% Must run after ssa_opt_dead to clean up phi nodes.
              ?PASS(ssa_opt_type_continue)],
    passes_1(Passes, Opts).

%% Sub-passes run once, after the fixpoint passes are done.
epilogue_passes(Opts) ->
    Passes = [?PASS(ssa_opt_type_finish),
              ?PASS(ssa_opt_float),
              ?PASS(ssa_opt_sw),

              %% Run live one more time to clean up after the previous
              %% epilogue passes.
              ?PASS(ssa_opt_live),
              ?PASS(ssa_opt_bsm),
              ?PASS(ssa_opt_bsm_shortcut),
              ?PASS(ssa_opt_sink),
              ?PASS(ssa_opt_blockify),
              ?PASS(ssa_opt_merge_blocks),
              ?PASS(ssa_opt_get_tuple_element),
              ?PASS(ssa_opt_tail_calls),
              ?PASS(ssa_opt_trim_unreachable),
              ?PASS(ssa_opt_unfold_literals)],
    passes_1(Passes, Opts).

%% Honor 'no_<pass name>' compiler options: every pass that has been
%% disabled is replaced by an identity pass under its negated name, so
%% that the structure of the pass list is preserved.
passes_1(Ps, Opts0) ->
    Negations = [{list_to_atom("no_"++atom_to_list(Name)),Name} ||
                    {Name,_} <- Ps],
    Opts = proplists:substitute_negations(Negations, Opts0),
    [passes_2(P, Negations, Opts) || P <- Ps].

%% Keep an enabled pass as-is; replace a disabled one with a no-op.
passes_2({Name,_}=Pass, Negations, Opts) ->
    case proplists:get_value(Name, Opts, true) of
        true ->
            Pass;
        false ->
            {NoName,Name} = keyfind(Name, 2, Negations),
            {NoName,fun(S) -> S end}
    end.

%% Builds a function information map with basic information about incoming and
%% outgoing local calls, as well as whether the function is exported.
-spec build_func_db(#b_module{}) -> func_info_db().
build_func_db(#b_module{body=Body,attributes=Attr,exports=Exports0}) ->
    Exports = fdb_exports(Attr, Exports0),
    try
        fdb_fs(Body, Exports, #{})
    catch
        throw:load_nif ->
            %% A NIF may override any function in the module, making all
            %% module-level optimizations unsound. Return an empty
            %% database to disable them.
            #{}
    end.
%% Collect the set of {Name,Arity} pairs that must be treated as
%% exported, including on_load functions.
fdb_exports([{on_load, L} | Attrs], Exports) ->
    %% Functions marked with on_load must be treated as exported to prevent
    %% them from being optimized away when unused.
    fdb_exports(Attrs, flatten(L) ++ Exports);
fdb_exports([_Attr | Attrs], Exports) ->
    fdb_exports(Attrs, Exports);
fdb_exports([], Exports) ->
    gb_sets:from_list(Exports).

%% Add an entry to the function information database for each function
%% in the module, then scan its blocks for local calls to record the
%% call graph edges.
fdb_fs([#b_function{ args=Args,bs=Bs }=F | Fs], Exports, FuncDb0) ->
    Id = get_func_id(F),

    #b_local{name=#b_literal{val=Name}, arity=Arity} = Id,
    Exported = gb_sets:is_element({Name, Arity}, Exports),
    ArgTypes = duplicate(length(Args), #{}),

    FuncDb1 = case FuncDb0 of
                  %% We may have an entry already if someone's called us.
                  #{ Id := Info } ->
                      FuncDb0#{ Id := Info#func_info{ exported=Exported,
                                                      arg_types=ArgTypes }};
                  #{} ->
                      FuncDb0#{ Id => #func_info{ exported=Exported,
                                                  arg_types=ArgTypes }}
              end,

    RPO = beam_ssa:rpo(Bs),
    FuncDb = beam_ssa:fold_blocks(fun(_L, #b_blk{is=Is}, FuncDb) ->
                                          fdb_is(Is, Id, FuncDb)
                                  end, RPO, FuncDb1, Bs),

    fdb_fs(Fs, Exports, FuncDb);
fdb_fs([], _Exports, FuncDb) ->
    FuncDb.

%% Scan an instruction sequence, recording local calls (and make_fun
%% references to local functions) as call graph edges. A call to
%% erlang:load_nif/2 aborts the whole database construction by throwing
%% 'load_nif' (see build_func_db/1).
fdb_is([#b_set{op=call,
               args=[#b_local{}=Callee | _]} | Is],
       Caller, FuncDb) ->
    fdb_is(Is, Caller, fdb_update(Caller, Callee, FuncDb));
fdb_is([#b_set{op=call,
               args=[#b_remote{mod=#b_literal{val=erlang},
                               name=#b_literal{val=load_nif}},
                     _Path, _LoadInfo]} | _Is], _Caller, _FuncDb) ->
    throw(load_nif);
fdb_is([#b_set{op=MakeFun,
               args=[#b_local{}=Callee | _]} | Is],
       Caller, FuncDb) when MakeFun =:= make_fun;
                            MakeFun =:= old_make_fun ->
    %% The make_fun instruction's type depends on the return type of the
    %% function in question, so we treat this as a function call.
    fdb_is(Is, Caller, fdb_update(Caller, Callee, FuncDb));
fdb_is([_ | Is], Caller, FuncDb) ->
    fdb_is(Is, Caller, FuncDb);
fdb_is([], _Caller, FuncDb) ->
    FuncDb.
%% Record a call edge from Caller to Callee in the function database,
%% creating the vertices if they do not exist yet.
fdb_update(Caller, Callee, FuncDb) ->
    CallerVertex = maps:get(Caller, FuncDb, #func_info{}),
    CalleeVertex = maps:get(Callee, FuncDb, #func_info{}),

    Calls = ordsets:add_element(Callee, CallerVertex#func_info.out),
    CalledBy = ordsets:add_element(Caller, CalleeVertex#func_info.in),

    FuncDb#{ Caller => CallerVertex#func_info{out=Calls},
             Callee => CalleeVertex#func_info{in=CalledBy} }.

%% Returns the post-order of all local calls in this module. That is,
%% called functions will be ordered before the functions calling them.
%%
%% Functions where module-level optimization is disabled are added last in
%% arbitrary order.

get_call_order_po(StMap, FuncDb) ->
    Order = gco_po(FuncDb),
    Order ++ maps:fold(fun(K, _V, Acc) ->
                               case is_map_key(K, FuncDb) of
                                   false -> [K | Acc];
                                   true -> Acc
                               end
                       end, [], StMap).

%% Post-order of the call graph: reverse of a depth-first reverse
%% post-order traversal started from all functions in sorted order.
gco_po(FuncDb) ->
    All = sort(maps:keys(FuncDb)),
    {RPO,_} = gco_rpo(All, FuncDb, sets:new([{version, 2}]), []),
    reverse(RPO).

%% Depth-first traversal of the call graph. Returns {Acc,Seen} where
%% Acc accumulates the visited functions in reverse post-order.
gco_rpo([Id|Ids], FuncDb, Seen0, Acc0) ->
    case sets:is_element(Id, Seen0) of
        true ->
            gco_rpo(Ids, FuncDb, Seen0, Acc0);
        false ->
            #func_info{out=Successors} = map_get(Id, FuncDb),
            Seen1 = sets:add_element(Id, Seen0),
            %% Visit all callees before adding this function itself.
            {Acc,Seen} = gco_rpo(Successors, FuncDb, Seen1, Acc0),
            gco_rpo(Ids, FuncDb, Seen, [Id|Acc])
    end;
gco_rpo([], _, Seen, Acc) ->
    {Acc,Seen}.

%%%
%%% Trivial sub passes.
%%%

%% Eliminate dead code (wrapper around beam_ssa_dead).
ssa_opt_dead({#opt_st{ssa=Linear}=St, FuncDb}) ->
    {St#opt_st{ssa=beam_ssa_dead:opt(Linear)}, FuncDb}.

%% Convert the SSA code from map form to linear form.
ssa_opt_linearize({#opt_st{ssa=Blocks}=St, FuncDb}) ->
    {St#opt_st{ssa=beam_ssa:linearize(Blocks)}, FuncDb}.

%% Continue the type optimization started by ssa_opt_type_start.
ssa_opt_type_continue({#opt_st{ssa=Linear0,args=Args,anno=Anno}=St0, FuncDb0}) ->
    {Linear, FuncDb} = beam_ssa_type:opt_continue(Linear0, Args, Anno, FuncDb0),
    {St0#opt_st{ssa=Linear}, FuncDb}.
%% Finish the type pass now that no more type-driven optimizations
%% will run for this function.
ssa_opt_type_finish({#opt_st{args=Args,anno=Anno0}=St, FuncDb0}) ->
    {Anno, FuncDb} = beam_ssa_type:opt_finish(Args, Anno0, FuncDb0),
    {St#opt_st{anno=Anno}, FuncDb}.

%% Convert the SSA code from linear form back to map form.
ssa_opt_blockify({#opt_st{ssa=Linear}=St, FuncDb}) ->
    Blocks = maps:from_list(Linear),
    {St#opt_st{ssa=Blocks}, FuncDb}.

%% Remove blocks that cannot be reached.
ssa_opt_trim_unreachable({#opt_st{ssa=Blocks0}=St, FuncDb}) ->
    Blocks = beam_ssa:trim_unreachable(Blocks0),
    {St#opt_st{ssa=Blocks}, FuncDb}.

%% Merge blocks connected by unconditional branches.
ssa_opt_merge_blocks({#opt_st{ssa=Blocks0}=St, FuncDb}) ->
    Blocks = beam_ssa:merge_blocks(beam_ssa:rpo(Blocks0), Blocks0),
    {St#opt_st{ssa=Blocks}, FuncDb}.

%%%
%%% Split blocks before certain instructions to enable more optimizations.
%%%
%%% Splitting before element/2 enables the optimization that swaps
%%% element/2 instructions.
%%%
%%% Splitting before call and make_fun instructions gives more opportunities
%%% for sinking get_tuple_element instructions.
%%%

ssa_opt_split_blocks({#opt_st{ssa=Blocks0,cnt=Count0}=St, FuncDb}) ->
    %% Predicate: should a new block start before this instruction?
    SplitBefore =
        fun(I) ->
                case I of
                    #b_set{op={bif,element}} -> true;
                    #b_set{op=call} -> true;
                    #b_set{op=bs_init_writable} -> true;
                    #b_set{op=make_fun} -> true;
                    #b_set{op=old_make_fun} -> true;
                    _ -> false
                end
        end,
    {Blocks,Count} = beam_ssa:split_blocks(beam_ssa:rpo(Blocks0),
                                           SplitBefore, Blocks0, Count0),
    {St#opt_st{ssa=Blocks,cnt=Count}, FuncDb}.

%%%
%%% Coalesce phi nodes.
%%%
%%% Nested cases can lead to code such as this:
%%%
%%%     10:
%%%       _1 = phi {literal value1, label 8}, {Var, label 9}
%%%       br 11
%%%
%%%     11:
%%%       _2 = phi {_1, label 10}, {literal false, label 3}
%%%
%%% The phi nodes can be coalesced like this:
%%%
%%%     11:
%%%       _2 = phi {literal value1, label 8}, {Var, label 9}, {literal false, label 3}
%%%
%%% Coalescing can help other optimizations, and can in some cases reduce register
%%% shuffling (if the phi variables for two phi nodes happen to be allocated to
%%% different registers).
%%%

ssa_opt_coalesce_phis({#opt_st{ssa=Blocks0}=St, FuncDb}) ->
    Ls = beam_ssa:rpo(Blocks0),
    Blocks = c_phis_1(Ls, Blocks0),
    {St#opt_st{ssa=Blocks}, FuncDb}.

%% Visit every block that starts with a phi node.
c_phis_1([L|Ls], Blocks0) ->
    case map_get(L, Blocks0) of
        #b_blk{is=[#b_set{op=phi}|_]}=Blk ->
            Blocks = c_phis_2(L, Blk, Blocks0),
            c_phis_1(Ls, Blocks);
        #b_blk{} ->
            c_phis_1(Ls, Blocks0)
    end;
c_phis_1([], Blocks) -> Blocks.

%% Try to coalesce the phi nodes in block L with a phi node that is the
%% only instruction in one of L's predecessor blocks.
c_phis_2(L, #b_blk{is=Is0}=Blk0, Blocks0) ->
    case c_phis_args(Is0, Blocks0) of
        none ->
            Blocks0;
        {_,_,Preds}=Info ->
            Is = c_rewrite_phis(Is0, Info),
            Blk = Blk0#b_blk{is=Is},
            Blocks = Blocks0#{L:=Blk},
            %% Make the coalesced block's predecessors branch to L.
            c_fix_branches(Preds, L, Blocks)
    end.

%% Search the phi nodes for an argument that is itself defined by a
%% phi node which is the sole instruction in its block.
c_phis_args([#b_set{op=phi,args=Args0}|Is], Blocks) ->
    case c_phis_args_1(Args0, Blocks) of
        none ->
            c_phis_args(Is, Blocks);
        Res ->
            Res
    end;
c_phis_args(_, _Blocks) -> none.

c_phis_args_1([{Var,Pred}|As], Blocks) ->
    case c_get_pred_vars(Var, Pred, Blocks) of
        none ->
            c_phis_args_1(As, Blocks);
        Result ->
            Result
    end;
c_phis_args_1([], _Blocks) -> none.

%% Return {Var,Pred,Args} if Var is defined by a phi node that is the
%% only instruction in block Pred; otherwise 'none'.
c_get_pred_vars(Var, Pred, Blocks) ->
    case map_get(Pred, Blocks) of
        #b_blk{is=[#b_set{op=phi,dst=Var,args=Args}]} ->
            {Var,Pred,Args};
        #b_blk{} ->
            none
    end.
%% Rewrite all phi nodes at the head of the block according to Info
%% (the {Var,Pred,Args} triple found by c_get_pred_vars/3).
c_rewrite_phis([#b_set{op=phi,args=Args0}=I|Is], Info) ->
    Args = c_rewrite_phi(Args0, Info),
    [I#b_set{args=Args}|c_rewrite_phis(Is, Info)];
c_rewrite_phis(Is, _Info) -> Is.

%% Rewrite the arguments of one phi node, replacing the argument coming
%% in from block Pred with arguments coming from Pred's predecessors.
c_rewrite_phi([{Var,Pred}|As], {Var,Pred,Values}) ->
    %% The value is the coalesced phi node itself; inline its arguments.
    Values ++ As;
c_rewrite_phi([{Value,Pred}|As], {_,Pred,Values}) ->
    %% This phi node has a value of its own for the Pred edge. That
    %% value must now flow in from each of Pred's predecessors instead.
    [{Value,P} || {_,P} <- Values] ++ As;
c_rewrite_phi([A|As], Info) ->
    [A|c_rewrite_phi(As, Info)];
c_rewrite_phi([], _Info) -> [].

%% Retarget the predecessors of the eliminated phi block so that they
%% branch directly to block L.
c_fix_branches([{_,Pred}|As], L, Blocks0) ->
    #b_blk{last=Last0} = Blk0 = map_get(Pred, Blocks0),
    #b_br{bool=#b_literal{val=true}} = Last0,   %Assertion.
    Last = Last0#b_br{bool=#b_literal{val=true},succ=L,fail=L},
    Blk = Blk0#b_blk{last=Last},
    Blocks = Blocks0#{Pred:=Blk},
    c_fix_branches(As, L, Blocks);
c_fix_branches([], _, Blocks) -> Blocks.

%%%
%%% Eliminate phi nodes in the tail of a function.
%%%
%%% Try to eliminate short blocks that start with a phi node
%%% and end in a return. For example:
%%%
%%%   Result = phi { Res1, 4 }, { literal true, 5 }
%%%   Ret = put_tuple literal ok, Result
%%%   ret Ret
%%%
%%% The code in this block can be inserted at the end of blocks 4 and
%%% 5. Thus, the following code can be inserted into block 4:
%%%
%%%   Ret:1 = put_tuple literal ok, Res1
%%%   ret Ret:1
%%%
%%% And the following code into block 5:
%%%
%%%   Ret:2 = put_tuple literal ok, literal true
%%%   ret Ret:2
%%%
%%% Which can be further simplified to:
%%%
%%%   ret literal {ok, true}
%%%
%%% This transformation may lead to more code improvements:
%%%
%%%   - Stack trimming
%%%   - Fewer test_heap instructions
%%%   - Smaller stack frames
%%%

ssa_opt_tail_phis({#opt_st{ssa=SSA0,cnt=Count0}=St, FuncDb}) ->
    {SSA,Count} = opt_tail_phis(SSA0, Count0),
    {St#opt_st{ssa=SSA,cnt=Count}, FuncDb}.
%% The optimization works on the map representation; when the SSA code
%% is in linear form, convert it to a map, optimize, and convert back.
opt_tail_phis(Blocks, Count) when is_map(Blocks) ->
    opt_tail_phis(maps:values(Blocks), Blocks, Count);
opt_tail_phis(Linear0, Count0) when is_list(Linear0) ->
    Blocks0 = maps:from_list(Linear0),
    {Blocks,Count} = opt_tail_phis(Blocks0, Count0),
    {beam_ssa:linearize(Blocks),Count}.

%% Look for blocks that start with phi nodes (having at least two
%% arguments) and end in a return of a variable, where the remaining
%% instructions are cheap enough to duplicate.
opt_tail_phis([#b_blk{is=Is0,last=Last}|Bs], Blocks0, Count0) ->
    case {Is0,Last} of
        {[#b_set{op=phi,args=[_,_|_]}|_],#b_ret{arg=#b_var{}}=Ret} ->
            {Phis,Is} = splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is0),
            case suitable_tail_ops(Is) of
                true ->
                    {Blocks,Count} = opt_tail_phi(Phis, Is, Ret,
                                                  Blocks0, Count0),
                    opt_tail_phis(Bs, Blocks, Count);
                false ->
                    opt_tail_phis(Bs, Blocks0, Count0)
            end;
        {_,_} ->
            opt_tail_phis(Bs, Blocks0, Count0)
    end;
opt_tail_phis([], Blocks, Count) ->
    {Blocks,Count}.

%% Perform the transformation for one phi block, keeping the original
%% code if the estimated increase in code size is too large.
opt_tail_phi(Phis0, Is, Ret, Blocks0, Count0) ->
    %% Group the phi values by predecessor label.
    Phis = rel2fam(reduce_phis(Phis0)),
    {Blocks,Count,Cost} =
        foldl(fun(PhiArg, Acc) ->
                      opt_tail_phi_arg(PhiArg, Is, Ret, Acc)
              end, {Blocks0,Count0,0}, Phis),
    MaxCost = length(Phis) * 3 + 2,
    if
        Cost =< MaxCost ->
            %% The transformation would cause at most a slight
            %% increase in code size if no more optimizations
            %% can be applied.
            {Blocks,Count};
        true ->
            %% The code size would be increased too much.
            {Blocks0,Count0}
    end.

%% Turn the phi nodes into a relation [{PredecessorLabel,{PhiDst,Value}}].
reduce_phis([#b_set{dst=PhiDst,args=PhiArgs}|Is]) ->
    [{L,{PhiDst,Val}} || {Val,L} <- PhiArgs] ++ reduce_phis(Is);
reduce_phis([]) -> [].
%% Copy the tail instructions into predecessor block PredL, renaming the
%% copied destination variables, substituting the phi values, and
%% replacing the predecessor's branch with the (rewritten) return. The
%% accumulated cost estimate is updated with the cost of the copy.
opt_tail_phi_arg({PredL,PhiSub}, TailIs, Ret0, {Blocks0,Count0,Cost0}) ->
    #b_blk{is=Prefix,last=#b_br{succ=Next,fail=Next}} = Blk0 =
        map_get(PredL, Blocks0),
    Sub0 = maps:from_list(PhiSub),
    {Renamed,Count,Sub} = new_names(TailIs, Sub0, Count0, []),
    CopiedIs = [sub(I, Sub) || I <- Renamed],
    Cost = build_cost(CopiedIs, Cost0),
    Blk = Blk0#b_blk{is=Prefix ++ CopiedIs,last=sub(Ret0, Sub)},
    {Blocks0#{PredL:=Blk},Count,Cost}.

%% Give every destination variable in the instruction sequence a fresh
%% name, extending the substitution map accordingly.
new_names([#b_set{dst=Dst}=I|Is], Sub0, Count0, Acc) ->
    {NewDst,Count} = new_var(Dst, Count0),
    new_names(Is, Sub0#{Dst=>NewDst}, Count, [I#b_set{dst=NewDst}|Acc]);
new_names([], Sub, Count, Acc) ->
    {reverse(Acc),Count,Sub}.

%% Are all instructions in the sequence cheap and pure enough to be
%% duplicated into each predecessor block?
suitable_tail_ops(Is) ->
    all(fun(#b_set{op=Op}) -> is_suitable_tail_op(Op) end, Is).

is_suitable_tail_op({bif,_}) -> true;
is_suitable_tail_op(Op) ->
    Op =:= put_list orelse Op =:= put_tuple.

%% Estimate the code-size cost of the copied instruction sequence.
%% Instructions whose operands are all literals are free, since they
%% will be constant-folded away.
build_cost([#b_set{op=Op,args=Args}|Is], Cost) ->
    case are_all_literals(Args) of
        true ->
            build_cost(Is, Cost);
        false ->
            build_cost(Is, Cost + op_cost(Op, Args))
    end;
build_cost([], Cost) -> Cost.

%% Cost of one instruction (only ops accepted by is_suitable_tail_op/1
%% can occur here).
op_cost(put_tuple, Args) -> length(Args) + 1;
op_cost(put_list, _Args) -> 1;
op_cost({bif,_}, _Args) -> 1.

are_all_literals(Args) ->
    all(fun(Arg) ->
                case Arg of
                    #b_literal{} -> true;
                    _ -> false
                end
        end, Args).

%%%
%%% Order element/2 calls.
%%%
%%% Order an unbroken chain of element/2 calls for the same tuple
%%% with the same failure label so that the highest element is
%%% retrieved first.
%%% That will allow the other element/2 calls to
%%% be replaced with get_tuple_element/3 instructions.
%%%

ssa_opt_element({#opt_st{ssa=Blocks}=St, FuncDb}) ->
    %% Collect the information about element instructions in this
    %% function.
    GetEls = collect_element_calls(beam_ssa:linearize(Blocks)),

    %% Collect the element instructions into chains. The
    %% element calls in each chain are ordered in reverse
    %% execution order.
    Chains = collect_chains(GetEls, []),

    %% For each chain, swap the first element call with the
    %% element call with the highest index.
    {St#opt_st{ssa=swap_element_calls(Chains, Blocks)}, FuncDb}.

%% Collect {BlockLabel,SuccLabel,{Tuple,FailLabel},Index} for every
%% block that consists of exactly a guarded element/2 call on a literal
%% index.
collect_element_calls([{L,#b_blk{is=Is0,last=Last}}|Bs]) ->
    case {Is0,Last} of
        {[#b_set{op={bif,element},dst=Element,
                 args=[#b_literal{val=N},#b_var{}=Tuple]},
          #b_set{op={succeeded,guard},dst=Bool,args=[Element]}],
         #b_br{bool=Bool,succ=Succ,fail=Fail}} ->
            Info = {L,Succ,{Tuple,Fail},N},
            [Info|collect_element_calls(Bs)];
        {_,_} ->
            collect_element_calls(Bs)
    end;
collect_element_calls([]) -> [].

%% Group consecutive element calls on the same tuple with the same
%% failure label into chains; chains with fewer than two calls are
%% discarded.
collect_chains([{This,_,V,_}=El|Els], [{_,This,V,_}|_]=Chain) ->
    %% Add to the previous chain.
    collect_chains(Els, [El|Chain]);
collect_chains([El|Els], [_,_|_]=Chain) ->
    %% Save the previous chain and start a new chain.
    [Chain|collect_chains(Els, [El])];
collect_chains([El|Els], _Chain) ->
    %% The previous chain is too short; discard it and start a new.
    collect_chains(Els, [El]);
collect_chains([], [_,_|_]=Chain) ->
    %% Save the last chain.
    [Chain];
collect_chains([], _) -> [].

%% For each chain, seed the traversal with the index and label of the
%% chain head (the last call in execution order).
swap_element_calls([[{L,_,_,N}|_]=Chain|Chains], Blocks0) ->
    Blocks = swap_element_calls_1(Chain, {N,L}, Blocks0),
    swap_element_calls(Chains, Blocks);
swap_element_calls([], Blocks) -> Blocks.
%% Walk a chain of element/2 calls (in reverse execution order),
%% tracking in {N2,L2} the highest index seen so far together with the
%% label of the block that contains that call.
swap_element_calls_1([{L1,_,_,N1}], {N2,L2}, Blocks) when N2 > N1 ->
    %% We have reached the end of the chain, and the first
    %% element instruction to be executed. Its index is lower
    %% than the maximum index found while traversing the chain,
    %% so we will need to swap the instructions.
    #{L1:=Blk1,L2:=Blk2} = Blocks,
    [#b_set{dst=Dst1}=GetEl1,Succ1] = Blk1#b_blk.is,
    [#b_set{dst=Dst2}=GetEl2,Succ2] = Blk2#b_blk.is,
    Is1 = [GetEl2,Succ1#b_set{args=[Dst2]}],
    Is2 = [GetEl1,Succ2#b_set{args=[Dst1]}],
    Blocks#{L1:=Blk1#b_blk{is=Is1},L2:=Blk2#b_blk{is=Is2}};
swap_element_calls_1([{L,_,_,N1}|Els], {N2,_}, Blocks) when N1 > N2 ->
    %% This call has a higher index than any seen so far, so it becomes
    %% the new swap candidate. Note that its own index N1 must be
    %% propagated together with its label L; passing on the old index
    %% N2 here would pair the wrong index with the label and corrupt
    %% the final comparison above.
    swap_element_calls_1(Els, {N1,L}, Blocks);
swap_element_calls_1([_|Els], Highest, Blocks) ->
    swap_element_calls_1(Els, Highest, Blocks);
swap_element_calls_1([], _, Blocks) ->
    %% Nothing to do. The element call with highest index
    %% is already the first one to be executed.
    Blocks.

%%%
%%% Record optimization.
%%%
%%% Replace tuple matching with an is_tagged_tuple instruction
%%% when applicable.
%%%

ssa_opt_record({#opt_st{ssa=Linear}=St, FuncDb}) ->
    Blocks = maps:from_list(Linear),
    {St#opt_st{ssa=record_opt(Linear, Blocks)}, FuncDb}.

%% Visit every block, trying to rewrite its instructions.
record_opt([{L,#b_blk{is=Is0,last=Last}=Blk0}|Bs], Blocks) ->
    Is = record_opt_is(Is0, Last, Blocks),
    Blk = Blk0#b_blk{is=Is},
    [{L,Blk}|record_opt(Bs, Blocks)];
record_opt([], _Blocks) -> [].
%% Rewrite an is_tuple test (followed by arity/tag checks in the
%% successor blocks) or an inline tuple_size/tag check sequence into a
%% single is_tagged_tuple instruction.
record_opt_is([#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}=Set],
              Last, Blocks) ->
    case is_tagged_tuple(Tuple, Bool, Last, Blocks) of
        {yes,Size,Tag} ->
            Args = [Tuple,Size,Tag],
            [Set#b_set{op=is_tagged_tuple,args=Args}];
        no ->
            [Set]
    end;
record_opt_is([I|Is]=Is0, #b_br{bool=Bool}=Last, Blocks) ->
    case is_tagged_tuple_1(Is0, Last, Blocks) of
        {yes,_Fail,Tuple,Arity,Tag} ->
            Args = [Tuple,Arity,Tag],
            [I#b_set{op=is_tagged_tuple,dst=Bool,args=Args}];
        no ->
            [I|record_opt_is(Is, Last, Blocks)]
    end;
record_opt_is([I|Is], Last, Blocks) ->
    [I|record_opt_is(Is, Last, Blocks)];
record_opt_is([], _Last, _Blocks) -> [].

%% Is the is_tuple test followed (in its success block) by a tuple_size
%% comparison and tag check on the same tuple with the same failure
%% label? If so, return {yes,Arity,Tag}.
is_tagged_tuple(#b_var{}=Tuple, Bool,
                #b_br{bool=Bool,succ=Succ,fail=Fail},
                Blocks) ->
    #b_blk{is=Is,last=Last} = map_get(Succ, Blocks),
    case is_tagged_tuple_1(Is, Last, Blocks) of
        {yes,Fail,Tuple,Arity,Tag} ->
            {yes,Arity,Tag};
        _ ->
            no
    end;
is_tagged_tuple(_, _, _, _) -> no.

%% Does the block compare tuple_size of a tuple variable against an
%% integer literal and then (in the success block) compare the first
%% element against a literal atom tag?
is_tagged_tuple_1(Is, Last, Blocks) ->
    case {Is,Last} of
        {[#b_set{op={bif,tuple_size},dst=ArityVar,
                 args=[#b_var{}=Tuple]},
          #b_set{op={bif,'=:='},
                 dst=Bool,
                 args=[ArityVar, #b_literal{val=ArityVal}=Arity]}],
         #b_br{bool=Bool,succ=Succ,fail=Fail}}
          when is_integer(ArityVal) ->
            SuccBlk = map_get(Succ, Blocks),
            case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of
                no ->
                    no;
                {yes,Tag} ->
                    {yes,Fail,Tuple,Arity,Tag}
            end;
        _ ->
            no
    end.

%% The success block must branch on a variable and share the same
%% failure label as the arity test.
is_tagged_tuple_2(#b_blk{is=Is,
                         last=#b_br{bool=#b_var{}=Bool,fail=Fail}},
                  Tuple, Fail) ->
    is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_2(#b_blk{}, _, _) -> no.
%% Find a get_tuple_element instruction extracting element 0 (the tag)
%% of the tuple being tested.
is_tagged_tuple_3([#b_set{op=get_tuple_element,
                          dst=TagVar,
                          args=[#b_var{}=Tuple,#b_literal{val=0}]}|Is],
                  Bool, Tuple) ->
    is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_3([_|Is], Bool, Tuple) ->
    is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_3([], _, _) -> no.

%% The extracted tag must be compared against a literal atom, and the
%% comparison result must be the variable the block branches on.
is_tagged_tuple_4([#b_set{op={bif,'=:='},dst=Bool,
                          args=[#b_var{}=TagVar,
                                #b_literal{val=TagVal}=Tag]}],
                  Bool, TagVar) when is_atom(TagVal) ->
    {yes,Tag};
is_tagged_tuple_4([_|Is], Bool, TagVar) ->
    is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_4([], _, _) -> no.

%%%
%%% Common subexpression elimination (CSE).
%%%
%%% Eliminate repeated evaluation of identical expressions. To avoid
%%% increasing the size of the stack frame, we don't eliminate
%%% subexpressions across instructions that clobber the X registers.
%%%

ssa_opt_cse({#opt_st{ssa=Linear}=St, FuncDb}) ->
    %% M maps block labels to the expression map valid on entry to that
    %% block.
    M = #{0 => #{}, ?EXCEPTION_BLOCK => #{}},
    {St#opt_st{ssa=cse(Linear, #{}, M)}, FuncDb}.

cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
    Es0 = map_get(L, M0),
    {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
    Last = sub(Last0, Sub),
    %% Note that Is1 is in reversed order, so its head is the last
    %% instruction of the block.
    M = cse_successors(Is1, Blk, Es, M0),
    Is = reverse(Is1),
    [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
cse([], _, _) -> [].

%% Propagate the expression map to the successor blocks. (The
%% instruction list is in reversed order; the head is the block's last
%% instruction.)
cse_successors([#b_set{op={succeeded,_},args=[Src]},Bif|_], Blk, EsSucc, M0) ->
    case cse_suitable(Bif) of
        true ->
            %% The previous instruction only has a valid value at the success branch.
            %% We must remove the substitution for Src from the failure branch.
            #b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk,
            M = cse_successors_1([Succ], EsSucc, M0),
            EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc),
            cse_successors_1([Fail], EsFail, M);
        false ->
            %% There can't be any replacement for Src in EsSucc. No need for
            %% any special handling.
            cse_successors_1(beam_ssa:successors(Blk), EsSucc, M0)
    end;
cse_successors(_Is, Blk, Es, M) ->
    cse_successors_1(beam_ssa:successors(Blk), Es, M).

%% Merge the outgoing expression map into each successor's entry map
%% (intersecting with whatever is already recorded there).
cse_successors_1([L|Ls], Es0, M) ->
    case M of
        #{L:=Es1} when map_size(Es1) =:= 0 ->
            %% The map is already empty. No need to do anything
            %% since the intersection will be empty.
            cse_successors_1(Ls, Es0, M);
        #{L:=Es1} ->
            Es = cse_intersection(Es0, Es1),
            cse_successors_1(Ls, Es0, M#{L:=Es});
        #{} ->
            cse_successors_1(Ls, Es0, M#{L=>Es0})
    end;
cse_successors_1([], _, M) -> M.

%% Calculate the intersection of the two maps. Both keys and values
%% must match.
cse_intersection(M1, M2) ->
    if
        map_size(M1) < map_size(M2) ->
            cse_intersection_1(maps:to_list(M1), M2, M1);
        true ->
            cse_intersection_1(maps:to_list(M2), M1, M2)
    end.

cse_intersection_1([{Key,Value}|KVs], M, Result) ->
    case M of
        #{Key := Value} ->
            cse_intersection_1(KVs, M, Result);
        #{} ->
            cse_intersection_1(KVs, M, maps:remove(Key, Result))
    end;
cse_intersection_1([], _, Result) -> Result.

%% Process the instructions of one block, eliminating repeated
%% expressions. Returns {ReversedIs,Exprs,Sub} where Exprs maps
%% expression keys to the variable holding their value and Sub maps
%% eliminated variables to their replacements.
cse_is([#b_set{op={succeeded,_},dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) ->
    I = sub(I0, Sub0),
    case I of
        #b_set{args=[Src]} ->
            cse_is(Is, Es, Sub0, [I|Acc]);
        #b_set{} ->
            %% The previous instruction has been eliminated. Eliminate the
            %% 'succeeded' instruction too.
            Sub = Sub0#{Bool=>#b_literal{val=true}},
            cse_is(Is, Es, Sub, Acc)
    end;
cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
    I = sub(I0, Sub0),
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% Retaining the expressions map across calls and other
            %% clobbering instructions would work, but it would cause
            %% the common subexpressions to be saved to Y registers,
            %% which would probably increase the size of the stack
            %% frame.
            cse_is(Is, #{}, Sub0, [I|Acc]);
        false ->
            case cse_expr(I) of
                none ->
                    %% Not suitable for CSE.
                    cse_is(Is, Es0, Sub0, [I|Acc]);
                {ok,ExprKey} ->
                    case Es0 of
                        #{ExprKey:=Src} ->
                            %% Seen before; drop the instruction and
                            %% substitute the previous result.
                            Sub = Sub0#{Dst=>Src},
                            cse_is(Is, Es0, Sub, Acc);
                        #{} ->
                            Es1 = Es0#{ExprKey=>Dst},
                            Es = cse_add_inferred_exprs(I, Es1),
                            cse_is(Is, Es, Sub0, [I|Acc])
                    end
            end
    end;
cse_is([], Es, Sub, Acc) ->
    {Acc,Es,Sub}.

%% Record expressions whose values are implied by this instruction
%% (for example, put_list implies the values of get_hd/get_tl).
cse_add_inferred_exprs(#b_set{op=put_list,dst=List,args=[Hd,Tl]}, Es) ->
    Es#{{get_hd,[List]} => Hd,
        {get_tl,[List]} => Tl};
cse_add_inferred_exprs(#b_set{op=put_tuple,dst=Tuple,args=[E1,E2|_]}, Es) ->
    %% Adding tuple elements beyond the first two does not seem to be
    %% worthwhile (at least not in the sample used by scripts/diffable).
    Es#{{get_tuple_element,[Tuple,#b_literal{val=0}]} => E1,
        {get_tuple_element,[Tuple,#b_literal{val=1}]} => E2};
cse_add_inferred_exprs(#b_set{op={bif,element},dst=E,
                              args=[#b_literal{val=N},Tuple]}, Es)
  when is_integer(N) ->
    Es#{{get_tuple_element,[Tuple,#b_literal{val=N-1}]} => E};
cse_add_inferred_exprs(#b_set{op={bif,hd},dst=Hd,args=[List]}, Es) ->
    Es#{{get_hd,[List]} => Hd};
cse_add_inferred_exprs(#b_set{op={bif,tl},dst=Tl,args=[List]}, Es) ->
    Es#{{get_tl,[List]} => Tl};
cse_add_inferred_exprs(#b_set{op={bif,map_get},dst=Value,args=[Key,Map]}, Es) ->
    Es#{{get_map_element,[Map,Key]} => Value};
cse_add_inferred_exprs(#b_set{op=put_map,dst=Map,args=Args}, Es) ->
    cse_add_map_get(Args, Map, Es);
cse_add_inferred_exprs(_, Es) -> Es.

%% Record a get_map_element expression for every key/value pair stored
%% by a put_map instruction.
cse_add_map_get([Key,Value|T], Map, Es0) ->
    Es = Es0#{{get_map_element,[Map,Key]} => Value},
    cse_add_map_get(T, Map, Es);
cse_add_map_get([], _, Es) -> Es.

%% Return {ok,Key} with the expression key for the instruction, or
%% 'none' if the instruction is not suitable for CSE.
cse_expr(#b_set{op=Op,args=Args}=I) ->
    case cse_suitable(I) of
        true -> {ok,{Op,Args}};
        false -> none
    end.
1036 1037cse_suitable(#b_set{op=get_hd}) -> true; 1038cse_suitable(#b_set{op=get_tl}) -> true; 1039cse_suitable(#b_set{op=put_list}) -> true; 1040cse_suitable(#b_set{op=get_tuple_element}) -> true; 1041cse_suitable(#b_set{op=put_tuple}) -> true; 1042cse_suitable(#b_set{op=get_map_element}) -> true; 1043cse_suitable(#b_set{op=put_map}) -> true; 1044cse_suitable(#b_set{op={bif,tuple_size}}) -> 1045 %% Doing CSE for tuple_size/1 can prevent the 1046 %% creation of test_arity and select_tuple_arity 1047 %% instructions. That could decrease performance 1048 %% and beam_validator could fail to understand 1049 %% that tuple operations that follow are safe. 1050 false; 1051cse_suitable(#b_set{anno=Anno,op={bif,Name},args=Args}) -> 1052 %% Doing CSE for floating point operators is unsafe. 1053 %% Doing CSE for comparison operators would prevent 1054 %% creation of 'test' instructions. 1055 Arity = length(Args), 1056 not (is_map_key(float_op, Anno) orelse 1057 erl_internal:new_type_test(Name, Arity) orelse 1058 erl_internal:comp_op(Name, Arity) orelse 1059 erl_internal:bool_op(Name, Arity)); 1060cse_suitable(#b_set{}) -> false. 1061 1062%%% 1063%%% Using floating point instructions. 1064%%% 1065%%% Use the special floating points version of arithmetic 1066%%% instructions, if the operands are known to be floats or the result 1067%%% of the operation will be a float. 1068%%% 1069%%% The float instructions were never used in guards before, so we 1070%%% will take special care to keep not using them in guards. Using 1071%%% them in guards would require a new version of the 'fconv' 1072%%% instruction that would take a failure label. Since it is unlikely 1073%%% that using float instructions in guards would be benefical, why 1074%%% bother implementing a new instruction? 1075%%% 1076 1077-record(fs, 1078 {regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()}, 1079 non_guards :: gb_sets:set(beam_ssa:label()), 1080 bs :: beam_ssa:block_map() 1081 }). 
%% Entry point of the float optimization sub-pass.
ssa_opt_float({#opt_st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
    NonGuards = non_guards(Linear0),
    BlockMap = maps:from_list(Linear0),
    Fs = #fs{non_guards=NonGuards,bs=BlockMap},
    {Linear,Count} = float_opt(Linear0, Count0, Fs),
    {St#opt_st{ssa=Linear,cnt=Count}, FuncDb}.

%% The fconv instruction doesn't support jumping to a fail label, so
%% this optimization must be skipped when the fail block is a guard.
%%
%% The optimization is also skipped in blocks that always fail, as it
%% is both difficult and pointless to rewrite them to use float ops.
float_can_optimize_blk(#b_blk{last=#b_br{bool=#b_var{},fail=Fail}},
                       #fs{non_guards=NonGuards}) ->
    gb_sets:is_member(Fail, NonGuards);
float_can_optimize_blk(#b_blk{}, #fs{}) ->
    false.

float_opt([{Lbl,Blk}|Blocks0], Count0, Fs) ->
    case float_can_optimize_blk(Blk, Fs) of
        true ->
            float_opt_1(Lbl, Blk, Blocks0, Count0, Fs);
        false ->
            {Blocks,Count} = float_opt(Blocks0, Count0, Fs),
            {[{Lbl,Blk}|Blocks],Count}
    end;
float_opt([], Count, _Fs) ->
    {[],Count}.

float_opt_1(Lbl, #b_blk{is=Is0}=Blk0, Blocks0, Count0, Fs0) ->
    case float_opt_is(Is0, Fs0, Count0, []) of
        {Is1,Fs1,Count1} ->
            %% Some instructions were rewritten to float operations.
            %% Flush float registers if needed and split out any
            %% {float,convert} instructions into their own blocks.
            {Flush,Blk,Fs,Count2} = float_maybe_flush(Blk0, Fs1, Count1),
            {Blks,Count3} = float_fixup_conv(Lbl, Is1, Blk, Count2),
            {Blocks,Count} = float_opt(Blocks0, Count3, Fs),
            {Blks++Flush++Blocks,Count};
        none ->
            {Blocks,Count} = float_opt(Blocks0, Count0, Fs0),
            {[{Lbl,Blk0}|Blocks],Count}
    end.

%% Split out {float,convert} instructions into separate blocks,
%% number the blocks, and add a {succeeded,body} test in each
%% {float,convert} block.
float_fixup_conv(Lbl, Is, Blk, Count0) ->
    Split = float_split_conv(Is, Blk),
    {Blks,Count} = float_number(Split, Lbl, Count0),
    #b_blk{last=#b_br{bool=#b_var{},fail=Fail}} = Blk,
    float_conv(Blks, Fail, Count).

%% Split {float,convert} instructions into individual blocks.
float_split_conv(Is0, Blk) ->
    Dummy = #b_br{bool=#b_literal{val=true},succ=0,fail=0},
    %% Other instructions (such as remove_message) may precede the
    %% floating point instructions; therefore, it is essential that
    %% instructions are not reordered here.
    case splitwith(fun(#b_set{op=Op}) ->
                           Op =/= {float,convert}
                   end, Is0) of
        {Is,[]} ->
            %% No conversion instruction in this block.
            [Blk#b_blk{is=Is}];
        {[_|_]=Before,[#b_set{op={float,convert}}=Conv|After]} ->
            [#b_blk{is=Before,last=Dummy},
             #b_blk{is=[Conv],last=Dummy}|float_split_conv(After, Blk)];
        {[],[#b_set{op={float,convert}}=Conv|After]} ->
            [#b_blk{is=[Conv],last=Dummy}|float_split_conv(After, Blk)]
    end.

%% Number the split blocks and chain them together with fall-through
%% branches; the first block keeps the original label.
float_number(Blocks0, FirstLbl, Count0) ->
    {[{_,FirstBlk}|Blocks],Count} = float_number(Blocks0, Count0),
    {[{FirstLbl,FirstBlk}|Blocks],Count}.

float_number([Blk], Count) ->
    %% The last block keeps its original terminator.
    {[{Count,Blk}],Count};
float_number([Blk|Blocks0], Count0) ->
    Next = Count0 + 1,
    {Blocks,Count} = float_number(Blocks0, Next),
    Br = #b_br{bool=#b_literal{val=true},succ=Next,fail=Next},
    {[{Count0,Blk#b_blk{last=Br}}|Blocks],Count}.

%% Insert a 'succeeded' test after each {float,convert} instruction
%% and branch to the failure label if the conversion fails.
float_conv([{Lbl,#b_blk{is=Is0,last=Last}=Blk0}|Blocks0], Fail, Count0) ->
    case Is0 of
        [#b_set{op={float,convert}}=Conv] ->
            {Bool,Count1} = new_var('@ssa_bool', Count0),
            Succeeded = #b_set{op={succeeded,body},dst=Bool,
                               args=[Conv#b_set.dst]},
            Is = [Conv,Succeeded],
            Br = Last#b_br{bool=Bool,fail=Fail},
            Blk = Blk0#b_blk{is=Is,last=Br},
            {Blocks,Count} = float_conv(Blocks0, Fail, Count1),
            {[{Lbl,Blk}|Blocks],Count};
        [_|_] ->
            {Blocks,Count} = float_conv(Blocks0, Fail, Count0),
            {[{Lbl,Blk0}|Blocks],Count}
    end;
float_conv([], _, Count) ->
    {[],Count}.
%% Decide whether the float registers must be flushed back to tagged
%% values at the end of the block.
float_maybe_flush(Blk0, Fs0, Count0) ->
    #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0,

    %% If the success block has an optimizable floating point
    %% instruction, it is safe to defer flushing.
    case float_safe_to_skip_flush(Succ, Fs0) of
        true ->
            %% No flush needed.
            {[],Blk0,Fs0,Count0};
        false ->
            %% Flush needed. Allocate a label for the flush block and
            %% redirect the success branch through it.
            FlushLbl = Count0,
            Count = Count0 + 1,
            Blk = Blk0#b_blk{last=Br#b_br{succ=FlushLbl}},

            %% Build the block that flushes all float registers.
            FlushIs = float_flush_regs(Fs0),
            FlushBr = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
            FlushBlk = #b_blk{is=FlushIs,last=FlushBr},

            %% Clear the register map and emit the flush block.
            Fs = Fs0#fs{regs=#{}},
            FlushBlocks = [{FlushLbl,FlushBlk}],
            {FlushBlocks,Blk,Fs,Count}
    end.

float_safe_to_skip_flush(Lbl, #fs{bs=Blocks}=Fs) ->
    #b_blk{is=Is} = Blk = map_get(Lbl, Blocks),
    float_can_optimize_blk(Blk, Fs) andalso float_optimizable_is(Is).

float_optimizable_is([#b_set{anno=#{float_op:=_}}|_]) ->
    true;
float_optimizable_is([#b_set{op=get_tuple_element}|Rest]) ->
    %% The tuple sinking optimization can sink a get_tuple_element
    %% instruction into a sequence of floating point operations.
    float_optimizable_is(Rest);
float_optimizable_is(_) ->
    false.
%% Rewrite the instructions of a block to use float operations.
%% Returns 'none' if nothing in the block can be optimized.
float_opt_is([#b_set{op={succeeded,_},args=[Src]}=I0],
             #fs{regs=Rs}=Fs, Count, Acc) ->
    case Rs of
        #{Src:=Fr} ->
            %% Retarget the 'succeeded' test at the float register.
            I = I0#b_set{args=[Fr]},
            {reverse(Acc, [I]),Fs,Count};
        #{} ->
            none
    end;
float_opt_is([#b_set{anno=Anno0}=I0|Is0], Fs0, Count0, Acc) ->
    case Anno0 of
        #{float_op:=FTypes} ->
            %% The type pass has marked this instruction as a float
            %% operation; rewrite it.
            Anno = maps:remove(float_op, Anno0),
            I1 = I0#b_set{anno=Anno},
            {Is,Fs,Count} = float_make_op(I1, FTypes, Fs0, Count0),
            float_opt_is(Is0, Fs, Count, reverse(Is, Acc));
        #{} ->
            float_opt_is(Is0, Fs0, Count0, [I0|Acc])
    end;
float_opt_is([], _Fs, _Count, _Acc) ->
    none.

%% Turn a {bif,Op} instruction into a {float,Op} instruction,
%% inserting loads of the operands into float registers as needed.
float_make_op(#b_set{op={bif,Op},dst=Dst,args=Args0,anno=Anno}=I0,
              Types, #fs{regs=Rs0}=Fs, Count0) ->
    {Args1,Rs1,Count1} = float_load(Args0, Types, Anno, Rs0, Count0, []),
    {Args,LoadIs0} = unzip(Args1),
    {FrDst,Count2} = new_var('@fr', Count1),
    I = I0#b_set{op={float,Op},dst=FrDst,args=Args},
    Rs = Rs1#{Dst=>FrDst},
    Is = append(LoadIs0) ++ [I],
    {Is,Fs#fs{regs=Rs},Count2}.

float_load([Arg|Args], [Type|Types], Anno, Rs0, Count0, Acc) ->
    {Load,Rs,Count} = float_reg_arg(Arg, Type, Anno, Rs0, Count0),
    float_load(Args, Types, Anno, Rs, Count, [Load|Acc]);
float_load([], [], _Anno, Rs, Count, Acc) ->
    {reverse(Acc),Rs,Count}.

%% Return the float register for an operand, loading it first if it
%% is not already in a float register.
float_reg_arg(Arg, Type, Anno, Rs, Count0) ->
    case Rs of
        #{Arg:=Fr} ->
            {{Fr,[]},Rs,Count0};
        #{} ->
            {Dst,Count} = new_var('@fr_copy', Count0),
            I0 = float_load_reg(Type, Arg, Dst),
            I = I0#b_set{anno=Anno},
            {{Dst,[I]},Rs#{Arg=>Dst},Count}
    end.

float_load_reg(convert, #b_var{}=Src, Dst) ->
    #b_set{op={float,convert},dst=Dst,args=[Src]};
float_load_reg(convert, #b_literal{val=Val}=Src, Dst) ->
    try float(Val) of
        F ->
            #b_set{op={float,put},dst=Dst,args=[#b_literal{val=F}]}
    catch
        error:_ ->
            %% Not convertible to a float; let the exception happen
            %% at runtime.
            #b_set{op={float,convert},dst=Dst,args=[Src]}
    end;
float_load_reg(float, Src, Dst) ->
    #b_set{op={float,put},dst=Dst,args=[Src]}.

%% Emit {float,get} instructions that copy the float registers back
%% to their tagged destination variables. Anonymous copies made for
%% operand loading need not be flushed.
float_flush_regs(#fs{regs=Rs}) ->
    maps:fold(fun(_, #b_var{name={'@fr_copy',_}}, Acc) ->
                      Acc;
                 (Dst, Fr, Acc) ->
                      [#b_set{op={float,get},dst=Dst,args=[Fr]}|Acc]
              end, [], Rs).

%%%
%%% Live optimization.
%%%
%%% Optimize instructions whose values are not used. They could be
%%% removed if they have no side effects, or in a few cases replaced
%%% with cheaper instructions.
%%%

ssa_opt_live({#opt_st{ssa=Linear0}=St, FuncDb}) ->
    RevLinear = reverse(Linear0),
    Blocks0 = maps:from_list(RevLinear),
    Blocks = live_opt(RevLinear, #{}, Blocks0),
    Linear = beam_ssa:linearize(Blocks),
    {St#opt_st{ssa=Linear}, FuncDb}.

%% Walk the blocks in reverse order, collecting liveness from the
%% successors of each block.
live_opt([{Lbl,Blk0}|Blocks], LiveMap0, Acc) ->
    Blk1 = beam_ssa_share:block(Blk0, Acc),
    Successors = beam_ssa:successors(Blk1),
    Live0 = live_opt_succ(Successors, Lbl, LiveMap0,
                          sets:new([{version, 2}])),
    {Blk,Live} = live_opt_blk(Blk1, Live0),
    LiveMap = live_opt_phis(Blk#b_blk.is, Lbl, Live, LiveMap0),
    live_opt(Blocks, LiveMap, Acc#{Lbl:=Blk});
live_opt([], _, Acc) -> Acc.

live_opt_succ([S|Ss], Lbl, LiveMap, Live0) ->
    case LiveMap of
        #{{S,Lbl}:=Live} ->
            %% The successor has a phi node, and the value for this
            %% block in the phi node is a variable.
            live_opt_succ(Ss, Lbl, LiveMap, sets:union(Live0, Live));
        #{S:=Live} ->
            %% No phi node in the successor, or the value for this
            %% block in the phi node is a literal.
            live_opt_succ(Ss, Lbl, LiveMap, sets:union(Live0, Live));
        #{} ->
            %% A peek_message block which has not been processed yet.
            live_opt_succ(Ss, Lbl, LiveMap, Live0)
    end;
live_opt_succ([], _, _, Acc) -> Acc.
%% Record liveness for this block, plus per-predecessor liveness for
%% variables used in phi nodes.
live_opt_phis(Is, Lbl, Live0, LiveMap0) ->
    LiveMap = LiveMap0#{Lbl=>Live0},
    Phis = takewhile(fun(#b_set{op=Op}) -> Op =:= phi end, Is),
    case Phis of
        [] ->
            LiveMap;
        [_|_] ->
            PhiArgs = append([Args || #b_set{args=Args} <- Phis]),
            case [{Pred,Var} || {#b_var{}=Var,Pred} <- PhiArgs] of
                [_|_]=PhiVars ->
                    PhiLive0 = rel2fam(PhiVars),
                    PhiLive = [{{Lbl,Pred},list_set_union(Vars, Live0)} ||
                                  {Pred,Vars} <- PhiLive0],
                    maps:merge(LiveMap, maps:from_list(PhiLive));
                [] ->
                    %% There were only literals in the phi node(s).
                    LiveMap
            end
    end.

live_opt_blk(#b_blk{is=Is0,last=Last}=Blk, Live0) ->
    Live1 = list_set_union(beam_ssa:used(Last), Live0),
    {Is,Live} = live_opt_is(reverse(Is0), Live1, []),
    {Blk#b_blk{is=Is},Live}.

%% Walk the (reversed) instruction list, dropping instructions whose
%% results are unused and have no side effects.
live_opt_is([#b_set{op=phi,dst=Dst}=I|Is], Live0, Acc) ->
    Live = sets:del_element(Dst, Live0),
    case sets:is_element(Dst, Live0) of
        true ->
            live_opt_is(Is, Live, [I|Acc]);
        false ->
            live_opt_is(Is, Live, Acc)
    end;
live_opt_is([#b_set{op={succeeded,guard},dst=SuccDst,args=[Dst]}=SuccI,
             #b_set{op=Op,dst=Dst}=I0|Is],
            Live0, Acc) ->
    case {sets:is_element(SuccDst, Live0),
          sets:is_element(Dst, Live0)} of
        {true, true} ->
            Live = sets:del_element(SuccDst, Live0),
            live_opt_is([I0|Is], Live, [SuccI|Acc]);
        {true, false} ->
            %% The result of the instruction before {succeeded,guard}
            %% is unused. Attempt to perform a strength reduction.
            case Op of
                {bif,'not'} ->
                    I = I0#b_set{op={bif,is_boolean},dst=SuccDst},
                    live_opt_is([I|Is], Live0, Acc);
                {bif,tuple_size} ->
                    I = I0#b_set{op={bif,is_tuple},dst=SuccDst},
                    live_opt_is([I|Is], Live0, Acc);
                bs_start_match ->
                    [#b_literal{val=new},Bin] = I0#b_set.args,
                    I = I0#b_set{op={bif,is_bitstring},args=[Bin],
                                 dst=SuccDst},
                    live_opt_is([I|Is], Live0, Acc);
                get_map_element ->
                    I = I0#b_set{op=has_map_field,dst=SuccDst},
                    live_opt_is([I|Is], Live0, Acc);
                _ ->
                    %% No reduction known; keep both instructions and
                    %% mark the result as live.
                    Live1 = sets:del_element(SuccDst, Live0),
                    Live = sets:add_element(Dst, Live1),
                    live_opt_is([I0|Is], Live, [SuccI|Acc])
            end;
        {false, true} ->
            %% The 'succeeded' test is dead; keep only the checked
            %% instruction.
            live_opt_is([I0|Is], Live0, Acc);
        {false, false} ->
            live_opt_is(Is, Live0, Acc)
    end;
live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
    case sets:is_element(Dst, Live0) of
        true ->
            Live1 = list_set_union(beam_ssa:used(I), Live0),
            Live = sets:del_element(Dst, Live1),
            live_opt_is(Is, Live, [I|Acc]);
        false ->
            case beam_ssa:no_side_effect(I) of
                true ->
                    %% Dead and pure; drop it.
                    live_opt_is(Is, Live0, Acc);
                false ->
                    %% Dead but has side effects; must be kept.
                    Live = list_set_union(beam_ssa:used(I), Live0),
                    live_opt_is(Is, Live, [I|Acc])
            end
    end;
live_opt_is([], Live, Acc) ->
    {Acc,Live}.

%%%
%%% try/catch optimization.
%%%
%%% Attempts to rewrite try/catches as guards when we know the
%%% exception won't be inspected in any way, and removes try/catches
%%% whose expressions will never throw.
%%%

ssa_opt_try({#opt_st{ssa=Linear0}=St, FuncDb}) ->
    RevLinear = reduce_try(Linear0, []),

    EmptySet = sets:new([{version, 2}]),
    Linear = trim_try(RevLinear, EmptySet, EmptySet, []),

    {St#opt_st{ssa=Linear}, FuncDb}.

%% Does a strength reduction of try/catch and catch.
%%
%% In try/catch constructs where the expression is restricted
%% (essentially a guard expression) and the error reason is ignored
%% in the catch part, such as:
%%
%%   try
%%       <RestrictedExpression>
%%   catch
%%       _:_ ->
%%           ...
%%   end
%%
%% the try/catch can be eliminated by simply removing the
%% `new_try_tag`, `landingpad`, and `kill_try_tag` instructions.
reduce_try([{L,#b_blk{is=[#b_set{op=new_try_tag}],
                      last=Last}=Blk0} | Bs0], Acc) ->
    #b_br{succ=Succ,fail=Fail} = Last,
    Ws = sets:from_list([Succ,Fail], [{version, 2}]),
    try do_reduce_try(Bs0, Ws) of
        Bs ->
            %% Safe to reduce; drop the new_try_tag and fall through
            %% to the success block.
            Blk = Blk0#b_blk{is=[],
                             last=#b_br{bool=#b_literal{val=true},
                                        succ=Succ,fail=Succ}},
            reduce_try(Bs, [{L, Blk} | Acc])
    catch
        throw:not_possible ->
            reduce_try(Bs0, [{L, Blk0} | Acc])
    end;
reduce_try([{L, Blk} | Bs], Acc) ->
    reduce_try(Bs, [{L, Blk} | Acc]);
reduce_try([], Acc) ->
    Acc.

%% Check (and rewrite) all blocks reachable from a new_try_tag.
%% Throws 'not_possible' if anything unsafe is found.
do_reduce_try([{L, Blk} | Bs]=Bs0, Ws0) ->
    case sets:is_element(L, Ws0) of
        false ->
            %% This block is not reachable from the block with the
            %% `new_try_tag` instruction. Retain it. There is no
            %% need to check it for safety.
            case sets:is_empty(Ws0) of
                true -> Bs0;
                false -> [{L, Blk} | do_reduce_try(Bs, Ws0)]
            end;
        true ->
            Ws1 = sets:del_element(L, Ws0),
            #b_blk{is=Is0} = Blk,
            case reduce_try_is(Is0, []) of
                {safe,Is} ->
                    %% This block does not execute any instructions
                    %% that would require a try. Analyze successors.
                    Successors = beam_ssa:successors(Blk),
                    Ws = list_set_union(Successors, Ws1),
                    [{L, Blk#b_blk{is=Is}} | do_reduce_try(Bs, Ws)];
                unsafe ->
                    %% There is something unsafe in the block, for
                    %% example a `call` instruction or an `extract`
                    %% instruction.
                    throw(not_possible);
                {done,Is} ->
                    %% This block kills the try tag (either after
                    %% successful execution or at the landing pad).
                    %% Don't analyze successors.
                    [{L, Blk#b_blk{is=Is}} | do_reduce_try(Bs, Ws1)]
            end
    end;
do_reduce_try([], Ws) ->
    true = sets:is_empty(Ws),                   %Assertion.
    [].

reduce_try_is([#b_set{op=kill_try_tag}|Is], Acc) ->
    %% Remove this kill_try_tag instruction. If there was a
    %% landingpad instruction in this block, it has already been
    %% removed. Preserve all other instructions in the block.
    {done,reverse(Acc, Is)};
reduce_try_is([#b_set{op=extract}|_], _Acc) ->
    %% The error reason is accessed; the try/catch must stay.
    unsafe;
reduce_try_is([#b_set{op=landingpad}|Is], Acc) ->
    %% Drop the landingpad; keep scanning.
    reduce_try_is(Is, Acc);
reduce_try_is([#b_set{op={succeeded,body}}=I0|Is], Acc) ->
    %% If we reached this point, it means that the previous
    %% instruction has no side effects. We must now convert the
    %% flavor of the succeeded to `guard`, since the try/catch will
    %% be removed.
    I = I0#b_set{op={succeeded,guard}},
    reduce_try_is(Is, [I|Acc]);
reduce_try_is([#b_set{op=Op}=I|Is], Acc) ->
    IsSafe = case Op of
                 phi -> true;
                 _ -> beam_ssa:no_side_effect(I)
             end,
    case IsSafe of
        true -> reduce_try_is(Is, [I|Acc]);
        false -> unsafe
    end;
reduce_try_is([], Acc) ->
    {safe,reverse(Acc)}.

%% Removes try/catch expressions whose expressions will never throw.
%%
%% We walk backwards through all blocks, maintaining a set of
%% potentially unreachable landing pads, removing them from the set
%% whenever we see a branch to that block. When we encounter a
%% `new_try_tag` instruction that references a block in the
%% unreachable set, we'll remove the try/catch.
trim_try([{L, #b_blk{is=[#b_set{op=landingpad} | _]}=Blk}| Bs],
         Unreachable0, Killed, Acc) ->
    %% A landing pad: tentatively mark it unreachable, then clear any
    %% pads this block itself branches to.
    Unreachable1 = sets:add_element(L, Unreachable0),
    Successors = sets:from_list(beam_ssa:successors(Blk)),
    Unreachable = sets:subtract(Unreachable1, Successors),
    trim_try(Bs, Unreachable, Killed, [{L, Blk} | Acc]);
trim_try([{L, #b_blk{last=#b_ret{}}=Blk} | Bs], Unreachable, Killed, Acc) ->
    %% A return has no successors: nothing to update and nothing to
    %% optimize.
    trim_try(Bs, Unreachable, Killed, [{L,Blk}|Acc]);
trim_try([{L, Blk0} | Bs], Unreachable0, Killed0, Acc) ->
    case sets:is_empty(Unreachable0) of
        true ->
            %% No candidate landing pads: nothing to update and
            %% nothing to optimize.
            trim_try(Bs, Unreachable0, Killed0, [{L,Blk0}|Acc]);
        false ->
            #b_blk{is=Is0,last=Last0} = Blk0,
            case reverse(Is0) of
                [#b_set{op=new_try_tag,dst=Tag}|Is] ->
                    #b_br{succ=SuccLbl,fail=PadLbl} = Last0,
                    Unreachable = sets:del_element(PadLbl, Unreachable0),
                    case sets:is_element(PadLbl, Unreachable0) of
                        true ->
                            %% The landing pad can't be reached in
                            %% any way; remove the entire try/catch.
                            Blk = Blk0#b_blk{is=reverse(Is),
                                             last=#b_br{bool=#b_literal{val=true},
                                                        succ=SuccLbl,
                                                        fail=SuccLbl}},
                            Killed = sets:add_element(Tag, Killed0),
                            trim_try(Bs, Unreachable, Killed, [{L,Blk}|Acc]);
                        false ->
                            trim_try(Bs, Unreachable, Killed0, [{L,Blk0}|Acc])
                    end;
                _ ->
                    %% Update the set of unreachable landing_pad blocks.
                    Successors = sets:from_list(beam_ssa:successors(Blk0)),
                    Unreachable = sets:subtract(Unreachable0, Successors),
                    trim_try(Bs, Unreachable, Killed0, [{L,Blk0}|Acc])
            end
    end;
trim_try([], _Unreachable, Killed, Acc0) ->
    case sets:is_empty(Killed) of
        true ->
            Acc0;
        false ->
            %% Remove all `kill_try_tag` instructions referencing
            %% removed try/catches.
            [{L, Blk#b_blk{is=trim_try_is(Is0, Killed)}} ||
                {L, #b_blk{is=Is0}=Blk} <- Acc0]
    end.
%% Remove kill_try_tag/catch_end instructions that reference a
%% try/catch removed by trim_try/4.
trim_try_is([#b_set{op=phi,dst=CatchEndVal}=Phi,
             #b_set{op=catch_end,dst=Dst,args=[Tag,CatchEndVal]}=Catch | Is],
            Killed) ->
    case sets:is_element(Tag, Killed) of
        true ->
            %% The catch_end is gone; let the phi produce its result
            %% directly.
            [Phi#b_set{dst=Dst} | trim_try_is(Is, Killed)];
        false ->
            [Phi, Catch | trim_try_is(Is, Killed)]
    end;
trim_try_is([#b_set{op=kill_try_tag,args=[Tag]}=I | Is], Killed) ->
    case sets:is_element(Tag, Killed) of
        true -> trim_try_is(Is, Killed);
        false -> [I | trim_try_is(Is, Killed)]
    end;
trim_try_is([I | Is], Killed) ->
    [I | trim_try_is(Is, Killed)];
trim_try_is([], _Killed) ->
    [].

%%%
%%% Optimize binary matching.
%%%
%%% * If the value of a segment is never extracted, rewrite to a
%%%   bs_skip instruction.
%%%
%%% * Coalesce adjacent bs_skip instructions and skip instructions
%%%   with bs_test_tail.
%%%

ssa_opt_bsm({#opt_st{ssa=Linear}=St, FuncDb}) ->
    Extracted0 = bsm_extracted(Linear),
    Extracted = sets:from_list(Extracted0, [{version, 2}]),
    {St#opt_st{ssa=bsm_skip(Linear, Extracted)}, FuncDb}.

bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs0], Extracted) ->
    Bs = bsm_skip(Bs0, Extracted),
    Is = bsm_skip_is(Is0, Extracted),
    coalesce_skips({L,Blk#b_blk{is=Is}}, Bs);
bsm_skip([], _) -> [].

bsm_skip_is([I0|Is], Extracted) ->
    case I0 of
        #b_set{op=bs_match,
               dst=Ctx,
               args=[#b_literal{val=T}=Type,PrevCtx|Args0]}
          when T =/= float, T =/= string, T =/= skip ->
            %% Note that it is never safe to skip matching of floats,
            %% even if the size is known to be correct.
            I = case sets:is_element(Ctx, Extracted) of
                    true ->
                        I0;
                    false ->
                        %% The value is never extracted; turn the
                        %% match into a skip.
                        Args = [#b_literal{val=skip},PrevCtx,Type|Args0],
                        I0#b_set{args=Args}
                end,
            [I|Is];
        #b_set{} ->
            [I0|bsm_skip_is(Is, Extracted)]
    end;
bsm_skip_is([], _) -> [].
%% Collect the match contexts whose values are extracted somewhere.
bsm_extracted([{_,#b_blk{is=Is}}|Bs]) ->
    case Is of
        [#b_set{op=bs_extract,args=[Ctx]}|_] ->
            [Ctx|bsm_extracted(Bs)];
        _ ->
            bsm_extracted(Bs)
    end;
bsm_extracted([]) -> [].

%% Try to merge a trailing bs_skip in this block with a bs_skip or
%% bs_test_tail in the following block.
coalesce_skips({L,#b_blk{is=[#b_set{op=bs_extract}=Extract|Is0],
                         last=Last0}=Blk0}, Bs0) ->
    case coalesce_skips_is(Is0, Last0, Bs0) of
        not_possible ->
            [{L,Blk0}|Bs0];
        {Is,Last,Bs} ->
            Blk = Blk0#b_blk{is=[Extract|Is],last=Last},
            [{L,Blk}|Bs]
    end;
coalesce_skips({L,#b_blk{is=Is0,last=Last0}=Blk0}, Bs0) ->
    case coalesce_skips_is(Is0, Last0, Bs0) of
        not_possible ->
            [{L,Blk0}|Bs0];
        {Is,Last,Bs} ->
            Blk = Blk0#b_blk{is=Is,last=Last},
            [{L,Blk}|Bs]
    end.

coalesce_skips_is([#b_set{op=bs_match,
                          args=[#b_literal{val=skip},
                                Ctx0,Type,Flags,
                                #b_literal{val=Size0},
                                #b_literal{val=Unit0}]}=Skip0,
                   #b_set{op={succeeded,guard}}],
                  #b_br{succ=L2,fail=Fail}=Br0,
                  Bs0) when is_integer(Size0) ->
    %% Note: the repeated Fail variable below requires that both
    %% branches fail to the same label.
    case Bs0 of
        [{L2,#b_blk{is=[#b_set{op=bs_match,
                               dst=SkipDst,
                               args=[#b_literal{val=skip},_,_,_,
                                     #b_literal{val=Size1},
                                     #b_literal{val=Unit1}]},
                        #b_set{op={succeeded,guard}}=Succeeded],
                    last=#b_br{fail=Fail}=Br}}|Bs] when is_integer(Size1) ->
            %% Two adjacent skips; combine them into a single skip of
            %% the total number of bits.
            SkipBits = Size0 * Unit0 + Size1 * Unit1,
            Skip = Skip0#b_set{dst=SkipDst,
                               args=[#b_literal{val=skip},Ctx0,
                                     Type,Flags,
                                     #b_literal{val=SkipBits},
                                     #b_literal{val=1}]},
            Is = [Skip,Succeeded],
            {Is,Br,Bs};
        [{L2,#b_blk{is=[#b_set{op=bs_test_tail,
                               args=[_Ctx,#b_literal{val=TailSkip}]}],
                    last=#b_br{succ=NextSucc,fail=Fail}}}|Bs] ->
            %% A skip followed by a tail test; fold the skipped bits
            %% into the tail test.
            SkipBits = Size0 * Unit0,
            TestTail = Skip0#b_set{op=bs_test_tail,
                                   args=[Ctx0,#b_literal{val=SkipBits+TailSkip}]},
            Br = Br0#b_br{bool=TestTail#b_set.dst,succ=NextSucc},
            Is = [TestTail],
            {Is,Br,Bs};
        _ ->
            not_possible
    end;
coalesce_skips_is(_, _, _) ->
    not_possible.

%%%
%%% Short-cutting binary matching instructions.
%%%

ssa_opt_bsm_shortcut({#opt_st{ssa=Linear}=St, FuncDb}) ->
    Positions = bsm_positions(Linear, #{}),
    case map_size(Positions) of
        0 ->
            %% No binary matching instructions.
            {St, FuncDb};
        _ ->
            {St#opt_st{ssa=bsm_shortcut(Linear, Positions)}, FuncDb}
    end.

%% Map each match context to the number of bits matched so far, and
%% each bs_test_tail block to {Bits,FailLabel}.
bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
    PosMap = bsm_positions_is(Is, PosMap0),
    case {Is,Last} of
        {[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}],
         #b_br{bool=Bool,fail=Fail}} ->
            Bits = Bits0 + map_get(Ctx, PosMap0),
            bsm_positions(Bs, PosMap#{L=>{Bits,Fail}});
        {_,_} ->
            bsm_positions(Bs, PosMap)
    end;
bsm_positions([], PosMap) -> PosMap.

bsm_positions_is([#b_set{op=bs_start_match,dst=New}|Is], PosMap0) ->
    PosMap = PosMap0#{New=>0},
    bsm_positions_is(Is, PosMap);
bsm_positions_is([#b_set{op=bs_match,dst=New,args=Args}|Is], PosMap0) ->
    [_,Old|_] = Args,
    #{Old:=Bits0} = PosMap0,
    Bits = bsm_update_bits(Args, Bits0),
    PosMap = PosMap0#{New=>Bits},
    bsm_positions_is(Is, PosMap);
bsm_positions_is([_|Is], PosMap) ->
    bsm_positions_is(Is, PosMap);
bsm_positions_is([], PosMap) -> PosMap.

%% Add the (minimum) number of bits matched by one bs_match.
bsm_update_bits([#b_literal{val=string},_,#b_literal{val=String}], Bits) ->
    Bits + bit_size(String);
bsm_update_bits([#b_literal{val=utf8}|_], Bits) ->
    Bits + 8;
bsm_update_bits([#b_literal{val=utf16}|_], Bits) ->
    Bits + 16;
bsm_update_bits([#b_literal{val=utf32}|_], Bits) ->
    Bits + 32;
bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits)
  when is_integer(Sz) ->
    Bits + Sz*U;
bsm_update_bits(_, Bits) -> Bits.
%% If a match is known to have matched more bits than a later
%% bs_test_tail requires, branch directly past that test's failure
%% label.
bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) ->
    case {Is,Last0} of
        {[#b_set{op=bs_match,dst=New,args=[_,Old|_]},
          #b_set{op={succeeded,guard},dst=Bool,args=[New]}],
         #b_br{bool=Bool,fail=Fail}} ->
            case PosMap of
                #{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits ->
                    %% This match has already consumed more bits than
                    %% the tail test at Fail accepts; the test can
                    %% never succeed, so fail directly to its fail
                    %% label.
                    Last = Last0#b_br{fail=NextFail},
                    [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)];
                #{} ->
                    [{L,Blk}|bsm_shortcut(Bs, PosMap)]
            end;
        {_,_} ->
            [{L,Blk}|bsm_shortcut(Bs, PosMap)]
    end;
bsm_shortcut([], _PosMap) -> [].

%%%
%%% Optimize binary construction.
%%%
%%% If an integer segment or a float segment has a literal size and a
%%% literal value, convert to a binary segment. Coalesce adjacent
%%% literal binary segments. Literal binary segments will be
%%% converted to bs_put_string instructions in a later pass.
%%%

ssa_opt_bs_puts({#opt_st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
    {Linear,Count} = opt_bs_puts(Linear0, Count0, []),
    {St#opt_st{ssa=Linear,cnt=Count}, FuncDb}.

opt_bs_puts([{L,#b_blk{is=Is}=Blk0}|Bs], Count0, Acc0) ->
    case Is of
        [#b_set{op=bs_put},#b_set{op={succeeded,_}}]=Is ->
            case opt_bs_put(L, Is, Blk0, Count0, Acc0) of
                not_possible ->
                    opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0]);
                {Count,Acc1} ->
                    %% Try to merge the rewritten segment with the
                    %% previous one.
                    Acc = opt_bs_puts_merge(Acc1),
                    opt_bs_puts(Bs, Count, Acc)
            end;
        _ ->
            opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0])
    end;
opt_bs_puts([], Count, Acc) ->
    {reverse(Acc),Count}.
%% Coalesce two adjacent literal binary segments into one.
opt_bs_puts_merge([{L1,#b_blk{is=Is}=Blk0},{L2,#b_blk{is=AccIs}}=BAcc|Acc]) ->
    case {AccIs,Is} of
        {[#b_set{op=bs_put,
                 args=[#b_literal{val=binary},
                       #b_literal{},
                       #b_literal{val=Bin0},
                       #b_literal{val=all},
                       #b_literal{val=1}]},
          #b_set{op={succeeded,_}}],
         [#b_set{op=bs_put,
                 args=[#b_literal{val=binary},
                       #b_literal{},
                       #b_literal{val=Bin1},
                       #b_literal{val=all},
                       #b_literal{val=1}]}=I0,
          #b_set{op={succeeded,_}}=Succeeded]} ->
            %% Coalesce the two segments into one.
            Bin = <<Bin0/bitstring,Bin1/bitstring>>,
            I = I0#b_set{args=bs_put_args(binary, Bin, all)},
            Blk = Blk0#b_blk{is=[I,Succeeded]},
            [{L2,Blk}|Acc];
        {_,_} ->
            [{L1,Blk0},BAcc|Acc]
    end.

%% Rewrite one bs_put block if its segment can be turned into a
%% literal binary segment (possibly preceded by an integer segment).
opt_bs_put(L, [I0,Succeeded], #b_blk{last=Br0}=Blk0, Count0, Acc) ->
    case opt_bs_put(I0) of
        [Bin] when is_bitstring(Bin) ->
            %% The whole segment collapses to a literal binary.
            Args = bs_put_args(binary, Bin, all),
            I = I0#b_set{args=Args},
            Blk = Blk0#b_blk{is=[I,Succeeded]},
            {Count0,[{L,Blk}|Acc]};
        [{int,Int,Size},Bin] when is_bitstring(Bin) ->
            %% Construct a bs_put_integer instruction followed by a
            %% bs_put_binary instruction.
            IntArgs = bs_put_args(integer, Int, Size),
            BinArgs = bs_put_args(binary, Bin, all),

            {BinL,BinVarNum,BinBoolNum} = {Count0,Count0+1,Count0+2},
            Count = Count0 + 3,
            BinVar = #b_var{name={'@ssa_bs_put',BinVarNum}},
            BinBool = #b_var{name={'@ssa_bool',BinBoolNum}},

            BinI = I0#b_set{dst=BinVar,args=BinArgs},
            BinSucceeded = Succeeded#b_set{dst=BinBool,args=[BinVar]},
            BinBlk = Blk0#b_blk{is=[BinI,BinSucceeded],
                                last=Br0#b_br{bool=BinBool}},

            IntI = I0#b_set{args=IntArgs},
            IntBlk = Blk0#b_blk{is=[IntI,Succeeded],last=Br0#b_br{succ=BinL}},

            {Count,[{BinL,BinBlk},{L,IntBlk}|Acc]};
        not_possible ->
            not_possible
    end.
%% Try to convert a bs_put instruction into one or two literal
%% segments. Returns a list of bitstrings and {int,Val,Size} tuples,
%% or not_possible.
opt_bs_put(#b_set{args=[#b_literal{val=binary},_,#b_literal{val=Val},
                        #b_literal{val=all},#b_literal{val=Unit}]})
  when is_bitstring(Val) ->
    if
        bit_size(Val) rem Unit =:= 0 ->
            [Val];
        true ->
            %% The unit does not evenly divide the bitstring; the
            %% construction will fail at runtime.
            not_possible
    end;
opt_bs_put(#b_set{args=[#b_literal{val=Type},#b_literal{val=Flags},
                        #b_literal{val=Val},#b_literal{val=Size},
                        #b_literal{val=Unit}]}=I0) when is_integer(Size) ->
    EffectiveSize = Size * Unit,
    if
        EffectiveSize > 0 ->
            case {Type,opt_bs_put_endian(Flags)} of
                {integer,big} when is_integer(Val) ->
                    if
                        EffectiveSize < 64 ->
                            [<<Val:EffectiveSize>>];
                        true ->
                            opt_bs_put_split_int(Val, EffectiveSize)
                    end;
                {integer,little} when is_integer(Val), EffectiveSize < 128 ->
                    %% To avoid an explosion in code size, only
                    %% relatively small fields are optimized. The
                    %% little-endian value is re-expressed as a
                    %% big-endian one and retried.
                    <<Int:EffectiveSize>> = <<Val:EffectiveSize/little>>,
                    Args = bs_put_args(Type, Int, EffectiveSize),
                    I = I0#b_set{args=Args},
                    opt_bs_put(I);
                {binary,_} when is_bitstring(Val) ->
                    case Val of
                        <<Bitstring:EffectiveSize/bits,_/bits>> ->
                            [Bitstring];
                        _ ->
                            %% Specified size exceeds size of
                            %% the bitstring.
                            not_possible
                    end;
                {float,Endian} ->
                    try
                        [opt_bs_put_float(Val, EffectiveSize, Endian)]
                    catch error:_ ->
                            not_possible
                    end;
                {_,_} ->
                    not_possible
            end;
        true ->
            not_possible
    end;
opt_bs_put(#b_set{}) -> not_possible.

opt_bs_put_float(N, Sz, Endian) ->
    case Endian of
        big -> <<N:Sz/big-float-unit:1>>;
        little -> <<N:Sz/little-float-unit:1>>
    end.

%% Build the argument list for a literal bs_put segment.
bs_put_args(Type, Val, Size) ->
    [#b_literal{val=Type},
     #b_literal{val=[unsigned,big]},
     #b_literal{val=Val},
     #b_literal{val=Size},
     #b_literal{val=1}].
%% Return the endianness (big, little, or native) from a list of
%% binary segment flags. At least one endianness flag must be present.
opt_bs_put_endian([Flag|Flags]) ->
    case Flag of
        big -> big;
        little -> little;
        native -> native;
        _ -> opt_bs_put_endian(Flags)
    end.

%% Split a big-endian integer segment whose value has a long run of
%% leading zero or one bits into an {int,...} part and a literal
%% binary part. Returns `not_possible` when the value is 0 or -1.
opt_bs_put_split_int(Int, Size) ->
    SplitPos = opt_bs_put_split_int_1(Int, 0, Size - 1),
    UpperSize = Size - SplitPos,
    case SplitPos of
        0 ->
            %% Value is 0 or -1 -- keep the original instruction.
            not_possible;
        _ when UpperSize < 64 ->
            %% No or few leading zeroes or ones.
            [<<Int:Size>>];
        _ ->
            %% There are 64 or more leading ones or zeroes in
            %% the resulting binary. Split into two separate
            %% segments to avoid an explosion in code size.
            [{int,Int bsr SplitPos,UpperSize},<<Int:SplitPos>>]
    end.

%% Binary search for the lowest bit position above which Int is pure
%% sign extension (all zero or all one bits), rounded up to a whole
%% number of bytes.
opt_bs_put_split_int_1(_Int, Low, High) when Low > High ->
    8 * ((Low + 7) div 8);
opt_bs_put_split_int_1(Int, Low, High) ->
    Mid = (Low + High) div 2,
    case Int bsr Mid of
        0 -> opt_bs_put_split_int_1(Int, Low, Mid - 1);
        -1 -> opt_bs_put_split_int_1(Int, Low, Mid - 1);
        _ -> opt_bs_put_split_int_1(Int, Mid + 1, High)
    end.

%%%
%%% Optimize expressions such as "tuple_size(Var) =:= 2".
%%%
%%% Consider this code:
%%%
%%%     0:
%%%       .
%%%       .
%%%       .
%%%       Size = bif:tuple_size Var
%%%       BoolVar1 = succeeded:guard Size
%%%       br BoolVar1, label 4, label 3
%%%
%%%     4:
%%%       BoolVar2 = bif:'=:=' Size, literal 2
%%%       br BoolVar2, label 6, label 3
%%%
%%%     6: ...   %% OK
%%%
%%%     3: ...   %% Not a tuple of size 2
%%%
%%% The BEAM code will look like this:
%%%
%%%     {bif,tuple_size,{f,3},[{x,0}],{x,0}}.
%%%     {test,is_eq_exact,{f,3},[{x,0},{integer,2}]}.
%%%
%%% Better BEAM code will be produced if we transform the
%%% code like this:
%%%
%%%     0:
%%%       .
%%%       .
%%%       .
%%%       br label 10
%%%
%%%     10:
%%%       NewBoolVar = bif:is_tuple Var
%%%       br NewBoolVar, label 11, label 3
%%%
%%%     11:
%%%       Size = bif:tuple_size Var
%%%       br label 4
%%%
%%%     4:
%%%       BoolVar2 = bif:'=:=' Size, literal 2
%%%       br BoolVar2, label 6, label 3
%%%
%%% (The key part of the transformation is the removal of
%%% the 'succeeded' instruction to signal to the code generator
%%% that the call to tuple_size/1 can't fail.)
%%%
%%% The BEAM code will look like:
%%%
%%%     {test,is_tuple,{f,3},[{x,0}]}.
%%%     {test_arity,{f,3},[{x,0},2]}.
%%%
%%% Those two instructions will be combined into a single
%%% is_tuple_of_arity instruction by the loader.
%%%

ssa_opt_tuple_size({#opt_st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
    %% This optimization is only safe in guards, as prefixing tuple_size with
    %% an is_tuple check prevents it from throwing an exception.
    NonGuards = non_guards(Linear0),
    {Linear,Count} = opt_tup_size(Linear0, NonGuards, Count0, []),
    {St#opt_st{ssa=Linear,cnt=Count}, FuncDb}.

%% Scan for blocks that consist of a single '=:=' comparison against a
%% non-negative integer literal followed by a branch on the result.
%% Each candidate is handed to opt_tup_size_1/5, which inspects the
%% predecessor block for the tuple_size call.
opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], NonGuards, Count0, Acc0) ->
    case {Is,Last} of
        {[#b_set{op={bif,'=:='},dst=Bool,args=[#b_var{}=Tup,#b_literal{val=Arity}]}],
         #b_br{bool=Bool}} when is_integer(Arity), Arity >= 0 ->
            {Acc,Count} = opt_tup_size_1(Tup, L, NonGuards, Count0, Acc0),
            opt_tup_size(Bs, NonGuards, Count, [{L,Blk}|Acc]);
        {_,_} ->
            opt_tup_size(Bs, NonGuards, Count0, [{L,Blk}|Acc0])
    end;
opt_tup_size([], _NonGuards, Count, Acc) ->
    {reverse(Acc),Count}.
%% Check whether the block preceding the '=:=' comparison ends with a
%% successful tuple_size test branching to the comparison block, and
%% that the failure label is in a guard context. If so, perform the
%% transformation via opt_tup_size_2/8; otherwise leave everything
%% unchanged.
opt_tup_size_1(Size, EqL, NonGuards, Count0, [{L,Blk0}|Acc]) ->
    #b_blk{is=Is0,last=Last} = Blk0,
    case Last of
        #b_br{bool=Bool,succ=EqL,fail=Fail} ->
            case gb_sets:is_member(Fail, NonGuards) of
                true ->
                    {[{L,Blk0}|Acc],Count0};
                false ->
                    case opt_tup_size_is(Is0, Bool, Size, []) of
                        none ->
                            {[{L,Blk0}|Acc],Count0};
                        {PreIs,TupleSizeIs,Tuple} ->
                            opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
                                           Tuple, Fail, Count0, Acc)
                    end
            end;
        _ ->
            {[{L,Blk0}|Acc],Count0}
    end;
opt_tup_size_1(_, _, _, Count, Acc) ->
    {Acc,Count}.

%% Split the original block into three: the instructions before the
%% tuple_size call (PreBlk), a new is_tuple test (IsTupleBlk), and the
%% tuple_size call without its 'succeeded' test (TupleSizeBlk). Two new
%% labels and one new boolean variable are allocated from Count0.
opt_tup_size_2(PreIs, TupleSizeIs, PreL, EqL, Tuple, Fail, Count0, Acc) ->
    IsTupleL = Count0,
    TupleSizeL = Count0 + 1,
    Bool = #b_var{name={'@ssa_bool',Count0+2}},
    Count = Count0 + 3,

    True = #b_literal{val=true},
    PreBr = #b_br{bool=True,succ=IsTupleL,fail=IsTupleL},
    PreBlk = #b_blk{is=PreIs,last=PreBr},

    IsTupleIs = [#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}],
    IsTupleBr = #b_br{bool=Bool,succ=TupleSizeL,fail=Fail},
    IsTupleBlk = #b_blk{is=IsTupleIs,last=IsTupleBr},

    TupleSizeBr = #b_br{bool=True,succ=EqL,fail=EqL},
    TupleSizeBlk = #b_blk{is=TupleSizeIs,last=TupleSizeBr},
    {[{TupleSizeL,TupleSizeBlk},
      {IsTupleL,IsTupleBlk},
      {PreL,PreBlk}|Acc],Count}.

%% Look for a trailing "Size = tuple_size(Tuple), Bool = succeeded Size"
%% pair at the end of the instruction list. Returns the preceding
%% instructions, the tuple_size instruction (without the 'succeeded'
%% test), and the tuple variable -- or `none` when the pattern is absent.
opt_tup_size_is([#b_set{op={bif,tuple_size},dst=Size,args=[Tuple]}=I,
                 #b_set{op={succeeded,_},dst=Bool,args=[Size]}],
                Bool, Size, Acc) ->
    {reverse(Acc),[I],Tuple};
opt_tup_size_is([I|Is], Bool, Size, Acc) ->
    opt_tup_size_is(Is, Bool, Size, [I|Acc]);
opt_tup_size_is([], _, _, _Acc) -> none.

%%%
%%% Optimize #b_switch{} instructions.
%%%
%%% A #b_switch{} with only one value can be rewritten to
%%% a #b_br{}. A switch that only verifies that the argument
%%% is 'true' or 'false' can be rewritten to an is_boolean test.
%%%

ssa_opt_sw({#opt_st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
    {Linear,Count} = opt_sw(Linear0, Count0, []),
    {St#opt_st{ssa=Linear,cnt=Count}, FuncDb}.

%% Simplify switch terminators: a single-value switch becomes an
%% '=:=' test with a two-way branch, and a switch on exactly the two
%% booleans with a common target becomes an is_boolean test.
opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Sw0}=Blk0}|Bs], Count0, Acc) ->
    case Sw0 of
        #b_switch{arg=Arg,fail=Fail,list=[{Lit,Lbl}]} ->
            %% Rewrite a single value switch to a br.
            {Bool,Count} = new_var('@ssa_bool', Count0),
            IsEq = #b_set{op={bif,'=:='},dst=Bool,args=[Arg,Lit]},
            Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
            Blk = Blk0#b_blk{is=Is++[IsEq],last=Br},
            opt_sw(Bs, Count, [{L,Blk}|Acc]);
        #b_switch{arg=Arg,fail=Fail,
                  list=[{#b_literal{val=B1},Lbl},{#b_literal{val=B2},Lbl}]}
          when B1 =:= not B2 ->
            %% Replace with is_boolean test.
            {Bool,Count} = new_var('@ssa_bool', Count0),
            IsBool = #b_set{op={bif,is_boolean},dst=Bool,args=[Arg]},
            Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
            Blk = Blk0#b_blk{is=Is++[IsBool],last=Br},
            opt_sw(Bs, Count, [{L,Blk}|Acc]);
        _ ->
            opt_sw(Bs, Count0, [{L,Blk0}|Acc])
    end;
opt_sw([{L,#b_blk{}=Blk}|Bs], Count, Acc) ->
    opt_sw(Bs, Count, [{L,Blk}|Acc]);
opt_sw([], Count, Acc) ->
    {reverse(Acc),Count}.

%%% Try to replace `=/=` with `=:=` and `/=` with `==`. For example,
%%% this code:
%%%
%%%     Bool = bif:'=/=' Anything, AnyValue
%%%     br Bool, ^Succ, ^Fail
%%%
%%% can be rewritten like this:
%%%
%%%     Bool = bif:'=:=' Anything, AnyValue
%%%     br Bool, ^Fail, ^Succ
%%%
%%% The transformation is only safe if the only use of Bool is in the
%%% terminator. We therefore need to verify that there is only one
%%% use.
%%%
%%% This transformation is not an optimization in itself, but it opens
%%% up for other optimizations in beam_ssa_type and beam_ssa_dead.
%%%

ssa_opt_ne({#opt_st{ssa=Linear0}=St, FuncDb}) ->
    %% The uses map is computed lazily (see opt_ne_single_use/2) so
    %% that functions without any '=/=' or '/=' pay nothing.
    Linear = opt_ne(Linear0, {uses,Linear0}),
    {St#opt_st{ssa=Linear}, FuncDb}.

%% Find blocks whose last instruction defines the branch condition
%% with a '=/=' or '/=' test and replace the test with its positive
%% counterpart, swapping the branch targets.
opt_ne([{L,#b_blk{is=[_|_]=Is0,last=#b_br{bool=#b_var{}=Bool}}=Blk0}|Bs], Uses0) ->
    case last(Is0) of
        #b_set{op={bif,'=/='},dst=Bool}=I0 ->
            I = I0#b_set{op={bif,'=:='}},
            {Blk,Uses} = opt_ne_replace(I, Blk0, Uses0),
            [{L,Blk}|opt_ne(Bs, Uses)];
        #b_set{op={bif,'/='},dst=Bool}=I0 ->
            I = I0#b_set{op={bif,'=='}},
            {Blk,Uses} = opt_ne_replace(I, Blk0, Uses0),
            [{L,Blk}|opt_ne(Bs, Uses)];
        _ ->
            [{L,Blk0}|opt_ne(Bs, Uses0)]
    end;
opt_ne([{L,Blk}|Bs], Uses) ->
    [{L,Blk}|opt_ne(Bs, Uses)];
opt_ne([], _Uses) -> [].

%% Apply the replacement only when the boolean is used solely by the
%% terminator; otherwise swapping the branch edges would change the
%% meaning of the other uses.
opt_ne_replace(#b_set{dst=Bool}=I,
               #b_blk{is=Is0,last=#b_br{succ=Succ,fail=Fail}=Br0}=Blk,
               Uses0) ->
    case opt_ne_single_use(Bool, Uses0) of
        {true,Uses} ->
            Is = replace_last(Is0, I),
            Br = Br0#b_br{succ=Fail,fail=Succ},
            {Blk#b_blk{is=Is,last=Br},Uses};
        {false,Uses} ->
            %% The variable is used more than once. Not safe.
            {Blk,Uses}
    end.

%% Replace the last element of a non-empty list.
replace_last([_], Repl) -> [Repl];
replace_last([I|Is], Repl) -> [I|replace_last(Is, Repl)].

%% Return {IsSingleUse,Uses}. The first clause materializes the uses
%% map from the linear block list on first demand; the second clause
%% answers from the cached map.
opt_ne_single_use(Var, {uses,Linear}) ->
    Blocks = maps:from_list(Linear),
    RPO = beam_ssa:rpo(Blocks),
    Uses = beam_ssa:uses(RPO, Blocks),
    opt_ne_single_use(Var, Uses);
opt_ne_single_use(Var, Uses) when is_map(Uses) ->
    {case Uses of
         #{Var:=[_]} -> true;
         #{Var:=[_|_]} -> false
     end,Uses}.

%%%
%%% When a tuple is matched, the pattern matching compiler generates a
%%% get_tuple_element instruction for every tuple element that will
%%% ever be used in the rest of the function. That often forces the
%%% extracted tuple elements to be stored in Y registers until it's
%%% time to use them.
%%% It could also mean that there could be execution
%%% paths that will never use the extracted elements.
%%%
%%% This optimization will sink get_tuple_element instructions, that
%%% is, move them forward in the execution stream to the last possible
%%% block where they will still dominate all uses. That may reduce the
%%% size of stack frames, reduce register shuffling, and avoid
%%% extracting tuple elements on execution paths that never use the
%%% extracted values.
%%%
%%% However, there is one caveat to be aware of. Sinking tuple elements
%%% will keep the entire tuple alive longer. In rare circumstances, that
%%% can be a problem. Here is an example:
%%%
%%%    mapfoldl(F, Acc0, [Hd|Tail]) ->
%%%        {R,Acc1} = F(Hd, Acc0),
%%%        {Rs,Acc2} = mapfoldl(F, Acc1, Tail),
%%%        {[R|Rs],Acc2};
%%%    mapfoldl(F, Acc, []) ->
%%%        {[],Acc}.
%%%
%%% Sinking get_tuple_element instructions will generate code similar
%%% to this example:
%%%
%%%    slow_mapfoldl(F, Acc0, [Hd|Tail]) ->
%%%        Res1 = F(Hd, Acc0),
%%%        Res2 = slow_mapfoldl(F, element(2, Res1), Tail),
%%%        {[element(1, Res1)|element(1, Res2)],element(2, Res2)};
%%%    slow_mapfoldl(F, Acc, []) ->
%%%        {[],Acc}.
%%%
%%% Here the entire tuple bound to `Res1` will be kept alive until
%%% `slow_mapfoldl/3` returns. That is, every intermediate accumulator
%%% will be kept alive.
%%%
%%% In this case, not sinking is clearly superior:
%%%
%%%    fast_mapfoldl(F, Acc0, [Hd|Tail]) ->
%%%        Res1 = F(Hd, Acc0),
%%%        R = element(1, Res1),
%%%        Res2 = fast_mapfoldl(F, element(2, Res1), Tail),
%%%        {[R|element(1, Res2)],element(2, Res2)};
%%%    fast_mapfoldl(F, Acc, []) ->
%%%        {[],Acc}.
%%%
%%% The `slow_mapfoldl/3` and `fast_mapfoldl/3` use the same number of
%%% stack slots.
%%%
%%% To avoid producing code similar to `slow_mapfoldl/3`, use a
%%% heuristic to only sink when sinking would reduce the number of
%%% stack slots, or if there can't possibly be a recursive call
%%% involved. This is a heuristic because it is difficult to exactly
%%% predict the number of stack slots that will be needed for a given
%%% piece of code.

ssa_opt_sink({#opt_st{ssa=Linear}=St, FuncDb}) ->
    %% Create a map with all variables that define get_tuple_element
    %% instructions. The variable name maps to the block it is defined
    %% in and the source tuple.
    case def_blocks(Linear) of
        [] ->
            %% No get_tuple_element instructions, so there is nothing to do.
            {St, FuncDb};
        [_|_]=Defs0 ->
            Defs = maps:from_list(Defs0),
            {do_ssa_opt_sink(Defs, St), FuncDb}
    end.

do_ssa_opt_sink(Defs, #opt_st{ssa=Linear}=St) ->
    %% Find all the blocks that use variables defined by
    %% get_tuple_element instructions.
    Used = used_blocks(Linear, Defs, []),

    %% Calculate dominators.
    Blocks0 = maps:from_list(Linear),
    RPO = beam_ssa:rpo(Blocks0),
    Preds = beam_ssa:predecessors(Blocks0),
    {Dom, Numbering} = beam_ssa:dominators_from_predecessors(RPO, Preds),

    %% It is not safe to move get_tuple_element instructions to blocks
    %% that begin with certain instructions. It is also unsafe to move
    %% the instructions into any part of a receive.
    Unsuitable = unsuitable(Linear, Blocks0),

    %% Calculate new positions for get_tuple_element instructions. The new
    %% position is a block that dominates all uses of the variable.
    DefLocs0 = new_def_locations(Used, Defs, Dom, Numbering, Unsuitable),

    %% Avoid keeping tuples alive if only one element is accessed later and
    %% if there is the chance of a recursive call being made. This is an
    %% important precaution to avoid that lists:mapfoldl/3 keeps all previous
    %% versions of the accumulator alive until the end of the input list.
    Ps = partition_deflocs(DefLocs0, Defs, Blocks0),
    DefLocs1 = filter_deflocs(Ps, Preds, Blocks0),
    DefLocs = sort(DefLocs1),

    %% Now move all suitable get_tuple_element instructions to their
    %% new blocks.
    Blocks = foldl(fun({V,{From,To}}, A) ->
                           move_defs(V, From, To, A)
                   end, Blocks0, DefLocs),

    St#opt_st{ssa=beam_ssa:linearize(Blocks)}.

%% Collect [{Dst,{Label,Tuple}}] for every get_tuple_element
%% instruction in the function.
def_blocks([{L,#b_blk{is=Is}}|Bs]) ->
    def_blocks_is(Is, L, def_blocks(Bs));
def_blocks([]) -> [].

def_blocks_is([#b_set{op=get_tuple_element,args=[Tuple,_],dst=Dst}|Is], L, Acc) ->
    def_blocks_is(Is, L, [{Dst,{L,Tuple}}|Acc]);
def_blocks_is([_|Is], L, Acc) ->
    def_blocks_is(Is, L, Acc);
def_blocks_is([], _, Acc) -> Acc.

%% Return a family mapping each tracked variable to the labels of the
%% blocks that use it.
used_blocks([{L,Blk}|Bs], Def, Acc0) ->
    Used = beam_ssa:used(Blk),
    Acc = [{V,L} || V <- Used, maps:is_key(V, Def)] ++ Acc0,
    used_blocks(Bs, Def, Acc);
used_blocks([], _Def, Acc) ->
    rel2fam(Acc).

%% Partition sinks for get_tuple_element instructions in the same
%% clause extracting from the same tuple. Sort each partition in
%% execution order.
partition_deflocs(DefLoc, _Defs, Blocks) ->
    {BlkNums0,_} = mapfoldl(fun(L, N) -> {{L,N},N+1} end, 0, beam_ssa:rpo(Blocks)),
    BlkNums = maps:from_list(BlkNums0),
    S = [{Tuple,{map_get(To, BlkNums),{V,{From,To}}}} ||
            {V,Tuple,{From,To}} <- DefLoc],
    F = rel2fam(S),
    partition_deflocs_1(F, Blocks).

partition_deflocs_1([{Tuple,DefLocs0}|T], Blocks) ->
    DefLocs1 = [DL || {_,DL} <- DefLocs0],
    DefLocs = partition_dl(DefLocs1, Blocks),
    [{Tuple,DL} || DL <- DefLocs] ++ partition_deflocs_1(T, Blocks);
partition_deflocs_1([], _) -> [].
%% Split a list of sinks (already sorted in execution order) into
%% partitions that belong to the same clause, using the blocks
%% reachable from the first sink's destination as the boundary.
partition_dl([_]=DefLoc, _Blocks) ->
    [DefLoc];
partition_dl([{_,{_,First}}|_]=DefLoc0, Blocks) ->
    RPO = beam_ssa:rpo([First], Blocks),
    {P,DefLoc} = partition_dl_1(DefLoc0, RPO, []),
    [P|partition_dl(DefLoc, Blocks)];
partition_dl([], _Blocks) -> [].

%% Walk the RPO label list in step with the sinks; sinks whose
%% destination appears in the RPO belong to the current partition,
%% the rest are returned for the next round.
partition_dl_1([{_,{_,L}}=DL|DLs], [L|_]=Ls, Acc) ->
    partition_dl_1(DLs, Ls, [DL|Acc]);
partition_dl_1([_|_]=DLs, [_|Ls], Acc) ->
    partition_dl_1(DLs, Ls, Acc);
partition_dl_1([], _, Acc) ->
    {reverse(Acc),[]};
partition_dl_1([_|_]=DLs, [], Acc) ->
    {reverse(Acc),DLs}.

filter_deflocs([{Tuple,DefLoc0}|DLs], Preds, Blocks) ->
    %% Input is a list of sinks of get_tuple_element instructions in
    %% execution order from the same tuple in the same clause.
    [{_,{_,First}}|_] = DefLoc0,
    Paths = find_paths_to_check(DefLoc0, First),
    WillGC0 = ordsets:from_list([FromTo || {{_,_}=FromTo,_} <- Paths]),
    WillGC1 = [{{From,To},will_gc(From, To, Preds, Blocks, true)} ||
                  {From,To} <- WillGC0],
    WillGC = maps:from_list(WillGC1),

    %% Separate sinks that will force the reference to the tuple to be
    %% saved on the stack from sinks that don't force.
    {DefLocGC0,DefLocNoGC} =
        partition(fun({{_,_}=FromTo,_}) ->
                          map_get(FromTo, WillGC)
                  end, Paths),

    %% Avoid potentially harmful sinks.
    DefLocGC = filter_gc_deflocs(DefLocGC0, Tuple, First, Preds, Blocks),

    %% Construct the complete list of sink operations.
    DefLoc1 = DefLocGC ++ DefLocNoGC,
    [DL || {_,{_,{From,To}}=DL} <- DefLoc1, From =/= To] ++
        filter_deflocs(DLs, Preds, Blocks);
filter_deflocs([], _, _) -> [].

%% Use a heuristic to avoid harmful sinking in lists:mapfoldl/3 and
%% similar functions.
%% Decide whether GC-crossing sinks for one tuple should be kept.
%% With multiple sinks, sinking is kept; with a single sink, it is
%% kept only when the tuple would be on the stack anyway or when no
%% recursive call can be involved.
filter_gc_deflocs(DefLocGC, Tuple, First, Preds, Blocks) ->
    case DefLocGC of
        [] ->
            [];
        [{_,{_,{From,To}}}] ->
            %% There is only one get_tuple_element instruction that
            %% can be sunk. That means that we may not gain any slots
            %% by sinking it.
            case is_on_stack(First, Tuple, Blocks) of
                true ->
                    %% The tuple itself must be stored in a stack slot
                    %% (because it will be used later) in addition to
                    %% the tuple element being extracted. It is
                    %% probably a win to sink this instruction.
                    DefLocGC;
                false ->
                    case will_gc(From, To, Preds, Blocks, false) of
                        false ->
                            %% There is no risk for recursive calls,
                            %% so it should be safe to
                            %% sink. Theoretically, we shouldn't win
                            %% any stack slots, but in practice it
                            %% seems that sinking makes it more likely
                            %% that the stack slot for a dying value
                            %% can be immediately reused for another
                            %% value.
                            DefLocGC;
                        true ->
                            %% This function could be involved in a
                            %% recursive call. Since there is no
                            %% obvious reduction in the number of
                            %% stack slots, we will not sink.
                            []
                    end
            end;
        [_,_|_] ->
            %% More than one get_tuple_element instruction can be
            %% sunk. Sinking will almost certainly reduce the number
            %% of stack slots.
            DefLocGC
    end.

%% Pair each sink with the path {First,To} that must be checked for
%% garbage collections.
find_paths_to_check([{_,{_,To}}=Move|T], First) ->
    [{{First,To},Move}|find_paths_to_check(T, First)];
find_paths_to_check([], _First) -> [].

%% Determine whether a GC may occur on any path between From and To.
%% When All is false, only instructions that could lead to a recursive
%% call (non-erlang remote calls) are counted; see will_gc_is/2.
will_gc(From, To, Preds, Blocks, All) ->
    Between = beam_ssa:between(From, To, Preds, Blocks),
    will_gc_1(Between, To, Blocks, All, #{From => false}).
%% Propagate the "will GC" flag block by block along the path until
%% the destination block is reached; return the flag for it.
will_gc_1([To|_], To, _Blocks, _All, WillGC) ->
    map_get(To, WillGC);
will_gc_1([L|Ls], To, Blocks, All, WillGC0) ->
    #b_blk{is=Is} = Blk = map_get(L, Blocks),
    GC = map_get(L, WillGC0) orelse will_gc_is(Is, All),
    WillGC = gc_update_successors(Blk, GC, WillGC0),
    will_gc_1(Ls, To, Blocks, All, WillGC).

%% With All =:= false, only count calls that could be recursive; with
%% any other value of All, count every instruction that clobbers the
%% X registers (and thus may trigger a GC).
will_gc_is([#b_set{op=call,args=Args}|Is], false) ->
    case Args of
        [#b_remote{mod=#b_literal{val=erlang}}|_] ->
            %% Assume that remote calls to the erlang module can't cause a
            %% recursive call.
            will_gc_is(Is, false);
        [_|_] ->
            true
    end;
will_gc_is([_|Is], false) ->
    %% Instructions that clobber X registers may cause a GC, but will not
    %% cause a recursive call.
    will_gc_is(Is, false);
will_gc_is([I|Is], All) ->
    beam_ssa:clobbers_xregs(I) orelse will_gc_is(Is, All);
will_gc_is([], _All) -> false.

%% Check whether Var is used after an instruction that clobbers the
%% X registers on some path starting at From -- i.e. whether Var must
%% be stored in a stack (Y register) slot.
is_on_stack(From, Var, Blocks) ->
    is_on_stack(beam_ssa:rpo([From], Blocks), Var, Blocks, #{From => false}).

is_on_stack([L|Ls], Var, Blocks, WillGC0) ->
    #b_blk{is=Is} = Blk = map_get(L, Blocks),
    GC0 = map_get(L, WillGC0),
    %% is_on_stack_is/3 throws {done,GC} as soon as a post-GC use of
    %% Var is found; the thrown flag is the final answer.
    try is_on_stack_is(Is, Var, GC0) of
        GC ->
            WillGC = gc_update_successors(Blk, GC, WillGC0),
            is_on_stack(Ls, Var, Blocks, WillGC)
    catch
        throw:{done,GC} ->
            GC
    end;
is_on_stack([], _Var, _, _) -> false.

is_on_stack_is([#b_set{op=get_tuple_element}|Is], Var, GC) ->
    is_on_stack_is(Is, Var, GC);
is_on_stack_is([I|Is], Var, GC0) ->
    case GC0 andalso member(Var, beam_ssa:used(I)) of
        true ->
            throw({done,GC0});
        false ->
            GC = GC0 orelse beam_ssa:clobbers_xregs(I),
            is_on_stack_is(Is, Var, GC)
    end;
is_on_stack_is([], _, GC) -> GC.
%% Merge the GC flag into all successor blocks: `true` is sticky and
%% never downgraded to `false`.
gc_update_successors(Blk, GC, WillGC) ->
    foldl(fun(L, Acc) ->
                  case Acc of
                      #{L := true} -> Acc;
                      #{L := false} when GC =:= false -> Acc;
                      #{} -> Acc#{L => GC}
                  end
          end, WillGC, beam_ssa:successors(Blk)).

%% unsuitable(Linear, Blocks) -> Unsuitable.
%%  Return an ordset of block labels for the blocks that are not
%%  suitable for sinking of get_tuple_element instructions.

unsuitable(Linear, Blocks) ->
    Predecessors = beam_ssa:predecessors(Blocks),
    Unsuitable0 = unsuitable_1(Linear),
    Unsuitable1 = unsuitable_recv(Linear, Blocks, Predecessors),
    gb_sets:from_list(Unsuitable0 ++ Unsuitable1).

%% Blocks that begin with these instructions (or a loop header) must
%% not receive sunk instructions.
unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}=I|_]}}|Bs]) ->
    Unsuitable = case Op of
                     bs_extract -> true;
                     bs_put -> true;
                     {float,_} -> true;
                     landingpad -> true;
                     _ -> beam_ssa:is_loop_header(I)
                 end,
    case Unsuitable of
        true ->
            [L|unsuitable_1(Bs)];
        false ->
            unsuitable_1(Bs)
    end;
unsuitable_1([{_,#b_blk{}}|Bs]) ->
    unsuitable_1(Bs);
unsuitable_1([]) -> [].

%% For every block that starts with a receive-related instruction,
%% mark the whole receive loop (found by walking predecessors) as
%% unsuitable.
unsuitable_recv([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs], Blocks, Predecessors) ->
    Ls = case Op of
             remove_message ->
                 unsuitable_loop(L, Blocks, Predecessors);
             recv_next ->
                 unsuitable_loop(L, Blocks, Predecessors);
             _ ->
                 []
         end,
    Ls ++ unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([_|Bs], Blocks, Predecessors) ->
    unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([], _, _) -> [].

unsuitable_loop(L, Blocks, Predecessors) ->
    unsuitable_loop(L, Blocks, Predecessors, []).

unsuitable_loop(L, Blocks, Predecessors, Acc) ->
    Ps = map_get(L, Predecessors),
    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc).
%% Walk predecessors transitively, collecting every block in the
%% receive loop into an ordset; the walk stops at loop headers and at
%% blocks already collected.
unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) ->
    case is_loop_header(P, Blocks) of
        true ->
            unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0);
        false ->
            case ordsets:is_element(P, Acc0) of
                false ->
                    Acc1 = ordsets:add_element(P, Acc0),
                    Acc = unsuitable_loop(P, Blocks, Predecessors, Acc1),
                    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc);
                true ->
                    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0)
            end
    end;
unsuitable_loop_1([], _, _, Acc) -> Acc.

%% A block is a loop header if its first instruction says so.
is_loop_header(L, Blocks) ->
    case map_get(L, Blocks) of
        #b_blk{is=[I|_]} ->
            beam_ssa:is_loop_header(I);
        #b_blk{} ->
            false
    end.

%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs,
%%                   Dominators, Numbering, Unsuitable) ->
%%  [{Variable,NewDefinitionBlock}]
%%
%%  Calculate new locations for get_tuple_element instructions. For
%%  each variable, the new location is a block that dominates all uses
%%  of the variable and is as near to the uses as possible.

new_def_locations([{V,UsedIn}|Vs], Defs, Dom, Numbering, Unsuitable) ->
    {DefIn,Tuple} = map_get(V, Defs),
    Common = common_dominator(UsedIn, Dom, Numbering, Unsuitable),
    Sink = case member(Common, map_get(DefIn, Dom)) of
               true ->
                   %% The common dominator is either DefIn or an
                   %% ancestor of DefIn. We'll need to consider all
                   %% get_tuple_element instructions so we will add
                   %% a dummy sink.
                   {V,Tuple,{DefIn,DefIn}};
               false ->
                   %% We have found a suitable descendant of DefIn,
                   %% to which the get_tuple_element instruction can
                   %% be sunk.
                   {V,Tuple,{DefIn,Common}}
           end,
    [Sink|new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable)];
new_def_locations([], _, _, _, _) -> [].
%% Find the nearest common dominator of the given blocks, walking up
%% the dominator tree past any block marked as unsuitable.
common_dominator(Ls0, Dom, Numbering, Unsuitable) ->
    [Common|_] = beam_ssa:common_dominators(Ls0, Dom, Numbering),
    case gb_sets:is_member(Common, Unsuitable) of
        true ->
            %% It is not allowed to place the instruction here. Try
            %% to find another suitable dominating block by going up
            %% one step in the dominator tree.
            [Common,OneUp|_] = map_get(Common, Dom),
            common_dominator([OneUp], Dom, Numbering, Unsuitable);
        false ->
            Common
    end.

%% Move get_tuple_element instructions to their new locations.

move_defs(V, From, To, Blocks) ->
    #{From:=FromBlk0,To:=ToBlk0} = Blocks,
    {Def,FromBlk} = remove_def(V, FromBlk0),
    %% insert_def/3 throws `not_possible` when the destination block
    %% starts with a phi that uses V; in that case keep the original.
    try insert_def(V, Def, ToBlk0) of
        ToBlk ->
            Blocks#{From:=FromBlk,To:=ToBlk}
    catch
        throw:not_possible ->
            Blocks
    end.

%% Remove the instruction defining V from the block, returning both.
remove_def(V, #b_blk{is=Is0}=Blk) ->
    {Def,Is} = remove_def_is(Is0, V, []),
    {Def,Blk#b_blk{is=Is}}.

remove_def_is([#b_set{dst=Dst}=Def|Is], Dst, Acc) ->
    {Def,reverse(Acc, Is)};
remove_def_is([I|Is], Dst, Acc) ->
    remove_def_is(Is, Dst, [I|Acc]).

insert_def(V, Def, #b_blk{is=Is0}=Blk) ->
    Is = insert_def_is(Is0, V, Def),
    Blk#b_blk{is=Is}.

%% Find the insertion point for the sunk definition: after any leading
%% phis (unless a phi uses V, which makes insertion impossible), and
%% past call/catch_end/wait_timeout instructions that don't use V --
%% but never between an instruction and its `succeeded` test.
insert_def_is([#b_set{op=phi}=I|Is], V, Def) ->
    case member(V, beam_ssa:used(I)) of
        true ->
            throw(not_possible);
        false ->
            [I|insert_def_is(Is, V, Def)]
    end;
insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) ->
    Action0 = case Op of
                  call -> beyond;
                  'catch_end' -> beyond;
                  wait_timeout -> beyond;
                  _ -> here
              end,
    Action = case Is of
                 [#b_set{op={succeeded,_}}|_] -> here;
                 _ -> Action0
             end,
    case Action of
        beyond ->
            case member(V, beam_ssa:used(I)) of
                true ->
                    %% The variable is used by this instruction. We must
                    %% place the definition before this instruction.
                    [Def|Is0];
                false ->
                    %% Place it beyond the current instruction.
                    [I|insert_def_is(Is, V, Def)]
            end;
        here ->
            [Def|Is0]
    end;
insert_def_is([], _V, Def) ->
    [Def].

%%%
%%% Order consecutive get_tuple_element instructions in ascending
%%% position order. This will give the loader more opportunities
%%% for combining get_tuple_element instructions.
%%%

ssa_opt_get_tuple_element({#opt_st{ssa=Blocks0}=St, FuncDb}) ->
    Blocks = opt_get_tuple_element(maps:to_list(Blocks0), Blocks0),
    {St#opt_st{ssa=Blocks}, FuncDb}.

opt_get_tuple_element([{L,#b_blk{is=Is0}=Blk0}|Bs], Blocks) ->
    case opt_get_tuple_element_is(Is0, false, []) of
        {yes,Is} ->
            Blk = Blk0#b_blk{is=Is},
            opt_get_tuple_element(Bs, Blocks#{L:=Blk});
        no ->
            opt_get_tuple_element(Bs, Blocks)
    end;
opt_get_tuple_element([], Blocks) -> Blocks.

%% Collect each run of consecutive get_tuple_element instructions on
%% the same source tuple and reorder it by element position.
opt_get_tuple_element_is([#b_set{op=get_tuple_element,
                                 args=[#b_var{}=Src,_]}=I0|Is0],
                         _AnyChange, Acc) ->
    {GetIs0,Is} = collect_get_tuple_element(Is0, Src, [I0]),
    GetIs1 = sort([{Pos,I} || #b_set{args=[_,Pos]}=I <- GetIs0]),
    GetIs = [I || {_,I} <- GetIs1],
    opt_get_tuple_element_is(Is, true, reverse(GetIs, Acc));
opt_get_tuple_element_is([I|Is], AnyChange, Acc) ->
    opt_get_tuple_element_is(Is, AnyChange, [I|Acc]);
opt_get_tuple_element_is([], AnyChange, Acc) ->
    case AnyChange of
        true -> {yes,reverse(Acc)};
        false -> no
    end.

%% Accumulate the maximal prefix of get_tuple_element instructions
%% extracting from Src.
collect_get_tuple_element([#b_set{op=get_tuple_element,
                                  args=[Src,_]}=I|Is], Src, Acc) ->
    collect_get_tuple_element(Is, Src, [I|Acc]);
collect_get_tuple_element(Is, _Src, Acc) ->
    {Acc,Is}.

%%%
%%% Unfold literals to avoid unnecessary move instructions in call
%%% instructions.
%%%
%%% Consider the following example:
%%%
%%%     -module(whatever).
%%%     -export([foo/0]).
%%%     foo() ->
%%%         foobar(1, 2, 3).
%%%     foobar(A, B, C) ->
%%%         foobar(A, B, C, []).
%%%     foobar(A, B, C, D) -> ...
%%%
%%% The type optimization pass will find out that A, B, and C have constant
%%% values and do constant folding, rewriting foobar/3 to:
%%%
%%%     foobar(A, B, C) ->
%%%         foobar(1, 2, 3, []).
%%%
%%% That will result in three extra `move` instructions.
%%%
%%% This optimization sub pass will undo the constant folding
%%% optimization, rewriting code to use the original variable instead
%%% of the constant if the original variable is known to be in an x
%%% register.
%%%
%%% This optimization sub pass will also undo constant folding of the
%%% list of arguments in the call to error/2 in the last clause of a
%%% function. For example:
%%%
%%%     bar(X, Y) ->
%%%         error(function_clause, [X,42]).
%%%
%%% will be rewritten to:
%%%
%%%     bar(X, Y) ->
%%%         error(function_clause, [X,Y]).
%%%

ssa_opt_unfold_literals({St,FuncDb}) ->
    #opt_st{ssa=Blocks0,args=Args,anno=Anno,cnt=Count0} = St,
    ParamInfo = maps:get(parameter_info, Anno, #{}),
    %% Map each known literal value to the argument variables (and
    %% their x register numbers) that are known to hold it on entry.
    LitMap = collect_arg_literals(Args, ParamInfo, 0, #{}),
    case map_size(LitMap) of
        0 ->
            %% None of the arguments for this function are known
            %% literals. Nothing to do.
            {St,FuncDb};
        _ ->
            %% Block 0 is the entry block, where all argument
            %% registers are still live.
            SafeMap = #{0 => true},
            {Blocks,Count} = unfold_literals(beam_ssa:rpo(Blocks0),
                                             LitMap, SafeMap, Count0, Blocks0),
            {St#opt_st{ssa=Blocks,cnt=Count},FuncDb}
    end.
%% Build a map from literal value to [{XRegister,ArgVar}] for every
%% argument whose type annotation pins it to a single value.
collect_arg_literals([V|Vs], Info, X, Acc0) ->
    case Info of
        #{V:=VarInfo} ->
            Type = proplists:get_value(type, VarInfo, any),
            case beam_types:get_singleton_value(Type) of
                {ok,Val} ->
                    F = fun(Vars) -> [{X,V}|Vars] end,
                    Acc = maps:update_with(Val, F, [{X,V}], Acc0),
                    collect_arg_literals(Vs, Info, X + 1, Acc);
                error ->
                    collect_arg_literals(Vs, Info, X + 1, Acc0)
            end;
        #{} ->
            collect_arg_literals(Vs, Info, X + 1, Acc0)
    end;
collect_arg_literals([], _Info, _X, Acc) -> Acc.

%% Visit the blocks in reverse post order, substituting argument
%% variables for literals only in blocks where the argument registers
%% are known to still be live ("safe").
unfold_literals([L|Ls], LitMap, SafeMap0, Count0, Blocks0) ->
    {Blocks,Safe,Count} =
        case map_get(L, SafeMap0) of
            false ->
                %% Before reaching this block, an instruction that
                %% clobbers x registers has been executed. *If* we
                %% would use an argument variable instead of literal,
                %% it would force the value to be saved to a y
                %% register. This is not what we want.
                {Blocks0,false,Count0};
            true ->
                %% All x registers live when entering the function
                %% are still live. Using the variable instead of
                %% the substituted value will eliminate a `move`
                %% instruction.
                #b_blk{is=Is0} = Blk = map_get(L, Blocks0),
                {Is,Safe0,Count1} = unfold_lit_is(Is0, LitMap, Count0, []),
                {Blocks0#{L:=Blk#b_blk{is=Is}},Safe0,Count1}
        end,
    %% Propagate safeness to successors.
    Successors = beam_ssa:successors(L, Blocks),
    SafeMap = unfold_update_succ(Successors, Safe, SafeMap0),
    unfold_literals(Ls, LitMap, SafeMap, Count, Blocks);
unfold_literals([], _, _, Count, Blocks) ->
    {Blocks,Count}.

%% A successor is safe only if all of its predecessors are safe.
unfold_update_succ([S|Ss], Safe, SafeMap0) ->
    F = fun(Prev) -> Prev and Safe end,
    SafeMap = maps:update_with(S, F, Safe, SafeMap0),
    unfold_update_succ(Ss, Safe, SafeMap);
unfold_update_succ([], _, SafeMap) -> SafeMap.
%% unfold_lit_is(Is, LitMap, Count0, Acc) -> {Is',Safe,Count}
%%  Substitute argument variables for literals in one block's
%%  instructions. `Safe` is true if no instruction in the block
%%  clobbers x registers, meaning that successors may continue
%%  substituting.
unfold_lit_is([#b_set{op=call,
                      args=[#b_remote{mod=#b_literal{val=erlang},
                                      name=#b_literal{val=error},
                                      arity=2},
                            #b_literal{val=function_clause},
                            ArgumentList]}=I0|Is], LitMap, Count0, Acc0) ->
    %% This is a call to error/2 that raises a function_clause
    %% exception in the final clause of a function. Try to undo
    %% constant folding in the list of arguments (the second argument
    %% for error/2).
    case unfold_arg_list(Acc0, ArgumentList, LitMap, Count0, 0, []) of
        {[FinalPutList|_]=Acc,Count} ->
            %% Acc now contains the possibly rewritten code that
            %% creates the argument list. All that remains is to
            %% rewrite the call to error/2 itself so that it will
            %% refer to the rewritten argument list. This is essential
            %% when all arguments have known literal values as in this
            %% example:
            %%
            %%     foo(X, Y) -> error(function_clause, [0,1]).
            %%
            #b_set{op=put_list,dst=ListVar} = FinalPutList,
            #b_set{args=[ErlangError,Fc,_]} = I0,
            I = I0#b_set{args=[ErlangError,Fc,ListVar]},
            {reverse(Acc, [I|Is]),false,Count};
        {[],_} ->
            %% Handle code such as:
            %%
            %%     bar(KnownValue, Stk) -> error(function_clause, Stk).
            {reverse(Acc0, [I0|Is]),false,Count0}
    end;
unfold_lit_is([#b_set{op=Op,args=Args0}=I0|Is], LitMap, Count, Acc) ->
    %% Using a register instead of a literal is a clear win only for
    %% `call` and `old_make_fun` instructions. Substituting into other
    %% instructions is unlikely to be an improvement.
    Unfold = case Op of
                 call -> true;
                 old_make_fun -> true;
                 _ -> false
             end,
    I = case Unfold of
            true ->
                Args = unfold_call_args(Args0, LitMap, -1),
                I0#b_set{args=Args};
            false ->
                I0
        end,
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% This instruction clobbers x registers. Don't do
            %% any substitutions in the rest of this block or in any
            %% of its successors.
            {reverse(Acc, [I|Is]),false,Count};
        false ->
            unfold_lit_is(Is, LitMap, Count, [I|Acc])
    end;
unfold_lit_is([], _LitMap, Count, Acc) ->
    {reverse(Acc),true,Count}.

%% unfold_arg_list(Is, ArgumentList, LitMap, Count0, X, Acc) ->
%%    {UpdatedAcc, Count}.
%%
%%  Unfold the arguments in the argument list (second argument for error/2).
%%
%%  Note that Is is the reversed list of instructions before the
%%  call to error/2. Because of the way the list is built in reverse,
%%  it means that the first put_list instruction found will add the first
%%  argument (x0) to the list, the second the second argument (x1), and
%%  so on.

unfold_arg_list(Is, #b_literal{val=[Hd|Tl]}, LitMap, Count0, X, Acc) ->
    %% Handle the case that the entire argument list (the second argument
    %% for error/2) is a literal.
    {PutListDst,Count} = new_var('@put_list', Count0),
    PutList = #b_set{op=put_list,dst=PutListDst,
                     args=[#b_literal{val=Hd},#b_literal{val=Tl}]},
    unfold_arg_list([PutList|Is], PutListDst, LitMap, Count, X, Acc);
unfold_arg_list([#b_set{op=put_list,dst=List,
                        args=[Hd0,#b_literal{val=[Hd|Tl]}]}=I0|Is0],
                List, LitMap, Count0, X, Acc) ->
    %% The rest of the argument list is a literal list.
    {PutListDst,Count} = new_var('@put_list', Count0),
    PutList = #b_set{op=put_list,dst=PutListDst,
                     args=[#b_literal{val=Hd},#b_literal{val=Tl}]},
    I = I0#b_set{args=[Hd0,PutListDst]},
    unfold_arg_list([I,PutList|Is0], List, LitMap, Count, X, Acc);
unfold_arg_list([#b_set{op=put_list,dst=List,args=[Hd0,Tl]}=I0|Is],
                List, LitMap, Count, X, Acc) ->
    %% Unfold the head of the list.
    Hd = unfold_arg(Hd0, LitMap, X),
    I = I0#b_set{args=[Hd,Tl]},
    unfold_arg_list(Is, Tl, LitMap, Count, X + 1, [I|Acc]);
unfold_arg_list([I|Is], List, LitMap, Count, X, Acc) ->
    %% Some other instruction, such as bs_get_tail.
    unfold_arg_list(Is, List, LitMap, Count, X, [I|Acc]);
unfold_arg_list([], _, _, Count, _, Acc) ->
    {reverse(Acc),Count}.

%% unfold_call_args(Args, LitMap, X) -> Args'
%%  Substitute argument variables for literals in the argument list of
%%  a call. X starts at -1 because the first element of Args is the
%%  callee, not an argument passed in x0.
unfold_call_args([A0|As], LitMap, X) ->
    A = unfold_arg(A0, LitMap, X),
    [A|unfold_call_args(As, LitMap, X + 1)];
unfold_call_args([], _, _) -> [].

%% unfold_arg(Arg, LitMap, X) -> Arg'
%%  Replace a literal with a variable if the variable holds the same
%%  value in the x register numbered X; otherwise return Arg unchanged.
unfold_arg(#b_literal{val=Val}=Lit, LitMap, X) ->
    case LitMap of
        #{Val:=Vars} ->
            %% This literal is available in an x register.
            %% If it is in the correct x register, use
            %% the register. Don't bother if it is in the
            %% wrong register, because that would still result
            %% in a `move` instruction.
            case keyfind(X, 1, Vars) of
                false -> Lit;
                {X,Var} -> Var
            end;
        #{} -> Lit
    end;
unfold_arg(Expr, _LitMap, _X) -> Expr.

%%%
%%% Optimize tail calls created as the result of optimizations.
%%%
%%% Consider the following example of a tail call in Erlang code:
%%%
%%%    bar() ->
%%%        foo().
%%%
%%% The SSA code for the call will look like this:
%%%
%%%      @ssa_ret = call (`foo`/0)
%%%      ret @ssa_ret
%%%
%%% Sometimes optimizations create new tail calls. Consider this
%%% slight variation of the example:
%%%
%%%    bar() ->
%%%        {_,_} = foo().
%%%
%%%    foo() -> {a,b}.
%%%
%%% If beam_ssa_type can figure out that `foo/0` always returns a tuple
%%% of size two, the test for a tuple is no longer needed and the call
%%% to `foo/0` will become a tail call. However, the resulting SSA
%%% code will look like this:
%%%
%%%      @ssa_ret = call (`foo`/0)
%%%      @ssa_bool = succeeded:body @ssa_ret
%%%      br @ssa_bool, ^999, ^1
%%%
%%%    999:
%%%      ret @ssa_ret
%%%
%%% The beam_ssa_codegen pass will not recognize this code as a tail
%%% call and will generate an unnecessary stack frame. It may also
%%% generate unnecessary `kill` instructions.
%%%
%%% To avoid those extra instructions, this optimization will
%%% eliminate the `succeeded:body` and `br` instructions and insert
%%% the `ret` in the same block as the call:
%%%
%%%      @ssa_ret = call (`foo`/0)
%%%      ret @ssa_ret
%%%
%%% Finally, consider this example:
%%%
%%%    bar() ->
%%%        foo_ok(),
%%%        ok.
%%%
%%%    foo_ok() -> ok.
%%%
%%% The SSA code for the call to `foo_ok/0` will look like:
%%%
%%%      %% Result type: `ok`
%%%      @ssa_ignored = call (`foo_ok`/0)
%%%      @ssa_bool = succeeded:body @ssa_ignored
%%%      br @ssa_bool, ^999, ^1
%%%
%%%    999:
%%%      ret `ok`
%%%
%%% Since the call to `foo_ok/0` has an annotation indicating that the
%%% call will always return the atom `ok`, the code can be simplified
%%% like this:
%%%
%%%      @ssa_ignored = call (`foo_ok`/0)
%%%      ret @ssa_ignored
%%%
%%% The beam_jump pass does the same optimization, but it does it too
%%% late to avoid creating an unnecessary stack frame or unnecessary
%%% `kill` instructions.
%%%

%% Entry point for the sub-pass: rewrite newly created tail calls in
%% all blocks of the function.
ssa_opt_tail_calls({St,FuncDb}) ->
    #opt_st{ssa=Blocks0} = St,
    Blocks = opt_tail_calls(beam_ssa:rpo(Blocks0), Blocks0),
    {St#opt_st{ssa=Blocks},FuncDb}.

opt_tail_calls([L|Ls], Blocks0) ->
    #b_blk{is=Is0,last=Last} = Blk0 = map_get(L, Blocks0),

    %% Does this block end with a two-way branch whose success
    %% label targets an empty block with a `ret` terminator?
    case is_potential_tail_call(Last, Blocks0) of
        {yes,Bool,Ret} ->
            %% Yes, `Ret` is the value returned from that block
            %% (either a variable or literal). Do the instructions
            %% in this block end with a `call` instruction that
            %% returns the same value as `Ret`, followed by a
            %% `succeeded:body` instruction?
            case is_tail_call_is(Is0, Bool, Ret, []) of
                {yes,Is,Var} ->
                    %% Yes, this is a tail call. `Is` is the instructions
                    %% in the block with `succeeded:body` removed, and
                    %% `Var` is the destination variable for the return
                    %% value of the call. Rewrite this block to directly
                    %% return `Var`.
                    Blk = Blk0#b_blk{is=Is,last=#b_ret{arg=Var}},
                    Blocks = Blocks0#{L:=Blk},
                    opt_tail_calls(Ls, Blocks);
                no ->
                    %% No, the block does not end with a call, or the
                    %% call instruction does not return the same value
                    %% as `Ret`.
                    opt_tail_calls(Ls, Blocks0)
            end;
        no ->
            opt_tail_calls(Ls, Blocks0)
    end;
opt_tail_calls([], Blocks) -> Blocks.

%% is_potential_tail_call(Terminator, Blocks) -> {yes,Bool,RetArg} | no
%%  Succeed if the terminator is a two-way branch whose success label
%%  targets an empty block ending in a `ret` terminator.
is_potential_tail_call(#b_br{bool=#b_var{}=Bool,succ=Succ}, Blocks) ->
    case Blocks of
        #{Succ := #b_blk{is=[],last=#b_ret{arg=Arg}}} ->
            %% This could be a tail call.
            {yes,Bool,Arg};
        #{} ->
            %% The block is not empty or does not have a `ret` terminator.
            no
    end;
is_potential_tail_call(_, _) ->
    %% Not a two-way branch (a `succeeded:body` instruction must be
    %% followed by a two-way branch).
    no.

%% is_tail_call_is(Is, Bool, Ret, Acc) -> {yes,Is',Dst} | no
%%  Succeed if the block ends with a `call` followed by the
%%  `succeeded:body` that defines `Bool`, and the call's result is the
%%  value returned by the `ret` terminator of the success block.
is_tail_call_is([#b_set{op=call,dst=Dst}=Call,
                 #b_set{op={succeeded,body},dst=Bool}],
                Bool, Ret, Acc) ->
    IsTailCall =
        case Ret of
            #b_literal{val=Val} ->
                %% The return value for this function is a literal.
                %% Now test whether it is the same literal that the
                %% `call` instruction returns.
                Type = beam_ssa:get_anno(result_type, Call, any),
                case beam_types:get_singleton_value(Type) of
                    {ok,Val} ->
                        %% Same value.
                        true;
                    {ok,_} ->
                        %% Wrong value.
                        false;
                    error ->
                        %% The type for the return value is not a singleton value.
                        false
                end;
            #b_var{} ->
                %% It is a tail call if the variable set by the `call` instruction
                %% is the same variable as the argument for the `ret` terminator.
                Ret =:= Dst
        end,
    case IsTailCall of
        true ->
            %% Return the instructions in the block with `succeeded:body` removed.
            Is = reverse(Acc, [Call]),
            {yes,Is,Dst};
        false ->
            no
    end;
is_tail_call_is([I|Is], Bool, Ret, Acc) ->
    is_tail_call_is(Is, Bool, Ret, [I|Acc]);
is_tail_call_is([], _Bool, _Ret, _Acc) -> no.

%%%
%%% Common utilities.
%%%

%% list_set_union(List, Set) -> Set'
%%  Add all elements of List to Set, with special cases to avoid
%%  building an intermediate set for zero or one elements.
list_set_union([], Set) ->
    Set;
list_set_union([E], Set) ->
    sets:add_element(E, Set);
list_set_union(List, Set) ->
    sets:union(sets:from_list(List, [{version, 2}]), Set).

%% non_guards(Linear) -> GbSet
%%  Return the set of labels of blocks that cannot be part of a guard:
%%  blocks starting with a landingpad instruction, plus the exception
%%  block.
non_guards(Linear) ->
    gb_sets:from_list(non_guards_1(Linear)).

non_guards_1([{L,#b_blk{is=Is}}|Bs]) ->
    case Is of
        [#b_set{op=landingpad}|_] ->
            [L | non_guards_1(Bs)];
        _ ->
            non_guards_1(Bs)
    end;
non_guards_1([]) ->
    [?EXCEPTION_BLOCK].

%% rel2fam(Relation) -> Family
%%  Convert a relation (list of pairs) to a family (sorted list of
%%  {Key,SortedValues} pairs) using the sofs module.
rel2fam(S0) ->
    S1 = sofs:relation(S0),
    S = sofs:rel2fam(S1),
    sofs:to_external(S).

%% sub(I, Sub) -> I'
%%  Apply the substitution map Sub to instruction or terminator I and
%%  normalize the result.
sub(I, Sub) ->
    beam_ssa:normalize(sub_1(I, Sub)).

sub_1(#b_set{op=phi,args=Args}=I, Sub) ->
    I#b_set{args=[{sub_arg(A, Sub),P} || {A,P} <- Args]};
sub_1(#b_set{args=Args}=I, Sub) ->
    I#b_set{args=[sub_arg(A, Sub) || A <- Args]};
sub_1(#b_br{bool=#b_var{}=Old}=Br, Sub) ->
    New = sub_arg(Old, Sub),
    Br#b_br{bool=New};
sub_1(#b_switch{arg=#b_var{}=Old}=Sw, Sub) ->
    New = sub_arg(Old, Sub),
    Sw#b_switch{arg=New};
sub_1(#b_ret{arg=#b_var{}=Old}=Ret, Sub) ->
    New = sub_arg(Old, Sub),
    Ret#b_ret{arg=New};
sub_1(Last, _) -> Last.
%% sub_arg(Arg, Sub) -> Arg'
%%  Look up a single argument in the substitution map, returning the
%%  replacement if there is one and the argument itself otherwise. For
%%  a remote call target, substitute into its module and name parts.
sub_arg(#b_remote{mod=Mod0,name=Name0}=Rem, Sub) ->
    Mod = sub_arg(Mod0, Sub),
    Name = sub_arg(Name0, Sub),
    Rem#b_remote{mod=Mod,name=Name};
sub_arg(Old, Sub) ->
    maps:get(Old, Sub, Old).

%% new_var(Base, Count0) -> {Var,Count}
%%  Create a fresh variable named {BaseName,Count0} and return it
%%  together with the incremented counter.
new_var(#b_var{name={Base,N}}, Count) ->
    %% Assertion: an already numbered name must have an integer
    %% sequence number.
    true = is_integer(N),
    {#b_var{name={Base,Count}},Count+1};
new_var(#b_var{name=Base}, Count) ->
    {#b_var{name={Base,Count}},Count+1};
new_var(Base, Count) when is_atom(Base) ->
    {#b_var{name={Base,Count}},Count+1}.