1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2018-2020. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20 21%%% 22%%% This is a collection of various optimizations that don't need a separate 23%%% pass by themselves and/or are mutually beneficial to other passes. 24%%% 25%%% The optimizations are applied in "phases," each with a list of sub-passes 26%%% to run. These sub-passes are applied on all functions in a module before 27%%% moving on to the next phase, which lets us gather module-level information 28%%% in one phase and then apply it in the next without having to risk working 29%%% with incomplete information. 30%%% 31%%% Each sub-pass operates on a #st{} record and a func_info_db(), where the 32%%% former is just a #b_function{} whose blocks can be represented either in 33%%% linear or map form, and the latter is a map with information about all 34%%% functions in the module (see beam_ssa_opt.hrl for more details). 35%%% 36 37-module(beam_ssa_opt). 38-export([module/2]). 39 40-include("beam_ssa_opt.hrl"). 41 42-import(lists, [all/2,append/1,duplicate/2,foldl/3,keyfind/3,member/2, 43 reverse/1,reverse/2, 44 splitwith/2,sort/1,takewhile/2,unzip/1]). 45 46-define(DEFAULT_REPETITIONS, 2). 47 48-spec module(beam_ssa:b_module(), [compile:option()]) -> 49 {'ok',beam_ssa:b_module()}. 
%% Per-function state threaded through every sub-pass. The `ssa` field
%% holds the function's code either in linear form (a list of
%% {Label,Block} pairs) or in map form, depending on which representation
%% the most recent sub-pass produced.
-record(st, {ssa :: [{beam_ssa:label(),beam_ssa:b_blk()}] |
                    beam_ssa:block_map(),
             args :: [beam_ssa:b_var()],      %Argument variables.
             cnt :: beam_ssa:label(),         %Counter for new variables/labels.
             anno :: beam_ssa:anno()}).

%% Map from function identifier to its per-function state.
-type st_map() :: #{ func_id() => #st{} }.

%% module(Module0, Opts) -> {ok,Module}
%%  Optimize all functions in the module. When the `no_module_opt`
%%  option is given, the function database is left empty, which
%%  disables all module-level (cross-function) optimizations.
module(Module, Opts) ->
    FuncDb0 = case proplists:get_value(no_module_opt, Opts, false) of
                  false -> build_func_db(Module);
                  true -> #{}
              end,

    %% Passes that perform module-level optimizations are often aided by
    %% optimizing callers before callees and vice versa, so we optimize all
    %% functions in call order, flipping it as required.
    StMap0 = build_st_map(Module),
    Order = get_call_order_po(StMap0, FuncDb0),

    Phases =
        [{Order, prologue_passes(Opts)}] ++
        repeat(Opts, repeated_passes(Opts), Order) ++
        [{Order, epilogue_passes(Opts)}],

    %% Run the phases in sequence; each phase applies its sub-passes to
    %% all functions (in its given order) before the next phase starts.
    {StMap, _FuncDb} = foldl(fun({FuncIds, Ps}, {StMap, FuncDb}) ->
                                     phase(FuncIds, Ps, StMap, FuncDb)
                             end, {StMap0, FuncDb0}, Phases),

    {ok, finish(Module, StMap)}.

%% Apply the sub-passes Ps to each function in turn. If a sub-pass
%% crashes, print the name/arity of the function being optimized before
%% re-raising, to make compiler bugs easier to locate.
phase([FuncId | Ids], Ps, StMap, FuncDb0) ->
    try compile:run_sub_passes(Ps, {map_get(FuncId, StMap), FuncDb0}) of
        {St, FuncDb} ->
            phase(Ids, Ps, StMap#{ FuncId => St }, FuncDb)
    catch
        Class:Error:Stack ->
            #b_local{name=#b_literal{val=Name},arity=Arity} = FuncId,
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Error, Stack)
    end;
phase([], _Ps, StMap, FuncDb) ->
    {StMap, FuncDb}.

%% Repeats the given passes, alternating the order between runs to make the
%% type pass more efficient. The repetition count can be overridden with
%% the `ssa_opt_repeat` option.
repeat(Opts, Ps, OrderA) ->
    Repeat = proplists:get_value(ssa_opt_repeat, Opts, ?DEFAULT_REPETITIONS),
    OrderB = reverse(OrderA),
    repeat_1(Repeat, Ps, OrderA, OrderB).
%% Build one {Order,Passes} phase per repetition, alternating between
%% OrderA (call order) and OrderB (reversed call order).
repeat_1(0, _Ps, _OrderA, _OrderB) ->
    [];
repeat_1(N, Ps, OrderA, OrderB) when N > 0, N rem 2 =:= 0 ->
    [{OrderA, Ps} | repeat_1(N - 1, Ps, OrderA, OrderB)];
repeat_1(N, Ps, OrderA, OrderB) when N > 0, N rem 2 =:= 1 ->
    [{OrderB, Ps} | repeat_1(N - 1, Ps, OrderA, OrderB)].

%%

%% Return the #b_local{} identifier for a function, taken from its
%% func_info annotation.
get_func_id(F) ->
    {_Mod, Name, Arity} = beam_ssa:get_anno(func_info, F),
    #b_local{name=#b_literal{val=Name}, arity=Arity}.

%% Build the map from function identifiers to the #st{} state for all
%% functions in the module.
-spec build_st_map(#b_module{}) -> st_map().
build_st_map(#b_module{body=Fs}) ->
    build_st_map_1(Fs, #{}).

build_st_map_1([F | Fs], Map) ->
    #b_function{anno=Anno,args=Args,cnt=Counter,bs=Bs} = F,
    St = #st{anno=Anno,args=Args,cnt=Counter,ssa=Bs},
    build_st_map_1(Fs, Map#{ get_func_id(F) => St });
build_st_map_1([], Map) ->
    Map.

%% Write the optimized per-function state in StMap back into the
%% module body.
-spec finish(#b_module{}, st_map()) -> #b_module{}.
finish(#b_module{body=Fs0}=Module, StMap) ->
    Module#b_module{body=finish_1(Fs0, StMap)}.

finish_1([F0 | Fs], StMap) ->
    #st{anno=Anno,cnt=Counter,ssa=Blocks} = map_get(get_func_id(F0), StMap),
    F = F0#b_function{anno=Anno,bs=Blocks,cnt=Counter},
    [F | finish_1(Fs, StMap)];
finish_1([], _StMap) ->
    [].

%%

-define(PASS(N), {N,fun N/1}).

prologue_passes(Opts) ->
    Ps = [?PASS(ssa_opt_split_blocks),
          ?PASS(ssa_opt_coalesce_phis),
          ?PASS(ssa_opt_tail_phis),
          ?PASS(ssa_opt_element),
          ?PASS(ssa_opt_linearize),
          ?PASS(ssa_opt_tuple_size),
          ?PASS(ssa_opt_record),
          ?PASS(ssa_opt_cse),               %Helps the first type pass.
          ?PASS(ssa_opt_type_start)],
    passes_1(Ps, Opts).

%% These passes all benefit from each other (in roughly this order), so they
%% are repeated as required.
repeated_passes(Opts) ->
    Ps = [?PASS(ssa_opt_live),
          ?PASS(ssa_opt_bs_puts),
          ?PASS(ssa_opt_dead),
          ?PASS(ssa_opt_cse),
          ?PASS(ssa_opt_tail_phis),
          ?PASS(ssa_opt_type_continue)],    %Must run after ssa_opt_dead to
                                            %clean up phi nodes.
    passes_1(Ps, Opts).

epilogue_passes(Opts) ->
    Ps = [?PASS(ssa_opt_type_finish),
          ?PASS(ssa_opt_float),
          ?PASS(ssa_opt_sw),

          %% Run live one more time to clean up after the float and sw
          %% passes.
          ?PASS(ssa_opt_live),
          ?PASS(ssa_opt_bsm),
          ?PASS(ssa_opt_bsm_units),
          ?PASS(ssa_opt_bsm_shortcut),
          ?PASS(ssa_opt_blockify),
          ?PASS(ssa_opt_sink),
          ?PASS(ssa_opt_merge_blocks),
          ?PASS(ssa_opt_get_tuple_element),
          ?PASS(ssa_opt_trim_unreachable)],
    passes_1(Ps, Opts).

%% Honor `no_<pass name>` options by replacing each disabled pass with
%% the identity function. (The pass names are compile-time atoms, so the
%% list_to_atom/1 call here cannot grow the atom table unboundedly.)
passes_1(Ps, Opts0) ->
    Negations = [{list_to_atom("no_"++atom_to_list(N)),N} ||
                    {N,_} <- Ps],
    Opts = proplists:substitute_negations(Negations, Opts0),
    [case proplists:get_value(Name, Opts, true) of
         true ->
             P;
         false ->
             {NoName,Name} = keyfind(Name, 2, Negations),
             {NoName,fun(S) -> S end}
     end || {Name,_}=P <- Ps].

%% Builds a function information map with basic information about incoming and
%% outgoing local calls, as well as whether the function is exported.
-spec build_func_db(#b_module{}) -> func_info_db().
build_func_db(#b_module{body=Fs,exports=Exports}) ->
    try
        fdb_1(Fs, gb_sets:from_list(Exports), #{})
    catch
        %% All module-level optimizations are invalid when a NIF can override a
        %% function, so we have to bail out.
        throw:load_nif -> #{}
    end.

fdb_1([#b_function{ args=Args,bs=Bs }=F | Fs], Exports, FuncDb0) ->
    Id = get_func_id(F),

    #b_local{name=#b_literal{val=Name}, arity=Arity} = Id,
    Exported = gb_sets:is_element({Name, Arity}, Exports),
    ArgTypes = duplicate(length(Args), #{}),

    FuncDb1 = case FuncDb0 of
                  %% We may have an entry already if someone's called us.
                  #{ Id := Info } ->
                      FuncDb0#{ Id := Info#func_info{ exported=Exported,
                                                      arg_types=ArgTypes }};
                  #{} ->
                      FuncDb0#{ Id => #func_info{ exported=Exported,
                                                  arg_types=ArgTypes }}
              end,

    %% Record all local calls made from this function.
    FuncDb = beam_ssa:fold_rpo(fun(_L, #b_blk{is=Is}, FuncDb) ->
                                       fdb_is(Is, Id, FuncDb)
                               end, FuncDb1, Bs),

    fdb_1(Fs, Exports, FuncDb);
fdb_1([], _Exports, FuncDb) ->
    FuncDb.

%% Scan a block's instructions for local calls, and for
%% erlang:load_nif/2, which disables module-level optimization.
fdb_is([#b_set{op=call,
               args=[#b_local{}=Callee | _]} | Is],
       Caller, FuncDb) ->
    fdb_is(Is, Caller, fdb_update(Caller, Callee, FuncDb));
fdb_is([#b_set{op=call,
               args=[#b_remote{mod=#b_literal{val=erlang},
                               name=#b_literal{val=load_nif}},
                     _Path, _LoadInfo]} | _Is], _Caller, _FuncDb) ->
    throw(load_nif);
fdb_is([_ | Is], Caller, FuncDb) ->
    fdb_is(Is, Caller, FuncDb);
fdb_is([], _Caller, FuncDb) ->
    FuncDb.

%% Record the call edge Caller -> Callee in both directions.
fdb_update(Caller, Callee, FuncDb) ->
    CallerVertex = maps:get(Caller, FuncDb, #func_info{}),
    CalleeVertex = maps:get(Callee, FuncDb, #func_info{}),

    Calls = ordsets:add_element(Callee, CallerVertex#func_info.out),
    CalledBy = ordsets:add_element(Caller, CalleeVertex#func_info.in),

    FuncDb#{ Caller => CallerVertex#func_info{out=Calls},
             Callee => CalleeVertex#func_info{in=CalledBy} }.

%% Returns the post-order of all local calls in this module. That is,
%% called functions will be ordered before the functions calling them.
%%
%% Functions where module-level optimization is disabled are added last in
%% arbitrary order.

get_call_order_po(StMap, FuncDb) ->
    Order = gco_po(FuncDb),
    Order ++ maps:fold(fun(K, _V, Acc) ->
                               case is_map_key(K, FuncDb) of
                                   false -> [K | Acc];
                                   true -> Acc
                               end
                       end, [], StMap).

gco_po(FuncDb) ->
    All = sort(maps:keys(FuncDb)),
    {RPO,_} = gco_rpo(All, FuncDb, cerl_sets:new(), []),
    reverse(RPO).
%% Depth-first traversal of the call graph. Accumulates the visited
%% function identifiers so that the caller obtains a reverse post-order
%% by reversing the accumulator.
gco_rpo([Id | Ids], FuncDb, Visited0, Acc0) ->
    case cerl_sets:is_element(Id, Visited0) of
        false ->
            #func_info{out=Callees} = map_get(Id, FuncDb),
            Visited1 = cerl_sets:add_element(Id, Visited0),
            {Acc1,Visited} = gco_rpo(Callees, FuncDb, Visited1, Acc0),
            gco_rpo(Ids, FuncDb, Visited, [Id | Acc1]);
        true ->
            gco_rpo(Ids, FuncDb, Visited0, Acc0)
    end;
gco_rpo([], _FuncDb, Visited, Acc) ->
    {Acc,Visited}.

%%%
%%% Trivial sub passes.
%%%

%% Run the beam_ssa_dead pass on the linear representation.
ssa_opt_dead({#st{ssa=Linear0}=St0, FuncDb}) ->
    Linear = beam_ssa_dead:opt(Linear0),
    {St0#st{ssa=Linear}, FuncDb}.

%% Convert the block map into linear form.
ssa_opt_linearize({#st{ssa=BlockMap}=St0, FuncDb}) ->
    Linear = beam_ssa:linearize(BlockMap),
    {St0#st{ssa=Linear}, FuncDb}.

%% First run of the type optimization pass.
ssa_opt_type_start({#st{ssa=Linear0,args=Args,anno=Anno}=St0, FuncDb0}) ->
    {Linear,FuncDb} = beam_ssa_type:opt_start(Linear0, Args, Anno, FuncDb0),
    {St0#st{ssa=Linear}, FuncDb}.

%% Repeated run of the type optimization pass.
ssa_opt_type_continue({#st{ssa=Linear0,args=Args,anno=Anno}=St0, FuncDb0}) ->
    {Linear,FuncDb} = beam_ssa_type:opt_continue(Linear0, Args, Anno, FuncDb0),
    {St0#st{ssa=Linear}, FuncDb}.

%% Final run of the type optimization pass; only the function
%% annotation is updated.
ssa_opt_type_finish({#st{args=Args,anno=Anno0}=St0, FuncDb0}) ->
    {Anno,FuncDb} = beam_ssa_type:opt_finish(Args, Anno0, FuncDb0),
    {St0#st{anno=Anno}, FuncDb}.

%% Convert linear form back into a block map.
ssa_opt_blockify({#st{ssa=Linear}=St0, FuncDb}) ->
    BlockMap = maps:from_list(Linear),
    {St0#st{ssa=BlockMap}, FuncDb}.

%% Remove blocks that cannot be reached.
ssa_opt_trim_unreachable({#st{ssa=BlockMap}=St0, FuncDb}) ->
    {St0#st{ssa=beam_ssa:trim_unreachable(BlockMap)}, FuncDb}.

%%%
%%% Split blocks before certain instructions to enable more optimizations.
%%%
%%% Splitting before element/2 enables the optimization that swaps
%%% element/2 instructions.
%%%
%%% Splitting before call and make_fun instructions gives more opportunities
%%% for sinking get_tuple_element instructions.
%%%

%% Split blocks before element/2 calls, calls, and make_fun
%% instructions (see the comment above for why each split helps).
ssa_opt_split_blocks({#st{ssa=Blocks0,cnt=Count0}=St, FuncDb}) ->
    P = fun(#b_set{op={bif,element}}) -> true;
           (#b_set{op=call}) -> true;
           (#b_set{op=make_fun}) -> true;
           (_) -> false
        end,
    {Blocks,Count} = beam_ssa:split_blocks(P, Blocks0, Count0),
    {St#st{ssa=Blocks,cnt=Count}, FuncDb}.

%%%
%%% Coalesce phi nodes.
%%%
%%% Nested cases can lead to code such as this:
%%%
%%%     10:
%%%       _1 = phi {literal value1, label 8}, {Var, label 9}
%%%       br 11
%%%
%%%     11:
%%%       _2 = phi {_1, label 10}, {literal false, label 3}
%%%
%%% The phi nodes can be coalesced like this:
%%%
%%%     11:
%%%       _2 = phi {literal value1, label 8}, {Var, label 9}, {literal false, label 3}
%%%
%%% Coalescing can help other optimizations, and can in some cases reduce register
%%% shuffling (if the phi variables for two phi nodes happens to be allocated to
%%% different registers).
%%%

ssa_opt_coalesce_phis({#st{ssa=Blocks0}=St, FuncDb}) ->
    Ls = beam_ssa:rpo(Blocks0),
    Blocks = c_phis_1(Ls, Blocks0),
    {St#st{ssa=Blocks}, FuncDb}.

%% Visit every block (in reverse post-order) that starts with a
%% phi node and try to coalesce its phi arguments.
c_phis_1([L|Ls], Blocks0) ->
    case map_get(L, Blocks0) of
        #b_blk{is=[#b_set{op=phi}|_]}=Blk ->
            Blocks = c_phis_2(L, Blk, Blocks0),
            c_phis_1(Ls, Blocks);
        #b_blk{} ->
            c_phis_1(Ls, Blocks0)
    end;
c_phis_1([], Blocks) -> Blocks.

%% If some phi argument in block L comes from a predecessor that
%% consists of nothing but a phi node, rewrite the phi nodes in L to
%% take their values directly from that predecessor's predecessors,
%% and redirect the branches accordingly.
c_phis_2(L, #b_blk{is=Is0}=Blk0, Blocks0) ->
    case c_phis_args(Is0, Blocks0) of
        none ->
            Blocks0;
        {_,_,Preds}=Info ->
            Is = c_rewrite_phis(Is0, Info),
            Blk = Blk0#b_blk{is=Is},
            Blocks = Blocks0#{L:=Blk},
            c_fix_branches(Preds, L, Blocks)
    end.

%% Look through the leading phi nodes for an argument variable that is
%% defined by a phi-only predecessor block. Returns {Var,Pred,Args} for
%% the first such argument, or `none`.
c_phis_args([#b_set{op=phi,args=Args0}|Is], Blocks) ->
    case c_phis_args_1(Args0, Blocks) of
        none ->
            c_phis_args(Is, Blocks);
        Res ->
            Res
    end;
c_phis_args(_, _Blocks) -> none.
%% Scan the {Value,PredecessorLabel} pairs of one phi node for a value
%% produced by a predecessor that contains only a phi node.
c_phis_args_1([{Var,Pred}|As], Blocks) ->
    case c_get_pred_vars(Var, Pred, Blocks) of
        none ->
            c_phis_args_1(As, Blocks);
        Result ->
            Result
    end;
c_phis_args_1([], _Blocks) -> none.

%% Succeeds only when the predecessor block consists of exactly one
%% instruction: a phi node defining Var.
c_get_pred_vars(Var, Pred, Blocks) ->
    case map_get(Pred, Blocks) of
        #b_blk{is=[#b_set{op=phi,dst=Var,args=Args}]} ->
            {Var,Pred,Args};
        #b_blk{} ->
            none
    end.

%% Rewrite all leading phi nodes using the {Var,Pred,Values} info
%% gathered by c_phis_args/2.
c_rewrite_phis([#b_set{op=phi,args=Args0}=I|Is], Info) ->
    Args = c_rewrite_phi(Args0, Info),
    [I#b_set{args=Args}|c_rewrite_phis(Is, Info)];
c_rewrite_phis(Is, _Info) -> Is.

%% Replace the argument coming from the coalesced predecessor.
%% The first clause handles the phi node whose argument is the
%% coalesced variable itself: splice in the predecessor's phi
%% arguments unchanged. The second clause handles other phi nodes in
%% the same block: they keep their own value, but must now list one
%% entry per new predecessor label.
c_rewrite_phi([{Var,Pred}|As], {Var,Pred,Values}) ->
    Values ++ As;
c_rewrite_phi([{Value,Pred}|As], {_,Pred,Values}) ->
    [{Value,P} || {_,P} <- Values] ++ As;
c_rewrite_phi([A|As], Info) ->
    [A|c_rewrite_phi(As, Info)];
c_rewrite_phi([], _Info) -> [].

%% Make the former predecessors of the coalesced block branch directly
%% to block L instead. They are known to end in unconditional branches.
c_fix_branches([{_,Pred}|As], L, Blocks0) ->
    #b_blk{last=Last0} = Blk0 = map_get(Pred, Blocks0),
    #b_br{bool=#b_literal{val=true}} = Last0,   %Assertion.
    Last = Last0#b_br{bool=#b_literal{val=true},succ=L,fail=L},
    Blk = Blk0#b_blk{last=Last},
    Blocks = Blocks0#{Pred:=Blk},
    c_fix_branches(As, L, Blocks);
c_fix_branches([], _, Blocks) -> Blocks.

%%%
%%% Eliminate phi nodes in the tail of a function.
%%%
%%% Try to eliminate short blocks that start with a phi node
%%% and end in a return. For example:
%%%
%%%    Result = phi { Res1, 4 }, { literal true, 5 }
%%%    Ret = put_tuple literal ok, Result
%%%    ret Ret
%%%
%%% The code in this block can be inserted at the end of blocks 4 and
%%% 5.
%%% Thus, the following code can be inserted into block 4:
%%%
%%%    Ret:1 = put_tuple literal ok, Res1
%%%    ret Ret:1
%%%
%%% And the following code into block 5:
%%%
%%%    Ret:2 = put_tuple literal ok, literal true
%%%    ret Ret:2
%%%
%%% Which can be further simplified to:
%%%
%%%    ret literal {ok, true}
%%%
%%% This transformation may lead to more code improvements:
%%%
%%%   - Stack trimming
%%%   - Fewer test_heap instructions
%%%   - Smaller stack frames
%%%

ssa_opt_tail_phis({#st{ssa=SSA0,cnt=Count0}=St, FuncDb}) ->
    {SSA,Count} = opt_tail_phis(SSA0, Count0),
    {St#st{ssa=SSA,cnt=Count}, FuncDb}.

%% This pass runs both on the map and linear representations; normalize
%% to the map representation before doing the real work.
opt_tail_phis(Blocks, Count) when is_map(Blocks) ->
    opt_tail_phis(maps:values(Blocks), Blocks, Count);
opt_tail_phis(Linear0, Count0) when is_list(Linear0) ->
    Blocks0 = maps:from_list(Linear0),
    {Blocks,Count} = opt_tail_phis(Blocks0, Count0),
    {beam_ssa:linearize(Blocks),Count}.

%% Look for blocks that start with a phi node with more than one
%% argument, end in a return of a variable, and contain only
%% instructions that are suitable for duplication into the
%% predecessor blocks (see is_suitable_tail_op/1).
opt_tail_phis([#b_blk{is=Is0,last=Last}|Bs], Blocks0, Count0) ->
    case {Is0,Last} of
        {[#b_set{op=phi,args=[_,_|_]}|_],#b_ret{arg=#b_var{}}=Ret} ->
            {Phis,Is} = splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is0),
            case suitable_tail_ops(Is) of
                true ->
                    {Blocks,Count} = opt_tail_phi(Phis, Is, Ret,
                                                  Blocks0, Count0),
                    opt_tail_phis(Bs, Blocks, Count);
                false ->
                    opt_tail_phis(Bs, Blocks0, Count0)
            end;
        {_,_} ->
            opt_tail_phis(Bs, Blocks0, Count0)
    end;
opt_tail_phis([], Blocks, Count) ->
    {Blocks,Count}.

%% Duplicate the non-phi instructions and the return into each
%% predecessor, substituting the phi values. The transformation is
%% kept only if the estimated code-size cost stays within a limit
%% proportional to the number of predecessors.
opt_tail_phi(Phis0, Is, Ret, Blocks0, Count0) ->
    Phis = rel2fam(reduce_phis(Phis0)),
    {Blocks,Count,Cost} =
        foldl(fun(PhiArg, Acc) ->
                      opt_tail_phi_arg(PhiArg, Is, Ret, Acc)
              end, {Blocks0,Count0,0}, Phis),
    MaxCost = length(Phis) * 3 + 2,
    if
        Cost =< MaxCost ->
            %% The transformation would cause at most a slight
            %% increase in code size if no more optimizations
            %% can be applied.
            {Blocks,Count};
        true ->
            %% The code size would be increased too much.
            {Blocks0,Count0}
    end.

%% Convert the phi nodes to a relation: one {PredLabel,{PhiDst,Value}}
%% pair per phi argument, so that rel2fam/1 can group them by
%% predecessor label.
reduce_phis([#b_set{dst=PhiDst,args=PhiArgs}|Is]) ->
    [{L,{PhiDst,Val}} || {Val,L} <- PhiArgs] ++ reduce_phis(Is);
reduce_phis([]) -> [].

%% Append the (renamed and substituted) tail instructions and return
%% to the predecessor block PredL, accumulating the estimated cost.
opt_tail_phi_arg({PredL,Sub0}, Is0, Ret0, {Blocks0,Count0,Cost0}) ->
    Blk0 = map_get(PredL, Blocks0),
    #b_blk{is=IsPrefix,last=#b_br{succ=Next,fail=Next}} = Blk0,
    case is_exit_bif(IsPrefix) of
        false ->
            Sub1 = maps:from_list(Sub0),
            %% Give the copied instructions fresh destination variables
            %% so that the SSA property is preserved.
            {Is1,Count,Sub} = new_names(Is0, Sub1, Count0, []),
            Is2 = [sub(I, Sub) || I <- Is1],
            Cost = build_cost(Is2, Cost0),
            Is = IsPrefix ++ Is2,
            Ret = sub(Ret0, Sub),
            Blk = Blk0#b_blk{is=Is,last=Ret},
            Blocks = Blocks0#{PredL:=Blk},
            {Blocks,Count,Cost};
        true ->
            %% The block ends in a call to a function that
            %% will cause an exception.
            {Blocks0,Count0,Cost0+3}
    end.

%% True if the block body is a single remote call to a BIF that always
%% exits (according to erl_bifs).
is_exit_bif([#b_set{op=call,
                    args=[#b_remote{mod=#b_literal{val=Mod},
                                    name=#b_literal{val=Name}}|Args]}]) ->
    erl_bifs:is_exit_bif(Mod, Name, length(Args));
is_exit_bif(_) -> false.

%% Rename all destination variables in Is, extending the substitution
%% map accordingly. Returns {NewIs,Count,Sub}.
new_names([#b_set{dst=Dst}=I|Is], Sub0, Count0, Acc) ->
    {NewDst,Count} = new_var(Dst, Count0),
    Sub = Sub0#{Dst=>NewDst},
    new_names(Is, Sub, Count, [I#b_set{dst=NewDst}|Acc]);
new_names([], Sub, Count, Acc) ->
    {reverse(Acc),Count,Sub}.

%% All instructions must be side-effect-free constructors or guard
%% BIFs for the duplication to be safe.
suitable_tail_ops(Is) ->
    all(fun(#b_set{op=Op}) ->
                is_suitable_tail_op(Op)
        end, Is).

is_suitable_tail_op({bif,_}) -> true;
is_suitable_tail_op(put_list) -> true;
is_suitable_tail_op(put_tuple) -> true;
is_suitable_tail_op(_) -> false.
%% Estimate the code-size cost of duplicating the given instructions.
%% An instruction whose arguments are all literals is considered free;
%% otherwise put_list and guard BIFs add 1, and put_tuple adds one
%% word per element plus one.
build_cost([#b_set{op=put_list,args=Args}|Is], Cost) ->
    build_cost(Is, Cost + build_cost_extra(Args, 1));
build_cost([#b_set{op=put_tuple,args=Args}|Is], Cost) ->
    build_cost(Is, Cost + build_cost_extra(Args, length(Args) + 1));
build_cost([#b_set{op={bif,_},args=Args}|Is], Cost) ->
    build_cost(Is, Cost + build_cost_extra(Args, 1));
build_cost([], Cost) ->
    Cost.

%% Returns 0 when every argument is a literal, otherwise Extra.
build_cost_extra(Args, Extra) ->
    case are_all_literals(Args) of
        true -> 0;
        false -> Extra
    end.

are_all_literals(Args) ->
    lists:all(fun(#b_literal{}) -> true;
                 (_Other) -> false
              end, Args).

%%%
%%% Order element/2 calls.
%%%
%%% Order an unbroken chain of element/2 calls for the same tuple
%%% with the same failure label so that the highest element is
%%% retrieved first. That will allow the other element/2 calls to
%%% be replaced with get_tuple_element/3 instructions.
%%%

ssa_opt_element({#st{ssa=Blocks}=St, FuncDb}) ->
    %% Collect the element/2 calls in execution order, group them into
    %% chains (each chain is kept in reverse execution order), and then
    %% swap the first call of each chain with the call that has the
    %% highest index.
    Linear = beam_ssa:linearize(Blocks),
    ElementCalls = collect_element_calls(Linear),
    Chains = collect_chains(ElementCalls, []),
    {St#st{ssa=swap_element_calls(Chains, Blocks)}, FuncDb}.
%% Find all blocks that consist of exactly:
%%
%%    Element = bif:element literal N, Tuple
%%    Bool = succeeded Element
%%    br Bool, label Succ, label Fail
%%
%% Returns one {Label,Succ,{Tuple,Fail},N} tuple per such block.
collect_element_calls([{L,#b_blk{is=Is0,last=Last}}|Bs]) ->
    case {Is0,Last} of
        {[#b_set{op={bif,element},dst=Element,
                 args=[#b_literal{val=N},#b_var{}=Tuple]},
          #b_set{op=succeeded,dst=Bool,args=[Element]}],
         #b_br{bool=Bool,succ=Succ,fail=Fail}} ->
            Info = {L,Succ,{Tuple,Fail},N},
            [Info|collect_element_calls(Bs)];
        {_,_} ->
            collect_element_calls(Bs)
    end;
collect_element_calls([]) -> [].

%% Group consecutive element calls (same tuple, same failure label,
%% each block's success label leading to the next call) into chains.
%% Chains shorter than two calls are discarded. Each saved chain is in
%% reverse execution order.
collect_chains([{This,_,V,_}=El|Els], [{_,This,V,_}|_]=Chain) ->
    %% Add to the previous chain.
    collect_chains(Els, [El|Chain]);
collect_chains([El|Els], [_,_|_]=Chain) ->
    %% Save the previous chain and start a new chain.
    [Chain|collect_chains(Els, [El])];
collect_chains([El|Els], _Chain) ->
    %% The previous chain is too short; discard it and start a new.
    collect_chains(Els, [El]);
collect_chains([], [_,_|_]=Chain) ->
    %% Save the last chain.
    [Chain];
collect_chains([], _) -> [].

swap_element_calls([[{L,_,_,N}|_]=Chain|Chains], Blocks0) ->
    Blocks = swap_element_calls_1(Chain, {N,L}, Blocks0),
    swap_element_calls(Chains, Blocks);
swap_element_calls([], Blocks) -> Blocks.

%% Walk the chain (reverse execution order) while tracking the element
%% call with the highest index seen so far as {HighestIndex,Label}.
swap_element_calls_1([{L1,_,_,N1}], {N2,L2}, Blocks) when N2 > N1 ->
    %% We have reached the end of the chain, and the first
    %% element instruction to be executed. Its index is lower
    %% than the maximum index found while traversing the chain,
    %% so we will need to swap the instructions.
    #{L1:=Blk1,L2:=Blk2} = Blocks,
    [#b_set{dst=Dst1}=GetEl1,Succ1] = Blk1#b_blk.is,
    [#b_set{dst=Dst2}=GetEl2,Succ2] = Blk2#b_blk.is,
    Is1 = [GetEl2,Succ1#b_set{args=[Dst2]}],
    Is2 = [GetEl1,Succ2#b_set{args=[Dst1]}],
    Blocks#{L1:=Blk1#b_blk{is=Is1},L2:=Blk2#b_blk{is=Is2}};
swap_element_calls_1([{L,_,_,N1}|Els], {N2,_}, Blocks) when N1 > N2 ->
    %% This call has a higher index than anything seen so far, so it
    %% becomes the new swap candidate. Note that both the index and
    %% the label must be updated together.
    swap_element_calls_1(Els, {N1,L}, Blocks);
swap_element_calls_1([_|Els], Highest, Blocks) ->
    swap_element_calls_1(Els, Highest, Blocks);
swap_element_calls_1([], _, Blocks) ->
    %% Nothing to do. The element call with the highest index
    %% is already the first one to be executed.
    Blocks.

%%%
%%% Record optimization.
%%%
%%% Replace tuple matching with an is_tagged_tuple instruction
%%% when applicable.
%%%

ssa_opt_record({#st{ssa=Linear}=St, FuncDb}) ->
    Blocks = maps:from_list(Linear),
    {St#st{ssa=record_opt(Linear, Blocks)}, FuncDb}.

%% Rewrite the instructions of each block; the map representation is
%% only used for looking up successor blocks.
record_opt([{L,#b_blk{is=Is0,last=Last}=Blk0}|Bs], Blocks) ->
    Is = record_opt_is(Is0, Last, Blocks),
    Blk = Blk0#b_blk{is=Is},
    [{L,Blk}|record_opt(Bs, Blocks)];
record_opt([], _Blocks) -> [].

%% Two rewrites are attempted: an is_tuple test whose success branch
%% performs the size/tag test, and a direct tuple_size/tag test pair.
record_opt_is([#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}=Set],
              Last, Blocks) ->
    case is_tagged_tuple(Tuple, Bool, Last, Blocks) of
        {yes,Size,Tag} ->
            Args = [Tuple,Size,Tag],
            [Set#b_set{op=is_tagged_tuple,args=Args}];
        no ->
            [Set]
    end;
record_opt_is([I|Is]=Is0, #b_br{bool=Bool}=Last, Blocks) ->
    case is_tagged_tuple_1(Is0, Last, Blocks) of
        {yes,_Fail,Tuple,Arity,Tag} ->
            Args = [Tuple,Arity,Tag],
            [I#b_set{op=is_tagged_tuple,dst=Bool,args=Args}];
        no ->
            [I|record_opt_is(Is, Last, Blocks)]
    end;
record_opt_is([I|Is], Last, Blocks) ->
    [I|record_opt_is(Is, Last, Blocks)];
record_opt_is([], _Last, _Blocks) -> [].
%% Given a branch on Bool (the result of an is_tuple test on Tuple),
%% check whether the success block performs a tuple_size/tag test that
%% can be folded into a single is_tagged_tuple instruction. The inner
%% test must fail to the same label as the is_tuple test for the
%% rewrite to be safe. Returns {yes,Arity,Tag} or `no`.
is_tagged_tuple(#b_var{}=Tuple, Bool,
                #b_br{bool=Bool,succ=Succ,fail=Fail},
                Blocks) ->
    #b_blk{is=Is,last=Last} = map_get(Succ, Blocks),
    case is_tagged_tuple_1(Is, Last, Blocks) of
        {yes,Fail,Tuple,Arity,Tag} ->
            {yes,Arity,Tag};
        _ ->
            no
    end;
is_tagged_tuple(_, _, _, _) -> no.

%% Match the instruction pair
%%
%%    ArityVar = bif:tuple_size Tuple
%%    Bool = bif:'=:=' ArityVar, literal N
%%
%% followed by a branch on Bool, and then check that the success block
%% compares the first element of the tuple against a literal atom.
is_tagged_tuple_1(Is, Last, Blocks) ->
    case {Is,Last} of
        {[#b_set{op={bif,tuple_size},dst=ArityVar,
                 args=[#b_var{}=Tuple]},
          #b_set{op={bif,'=:='},
                 dst=Bool,
                 args=[ArityVar, #b_literal{val=ArityVal}=Arity]}],
         #b_br{bool=Bool,succ=Succ,fail=Fail}}
          when is_integer(ArityVal) ->
            SuccBlk = map_get(Succ, Blocks),
            case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of
                no ->
                    no;
                {yes,Tag} ->
                    {yes,Fail,Tuple,Arity,Tag}
            end;
        _ ->
            no
    end.

%% The success block must branch to the same failure label as the
%% arity test (note that Fail occurs both in the pattern and as an
%% argument).
is_tagged_tuple_2(#b_blk{is=Is,
                         last=#b_br{bool=#b_var{}=Bool,fail=Fail}},
                  Tuple, Fail) ->
    is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_2(#b_blk{}, _, _) -> no.

%% Look for the extraction of the tuple's first element (position 0).
is_tagged_tuple_3([#b_set{op=get_tuple_element,
                          dst=TagVar,
                          args=[#b_var{}=Tuple,#b_literal{val=0}]}|Is],
                  Bool, Tuple) ->
    is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_3([_|Is], Bool, Tuple) ->
    is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_3([], _, _) -> no.

%% The extracted element must be compared against a literal atom by
%% the last instruction in the block, producing the branch variable.
is_tagged_tuple_4([#b_set{op={bif,'=:='},dst=Bool,
                          args=[#b_var{}=TagVar,
                                #b_literal{val=TagVal}=Tag]}],
                  Bool, TagVar) when is_atom(TagVal) ->
    {yes,Tag};
is_tagged_tuple_4([_|Is], Bool, TagVar) ->
    is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_4([], _, _) -> no.

%%%
%%% Common subexpression elimination (CSE).
%%%
%%% Eliminate repeated evaluation of identical expressions. To avoid
%%% increasing the size of the stack frame, we don't eliminate
%%% subexpressions across instructions that clobber the X registers.
%%%

ssa_opt_cse({#st{ssa=Linear}=St, FuncDb}) ->
    %% M maps block labels to the expressions known to be available on
    %% entry to that block. The entry block (label 0) starts out empty.
    M = #{0=>#{}},
    {St#st{ssa=cse(Linear, #{}, M)}, FuncDb}.

%% Process the blocks in linear order, threading the variable
%% substitution Sub and the per-block expression maps M forward.
cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
    Es0 = map_get(L, M0),
    {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
    Last = sub(Last0, Sub),
    M = cse_successors(Is1, Blk, Es, M0),
    Is = reverse(Is1),                          %cse_is/4 returns Is reversed.
    [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
cse([], _, _) -> [].

%% Propagate the expression map to the successor blocks. Note that Is1
%% is reversed, so its head is the last instruction of the block.
cse_successors([#b_set{op=succeeded,args=[Src]},Bif|_], Blk, EsSucc, M0) ->
    case cse_suitable(Bif) of
        true ->
            %% The previous instruction only has a valid value at the success branch.
            %% We must remove the substitution for Src from the failure branch.
            #b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk,
            M = cse_successors_1([Succ], EsSucc, M0),
            EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc),
            cse_successors_1([Fail], EsFail, M);
        false ->
            %% There can't be any replacement for Src in EsSucc. No need for
            %% any special handling.
            cse_successors_1(beam_ssa:successors(Blk), EsSucc, M0)
    end;
cse_successors(_Is, Blk, Es, M) ->
    cse_successors_1(beam_ssa:successors(Blk), Es, M).

%% Merge the outgoing expression map into each successor's map. A
%% successor with multiple predecessors ends up with the intersection
%% of their expression maps.
cse_successors_1([L|Ls], Es0, M) ->
    case M of
        #{L:=Es1} when map_size(Es1) =:= 0 ->
            %% The map is already empty. No need to do anything
            %% since the intersection will be empty.
            cse_successors_1(Ls, Es0, M);
        #{L:=Es1} ->
            %% Calculate the intersection of the two maps.
            %% Both keys and values must match.
            Es = maps:filter(fun(Key, Value) ->
                                     case Es1 of
                                         #{Key:=Value} -> true;
                                         #{} -> false
                                     end
                             end, Es0),
            cse_successors_1(Ls, Es0, M#{L:=Es});
        #{} ->
            cse_successors_1(Ls, Es0, M#{L=>Es0})
    end;
cse_successors_1([], _, M) -> M.
%% Process the instructions of one block. Es maps expression keys
%% ({Op,Args}) to the variable holding the already-computed value; Sub
%% maps eliminated variables to their replacements. The rewritten
%% instructions are accumulated in reverse order.
cse_is([#b_set{op=succeeded,dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) ->
    I = sub(I0, Sub0),
    case I of
        #b_set{args=[Src]} ->
            cse_is(Is, Es, Sub0, [I|Acc]);
        #b_set{} ->
            %% The previous instruction has been eliminated. Eliminate the
            %% 'succeeded' instruction too.
            Sub = Sub0#{Bool=>#b_literal{val=true}},
            cse_is(Is, Es, Sub, Acc)
    end;
cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
    I = sub(I0, Sub0),
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% Retaining the expressions map across calls and other
            %% clobbering instructions would work, but it would cause
            %% the common subexpressions to be saved to Y registers,
            %% which would probably increase the size of the stack
            %% frame.
            cse_is(Is, #{}, Sub0, [I|Acc]);
        false ->
            case cse_expr(I) of
                none ->
                    %% Not suitable for CSE.
                    cse_is(Is, Es0, Sub0, [I|Acc]);
                {ok,ExprKey} ->
                    case Es0 of
                        #{ExprKey:=Src} ->
                            %% Already computed: drop the instruction
                            %% and substitute the existing variable.
                            Sub = Sub0#{Dst=>Src},
                            cse_is(Is, Es0, Sub, Acc);
                        #{} ->
                            Es = Es0#{ExprKey=>Dst},
                            cse_is(Is, Es, Sub0, [I|Acc])
                    end
            end
    end;
cse_is([], Es, Sub, Acc) ->
    {Acc,Es,Sub}.

%% Return the expression key for an instruction that is suitable for
%% CSE, or `none`.
cse_expr(#b_set{op=Op,args=Args}=I) ->
    case cse_suitable(I) of
        true -> {ok,{Op,Args}};
        false -> none
    end.

cse_suitable(#b_set{op=get_hd}) -> true;
cse_suitable(#b_set{op=get_tl}) -> true;
cse_suitable(#b_set{op=put_list}) -> true;
cse_suitable(#b_set{op=get_tuple_element}) -> true;
cse_suitable(#b_set{op=put_tuple}) -> true;
cse_suitable(#b_set{op={bif,tuple_size}}) ->
    %% Doing CSE for tuple_size/1 can prevent the
    %% creation of test_arity and select_tuple_arity
    %% instructions. That could decrease performance
    %% and beam_validator could fail to understand
    %% that tuple operations that follow are safe.
    false;
cse_suitable(#b_set{anno=Anno,op={bif,Name},args=Args}) ->
    %% Doing CSE for floating point operators is unsafe.
    %% Doing CSE for comparison operators would prevent
    %% creation of 'test' instructions.
    Arity = length(Args),
    not (is_map_key(float_op, Anno) orelse
         erl_internal:new_type_test(Name, Arity) orelse
         erl_internal:comp_op(Name, Arity) orelse
         erl_internal:bool_op(Name, Arity));
cse_suitable(#b_set{}) -> false.

%%%
%%% Using floating point instructions.
%%%
%%% Use the special floating points version of arithmetic
%%% instructions, if the operands are known to be floats or the result
%%% of the operation will be a float.
%%%
%%% The float instructions were never used in guards before, so we
%%% will take special care to keep not using them in guards. Using
%%% them in guards would require a new version of the 'fconv'
%%% instruction that would take a failure label. Since it is unlikely
%%% that using float instructions in guards would be benefical, why
%%% bother implementing a new instruction? Also, implementing float
%%% instructions in guards in HiPE could turn out to be a lot of work.
%%%

%% State for the float pass.
-record(fs,
        {s=undefined :: 'undefined' | 'cleared', %Error state: 'cleared' after
                                                 %a {float,clearerror}.
         regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()}, %Tagged var => float reg.
         fail=none :: 'none' | beam_ssa:label(), %Current failure label.
         non_guards :: gb_sets:set(beam_ssa:label()), %Labels not in guards.
         bs :: beam_ssa:block_map()
        }).

ssa_opt_float({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
    NonGuards = non_guards(Linear0),
    Blocks = maps:from_list(Linear0),
    Fs = #fs{non_guards=NonGuards,bs=Blocks},
    {Linear,Count} = float_opt(Linear0, Count0, Fs),
    {St#st{ssa=Linear,cnt=Count}, FuncDb}.

%% A two-way branch whose failure label is not in the non-guard set is
%% considered to be inside a guard.
float_blk_is_in_guard(#b_blk{last=#b_br{fail=F}}, #fs{non_guards=NonGuards}) ->
    not gb_sets:is_member(F, NonGuards);
float_blk_is_in_guard(#b_blk{}, #fs{}) ->
    false.

float_opt([{L,Blk}|Bs0], Count0, Fs) ->
    case float_blk_is_in_guard(Blk, Fs) of
        true ->
            %% This block is inside a guard. Don't do
            %% any floating point optimizations.
            {Bs,Count} = float_opt(Bs0, Count0, Fs),
            {[{L,Blk}|Bs],Count};
        false ->
            %% This block is not inside a guard.
            %% We can do the optimization.
            float_opt_1(L, Blk, Bs0, Count0, Fs)
    end;
float_opt([], Count, _Fs) ->
    {[],Count}.

%% Rewrite the float operations of one block. If the block contained
%% float-annotated operations, it is split so that each {float,convert}
%% gets its own block, the new blocks are numbered, 'succeeded' checks
%% are inserted, and flush/checkerror blocks are added as needed.
float_opt_1(L, #b_blk{is=Is0}=Blk0, Bs0, Count0, Fs0) ->
    case float_opt_is(Is0, Fs0, Count0, []) of
        {Is1,Fs1,Count1} ->
            Fs2 = float_fail_label(Blk0, Fs1),
            Fail = Fs2#fs.fail,
            {Flush,Blk,Fs,Count2} = float_maybe_flush(Blk0, Fs2, Count1),
            Split = float_split_conv(Is1, Blk),
            {Blks0,Count3} = float_number(Split, L, Count2),
            {Blks,Count4} = float_conv(Blks0, Fail, Count3),
            {Bs,Count} = float_opt(Bs0, Count4, Fs),
            {Blks++Flush++Bs,Count};
        none ->
            %% No float operations in this block; leave it unchanged.
            {Bs,Count} = float_opt(Bs0, Count0, Fs0),
            {[{L,Blk0}|Bs],Count}
    end.

%% Split {float,convert} instructions into individual blocks. The
%% original block (with its terminator) always comes last; the
%% temporary branches (succ=0) are fixed up by float_conv/3.
float_split_conv(Is0, Blk) ->
    Br = #b_br{bool=#b_literal{val=true},succ=0,fail=0},
    case splitwith(fun(#b_set{op=Op}) ->
                           Op =/= {float,convert}
                   end, Is0) of
        {Is,[]} ->
            [Blk#b_blk{is=Is}];
        {[_|_]=Is1,[#b_set{op={float,convert}}=Conv|Is2]} ->
            [#b_blk{is=Is1,last=Br},
             #b_blk{is=[Conv],last=Br}|float_split_conv(Is2, Blk)];
        {[],[#b_set{op={float,convert}}=Conv|Is1]} ->
            [#b_blk{is=[Conv],last=Br}|float_split_conv(Is1, Blk)]
    end.

%% Number the blocks that were split. The first block keeps the
%% original label; the rest get fresh labels from the counter.
float_number([B|Bs0], FirstL, Count0) ->
    {Bs,Count} = float_number(Bs0, Count0),
    {[{FirstL,B}|Bs],Count}.

float_number([B|Bs0], Count0) ->
    {Bs,Count} = float_number(Bs0, Count0+1),
    {[{Count0,B}|Bs],Count};
float_number([], Count) ->
    {[],Count}.

%% Insert 'succeeded' instructions after each {float,convert}
%% instruction.
%% float_conv(NumberedBlocks, Fail, Count0) -> {Blocks,Count}.
%%  Add a 'succeeded' test after each {float,convert} instruction and
%%  wire up each split block to branch to the next one.
float_conv([{L,#b_blk{is=Is0}=Blk0}|Bs0], Fail, Count0) ->
    case Is0 of
        [#b_set{op={float,convert}}=Conv] ->
            {Bool0,Count1} = new_reg('@ssa_bool', Count0),
            Bool = #b_var{name=Bool0},
            Succeeded = #b_set{op=succeeded,dst=Bool,
                               args=[Conv#b_set.dst]},
            Is = [Conv,Succeeded],
            [{NextL,_}|_] = Bs0,
            Br = #b_br{bool=Bool,succ=NextL,fail=Fail},
            Blk = Blk0#b_blk{is=Is,last=Br},
            {Bs,Count} = float_conv(Bs0, Fail, Count1),
            {[{L,Blk}|Bs],Count};
        [_|_] ->
            case Bs0 of
                [{NextL,_}|_] ->
                    %% Not a conversion block; fall through to the
                    %% next split block unconditionally.
                    Br = #b_br{bool=#b_literal{val=true},
                               succ=NextL,fail=NextL},
                    Blk = Blk0#b_blk{last=Br},
                    {Bs,Count} = float_conv(Bs0, Fail, Count0),
                    {[{L,Blk}|Bs],Count};
                [] ->
                    %% Last block; keep its original terminator.
                    {[{L,Blk0}],Count0}
            end
    end.

%% float_maybe_flush(Blk, Fs, Count0) -> {FlushBlocks,Blk,Fs,Count}.
%%  If the float error state has been cleared ('cleared'), emit a
%%  checkerror block and a block that copies the float registers back
%%  to ordinary variables -- unless flushing can be deferred to a
%%  successor that is itself a float operation outside a guard.
float_maybe_flush(Blk0, #fs{s=cleared,fail=Fail,bs=Blocks}=Fs0, Count0) ->
    #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0,

    %% If the success block starts with a floating point operation, we can
    %% defer flushing to that block as long as it isn't a guard.
    #b_blk{is=Is} = SuccBlk = map_get(Succ, Blocks),
    SuccIsGuard = float_blk_is_in_guard(SuccBlk, Fs0),

    case Is of
        [#b_set{anno=#{float_op:=_}}|_] when not SuccIsGuard ->
            %% No flush needed.
            {[],Blk0,Fs0,Count0};
        _ ->
            %% Flush needed.
            {Bool0,Count1} = new_reg('@ssa_bool', Count0),
            Bool = #b_var{name=Bool0},

            %% Allocate block numbers.
            CheckL = Count1,              %For checkerror.
            FlushL = Count1 + 1,          %For flushing of float regs.
            Count = Count1 + 2,
            Blk = Blk0#b_blk{last=Br#b_br{succ=CheckL}},

            %% Build the block with the checkerror instruction.
            CheckIs = [#b_set{op={float,checkerror},dst=Bool}],
            CheckBr = #b_br{bool=Bool,succ=FlushL,fail=Fail},
            CheckBlk = #b_blk{is=CheckIs,last=CheckBr},

            %% Build the block that flushes all registers.
            FlushIs = float_flush_regs(Fs0),
            FlushBr = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
            FlushBlk = #b_blk{is=FlushIs,last=FlushBr},

            %% Update state and blocks.
            Fs = Fs0#fs{s=undefined,regs=#{},fail=none},
            FlushBs = [{CheckL,CheckBlk},{FlushL,FlushBlk}],
            {FlushBs,Blk,Fs,Count}
    end;
float_maybe_flush(Blk, Fs, Count) ->
    {[],Blk,Fs,Count}.

%% float_opt_is(Is, Fs, Count0, Acc) -> {Is,Fs,Count} | 'none'.
%%  Rewrite all float_op-annotated instructions in the block;
%%  return 'none' when the block contains no float operations.
float_opt_is([#b_set{op=succeeded,args=[Src]}=I0],
             #fs{regs=Rs}=Fs, Count, Acc) ->
    case Rs of
        #{Src:=Fr} ->
            %% Retarget the 'succeeded' test at the float register
            %% copy of its argument.
            I = I0#b_set{args=[Fr]},
            {reverse(Acc, [I]),Fs,Count};
        #{} ->
            {reverse(Acc, [I0]),Fs,Count}
    end;
float_opt_is([#b_set{anno=Anno0}=I0|Is0], Fs0, Count0, Acc) ->
    case Anno0 of
        #{float_op:=FTypes} ->
            Anno = maps:remove(float_op, Anno0),
            I1 = I0#b_set{anno=Anno},
            {Is,Fs,Count} = float_make_op(I1, FTypes, Fs0, Count0),
            float_opt_is(Is0, Fs, Count, reverse(Is, Acc));
        #{} ->
            %% Not a float operation; any cached register copies are
            %% no longer valid.
            float_opt_is(Is0, Fs0#fs{regs=#{}}, Count0, [I0|Acc])
    end;
float_opt_is([], Fs, _Count, _Acc) ->
    #fs{s=undefined} = Fs,                      %Assertion.
    none.

%% Rewrite one {bif,Op} instruction to its {float,Op} counterpart,
%% loading the arguments into float registers first and emitting a
%% {float,clearerror} the first time.
float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0}=I0,
              Ts, #fs{s=S,regs=Rs0}=Fs, Count0) ->
    {As1,Rs1,Count1} = float_load(As0, Ts, Rs0, Count0, []),
    {As,Is0} = unzip(As1),
    {Fr,Count2} = new_reg('@fr', Count1),
    FrDst = #b_var{name=Fr},
    I = I0#b_set{op={float,Op},dst=FrDst,args=As},
    Rs = Rs1#{Dst=>FrDst},
    Is = append(Is0) ++ [I],
    case S of
        undefined ->
            {Ignore,Count} = new_reg('@ssa_ignore', Count2),
            C = #b_set{op={float,clearerror},dst=#b_var{name=Ignore}},
            {[C|Is],Fs#fs{s=cleared,regs=Rs},Count};
        cleared ->
            {Is,Fs#fs{regs=Rs},Count2}
    end.

%% Load each argument into a float register according to its type
%% annotation ('float' or 'convert').
float_load([A|As], [T|Ts], Rs0, Count0, Acc) ->
    {Load,Rs,Count} = float_reg_arg(A, T, Rs0, Count0),
    float_load(As, Ts, Rs, Count, [Load|Acc]);
float_load([], [], Rs, Count, Acc) ->
    {reverse(Acc),Rs,Count}.
%% float_reg_arg(Arg, Type, Regs, Count0) -> {{Var,LoadIs},Regs,Count}.
%%  Return the float register variable for Arg, reusing a previously
%%  loaded copy if one exists in Regs; otherwise emit a load
%%  instruction for it.
float_reg_arg(A, T, Rs, Count0) ->
    case Rs of
        #{A:=Fr} ->
            {{Fr,[]},Rs,Count0};
        #{} ->
            {Fr,Count} = new_float_copy_reg(Count0),
            Dst = #b_var{name=Fr},
            I = float_load_reg(T, A, Dst),
            {{Dst,[I]},Rs#{A=>Dst},Count}
    end.

%% Build the instruction that loads one argument into a float
%% register, converting it unless it is already known to be a float.
float_load_reg(convert, #b_var{}=Src, Dst) ->
    #b_set{op={float,convert},dst=Dst,args=[Src]};
float_load_reg(convert, #b_literal{val=Val}=Src, Dst) ->
    try float(Val) of
        F ->
            #b_set{op={float,put},dst=Dst,args=[#b_literal{val=F}]}
    catch
        error:_ ->
            %% Let the exception happen at runtime.
            #b_set{op={float,convert},dst=Dst,args=[Src]}
    end;
float_load_reg(float, Src, Dst) ->
    #b_set{op={float,put},dst=Dst,args=[Src]}.

new_float_copy_reg(Count) ->
    new_reg('@fr_copy', Count).

%% Create a fresh variable name from Base and the counter.
new_reg(Base, Count) ->
    Fr = {Base,Count},
    {Fr,Count+1}.

%% Remember the failure label of a conditional branch in the state.
float_fail_label(#b_blk{last=Last}, Fs) ->
    case Last of
        #b_br{bool=#b_var{},fail=Fail} ->
            Fs#fs{fail=Fail};
        _ ->
            Fs
    end.

%% Build {float,get} instructions that copy the float registers back
%% to their ordinary variables. Pure copies ('@fr_copy') need not be
%% flushed.
float_flush_regs(#fs{regs=Rs}) ->
    maps:fold(fun(_, #b_var{name={'@fr_copy',_}}, Acc) ->
                      Acc;
                 (Dst, Fr, Acc) ->
                      [#b_set{op={float,get},dst=Dst,args=[Fr]}|Acc]
              end, [], Rs).

%%%
%%% Live optimization.
%%%
%%% Optimize instructions whose values are not used. They could be
%%% removed if they have no side effects, or in a few cases replaced
%%% with cheaper instructions.
%%%

ssa_opt_live({#st{ssa=Linear0}=St, FuncDb}) ->
    RevLinear = reverse(Linear0),
    Blocks0 = maps:from_list(RevLinear),
    Blocks = live_opt(RevLinear, #{}, Blocks0),
    Linear = beam_ssa:linearize(Blocks),
    {St#st{ssa=Linear}, FuncDb}.

%% Traverse the blocks bottom-up, computing the set of live variables
%% at the start of each block and pruning dead instructions.
live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
    Blk1 = beam_ssa_share:block(Blk0, Blocks),
    Successors = beam_ssa:successors(Blk1),
    Live0 = live_opt_succ(Successors, L, LiveMap0, gb_sets:empty()),
    {Blk,Live} = live_opt_blk(Blk1, Live0),
    LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
    live_opt(Bs, LiveMap, Blocks#{L:=Blk});
live_opt([], _, Acc) -> Acc.

%% Union the live-in sets of all successors of block L, preferring a
%% per-edge {Successor,L} entry when the successor has a phi node.
live_opt_succ([S|Ss], L, LiveMap, Live0) ->
    Key = {S,L},
    case LiveMap of
        #{Key:=Live} ->
            %% The successor has a phi node, and the value for
            %% this block in the phi node is a variable.
            live_opt_succ(Ss, L, LiveMap, gb_sets:union(Live, Live0));
        #{S:=Live} ->
            %% No phi node in the successor, or the value for
            %% this block in the phi node is a literal.
            live_opt_succ(Ss, L, LiveMap, gb_sets:union(Live, Live0));
        #{} ->
            %% A peek_message block which has not been processed yet.
            live_opt_succ(Ss, L, LiveMap, Live0)
    end;
live_opt_succ([], _, _, Acc) -> Acc.

%% Store the live set for block L and, when L starts with phi nodes,
%% add per-predecessor {L,Pred} entries including the variables that
%% each predecessor contributes to the phi nodes.
live_opt_phis(Is, L, Live0, LiveMap0) ->
    LiveMap = LiveMap0#{L=>Live0},
    Phis = takewhile(fun(#b_set{op=Op}) -> Op =:= phi end, Is),
    case Phis of
        [] ->
            LiveMap;
        [_|_] ->
            PhiArgs = append([Args || #b_set{args=Args} <- Phis]),
            case [{P,V} || {#b_var{}=V,P} <- PhiArgs] of
                [_|_]=PhiVars ->
                    PhiLive0 = rel2fam(PhiVars),
                    PhiLive = [{{L,P},gb_sets:union(gb_sets:from_list(Vs), Live0)} ||
                                  {P,Vs} <- PhiLive0],
                    maps:merge(LiveMap, maps:from_list(PhiLive));
                [] ->
                    %% There were only literals in the phi node(s).
                    LiveMap
            end
    end.

%% Optimize one block: seed the live set with the variables used by
%% the terminator and scan the instructions backwards.
live_opt_blk(#b_blk{is=Is0,last=Last}=Blk, Live0) ->
    Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(Last))),
    {Is,Live} = live_opt_is(reverse(Is0), Live1, []),
    {Blk#b_blk{is=Is},Live}.
%% live_opt_is(ReversedIs, Live, Acc) -> {Is,Live}.
%%  Scan the instructions in reverse order, dropping instructions
%%  whose result is dead, provided they have no side effects.
live_opt_is([#b_set{op=phi,dst=Dst}=I|Is], Live, Acc) ->
    case gb_sets:is_member(Dst, Live) of
        true ->
            live_opt_is(Is, Live, [I|Acc]);
        false ->
            live_opt_is(Is, Live, Acc)
    end;
live_opt_is([#b_set{op=succeeded,dst=SuccDst=SuccDstVar,
                    args=[Dst]}=SuccI,
             #b_set{dst=Dst}=I|Is], Live0, Acc) ->
    case gb_sets:is_member(Dst, Live0) of
        true ->
            Live1 = gb_sets:add(Dst, Live0),
            Live = gb_sets:delete_any(SuccDst, Live1),
            live_opt_is([I|Is], Live, [SuccI|Acc]);
        false ->
            case live_opt_unused(I) of
                {replace,NewI0} ->
                    %% The result is unused, but the instruction can
                    %% be replaced with a cheaper one that takes over
                    %% the destination of the 'succeeded' test.
                    NewI = NewI0#b_set{dst=SuccDstVar},
                    live_opt_is([NewI|Is], Live0, Acc);
                keep ->
                    case gb_sets:is_member(SuccDst, Live0) of
                        true ->
                            Live1 = gb_sets:add(Dst, Live0),
                            Live = gb_sets:delete(SuccDst, Live1),
                            live_opt_is([I|Is], Live, [SuccI|Acc]);
                        false ->
                            live_opt_is([I|Is], Live0, Acc)
                    end
            end
    end;
live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
    case gb_sets:is_member(Dst, Live0) of
        true ->
            Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
            Live = gb_sets:delete(Dst, Live1),
            live_opt_is(Is, Live, [I|Acc]);
        false ->
            case beam_ssa:no_side_effect(I) of
                true ->
                    live_opt_is(Is, Live0, Acc);
                false ->
                    Live = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
                    live_opt_is(Is, Live, [I|Acc])
            end
    end;
live_opt_is([], Live, Acc) ->
    {Acc,Live}.

%% A get_map_element whose value is unused can be replaced with the
%% cheaper has_map_field test.
live_opt_unused(#b_set{op=get_map_element}=Set) ->
    {replace,Set#b_set{op=has_map_field}};
live_opt_unused(_) -> keep.

%%%
%%% Optimize binary matching.
%%%
%%% * If the value of segment is never extracted, rewrite
%%%   to a bs_skip instruction.
%%%
%%% * Coalesce adjacent bs_skip instructions and skip instructions
%%%   with bs_test_tail.
%%%

ssa_opt_bsm({#st{ssa=Linear}=St, FuncDb}) ->
    Extracted0 = bsm_extracted(Linear),
    Extracted = cerl_sets:from_list(Extracted0),
    {St#st{ssa=bsm_skip(Linear, Extracted)}, FuncDb}.

bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs0], Extracted) ->
    Bs = bsm_skip(Bs0, Extracted),
    Is = bsm_skip_is(Is0, Extracted),
    coalesce_skips({L,Blk#b_blk{is=Is}}, Bs);
bsm_skip([], _) -> [].

%% Rewrite a bs_match whose result context is never extracted into a
%% 'skip' match.
bsm_skip_is([I0|Is], Extracted) ->
    case I0 of
        #b_set{op=bs_match,
               dst=Ctx,
               args=[#b_literal{val=T}=Type,PrevCtx|Args0]}
          when T =/= string, T =/= skip ->
            I = case cerl_sets:is_element(Ctx, Extracted) of
                    true ->
                        I0;
                    false ->
                        %% The value is never extracted.
                        Args = [#b_literal{val=skip},PrevCtx,Type|Args0],
                        I0#b_set{args=Args}
                end,
            [I|Is];
        #b_set{} ->
            [I0|bsm_skip_is(Is, Extracted)]
    end;
bsm_skip_is([], _) -> [].

%% Collect all match contexts whose value is extracted by a
%% bs_extract instruction.
bsm_extracted([{_,#b_blk{is=Is}}|Bs]) ->
    case Is of
        [#b_set{op=bs_extract,args=[Ctx]}|_] ->
            [Ctx|bsm_extracted(Bs)];
        _ ->
            bsm_extracted(Bs)
    end;
bsm_extracted([]) -> [].

%% Try to combine the skip in this block with a skip or bs_test_tail
%% in the following block.
coalesce_skips({L,#b_blk{is=[#b_set{op=bs_extract}=Extract|Is0],
                         last=Last0}=Blk0}, Bs0) ->
    case coalesce_skips_is(Is0, Last0, Bs0) of
        not_possible ->
            [{L,Blk0}|Bs0];
        {Is,Last,Bs} ->
            Blk = Blk0#b_blk{is=[Extract|Is],last=Last},
            [{L,Blk}|Bs]
    end;
coalesce_skips({L,#b_blk{is=Is0,last=Last0}=Blk0}, Bs0) ->
    case coalesce_skips_is(Is0, Last0, Bs0) of
        not_possible ->
            [{L,Blk0}|Bs0];
        {Is,Last,Bs} ->
            Blk = Blk0#b_blk{is=Is,last=Last},
            [{L,Blk}|Bs]
    end.
%% Combine two adjacent literal-sized skips (or a skip followed by a
%% bs_test_tail) into a single instruction. The failure labels of
%% both blocks must be the same.
coalesce_skips_is([#b_set{op=bs_match,
                          args=[#b_literal{val=skip},
                                Ctx0,Type,Flags,
                                #b_literal{val=Size0},
                                #b_literal{val=Unit0}]}=Skip0,
                   #b_set{op=succeeded}],
                  #b_br{succ=L2,fail=Fail}=Br0,
                  Bs0) when is_integer(Size0) ->
    case Bs0 of
        [{L2,#b_blk{is=[#b_set{op=bs_match,
                               dst=SkipDst,
                               args=[#b_literal{val=skip},_,_,_,
                                     #b_literal{val=Size1},
                                     #b_literal{val=Unit1}]},
                        #b_set{op=succeeded}=Succeeded],
                    last=#b_br{fail=Fail}=Br}}|Bs] when is_integer(Size1) ->
            %% Coalesce the two skips into one with unit 1.
            SkipBits = Size0 * Unit0 + Size1 * Unit1,
            Skip = Skip0#b_set{dst=SkipDst,
                               args=[#b_literal{val=skip},Ctx0,
                                     Type,Flags,
                                     #b_literal{val=SkipBits},
                                     #b_literal{val=1}]},
            Is = [Skip,Succeeded],
            {Is,Br,Bs};
        [{L2,#b_blk{is=[#b_set{op=bs_test_tail,
                               args=[_Ctx,#b_literal{val=TailSkip}]}],
                    last=#b_br{succ=NextSucc,fail=Fail}}}|Bs] ->
            %% Fold the skip into the following tail test.
            SkipBits = Size0 * Unit0,
            TestTail = Skip0#b_set{op=bs_test_tail,
                                   args=[Ctx0,#b_literal{val=SkipBits+TailSkip}]},
            Br = Br0#b_br{bool=TestTail#b_set.dst,succ=NextSucc},
            Is = [TestTail],
            {Is,Br,Bs};
        _ ->
            not_possible
    end;
coalesce_skips_is(_, _, _) ->
    not_possible.

%%%
%%% Short-cutting binary matching instructions.
%%%

ssa_opt_bsm_shortcut({#st{ssa=Linear}=St, FuncDb}) ->
    Positions = bsm_positions(Linear, #{}),
    case map_size(Positions) of
        0 ->
            %% No binary matching instructions.
            {St, FuncDb};
        _ ->
            {St#st{ssa=bsm_shortcut(Linear, Positions)}, FuncDb}
    end.

%% Build a map from each match context to the number of bits known to
%% have been matched, and from each block ending in a bs_test_tail to
%% {TotalBitsTested,FailureLabel}.
bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
    PosMap = bsm_positions_is(Is, PosMap0),
    case {Is,Last} of
        {[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}],
         #b_br{bool=Bool,fail=Fail}} ->
            Bits = Bits0 + map_get(Ctx, PosMap0),
            bsm_positions(Bs, PosMap#{L=>{Bits,Fail}});
        {_,_} ->
            bsm_positions(Bs, PosMap)
    end;
bsm_positions([], PosMap) -> PosMap.

bsm_positions_is([#b_set{op=bs_start_match,dst=New}|Is], PosMap0) ->
    PosMap = PosMap0#{New=>0},
    bsm_positions_is(Is, PosMap);
bsm_positions_is([#b_set{op=bs_match,dst=New,args=Args}|Is], PosMap0) ->
    [_,Old|_] = Args,
    #{Old:=Bits0} = PosMap0,
    Bits = bsm_update_bits(Args, Bits0),
    PosMap = PosMap0#{New=>Bits},
    bsm_positions_is(Is, PosMap);
bsm_positions_is([_|Is], PosMap) ->
    bsm_positions_is(Is, PosMap);
bsm_positions_is([], PosMap) -> PosMap.

%% Add the (minimum) number of bits consumed by one match instruction
%% to Bits.
bsm_update_bits([#b_literal{val=string},_,#b_literal{val=String}], Bits) ->
    Bits + bit_size(String);
bsm_update_bits([#b_literal{val=utf8}|_], Bits) ->
    Bits + 8;
bsm_update_bits([#b_literal{val=utf16}|_], Bits) ->
    Bits + 16;
bsm_update_bits([#b_literal{val=utf32}|_], Bits) ->
    Bits + 32;
bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits)
  when is_integer(Sz) ->
    Bits + Sz*U;
bsm_update_bits(_, Bits) -> Bits.
%% If a bs_match is known to need more bits than a later bs_test_tail
%% has available, the match must fail; branch directly to the tail
%% test's failure label instead.
bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) ->
    case {Is,Last0} of
        {[#b_set{op=bs_match,dst=New,args=[_,Old|_]},
          #b_set{op=succeeded,dst=Bool,args=[New]}],
         #b_br{bool=Bool,fail=Fail}} ->
            case PosMap of
                #{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits ->
                    Last = Last0#b_br{fail=NextFail},
                    [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)];
                #{} ->
                    [{L,Blk}|bsm_shortcut(Bs, PosMap)]
            end;
        {_,_} ->
            [{L,Blk}|bsm_shortcut(Bs, PosMap)]
    end;
bsm_shortcut([], _PosMap) -> [].

%%%
%%% Eliminate redundant bs_test_unit2 instructions.
%%%

ssa_opt_bsm_units({#st{ssa=Linear}=St, FuncDb}) ->
    {St#st{ssa=bsm_units(Linear, #{})}, FuncDb}.

%% Propagate the known unit of each match context along the control
%% flow, joining the unit maps at blocks with multiple predecessors.
bsm_units([{L,#b_blk{last=#b_br{succ=Succ,fail=Fail}}=Block0} | Bs], UnitMaps0) ->
    UnitsIn = maps:get(L, UnitMaps0, #{}),
    {Block, UnitsOut} = bsm_units_skip(Block0, UnitsIn),
    UnitMaps1 = bsm_units_join(Succ, UnitsOut, UnitMaps0),
    UnitMaps = bsm_units_join(Fail, UnitsIn, UnitMaps1),
    [{L, Block} | bsm_units(Bs, UnitMaps)];
bsm_units([{L,#b_blk{last=#b_switch{fail=Fail,list=Switch}}=Block} | Bs], UnitMaps0) ->
    UnitsIn = maps:get(L, UnitMaps0, #{}),
    Labels = [Fail | [Lbl || {_Arg, Lbl} <- Switch]],
    UnitMaps = foldl(fun(Lbl, UnitMaps) ->
                             bsm_units_join(Lbl, UnitsIn, UnitMaps)
                     end, UnitMaps0, Labels),
    [{L, Block} | bsm_units(Bs, UnitMaps)];
bsm_units([{L, Block} | Bs], UnitMaps) ->
    [{L, Block} | bsm_units(Bs, UnitMaps)];
bsm_units([], _UnitMaps) ->
    [].

bsm_units_skip(Block, Units) ->
    bsm_units_skip_1(Block#b_blk.is, Block, Units).

bsm_units_skip_1([#b_set{op=bs_start_match,dst=New}|_], Block, Units) ->
    %% We bail early since there can't be more than one match per block.
    {Block, Units#{ New => 1 }};
bsm_units_skip_1([#b_set{op=bs_match,
                         dst=New,
                         args=[#b_literal{val=skip},
                               Ctx,
                               #b_literal{val=binary},
                               _Flags,
                               #b_literal{val=all},
                               #b_literal{val=OpUnit}]}=Skip | Test],
                 Block0, Units) ->
    %% A skip of 'all' remaining bits is a no-op when the context's
    %% unit is evenly divisible by the skip's unit: drop the skip and
    %% its 'succeeded' test and make the branch unconditional.
    [#b_set{op=succeeded,dst=Bool,args=[New]}] = Test, %Assertion.
    #b_br{bool=Bool} = Last0 = Block0#b_blk.last, %Assertion.
    CtxUnit = map_get(Ctx, Units),
    if
        CtxUnit rem OpUnit =:= 0 ->
            Is = takewhile(fun(I) -> I =/= Skip end, Block0#b_blk.is),
            Last = Last0#b_br{bool=#b_literal{val=true}},
            Block = Block0#b_blk{is=Is,last=Last},
            {Block, Units#{ New => CtxUnit }};
        CtxUnit rem OpUnit =/= 0 ->
            {Block0, Units#{ New => OpUnit, Ctx => OpUnit }}
    end;
bsm_units_skip_1([#b_set{op=bs_match,dst=New,args=Args}|_], Block, Units) ->
    [_,Ctx|_] = Args,
    CtxUnit = map_get(Ctx, Units),
    OpUnit = bsm_op_unit(Args),
    {Block, Units#{ New => gcd(OpUnit, CtxUnit) }};
bsm_units_skip_1([_I | Is], Block, Units) ->
    bsm_units_skip_1(Is, Block, Units);
bsm_units_skip_1([], Block, Units) ->
    {Block, Units}.

%% Return the number of bits (or the unit) by which a match
%% instruction is known to advance its context.
bsm_op_unit([_,_,_,Size,#b_literal{val=U}]) ->
    case Size of
        #b_literal{val=Sz} when is_integer(Sz) -> Sz*U;
        _ -> U
    end;
bsm_op_unit([#b_literal{val=string},_,#b_literal{val=String}]) ->
    bit_size(String);
bsm_op_unit([#b_literal{val=utf8}|_]) ->
    8;
bsm_op_unit([#b_literal{val=utf16}|_]) ->
    16;
bsm_op_unit([#b_literal{val=utf32}|_]) ->
    32;
bsm_op_unit(_) ->
    1.

%% Several paths can lead to the same match instruction and the inferred units
%% may differ between them, so we can only keep the information that is common
%% to all paths.
%% Join the incoming unit map MapA into the map already recorded for
%% Lbl, keeping only keys present in both (with the gcd of the units).
bsm_units_join(Lbl, MapA, UnitMaps0) when is_map_key(Lbl, UnitMaps0) ->
    MapB = map_get(Lbl, UnitMaps0),
    Merged = if
                 map_size(MapB) =< map_size(MapA) ->
                     bsm_units_join_1(maps:keys(MapB), MapA, MapB);
                 map_size(MapB) > map_size(MapA) ->
                     bsm_units_join_1(maps:keys(MapA), MapB, MapA)
             end,
    UnitMaps0#{Lbl := Merged};
bsm_units_join(Lbl, MapA, UnitMaps0) when MapA =/= #{} ->
    UnitMaps0#{Lbl => MapA};
bsm_units_join(_Lbl, _MapA, UnitMaps0) ->
    UnitMaps0.

%% Intersect the two maps, iterating over the keys of the smaller one
%% and combining units that occur in both with gcd/2.
bsm_units_join_1([Key | Keys], Left, Right) when is_map_key(Key, Left) ->
    UnitA = map_get(Key, Left),
    UnitB = map_get(Key, Right),
    bsm_units_join_1(Keys, Left, Right#{Key := gcd(UnitA, UnitB)});
bsm_units_join_1([Key | Keys], Left, Right) ->
    bsm_units_join_1(Keys, Left, maps:remove(Key, Right));
bsm_units_join_1([], _MapA, Right) ->
    Right.

%%%
%%% Optimize binary construction.
%%%
%%% If an integer segment or a float segment has a literal size and
%%% a literal value, convert to a binary segment. Coalesce adjacent
%%% literal binary segments. Literal binary segments will be converted
%%% to bs_put_string instructions in a later pass.
%%%

ssa_opt_bs_puts({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
    {Linear,Count} = opt_bs_puts(Linear0, Count0, []),
    {St#st{ssa=Linear,cnt=Count}, FuncDb}.

opt_bs_puts([{L,#b_blk{is=Is}=Blk0}|Bs], Count0, Acc0) ->
    case Is of
        [#b_set{op=bs_put}=I0] ->
            case opt_bs_put(L, I0, Blk0, Count0, Acc0) of
                not_possible ->
                    opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0]);
                {Count,Acc1} ->
                    Acc = opt_bs_puts_merge(Acc1),
                    opt_bs_puts(Bs, Count, Acc)
            end;
        _ ->
            opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0])
    end;
opt_bs_puts([], Count, Acc) ->
    {reverse(Acc),Count}.

%% If the two most recently accumulated blocks each contain a single
%% bs_put of a literal binary segment, merge them into one block with
%% the coalesced binary.
opt_bs_puts_merge([{L1,#b_blk{is=Is}=Blk0},{L2,#b_blk{is=AccIs}}=BAcc|Acc]) ->
    case {AccIs,Is} of
        {[#b_set{op=bs_put,
                 args=[#b_literal{val=binary},
                       #b_literal{},
                       #b_literal{val=Bin0},
                       #b_literal{val=all},
                       #b_literal{val=1}]}],
         [#b_set{op=bs_put,
                 args=[#b_literal{val=binary},
                       #b_literal{},
                       #b_literal{val=Bin1},
                       #b_literal{val=all},
                       #b_literal{val=1}]}=I0]} ->
            %% Coalesce the two segments to one.
            Bin = <<Bin0/bitstring,Bin1/bitstring>>,
            I = I0#b_set{args=bs_put_args(binary, Bin, all)},
            Blk = Blk0#b_blk{is=[I]},
            [{L2,Blk}|Acc];
        {_,_} ->
            [{L1,Blk0},BAcc|Acc]
    end.

%% Try to evaluate a single bs_put at compile time, emitting one
%% block for each resulting segment.
opt_bs_put(L, I0, #b_blk{last=Br0}=Blk0, Count0, Acc) ->
    case opt_bs_put(I0) of
        [Bin] when is_bitstring(Bin) ->
            Args = bs_put_args(binary, Bin, all),
            I = I0#b_set{args=Args},
            Blk = Blk0#b_blk{is=[I]},
            {Count0,[{L,Blk}|Acc]};
        [{int,Int,Size},Bin] when is_bitstring(Bin) ->
            %% Construct a bs_put_integer instruction followed
            %% by a bs_put_binary instruction.
            IntArgs = bs_put_args(integer, Int, Size),
            BinArgs = bs_put_args(binary, Bin, all),
            {BinL,BinVarNum} = {Count0,Count0+1},
            Count = Count0 + 2,
            BinVar = #b_var{name={'@ssa_bool',BinVarNum}},
            BinI = I0#b_set{dst=BinVar,args=BinArgs},
            BinBlk = Blk0#b_blk{is=[BinI],last=Br0#b_br{bool=BinVar}},
            IntI = I0#b_set{args=IntArgs},
            IntBlk = Blk0#b_blk{is=[IntI],last=Br0#b_br{succ=BinL}},
            {Count,[{BinL,BinBlk},{L,IntBlk}|Acc]};
        not_possible ->
            not_possible
    end.
%% opt_bs_put(Instruction) -> [Segment] | 'not_possible'.
%%  Try to evaluate a bs_put with literal arguments at compile time,
%%  returning one or two replacement segments.
opt_bs_put(#b_set{args=[#b_literal{val=binary},_,#b_literal{val=Val},
                        #b_literal{val=all},#b_literal{val=Unit}]})
  when is_bitstring(Val) ->
    if
        bit_size(Val) rem Unit =:= 0 ->
            [Val];
        true ->
            not_possible
    end;
opt_bs_put(#b_set{args=[#b_literal{val=Type},#b_literal{val=Flags},
                        #b_literal{val=Val},#b_literal{val=Size},
                        #b_literal{val=Unit}]}=I0) when is_integer(Size) ->
    EffectiveSize = Size * Unit,
    if
        EffectiveSize > 0 ->
            case {Type,opt_bs_put_endian(Flags)} of
                {integer,big} when is_integer(Val) ->
                    if
                        EffectiveSize < 64 ->
                            [<<Val:EffectiveSize>>];
                        true ->
                            opt_bs_put_split_int(Val, EffectiveSize)
                    end;
                {integer,little} when is_integer(Val), EffectiveSize < 128 ->
                    %% To avoid an explosion in code size, we only try
                    %% to optimize relatively small fields.
                    <<Int:EffectiveSize>> = <<Val:EffectiveSize/little>>,
                    Args = bs_put_args(Type, Int, EffectiveSize),
                    I = I0#b_set{args=Args},
                    opt_bs_put(I);
                {binary,_} when is_bitstring(Val) ->
                    <<Bitstring:EffectiveSize/bits,_/bits>> = Val,
                    [Bitstring];
                {float,Endian} ->
                    try
                        [opt_bs_put_float(Val, EffectiveSize, Endian)]
                    catch error:_ ->
                            not_possible
                    end;
                {_,_} ->
                    not_possible
            end;
        true ->
            not_possible
    end;
opt_bs_put(#b_set{}) -> not_possible.

opt_bs_put_float(N, Sz, Endian) ->
    case Endian of
        big -> <<N:Sz/big-float-unit:1>>;
        little -> <<N:Sz/little-float-unit:1>>
    end.

%% Build the argument list for a bs_put instruction with unit 1.
bs_put_args(Type, Val, Size) ->
    [#b_literal{val=Type},
     #b_literal{val=[unsigned,big]},
     #b_literal{val=Val},
     #b_literal{val=Size},
     #b_literal{val=1}].

%% Return the first endianness flag in the flag list.
opt_bs_put_endian([big=E|_]) -> E;
opt_bs_put_endian([little=E|_]) -> E;
opt_bs_put_endian([native=E|_]) -> E;
opt_bs_put_endian([_|Fs]) -> opt_bs_put_endian(Fs).

%% Split a big-endian integer segment with many leading zeroes or
%% ones into an integer segment and a literal binary segment.
opt_bs_put_split_int(Int, Size) ->
    Pos = opt_bs_put_split_int_1(Int, 0, Size - 1),
    UpperSize = Size - Pos,
    if
        Pos =:= 0 ->
            %% Value is 0 or -1 -- keep the original instruction.
            not_possible;
        UpperSize < 64 ->
            %% No or few leading zeroes or ones.
            [<<Int:Size>>];
        true ->
            %% There are 64 or more leading ones or zeroes in
            %% the resulting binary. Split into two separate
            %% segments to avoid an explosion in code size.
            [{int,Int bsr Pos,UpperSize},<<Int:Pos>>]
    end.

%% Binary search for the lowest byte-aligned position above which all
%% bits equal the sign bit (Int bsr Pos is 0 or -1).
opt_bs_put_split_int_1(_Int, L, R) when L > R ->
    8 * ((L + 7) div 8);
opt_bs_put_split_int_1(Int, L, R) ->
    Mid = (L + R) div 2,
    case Int bsr Mid of
        Upper when Upper =:= 0; Upper =:= -1 ->
            opt_bs_put_split_int_1(Int, L, Mid - 1);
        _ ->
            opt_bs_put_split_int_1(Int, Mid + 1, R)
    end.

%%%
%%% Optimize expressions such as "tuple_size(Var) =:= 2".
%%%
%%% Consider this code:
%%%
%%% 0:
%%%   .
%%%   .
%%%   .
%%%   Size = bif:tuple_size Var
%%%   BoolVar1 = succeeded Size
%%%   br BoolVar1, label 4, label 3
%%%
%%% 4:
%%%   BoolVar2 = bif:'=:=' Size, literal 2
%%%   br BoolVar2, label 6, label 3
%%%
%%% 6: ...   %% OK
%%%
%%% 3: ...   %% Not a tuple of size 2
%%%
%%% The BEAM code will look like this:
%%%
%%%   {bif,tuple_size,{f,3},[{x,0}],{x,0}}.
%%%   {test,is_eq_exact,{f,3},[{x,0},{integer,2}]}.
%%%
%%% Better BEAM code will be produced if we transform the
%%% code like this:
%%%
%%% 0:
%%%   .
%%%   .
%%%   .
%%%   br label 10
%%%
%%% 10:
%%%   NewBoolVar = bif:is_tuple Var
%%%   br NewBoolVar, label 11, label 3
%%%
%%% 11:
%%%   Size = bif:tuple_size Var
%%%   br label 4
%%%
%%% 4:
%%%   BoolVar2 = bif:'=:=' Size, literal 2
%%%   br BoolVar2, label 6, label 3
%%%
%%% (The key part of the transformation is the removal of
%%% the 'succeeded' instruction to signal to the code generator
%%% that the call to tuple_size/1 can't fail.)
%%%
%%% The BEAM code will look like:
%%%
%%%   {test,is_tuple,{f,3},[{x,0}]}.
%%%   {test_arity,{f,3},[{x,0},2]}.
%%%
%%% Those two instructions will be combined into a single
%%% is_tuple_of_arity instruction by the loader.
%%%

ssa_opt_tuple_size({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
    %% This optimization is only safe in guards, as prefixing tuple_size with
    %% an is_tuple check prevents it from throwing an exception.
    NonGuards = non_guards(Linear0),
    {Linear,Count} = opt_tup_size(Linear0, NonGuards, Count0, []),
    {St#st{ssa=Linear,cnt=Count}, FuncDb}.

%% Look for a block that compares a variable against a non-negative
%% integer literal; its predecessor may contain a tuple_size call
%% that can be rewritten.
opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], NonGuards, Count0, Acc0) ->
    case {Is,Last} of
        {[#b_set{op={bif,'=:='},dst=Bool,args=[#b_var{}=Tup,#b_literal{val=Arity}]}],
         #b_br{bool=Bool}} when is_integer(Arity), Arity >= 0 ->
            {Acc,Count} = opt_tup_size_1(Tup, L, NonGuards, Count0, Acc0),
            opt_tup_size(Bs, NonGuards, Count, [{L,Blk}|Acc]);
        {_,_} ->
            opt_tup_size(Bs, NonGuards, Count0, [{L,Blk}|Acc0])
    end;
opt_tup_size([], _NonGuards, Count, Acc) ->
    {reverse(Acc),Count}.

%% Check that the preceding block (the head of the accumulator) ends
%% in a branch to the comparison block, that its failure label is in
%% a guard, and that it ends with tuple_size + succeeded.
opt_tup_size_1(Size, EqL, NonGuards, Count0, [{L,Blk0}|Acc]) ->
    #b_blk{is=Is0,last=Last} = Blk0,
    case Last of
        #b_br{bool=Bool,succ=EqL,fail=Fail} ->
            case gb_sets:is_member(Fail, NonGuards) of
                true ->
                    {[{L,Blk0}|Acc],Count0};
                false ->
                    case opt_tup_size_is(Is0, Bool, Size, []) of
                        none ->
                            {[{L,Blk0}|Acc],Count0};
                        {PreIs,TupleSizeIs,Tuple} ->
                            opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
                                           Tuple, Fail, Count0, Acc)
                    end
            end;
        _ ->
            {[{L,Blk0}|Acc],Count0}
    end;
opt_tup_size_1(_, _, _, Count, Acc) ->
    {Acc,Count}.

%% Split the predecessor block into three: the instructions before
%% the tuple_size call, a new is_tuple test, and the tuple_size call
%% itself (now known not to fail, so its 'succeeded' test is gone).
opt_tup_size_2(PreIs, TupleSizeIs, PreL, EqL, Tuple, Fail, Count0, Acc) ->
    IsTupleL = Count0,
    TupleSizeL = Count0 + 1,
    Bool = #b_var{name={'@ssa_bool',Count0+2}},
    Count = Count0 + 3,

    True = #b_literal{val=true},
    PreBr = #b_br{bool=True,succ=IsTupleL,fail=IsTupleL},
    PreBlk = #b_blk{is=PreIs,last=PreBr},

    IsTupleIs = [#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}],
    IsTupleBr = #b_br{bool=Bool,succ=TupleSizeL,fail=Fail},
    IsTupleBlk = #b_blk{is=IsTupleIs,last=IsTupleBr},

    TupleSizeBr = #b_br{bool=True,succ=EqL,fail=EqL},
    TupleSizeBlk = #b_blk{is=TupleSizeIs,last=TupleSizeBr},
    {[{TupleSizeL,TupleSizeBlk},
      {IsTupleL,IsTupleBlk},
      {PreL,PreBlk}|Acc],Count}.

%% Find a tuple_size instruction immediately followed by its
%% 'succeeded' test at the end of the instruction list.
opt_tup_size_is([#b_set{op={bif,tuple_size},dst=Size,args=[Tuple]}=I,
                 #b_set{op=succeeded,dst=Bool,args=[Size]}],
                Bool, Size, Acc) ->
    {reverse(Acc),[I],Tuple};
opt_tup_size_is([I|Is], Bool, Size, Acc) ->
    opt_tup_size_is(Is, Bool, Size, [I|Acc]);
opt_tup_size_is([], _, _, _Acc) -> none.

%%%
%%% Optimize #b_switch{} instructions.
%%%
%%% If the argument for a #b_switch{} comes from a phi node with all
%%% literals, any values in the switch list which are not in the phi
%%% node can be removed.
1841%%% 1842%%% If the values in the phi node and switch list are the same, 1843%%% the failure label can't be reached and be eliminated. 1844%%% 1845%%% A #b_switch{} with only one value can be rewritten to 1846%%% a #b_br{}. A switch that only verifies that the argument 1847%%% is 'true' or 'false' can be rewritten to a is_boolean test. 1848%%% 1849 1850ssa_opt_sw({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) -> 1851 {Linear,Count} = opt_sw(Linear0, Count0, []), 1852 {St#st{ssa=Linear,cnt=Count}, FuncDb}. 1853 1854opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Sw0}=Blk0}|Bs], Count0, Acc) -> 1855 %% Ensure that no label in the switch list is the same 1856 %% as the failure label. 1857 #b_switch{fail=Fail,list=List0} = Sw0, 1858 List = [{Val,Lbl} || {Val,Lbl} <- List0, Lbl =/= Fail], 1859 Sw1 = beam_ssa:normalize(Sw0#b_switch{list=List}), 1860 case Sw1 of 1861 #b_switch{arg=Arg,fail=Fail,list=[{Lit,Lbl}]} -> 1862 %% Rewrite a single value switch to a br. 1863 Bool = #b_var{name={'@ssa_bool',Count0}}, 1864 Count = Count0 + 1, 1865 IsEq = #b_set{op={bif,'=:='},dst=Bool,args=[Arg,Lit]}, 1866 Br = #b_br{bool=Bool,succ=Lbl,fail=Fail}, 1867 Blk = Blk0#b_blk{is=Is++[IsEq],last=Br}, 1868 opt_sw(Bs, Count, [{L,Blk}|Acc]); 1869 #b_switch{arg=Arg,fail=Fail, 1870 list=[{#b_literal{val=B1},Lbl},{#b_literal{val=B2},Lbl}]} 1871 when B1 =:= not B2 -> 1872 %% Replace with is_boolean test. 1873 Bool = #b_var{name={'@ssa_bool',Count0}}, 1874 Count = Count0 + 1, 1875 IsBool = #b_set{op={bif,is_boolean},dst=Bool,args=[Arg]}, 1876 Br = #b_br{bool=Bool,succ=Lbl,fail=Fail}, 1877 Blk = Blk0#b_blk{is=Is++[IsBool],last=Br}, 1878 opt_sw(Bs, Count, [{L,Blk}|Acc]); 1879 Sw0 -> 1880 opt_sw(Bs, Count0, [{L,Blk0}|Acc]); 1881 Sw -> 1882 Blk = Blk0#b_blk{last=Sw}, 1883 opt_sw(Bs, Count0, [{L,Blk}|Acc]) 1884 end; 1885opt_sw([{L,#b_blk{}=Blk}|Bs], Count, Acc) -> 1886 opt_sw(Bs, Count, [{L,Blk}|Acc]); 1887opt_sw([], Count, Acc) -> 1888 {reverse(Acc),Count}. 1889 1890%%% 1891%%% Merge blocks. 
%%%

%% Merge each block that has exactly one predecessor into that
%% predecessor, whenever is_merge_allowed/3 deems it safe.
ssa_opt_merge_blocks({#st{ssa=Blocks}=St, FuncDb}) ->
    Preds = beam_ssa:predecessors(Blocks),
    Merged = merge_blocks_1(beam_ssa:rpo(Blocks), Preds, Blocks),
    {St#st{ssa=Merged}, FuncDb}.

merge_blocks_1([L|Ls], Preds0, Blocks0) ->
    case Preds0 of
        #{L:=[P]} ->
            %% L has exactly one predecessor (P); try to merge L into P.
            #{P:=Blk0,L:=Blk1} = Blocks0,
            case is_merge_allowed(L, Blk0, Blk1) of
                true ->
                    #b_blk{is=Is0} = Blk0,
                    #b_blk{is=Is1} = Blk1,
                    verify_merge_is(Is1),
                    Is = Is0 ++ Is1,
                    Blk = Blk1#b_blk{is=Is},
                    Blocks1 = maps:remove(L, Blocks0),
                    Blocks2 = Blocks1#{P:=Blk},
                    Successors = beam_ssa:successors(Blk),
                    %% Phi nodes in the successors still refer to L;
                    %% repoint them to P, and update the predecessor map
                    %% to match.
                    Blocks = beam_ssa:update_phi_labels(Successors, L, P, Blocks2),
                    Preds = merge_update_preds(Successors, L, P, Preds0),
                    merge_blocks_1(Ls, Preds, Blocks);
                false ->
                    merge_blocks_1(Ls, Preds0, Blocks0)
            end;
        #{} ->
            merge_blocks_1(Ls, Preds0, Blocks0)
    end;
merge_blocks_1([], _Preds, Blocks) -> Blocks.

%% Replace the label From with To in the predecessor lists of all the
%% given blocks.
merge_update_preds([L|Ls], From, To, Preds0) ->
    Ps = [rename_label(P, From, To) || P <- map_get(L, Preds0)],
    Preds = Preds0#{L:=Ps},
    merge_update_preds(Ls, From, To, Preds);
merge_update_preds([], _, _, Preds) -> Preds.

rename_label(From, From, To) -> To;
rename_label(Lbl, _, _) -> Lbl.

verify_merge_is([#b_set{op=Op}|_]) ->
    %% The merged block has only one predecessor, so it should not have any phi
    %% nodes.
    true = Op =/= phi;                          %Assertion.
verify_merge_is(_) ->
    ok.

%% is_merge_allowed(Lbl, PredBlk, Blk) -> true | false.
%%  Decide whether Blk (labelled Lbl) may be appended to its single
%%  predecessor PredBlk.
is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=peek_message}|_]}) ->
    %% Merging would make peek_message a non-leading instruction;
    %% refuse.
    false;
is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{is=Is}) ->
    %% The predecessor block must have exactly one successor (L) for
    %% the merge to be safe.
    case beam_ssa:successors(Blk) of
        [L] ->
            case Is of
                [#b_set{op=phi,args=[_]}|_] ->
                    %% The type optimizer pass must have been
                    %% turned off, since it would have removed this
                    %% redundant phi node. Refuse to merge the blocks
                    %% to ensure that this phi node remains at the
                    %% beginning of a block.
                    false;
                _ ->
                    true
            end;
        [_|_] ->
            false
    end;
is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) ->
    false.

%%%
%%% When a tuple is matched, the pattern matching compiler generates a
%%% get_tuple_element instruction for every tuple element that will
%%% ever be used in the rest of the function. That often forces the
%%% extracted tuple elements to be stored in Y registers until it's
%%% time to use them. It could also mean that there could be execution
%%% paths that will never use the extracted elements.
%%%
%%% This optimization will sink get_tuple_element instructions, that
%%% is, move them forward in the execution stream to the last possible
%%% block where they will still dominate all uses. That may reduce the
%%% size of stack frames, reduce register shuffling, and avoid
%%% extracting tuple elements on execution paths that never use the
%%% extracted values.
%%%

ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) ->
    Linear = beam_ssa:linearize(Blocks0),

    %% Create a map with all variables that are defined by
    %% get_tuple_element instructions. Each variable maps to the block
    %% it is defined in.
    case def_blocks(Linear) of
        [] ->
            %% No get_tuple_element instructions, so there is nothing to do.
            {St, FuncDb};
        [_|_]=Defs0 ->
            Defs = maps:from_list(Defs0),
            {do_ssa_opt_sink(Linear, Defs, St), FuncDb}
    end.

do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) ->
    %% Now find all the blocks that use variables defined by
    %% get_tuple_element instructions.
    Used = used_blocks(Linear, Defs, []),

    %% Calculate dominators.
    {Dom,Numbering} = beam_ssa:dominators(Blocks0),

    %% It is not safe to move get_tuple_element instructions to blocks
    %% that begin with certain instructions. It is also unsafe to move
    %% the instructions into any part of a receive. To avoid such
    %% unsafe moves, pretend that the unsuitable blocks are not
    %% dominators.
    Unsuitable = unsuitable(Linear, Blocks0),

    %% Calculate new positions for get_tuple_element instructions. The new
    %% position is a block that dominates all uses of the variable.
    DefLoc = new_def_locations(Used, Defs, Dom, Numbering, Unsuitable),

    %% Now move all suitable get_tuple_element instructions to their
    %% new blocks.
    Blocks = foldl(fun({V,To}, A) ->
                           From = map_get(V, Defs),
                           move_defs(V, From, To, A)
                   end, Blocks0, DefLoc),
    St#st{ssa=Blocks}.

%% Return [{Var,DefiningBlockLabel}] for every variable defined by a
%% get_tuple_element instruction in the function.
def_blocks([{L,#b_blk{is=Is}}|Bs]) ->
    def_blocks_is(Is, L, def_blocks(Bs));
def_blocks([]) -> [].

def_blocks_is([#b_set{op=get_tuple_element,dst=Dst}|Is], L, Acc) ->
    def_blocks_is(Is, L, [{Dst,L}|Acc]);
def_blocks_is([_|Is], L, Acc) ->
    def_blocks_is(Is, L, Acc);
def_blocks_is([], _, Acc) -> Acc.

%% Return a family [{Var,[UserBlockLabel]}] giving, for each variable
%% in Def, the labels of all blocks that use it.
used_blocks([{L,Blk}|Bs], Def, Acc0) ->
    Used = beam_ssa:used(Blk),
    Acc = [{V,L} || V <- Used, maps:is_key(V, Def)] ++ Acc0,
    used_blocks(Bs, Def, Acc);
used_blocks([], _Def, Acc) ->
    rel2fam(Acc).

%% unsuitable(Linear, Blocks) -> Unsuitable.
%%  Return a gb_set of block labels for the blocks that are not
%%  suitable for sinking of get_tuple_element instructions.

unsuitable(Linear, Blocks) ->
    Predecessors = beam_ssa:predecessors(Blocks),
    Unsuitable0 = unsuitable_1(Linear),
    Unsuitable1 = unsuitable_recv(Linear, Blocks, Predecessors),
    gb_sets:from_list(Unsuitable0 ++ Unsuitable1).
%% Collect the labels of all blocks whose leading instruction forbids
%% inserting a get_tuple_element in front of it.
unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs]) ->
    case is_unsuitable_op(Op) of
        true -> [L|unsuitable_1(Bs)];
        false -> unsuitable_1(Bs)
    end;
unsuitable_1([{_,#b_blk{}}|Bs]) ->
    unsuitable_1(Bs);
unsuitable_1([]) ->
    [].

%% Leading operations before which no instruction may be inserted.
is_unsuitable_op(bs_extract) -> true;
is_unsuitable_op(bs_put) -> true;
is_unsuitable_op({float,_}) -> true;
is_unsuitable_op(landingpad) -> true;
is_unsuitable_op(peek_message) -> true;
is_unsuitable_op(wait_timeout) -> true;
is_unsuitable_op(_) -> false.

%% Mark every block that is part of a receive loop as unsuitable. Such
%% loops are found by walking predecessors backwards from blocks that
%% start with remove_message or recv_next.
unsuitable_recv([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs], Blocks, Predecessors)
  when Op =:= remove_message; Op =:= recv_next ->
    unsuitable_loop(L, Blocks, Predecessors) ++
        unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([_|Bs], Blocks, Predecessors) ->
    unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([], _, _) ->
    [].

unsuitable_loop(L, Blocks, Predecessors) ->
    unsuitable_loop(L, Blocks, Predecessors, []).

%% Walk predecessors of L, accumulating an ordset of visited labels.
%% The walk stops at blocks that begin with peek_message (the head of
%% the receive loop) and at labels already in the accumulator.
unsuitable_loop(L, Blocks, Predecessors, Acc0) ->
    foldl(fun(P, Acc) ->
                  case map_get(P, Blocks) of
                      #b_blk{is=[#b_set{op=peek_message}|_]} ->
                          Acc;
                      #b_blk{} ->
                          case ordsets:is_element(P, Acc) of
                              true ->
                                  Acc;
                              false ->
                                  unsuitable_loop(P, Blocks, Predecessors,
                                                  ordsets:add_element(P, Acc))
                          end
                  end
          end, Acc0, map_get(L, Predecessors)).

%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs,
%%                   Dominators, Numbering, Unsuitable) ->
%%         [{Variable,NewDefinitionBlock}]
%%
%% Calculate new locations for get_tuple_element instructions.
%% For each variable, the new location is a block that dominates all
%% uses of the variable and is as near to the uses as possible.

new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable) ->
    [{V,New} ||
        {V,UsedIn} <- Vs,
        New <- sink_candidate(V, UsedIn, Defs, Dom, Numbering, Unsuitable)].

%% Return [NewBlock] when the get_tuple_element defining V can be sunk
%% to a strictly later block, and [] when it should stay where it is.
sink_candidate(V, UsedIn, Defs, Dom, Numbering, Unsuitable) ->
    DefIn = map_get(V, Defs),
    Common = common_dominator(UsedIn, Dom, Numbering, Unsuitable),
    case member(Common, map_get(DefIn, Dom)) of
        true ->
            %% The common dominator is DefIn itself or an ancestor of
            %% DefIn; sinking would not move the instruction forward.
            [];
        false ->
            %% A suitable descendant of DefIn was found; the
            %% get_tuple_element instruction can be sunk there.
            [Common]
    end.

%% Find the nearest common dominator of the given blocks that is not
%% in the Unsuitable set.
common_dominator(Ls, Dom, Numbering, Unsuitable) ->
    [Common|_] = beam_ssa:common_dominators(Ls, Dom, Numbering),
    case gb_sets:is_member(Common, Unsuitable) of
        false ->
            Common;
        true ->
            %% Placement in this block is not allowed. Retry one step
            %% up in the dominator tree.
            [Common,OneUp|_] = map_get(Common, Dom),
            common_dominator([OneUp], Dom, Numbering, Unsuitable)
    end.

%% Move the get_tuple_element instruction defining V from block From
%% to block To, unless insertion turns out to be impossible.
move_defs(V, From, To, Blocks0) ->
    #{From:=SrcBlk0,To:=DstBlk0} = Blocks0,
    {Def,SrcBlk} = remove_def(V, SrcBlk0),
    try insert_def(V, Def, DstBlk0) of
        DstBlk ->
            Blocks0#{From:=SrcBlk,To:=DstBlk}
    catch
        throw:not_possible ->
            %% Insertion was vetoed; leave both blocks untouched.
            Blocks0
    end.

%% Extract the instruction defining V from the block, returning the
%% instruction and the block without it.
remove_def(V, #b_blk{is=Is0}=Blk) ->
    {Before,[Def|After]} =
        splitwith(fun(#b_set{dst=Dst}) -> Dst =/= V end, Is0),
    {Def,Blk#b_blk{is=Before++After}}.
%% Insert the definition of V as early as allowed in the block.
insert_def(V, Def, #b_blk{is=Is0}=Blk) ->
    Blk#b_blk{is=insert_def_is(Is0, V, Def)}.

insert_def_is([#b_set{op=phi}=I|Is], V, Def) ->
    case member(V, beam_ssa:used(I)) of
        true ->
            %% V is used by this phi node, so the definition cannot be
            %% placed after it. Give up.
            throw(not_possible);
        false ->
            [I|insert_def_is(Is, V, Def)]
    end;
insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) ->
    %% We may only move past call, catch_end, and timeout
    %% instructions, and never past an instruction that is immediately
    %% followed by its 'succeeded' test.
    CanPass = case Is of
                  [#b_set{op=succeeded}|_] ->
                      false;
                  _ ->
                      Op =:= call orelse Op =:= 'catch_end'
                          orelse Op =:= timeout
              end,
    case CanPass of
        true ->
            case member(V, beam_ssa:used(I)) of
                true ->
                    %% The variable is used by this instruction; the
                    %% definition must come before it.
                    [Def|Is0];
                false ->
                    [I|insert_def_is(Is, V, Def)]
            end;
        false ->
            [Def|Is0]
    end;
insert_def_is([], _V, Def) ->
    [Def].

%%%
%%% Order consecutive get_tuple_element instructions in ascending
%%% position order. This will give the loader more opportunities
%%% for combining get_tuple_element instructions.
%%%

ssa_opt_get_tuple_element({#st{ssa=Blocks0}=St, FuncDb}) ->
    Blocks = opt_get_tuple_element(maps:to_list(Blocks0), Blocks0),
    {St#st{ssa=Blocks}, FuncDb}.

opt_get_tuple_element([{L,#b_blk{is=Is0}=Blk}|Bs], Blocks) ->
    case opt_get_tuple_element_is(Is0, false, []) of
        {yes,Is} ->
            opt_get_tuple_element(Bs, Blocks#{L:=Blk#b_blk{is=Is}});
        no ->
            opt_get_tuple_element(Bs, Blocks)
    end;
opt_get_tuple_element([], Blocks) ->
    Blocks.
%% Sort each run of get_tuple_element instructions that extract from
%% the same source variable by element position. Returns {yes,Is} if
%% at least one run was found, and 'no' otherwise.
opt_get_tuple_element_is([#b_set{op=get_tuple_element,
                                 args=[#b_var{}=Src,_]}=I0|Is0],
                         _AnyChange, Acc) ->
    {Group,Is} = collect_get_tuple_element(Is0, Src, [I0]),
    Sorted = [I || {_,I} <- sort([{Pos,I} ||
                                     #b_set{args=[_,Pos]}=I <- Group])],
    opt_get_tuple_element_is(Is, true, reverse(Sorted, Acc));
opt_get_tuple_element_is([I|Is], AnyChange, Acc) ->
    opt_get_tuple_element_is(Is, AnyChange, [I|Acc]);
opt_get_tuple_element_is([], true, Acc) ->
    {yes,reverse(Acc)};
opt_get_tuple_element_is([], false, _Acc) ->
    no.

%% Gather the longest prefix of get_tuple_element instructions that
%% extract from Src, prepended (in reverse) onto Acc.
collect_get_tuple_element(Is, Src, Acc) ->
    {More,Rest} =
        splitwith(fun(#b_set{op=get_tuple_element,args=[S,_]}) ->
                          S =:= Src;
                     (#b_set{}) ->
                          false
                  end, Is),
    {reverse(More, Acc),Rest}.

%%%
%%% Common utilities.
%%%

%% Greatest common divisor (Euclid). B must not be 0.
gcd(A, B) ->
    Rem = A rem B,
    if
        Rem =:= 0 -> B;
        true -> gcd(B, Rem)
    end.

%% Return a gb_set of the labels of all blocks that cannot be part of
%% a guard (landingpad blocks plus the shared badarg block).
non_guards(Linear) ->
    gb_sets:from_list(non_guards_1(Linear)).

non_guards_1([{L,#b_blk{is=[#b_set{op=landingpad}|_]}}|Bs]) ->
    [L|non_guards_1(Bs)];
non_guards_1([{_,#b_blk{}}|Bs]) ->
    non_guards_1(Bs);
non_guards_1([]) ->
    [?BADARG_BLOCK].

%% Convert a relation (list of pairs) to a sorted family.
rel2fam(Rel) ->
    sofs:to_external(sofs:rel2fam(sofs:relation(Rel))).

%% Apply the variable substitution Sub to an instruction or terminator
%% and normalize the result.
sub(I, Sub) ->
    beam_ssa:normalize(sub_1(I, Sub)).

sub_1(#b_set{op=phi,args=Args0}=I, Sub) ->
    I#b_set{args=[{sub_arg(A, Sub),P} || {A,P} <- Args0]};
sub_1(#b_set{args=Args0}=I, Sub) ->
    I#b_set{args=[sub_arg(A, Sub) || A <- Args0]};
sub_1(#b_br{bool=#b_var{}=V}=Br, Sub) ->
    Br#b_br{bool=sub_arg(V, Sub)};
sub_1(#b_switch{arg=#b_var{}=V}=Sw, Sub) ->
    Sw#b_switch{arg=sub_arg(V, Sub)};
sub_1(#b_ret{arg=#b_var{}=V}=Ret, Sub) ->
    Ret#b_ret{arg=sub_arg(V, Sub)};
sub_1(Last, _Sub) ->
    Last.
%% Substitute a single argument. Remote call targets are handled by
%% substituting both the module and the function name.
sub_arg(#b_remote{mod=Mod,name=Name}=Rem, Sub) ->
    Rem#b_remote{mod=sub_arg(Mod, Sub),name=sub_arg(Name, Sub)};
sub_arg(Old, Sub) ->
    %% Replace the argument if there is a substitution for it;
    %% otherwise keep it unchanged.
    maps:get(Old, Sub, Old).

%% Create a fresh variable derived from an existing one, reusing its
%% base name together with the current counter value.
new_var(#b_var{name=Name}, Count) ->
    Base = case Name of
               {B,N} ->
                   true = is_integer(N),        %Assertion.
                   B;
               B ->
                   B
           end,
    {#b_var{name={Base,Count}},Count+1}.