%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018-2020. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%% Purpose: Prepare for code generation, including register allocation.
%%
%% The output of this compiler pass is still in the SSA format, but
%% it has been annotated and transformed to help the code generator.
%%
%% * Some instructions are translated to other instructions closer to
%%   the BEAM instructions. For example, the binary matching
%%   instructions are transformed from the optimization-friendly
%%   internal format to instructions more similar to the actual BEAM
%%   instructions.
%%
%% * Blocks that will need an instruction for allocating a stack frame
%%   are annotated with a {frame_size,Size} annotation.
%%
%% * 'copy' instructions are added for all variables that need
%%   to be saved to the stack frame. Additional 'copy' instructions
%%   can be added as an optimization to reuse y registers (see
%%   the copy_retval sub pass).
%%
%% * Each function is annotated with a {register,RegisterMap}
%%   annotation that maps each variable to a BEAM register. The linear
%%   scan algorithm is used to allocate registers.
%%
%% There are four kinds of registers: x, y, fr (floating point
%% register), and z. A variable will be allocated to a z register if
%% it is only used by the instruction following the instruction that
%% defines the variable.
%% The code generator will typically combine those instructions into a
%% test instruction. z registers are also used for some instructions
%% that don't have a return value.
%%
%% References:
%%
%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation
%% in the context of SSA form and register constraints. In Proceedings
%% of the International Conference on Compiler Construction, pages
%% 229-246. LNCS 2304, Springer-Verlag, 2002.
%%
%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a
%% linear scan register allocator. In Proceedings of the ACM/USENIX
%% International Conference on Virtual Execution Environments, pages
%% 132-141. ACM Press, 2005.
%%
%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA
%% Form. In Proceedings of the International Symposium on Code
%% Generation and Optimization, pages 170-179. ACM Press, 2010.
%%

-module(beam_ssa_pre_codegen).

-export([module/2]).

-include("beam_ssa.hrl").

-import(lists, [all/2,any/2,append/1,duplicate/2,
                foldl/3,last/1,member/2,partition/2,
                reverse/1,reverse/2,sort/1,splitwith/2,zip/2]).

%% Entry point for this compiler pass: run all sub passes on every
%% function of the module.
-spec module(beam_ssa:b_module(), [compile:option()]) ->
                    {'ok',beam_ssa:b_module()}.

module(#b_module{body=Fs0}=Module, Opts) ->
    %% OTP 22 binary matching instructions are used unless the
    %% no_bsm3 option asks for the legacy instruction set.
    UseBSM3 = not proplists:get_bool(no_bsm3, Opts),
    Ps = passes(Opts),
    Fs = functions(Fs0, Ps, UseBSM3),
    {ok,Module#b_module{body=Fs}}.

%% Apply the sub passes to each function in the module body.
functions(Fs, Ps, UseBSM3) ->
    [function(F, Ps, UseBSM3) || F <- Fs].

-type b_var() :: beam_ssa:b_var().
-type var_name() :: beam_ssa:var_name().
-type instr_number() :: pos_integer().
-type range() :: {instr_number(),instr_number()}.
-type reg_num() :: beam_asm:reg_num().
-type xreg() :: {'x',reg_num()}.
-type yreg() :: {'y',reg_num()}.
-type ypool() :: {'y',beam_ssa:label()}.
-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} |
                       ypool() | {yreg(),ypool()} | 'z'.
-type ssa_register() :: beam_ssa_codegen:ssa_register().

%% Debugging helper: time the execution of Body.
-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)).

%% State threaded through all sub passes of this compiler pass.
-record(st, {ssa :: beam_ssa:block_map(),           %Current SSA code.
             args :: [b_var()],                     %Function arguments.
             cnt :: beam_ssa:label(),               %Counter for new labels/variables.
             use_bsm3 :: boolean(),                 %OTP 22 bs instructions allowed?
             frames=[] :: [beam_ssa:label()],       %Blocks that need stack frames.
             intervals=[] :: [{b_var(),[range()]}], %Calculated live intervals.
             res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
             regs=#{} :: #{b_var():=ssa_register()},
             extra_annos=[] :: [{atom(),term()}],
             location :: term()
            }).
-define(PASS(N), {N,fun N/1}).

%% Build the list of sub passes to be run, honoring the compiler
%% options that enable or disable individual passes.
passes(Opts) ->
    AddPrecgAnnos = proplists:get_bool(dprecg, Opts),
    FixTuples = proplists:get_bool(no_put_tuple2, Opts),
    Ps = [?PASS(assert_no_critical_edges),

          %% Preliminaries.
          ?PASS(fix_bs),
          ?PASS(sanitize),
          ?PASS(match_fail_instructions),
          case FixTuples of
              false -> ignore;
              true -> ?PASS(fix_tuples)
          end,
          ?PASS(use_set_tuple_element),
          ?PASS(place_frames),
          ?PASS(fix_receives),

          %% Find and reserve Y registers.
          ?PASS(find_yregs),
          ?PASS(reserve_yregs),

          %% Handle legacy binary match instructions that don't
          %% accept a Y register as destination.
          ?PASS(legacy_bs),

          %% Improve reuse of Y registers to potentially
          %% reduce the size of the stack frame.
          ?PASS(copy_retval),
          ?PASS(opt_get_list),

          %% Calculate live intervals.
          ?PASS(number_instructions),
          ?PASS(live_intervals),
          ?PASS(reserve_regs),

          %% If needed for a .precg file, save the live intervals
          %% so they can be included in an annotation.
          case AddPrecgAnnos of
              false -> ignore;
              true -> ?PASS(save_live_intervals)
          end,

          %% Allocate registers.
          ?PASS(linear_scan),
          ?PASS(frame_size),
          ?PASS(turn_yregs),

          ?PASS(assert_no_critical_edges)],
    lists:filter(fun(P) -> P =/= ignore end, Ps).
%% Run all sub passes on one function, producing the updated function
%% with a 'registers' annotation. On a crash, print which function was
%% being compiled to make diagnosis easier, then re-raise.
function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
         Ps, UseBSM3) ->
    try
        Location = maps:get(location, Anno, none),
        St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,
                  cnt=Count0,location=Location},
        St = compile:run_sub_passes(Ps, St0),
        #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
        F1 = add_extra_annos(F0, ExtraAnnos),
        F = beam_ssa:add_anno(registers, Regs, F1),
        F#b_function{bs=Blocks,cnt=Count}
    catch
        Class:Error:Stack ->
            %% Report the name of the failing function before
            %% re-raising the exception.
            #{func_info:={_,Name,Arity}} = Anno,
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Error, Stack)
    end.

%% Stash the calculated live intervals so that they can later be added
%% as an annotation on the function (for .precg listings).
save_live_intervals(#st{intervals=Intervals}=St) ->
    St#st{extra_annos=[{live_intervals,Intervals}]}.

%% Add extra annotations when a .precg listing file is being produced.
add_extra_annos(F, Annos) ->
    foldl(fun({Name,Value}, Acc) ->
                  beam_ssa:add_anno(Name, Value, Acc)
          end, F, Annos).

%% assert_no_critical_edges(St0) -> St.
%%  The code generator will not work if there are critical edges.
%%  Abort if any critical edges are found.

assert_no_critical_edges(#st{ssa=Blocks}=St) ->
    F = fun assert_no_ces/3,
    RPO = beam_ssa:rpo(Blocks),
    beam_ssa:fold_blocks(F, RPO, Blocks, Blocks),
    St.

assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
    %% This block has multiple predecessors. Make sure that none
    %% of the predecessors have more than one successor.
    true = all(fun({_,P}) ->
                       length(beam_ssa:successors(P, Blocks)) =:= 1
               end, Phis),                              %Assertion.
    Blocks;
assert_no_ces(_, _, Blocks) -> Blocks.

%% fix_bs(St0) -> St.
%%  Fix up the binary matching instructions:
%%
%%    * Insert bs_save and bs_restore instructions where needed.
%%
%%    * Combine bs_match and bs_extract instructions to bs_get
%%      instructions.
%% Rewrite the binary matching instructions for code generation. Does
%% nothing if the function contains no binary matching.
fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
    CollectCtx = fun(#b_set{op=bs_start_match,dst=Dst}, Acc) ->
                         %% Mark the root of the match context list.
                         [{Dst,{context,Dst}}|Acc];
                    (#b_set{op=bs_match,dst=Dst,args=[_,ParentCtx|_]}, Acc) ->
                         %% Link this match context to the previous
                         %% match context.
                         [{Dst,ParentCtx}|Acc];
                    (_, Acc) ->
                         Acc
                 end,
    RPO = beam_ssa:rpo(Blocks),
    case beam_ssa:fold_instrs(CollectCtx, RPO, [], Blocks) of
        [] ->
            %% No binary matching in this function.
            St;
        [_|_]=Chain ->
            CtxChain = maps:from_list(Chain),
            Linear0 = beam_ssa:linearize(Blocks),

            %% Insert position save/restore instructions where needed,
            %% using the instruction set selected by use_bsm3.
            {Linear1,Count} =
                case UseBSM3 of
                    true -> bs_pos_bsm3(Linear0, CtxChain, Count0);
                    false -> bs_pos_bsm2(Linear0, CtxChain, Count0)
                end,

            %% Rename instructions.
            Linear = bs_instrs(Linear1, CtxChain, []),

            St#st{ssa=maps:from_list(Linear),cnt=Count}
    end.

%% Insert bs_get_position and bs_set_position instructions as needed
%% (OTP 22 and later instruction set).
bs_pos_bsm3(Linear0, CtxChain, Count0) ->
    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
    Rs = maps:values(Rs0),
    Rel = sofs:relation(Rs, [{context,save_point}]),
    Fam = sofs:relation_to_family(Rel),
    GroupedPoints = sofs:to_external(Fam),

    {SavePoints,Count1} = make_bs_pos_dict(GroupedPoints, Count0, []),

    {Gets,Count2} = make_bs_getpos_map(Rs, SavePoints, Count1, []),
    {Sets,Count} = make_bs_setpos_map(maps:to_list(Rs0), SavePoints, Count2, []),

    %% Now insert all saves and restores.
    {bs_insert_bsm3(Linear0, Gets, Sets), Count}.

%% Build a map from save point to its bs_get_position instruction.
make_bs_getpos_map([{Ctx,Save}=Ps|T], SavePoints, Count, Acc) ->
    SavePoint = get_savepoint(Ps, SavePoints),
    I = #b_set{op=bs_get_position,dst=SavePoint,args=[Ctx]},
    make_bs_getpos_map(T, SavePoints, Count+1, [{Save,I}|Acc]);
make_bs_getpos_map([], _, Count, Acc) ->
    {maps:from_list(Acc),Count}.
%% Build a map from instruction to the bs_set_position instruction
%% that must be inserted before it.
make_bs_setpos_map([{Bef,{Ctx,_}=Ps}|T], SavePoints, Count, Acc) ->
    Ignored = #b_var{name={'@ssa_ignored',Count}},
    Args = [Ctx, get_savepoint(Ps, SavePoints)],
    I = #b_set{op=bs_set_position,dst=Ignored,args=Args},
    make_bs_setpos_map(T, SavePoints, Count+1, [{Bef,I}|Acc]);
make_bs_setpos_map([], _, Count, Acc) ->
    {maps:from_list(Acc),Count}.

%% Return the variable holding the saved position for the given
%% {Context,SavePoint} pair.
get_savepoint({_,_}=Ps, SavePoints) ->
    Name = {'@ssa_bs_position', map_get(Ps, SavePoints)},
    #b_var{name=Name}.

%% Number all save points, producing a map from {Context,SavePoint}
%% (and from each context) to a unique integer.
make_bs_pos_dict([{Ctx,Pts}|T], Count0, Acc0) ->
    {Acc, Count} = make_bs_pos_dict_1(Pts, Ctx, Count0, Acc0),
    make_bs_pos_dict(T, Count, Acc);
make_bs_pos_dict([], Count, Acc) ->
    {maps:from_list(Acc), Count}.

make_bs_pos_dict_1([H|T], Ctx, I, Acc) ->
    make_bs_pos_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
make_bs_pos_dict_1([], Ctx, I, Acc) ->
    {[{Ctx,I}|Acc], I}.

%% As bs_pos_bsm3 but without OTP-22 instructions. This is only used
%% when cross-compiling to older versions.
bs_pos_bsm2(Linear0, CtxChain, Count0) ->
    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
    Rs = maps:values(Rs0),
    Rel = sofs:relation(Rs, [{context,save_point}]),
    Fam = sofs:relation_to_family(Rel),
    GroupedPoints = sofs:to_external(Fam),
    Slots = make_save_point_dict(GroupedPoints, []),
    {Saves,Count1} = make_save_map(Rs, Slots, Count0, []),
    {Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []),

    %% Now insert all saves and restores.
    {bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}.

%% Build a map from save point to its bs_save instruction. The special
%% 'start' slot needs no instruction at all.
make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) ->
    case make_slot(Ps, Slots) of
        #b_literal{val=start} ->
            make_save_map(T, Slots, Count, Acc);
        Slot ->
            Ignored = #b_var{name={'@ssa_ignored',Count}},
            I = #b_set{op=bs_save,dst=Ignored,args=[Ctx,Slot]},
            make_save_map(T, Slots, Count+1, [{Save,I}|Acc])
    end;
make_save_map([], _, Count, Acc) ->
    {maps:from_list(Acc),Count}.
%% Build a map from instruction to the bs_restore instruction that
%% must be inserted before it.
make_restore_map([{Bef,{Ctx,_}=Ps}|T], Slots, Count, Acc) ->
    Ignored = #b_var{name={'@ssa_ignored',Count}},
    I = #b_set{op=bs_restore,dst=Ignored,args=[Ctx,make_slot(Ps, Slots)]},
    make_restore_map(T, Slots, Count+1, [{Bef,I}|Acc]);
make_restore_map([], _, Count, Acc) ->
    {maps:from_list(Acc),Count}.

%% Return the slot literal for a {Context,SavePoint} pair. Saving the
%% context itself means the initial position ('start').
make_slot({Same,Same}, _Slots) ->
    #b_literal{val=start};
make_slot({_,_}=Ps, Slots) ->
    #b_literal{val=map_get(Ps, Slots)}.

%% Assign slot numbers to all save points of each context.
make_save_point_dict([{Ctx,Pts}|T], Acc0) ->
    Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0),
    make_save_point_dict(T, Acc);
make_save_point_dict([], Acc) ->
    maps:from_list(Acc).

make_save_point_dict_1([Ctx|T], Ctx, I, Acc) ->
    %% Special {atom,start} save point. Does not need a
    %% bs_save instruction.
    make_save_point_dict_1(T, Ctx, I, Acc);
make_save_point_dict_1([H|T], Ctx, I, Acc) ->
    make_save_point_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
make_save_point_dict_1([], Ctx, I, Acc) ->
    [{Ctx,I}|Acc].

%% Walk the linearized blocks, collecting in Rs the restores that must
%% be inserted. D maps each block label to the known positions of all
%% match contexts at its entry.
bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) ->
    InPos = maps:get(L, D0, #{}),
    {SuccPos, FailPos, Rs} = bs_restores_is(Is, CtxChain, InPos, InPos, Rs0),

    D = bs_update_successors(Last, SuccPos, FailPos, D0),
    bs_restores(Bs, CtxChain, D, Rs);
bs_restores([], _, _, Rs) -> Rs.

%% Propagate the position maps to all successors of the block.
bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) ->
    join_positions([{Succ,SPos},{Fail,FPos}], D);
bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, FPos, D) ->
    SPos = FPos,                                %Assertion.
    Update = [{L,SPos} || {_,L} <- List] ++ [{Fail,SPos}],
    join_positions(Update, D);
bs_update_successors(#b_ret{}, SPos, FPos, D) ->
    SPos = FPos,                                %Assertion.
    D.

%% Merge the incoming position map of each listed label with the map
%% that is already recorded for it (if any).
join_positions([{L,MapPos0}|T], D) ->
    case D of
        #{L:=MapPos0} ->
            %% Same map.
            join_positions(T, D);
        #{L:=MapPos1} ->
            %% Different maps.
            MapPos = join_positions_1(MapPos0, MapPos1),
            join_positions(T, D#{L:=MapPos})
        ;
        #{} ->
            join_positions(T, D#{L=>MapPos0})
    end;
join_positions([], D) -> D.

%% Iterate over the keys of the smaller map for efficiency.
join_positions_1(LHS, RHS) ->
    if
        map_size(LHS) < map_size(RHS) ->
            join_positions_2(maps:keys(LHS), RHS, LHS);
        true ->
            join_positions_2(maps:keys(RHS), LHS, RHS)
    end.

join_positions_2([V | Vs], Bigger, Smaller) ->
    case {Bigger, Smaller} of
        {#{ V := Same }, #{ V := Same }} ->
            join_positions_2(Vs, Bigger, Smaller);
        {#{ V := _ }, #{ V := _ }} ->
            join_positions_2(Vs, Bigger, Smaller#{ V := unknown });
        {#{}, #{ V := _ }} ->
            join_positions_2(Vs, Bigger, maps:remove(V, Smaller))
    end;
join_positions_2([], _Bigger, Smaller) ->
    Smaller.

%%
%% Updates the restore and position maps according to the given instructions.
%%
%% Note that positions may be updated even when a match fails; if a match
%% requires a restore, the position at the fail block will be the position
%% we've *restored to* and not the one we entered the current block with.
%%

bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is],
               CtxChain, SPos0, _FPos, Rs) ->
    %% Match instructions leave the position unchanged on failure, so
    %% FPos must be the SPos we entered the *instruction* with, and not the
    %% *block*.
    %%
    %% This is important when we have multiple matches in a single block where
    %% all but the last are guaranteed to succeed; the upcoming fail block must
    %% restore to the position of the next-to-last match, not the position we
    %% entered the current block with.
    FPos = SPos0,
    SPos = SPos0#{Start=>Start},
    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is],
               CtxChain, SPos0, _FPos, Rs0) ->
    Start = bs_subst_ctx(NewPos, CtxChain),
    [_,FromPos|_] = Args,
    case SPos0 of
        #{Start:=FromPos} ->
            %% Same position, no restore needed.
            SPos = case bs_match_type(I) of
                       plain ->
                           %% Update position to new position.
                           SPos0#{Start:=NewPos};
                       _ ->
                           %% Position will not change (test_unit
                           %% instruction or no instruction at
                           %% all).
                           SPos0
                   end,
            FPos = SPos0,
            bs_restores_is(Is, CtxChain, SPos, FPos, Rs0);
        #{Start:=_} ->
            %% Different positions, might need a restore instruction.
            case bs_match_type(I) of
                none ->
                    %% This is a tail test that will be optimized away.
                    %% There's no need to do a restore, and all
                    %% positions are unchanged.
                    FPos = SPos0,
                    bs_restores_is(Is, CtxChain, SPos0, FPos, Rs0);
                test_unit ->
                    %% This match instruction will be replaced by
                    %% a test_unit instruction. We will need a
                    %% restore. The new position will be the position
                    %% restored to (NOT NewPos).
                    SPos = SPos0#{Start:=FromPos},
                    FPos = SPos,
                    Rs = Rs0#{NewPos=>{Start,FromPos}},
                    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
                plain ->
                    %% Match or skip. Position will be changed.
                    SPos = SPos0#{Start:=NewPos},
                    FPos = SPos0#{Start:=FromPos},
                    Rs = Rs0#{NewPos=>{Start,FromPos}},
                    bs_restores_is(Is, CtxChain, SPos, FPos, Rs)
            end
    end;
bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
               CtxChain, SPos, _FPos, Rs) ->
    Start = bs_subst_ctx(FromPos, CtxChain),

    #{Start:=FromPos} = SPos,                   %Assertion.
    FPos = SPos,

    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is],
               CtxChain, SPos0, _FPos, Rs0) ->
    {SPos1, Rs} = bs_restore_args(Args, SPos0, CtxChain, Dst, Rs0),

    SPos = bs_invalidate_pos(Args, SPos1, CtxChain),
    FPos = SPos,

    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
               CtxChain, SPos0, _FPos, Rs0)
  when Op =:= bs_test_tail;
       Op =:= bs_get_tail ->
    {SPos, Rs} = bs_restore_args(Args, SPos0, CtxChain, Dst, Rs0),
    FPos = SPos,

    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
bs_restores_is([#b_set{op={succeeded,guard},args=[Arg]}],
               CtxChain, SPos, FPos0, Rs) ->
    %% If we're branching on a match operation, the positions will be different
    %% depending on whether it succeeds.
    Ctx = bs_subst_ctx(Arg, CtxChain),
    FPos = case SPos of
               #{ Ctx := _ } -> FPos0;
               #{} -> SPos
           end,
    {SPos, FPos, Rs};
bs_restores_is([_ | Is], CtxChain, SPos, _FPos, Rs) ->
    FPos = SPos,
    bs_restores_is(Is, CtxChain, SPos, FPos, Rs);
bs_restores_is([], _CtxChain, SPos, _FPos, Rs) ->
    FPos = SPos,
    {SPos, FPos, Rs}.

%% Classify a bs_match instruction: 'none' for a skipped tail test,
%% 'test_unit' when it will become a unit test, 'plain' otherwise.
bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
                           #b_literal{val=binary},_Flags,
                           #b_literal{val=all},#b_literal{val=U}]}) ->
    case U of
        1 -> none;
        _ -> test_unit
    end;
bs_match_type(_) ->
    plain.

%% Call instructions leave the match position in an undefined state,
%% requiring us to invalidate each affected argument.
bs_invalidate_pos([#b_var{}=Arg|Args], Pos0, CtxChain) ->
    Start = bs_subst_ctx(Arg, CtxChain),
    case Pos0 of
        #{Start:=_} ->
            Pos = Pos0#{Start:=unknown},
            bs_invalidate_pos(Args, Pos, CtxChain);
        #{} ->
            %% Not a match context.
            bs_invalidate_pos(Args, Pos0, CtxChain)
    end;
bs_invalidate_pos([_|Args], Pos, CtxChain) ->
    bs_invalidate_pos(Args, Pos, CtxChain);
bs_invalidate_pos([], Pos, _CtxChain) ->
    Pos.

%% Record a restore for every match context argument whose current
%% position differs from the argument itself.
bs_restore_args([#b_var{}=Arg|Args], Pos0, CtxChain, Dst, Rs0) ->
    Start = bs_subst_ctx(Arg, CtxChain),
    case Pos0 of
        #{Start:=Arg} ->
            %% Same position, no restore needed.
            bs_restore_args(Args, Pos0, CtxChain, Dst, Rs0);
        #{Start:=_} ->
            %% Different positions, need a restore instruction.
            Pos = Pos0#{Start:=Arg},
            Rs = Rs0#{Dst=>{Start,Arg}},
            bs_restore_args(Args, Pos, CtxChain, Dst, Rs);
        #{} ->
            %% Not a match context.
            bs_restore_args(Args, Pos0, CtxChain, Dst, Rs0)
    end;
bs_restore_args([_|Args], Pos, CtxChain, Dst, Rs) ->
    bs_restore_args(Args, Pos, CtxChain, Dst, Rs);
bs_restore_args([], Pos, _CtxChain, _Dst, Rs) ->
    {Pos, Rs}.

%% Insert all bs_save and bs_restore instructions.

bs_insert_bsm3(Blocks, Saves, Restores) ->
    bs_insert_1(Blocks, [], Saves, Restores, fun(I) -> I end).

bs_insert_bsm2(Blocks, Saves, Restores, Slots) ->
    %% The old instructions require bs_start_match to be annotated with the
    %% number of position slots it needs.
    bs_insert_1(Blocks, [], Saves, Restores,
                fun(#b_set{op=bs_start_match,dst=Dst}=I0) ->
                        NumSlots = case Slots of
                                       #{Dst:=NumSlots0} -> NumSlots0;
                                       #{} -> 0
                                   end,
                        beam_ssa:add_anno(num_slots, NumSlots, I0);
                   (I) ->
                        I
                end).

bs_insert_1([{L,#b_blk{is=Is0}=Blk} | Bs], Deferred0, Saves, Restores, XFrm) ->
    Is1 = bs_insert_deferred(Is0, Deferred0),
    {Is, Deferred} = bs_insert_is(Is1, Saves, Restores, XFrm, []),
    [{L,Blk#b_blk{is=Is}} | bs_insert_1(Bs, Deferred, Saves, Restores, XFrm)];
bs_insert_1([], [], _, _, _) ->
    [].
%% Splice instructions deferred from the previous block into the
%% instruction stream, after any leading bs_extract instructions.
bs_insert_deferred([#b_set{op=bs_extract}=I | Is], Deferred) ->
    [I | bs_insert_deferred(Is, Deferred)];
bs_insert_deferred(Is, Deferred) ->
    Deferred ++ Is.

%% Insert the restore instruction (if any) before each instruction and
%% the save instruction (if any) after it. A save that belongs to an
%% instruction tested by a {succeeded,_} terminator is deferred to the
%% success block.
bs_insert_is([#b_set{dst=Dst}=I0|Is], Saves, Restores, XFrm, Acc0) ->
    I = XFrm(I0),
    Pre = case Restores of
              #{Dst:=R} -> [R];
              #{} -> []
          end,
    Post = case Saves of
               #{Dst:=S} -> [S];
               #{} -> []
           end,
    Acc = [I | Pre] ++ Acc0,
    case Is of
        [#b_set{op={succeeded,_},args=[Dst]}] ->
            %% Defer the save sequence to the success block.
            {reverse(Acc, Is), Post};
        _ ->
            bs_insert_is(Is, Saves, Restores, XFrm, Post ++ Acc)
    end;
bs_insert_is([], _, _, _, Acc) ->
    {reverse(Acc), []}.

%% Translate bs_match instructions to bs_get, bs_match_string,
%% or bs_skip. Also rename match context variables to use the
%% variable assigned to by the start_match instruction.

bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
    case bs_instrs_is(Is0, CtxChain, []) of
        [#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is] ->
            %% Drop this instruction. Rewrite the corresponding
            %% bs_match instruction in the previous block to
            %% a bs_get instruction.
            Acc = bs_combine(Dst, Ctx, Acc0),
            bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]);
        Is ->
            bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc0])
    end;
bs_instrs([], _, Acc) ->
    reverse(Acc).

bs_instrs_is([#b_set{op={succeeded,_}}=I|Is], CtxChain, Acc) ->
    %% This instruction refers to a specific operation, so we must not
    %% substitute the context argument.
    bs_instrs_is(Is, CtxChain, [I | Acc]);
bs_instrs_is([#b_set{op=Op,args=Args0}=I0|Is], CtxChain, Acc) ->
    Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
    I1 = I0#b_set{args=Args},
    I = case {Op,Args} of
            {bs_match,[#b_literal{val=skip},Ctx,Type|As]} ->
                I1#b_set{op=bs_skip,args=[Type,Ctx|As]};
            {bs_match,[#b_literal{val=string},Ctx|As]} ->
                I1#b_set{op=bs_match_string,args=[Ctx|As]};
            {_,_} ->
                I1
        end,
    bs_instrs_is(Is, CtxChain, [I|Acc]);
bs_instrs_is([], _, Acc) ->
    reverse(Acc).

%% Combine a bs_match instruction with the destination register
%% taken from a bs_extract instruction.

bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) ->
    [#b_set{}=Succeeded,
     #b_set{op=bs_match,args=[Type,_|As]}=BsMatch|Is1] = reverse(Is0),
    Is = reverse(Is1, [BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]},
                       Succeeded#b_set{args=[Dst]}]),
    [{L,Blk#b_blk{is=Is}}|Acc].

%% Follow the context chain to the root match context; anything that
%% is not a match context variable is returned unchanged.
bs_subst_ctx(#b_var{}=Var, CtxChain) ->
    case CtxChain of
        #{Var:={context,Ctx}} ->
            Ctx;
        #{Var:=ParentCtx} ->
            bs_subst_ctx(ParentCtx, CtxChain);
        #{} ->
            %% Not a match context variable.
            Var
    end;
bs_subst_ctx(Other, _CtxChain) ->
    Other.

%% legacy_bs(St0) -> St.
%%  Binary matching instructions in OTP 21 and earlier don't support
%%  a Y register as destination. If St#st.use_bsm3 is false,
%%  we will need to rewrite those instructions so that the result
%%  is first put in an X register and then moved to a Y register
%%  if the operation succeeded.

legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) ->
    IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]),
    Linear0 = beam_ssa:linearize(Blocks0),
    {Linear,Count} = legacy_bs(Linear0, IsYreg, Count0, #{}, []),
    Blocks = maps:from_list(Linear),
    St#st{ssa=Blocks,cnt=Count};
legacy_bs(#st{use_bsm3=true}=St) -> St.
%% Rewrite one block at a time, prepending any copy instruction that a
%% predecessor deferred to this block.
legacy_bs([{L,Blk}|Bs], IsYreg, Count0, Copies0, Acc) ->
    #b_blk{is=Is0,last=Last} = Blk,
    Is1 = case Copies0 of
              #{L:=Copy} -> [Copy|Is0];
              #{} -> Is0
          end,
    {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
    legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]);
legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
    {Acc,Count}.

%% If a bs_get/bs_init result would end up directly in a Y register,
%% redirect it to a temporary X register and schedule a copy to the Y
%% register in the success block.
legacy_bs_is([#b_set{op=Op,dst=Dst}=I0,
              #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0],
             Last, IsYreg, Count0, Copies0, Acc) ->
    NeedsFix = is_map_key(Dst, IsYreg) andalso
        case Op of
            bs_get -> true;
            bs_init -> true;
            _ -> false
        end,
    case NeedsFix of
        true ->
            TempDst = #b_var{name={'@bs_temp_dst',Count0}},
            Count = Count0 + 1,
            I = I0#b_set{dst=TempDst},
            SuccI = SuccI0#b_set{args=[TempDst]},
            Copy = #b_set{op=copy,dst=Dst,args=[TempDst]},
            #b_br{bool=SuccDst,succ=SuccL} = Last,
            Copies = Copies0#{SuccL=>Copy},
            legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]);
        false ->
            legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc])
    end;
legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
    legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
    {reverse(Acc),Count,Copies}.

%% sanitize(St0) -> St.
%%  Remove constructs that can cause problems later:
%%
%%    * Unreachable blocks may cause problems for determination of
%%      dominators.
%%
%%    * Some instructions (such as get_hd) don't accept literal
%%      arguments. Evaluate the instructions and remove them.

sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
    Ls = beam_ssa:rpo(Blocks0),
    {Blocks,Count} = sanitize(Ls, Count0, Blocks0, #{}),
    St#st{ssa=Blocks,cnt=Count}.
%% Sanitize each block in reverse post order, accumulating in Values a
%% substitution map for variables whose defining instructions were
%% evaluated away.
sanitize([L|Ls], Count0, Blocks0, Values0) ->
    #b_blk{is=Is0,last=Last0} = Blk0 = map_get(L, Blocks0),
    case sanitize_is(Is0, Last0, Count0, Values0, false, []) of
        no_change ->
            sanitize(Ls, Count0, Blocks0, Values0);
        {Is,Last,Count,Values} ->
            Blk = Blk0#b_blk{is=Is,last=Last},
            Blocks = Blocks0#{L:=Blk},
            sanitize(Ls, Count, Blocks, Values)
    end;
sanitize([], Count, Blocks0, Values) ->
    Blocks = if
                 map_size(Values) =:= 0 ->
                     Blocks0;
                 true ->
                     RPO = beam_ssa:rpo(Blocks0),
                     beam_ssa:rename_vars(Values, RPO, Blocks0)
             end,

    %% Unreachable blocks can cause problems for the dominator calculations.
    Ls = beam_ssa:rpo(Blocks),
    Reachable = gb_sets:from_list(Ls),
    {case map_size(Blocks) =:= gb_sets:size(Reachable) of
         true -> Blocks;
         false -> remove_unreachable(Ls, Blocks, Reachable, [])
     end,Count}.

sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
            Last, Count0, Values, Changed, Acc) ->
    case sanitize_args(Args0, Values) of
        [#b_literal{}=Map,Key] ->
            %% Bind the literal map to a variable, since
            %% get_map_element does not accept a literal map.
            {MapVar,Count} = new_var('@ssa_map', Count0),
            I = I0#b_set{args=[MapVar,Key]},
            Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
            sanitize_is(Is, Last, Count, Values, true, [I,Copy|Acc]);
        [_,_]=Args0 ->
            sanitize_is(Is, Last, Count0, Values, Changed, [I0|Acc]);
        [_,_]=Args ->
            I = I0#b_set{args=Args},
            sanitize_is(Is, Last, Count0, Values, true, [I|Acc])
    end;
sanitize_is([#b_set{op={succeeded,guard},dst=Dst,args=[Arg0]}=I0],
            #b_br{bool=Dst}=Last, Count, Values, _Changed, Acc0) ->
    %% We no longer need to distinguish between guard and body checks, so we'll
    %% rewrite this as a plain 'succeeded'.
    case sanitize_arg(Arg0, Values) of
        #b_var{}=Arg ->
            case Acc0 of
                [#b_set{op=call,
                        args=[#b_remote{mod=#b_literal{val=erlang},
                                        name=#b_literal{val=error},
                                        arity=1},_],
                        dst=Arg0}|Acc] ->
                    %% This erlang:error/1 is the result from a
                    %% sanitized bs_add or bs_init instruction. Calls
                    %% to erlang:error/1 in a receive are not allowed,
                    %% so we will have to rewrite this instruction
                    %% sequence to an unconditional branch to the
                    %% failure label.
                    Fail = Last#b_br.fail,
                    Br = #b_br{bool=#b_literal{val=true},succ=Fail,fail=Fail},
                    {reverse(Acc), Br, Count, Values};
                _ ->
                    I = I0#b_set{op=succeeded,args=[Arg]},
                    {reverse(Acc0, [I]), Last, Count, Values}
            end;
        #b_literal{} ->
            Value = #b_literal{val=true},
            {reverse(Acc0), Last, Count, Values#{ Dst => Value }}
    end;
sanitize_is([#b_set{op={succeeded,body},dst=Dst,args=[Arg0]}=I0],
            #b_br{bool=Dst}=Last, Count, Values, _Changed, Acc) ->
    %% We no longer need to distinguish between guard and body checks, so we'll
    %% rewrite this as a plain 'succeeded'.
    case sanitize_arg(Arg0, Values) of
        #b_var{}=Arg ->
            I = I0#b_set{op=succeeded,args=[Arg]},
            {reverse(Acc, [I]), Last, Count, Values};
        #b_literal{} ->
            Value = #b_literal{val=true},
            {reverse(Acc), Last, Count, Values#{ Dst => Value }}
    end;
sanitize_is([#b_set{op={succeeded,Kind},args=[Arg0]} | Is],
            Last, Count, Values, _Changed, Acc) ->
    %% We're no longer branching on this instruction and can safely remove it.
    [] = Is, #b_br{succ=Same,fail=Same} = Last, %Assertion.
    if
        Same =:= ?EXCEPTION_BLOCK ->
            %% The checked instruction always fails at runtime and we're not
            %% in a try/catch; rewrite the terminator to a return.
            body = Kind,                        %Assertion.
            Arg = sanitize_arg(Arg0, Values),
            sanitize_is(Is, #b_ret{arg=Arg}, Count, Values, true, Acc);
        Same =/= ?EXCEPTION_BLOCK ->
            %% We either always succeed, or always fail to somewhere other than
            %% the exception block.
            true = Kind =:= guard orelse Kind =:= body, %Assertion.
            sanitize_is(Is, Last, Count, Values, true, Acc)
    end;
sanitize_is([#b_set{op=bs_test_tail}=I], Last, Count, Values, Changed, Acc) ->
    case Last of
        #b_br{succ=Same,fail=Same} ->
            %% A test with identical success and failure targets is
            %% meaningless; drop it.
            sanitize_is([], Last, Count, Values, true, Acc);
        _ ->
            do_sanitize_is(I, [], Last, Count, Values, Changed, Acc)
    end;
sanitize_is([#b_set{}=I|Is], Last, Count, Values, Changed, Acc) ->
    do_sanitize_is(I, Is, Last, Count, Values, Changed, Acc);
sanitize_is([], Last, Count, Values, Changed, Acc) ->
    case Changed of
        true ->
            {reverse(Acc), Last, Count, Values};
        false ->
            no_change
    end.

do_sanitize_is(#b_set{op=Op,dst=Dst,args=Args0}=I0,
               Is, Last, Count, Values, Changed0, Acc) ->
    Args = sanitize_args(Args0, Values),
    case sanitize_instr(Op, Args, I0) of
        {value,Value0} ->
            %% The instruction was evaluated at compile time; record
            %% the substitution and drop the instruction.
            Value = #b_literal{val=Value0},
            sanitize_is(Is, Last, Count, Values#{Dst=>Value}, true, Acc);
        {ok,I} ->
            sanitize_is(Is, Last, Count, Values, true, [I|Acc]);
        ok ->
            I = I0#b_set{args=Args},
            Changed = Changed0 orelse Args =/= Args0,
            sanitize_is(Is, Last, Count, Values, Changed, [I|Acc])
    end.

sanitize_args(Args, Values) ->
    [sanitize_arg(Arg, Values) || Arg <- Args].

%% Apply the substitution map to a single argument.
sanitize_arg(#b_var{}=Var, Values) ->
    case Values of
        #{Var:=New} -> New;
        #{} -> Var
    end;
sanitize_arg(Arg, _Values) ->
    Arg.

sanitize_instr(phi, PhiArgs, _I) ->
    case phi_all_same_literal(PhiArgs) of
        true ->
            %% (Can only happen when some optimizations have been
            %% turned off.)
            %%
            %% This phi node always produces the same literal value.
            %% We must do constant propagation of the value to ensure
            %% that we can sanitize any instructions that don't accept
            %% literals (such as `get_hd`). This is necessary for
            %% correctness, because beam_ssa_codegen:prefer_xregs/2
            %% does constant propagation and could propagate a literal
            %% into an instruction that doesn't accept literals.
            [{#b_literal{val=Val},_}|_] = PhiArgs,
            {value,Val};
        false ->
            ok
    end;
sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
    case erl_bifs:is_pure(erlang, Bif, 1) of
        false ->
            ok;
        true ->
            try
                {value,erlang:Bif(Lit)}
            catch
                error:_ ->
                    ok
            end
    end;
sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) ->
    true = erl_bifs:is_pure(erlang, Bif, 2),    %Assertion.
    try
        {value,erlang:Bif(Lit1, Lit2)}
    catch
        error:_ ->
            ok
    end;
sanitize_instr(bs_match, Args, I) ->
    %% Matching of floats is never changed to a bs_skip even when the
    %% value is never used, because the match can always fail (for example,
    %% if it is a NaN).
    [#b_literal{val=float}|_] = Args,           %Assertion.
    {ok,I#b_set{op=bs_get}};
sanitize_instr(get_hd, [#b_literal{val=[Hd|_]}], _I) ->
    {value,Hd};
sanitize_instr(get_tl, [#b_literal{val=[_|Tl]}], _I) ->
    {value,Tl};
sanitize_instr(get_tuple_element, [#b_literal{val=T},
                                   #b_literal{val=I}], _I)
  when I < tuple_size(T) ->
    {value,element(I+1, T)};
sanitize_instr(is_nonempty_list, [#b_literal{val=Lit}], _I) ->
    {value,case Lit of
               [_|_] -> true;
               _ -> false
           end};
sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
                                 #b_literal{val=Arity},
                                 #b_literal{val=Tag}], _I)
  when is_integer(Arity), is_atom(Tag) ->
    if
        tuple_size(Tuple) =:= Arity, element(1, Tuple) =:= Tag ->
            {value,true};
        true ->
            {value,false}
    end;
sanitize_instr(bs_add, [Arg1,Arg2,_|_], I0) ->
    case all(fun(#b_literal{val=Size}) -> is_integer(Size) andalso Size >= 0;
                (#b_var{}) -> true
             end, [Arg1,Arg2]) of
        true -> ok;
        false -> {ok,sanitize_badarg(I0)}
    end;
sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) ->
    if
        is_integer(Sz), Sz >= 0 -> ok;
        true -> {ok,sanitize_badarg(I0)}
    end;
sanitize_instr(bs_init, [#b_literal{},_,#b_literal{val=Sz}|_], I0) ->
    if
        is_integer(Sz), Sz >= 0 -> ok;
        true -> {ok,sanitize_badarg(I0)}
    end;
sanitize_instr(_, _, _) ->
    ok.

%% Replace an instruction with known-bad arguments by a call to
%% erlang:error(badarg).
sanitize_badarg(I) ->
    Func = #b_remote{mod=#b_literal{val=erlang},
                     name=#b_literal{val=error},arity=1},
    I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}.

%% Keep only the reachable blocks, pruning phi arguments that refer to
%% unreachable predecessors.
remove_unreachable([L|Ls], Blocks, Reachable, Acc) ->
    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
    case split_phis(Is0) of
        {[_|_]=Phis,Rest} ->
            Is = [prune_phi(Phi, Reachable) || Phi <- Phis] ++ Rest,
            Blk = Blk0#b_blk{is=Is},
            remove_unreachable(Ls, Blocks, Reachable, [{L,Blk}|Acc]);
        {[],_} ->
            remove_unreachable(Ls, Blocks, Reachable, [{L,Blk0}|Acc])
    end;
remove_unreachable([], _Blocks, _, Acc) ->
    maps:from_list(Acc).
%% prune_phi(Phi0, Reachable) -> Phi.
%%  Remove all phi arguments that refer to predecessor blocks that
%%  are no longer reachable.

prune_phi(#b_set{args=PhiArgs}=Phi, Reachable) ->
    Live = [Pair || {_,Pred}=Pair <- PhiArgs,
                    gb_sets:is_element(Pred, Reachable)],
    Phi#b_set{args=Live}.

%% phi_all_same_literal([{Value,FromLabel}]) -> true | false.
%%  Test whether all arguments of a phi node are one and the same
%%  literal value.

phi_all_same_literal([{#b_literal{}=Lit,_From}|Rest]) ->
    phi_all_same_literal_1(Rest, Lit);
phi_all_same_literal([_|_]) ->
    false.

phi_all_same_literal_1([{Same,_From}|Rest], Same) ->
    phi_all_same_literal_1(Rest, Same);
phi_all_same_literal_1([], _) ->
    true;
phi_all_same_literal_1(_, _) ->
    false.

%%% Rewrite certain calls to erlang:error/{1,2} to specialized
%%% instructions:
%%%
%%% erlang:error({badmatch,Value})       => badmatch Value
%%% erlang:error({case_clause,Value})    => case_end Value
%%% erlang:error({try_clause,Value})     => try_case_end Value
%%% erlang:error(if_clause)              => if_end
%%% erlang:error(function_clause, Args)  => jump FuncInfoLabel
%%%
%%% In SSA code, we represent those instructions as a 'match_fail'
%%% instruction with the name of the BEAM instruction as the first
%%% argument.

match_fail_instructions(#st{ssa=Blocks0,args=Args,location=Location}=St) ->
    Linear = maps:to_list(Blocks0),
    Info = {length(Args),Location},
    Blocks = match_fail_instrs_1(Linear, Info, Blocks0),
    St#st{ssa=Blocks}.

match_fail_instrs_1([{L,#b_blk{is=Is0}=Blk}|Bs], Info, Blocks0) ->
    Blocks = case match_fail_instrs_blk(Is0, Info, []) of
                 none ->
                     %% Nothing to rewrite in this block.
                     Blocks0;
                 Is ->
                     Blocks0#{L:=Blk#b_blk{is=Is}}
             end,
    match_fail_instrs_1(Bs, Info, Blocks);
match_fail_instrs_1([], _Info, Blocks) -> Blocks.
%% match_fail_instrs_blk(Is, {Arity,Location}, Acc) -> [Instruction] | 'none'.
%%  Rewrite a call to erlang:error/{1,2} in the instruction sequence
%%  of a single block to a match_fail instruction, if possible.
%%  Return 'none' if no rewrite was done.

match_fail_instrs_blk([#b_set{op=put_tuple,dst=Dst,
                              args=[#b_literal{val=Tag},Val]},
                       #b_set{op=call,
                              args=[#b_remote{mod=#b_literal{val=erlang},
                                              name=#b_literal{val=error}},
                                    Dst]}=Call|Is],
                      _Arity, Acc) ->
    %% erlang:error/1 called with a freshly built two-tuple,
    %% such as {badmatch,Value}.
    match_fail_instr(Call, Tag, Val, Is, Acc);
match_fail_instrs_blk([#b_set{op=call,
                              args=[#b_remote{mod=#b_literal{val=erlang},
                                              name=#b_literal{val=error}},
                                    #b_literal{val={Tag,Val}}]}=Call|Is],
                      _Arity, Acc) ->
    %% erlang:error/1 called with a literal two-tuple.
    match_fail_instr(Call, Tag, #b_literal{val=Val}, Is, Acc);
match_fail_instrs_blk([#b_set{op=call,
                              args=[#b_remote{mod=#b_literal{val=erlang},
                                              name=#b_literal{val=error}},
                                    #b_literal{val=if_clause}]}=Call|Is],
                      _Arity, Acc) ->
    %% erlang:error(if_clause) becomes an if_end instruction.
    I = Call#b_set{op=match_fail,args=[#b_literal{val=if_end}]},
    reverse(Acc, [I|Is]);
match_fail_instrs_blk([#b_set{op=call,anno=Anno,
                              args=[#b_remote{mod=#b_literal{val=erlang},
                                              name=#b_literal{val=error}},
                                    #b_literal{val=function_clause},
                                    Stk]}=Call],
                      {Arity,Location}, Acc) ->
    %% erlang:error(function_clause, Stk) in the tail position of the
    %% block. Try to pick the stacktrace apart into the argument list.
    case match_fail_stk(Stk, Acc, [], []) of
        {[_|_]=Vars,Is} when length(Vars) =:= Arity ->
            case maps:get(location, Anno, none) of
                Location ->
                    I = Call#b_set{op=match_fail,
                                   args=[#b_literal{val=function_clause}|Vars]},
                    Is ++ [I];
                _ ->
                    %% erlang:error/2 has a different location than the
                    %% func_info instruction at the beginning of the function
                    %% (probably because of inlining). Keep the original call.
                    reverse(Acc, [Call])
            end;
        _ ->
            %% Either the stacktrace could not be picked apart (for example,
            %% if the call to erlang:error/2 was handwritten) or the number
            %% of arguments in the stacktrace was different from the arity
            %% of the host function (because it is the implementation of a
            %% fun). Keep the original call.
            reverse(Acc, [Call])
    end;
match_fail_instrs_blk([I|Is], Arity, Acc) ->
    match_fail_instrs_blk(Is, Arity, [I|Acc]);
match_fail_instrs_blk(_, _, _) ->
    %% The end of the block was reached without a rewrite.
    none.

%% Map the error reason Tag to the corresponding match_fail variant;
%% 'none' means that this call cannot be rewritten.

match_fail_instr(Call, Tag, Val, Is, Acc) ->
    Op = case Tag of
             badmatch -> Tag;
             case_clause -> case_end;
             try_clause -> try_case_end;
             _ -> none
         end,
    case Op of
        none ->
            none;
        _ ->
            I = Call#b_set{op=match_fail,args=[#b_literal{val=Op},Val]},
            reverse(Acc, [I|Is])
    end.

%% match_fail_stk(Stk, Is, InstrAcc, VarAcc) -> {[Arg],[Instruction]} | 'none'.
%%  Pick apart the stacktrace argument of erlang:error(function_clause, Stk)
%%  by walking backwards through the put_list instructions that built it.
%%  Succeeds only if the complete argument list can be recovered.

match_fail_stk(#b_var{}=V, [#b_set{op=put_list,dst=V,args=[H,T]}|Is], IAcc, VAcc) ->
    match_fail_stk(T, Is, IAcc, [H|VAcc]);
match_fail_stk(#b_literal{val=[H|T]}, Is, IAcc, VAcc) ->
    match_fail_stk(#b_literal{val=T}, Is, IAcc, [#b_literal{val=H}|VAcc]);
match_fail_stk(#b_literal{val=[]}, [], IAcc, VAcc) ->
    {reverse(VAcc),IAcc};
match_fail_stk(T, [#b_set{op=Op}=I|Is], IAcc, VAcc)
  when Op =:= bs_get_tail; Op =:= bs_set_position ->
    %% Keep these instructions, but continue searching backwards for
    %% the put_list instructions that built the stacktrace.
    match_fail_stk(T, Is, [I|IAcc], VAcc);
match_fail_stk(_, _, _, _) -> none.

%%%
%%% Fix tuples.
%%%

%% fix_tuples(St0) -> St.
%%  If compatibility with a previous version of Erlang has been
%%  requested, tuple creation must be split into two instructions to
%%  mirror the way tuples are created in BEAM prior to OTP 22.
%%  Each put_tuple instruction is split into put_tuple_arity followed
%%  by put_tuple_elements.

fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
    F = fun (#b_set{op=put_tuple,args=Args}=Put, C0) ->
                Arity = #b_literal{val=length(Args)},
                {Ignore,C} = new_var('@ssa_ignore', C0),
                {[Put#b_set{op=put_tuple_arity,args=[Arity]},
                  #b_set{dst=Ignore,op=put_tuple_elements,args=Args}],C};
            (I, C) -> {[I],C}
        end,
    RPO = beam_ssa:rpo(Blocks0),
    {Blocks,Count} = beam_ssa:flatmapfold_instrs(F, RPO, Count0, Blocks0),
    St#st{ssa=Blocks,cnt=Count}.
%%%
%%% Introduce the set_tuple_element instructions to make
%%% multiple-field record updates faster.
%%%
%%% The expansion of record field updates, when more than one field is
%%% updated, but not a majority of the fields, will create a sequence of
%%% calls to `erlang:setelement(Index, Value, Tuple)` where Tuple in the
%%% first call is the original record tuple, and in the subsequent calls
%%% Tuple is the result of the previous call. Furthermore, all Index
%%% values are constant positive integers, and the first call to
%%% `setelement` will have the greatest index. Thus all the following
%%% calls do not actually need to test at run-time whether Tuple has type
%%% tuple, nor that the index is within the tuple bounds.
%%%
%%% Since this optimization introduces destructive updates, it used to
%%% be done as the very last Core Erlang pass before going to
%%% lower-level code. However, it turns out that this kind of destructive
%%% updates are awkward also in SSA code and can prevent or complicate
%%% type analysis and aggressive optimizations.
%%%
%%% NOTE: Because there are no write barriers in the system, this kind of
%%% optimization can only be done when we are sure that garbage
%%% collection will not be triggered between the creation of the tuple
%%% and the destructive updates - otherwise we might insert pointers
%%% from an older generation to a newer.
%%%

use_set_tuple_element(#st{ssa=Blocks0}=St) ->
    UseCounts = count_uses(Blocks0),
    %% Process the blocks bottom-up (reversed reverse post-order).
    BottomUp = reverse(beam_ssa:rpo(Blocks0)),
    St#st{ssa=use_ste_1(BottomUp, UseCounts, Blocks0)}.
use_ste_1([L|Ls], Uses, Blocks) ->
    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
    case use_ste_is(Is0, Uses) of
        Is0 ->
            %% Nothing changed in this block.
            use_ste_1(Ls, Uses, Blocks);
        Is ->
            Blk = Blk0#b_blk{is=Is},
            use_ste_1(Ls, Uses, Blocks#{L:=Blk})
    end;
use_ste_1([], _, Blocks) -> Blocks.

%%% Optimize within a single block.

use_ste_is([#b_set{}=I|Is0], Uses) ->
    Is = use_ste_is(Is0, Uses),
    case extract_ste(I) of
        none ->
            [I|Is];
        Extracted ->
            %% I is a call to erlang:setelement/3; try to combine it
            %% with a following setelement call.
            use_ste_call(Extracted, I, Is, Uses)
    end;
use_ste_is([], _Uses) -> [].

%% use_ste_call(Extracted, Call1, Is, Uses) -> [Instruction].
%%  Try to combine two setelement calls into a single setelement call
%%  followed by a destructive set_tuple_element instruction.

use_ste_call({Dst0,Pos0,_Var0,_Val0}, Call1, Is0, Uses) ->
    case get_ste_call(Is0, []) of
        {Prefix,{Dst1,Pos1,Dst0,Val1},Call2,Is}
          when Pos1 > 0, Pos0 > Pos1 ->
            %% The second setelement call operates on the result (Dst0)
            %% of the first one, and its position is strictly smaller
            %% than that of the first call.
            case is_single_use(Dst0, Uses) of
                true ->
                    %% The intermediate tuple is used nowhere else, so
                    %% it is safe to update it destructively. Note that
                    %% set_tuple_element uses a zero-based position.
                    Call = Call1#b_set{dst=Dst1},
                    Args = [Val1,Dst1,#b_literal{val=Pos1-1}],
                    Dsetel = Call2#b_set{op=set_tuple_element,
                                         dst=Dst0,
                                         args=Args},
                    [Call|Prefix] ++ [Dsetel|Is];
                false ->
                    [Call1|Is0]
            end;
        _ ->
            [Call1|Is0]
    end.

%% Find the next setelement call in the block, skipping over (and
%% collecting) any get_tuple_element instructions before it.

get_ste_call([#b_set{op=get_tuple_element}=I|Is], Acc) ->
    get_ste_call(Is, [I|Acc]);
get_ste_call([#b_set{op=call}=I|Is], Acc) ->
    case extract_ste(I) of
        none ->
            none;
        Extracted ->
            {reverse(Acc),Extracted,I,Is}
    end;
get_ste_call(_, _) -> none.

%% extract_ste(Instruction) -> {Dst,Pos,Tuple,Val} | 'none'.
%%  Extract the parts of a call to erlang:setelement(Pos, Tuple, Val).

extract_ste(#b_set{op=call,dst=Dst,
                   args=[#b_remote{mod=#b_literal{val=M},
                                   name=#b_literal{val=F}}|Args]}) ->
    case {M,F,Args} of
        {erlang,setelement,[#b_literal{val=Pos},Tuple,Val]} ->
            {Dst,Pos,Tuple,Val};
        {_,_,_} ->
            none
    end;
extract_ste(#b_set{}) -> none.

%% Count how many times each variable is used.

count_uses(Blocks) ->
    count_uses_blk(maps:values(Blocks), #{}).
count_uses_blk([#b_blk{is=Is,last=Last}|Bs], CountMap0) ->
    F = fun(I, CountMap) ->
                foldl(fun(Var, Acc) ->
                              case Acc of
                                  %% Saturate the count at 2; callers only
                                  %% need to know whether a variable is
                                  %% used exactly once.
                                  #{Var:=2} -> Acc;
                                  #{Var:=C} -> Acc#{Var:=C+1};
                                  #{} -> Acc#{Var=>1}
                              end
                      end, CountMap, beam_ssa:used(I))
        end,
    CountMap = F(Last, foldl(F, CountMap0, Is)),
    count_uses_blk(Bs, CountMap);
count_uses_blk([], CountMap) -> CountMap.

%% is_single_use(Var, Uses) -> true|false.
%%  Test whether Var is used exactly once.

is_single_use(V, Uses) ->
    case Uses of
        #{V:=1} -> true;
        #{} -> false
    end.

%%%
%%% Find out where frames should be placed.
%%%

%% place_frames(St0) -> St.
%%   Return a list of the labels for the blocks that need stack frame
%%   allocation instructions.
%%
%%   This function attempts to place stack frames as tight as possible
%%   around the code, to avoid building stack frames for code paths
%%   that don't need one.
%%
%%   Stack frames are placed in blocks that dominate all of their
%%   descendants. That guarantees that the deallocation instructions
%%   cannot be reached from other execution paths that didn't set up
%%   a stack frame or set up a stack frame with a different size.

place_frames(#st{ssa=Blocks}=St) ->
    Ls = beam_ssa:rpo(Blocks),
    {Doms,_} = beam_ssa:dominators(Ls, Blocks),
    Tried = gb_sets:empty(),
    Frames0 = [],
    {Frames,_} = place_frames_1(Ls, Blocks, Doms, Tried, Frames0),
    St#st{frames=Frames}.

place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
    Blk = map_get(L, Blocks),
    case need_frame(Blk) of
        true ->
            %% This block needs a frame. Try to place it here.
            {Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0),

            %% Successfully placed. Try to place more frames in descendants
            %% that are not dominated by this block.
            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
        false ->
            try
                place_frames_1(Ls, Blocks, Doms, Tried0, Frames0)
            catch
                throw:{need_frame,For,Tried1}=Reason ->
                    %% A descendant block needs a stack frame. Try to
                    %% place it here.
                    case is_dominated_by(For, L, Doms) of
                        true ->
                            %% Try to place a frame here.
                            {Frames,Tried} = do_place_frame(L, Blocks, Doms,
                                                            Tried1, Frames0),
                            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
                        false ->
                            %% Wrong place. This block does not dominate
                            %% the block that needs the frame. Pass it on
                            %% to our ancestors.
                            throw(Reason)
                    end
            end
    end;
place_frames_1([], _, _, Tried, Frames) ->
    {Frames,Tried}.

%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}.
%%  Try to place a frame in this block. This function returns
%%  successfully if it either succeeds at placing a frame in this
%%  block, if an ancestor that dominates this block has already placed
%%  a frame, or if we have already tried to put a frame in this block.
%%
%%  A {need_frame,Label,Tried} exception will be thrown if this block
%%  is not suitable for having a stack frame (i.e. it does not dominate
%%  all of its descendants). The exception means that an ancestor will have to
%%  place the frame needed by this block.

do_place_frame(L, Blocks, Doms, Tried0, Frames) ->
    case gb_sets:is_element(L, Tried0) of
        true ->
            %% We have already tried to put a frame in this block.
            {Frames,Tried0};
        false ->
            %% Try to place a frame in this block.
            Tried = gb_sets:insert(L, Tried0),
            case place_frame_here(L, Blocks, Doms, Frames) of
                yes ->
                    %% We need a frame and it is safe to place it here.
                    {[L|Frames],Tried};
                no ->
                    %% An ancestor has a frame. Not needed.
                    {Frames,Tried};
                ancestor ->
                    %% This block does not dominate all of its
                    %% descendants. We must place the frame in
                    %% an ancestor.
                    throw({need_frame,L,Tried})
            end
    end.

%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor.
%%  Determine whether a frame should be placed in block Label.

place_frame_here(L, Blocks, Doms, Frames) ->
    B0 = any(fun(DomBy) ->
                     is_dominated_by(L, DomBy, Doms)
             end, Frames),
    case B0 of
        true ->
            %% This block is dominated by an ancestor block that
            %% defines a frame. Not needed/allowed to put a frame
            %% here.
            no;
        false ->
            %% No frame in any ancestor. We need a frame.
            %% Now check whether the frame can be placed here.
            %% If this block dominates all of its descendants
            %% and the predecessors of any phi nodes it can be
            %% placed here.
            Descendants = beam_ssa:rpo([L], Blocks),
            PhiPredecessors = phi_predecessors(L, Blocks),
            MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants),
            Dominates = all(fun(?EXCEPTION_BLOCK) ->
                                    %% This block defines no variables and calls
                                    %% erlang:error(badarg). It does not matter
                                    %% whether L dominates ?EXCEPTION_BLOCK or not;
                                    %% it is still safe to put the frame in L.
                                    true;
                               (Bl) ->
                                    is_dominated_by(Bl, L, Doms)
                            end, MustDominate),

            %% Also, this block must not be a loop header.
            IsLoopHeader = is_loop_header(L, Blocks),
            case Dominates andalso not IsLoopHeader of
                true -> yes;
                false -> ancestor
            end
    end.

%% phi_predecessors(Label, Blocks) -> [Label].
%%  Return all predecessors referenced in phi nodes.

phi_predecessors(L, Blocks) ->
    #b_blk{is=Is} = map_get(L, Blocks),
    [P || #b_set{op=phi,args=Args} <- Is, {_,P} <- Args].

%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
%%  Test whether block Label is dominated by block DominatedBy.

is_dominated_by(L, DomBy, Doms) ->
    DominatedBy = map_get(L, Doms),
    member(DomBy, DominatedBy).

%% need_frame(#b_blk{}) -> true|false.
%%  Test whether any of the instructions in the block requires a stack frame.

need_frame(#b_blk{is=Is,last=#b_ret{arg=Ret}}) ->
    need_frame_1(Is, {return,Ret});
need_frame(#b_blk{is=Is}) ->
    need_frame_1(Is, body).

need_frame_1([#b_set{op=old_make_fun,dst=Fun}|Is], {return,Ret}=Context) ->
    case need_frame_1(Is, Context) of
        true ->
            true;
        false ->
            %% Since old_make_fun clobbers X registers, a stack frame is
            %% needed if any of the following instructions use any
            %% other variable than the one holding the reference to
            %% the created fun.
            Defs = ordsets:from_list([Dst || #b_set{dst=Dst} <- Is]),
            Blk = #b_blk{is=Is,last=#b_ret{arg=Ret}},
            Used = ordsets:subtract(beam_ssa:used(Blk), Defs),
            case Used of
                [] -> false;
                [Fun] -> false;
                [_|_] -> true
            end
    end;
need_frame_1([#b_set{op=new_try_tag}|_], _) ->
    true;
need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) ->
    %% The call is the last instruction in the block; check whether the
    %% value of the call is returned unchanged (a tail call).
    if
        Val =:= Ret -> need_frame_1(Is, tail);
        true -> need_frame_1(Is, body)
    end;
need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
    case Func of
        #b_remote{mod=#b_literal{val=Mod},
                  name=#b_literal{val=Name},
                  arity=Arity} when is_atom(Mod), is_atom(Name) ->
            Context =:= body orelse
                Is =/= [] orelse
                is_trap_bif(Mod, Name, Arity);
        #b_remote{} ->
            %% This is an apply(), which always needs a frame.
            true;
        #b_local{} ->
            Context =:= body orelse Is =/= [];
        _ ->
            %% A fun call always needs a frame.
            true
    end;
need_frame_1([I|Is], Context) ->
    beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
need_frame_1([], _) -> false.
%% is_trap_bif(Mod, Name, Arity) -> true|false.
%%  Test whether we need a stack frame for this BIF.

is_trap_bif(M, F, A) ->
    member({M,F,A},
           [{erlang,'!',2},
            {erlang,link,1},
            {erlang,unlink,1},
            {erlang,monitor_node,2},
            {erlang,group_leader,2},
            {erlang,exit,2}]).

%%%
%%% Fix variables used in matching in receive.
%%%
%%% The loop_rec/2 instruction may return a reference to a
%%% message outside of any heap or heap fragment. If the message
%%% does not match, it is not allowed to store any reference to
%%% the message (or part of the message) on the stack. If we do,
%%% the message will be corrupted if there happens to be a GC.
%%%
%%% Here we make sure to introduce copies of variables that are
%%% matched out and subsequently used after the remove_message/0
%%% instructions. That will make sure that only X registers are
%%% used during matching.
%%%
%%% Depending on where variables are defined and used, they must
%%% be handled in two different ways.
%%%
%%% Variables that are always defined in the receive (before branching
%%% out into the different clauses of the receive) and used after the
%%% receive must be handled in the following way: Before each
%%% remove_message instruction, each such variable must be copied, and
%%% all variables must be consolidated using a phi node in the
%%% common exit block for the receive.
%%%
%%% Variables that are matched out and used in the same clause
%%% need copy instructions before the remove_message instruction
%%% in that clause.
%%%

fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) ->
    Linear = maps:to_list(Blocks0),
    {Blocks,Count} = fix_receives_1(Linear, Blocks0, Count0),
    St#st{ssa=Blocks,cnt=Count}.
fix_receives_1([{L,Blk}|Ls], Blocks0, Count0) ->
    case Blk of
        #b_blk{is=[#b_set{op=peek_message}|_]} ->
            %% This block is the entry point of a receive. Find the
            %% blocks with remove_message instructions and the common
            %% exit block (if any), then insert the needed copies and
            %% phi nodes.
            Rm = find_rm_blocks(L, Blocks0),
            LoopExit = find_loop_exit(Rm, Blocks0),
            RPO = beam_ssa:rpo([L], Blocks0),
            Defs0 = beam_ssa:def(RPO, Blocks0),
            CommonUsed = recv_common(Defs0, LoopExit, Blocks0),
            {Blocks1,Count1} = recv_crit_edges(Rm, LoopExit, Blocks0, Count0),
            {Blocks2,Count2} = recv_fix_common(CommonUsed, LoopExit, Rm,
                                               Blocks1, Count1),
            Defs = ordsets:subtract(Defs0, CommonUsed),
            {Blocks,Count} = fix_receive(Rm, Defs, Blocks2, Count2),
            fix_receives_1(Ls, Blocks, Count);
        #b_blk{} ->
            fix_receives_1(Ls, Blocks0, Count0)
    end;
fix_receives_1([], Blocks, Count) ->
    {Blocks,Count}.

%% recv_common(Defs, Exit, Blocks) -> [CommonVar].
%%  Find the variables that are defined within the receive and also
%%  used in (or after) the common exit block.

recv_common(_Defs, none, _Blocks) ->
    %% There is no common exit block because receive is used
    %% in the tail position of a function.
    [];
recv_common(Defs, Exit, Blocks) ->
    RPO = beam_ssa:rpo([Exit], Blocks),
    {ExitDefs,ExitUnused} = beam_ssa:def_unused(RPO, Defs, Blocks),
    Def = ordsets:subtract(Defs, ExitDefs),
    ordsets:subtract(Def, ExitUnused).

%% recv_crit_edges([RemoveMessageLabel], LoopExit,
%%                 Blocks0, Count0) -> {Blocks,Count}.
%%
%%  Adds dummy blocks on all conditional jumps to the exit block so that
%%  recv_fix_common/5 can insert phi nodes without having to worry about
%%  critical edges.

recv_crit_edges(_Rms, none, Blocks0, Count0) ->
    {Blocks0, Count0};
recv_crit_edges(Rms, Exit, Blocks0, Count0) ->
    Ls = beam_ssa:rpo(Rms, Blocks0),
    rce_insert_edges(Ls, Exit, Count0, Blocks0).
rce_insert_edges([L|Ls], Exit, Count0, Blocks0) ->
    Succ = beam_ssa:successors(map_get(L, Blocks0)),
    %% Only a conditional jump to the exit block is a critical edge;
    %% an unconditional jump needs no extra block.
    case member(Exit, Succ) andalso Succ =/= [Exit] of
        true ->
            {Blocks,Count} = rce_insert_edge(L, Exit, Count0, Blocks0),
            rce_insert_edges(Ls, Exit, Count, Blocks);
        false ->
            rce_insert_edges(Ls, Exit, Count0, Blocks0)
    end;
rce_insert_edges([], _Exit, Count, Blocks) ->
    {Blocks, Count}.

%% Insert an empty block (labeled Count) between block L and the exit
%% block, and reroute L's branches to the new block.

rce_insert_edge(L, Exit, Count, Blocks0) ->
    #b_blk{last=Last0} = FromBlk0 = map_get(L, Blocks0),

    %% The new edge block does nothing except jump on to the exit block.
    EdgeBlk = #b_blk{anno=#{},is=[],
                     last=#b_br{bool=#b_literal{val=true},
                                succ=Exit,fail=Exit}},

    Last = rce_reroute_terminator(Last0, Exit, Count),
    Blocks = Blocks0#{ Count => EdgeBlk, L => FromBlk0#b_blk{last=Last} },
    {Blocks, Count + 1}.

%% Replace every branch target equal to Exit in the terminator with New.

rce_reroute_terminator(#b_br{succ=Succ0,fail=Fail0}=Last, Exit, New) ->
    Last#b_br{succ=rce_retarget(Succ0, Exit, New),
              fail=rce_retarget(Fail0, Exit, New)};
rce_reroute_terminator(#b_switch{fail=Fail0,list=List0}=Last, Exit, New) ->
    List = [{Arg,rce_retarget(Lbl, Exit, New)} || {Arg,Lbl} <- List0],
    Last#b_switch{fail=rce_retarget(Fail0, Exit, New),list=List}.

rce_retarget(Exit, Exit, New) -> New;
rce_retarget(Lbl, _Exit, _New) -> Lbl.

%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
%%                 Blocks0, Count0) -> {Blocks,Count}.
%%  Handle variables always defined in a receive and used
%%  in the exit block following the receive.
recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) ->
    %% Rename the variable from the exit block onward and consolidate
    %% the per-clause copies with a phi node in the exit block.
    {Msg,Count1} = new_var('@recv', Count0),
    RPO = beam_ssa:rpo([Exit], Blocks0),
    Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, RPO, Blocks0),
    N = length(Rm),
    {MsgVars,Count} = new_vars(duplicate(N, '@recv'), Count1),
    PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1),
    Phi = #b_set{op=phi,dst=Msg,args=PhiArgs},
    ExitBlk0 = map_get(Exit, Blocks1),
    ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]},
    Blocks2 = Blocks1#{Exit:=ExitBlk},
    Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2),
    recv_fix_common(T, Exit, Rm, Blocks, Count);
recv_fix_common([], _, _, Blocks, Count) ->
    {Blocks,Count}.

%% For each remove_message block: rename the variable to that clause's
%% new name and insert a copy instruction after the phi nodes.

recv_fix_common_1([V|Vs], [Rm|Rms], Msg, Blocks0) ->
    Ren = #{Msg=>V},
    RPO = beam_ssa:rpo([Rm], Blocks0),
    Blocks1 = beam_ssa:rename_vars(Ren, RPO, Blocks0),
    #b_blk{is=Is0} = Blk0 = map_get(Rm, Blocks1),
    Copy = #b_set{op=copy,dst=V,args=[Msg]},
    Is = insert_after_phis(Is0, [Copy]),
    Blk = Blk0#b_blk{is=Is},
    Blocks = Blocks1#{Rm:=Blk},
    recv_fix_common_1(Vs, Rms, Msg, Blocks);
recv_fix_common_1([], [], _Msg, Blocks) -> Blocks.

%% Build the argument list for the phi node in the exit block: each
%% predecessor of the exit block reachable from a remove_message block
%% contributes that clause's copy of the variable.

fix_exit_phi_args([V|Vs], [Rm|Rms], Exit, Blocks) ->
    Path = beam_ssa:rpo([Rm], Blocks),
    Preds = exit_predecessors(Path, Exit, Blocks),
    [{V,Pred} || Pred <- Preds] ++ fix_exit_phi_args(Vs, Rms, Exit, Blocks);
fix_exit_phi_args([], [], _, _) -> [].

%% Return the labels (among Ls) of the blocks that branch directly to
%% the exit block.

exit_predecessors([L|Ls], Exit, Blocks) ->
    Blk = map_get(L, Blocks),
    case member(Exit, beam_ssa:successors(Blk)) of
        true ->
            [L|exit_predecessors(Ls, Exit, Blocks)];
        false ->
            exit_predecessors(Ls, Exit, Blocks)
    end;
exit_predecessors([], _Exit, _Blocks) -> [].

%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
%%  Add a copy instruction for all variables that are matched out and
%%  later used within a clause of the receive.
fix_receive([L|Ls], Defs, Blocks0, Count0) ->
    RPO = beam_ssa:rpo([L], Blocks0),
    {RmDefs,Unused} = beam_ssa:def_unused(RPO, Defs, Blocks0),
    Def = ordsets:subtract(Defs, RmDefs),
    Used = ordsets:subtract(Def, Unused),
    %% Make a copy of each variable that is matched out and used after
    %% remove_message, and rename all following uses to the copy.
    {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Used], Count0),
    Ren = zip(Used, NewVars),
    Blocks1 = beam_ssa:rename_vars(Ren, RPO, Blocks0),
    #b_blk{is=Is0} = Blk1 = map_get(L, Blocks1),
    CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
    Is = insert_after_phis(Is0, CopyIs),
    Blk = Blk1#b_blk{is=Is},
    Blocks = Blocks1#{L:=Blk},
    fix_receive(Ls, Defs, Blocks, Count);
fix_receive([], _Defs, Blocks, Count) ->
    {Blocks,Count}.

%% find_loop_exit([Label], Blocks) -> Label | none.
%%  Given the list of all blocks with the remove_message instructions
%%  for this receive, find the block to which control is transferred
%%  when the receive loop is exited (if any).

find_loop_exit([_,_|_]=RmBlocks, Blocks) ->
    %% We used to only analyze the path from two of the remove_message
    %% blocks. That would fail to find a common block if one or both
    %% of the blocks happened to raise an exception. To be sure that
    %% we always find a common block if there is one (shared by at
    %% least two clauses), we must analyze the path from all
    %% remove_message blocks.
    RPO = beam_ssa:rpo(Blocks),
    {Dominators,_} = beam_ssa:dominators(RPO, Blocks),
    RmSet = sets:from_list(RmBlocks, [{version, 2}]),
    RmRPO = beam_ssa:rpo(RmBlocks, Blocks),
    find_loop_exit_1(RmRPO, RmSet, Dominators, Blocks);
find_loop_exit(_, _) ->
    %% There is (at most) a single clause. There is no common
    %% loop exit block.
    none.

find_loop_exit_1([?EXCEPTION_BLOCK|Ls], RmSet, Dominators, Blocks) ->
    %% ?EXCEPTION_BLOCK is a marker and not an actual block, so it is not
    %% the block we are looking for.
    find_loop_exit_1(Ls, RmSet, Dominators, Blocks);
find_loop_exit_1([L|Ls0], RmSet, Dominators, Blocks) ->
    DomBy = map_get(L, Dominators),
    case any(fun(E) -> sets:is_element(E, RmSet) end, DomBy) of
        true ->
            %% This block is dominated by one of the remove_message blocks,
            %% which means that the block is part of only one clause.
            %% It is not the block we are looking for.
            find_loop_exit_1(Ls0, RmSet, Dominators, Blocks);
        false ->
            %% This block is the first block that is not dominated by
            %% any of the blocks with remove_message instructions.
            case map_get(L, Blocks) of
                #b_blk{is=[#b_set{op=landingpad}|_]} ->
                    %% This is the landing pad reached when an
                    %% exception is caught. It is not the block
                    %% we are looking for. Furthermore, none of the
                    %% blocks reachable from this block can be
                    %% the exit block we are looking for.
                    Ls = Ls0 -- beam_ssa:rpo([L], Blocks),
                    find_loop_exit_1(Ls, RmSet, Dominators, Blocks);
                #b_blk{} ->
                    %% This block is not dominated by any of the receive
                    %% clauses and is not the landing pad for an exception.
                    %% It is the common exit block we are looking for.
                    L
            end
    end;
find_loop_exit_1([], _, _, _) ->
    %% None of the clauses transfers control to a common block after the
    %% receive statement. That means that the receive statement is at the
    %% end of a function (or that all clauses raise exceptions).
    none.

%% find_rm_blocks(StartLabel, Blocks) -> [Label].
%%  Find all blocks that start with remove_message within the receive
%%  loop whose peek_message label is StartLabel.

find_rm_blocks(L, Blocks) ->
    Seen = gb_sets:singleton(L),
    Blk = map_get(L, Blocks),
    Succ = beam_ssa:successors(Blk),
    find_rm_blocks_1(Succ, Seen, Blocks).
find_rm_blocks_1([L|Ls], Seen0, Blocks) ->
    case gb_sets:is_member(L, Seen0) of
        true ->
            find_rm_blocks_1(Ls, Seen0, Blocks);
        false ->
            Seen = gb_sets:insert(L, Seen0),
            #b_blk{is=Is} = Blk = map_get(L, Blocks),
            case find_rm_act(Is) of
                found ->
                    %% Found a remove_message instruction.
                    [L|find_rm_blocks_1(Ls, Seen, Blocks)];
                prune ->
                    %% Looping back. Don't look at any successors.
                    find_rm_blocks_1(Ls, Seen, Blocks);
                continue ->
                    %% Neutral block. Do nothing here, but look at
                    %% all successors.
                    Succ = beam_ssa:successors(Blk),
                    find_rm_blocks_1(Succ ++ Ls, Seen, Blocks)
            end
    end;
find_rm_blocks_1([], _Seen, _Blocks) -> [].

%% Classify a block by scanning its instructions.

find_rm_act([#b_set{op=remove_message}|_]) -> found;
find_rm_act([#b_set{op=peek_message}|_]) -> prune;
find_rm_act([#b_set{op=recv_next}|_]) -> prune;
find_rm_act([#b_set{op=wait_timeout}|_]) -> prune;
find_rm_act([#b_set{}|Is]) -> find_rm_act(Is);
find_rm_act([]) -> continue.

%%%
%%% Find out which variables need to be stored in Y registers.
%%%

%% Per-path dataflow state: `d` is the ordset of variables defined so
%% far; `k` is the set of variables killed by instructions that
%% clobber X registers.
-record(dk, {d :: ordsets:ordset(var_name()),
             k :: sets:set(var_name())
            }).

%% find_yregs(St0) -> St.
%%  Find all variables that must be stored in Y registers. Annotate
%%  the blocks that allocate frames with the set of Y registers
%%  used within that stack frame.
%%
%%  Basically, we follow all execution paths starting from a block
%%  that allocates a frame, keeping track of all defined registers
%%  and all registers killed by an instruction that clobbers X
%%  registers. For every use of a variable, we check if it is in
%%  the set of killed variables; if it is, it must be stored in a Y
%%  register.

find_yregs(#st{frames=[]}=St) ->
    %% Without stack frames, no variable needs a Y register.
    St;
find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) ->
    ArgVars = [V || #b_var{}=V <- Args],
    FrameDefs = find_defs(Frames, Blocks0, ArgVars),
    St#st{ssa=find_yregs_1(FrameDefs, Blocks0)}.
find_yregs_1([{F,Defs}|Fs], Blocks0) ->
    %% Initial state for the frame block: the variables defined on
    %% entry are known (d) and nothing has been killed yet (k).
    DK = #dk{d=Defs,k=sets:new([{version, 2}])},
    D0 = #{F=>DK},
    Ls = beam_ssa:rpo([F], Blocks0),
    Yregs0 = sets:new([{version, 2}]),
    Yregs = find_yregs_2(Ls, Blocks0, D0, Yregs0),
    Blk0 = map_get(F, Blocks0),
    Blk = beam_ssa:add_anno(yregs, Yregs, Blk0),
    Blocks = Blocks0#{F:=Blk},
    find_yregs_1(Fs, Blocks);
find_yregs_1([], Blocks) -> Blocks.

find_yregs_2([L|Ls], Blocks0, D0, Yregs0) ->
    Blk0 = map_get(L, Blocks0),
    #b_blk{is=Is,last=Last} = Blk0,
    Ys0 = map_get(L, D0),
    {Yregs1,Ys} = find_yregs_is(Is, Ys0, Yregs0),
    Yregs = find_yregs_terminator(Last, Ys, Yregs1),
    Successors = beam_ssa:successors(Blk0),
    %% Propagate this block's state to all of its successors.
    D = find_update_succ(Successors, Ys, D0),
    find_yregs_2(Ls, Blocks0, D, Yregs);
find_yregs_2([], _Blocks, _D, Yregs) -> Yregs.

%% find_defs(FrameLabels, Blocks, ArgVars) -> [{FrameLabel,Defs}].
%%  For each block that allocates a frame, collect the variables that
%%  have been defined when the block is entered (starting from the
%%  function arguments at block 0).

find_defs(Frames, Blocks, Defs) ->
    Seen = gb_sets:empty(),
    FramesSet = gb_sets:from_list(Frames),
    {FrameDefs,_} = find_defs_1([0], Blocks, FramesSet, Seen, Defs, []),
    FrameDefs.

find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
    case gb_sets:is_member(L, Frames) of
        true ->
            %% Reached a frame block; record the variables defined on
            %% this path to it. The search does not continue past a
            %% frame block.
            OrderedDefs = ordsets:from_list(Defs0),
            find_defs_1(Ls, Blocks, Frames, Seen0, Defs0,
                        [{L,OrderedDefs}|Acc0]);
        false ->
            case gb_sets:is_member(L, Seen0) of
                true ->
                    find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0);
                false ->
                    Seen1 = gb_sets:insert(L, Seen0),
                    %% Process the remaining work list first, then
                    %% descend into this block's successors with the
                    %% definitions extended by this block.
                    {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0),
                    #b_blk{is=Is} = Blk = map_get(L, Blocks),
                    Defs = find_defs_is(Is, Defs0),
                    Successors = beam_ssa:successors(Blk),
                    find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc)
            end
    end;
find_defs_1([], _, _, Seen, _, Acc) ->
    {Acc,Seen}.

%% Accumulate the destination variables of all instructions in a block.

find_defs_is([#b_set{dst=Dst}|Is], Acc) ->
    find_defs_is(Is, [Dst|Acc]);
find_defs_is([], Acc) -> Acc.
%% find_update_succ(Successors, DK, DKMap0) -> DKMap.
%%  Merge the current block's defined/killed state into each
%%  successor: defs are intersected (a variable is only defined if it
%%  is defined on every path), kills are unioned.
find_update_succ([S|Ss], #dk{d=Defs0,k=Killed0}=DK0, D0) ->
    case D0 of
        #{S:=#dk{d=Defs1,k=Killed1}} ->
            Defs = ordsets:intersection(Defs0, Defs1),
            Killed = sets:union(Killed0, Killed1),
            DK = #dk{d=Defs,k=Killed},
            D = D0#{S:=DK},
            find_update_succ(Ss, DK0, D);
        #{} ->
            %% First predecessor seen for this successor.
            D = D0#{S=>DK0},
            find_update_succ(Ss, DK0, D)
    end;
find_update_succ([], _, D) -> D.

%% find_yregs_is(Is, DK0, Yregs0) -> {Yregs,DK}.
%%  Scan a block's instructions. Any used variable that is in the
%%  killed set must be a Y register. An instruction that clobbers X
%%  registers kills everything defined so far.
find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) ->
    Yregs1 = intersect_used(I, Killed0),
    Yregs = sets:union(Yregs0, Yregs1),
    case beam_ssa:clobbers_xregs(I) of
        false ->
            Defs = ordsets:add_element(Dst, Defs0),
            find_yregs_is(Is, Ys#dk{d=Defs}, Yregs);
        true ->
            %% All previously defined variables are now killed; only
            %% this instruction's result is still (re)defined.
            Killed = sets:union(sets:from_list(Defs0, [{version, 2}]), Killed0),
            Defs = [Dst],
            find_yregs_is(Is, Ys#dk{d=Defs,k=Killed}, Yregs)
    end;
find_yregs_is([], Ys, Yregs) -> {Yregs,Ys}.

%% Handle uses in the block terminator the same way.
find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs0) ->
    Yregs = intersect_used(Terminator, Killed),
    sets:union(Yregs0, Yregs).

%% intersect_used(InstrOrTerminator, Set) -> set of used variables
%%  that are members of Set.
intersect_used(#b_br{bool=#b_var{}=V}, Set) ->
    intersect_used_keep_singleton(V, Set);
intersect_used(#b_ret{arg=#b_var{}=V}, Set) ->
    intersect_used_keep_singleton(V, Set);
intersect_used(#b_set{op=phi,args=Args}, Set) ->
    %% Phi arguments are {Value,PredecessorLabel} pairs.
    sets:from_list([V || {#b_var{}=V,_} <- Args, sets:is_element(V, Set)], [{version, 2}]);
intersect_used(#b_set{args=Args}, Set) ->
    sets:from_list(intersect_used_keep(used_args(Args), Set), [{version, 2}]);
intersect_used(#b_switch{arg=#b_var{}=V}, Set) ->
    intersect_used_keep_singleton(V, Set);
intersect_used(_, _) -> sets:new([{version, 2}]).

intersect_used_keep_singleton(V, Set) ->
    case sets:is_element(V, Set) of
        true -> sets:from_list([V], [{version, 2}]);
        false -> sets:new([{version, 2}])
    end.

intersect_used_keep(Vs, Set) ->
    [V || V <- Vs, sets:is_element(V, Set)].
%% used_args(Args) -> [#b_var{}].
%%  Extract the variables used in an argument list. A remote call
%%  target (#b_remote{}) may use variables for module and name.
used_args([#b_var{}=V|As]) ->
    [V|used_args(As)];
used_args([#b_remote{mod=Mod,name=Name}|As]) ->
    used_args([Mod,Name|As]);
used_args([_|As]) ->
    used_args(As);
used_args([]) -> [].

%%%
%%% Try to reduce the size of the stack frame, by adding an explicit
%%% 'copy' instructions for return values from 'call' and 'old_make_fun' that
%%% need to be saved in Y registers. Here is an example to show
%%% how that's useful. First, here is the Erlang code:
%%%
%%% f(Pid) ->
%%%    Res = foo(42),
%%%    _ = node(Pid),
%%%    bar(),
%%%    Res.
%%%
%%% Compiled to SSA format, the main part of the code looks like this:
%%%
%%% 0:
%%%   Res = call local literal foo/1, literal 42
%%%   @ssa_bool:1 = succeeded:body Res
%%%   br @ssa_bool:1, label 2, label 1
%%% 2:
%%%   _1 = bif:node Pid
%%%   @ssa_bool:2 = succeeded:body _1
%%%   br @ssa_bool:2, label 3, label 1
%%% 3:
%%%   _2 = call local literal bar/0
%%%   @ssa_bool:3 = succeeded:body _2
%%%   br @ssa_bool:3, label 4, label 1
%%% 4:
%%%   ret Res
%%%
%%% It can be seen that the variables Pid and Res must be saved in Y
%%% registers in order to survive the function calls. A previous sub
%%% pass has inserted a 'copy' instruction to save the value of the
%%% variable Pid:
%%%
%%% 0:
%%%   Pid:4 = copy Pid
%%%   Res = call local literal foo/1, literal 42
%%%   @ssa_bool:1 = succeeded:body Res
%%%   br @ssa_bool:1, label 2, label 1
%%% 2:
%%%   _1 = bif:node Pid:4
%%%   @ssa_bool:2 = succeeded:body _1
%%%   br @ssa_bool:2, label 3, label 1
%%% 3:
%%%   _2 = call local literal bar/0
%%%   @ssa_bool:3 = succeeded:body _2
%%%   br @ssa_bool:3, label 4, label 1
%%% 4:
%%%   ret Res
%%%
%%% The Res and Pid:4 variables must be assigned to different Y registers
%%% because they are live at the same time. copy_retval() inserts a
%%% 'copy' instruction to copy Res to a new variable:
%%%
%%% 0:
%%%   Pid:4 = copy Pid
%%%   Res:6 = call local literal foo/1, literal 42
%%%   @ssa_bool:1 = succeeded:body Res:6
%%%   br @ssa_bool:1, label 2, label 1
%%% 2:
%%%   _1 = bif:node Pid:4
%%%   @ssa_bool:2 = succeeded:body _1
%%%   br @ssa_bool:2, label 3, label 1
%%% 3:
%%%   Res = copy Res:6
%%%   _2 = call local literal bar/0
%%%   @ssa_bool:3 = succeeded:body _2
%%%   br @ssa_bool:3, label 4, label 1
%%% 4:
%%%   ret Res
%%%
%%% The new variable Res:6 is used to capture the return value from the call.
%%% The variables Pid:4 and Res are no longer live at the same time, so they
%%% can be assigned to the same Y register.
%%%

copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St) ->
    {Blocks,Count} = copy_retval_1(Frames, Blocks0, Count0),
    St#st{ssa=Blocks,cnt=Count}.

%% Run the copy insertion for each frame-allocating block's region.
copy_retval_1([F|Fs], Blocks0, Count0) ->
    #b_blk{anno=#{yregs:=Yregs0},is=Is} = map_get(F, Blocks0),
    Yregs = collect_yregs(Is, Yregs0),
    Ls = beam_ssa:rpo([F], Blocks0),
    {Blocks,Count} = copy_retval_2(Ls, Yregs, none, Blocks0, Count0),
    copy_retval_1(Fs, Blocks, Count);
copy_retval_1([], Blocks, Count) ->
    {Blocks,Count}.

%% collect_yregs(Is, Yregs0) -> Yregs.
%%  Follow existing 'copy' instructions: when a Y-register variable X
%%  has been copied to Y, it is Y (not X) that must live in the
%%  Y register.
collect_yregs([#b_set{op=copy,dst=Y,args=[#b_var{}=X]}|Is],
              Yregs0) ->
    true = sets:is_element(X, Yregs0),          %Assertion.
    Yregs = sets:add_element(Y, sets:del_element(X, Yregs0)),
    collect_yregs(Is, Yregs);
collect_yregs([#b_set{}|Is], Yregs) ->
    collect_yregs(Is, Yregs);
collect_yregs([], Yregs) -> Yregs.
%% copy_retval_2(Labels, Yregs, PendingCopy, Blocks0, Count0) ->
%%     {Blocks,Count}.
%%  Process the blocks in reverse post-order. A pending 'copy' of a
%%  return value may be carried over into the next block (RC below)
%%  instead of being emitted at the end of the current block.
copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
    #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
    %% RC is true when the next block in RPO order is this block's
    %% success branch and the failure branch is the exception block;
    %% a pending copy may then be carried into the successor.
    RC = case {Last,Ls} of
             {#b_br{succ=Succ,fail=?EXCEPTION_BLOCK},[Succ|_]} ->
                 true;
             {_,_} ->
                 false
         end,
    case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of
        {Is,Count} ->
            case Copy0 =:= none andalso Count0 =:= Count of
                true ->
                    %% Nothing changed; keep the original block map.
                    copy_retval_2(Ls, Yregs, none, Blocks0, Count0);
                false ->
                    Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
                    copy_retval_2(Ls, Yregs, none, Blocks, Count)
            end;
        {Is,Count,Copy} ->
            %% A copy is still pending; carry it into the next block.
            Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
            copy_retval_2(Ls, Yregs, Copy, Blocks, Count)
    end;
copy_retval_2([], _Yregs, none, Blocks, Count) ->
    {Blocks,Count}.

%% copy_retval_is(Is, RC, Yregs, PendingCopy, Count, Acc) ->
%%     {Is,Count} | {Is,Count,PendingCopy}.
%%  Rewrite one block's instructions, inserting copies of call /
%%  old_make_fun return values that must end up in Y registers and
%%  substituting the copied variable into later uses.
copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
               Copy, Count, Acc) ->
    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
    {reverse(Acc, [I|acc_copy([], Copy)]),Count};
copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
  when Op =:= call; Op =:= old_make_fun ->
    %% Last instruction in the block; its result needs no copy here.
    {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
    {reverse(Acc, [I]),Count};
copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
    {reverse(Acc, acc_copy(Is, Copy)),Count};
copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
    %% Keep an instruction and its trailing 'succeeded' together.
    {reverse(Acc, acc_copy(Is, Copy)),Count};
copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs,
               Copy0, Count0, Acc0) when Op =:= call; Op =:= old_make_fun ->
    {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
    case sets:is_element(Dst, Yregs) of
        true ->
            %% The return value must be saved in a Y register. Let the
            %% call define a fresh variable and remember the 'copy'
            %% back to Dst as the new pending copy.
            {NewVar,Count} = new_var(RetName, Count1),
            Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
            I = I1#b_set{dst=NewVar},
            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
        false ->
            copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
    end;
copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% The pending copy must be placed before any instruction
            %% that clobbers X registers.
            copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]);
        false ->
            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc])
    end;
copy_retval_is([], RC, _, Copy, Count, Acc) ->
    case {Copy,RC} of
        {none,_} ->
            {reverse(Acc),Count};
        {#b_set{},true} ->
            %% Pass the pending copy on to the successor block.
            {reverse(Acc),Count,Copy};
        {#b_set{},false} ->
            %% Emit the pending copy at the end of the block.
            {reverse(Acc, [Copy]),Count}
    end.

%%
%% Consider this code:
%%
%%   P = ...
%%   Q = ...
%%   ...
%%   A = call foo/0
%%   A1 = copy A
%%   B = call bar/2, P, Q
%%
%% If the P or Q variables are no longer used after this code, one of
%% their Y registers can't be reused for A. To allow one of the Y registers to
%% be reused we will need to insert 'copy' instructions for arguments
%% that are in Y registers:
%%
%%   P = ...
%%   Q = ...
%%   ...
%%   A1 = call foo/0
%%   Q1 = copy Q
%%   P1 = copy P
%%   A = copy A1
%%   B = call bar/2, P1, Q1
%%
%% Note that copies of the arguments are done in reverse order to help the
%% reserve_xregs/3 function place the copies into the X registers they will
%% need to be in.
%%
%% For this example, P1 needs to be in x0 and Q1 needs to be in x1. If we
%% would copy the arguments in order the registers would be assigned like
%% this:
%%
%%   x0/A1 = call foo/0
%%   x1/P1 = copy P
%%   x2/Q1 = copy Q
%%   A = copy A1
%%   B = call bar/2, P1, Q1
%%
%% That is, both P1 and Q1 would be misplaced and would have to be
%% moved to their correct registers before the call. However, with the
%% copies in reverse order and with a little help from
%% reserve_xregs/3, at least the Q1 variable can be placed in
%% the correct register:
%%
%%   x0/A1 = call foo/0
%%   x1/Q1 = copy Q
%%   x2/P1 = copy P
%%   A = copy A1
%%   B = call bar/2, P1, Q1
%%
%% In general, all but the first argument can be placed in their correct registers.
%%

place_retval_copy(I, _Yregs, none, Count, Acc) ->
    %% There is no copy of a previous return value, so there is nothing
    %% to gain by copying the function arguments.
    {I,Count,Acc};
place_retval_copy(#b_set{args=[F|Args0]}=I0, Yregs0, RetCopy, Count0, Acc0) ->
    %% Copy function arguments, but make sure that we don't make an extra
    %% copy of the previous return value.
    #b_set{dst=Avoid} = RetCopy,
    Yregs = sets:del_element(Avoid, Yregs0),
    {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Acc0, Count0),
    I = I0#b_set{args=[F|Args]},

    %% Place the copy instruction for the previous return value after the
    %% copy instruction for the arguments.
    Acc = [RetCopy|Acc1],
    {I,Count,Acc}.

%% Copy the Y-register arguments of a call, emitting the copies in
%% reverse argument order (see the comment above).
copy_func_args(Args, Yregs, Acc, Count) ->
    copy_func_args_1(reverse(Args), Yregs, Acc, [], Count).

copy_func_args_1([#b_var{name=AName}=A|As], Yregs, InstrAcc, ArgAcc, Count0) ->
    case sets:is_element(A, Yregs) of
        true ->
            {NewVar,Count} = new_var(AName, Count0),
            Copy = #b_set{op=copy,dst=NewVar,args=[A]},
            copy_func_args_1(As, Yregs, [Copy|InstrAcc], [NewVar|ArgAcc], Count);
        false ->
            copy_func_args_1(As, Yregs, InstrAcc, [A|ArgAcc], Count0)
    end;
copy_func_args_1([A|As], Yregs, InstrAcc, ArgAcc, Count) ->
    %% Literals and other non-variable arguments need no copy.
    copy_func_args_1(As, Yregs, InstrAcc, [A|ArgAcc], Count);
copy_func_args_1([], _Yregs, InstrAcc, ArgAcc, Count) ->
    {ArgAcc,InstrAcc,Count}.

%% Prepend the pending copy (if any) to a reversed accumulator.
acc_copy(Acc, none) -> Acc;
acc_copy(Acc, #b_set{}=Copy) -> [Copy|Acc].
%% copy_sub_args(Args, PendingCopy) -> Args.
%%  Substitute the source of a pending 'copy' for its destination in
%%  an argument list.
copy_sub_args(Args, none) ->
    Args;
copy_sub_args(Args, #b_set{dst=Dst,args=[Src]}) ->
    [sub_arg(A, Dst, Src) || A <- Args].

sub_arg(Old, Old, New) -> New;
sub_arg(Old, _, _) -> Old.

%%%
%%% Consider:
%%%
%%%   x1/Hd = get_hd x0/Cons
%%%   y0/Tl = get_tl x0/Cons
%%%
%%% Register x0 can't be reused for Hd. If Hd needs to be in x0,
%%% a 'move' instruction must be inserted.
%%%
%%% If we swap get_hd and get_tl when Tl is in a Y register,
%%% x0 can be used for Hd if Cons is not used again:
%%%
%%%   y0/Tl = get_tl x0/Cons
%%%   x0/Hd = get_hd x0/Cons
%%%

opt_get_list(#st{ssa=Blocks,res=Res}=St) ->
    ResMap = maps:from_list(Res),
    Ls = beam_ssa:rpo(Blocks),
    St#st{ssa=opt_get_list_1(Ls, ResMap, Blocks)}.

opt_get_list_1([L|Ls], Res, Blocks0) ->
    #b_blk{is=Is0} = Blk = map_get(L, Blocks0),
    case opt_get_list_is(Is0, Res, [], false) of
        no ->
            opt_get_list_1(Ls, Res, Blocks0);
        {yes,Is} ->
            Blocks = Blocks0#{L:=Blk#b_blk{is=Is}},
            opt_get_list_1(Ls, Res, Blocks)
    end;
opt_get_list_1([], _, Blocks) -> Blocks.

%% opt_get_list_is(Is, Res, Acc, Changed) -> no | {yes,Is}.
%%  Swap adjacent get_hd/get_tl pairs on the same cons cell when the
%%  tail (but not the head) goes to a Y register.
opt_get_list_is([#b_set{op=get_hd,dst=Hd,
                        args=[Cons]}=GetHd,
                 #b_set{op=get_tl,dst=Tl,
                        args=[Cons]}=GetTl|Is],
                Res, Acc, Changed) ->
    %% Note that when this pass is run, only Y registers have
    %% reservations. The absence of an entry for a variable therefore
    %% means that the variable will be in an X register.
    case Res of
        #{Hd:={y,_}} ->
            %% Hd will be in a Y register. Don't swap.
            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed);
        #{Tl:={y,_}} ->
            %% Tl will be in a Y register. Swap.
            opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true);
        #{} ->
            %% Both are in X registers. Nothing to do.
            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed)
    end;
opt_get_list_is([I|Is], Res, Acc, Changed) ->
    opt_get_list_is(Is, Res, [I|Acc], Changed);
opt_get_list_is([], _Res, Acc, Changed) ->
    case Changed of
        true ->
            {yes,reverse(Acc)};
        false ->
            no
    end.

%%%
%%% Number instructions in the order they are executed.
%%%

%% number_instructions(St0) -> St.
%%  Number instructions in the order they are executed. Use a step
%%  size of 2. Don't number phi instructions. All phi variables in
%%  a block will be live one unit before the first non-phi instruction
%%  in the block.

number_instructions(#st{ssa=Blocks0}=St) ->
    Ls = beam_ssa:rpo(Blocks0),
    St#st{ssa=number_is_1(Ls, 1, Blocks0)}.

number_is_1([L|Ls], N0, Blocks0) ->
    #b_blk{is=Is0,last=Last0} = Bl0 = map_get(L, Blocks0),
    {Is,N1} = number_is_2(Is0, N0, []),
    %% The terminator gets the next number after the instructions.
    Last = beam_ssa:add_anno(n, N1, Last0),
    N = N1 + 2,
    Bl = Bl0#b_blk{is=Is,last=Last},
    Blocks = Blocks0#{L:=Bl},
    number_is_1(Ls, N, Blocks);
number_is_1([], _, Blocks) -> Blocks.

number_is_2([#b_set{op=phi}=I|Is], N, Acc) ->
    %% Phi instructions are deliberately not numbered.
    number_is_2(Is, N, [I|Acc]);
number_is_2([I0|Is], N, Acc) ->
    I = beam_ssa:add_anno(n, N, I0),
    number_is_2(Is, N+2, [I|Acc]);
number_is_2([], N, Acc) ->
    {reverse(Acc),N}.

%%%
%%% Calculate live intervals for all variables in this function.
%%%
%%% This code uses the algorithm and terminology from [3].
%%%
%%% For each variable, we calculate its live interval. The live
%%% interval is a list of ranges, where a range is a tuple
%%% {Def,LastUse}. Def is the instruction number for the instruction
%%% that first defines the variable and LastUse is instruction number
%%% of the last instruction that uses it.
%%%
%%% We traverse instructions in post order so that we will see the last
%%% use before we see the definition.
%%%

live_intervals(#st{args=Args,ssa=Blocks}=St) ->
    PO = reverse(beam_ssa:rpo(Blocks)),
    Intervals0 = live_interval_blk(PO, Blocks, #{}, #{}),
    %% Function arguments are defined at instruction 0.
    Intervals1 = add_ranges([{V,{0,1}} || #b_var{}=V <- Args], Intervals0),
    Intervals = maps:to_list(Intervals1),
    St#st{intervals=Intervals}.

%% live_interval_blk(PostOrderLabels, Blocks, LiveMap, Intervals0) ->
%%     Intervals.
%%  Compute per-block live ranges, threading the live-in sets of
%%  already-processed successors through LiveMap.
live_interval_blk([L|Ls], Blocks, LiveMap0, Intervals0) ->
    Blk = map_get(L, Blocks),
    Successors = beam_ssa:successors(Blk),
    Live1 = live_in_successors(Successors, L, Blocks, LiveMap0),

    %% Add default ranges for all variables that are live in the
    %% successors.
    #b_blk{is=Is,last=Last} = Blk,
    FirstNumber = first_number(Is, Last),
    DefaultRange = {FirstNumber,1+beam_ssa:get_anno(n, Last)},
    Ranges0 = [{V,DefaultRange} || V <- Live1],

    case {Is,Last} of
        {[],#b_br{bool=#b_literal{val=true}}} ->
            %% Optimize the interval calculation for blocks without variables.
            Intervals = add_ranges(Ranges0, Intervals0),
            LiveMap = LiveMap0#{L => Live1},
            live_interval_blk(Ls, Blocks, LiveMap, Intervals);
        {_,_} ->
            %% Update the ranges. Variables whose last use is in this
            %% block will be added, and variables that are defined
            %% in this block will have their starting instruction
            %% number updated.
            %%
            %% We use a gb_tree instead of a map because conversion to and
            %% from an orddict is faster.
            Ranges1 = gb_trees:from_orddict(Ranges0),
            Ranges2 = live_interval_last(Last, FirstNumber, Ranges1),
            Ranges3 = live_interval_blk_is(Is, FirstNumber, Ranges2),
            Ranges = gb_trees:to_list(Ranges3),

            %% Update the interval for each variable.
            Intervals = add_ranges(Ranges, Intervals0),

            %% Update what is live at the beginning of this block and
            %% store it.
            Live = [V || {V,{From,_To}} <- Ranges,
                         From =< FirstNumber],
            LiveMap = LiveMap0#{L => Live},
            live_interval_blk(Ls, Blocks, LiveMap, Intervals)
    end;
live_interval_blk([], _Blocks, _LiveMap, Intervals) ->
    Intervals.

%% Record uses in the block terminator.
live_interval_last(I, FirstNumber, Ranges) ->
    N = beam_ssa:get_anno(n, I),
    Used = beam_ssa:used(I),
    update_used(Used, FirstNumber, N, Ranges).

%% Record uses and definitions of the block's instructions, visiting
%% them back to front (the recursion descends before updating).
live_interval_blk_is([#b_set{op=phi,dst=Dst}|Is], FirstNumber, Acc0) ->
    Acc = live_interval_blk_is(Is, FirstNumber, Acc0),
    case gb_trees:is_defined(Dst, Acc) of
        true ->
            %% The value in the tree already has the correct starting value.
            update_def(Dst, FirstNumber, Acc);
        false ->
            %% Unused phi node -- can only happen if optimizations passes
            %% have been turned off.
            gb_trees:insert(Dst, {FirstNumber,FirstNumber}, Acc)
    end;
live_interval_blk_is([#b_set{args=Args,dst=Dst}=I|Is], FirstNumber, Acc0) ->
    Acc1 = live_interval_blk_is(Is, FirstNumber, Acc0),
    N = beam_ssa:get_anno(n, I),
    Used = used_args(Args),
    Acc = update_used(Used, FirstNumber, N, Acc1),
    update_def(Dst, N, Acc);
live_interval_blk_is([], _FirstNumber, Acc) ->
    Acc.

%% Set the start of a variable's range to its definition point.
update_def(V, N, Ranges) ->
    case gb_trees:lookup(V, Ranges) of
        {value,{_From,To}} ->
            gb_trees:update(V, {N,To}, Ranges);
        none ->
            %% The variable is defined but never used.
            gb_trees:insert(V, {N,N}, Ranges)
    end.

%% Record the last use of each variable in Vs.
update_used([V|Vs], First, N, Ranges) ->
    case gb_trees:is_defined(V, Ranges) of
        true ->
            %% Already up to date. (A later use has already been stored.)
            update_used(Vs, First, N, Ranges);
        false ->
            %% The last use of this variable. (But the first time we
            %% see it because we visit instructions in PO.)
            update_used(Vs, First, N, gb_trees:insert(V, {First,N}, Ranges))
    end;
update_used([], _First, _N, Ranges) -> Ranges.
%% add_ranges([{Var,Range}], IntervalMap0) -> IntervalMap.
%%  Fold each new {From,To} range into the variable's interval list,
%%  coalescing with an adjacent range and ignoring exact repeats
%%  (which can come from function arguments).
add_ranges([{V,Range}|T], Map0) ->
    Old = maps:get(V, Map0, []),
    add_ranges(T, Map0#{V => add_ranges_one(Range, Old)});
add_ranges([], Map) -> Map.

%% Merge a single range into the (most-recent-first) range list.
add_ranges_one({A,N}, [{N,Z}|Ranges]) ->
    %% Coalesce two adjacent ranges.
    [{A,Z}|Ranges];
add_ranges_one({A,N}, [{A,N}|_]=Ranges) ->
    %% Ignore repeated range (probably from arguments).
    Ranges;
add_ranges_one(Range, Ranges) ->
    %% This range is not adjacent to any other range.
    [Range|Ranges].

%% first_number([#b_set{}], Last) -> InstructionNumber.
%%  Return the number for the first instruction for the block.
%%  Note that this number is one less than the first
%%  non-phi instruction in the block.

first_number(Is0, Last) ->
    NotPhi = fun(I) ->
                     case I of
                         #b_set{op=phi} -> false;
                         _ -> true
                     end
             end,
    case lists:dropwhile(fun(I) -> not NotPhi(I) end, Is0) of
        [I|_] -> beam_ssa:get_anno(n, I) - 1;
        [] -> beam_ssa:get_anno(n, Last) - 1
    end.

%% live_in_successors(SuccLabels, PredLabel, Blocks, LiveMap) -> Live.
%%  Union of the live-in sets of all successors, with phi arguments
%%  resolved relative to the predecessor label.
live_in_successors(Ls, Pred, Blocks, LiveMap) ->
    lists:foldl(fun(L, Live0) ->
                        Live = ordsets:union(Live0, get_live(L, LiveMap)),
                        #b_blk{is=Is} = map_get(L, Blocks),
                        live_in_phis(Is, Pred, Live)
                end, [], Ls).

%% Live-in set for a block; empty if it has not been processed yet.
get_live(L, LiveMap) ->
    maps:get(L, LiveMap, []).

%% Adjust a successor's live-in set for its phi instructions: the phi
%% destination is not live on entry, but the operand coming from Pred is.
live_in_phis([#b_set{op=phi,dst=Killed,args=Args}|Is], Pred, Live0) ->
    FromPred = [V || {#b_var{}=V,From} <- Args, From =:= Pred],
    Live = ordsets:del_element(Killed, ordsets:union(FromPred, Live0)),
    live_in_phis(Is, Pred, Live);
live_in_phis(_, _, Live) ->
    %% Phis always come first in a block; stop at the first non-phi.
    Live.
%%  In each block that allocates a stack frame, insert instructions
%%  that copy variables that must be in Y registers (given by
%%  the `yregs` annotation) to new variables.
%%
%%  Also allocate specific Y registers for try and catch tags.
%%  The outermost try/catch tag is placed in y0, any directly
%%  nested tag in y1, and so on. Note that this is the reversed
%%  order as required by BEAM; it will be corrected later by
%%  turn_yregs().

reserve_yregs(#st{frames=Frames}=St0) ->
    foldl(fun reserve_yregs_1/2, St0, Frames).

reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
    Blk = map_get(L, Blocks0),
    Yregs = ordsets:from_list(sets:to_list(beam_ssa:get_anno(yregs, Blk))),
    RPO = beam_ssa:rpo([L], Blocks0),
    {Def,Unused} = beam_ssa:def_unused(RPO, Yregs, Blocks0),
    %% Variables that are never used within the frame need no Y register.
    UsedYregs = ordsets:subtract(Yregs, Unused),
    %% Variables defined before the frame is allocated must be renamed
    %% and copied into the frame.
    DefBefore = ordsets:subtract(UsedYregs, Def),
    {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, RPO, Blocks0, Count0),
    InsideVars = ordsets:subtract(UsedYregs, DefBefore),
    ResTryTags0 = reserve_try_tags(L, Blocks),
    %% NOTE(review): the second element pairs the register with Count,
    %% which looks like a per-frame tag consumed by the allocator --
    %% confirm against the linear-scan code outside this chunk.
    ResTryTags = [{V,{Reg,Count}} || {V,Reg} <- ResTryTags0],
    Vars = BeforeVars ++ InsideVars,
    Res = [{V,{y,Count}} || V <- Vars] ++ ResTryTags ++ Res0,
    St#st{res=Res,ssa=Blocks,cnt=Count+1}.

%% reserve_try_tags(FrameLabel, Blocks) -> ordset of {Var,{y,Y}}.
%%  Assign a Y register number to each try/catch tag, based on its
%%  nesting depth (outermost gets y0; reversed later by turn_yregs()).
reserve_try_tags(L, Blocks) ->
    Seen = gb_sets:empty(),
    {Res0,_} = reserve_try_tags_1([L], Blocks, Seen, #{}),
    Res1 = [maps:to_list(M) || {_,M} <- maps:to_list(Res0)],
    Res = [{V,{y,Y}} || {V,Y} <- append(Res1)],
    ordsets:from_list(Res).
%% reserve_try_tags_1(Labels, Blocks, Seen, ActMap) -> {ActMap,Seen}.
%%  Walk the CFG, tracking which try tags are active in each block and
%%  at which nesting depth. Siblings are finished before successors,
%%  exactly mirroring the original traversal order.
reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
    case gb_sets:is_element(L, Seen0) of
        true ->
            %% Already visited.
            reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
        false ->
            Seen1 = gb_sets:insert(L, Seen0),
            #b_blk{is=Is} = Blk = map_get(L, Blocks),
            ActiveIn = get_active(L, ActMap0),
            ActiveOut = reserve_try_tags_is(Is, ActiveIn),
            Succ = beam_ssa:successors(Blk),
            ActMap1 = update_act_map(Succ, ActiveOut, ActMap0),
            {ActMap2,Seen2} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1),
            reserve_try_tags_1(Succ, Blocks, Seen2, ActMap2)
    end;
reserve_try_tags_1([], _Blocks, Seen, ActMap) ->
    {ActMap,Seen}.

%% Active tag map for a block; empty if no predecessor stored one yet.
get_active(L, ActMap) ->
    maps:get(L, ActMap, #{}).

%% Thread the active-tag map through a block's instructions: a
%% new_try_tag gets the next nesting depth, a kill_try_tag retires its
%% tag.
reserve_try_tags_is([#b_set{op=new_try_tag,dst=Tag}|Is], Active) ->
    reserve_try_tags_is(Is, Active#{Tag => map_size(Active)});
reserve_try_tags_is([#b_set{op=kill_try_tag,args=[Tag]}|Is], Active) ->
    reserve_try_tags_is(Is, maps:remove(Tag, Active));
reserve_try_tags_is([_|Is], Active) ->
    reserve_try_tags_is(Is, Active);
reserve_try_tags_is([], Active) ->
    Active.

%% Propagate the active tags to each successor, merging with anything
%% already recorded there (entries already present win on conflict,
%% as in maps:merge/2).
update_act_map(Ls, Active, ActMap) ->
    lists:foldl(fun(L, Acc) ->
                        case Acc of
                            #{L := Prev} ->
                                Acc#{L => maps:merge(Active, Prev)};
                            #{} ->
                                Acc#{L => Active}
                        end
                end, ActMap, Ls).
%% rename_vars(Vars, FrameLabel, RPO, Blocks0, Count0) ->
%%     {NewVars,Blocks,Count}.
%%  Rename each variable in Vars throughout the frame's region and
%%  insert 'copy' instructions (after any phis) in the frame block so
%%  the renamed variables are defined inside the frame.
rename_vars([], _, _, Blocks, Count) ->
    {[],Blocks,Count};
rename_vars(Vs, L, RPO, Blocks0, Count0) ->
    {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Vs], Count0),
    Ren = zip(Vs, NewVars),
    Blocks1 = beam_ssa:rename_vars(Ren, RPO, Blocks0),
    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks1),
    CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
    Is = insert_after_phis(Is0, CopyIs),
    Blk = Blk0#b_blk{is=Is},
    Blocks = Blocks1#{L:=Blk},
    {NewVars,Blocks,Count}.

%% Insert instructions after the leading phi instructions of a block
%% (phis must stay first).
insert_after_phis([#b_set{op=phi}=I|Is], InsertIs) ->
    [I|insert_after_phis(Is, InsertIs)];
insert_after_phis(Is, InsertIs) ->
    InsertIs ++ Is.

%% frame_size(St0) -> St.
%%  Calculate the frame size for each block that allocates a frame.
%%  Annotate the block with the frame size. Also annotate all
%%  return instructions with {deallocate,FrameSize} to simplify
%%  code generation.

frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
    Blocks = foldl(fun(L, Blks) ->
                           frame_size_1(L, Regs, Blks)
                   end, Blocks0, Frames),
    St#st{ssa=Blocks}.

frame_size_1(L, Regs, Blocks0) ->
    RPO = beam_ssa:rpo([L], Blocks0),
    Def = beam_ssa:def(RPO, Blocks0),
    Yregs0 = [map_get(V, Regs) || V <- Def, is_yreg(map_get(V, Regs))],
    Yregs = ordsets:from_list(Yregs0),
    FrameSize = length(ordsets:from_list(Yregs)),
    if
        FrameSize =/= 0 ->
            %% The Y registers should be dense: y0..y(FrameSize-1).
            [{y,0}|_] = Yregs,                  %Assertion.
            {y,Last} = last(Yregs),
            Last = FrameSize - 1,               %Assertion.
            ok;
        true ->
            ok
    end,
    Blk0 = map_get(L, Blocks0),
    Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0),

    %% Insert an annotation for frame deallocation on
    %% each #b_ret{}.
    Blocks = Blocks0#{L:=Blk},
    Reachable = beam_ssa:rpo([L], Blocks),
    frame_deallocate(Reachable, FrameSize, Blocks).
%% frame_deallocate(Labels, FrameSize, Blocks0) -> Blocks.
%%  Annotate every #b_ret{} terminator reachable from the frame block
%%  with {deallocate,FrameSize}.
frame_deallocate([L|Ls], Size, Blocks0) ->
    Blk0 = map_get(L, Blocks0),
    Blk = case Blk0 of
              #b_blk{last=#b_ret{}=Ret0} ->
                  Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
                  Blk0#b_blk{last=Ret};
              #b_blk{} ->
                  Blk0
          end,
    Blocks = Blocks0#{L:=Blk},
    frame_deallocate(Ls, Size, Blocks);
frame_deallocate([], _, Blocks) -> Blocks.


%% turn_yregs(St0) -> St.
%%  Renumber y registers so that {y,0} becomes {y,FrameSize-1},
%%  {y,FrameSize-1} becomes {y,0} and so on. This is to make nested
%%  catches work. The register allocator (linear_scan()) has given
%%  a lower number to the outermost catch.

turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
    Regs1 = foldl(fun(L, A) ->
                          Blk = map_get(L, Blocks),
                          FrameSize = beam_ssa:get_anno(frame_size, Blk),
                          RPO = beam_ssa:rpo([L], Blocks),
                          Def = beam_ssa:def(RPO, Blocks),
                          [turn_yregs_1(Def, FrameSize, Regs0)|A]
                  end, [], Frames),
    Regs = maps:merge(Regs0, maps:from_list(append(Regs1))),
    St#st{regs=Regs}.

%% Mirror the Y register numbers within one frame.
turn_yregs_1(Def, FrameSize, Regs) ->
    Yregs0 = [{map_get(V, Regs),V} || V <- Def, is_yreg(map_get(V, Regs))],
    Yregs1 = rel2fam(Yregs0),
    FrameSize = length(Yregs1),                 %Assertion.
    Yregs2 = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- Yregs1],
    R0 = sofs:family(Yregs2),
    R1 = sofs:family_to_relation(R0),
    R = sofs:converse(R1),
    sofs:to_external(R).

%%%
%%% Reserving registers before register allocation.
%%%

%% reserve_regs(St0) -> St.
%%  Reserve registers prior to register allocation. Y registers
%%  have already been reserved. This function will reserve z,
%%  fr, and specific x registers.

reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
    %% Reserve x0, x1, and so on for the function arguments.
    Res1 = reserve_arg_regs(Args, 0, Res0),
    RPO = beam_ssa:rpo(Blocks),

    %% Reserve Z registers (dummy registers) for instructions with no
    %% return values (e.g. remove_message) or pseudo-return values
    %% (e.g. landingpad).
    Res2 = reserve_zregs(RPO, Blocks, Intervals, Res1),

    %% Reserve float registers.
    Res3 = reserve_fregs(RPO, Blocks, Res2),

    %% Reserve all remaining unreserved variables as X registers.
    Res = maps:from_list(Res3),
    St#st{res=reserve_xregs(RPO, Blocks, Res)}.

reserve_arg_regs([#b_var{}=Arg|Is], N, Acc) ->
    reserve_arg_regs(Is, N+1, [{Arg,{x,N}}|Acc]);
reserve_arg_regs([], _, Acc) -> Acc.

reserve_zregs(RPO, Blocks, Intervals, Res) ->
    %% A variable is "short-lived" when it is only used by the
    %% instruction immediately following its definition (a single
    %% range spanning exactly one numbering step of 2).
    ShortLived0 = [V || {V,[{Start,End}]} <- Intervals, Start+2 =:= End],
    ShortLived = sets:from_list(ShortLived0, [{version, 2}]),
    F = fun(_, #b_blk{is=Is,last=Last}, A) ->
                reserve_zreg(Is, Last, ShortLived, A)
        end,
    beam_ssa:fold_blocks(F, RPO, Res, Blocks).

reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
              #b_set{op={bif,'=:='},args=[Dst,Val],dst=Bool}],
             Last, ShortLived, A) ->
    case {Val,Last} of
        {#b_literal{val=Arity},#b_br{bool=Bool}} when Arity bsr 32 =:= 0 ->
            %% These two instructions can be combined to a test_arity
            %% instruction provided that the arity variable is short-lived.
            reserve_test_zreg(Dst, ShortLived, A);
        {_,_} ->
            %% Either the arity is too big, or the boolean value is not
            %% used in a conditional branch.
            A
    end;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
             #b_switch{arg=Dst}, ShortLived, A) ->
    %% tuple_size feeding a switch can become a select_tuple_arity test.
    reserve_test_zreg(Dst, ShortLived, A);
reserve_zreg([#b_set{op=Op,dst=Dst}], #b_br{bool=Dst}, ShortLived, A) ->
    case use_zreg(Op) of
        yes -> [{Dst,z} | A];
        no -> A;
        'maybe' -> reserve_test_zreg(Dst, ShortLived, A)
    end;
reserve_zreg([#b_set{op=Op,dst=Dst} | Is], Last, ShortLived, A) ->
    case use_zreg(Op) of
        yes -> reserve_zreg(Is, Last, ShortLived, [{Dst,z} | A]);
        _Other -> reserve_zreg(Is, Last, ShortLived, A)
    end;
reserve_zreg([], _, _, A) -> A.

%% use_zreg(Op) -> yes | no | 'maybe'.
%%  Note: the atom 'maybe' must be quoted; 'maybe' is a keyword when
%%  the maybe_expr feature is enabled (the default from OTP 27).
use_zreg(bs_match_string) -> yes;
use_zreg(bs_save) -> yes;
use_zreg(bs_restore) -> yes;
use_zreg(bs_set_position) -> yes;
use_zreg(kill_try_tag) -> yes;
use_zreg(landingpad) -> yes;
use_zreg(put_tuple_elements) -> yes;
use_zreg(recv_marker_bind) -> yes;
use_zreg(recv_marker_clear) -> yes;
use_zreg(remove_message) -> yes;
use_zreg(set_tuple_element) -> yes;
use_zreg(succeeded) -> yes;
use_zreg(wait_timeout) -> yes;
%% There's no way we can combine these into a test instruction, so we must
%% avoid using a z register if their result is used directly in a branch.
use_zreg(call) -> no;
use_zreg({bif,is_map_key}) -> no;
use_zreg({bif,is_record}) -> no;
use_zreg({bif,map_get}) -> no;
use_zreg({bif,'xor'}) -> no;
use_zreg(get_hd) -> no;
use_zreg(get_tl) -> no;
use_zreg(get_tuple_element) -> no;
%% Assume the instruction can use a z register, provided it's the last in its
%% block and that the result is only used in the terminator.
use_zreg(_) -> 'maybe'.

%% If V is defined just before a branch, we may be able to combine it into a
%% test instruction.
reserve_test_zreg(#b_var{}=V, ShortLived, A) ->
    case sets:is_element(V, ShortLived) of
        true -> [{V,z}|A];
        false -> A
    end.
reserve_fregs(RPO, Blocks, Res) ->
    F = fun(_, #b_blk{is=Is}, A) ->
                reserve_freg(Is, A)
        end,
    beam_ssa:fold_blocks(F, RPO, Res, Blocks).

%% Reserve an fr register for every float-op result except {float,get},
%% whose result leaves the float register bank.
reserve_freg([#b_set{op={float,Op},dst=V}|Is], Res) ->
    case Op of
        get ->
            reserve_freg(Is, Res);
        _ ->
            reserve_freg(Is, [{V,fr}|Res])
    end;
reserve_freg([_|Is], Res) ->
    reserve_freg(Is, Res);
reserve_freg([], Res) -> Res.

%% reserve_xregs(St0) -> St.
%%  Reserve all remaining variables as X registers.
%%
%%  If a variable will need to be in a specific X register for a
%%  'call' or 'old_make_fun' (and there is nothing that will kill it
%%  between the definition and use), reserve the register using a
%%  {prefer,{x,X}} annotation. That annotation means that the linear
%%  scan algorithm will place the variable in the preferred register,
%%  unless that register is already occupied.
%%
%%  All remaining variables are reserved as X registers. Linear scan
%%  will allocate the lowest free X register for the variable.

reserve_xregs(RPO, Blocks, Res) ->
    Ls = reverse(RPO),
    reserve_xregs(Ls, Blocks, #{}, Res).

reserve_xregs([L|Ls], Blocks, XsMap0, Res0) ->
    #b_blk{anno=Anno,is=Is0,last=Last} = map_get(L, Blocks),

    %% Calculate mapping from variable name to the preferred
    %% register.
    Xs0 = reserve_terminator(L, Is0, Last, Blocks, XsMap0, Res0),

    %% We need to figure out where the code generator will
    %% place instructions that will do a garbage collection.
    %% Insert 'gc' markers as pseudo-instructions in the
    %% instruction sequence.
    Is1 = reverse(Is0),
    Is2 = res_place_gc_instrs(Is1, []),
    Is = res_place_allocate(Anno, Is2),

    %% Add register hints for variables that are defined
    %% in the (reversed) instruction sequence.
    {Res,Xs} = reserve_xregs_is(Is, Res0, Xs0, []),

    XsMap = XsMap0#{L=>Xs},
    reserve_xregs(Ls, Blocks, XsMap, Res);
reserve_xregs([], _, _, Res) -> Res.

%% Insert explicit 'gc' markers points where there will
%% be a garbage collection. (Note that the instruction
%% sequence passed to this function is reversed.)

res_place_gc_instrs([#b_set{op=phi}=I|Is], Acc) ->
    res_place_gc_instrs(Is, [I|Acc]);
res_place_gc_instrs([#b_set{op=Op}=I|Is], Acc)
  when Op =:= call; Op =:= old_make_fun ->
    %% A call garbage-collects unless it ends the (reversed) sequence.
    %% (The original code had two identical non-empty clauses; they
    %% have been merged.)
    case Acc of
        [] ->
            res_place_gc_instrs(Is, [I|Acc]);
        [_|_] ->
            res_place_gc_instrs(Is, [I,gc|Acc])
    end;
res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) ->
    case beam_ssa_codegen:classify_heap_need(Op, Args) of
        neutral ->
            %% Keep a pending 'test_heap' marker ahead of a neutral
            %% instruction so adjacent heap needs can be combined.
            case Acc0 of
                [test_heap|Acc] ->
                    res_place_gc_instrs(Is, [test_heap,I|Acc]);
                Acc ->
                    res_place_gc_instrs(Is, [I|Acc])
            end;
        {put,_} ->
            res_place_gc_instrs(Is, res_place_test_heap(I, Acc0));
        {put_fun,_} ->
            res_place_gc_instrs(Is, res_place_test_heap(I, Acc0));
        put_float ->
            res_place_gc_instrs(Is, res_place_test_heap(I, Acc0));
        gc ->
            res_place_gc_instrs(Is, [gc,I|Acc0])
    end;
res_place_gc_instrs([], Acc) ->
    %% Reverse and replace 'test_heap' markers with 'gc'.
    %% (The distinction is no longer useful.)
    res_place_gc_instrs_rev(Acc, []).

%% Add a 'test_heap' marker for a heap-allocating instruction,
%% merging with an immediately preceding marker.
res_place_test_heap(I, Acc) ->
    case Acc of
        [test_heap|Acc0] ->
            %% Fold this allocation into the existing marker. (The
            %% previous version matched the bound variable Acc in the
            %% pattern, which could never succeed, leaving duplicate
            %% markers.)
            [test_heap,I|Acc0];
        _ ->
            [test_heap,I|Acc]
    end.
2803 2804res_place_gc_instrs_rev([test_heap|Is], [gc|_]=Acc) -> 2805 res_place_gc_instrs_rev(Is, Acc); 2806res_place_gc_instrs_rev([test_heap|Is], Acc) -> 2807 res_place_gc_instrs_rev(Is, [gc|Acc]); 2808res_place_gc_instrs_rev([gc|Is], [gc|_]=Acc) -> 2809 res_place_gc_instrs_rev(Is, Acc); 2810res_place_gc_instrs_rev([I|Is], Acc) -> 2811 res_place_gc_instrs_rev(Is, [I|Acc]); 2812res_place_gc_instrs_rev([], Acc) -> Acc. 2813 2814res_place_allocate(#{yregs:=_}, Is) -> 2815 %% There will be an 'allocate' instruction inserted here. 2816 Is ++ [gc]; 2817res_place_allocate(#{}, Is) -> Is. 2818 2819reserve_xregs_is([gc|Is], Res, Xs0, Used) -> 2820 %% At this point, the code generator will place an instruction 2821 %% that does a garbage collection. We must prune the remembered 2822 %% registers. 2823 Xs = res_xregs_prune(Xs0, Used, Res), 2824 reserve_xregs_is(Is, Res, Xs, Used); 2825reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) -> 2826 Res = reserve_xreg(Dst, Xs0, Res0), 2827 Used1 = ordsets:union(Used0, beam_ssa:used(I)), 2828 Used = ordsets:del_element(Dst, Used1), 2829 case Op of 2830 call -> 2831 Xs = reserve_call_args(tl(Args)), 2832 reserve_xregs_is(Is, Res, Xs, Used); 2833 old_make_fun -> 2834 Xs = reserve_call_args(tl(Args)), 2835 reserve_xregs_is(Is, Res, Xs, Used); 2836 _ -> 2837 reserve_xregs_is(Is, Res, Xs0, Used) 2838 end; 2839reserve_xregs_is([], Res, Xs, _Used) -> 2840 {Res,Xs}. 2841 2842%% Pick up register hints from the successors of this blocks. 2843reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail}, 2844 Blocks, XsMap, Res) when Succ =/= Fail, 2845 Fail =/= ?EXCEPTION_BLOCK -> 2846 #{Succ:=SuccBlk,Fail:=FailBlk} = Blocks, 2847 case {SuccBlk,FailBlk} of 2848 {#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}, 2849 #b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} -> 2850 %% Both branches ultimately transfer to the same 2851 %% block (via two blocks with no instructions). 
2852 %% Pick up register hints from the phi nodes 2853 %% in the common block. 2854 #{PhiL:=#b_blk{is=PhiIs}} = Blocks, 2855 Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}), 2856 res_xregs_from_phi(PhiIs, Fail, Res, Xs); 2857 {_,_} when Is =/= [] -> 2858 case last(Is) of 2859 #b_set{op=succeeded,args=[Arg]} -> 2860 %% We know that Arg will not be used at the failure 2861 %% label, so we can pick up register hints from the 2862 %% success label. 2863 Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ}, 2864 case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of 2865 #{Arg:=Reg} -> #{Arg=>Reg}; 2866 #{} -> #{} 2867 end; 2868 _ -> 2869 %% Register hints from the success block may not 2870 %% be safe at the failure block, and vice versa. 2871 #{} 2872 end; 2873 {_,_} -> 2874 %% Register hints from the success block may not 2875 %% be safe at the failure block, and vice versa. 2876 #{} 2877 end; 2878reserve_terminator(L, Is, #b_br{bool=Bool,succ=Succ,fail=Fail}, 2879 Blocks, XsMap, Res) -> 2880 case {Bool, Fail} of 2881 {_, ?EXCEPTION_BLOCK} -> 2882 %% We know that no variables are used from ?EXCEPTION_BLOCK, so any 2883 %% register hints from the successor block are safe to use. 2884 reserve_terminator_1(L, Succ, Is, Blocks, XsMap, Res); 2885 {#b_literal{val=true}, _} -> 2886 %% We only have one successor, so its hints are safe to use. 2887 reserve_terminator_1(L, Succ, Is, Blocks, XsMap, Res); 2888 {_, _} -> 2889 %% Register hints from the success block may not 2890 %% be safe at the failure block, and vice versa. 2891 #{} 2892 end; 2893reserve_terminator(_, _, _, _, _, _) -> 2894 #{}. 2895 2896reserve_terminator_1(L, Succ, _Is, Blocks, XsMap, Res) -> 2897 case {Blocks, XsMap} of 2898 {#{ Succ := #b_blk{is=[#b_set{op=phi}|_]=PhiIs}}, #{}} -> 2899 res_xregs_from_phi(PhiIs, L, Res, #{}); 2900 {#{}, #{ Succ := Xs }}-> 2901 Xs; 2902 {#{}, #{}} -> 2903 #{} 2904 end. 2905 2906%% Pick up a reservation from a phi node. 
2907res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is], 2908 Pred, Res, Acc) -> 2909 case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of 2910 [] -> 2911 %% The value of the phi node for this predecessor 2912 %% is a literal. Nothing to do here. 2913 res_xregs_from_phi(Is, Pred, Res, Acc); 2914 [V] -> 2915 case Res of 2916 #{Dst:={prefer,Reg}} -> 2917 %% Try placing V in the same register as for 2918 %% the phi node. 2919 res_xregs_from_phi(Is, Pred, Res, Acc#{V=>Reg}); 2920 #{Dst:=_} -> 2921 res_xregs_from_phi(Is, Pred, Res, Acc) 2922 end 2923 end; 2924res_xregs_from_phi(_, _, _, Acc) -> Acc. 2925 2926reserve_call_args(Args) -> 2927 reserve_call_args(Args, 0, #{}). 2928 2929reserve_call_args([#b_var{}=Var|As], X, Xs) -> 2930 reserve_call_args(As, X+1, Xs#{Var=>{x,X}}); 2931reserve_call_args([#b_literal{}|As], X, Xs) -> 2932 reserve_call_args(As, X+1, Xs); 2933reserve_call_args([], _, Xs) -> Xs. 2934 2935reserve_xreg(V, Xs, Res) -> 2936 case Res of 2937 #{V:=_} -> 2938 %% Already reserved (but not as an X register). 2939 Res; 2940 #{} -> 2941 case Xs of 2942 #{V:=X} -> 2943 %% Add a hint that this specific X register is 2944 %% preferred, unless it is already in use. 2945 Res#{V=>{prefer,X}}; 2946 #{} -> 2947 %% Reserve as an X register in general. 2948 Res#{V=>x} 2949 end 2950 end. 2951 2952%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs. 2953%% Prune the list of preferred registers, to make sure that 2954%% there are no "holes" (uninitialized X registers) when 2955%% invoking the garbage collector. 2956 2957res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 -> 2958 %% The number of safe registers is the number of the X registers 2959 %% used after this point. The actual number of safe registers may 2960 %% be higher than this number, but this is a conservative safe 2961 %% estimate. 
2962 NumSafe = foldl(fun(V, N) -> 2963 case Res of 2964 #{V:={x,_}} -> N + 1; 2965 #{V:=_} -> N; 2966 #{} -> N + 1 2967 end 2968 end, 0, Used), 2969 2970 %% Remove unsafe registers from the list of potential 2971 %% preferred registers. 2972 maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs); 2973res_xregs_prune(Xs, _Used, _Res) -> Xs. 2974 2975%%% 2976%%% Register allocation using linear scan. 2977%%% 2978 2979-record(i, 2980 {sort=1 :: instr_number(), 2981 reg=none :: i_reg(), 2982 pool=x :: pool_id(), 2983 var=#b_var{} :: b_var(), 2984 rs=[] :: [range()] 2985 }). 2986 2987-record(l, 2988 {cur=#i{} :: interval(), 2989 unhandled_res=[] :: [interval()], 2990 unhandled_any=[] :: [interval()], 2991 active=[] :: [interval()], 2992 inactive=[] :: [interval()], 2993 free=#{} :: #{var_name()=>pool(), 2994 {'next',pool_id()}:=reg_num()}, 2995 regs=[] :: [{b_var(),ssa_register()}] 2996 }). 2997 2998-type interval() :: #i{}. 2999-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'. 3000-type pool_id() :: 'fr' | 'x' | 'z' | instr_number(). 3001-type pool() :: ordsets:ordset(ssa_register()). 3002 3003linear_scan(#st{intervals=Intervals0,res=Res}=St0) -> 3004 St = St0#st{intervals=[],res=[]}, 3005 Free = init_free(maps:to_list(Res)), 3006 Intervals1 = [init_interval(Int, Res) || Int <- Intervals0], 3007 Intervals = sort(Intervals1), 3008 IsReserved = fun(#i{reg=Reg}) -> 3009 case Reg of 3010 none -> false; 3011 {prefer,{_,_}} -> false; 3012 {_,_} -> true 3013 end 3014 end, 3015 {UnhandledRes,Unhandled} = partition(IsReserved, Intervals), 3016 L = #l{unhandled_res=UnhandledRes, 3017 unhandled_any=Unhandled,free=Free}, 3018 #l{regs=Regs} = do_linear(L), 3019 St#st{regs=maps:from_list(Regs)}. 
3020 3021init_interval({V,[{Start,_}|_]=Rs}, Res) -> 3022 Info = map_get(V, Res), 3023 Pool = case Info of 3024 {prefer,{x,_}} -> x; 3025 x -> x; 3026 {x,_} -> x; 3027 {y,Uniq} -> Uniq; 3028 {{y,_},Uniq} -> Uniq; 3029 z -> z; 3030 fr -> fr 3031 end, 3032 Reg = case Info of 3033 {prefer,{x,_}} -> Info; 3034 {x,_} -> Info; 3035 {{y,_}=Y,_} -> Y; 3036 _ -> none 3037 end, 3038 #i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}. 3039 3040init_free(Res) -> 3041 Free0 = rel2fam([{x,{x,0}}|init_free_1(Res)]), 3042 #{x:=Xs0} = Free1 = maps:from_list(Free0), 3043 Xs = init_xregs(Xs0), 3044 Free = Free1#{x:=Xs}, 3045 Next = maps:fold(fun(K, V, A) -> [{{next,K},length(V)}|A] end, [], Free), 3046 maps:merge(Free, maps:from_list(Next)). 3047 3048init_free_1([{_,{prefer,{x,_}=Reg}}|Res]) -> 3049 [{x,Reg}|init_free_1(Res)]; 3050init_free_1([{_,{x,_}=Reg}|Res]) -> 3051 [{x,Reg}|init_free_1(Res)]; 3052init_free_1([{_,{y,Uniq}}|Res]) -> 3053 [{Uniq,{y,0}}|init_free_1(Res)]; 3054init_free_1([{_,{{y,_}=Reg,Uniq}}|Res]) -> 3055 [{Uniq,Reg}|init_free_1(Res)]; 3056init_free_1([{_,z}|Res]) -> 3057 [{z,{z,0}}|init_free_1(Res)]; 3058init_free_1([{_,fr}|Res]) -> 3059 [{fr,{fr,0}}|init_free_1(Res)]; 3060init_free_1([{_,x}|Res]) -> 3061 init_free_1(Res); 3062init_free_1([]) -> []. 3063 3064%% Make sure that the pool of xregs is contiguous. 3065init_xregs([{x,N},{x,M}|Is]) when N+1 =:= M -> 3066 [{x,N}|init_xregs([{x,M}|Is])]; 3067init_xregs([{x,N}|[{x,_}|_]=Is]) -> 3068 [{x,N}|init_xregs([{x,N+1}|Is])]; 3069init_xregs([{x,_}]=Is) -> Is. 3070 3071do_linear(L0) -> 3072 case set_next_current(L0) of 3073 done -> 3074 L0; 3075 L1 -> 3076 L2 = expire_active(L1), 3077 L3 = check_inactive(L2), 3078 Available = collect_available(L3), 3079 L4 = select_register(Available, L3), 3080 L = make_cur_active(L4), 3081 do_linear(L) 3082 end. 
3083 3084set_next_current(#l{unhandled_res=[Cur1|T1], 3085 unhandled_any=[Cur2|T2]}=L) -> 3086 case {Cur1,Cur2} of 3087 {#i{sort=N1},#i{sort=N2}} when N1 < N2 -> 3088 L#l{cur=Cur1,unhandled_res=T1}; 3089 {_,_} -> 3090 L#l{cur=Cur2,unhandled_any=T2} 3091 end; 3092set_next_current(#l{unhandled_res=[], 3093 unhandled_any=[Cur|T]}=L) -> 3094 L#l{cur=Cur,unhandled_any=T}; 3095set_next_current(#l{unhandled_res=[Cur|T], 3096 unhandled_any=[]}=L) -> 3097 L#l{cur=Cur,unhandled_res=T}; 3098set_next_current(#l{unhandled_res=[],unhandled_any=[]}) -> 3099 done. 3100 3101expire_active(#l{cur=#i{sort=CurBegin},active=Act0}=L0) -> 3102 {Act,L} = expire_active(Act0, CurBegin, L0, []), 3103 L#l{active=Act}. 3104 3105expire_active([#i{reg=Reg,rs=Rs0}=I|Is], CurBegin, L0, Acc) -> 3106 {_,_} = Reg, %Assertion. 3107 case overlap_status(Rs0, CurBegin) of 3108 ends_before_cur -> 3109 L = free_reg(I, L0), 3110 expire_active(Is, CurBegin, L, Acc); 3111 overlapping -> 3112 expire_active(Is, CurBegin, L0, [I|Acc]); 3113 not_overlapping -> 3114 Rs = strip_before_current(Rs0, CurBegin), 3115 L1 = free_reg(I, L0), 3116 L = L1#l{inactive=[I#i{rs=Rs}|L1#l.inactive]}, 3117 expire_active(Is, CurBegin, L, Acc) 3118 end; 3119expire_active([], _CurBegin, L, Acc) -> 3120 {Acc,L}. 3121 3122check_inactive(#l{cur=#i{sort=CurBegin},inactive=InAct0}=L0) -> 3123 {InAct,L} = check_inactive(InAct0, CurBegin, L0, []), 3124 L#l{inactive=InAct}. 3125 3126check_inactive([#i{rs=Rs0}=I|Is], CurBegin, L0, Acc) -> 3127 case overlap_status(Rs0, CurBegin) of 3128 ends_before_cur -> 3129 check_inactive(Is, CurBegin, L0, Acc); 3130 not_overlapping -> 3131 check_inactive(Is, CurBegin, L0, [I|Acc]); 3132 overlapping -> 3133 Rs = strip_before_current(Rs0, CurBegin), 3134 L1 = L0#l{active=[I#i{rs=Rs}|L0#l.active]}, 3135 L = reserve_reg(I, L1), 3136 check_inactive(Is, CurBegin, L, Acc) 3137 end; 3138check_inactive([], _CurBegin, L, Acc) -> 3139 {Acc,L}. 
3140 3141strip_before_current([{_,E}|Rs], CurBegin) when E =< CurBegin -> 3142 strip_before_current(Rs, CurBegin); 3143strip_before_current(Rs, _CurBegin) -> Rs. 3144 3145collect_available(#l{cur=#i{reg={prefer,{_,_}=Prefer}}=I}=L) -> 3146 %% Use the preferred register if it is available. 3147 Avail = collect_available(L#l{cur=I#i{reg=none}}), 3148 case member(Prefer, Avail) of 3149 true -> [Prefer]; 3150 false -> Avail 3151 end; 3152collect_available(#l{cur=#i{reg={_,_}=ReservedReg}}) -> 3153 %% Return the already reserved register. 3154 [ReservedReg]; 3155collect_available(#l{unhandled_res=Unhandled,cur=Cur}=L) -> 3156 Free = get_pool(Cur, L), 3157 3158 %% Note that since the live intervals are constructed from 3159 %% SSA form, there cannot be any overlap of the current interval 3160 %% with any inactive interval. See [3], page 175. Therefore we 3161 %% only have check the unhandled intervals for overlap with 3162 %% the current interval. As a further optimization, we only need 3163 %% to check the intervals that have reserved registers. 3164 collect_available(Unhandled, Cur, Free). 3165 3166collect_available([#i{pool=Pool1}|Is], #i{pool=Pool2}=Cur, Free) 3167 when Pool1 =/= Pool2 -> 3168 %% Wrong pool. Ignore this interval. 3169 collect_available(Is, Cur, Free); 3170collect_available([#i{reg={_,_}=Reg}=I|Is], Cur, Free0) -> 3171 case overlaps(I, Cur) of 3172 true -> 3173 Free = ordsets:del_element(Reg, Free0), 3174 collect_available(Is, Cur, Free); 3175 false -> 3176 collect_available(Is, Cur, Free0) 3177 end; 3178collect_available([], _, Free) -> Free. 3179 3180select_register([{_,_}=Reg|_], #l{cur=Cur0,regs=Regs}=L) -> 3181 Cur = Cur0#i{reg=Reg}, 3182 reserve_reg(Cur, L#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]}); 3183select_register([], #l{cur=Cur0,regs=Regs}=L0) -> 3184 %% Allocate a new register in the pool. 3185 {Reg,L1} = get_next_free(Cur0, L0), 3186 Cur = Cur0#i{reg=Reg}, 3187 L = L1#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]}, 3188 reserve_reg(Cur, L). 
3189 3190make_cur_active(#l{cur=Cur,active=Act}=L) -> 3191 L#l{active=[Cur|Act]}. 3192 3193overlaps(#i{rs=Rs1}, #i{rs=Rs2}) -> 3194 are_overlapping(Rs1, Rs2). 3195 3196overlap_status([{S,E}], CurBegin) -> 3197 if 3198 E =< CurBegin -> ends_before_cur; 3199 CurBegin < S -> not_overlapping; 3200 true -> overlapping 3201 end; 3202overlap_status([{S,E}|Rs], CurBegin) -> 3203 if 3204 E =< CurBegin -> 3205 overlap_status(Rs, CurBegin); 3206 S =< CurBegin -> 3207 overlapping; 3208 true -> 3209 not_overlapping 3210 end. 3211 3212reserve_reg(#i{reg={_,_}=Reg}=I, L) -> 3213 FreeRegs0 = get_pool(I, L), 3214 FreeRegs = ordsets:del_element(Reg, FreeRegs0), 3215 update_pool(I, FreeRegs, L). 3216 3217free_reg(#i{reg={_,_}=Reg}=I, L) -> 3218 FreeRegs0 = get_pool(I, L), 3219 FreeRegs = ordsets:add_element(Reg, FreeRegs0), 3220 update_pool(I, FreeRegs, L). 3221 3222get_pool(#i{pool=Pool}, #l{free=Free}) -> 3223 map_get(Pool, Free). 3224 3225update_pool(#i{pool=Pool}, New, #l{free=Free0}=L) -> 3226 Free = Free0#{Pool:=New}, 3227 L#l{free=Free}. 3228 3229get_next_free(#i{pool=Pool}, #l{free=Free0}=L0) -> 3230 K = {next,Pool}, 3231 N = map_get(K, Free0), 3232 Free = Free0#{K:=N+1}, 3233 L = L0#l{free=Free}, 3234 if 3235 is_integer(Pool) -> {{y,N},L}; 3236 is_atom(Pool) -> {{Pool,N},L} 3237 end. 3238 3239%%% 3240%%% Interval utilities. 3241%%% 3242 3243are_overlapping([R|Rs1], Rs2) -> 3244 case are_overlapping_1(R, Rs2) of 3245 true -> 3246 true; 3247 false -> 3248 are_overlapping(Rs1, Rs2) 3249 end; 3250are_overlapping([], _) -> false. 3251 3252are_overlapping_1({_S1,E1}, [{S2,_E2}|_]) when E1 < S2 -> 3253 false; 3254are_overlapping_1({S1,E1}=R, [{S2,E2}|Rs]) -> 3255 (S2 < E1 andalso E2 > S1) orelse are_overlapping_1(R, Rs); 3256are_overlapping_1({_,_}, []) -> false. 3257 3258%%% 3259%%% Utilities. 3260%%% 3261 3262%% is_loop_header(L, Blocks) -> false|true. 3263%% Check whether the block is a loop header. 
3264 3265is_loop_header(L, Blocks) -> 3266 case map_get(L, Blocks) of 3267 #b_blk{is=[I|_]} -> beam_ssa:is_loop_header(I); 3268 #b_blk{} -> false 3269 end. 3270 3271rel2fam(S0) -> 3272 S1 = sofs:relation(S0), 3273 S = sofs:rel2fam(S1), 3274 sofs:to_external(S). 3275 3276split_phis(Is) -> 3277 splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is). 3278 3279is_yreg({y,_}) -> true; 3280is_yreg({x,_}) -> false; 3281is_yreg({z,_}) -> false; 3282is_yreg({fr,_}) -> false. 3283 3284new_vars([Base|Vs0], Count0) -> 3285 {V,Count1} = new_var(Base, Count0), 3286 {Vs,Count} = new_vars(Vs0, Count1), 3287 {[V|Vs],Count}; 3288new_vars([], Count) -> {[],Count}. 3289 3290new_var({Base,Int}, Count) -> 3291 true = is_integer(Int), %Assertion. 3292 {#b_var{name={Base,Count}},Count+1}; 3293new_var(Base, Count) -> 3294 {#b_var{name={Base,Count}},Count+1}. 3295