1%% ``Licensed under the Apache License, Version 2.0 (the "License"); 2%% you may not use this file except in compliance with the License. 3%% You may obtain a copy of the License at 4%% 5%% http://www.apache.org/licenses/LICENSE-2.0 6%% 7%% Unless required by applicable law or agreed to in writing, software 8%% distributed under the License is distributed on an "AS IS" BASIS, 9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10%% See the License for the specific language governing permissions and 11%% limitations under the License. 12%% 13%% The Initial Developer of the Original Code is Richard Carlsson. 14%% Copyright (C) 1999-2002 Richard Carlsson. 15%% Portions created by Ericsson are Copyright 2001, Ericsson Utvecklings 16%% AB. All Rights Reserved.'' 17%% 18%% $Id: cerl_inline.erl,v 1.1 2008/12/17 09:53:41 mikpe Exp $ 19%% 20%% Core Erlang inliner. 21%% ===================================================================== 22%% 23%% This is an implementation of the algorithm by Waddell and Dybvig 24%% ("Fast and Effective Procedure Inlining", International Static 25%% Analysis Symposium 1997), adapted to the Core Erlang language. 26%% 27%% Instead of always renaming variables and function variables, this 28%% implementation uses the "no-shadowing strategy" of Peyton Jones and 29%% Marlow ("Secrets of the Glasgow Haskell Compiler Inliner", 1999). 30%% 31%% ===================================================================== 32 33%% TODO: inline single-source-reference operands without size limit. 34 35-module(cerl_inline). 36 37-export([core_transform/2, transform/1, transform/2]). 38 39-import(cerl, [abstract/1, alias_pat/1, alias_var/1, apply_args/1, 40 apply_op/1, atom_name/1, atom_val/1, bitstr_val/1, 41 bitstr_size/1, bitstr_unit/1, bitstr_type/1, 42 bitstr_flags/1, binary_segments/1, update_c_alias/3, 43 update_c_apply/3, update_c_binary/2, update_c_bitstr/6, 44 update_c_call/4, update_c_case/3, update_c_catch/2, 45 update_c_clause/4, c_fun/2, c_int/1, c_let/3, 46 update_c_let/4, update_c_letrec/3, update_c_module/5, 47 update_c_primop/3, update_c_receive/4, update_c_seq/3, 48 c_seq/2, update_c_try/6, c_tuple/1, update_c_values/2, 49 c_values/1, c_var/1, call_args/1, call_module/1, 50 call_name/1, case_arity/1, case_arg/1, case_clauses/1, 51 catch_body/1, clause_body/1, clause_guard/1, 52 clause_pats/1, clause_vars/1, concrete/1, cons_hd/1, 53 cons_tl/1, data_arity/1, data_es/1, data_type/1, 54 fun_body/1, fun_vars/1, get_ann/1, int_val/1, 55 is_c_atom/1, is_c_cons/1, is_c_fun/1, is_c_int/1, 56 is_c_list/1, is_c_seq/1, is_c_tuple/1, is_c_var/1, 57 is_data/1, is_literal/1, is_literal_term/1, let_arg/1, 58 let_body/1, let_vars/1, letrec_body/1, letrec_defs/1, 59 list_length/1, list_elements/1, update_data/3, 60 make_list/1, make_data_skel/2, module_attrs/1, 61 module_defs/1, module_exports/1, module_name/1, 62 primop_args/1, primop_name/1, receive_action/1, 63 receive_clauses/1, receive_timeout/1, seq_arg/1, 64 seq_body/1, set_ann/2, try_arg/1, try_body/1, try_vars/1, 65 try_evars/1, try_handler/1, tuple_es/1, tuple_arity/1, 66 type/1, values_es/1, var_name/1]). 67 68-import(lists, [foldl/3, foldr/3, mapfoldl/3, reverse/1]). 69 70%% 71%% Constants 72%% 73 74debug_runtime() -> false. 75debug_counters() -> false. 76 77%% Normal execution times for inlining are between 0.1 and 0.3 seconds 78%% (on the author's current equipment). The default effort limit of 150 79%% is high enough that most normal programs never hit the limit even 80%% once, and for difficult programs, it generally keeps the execution 81%% times below 2-5 seconds. Using an effort counter of 1000 will thus 82%% have no further effect on most programs, but some programs may take 83%% as much as 10 seconds or more. Effort counts larger than 2500 have 84%% never been observed even on very ill-conditioned programs. 85%% 86%% Size limits between 6 and 18 tend to actually shrink the code, 87%% because of the simplifications made possible by inlining. A limit of 88%% 16 seems to be optimal for this purpose, often shrinking the 89%% executable code by up to 10%. Size limits between 18 and 30 generally 90%% give the same code size as if no inlining was done (i.e., code 91%% duplication balances out the simplifications at these levels). A size 92%% limit between 1 and 5 tends to inline small functions and propagate 93%% constants, but does not cause much simplifications do be done, so the 94%% net effect will be a slight increase in code size. For size limits 95%% above 30, the executable code size tends to increase with about 10% 96%% per 100 units, with some variations depending on the sizes of 97%% functions in the source code. 98%% 99%% Typically, about 90% of the maximum speedup achievable is already 100%% reached using a size limit of 30, and 98% is reached at limits around 101%% 100-150; there is rarely any point in letting the code size increase 102%% by more than 10-15%. If too large functions are inlined, cache 103%% effects will slow the program down. 104 105default_effort() -> 150. 106default_size() -> 24. 107 108%% Base costs/weights for different kinds of expressions. If these are 109%% modified, the size limits above may have to be adjusted. 110 111weight(var) -> 0; % We count no cost for variable accesses. 112weight(values) -> 0; % Value aggregates have no cost in themselves. 113weight(literal) -> 1; % We assume efficient handling of constants. 114weight(data) -> 1; % Base cost; add 1 per element. 115weight(element) -> 1; % Cost of storing/fetching an element. 116weight(argument) -> 1; % Cost of passing a function argument. 117weight('fun') -> 6; % Base cost + average number of free vars. 118weight('let') -> 0; % Count no cost for let-bindings. 119weight(letrec) -> 0; % Like a let-binding. 120weight('case') -> 0; % Case switches have no base cost. 121weight(clause) -> 1; % Count one jump at the end of each clause body. 122weight('receive') -> 9; % Initialization/cleanup cost. 123weight('try') -> 1; % Assume efficient implementation. 124weight('catch') -> 1; % See `try'. 125weight(apply) -> 3; % Average base cost: call/return. 126weight(call) -> 3; % Assume remote-calls as efficient as `apply'. 127weight(primop) -> 2; % Assume more efficient than `apply'. 128weight(binary) -> 4; % Initialisation base cost. 129weight(bitstr) -> 3; % Coding/decoding a value; like a primop. 130weight(module) -> 1. % Like a letrec with a constant body 131 132%% These "reference" structures are used for variables and function 133%% variables. They keep track of the variable name, any bound operand, 134%% and the associated store location. 135 136-record(ref, {name, opnd, loc}). 137 138%% Operand structures contain the operand expression, the renaming and 139%% environment, the state location, and the effort counter at the call 140%% site (cf. `visit'). 141 142-record(opnd, {expr, ren, env, loc, effort}). 143 144%% Since expressions are only visited in `effect' context when they are 145%% not bound to a referenced variable, only expressions visited in 146%% 'value' context are cached. 147 148-record(cache, {expr, size}). 149 150%% The context flags for an application structure are kept separate from 151%% the structure itself. Note that the original algorithm had exactly 152%% one operand in each application context structure, while we can have 153%% several, or none. 154 155-record(app, {opnds, ctxt, loc}). 156 157 158%% 159%% Interface functions 160%% 161 162%% Use compile option `{core_transform, inline}' to insert this as a 163%% compilation pass. 164 165core_transform(Code, Opts) -> 166 cerl:to_records(transform(cerl:from_records(Code), Opts)). 167 168transform(Tree) -> 169 transform(Tree, []). 170 171transform(Tree, Opts) -> 172 main(Tree, value, Opts). 173 174main(Tree, Ctxt, Opts) -> 175 %% We spawn a new process to do the work, so we don't have to worry 176 %% about cluttering the process dictionary with debugging info, or 177 %% proper deallocation of ets-tables. 178 Opts1 = Opts ++ [{inline_size, default_size()}, 179 {inline_effort, default_effort()}], 180 Reply = self(), 181 Pid = spawn_link(fun () -> start(Reply, Tree, Ctxt, Opts1) end), 182 receive 183 {Pid1, Tree1} when Pid1 == Pid -> 184 Tree1 185 end. 186 187start(Reply, Tree, Ctxt, Opts) -> 188 init_debug(), 189 case debug_runtime() of 190 true -> 191 put(inline_start_time, 192 element(1, erlang:statistics(runtime))); 193 _ -> 194 ok 195 end, 196 Size = max(1, proplists:get_value(inline_size, Opts)), 197 Effort = max(1, proplists:get_value(inline_effort, Opts)), 198 case proplists:get_bool(verbose, Opts) of 199 true -> 200 io:fwrite("Inlining: inline_size=~w inline_effort=~w\n", 201 [Size, Effort]); 202 false -> 203 ok 204 end, 205 206 %% Note that the counters of the new state are passive. 207 S = st__new(Effort, Size), 208 209%%% Initialization is not needed at present. Note that the code in 210%%% `inline_init' is not up-to-date with this module. 211%%% {Tree1, S1} = inline_init:init(Tree, S), 212%%% {Tree2, _S2} = i(Tree1, Ctxt, S1), 213 {Tree2, _S2} = i(Tree, Ctxt, S), 214 report_debug(), 215 Reply ! {self(), Tree2}. 216 217init_debug() -> 218 case debug_counters() of 219 true -> 220 put(counter_effort_triggers, 0), 221 put(counter_effort_max, 0), 222 put(counter_size_triggers, 0), 223 put(counter_size_max, 0); 224 _ -> 225 ok 226 end. 227 228report_debug() -> 229 case debug_runtime() of 230 true -> 231 {Time, _} = erlang:statistics(runtime), 232 report("Total run time for inlining: ~.2.0f s.\n", 233 [(Time - get(inline_start_time))/1000]); 234 _ -> 235 ok 236 end, 237 case debug_counters() of 238 true -> 239 counter_stats(); 240 _ -> 241 ok 242 end. 243 244counter_stats() -> 245 T1 = get(counter_effort_triggers), 246 T2 = get(counter_size_triggers), 247 E = get(counter_effort_max), 248 S = get(counter_size_max), 249 M1 = io_lib:fwrite("\tNumber of triggered " 250 "effort counters: ~p.\n", [T1]), 251 M2 = io_lib:fwrite("\tNumber of triggered " 252 "size counters: ~p.\n", [T2]), 253 M3 = io_lib:fwrite("\tLargest active effort counter: ~p.\n", 254 [E]), 255 M4 = io_lib:fwrite("\tLargest active size counter: ~p.\n", 256 [S]), 257 report("Counter statistics:\n~s", [[M1, M2, M3, M4]]). 258 259 260%% ===================================================================== 261%% The main inlining function 262%% 263%% i(E :: coreErlang(), 264%% Ctxt :: value | effect | #app{} 265%% Ren :: renaming(), 266%% Env :: environment(), 267%% S :: state()) 268%% -> {E', S'} 269%% 270%% Note: It is expected that the input source code ('E') does not 271%% contain free variables. If it does, there is a risk of accidental 272%% name capture, in case a generated "new" variable name happens to be 273%% the same as the name of a variable that is free further below in the 274%% tree; the algorithm only consults the current environment to check if 275%% a name already exists. 276%% 277%% The renaming maps names of source-code variable and function 278%% variables to new names as necessary to avoid clashes, according to 279%% the "no-shadowing" strategy. The environment maps *residual-code* 280%% variables and function variables to operands and global information. 281%% Separating the renaming from the environment, and using the 282%% residual-code variables instead of the source-code variables as its 283%% domain, improves the behaviour of the algorithm when code needs to be 284%% traversed more than once. 285%% 286%% Note that there is no such thing as a `test' context for expressions 287%% in (Core) Erlang (see `i_case' below for details). 288 289i(E, Ctxt, S) -> 290 i(E, Ctxt, ren__identity(), env__empty(), S). 291 292i(E, Ctxt, Ren, Env, S0) -> 293 %% Count one unit of effort on each pass. 294 S = count_effort(1, S0), 295 case is_data(E) of 296 true -> 297 i_data(E, Ctxt, Ren, Env, S); 298 false -> 299 case type(E) of 300 var -> 301 i_var(E, Ctxt, Ren, Env, S); 302 values -> 303 i_values(E, Ctxt, Ren, Env, S); 304 'fun' -> 305 i_fun(E, Ctxt, Ren, Env, S); 306 seq -> 307 i_seq(E, Ctxt, Ren, Env, S); 308 'let' -> 309 i_let(E, Ctxt, Ren, Env, S); 310 letrec -> 311 i_letrec(E, Ctxt, Ren, Env, S); 312 'case' -> 313 i_case(E, Ctxt, Ren, Env, S); 314 'receive' -> 315 i_receive(E, Ctxt, Ren, Env, S); 316 apply -> 317 i_apply(E, Ctxt, Ren, Env, S); 318 call -> 319 i_call(E, Ctxt, Ren, Env, S); 320 primop -> 321 i_primop(E, Ren, Env, S); 322 'try' -> 323 i_try(E, Ctxt, Ren, Env, S); 324 'catch' -> 325 i_catch(E, Ctxt, Ren, Env, S); 326 binary -> 327 i_binary(E, Ren, Env, S); 328 module -> 329 i_module(E, Ctxt, Ren, Env, S) 330 end 331 end. 332 333i_data(E, Ctxt, Ren, Env, S) -> 334 case is_literal(E) of 335 true -> 336 %% This is the `(const c)' case of the original algorithm: 337 %% literal terms which (regardless of size) do not need to 338 %% be constructed dynamically at runtime - boldly assuming 339 %% that the compiler/runtime system can handle this. 340 case Ctxt of 341 effect -> 342 %% Reduce useless constants to a simple value. 343 {void(), count_size(weight(literal), S)}; 344 _ -> 345 %% (In Erlang, we cannot set all non-`false' 346 %% constants to `true' in a `test' context, like we 347 %% could do in Lisp or C, so the above is the only 348 %% special case to be handled here.) 349 {E, count_size(weight(literal), S)} 350 end; 351 false -> 352 %% Data constructors are like to calls to safe built-in 353 %% functions, for which we can "decide to inline" 354 %% immediately; there is no need to create operand 355 %% structures. In `effect' context, we can simply make a 356 %% sequence of the argument expressions, also visited in 357 %% `effect' context. In all other cases, the arguments are 358 %% visited for value. 359 case Ctxt of 360 effect -> 361 %% Note that this will count the sizes of the 362 %% subexpressions, even though some or all of them 363 %% might be discarded by the sequencing afterwards. 364 {Es1, S1} = mapfoldl(fun (E, S) -> 365 i(E, effect, Ren, Env, 366 S) 367 end, 368 S, data_es(E)), 369 E1 = foldl(fun (E1, E2) -> make_seq(E1, E2) end, 370 void(), Es1), 371 {E1, S1}; 372 _ -> 373 {Es1, S1} = mapfoldl(fun (E, S) -> 374 i(E, value, Ren, Env, 375 S) 376 end, 377 S, data_es(E)), 378 %% The total size/cost is the base cost for a data 379 %% constructor plus the cost for storing each 380 %% element. 381 N = weight(data) + length(Es1) * weight(element), 382 S2 = count_size(N, S1), 383 {update_data(E, data_type(E), Es1), S2} 384 end 385 end. 386 387%% This is the `(ref x)' (variable use) case of the original algorithm. 388%% Note that binding occurrences are always handled in the respective 389%% cases of the binding constructs. 390 391i_var(E, Ctxt, Ren, Env, S) -> 392 case Ctxt of 393 effect -> 394 %% Reduce useless variable references to a simple constant. 395 %% This also avoids useless visiting of bound operands. 396 {void(), count_size(weight(literal), S)}; 397 _ -> 398 Name = var_name(E), 399 case env__lookup(ren__map(Name, Ren), Env) of 400 {ok, R} -> 401 case R#ref.opnd of 402 undefined -> 403 %% The variable is not associated with an 404 %% argument expression; just residualize it. 405 residualize_var(R, S); 406 Opnd -> 407 i_var_1(R, Opnd, Ctxt, Env, S) 408 end; 409 error -> 410 %% The variable is unbound. (It has not been 411 %% accidentally captured, however, or it would have 412 %% been in the environment.) We leave it as it is, 413 %% without any warning. 414 {E, count_size(weight(var), S)} 415 end 416 end. 417 418%% This first visits the bound operand and then does copy propagation. 419%% Note that we must first set the "inner-pending" flag, and clear the 420%% flag afterwards. 421 422i_var_1(R, Opnd, Ctxt, Env, S) -> 423 %% If the operand is already "inner-pending", it is residualised. 424 %% (In Lisp/C, if the variable might be assigned to, it should also 425 %% be residualised.) 426 L = Opnd#opnd.loc, 427 case st__test_inner_pending(L, S) of 428 true -> 429 residualize_var(R, S); 430 false -> 431 S1 = st__mark_inner_pending(L, S), 432 case catch {ok, visit(Opnd, S1)} of 433 {ok, {E, S2}} -> 434 %% Note that we pass the current environment and 435 %% context to `copy', but not the current renaming. 436 S3 = st__clear_inner_pending(L, S2), 437 copy(R, Opnd, E, Ctxt, Env, S3); 438 {'EXIT', X} -> 439 exit(X); 440 X -> 441 %% If we use destructive update for the 442 %% `inner-pending' flag, we must make sure to clear 443 %% it also if we make a nonlocal return. 444 st__clear_inner_pending(Opnd#opnd.loc, S1), 445 throw(X) 446 end 447 end. 448 449%% A multiple-value aggregate `<e1, ..., en>'. This is very much like a 450%% tuple data constructor `{e1, ..., en}'; cf. `i_data' for details. 451 452i_values(E, Ctxt, Ren, Env, S) -> 453 case values_es(E) of 454 [E1] -> 455 %% Single-value aggregates can be dropped; they are simply 456 %% notation. 457 i(E1, Ctxt, Ren, Env, S); 458 Es -> 459 %% In `effect' context, we can simply make a sequence of the 460 %% argument expressions, also visited in `effect' context. 461 %% In all other cases, the arguments are visited for value. 462 case Ctxt of 463 effect -> 464 {Es1, S1} = 465 mapfoldl(fun (E, S) -> 466 i(E, effect, Ren, Env, S) 467 end, 468 S, Es), 469 E1 = foldl(fun (E1, E2) -> 470 make_seq(E1, E2) 471 end, 472 void(), Es1), 473 {E1, S1}; % drop annotations on E 474 _ -> 475 {Es1, S1} = mapfoldl(fun (E, S) -> 476 i(E, value, Ren, Env, 477 S) 478 end, 479 S, Es), 480 %% Aggregating values does not write them to memory, 481 %% so we count no extra cost per element. 482 S2 = count_size(weight(values), S1), 483 {update_c_values(E, Es1), S2} 484 end 485 end. 486 487%% A let-expression `let <v1,...,vn> = e0 in e1' is semantically 488%% equivalent to a case-expression `case e0 of <v1,...,vn> when 'true' 489%% -> e1 end'. As a special case, `let <v> = e0 in e1' is also 490%% equivalent to `apply fun (v) -> e0 (e1)'. However, for efficiency, 491%% and in order to allow the handling of `case' clauses to introduce new 492%% let-expressions without entering an infinite rewrite loop, we handle 493%% these directly. 494 495%%% %% Rewriting a `let' to an equivalent expression. 496%%% i_let(E, Ctxt, Ren, Env, S) -> 497%%% case let_vars(E) of 498%%% [V] -> 499%%% E1 = update_c_apply(E, c_fun([V], let_body(E)), [let_arg(E)]), 500%%% i(E1, Ctxt, Ren, Env, S); 501%%% Vs -> 502%%% C = c_clause(Vs, abstract(true), let_body(E)), 503%%% E1 = update_c_case(E, let_arg(E), [C]), 504%%% i(E1, Ctxt, Ren, Env, S) 505%%% end. 506 507i_let(E, Ctxt, Ren, Env, S) -> 508 case let_vars(E) of 509 [V] -> 510 i_let_1(V, E, Ctxt, Ren, Env, S); 511 Vs -> 512 %% Visit the argument expression in `value' context, to 513 %% simplify it as far as possible. 514 {A, S1} = i(let_arg(E), value, Ren, Env, S), 515 case get_components(length(Vs), result(A)) of 516 {true, As} -> 517 %% Note that only the components of the result of 518 %% `A' are passed on; any effects are hoisted. 519 {E1, S2} = i_let_2(Vs, As, E, Ctxt, Ren, Env, S1), 520 {hoist_effects(A, E1), S2}; 521 false -> 522 %% We cannot do anything with this `let', since the 523 %% variables cannot be matched against the argument 524 %% components. Just visit the variables for renaming 525 %% and visit the body for value (cf. `i_fun'). 526 {_, Ren1, Env1, S2} = bind_locals(Vs, Ren, Env, S1), 527 Vs1 = i_params(Vs, Ren1, Env1), 528 %% The body is always visited for value here. 529 {B, S3} = i(let_body(E), value, Ren1, Env1, S2), 530 S4 = count_size(weight('let'), S3), 531 {update_c_let(E, Vs1, A, B), S4} 532 end 533 end. 534 535%% Single-variable `let' binding. 536 537i_let_1(V, E, Ctxt, Ren, Env, S) -> 538 %% Make an operand structure for the argument expression, create a 539 %% local binding from the parameter to the operand structure, and 540 %% visit the body. Finally create necessary bindings and/or set 541 %% flags. 542 {Opnd, S1} = make_opnd(let_arg(E), Ren, Env, S), 543 {[R], Ren1, Env1, S2} = bind_locals([V], [Opnd], Ren, Env, S1), 544 {E1, S3} = i(let_body(E), Ctxt, Ren1, Env1, S2), 545 i_let_3([R], [Opnd], E1, S3). 546 547%% Multi-variable `let' binding. 548 549i_let_2(Vs, As, E, Ctxt, Ren, Env, S) -> 550 %% Make operand structures for the argument components. Note that 551 %% since the argument has already been visited at this point, we use 552 %% the identity renaming for the operands. 553 {Opnds, S1} = mapfoldl(fun (E, S) -> 554 make_opnd(E, ren__identity(), Env, S) 555 end, 556 S, As), 557 %% Create local bindings from the parameters to their respective 558 %% operand structures, and visit the body. 559 {Rs, Ren1, Env1, S2} = bind_locals(Vs, Opnds, Ren, Env, S1), 560 {E1, S3} = i(let_body(E), Ctxt, Ren1, Env1, S2), 561 i_let_3(Rs, Opnds, E1, S3). 562 563i_let_3(Rs, Opnds, E, S) -> 564 %% Create necessary bindings and/or set flags. 565 {E1, S1} = make_let_bindings(Rs, E, S), 566 567 %% We must also create evaluation for effect, for any unused 568 %% operands, as after an application expression. 569 residualize_operands(Opnds, E1, S1). 570 571%% A sequence `do e1 e2', written `(seq e1 e2)' in the original 572%% algorithm, where `e1' is evaluated for effect only (since its value 573%% is not used), and `e2' yields the final value. Note that we use 574%% `make_seq' to recompose the sequence after visiting the parts. 575 576i_seq(E, Ctxt, Ren, Env, S) -> 577 {E1, S1} = i(seq_arg(E), effect, Ren, Env, S), 578 {E2, S2} = i(seq_body(E), Ctxt, Ren, Env, S1), 579 %% A sequence has no cost in itself. 580 {make_seq(E1, E2), S2}. 581 582 583%% The `case' switch of Core Erlang is rather different from the boolean 584%% `(if e1 e2 e3)' case of the original algorithm, but the central idea 585%% is the same: if, given the simplified switch expression (which is 586%% visited in `value' context - a boolean `test' context would not be 587%% generally useful), there is a clause which could definitely be 588%% selected, such that no clause before it can possibly be selected, 589%% then we can eliminate all other clauses. (And even if this is not the 590%% case, some clauses can often be eliminated.) Furthermore, if a clause 591%% can be selected, we can replace the case-expression (including the 592%% switch expression) with the body of the clause and a set of zero or 593%% more let-bindings of subexpressions of the switch expression. (In the 594%% simplest case, the switch expression is evaluated only for effect.) 595 596i_case(E, Ctxt, Ren, Env, S) -> 597 %% First visit the switch expression in `value' context, to simplify 598 %% it as far as possible. Note that only the result part is passed 599 %% on to the clause matching below; any effects are hoisted. 600 {A, S1} = i(case_arg(E), value, Ren, Env, S), 601 A1 = result(A), 602 603 %% Propagating an application context into the branches could cause 604 %% the arguments of the application to be evaluated *after* the 605 %% switch expression, but *before* the body of the selected clause. 606 %% Such interleaving is not allowed in general, and it does not seem 607 %% worthwile to make a more powerful transformation here. Therefore, 608 %% the clause bodies are conservatively visited for value if the 609 %% context is `application'. 610 Ctxt1 = safe_context(Ctxt), 611 {E1, S2} = case get_components(case_arity(E), A1) of 612 {true, As} -> 613 i_case_1(As, E, Ctxt1, Ren, Env, S1); 614 false -> 615 i_case_1([], E, Ctxt1, Ren, Env, S1) 616 end, 617 {hoist_effects(A, E1), S2}. 618 619i_case_1(As, E, Ctxt, Ren, Env, S) -> 620 case i_clauses(As, case_clauses(E), Ctxt, Ren, Env, S) of 621 {false, {As1, Vs, Env1, Cs}, S1} -> 622 %% We still have a list of clauses. Sanity check: 623 if Cs == [] -> 624 report_warning("empty list of clauses " 625 "in residual program!.\n"); 626 true -> 627 ok 628 end, 629 {A, S2} = i(c_values(As1), value, ren__identity(), Env1, 630 S1), 631 {E1, S3} = i_case_2(Cs, A, E, S2), 632 i_case_3(Vs, Env1, E1, S3); 633 {true, {_, Vs, Env1, [C]}, S1} -> 634 %% A single clause was selected; we just take the body. 635 i_case_3(Vs, Env1, clause_body(C), S1) 636 end. 637 638%% Check if all clause bodies are actually equivalent expressions that 639%% do not depent on pattern variables (this sometimes occurs as a 640%% consequence of inlining, e.g., all branches might yield 'true'), and 641%% if so, replace the `case' with a sequence, first evaluating the 642%% clause selection for effect, then evaluating one of the clause bodies 643%% for its value. (Unless the switch contains a catch-all clause, the 644%% clause selection must be evaluated for effect, since there is no 645%% guarantee that any of the clauses will actually match. Assuming that 646%% some clause always matches could make an undefined program produce a 647%% value.) This makes the final size less than what was accounted for 648%% when visiting the clauses, but currently we don't try to adjust for 649%% this. 650 651i_case_2(Cs, A, E, S) -> 652 case equivalent_clauses(Cs) of 653 false -> 654 %% Count the base sizes for the remaining clauses; pattern 655 %% and guard sizes are already counted. 656 N = weight('case') + weight(clause) * length(Cs), 657 S1 = count_size(N, S), 658 {update_c_case(E, A, Cs), S1}; 659 true -> 660 case cerl_clauses:any_catchall(Cs) of 661 true -> 662 %% We know that some clause must be selected, so we 663 %% can drop all the testing as well. 664 E1 = make_seq(A, clause_body(hd(Cs))), 665 {E1, S}; 666 false -> 667 %% The clause selection must be performed for 668 %% effect. 669 E1 = update_c_case(E, A, 670 set_clause_bodies(Cs, void())), 671 {make_seq(E1, clause_body(hd(Cs))), S} 672 end 673 end. 674 675i_case_3(Vs, Env, E, S) -> 676 %% For the variables bound to the switch expression subexpressions, 677 %% make let bindings or create evaluation for effect. 678 Rs = [env__get(var_name(V), Env) || V <- Vs], 679 {E1, S1} = make_let_bindings(Rs, E, S), 680 Opnds = [R#ref.opnd || R <- Rs], 681 residualize_operands(Opnds, E1, S1). 682 683%% This function takes a sequence of switch expressions `Es' (which can 684%% be the empty list if these are unknown) and a list `Cs' of clauses, 685%% and returns `{Match, {As, Vs, Env1, Cs1}, S1}' where `As' is a list 686%% of residual switch expressions, `Vs' the list of variables used in 687%% the templates, `Env1' the environment for the templates, and `Cs1' 688%% the list of residual clauses. `Match' is `true' if some clause could 689%% be shown to definitely match (in this case, `Cs1' contains exactly 690%% one element), and `false' otherwise. `S1' is the new state. The given 691%% `Ctxt' is the context to be used for visiting the body of clauses. 692%% 693%% Visiting a clause basically amounts to extending the environment for 694%% all variables in the pattern, as for a `fun' (cf. `i_fun'), 695%% propagating match information if possible, and visiting the guard and 696%% body in the new environment. 697%% 698%% To make it cheaper to do handle a set of clauses, and to avoid 699%% unnecessarily exceeding the size limit, we avoid visiting the bodies 700%% of clauses which are subsequently removed, by dividing the visiting 701%% of a clause into two stages: first construct the environment(s) and 702%% visit the pattern (for renaming) and the guard (for value), then 703%% reduce the switch as much as possible, and lastly visit the body. 704 705i_clauses(Cs, Ctxt, Ren, Env, S) -> 706 i_clauses([], Cs, Ctxt, Ren, Env, S). 707 708i_clauses(Es, Cs, Ctxt, Ren, Env, S) -> 709 %% Create templates for the switch expressions. 710 {Ts, {Vs, Env0}} = mapfoldl(fun (E, {Vs, Env}) -> 711 {T, Vs1, Env1} = 712 make_template(E, Env), 713 {T, {Vs1 ++ Vs, Env1}} 714 end, 715 {[], Env}, Es), 716 717 %% Make operand structures for the switch subexpression templates 718 %% (found in `Env0') and add proper ref-structure bindings to the 719 %% environment. Since the subexpressions in general can be 720 %% interdependent (Vs is in reverse-dependency order), the 721 %% environment (and renaming) must be created incrementally. Note 722 %% that since the switch expressions have been visited already, the 723 %% identity renaming is used for the operands. 724 Vs1 = lists:reverse(Vs), 725 {Ren1, Env1, S1} = 726 foldl(fun (V, {Ren, Env, S}) -> 727 E = env__get(var_name(V), Env0), 728 {Opnd, S_1} = make_opnd(E, ren__identity(), Env, 729 S), 730 {_, Ren1, Env1, S_2} = bind_locals([V], [Opnd], 731 Ren, Env, S_1), 732 {Ren1, Env1, S_2} 733 end, 734 {Ren, Env, S}, Vs1), 735 736 %% First we visit the head of each individual clause, renaming 737 %% pattern variables, inserting let-bindings in the guard and body, 738 %% and visiting the guard. The information used for visiting the 739 %% clause body will be prefixed to the clause annotations. 740 {Cs1, S2} = mapfoldl(fun (C, S) -> 741 i_clause_head(C, Ts, Ren1, Env1, S) 742 end, 743 S1, Cs), 744 745 %% Now that the clause guards have been reduced as far as possible, 746 %% we can attempt to reduce the clauses. 747 As = [hd(get_ann(T)) || T <- Ts], 748 case cerl_clauses:reduce(Cs1, Ts) of 749 {false, Cs2} -> 750 %% We still have one or more clauses (with associated 751 %% extended environments). Their bodies have not yet been 752 %% visited, so we do that (in the respective safe 753 %% environments, adding the sizes of the visited heads to 754 %% the current size counter) and return the final list of 755 %% clauses. 756 {Cs3, S3} = mapfoldl( 757 fun (C, S) -> 758 i_clause_body(C, Ctxt, S) 759 end, 760 S2, Cs2), 761 {false, {As, Vs1, Env1, Cs3}, S3}; 762 {true, {C, _}} -> 763 %% A clause C could be selected (the bindings have already 764 %% been added to the guard/body). Note that since the clause 765 %% head will probably be discarded, its size is not counted. 766 {C1, Ren2, Env2, _} = get_clause_extras(C), 767 {B, S3} = i(clause_body(C), Ctxt, Ren2, Env2, S2), 768 C2 = update_c_clause(C1, clause_pats(C1), clause_guard(C1), B), 769 {true, {As, Vs1, Env1, [C2]}, S3} 770 end. 771 772%% This visits the head of a clause, renames pattern variables, inserts 773%% let-bindings in the guard and body, and does inlining on the guard 774%% expression. Returns a list of pairs `{NewClause, Data}', where `Data' 775%% is `{Renaming, Environment, Size}' used for visiting the body of the 776%% new clause. 777 778i_clause_head(C, Ts, Ren, Env, S) -> 779 %% Match the templates against the (non-renamed) patterns to get the 780 %% available information about matching subexpressions. We don't 781 %% care at this point whether an exact match/nomatch is detected. 782 Ps = clause_pats(C), 783 Bs = case cerl_clauses:match_list(Ps, Ts) of 784 {_, Bs1} -> Bs1; 785 none -> [] 786 end, 787 788 %% The patterns must be visited for renaming; cf. `i_pattern'. We 789 %% use a passive size counter for visiting the patterns and the 790 %% guard (cf. `visit'), because we do not know at this stage whether 791 %% the clause will be kept or not; the final value of the counter is 792 %% included in the returned value below. 793 {_, Ren1, Env1, S1} = bind_locals(clause_vars(C), Ren, Env, S), 794 S2 = new_passive_size(get_size_limit(S1), S1), 795 {Ps1, S3} = mapfoldl(fun (P, S) -> 796 i_pattern(P, Ren1, Env1, Ren, Env, S) 797 end, 798 S2, Ps), 799 800 %% Rewrite guard and body and visit the guard for value. Discard the 801 %% latter size count if the guard turns out to be a constant. 802 G = add_match_bindings(Bs, clause_guard(C)), 803 B = add_match_bindings(Bs, clause_body(C)), 804 {G1, S4} = i(G, value, Ren1, Env1, S3), 805 S5 = case is_literal(G1) of 806 true -> 807 revert_size(S3, S4); 808 false -> 809 S4 810 end, 811 812 %% Revert to the size counter we had on entry to this function. The 813 %% environment and renaming, together with the size of the clause 814 %% head, are prefixed to the annotations for later use. 815 Size = get_size_value(S5), 816 C1 = update_c_clause(C, Ps1, G1, B), 817 {set_clause_extras(C1, Ren1, Env1, Size), revert_size(S, S5)}. 818 819add_match_bindings(Bs, E) -> 820 %% Don't waste time if the variables definitely cannot be used. 821 %% (Most guards are simply `true'.) 822 case is_literal(E) of 823 true -> 824 E; 825 false -> 826 Vs = [V || {V, E} <- Bs, E /= any], 827 Es = [hd(get_ann(E)) || {_V, E} <- Bs, E /= any], 828 c_let(Vs, c_values(Es), E) 829 end. 830 831i_clause_body(C0, Ctxt, S) -> 832 {C, Ren, Env, Size} = get_clause_extras(C0), 833 S1 = count_size(Size, S), 834 {B, S2} = i(clause_body(C), Ctxt, Ren, Env, S1), 835 C1 = update_c_clause(C, clause_pats(C), clause_guard(C), B), 836 {C1, S2}. 837 838get_clause_extras(C) -> 839 [{Ren, Env, Size} | As] = get_ann(C), 840 {set_ann(C, As), Ren, Env, Size}. 841 842set_clause_extras(C, Ren, Env, Size) -> 843 As = [{Ren, Env, Size} | get_ann(C)], 844 set_ann(C, As). 845 846%% This is the `(lambda x e)' case of the original algorithm. A 847%% `fun' is like a lambda expression, but with a varying number of 848%% parameters; possibly zero. 849 850i_fun(E, Ctxt, Ren, Env, S) -> 851 case Ctxt of 852 effect -> 853 %% Reduce useless `fun' expressions to a simple constant; 854 %% visiting the body would be a waste of time, and could 855 %% needlessly mark variables as referenced. 856 {void(), count_size(weight(literal), S)}; 857 value -> 858 %% Note that the variables are visited as patterns. 859 Vs = fun_vars(E), 860 {_, Ren1, Env1, S1} = bind_locals(Vs, Ren, Env, S), 861 Vs1 = i_params(Vs, Ren1, Env1), 862 863 %% The body is always visited for value. 864 {B, S2} = i(fun_body(E), value, Ren1, Env1, S1), 865 866 %% We don't bother to include the exact number of free 867 %% variables in the cost for creating a fun-value. 868 S3 = count_size(weight('fun'), S2), 869 870 %% Inlining might have duplicated code, so we must remove 871 %% any 'id'-annotations from the original fun-expression. 872 %% (This forces a later stage to invent new id:s.) This is 873 %% necessary as long as fun:s may still need to be 874 %% identified the old way. Function variables that are not 875 %% in application context also have such annotations, but 876 %% the inlining will currently lose all annotations on 877 %% variable references (I think), so that's not a problem. 878 {set_ann(c_fun(Vs1, B), kill_id_anns(get_ann(E))), S3}; 879 #app{} -> 880 %% An application of a fun-expression (in the source code) 881 %% is handled by going directly to `inline'; this is never 882 %% residualised, and we don't set up new counters here. Note 883 %% that inlining of copy-propagated fun-expressions is done 884 %% in `copy'; not here. 885 inline(E, Ctxt, Ren, Env, S) 886 end. 887 888%% A `letrec' requires a circular environment, but is otherwise like a 889%% `let', i.e. like a direct lambda application. Note that only 890%% fun-expressions (lambda abstractions) may occur in the right-hand 891%% side of each definition. 892 893i_letrec(E, Ctxt, Ren, Env, S) -> 894 %% Note that we pass an empty list for the auto-referenced 895 %% (exported) functions here. 896 {Es, B, _, S1} = i_letrec(letrec_defs(E), letrec_body(E), [], Ctxt, 897 Ren, Env, S), 898 899 %% If no bindings remain, only the body is returned. 900 case Es of 901 [] -> 902 {B, S1}; % drop annotations on E 903 _ -> 904 S2 = count_size(weight(letrec), S1), 905 {update_c_letrec(E, Es, B), S2} 906 end. 907 908%% The major part of this is shared by letrec-expressions and module 909%% definitions alike. 910 911i_letrec(Es, B, Xs, Ctxt, Ren, Env, S) -> 912 %% First, we create operands with dummy renamings and environments, 913 %% and with fresh store locations for cached expressions and operand 914 %% info. 915 {Opnds, S1} = mapfoldl(fun ({_, E}, S) -> 916 make_opnd(E, undefined, undefined, S) 917 end, 918 S, Es), 919 920 %% Then we make recursive bindings for the definitions. 921 {Rs, Ren1, Env1, S2} = bind_recursive([F || {F, _} <- Es], 922 Opnds, Ren, Env, S1), 923 924 %% For the function variables listed in Xs (none for a 925 %% letrec-expression), we must make sure that the corresponding 926 %% operand expressions are visited and that the definitions are 927 %% marked as referenced; we also need to return the possibly renamed 928 %% function variables. 929 {Xs1, S3} = 930 mapfoldl( 931 fun (X, S) -> 932 Name = ren__map(var_name(X), Ren1), 933 case env__lookup(Name, Env1) of 934 {ok, R} -> 935 S_1 = i_letrec_export(R, S), 936 {ref_to_var(R), S_1}; 937 error -> 938 %% We just skip any exports that are not 939 %% actually defined here, and generate a 940 %% warning message. 941 {N, A} = var_name(X), 942 report_warning("export `~w'/~w " 943 "not defined.\n", [N, A]), 944 {X, S} 945 end 946 end, 947 S2, Xs), 948 949 %% At last, we can then visit the body. 950 {B1, S4} = i(B, Ctxt, Ren1, Env1, S3), 951 952 %% Finally, we create new letrec-bindings for any and all 953 %% residualised definitions. All referenced functions should have 954 %% been visited; the call to `visit' below is expected to retrieve a 955 %% cached expression. 956 Rs1 = keep_referenced(Rs, S4), 957 {Es1, S5} = mapfoldl(fun (R, S) -> 958 {E_1, S_1} = visit(R#ref.opnd, S), 959 {{ref_to_var(R), E_1}, S_1} 960 end, 961 S4, Rs1), 962 {Es1, B1, Xs1, S5}. 963 964%% This visits the operand for a function definition exported by a 965%% `letrec' (which is really a `module' module definition, since normal 966%% letrecs have no export declarations). Only the updated state is 967%% returned. We must handle the "inner-pending" flag when doing this; 968%% cf. `i_var'. 969 970i_letrec_export(R, S) -> 971 Opnd = R#ref.opnd, 972 S1 = st__mark_inner_pending(Opnd#opnd.loc, S), 973 {_, S2} = visit(Opnd, S1), 974 {_, S3} = residualize_var(R, st__clear_inner_pending(Opnd#opnd.loc, 975 S2)), 976 S3. 977 978%% This is the `(call e1 e2)' case of the original algorithm. The only 979%% difference is that we must handle multiple (or no) operand 980%% expressions. 981 982i_apply(E, Ctxt, Ren, Env, S) -> 983 {Opnds, S1} = mapfoldl(fun (E, S) -> 984 make_opnd(E, Ren, Env, S) 985 end, 986 S, apply_args(E)), 987 988 %% Allocate a new app-context location and set up an application 989 %% context structure containing the surrounding context. 990 {L, S2} = st__new_app_loc(S1), 991 Ctxt1 = #app{opnds = Opnds, ctxt = Ctxt, loc = L}, 992 993 %% Visit the operator expression in the new call context. 994 {E1, S3} = i(apply_op(E), Ctxt1, Ren, Env, S2), 995 996 %% Check the "inlined" flag to find out what to do next. (The store 997 %% location could be recycled after the flag has been tested, but 998 %% there is no real advantage to that, because in practice, only 999 %% 4-5% of all created store locations will ever be reused, while 1000 %% there will be a noticeable overhead for managing the free list.) 1001 case st__get_app_inlined(L, S3) of 1002 true -> 1003 %% The application was inlined, so we have the final 1004 %% expression in `E1'. We just have to handle any operands 1005 %% that need to be residualized for effect only (i.e., those 1006 %% the values of which are not used). 1007 residualize_operands(Opnds, E1, S3); 1008 false -> 1009 %% Otherwise, `E1' is the residual operator expression. We 1010 %% make sure all operands are visited, and rebuild the 1011 %% application. 1012 {Es, S4} = mapfoldl(fun (Opnd, S) -> 1013 visit_and_count_size(Opnd, S) 1014 end, 1015 S3, Opnds), 1016 N = apply_size(length(Es)), 1017 {update_c_apply(E, E1, Es), count_size(N, S4)} 1018 end. 1019 1020apply_size(A) -> 1021 weight(apply) + weight(argument) * A. 1022 1023%% Since it is not the task of this transformation to handle 1024%% cross-module inlining, all inter-module calls are handled by visiting 1025%% the components (the module and function name, and the arguments of 1026%% the call) for value. In `effect' context, if the function itself is 1027%% known to be completely effect free, the call can be discarded and the 1028%% arguments evaluated for effect. Otherwise, if all the visited 1029%% arguments are to constants, and the function is known to be safe to 1030%% execute at compile time, then we try to evaluate the call. If 1031%% evaluation completes normally, the call is replaced by the result; 1032%% otherwise the call is residualised. 1033 1034i_call(E, Ctxt, Ren, Env, S) -> 1035 {M, S1} = i(call_module(E), value, Ren, Env, S), 1036 {F, S2} = i(call_name(E), value, Ren, Env, S1), 1037 As = call_args(E), 1038 Arity = length(As), 1039 1040 %% Check if the name of the called function is static. If so, 1041 %% discard the size counts performed above, since the values will 1042 %% not cause any runtime cost. 1043 Static = is_c_atom(M) and is_c_atom(F), 1044 S3 = case Static of 1045 true -> 1046 revert_size(S, S2); 1047 false -> 1048 S2 1049 end, 1050 case Ctxt of 1051 effect when Static == true -> 1052 case is_safe_call(atom_val(M), atom_val(F), Arity) of 1053 true -> 1054 %% The result will not be used, and the call is 1055 %% effect free, so we create a multiple-value 1056 %% aggregate containing the (not yet visited) 1057 %% arguments and process that instead. 1058 i(c_values(As), effect, Ren, Env, S3); 1059 false -> 1060 %% We are not allowed to simply discard the call, 1061 %% but we can try to evaluate it. 1062 i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, 1063 S3) 1064 end; 1065 _ -> 1066 i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, S3) 1067 end. 1068 1069i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, S) -> 1070 %% Visit the arguments for value. 1071 {As1, S1} = mapfoldl(fun (X, A) -> i(X, value, Ren, Env, A) end, 1072 S, As), 1073 case Static of 1074 true -> 1075 case erl_bifs:is_pure(atom_val(M), atom_val(F), Arity) of 1076 true -> 1077 %% It is allowed to evaluate this at compile time. 1078 case all_static(As1) of 1079 true -> 1080 i_call_3(M, F, As1, E, Ctxt, Env, S1); 1081 false -> 1082 %% See if the call can be rewritten instead. 1083 i_call_4(M, F, As1, E, Ctxt, Env, S1) 1084 end; 1085 false -> 1086 i_call_2(M, F, As1, E, S1) 1087 end; 1088 false -> 1089 i_call_2(M, F, As1, E, S1) 1090 end. 1091 1092%% Residualise the call. 1093 1094i_call_2(M, F, As, E, S) -> 1095 N = weight(call) + weight(argument) * length(As), 1096 {update_c_call(E, M, F, As), count_size(N, S)}. 1097 1098%% Attempt to evaluate the call to yield a literal; if that fails, try 1099%% to rewrite the expression. 1100 1101i_call_3(M, F, As, E, Ctxt, Env, S) -> 1102 %% Note that we extract the results of argument expessions here; the 1103 %% expressions could still be sequences with side effects. 1104 Vs = [concrete(result(A)) || A <- As], 1105 case catch {ok, apply(atom_val(M), atom_val(F), Vs)} of 1106 {ok, V} -> 1107 %% Evaluation completed normally - try to turn the result 1108 %% back into a syntax tree (representing a literal). 1109 case is_literal_term(V) of 1110 true -> 1111 %% Make a sequence of the arguments (as a 1112 %% multiple-value aggregate) and the final value. 1113 S1 = count_size(weight(values), S), 1114 S2 = count_size(weight(literal), S1), 1115 {make_seq(c_values(As), abstract(V)), S2}; 1116 false -> 1117 %% The result could not be represented as a literal. 1118 i_call_4(M, F, As, E, Ctxt, Env, S) 1119 end; 1120 _ -> 1121 %% The evaluation attempt did not complete normally. 1122 i_call_4(M, F, As, E, Ctxt, Env, S) 1123 end. 1124 1125%% Rewrite the expression, if possible, otherwise residualise it. 1126 1127i_call_4(M, F, As, E, Ctxt, Env, S) -> 1128 case reduce_bif_call(atom_val(M), atom_val(F), As, Env) of 1129 false -> 1130 %% Nothing more to be done - residualise the call. 1131 i_call_2(M, F, As, E, S); 1132 {true, E1} -> 1133 %% We revisit the result, because the rewriting might have 1134 %% opened possibilities for further inlining. Since the 1135 %% parts have already been visited once, we use the identity 1136 %% renaming here. 1137 i(E1, Ctxt, ren__identity(), Env, S) 1138 end. 1139 1140%% For now, we assume that primops cannot be evaluated at compile time, 1141%% probably being too special. Also, we have no knowledge about their 1142%% side effects. 1143 1144i_primop(E, Ren, Env, S) -> 1145 %% Visit the arguments for value. 1146 {As, S1} = mapfoldl(fun (E, S) -> 1147 i(E, value, Ren, Env, S) 1148 end, 1149 S, primop_args(E)), 1150 N = weight(primop) + weight(argument) * length(As), 1151 {update_c_primop(E, primop_name(E), As), count_size(N, S1)}. 1152 1153%% This is like having an expression with an extra fun-expression 1154%% attached for "exceptional cases"; actually, there are exactly two 1155%% parameter variables for the body, but they are easiest handled as if 1156%% their number might vary, just as for a `fun'. 1157 1158i_try(E, Ctxt, Ren, Env, S) -> 1159 %% The argument expression is evaluated in `value' context, and the 1160 %% surrounding context is propagated into both branches. We do not 1161 %% try to recognize cases when the protected expression will 1162 %% actually raise an exception. Note that the variables are visited 1163 %% as patterns. 1164 {A, S1} = i(try_arg(E), value, Ren, Env, S), 1165 Vs = try_vars(E), 1166 {_, Ren1, Env1, S2} = bind_locals(Vs, Ren, Env, S1), 1167 Vs1 = i_params(Vs, Ren1, Env1), 1168 {B, S3} = i(try_body(E), Ctxt, Ren1, Env1, S2), 1169 case is_safe(A) of 1170 true -> 1171 %% The `try' wrapper can be dropped in this case. Since the 1172 %% expressions have been visited already, the identity 1173 %% renaming is used when we revisit the new let-expression. 1174 i(c_let(Vs1, A, B), Ctxt, ren__identity(), Env, S3); 1175 false -> 1176 Evs = try_evars(E), 1177 {_, Ren2, Env2, S4} = bind_locals(Evs, Ren, Env, S3), 1178 Evs1 = i_params(Evs, Ren2, Env2), 1179 {H, S5} = i(try_handler(E), Ctxt, Ren2, Env2, S4), 1180 S6 = count_size(weight('try'), S5), 1181 {update_c_try(E, A, Vs1, B, Evs1, H), S6} 1182 end. 1183 1184%% A special case of try-expressions: 1185 1186i_catch(E, Ctxt, Ren, Env, S) -> 1187 %% We cannot propagate application contexts into the catch. 1188 {E1, S1} = i(catch_body(E), safe_context(Ctxt), Ren, Env, S), 1189 case is_safe(E1) of 1190 true -> 1191 %% The `catch' wrapper can be dropped in this case. 1192 {E1, S1}; 1193 false -> 1194 S2 = count_size(weight('catch'), S1), 1195 {update_c_catch(E, E1), S2} 1196 end. 1197 1198%% A receive-expression is very much like a case-expression, with the 1199%% difference that we do not have access to a switch expression, since 1200%% the value being switched on is taken from the mailbox. The fact that 1201%% the receive-expression may iterate over an arbitrary number of 1202%% messages is not of interest to us. All we can do here is to visit its 1203%% subexpressions, and possibly eliminate definitely unselectable 1204%% clauses. 1205 1206i_receive(E, Ctxt, Ren, Env, S) -> 1207 %% We first visit the expiry expression (for value) and the expiry 1208 %% body (in the surrounding context). 1209 {T, S1} = i(receive_timeout(E), value, Ren, Env, S), 1210 {B, S2} = i(receive_action(E), Ctxt, Ren, Env, S1), 1211 1212 %% Then we visit the clauses. Note that application contexts may not 1213 %% in general be propagated into the branches (and the expiry body), 1214 %% because the execution of the `receive' may remove a message from 1215 %% the mailbox as a side effect; the situation is thus analogous to 1216 %% that in a `case' expression. 1217 Ctxt1 = safe_context(Ctxt), 1218 case i_clauses(receive_clauses(E), Ctxt1, Ren, Env, S2) of 1219 {false, {[], _, _, Cs}, S3} -> 1220 %% We still have a list of clauses. If the list is empty, 1221 %% and the expiry expression is the integer zero, the 1222 %% expression reduces to the expiry body. 1223 if Cs == [] -> 1224 case is_c_int(T) andalso (int_val(T) == 0) of 1225 true -> 1226 {B, S3}; 1227 false -> 1228 i_receive_1(E, Cs, T, B, S3) 1229 end; 1230 true -> 1231 i_receive_1(E, Cs, T, B, S3) 1232 end; 1233 {true, {_, _, _, Cs}, S3} -> 1234 %% Cs is a single clause that will always be matched (if a 1235 %% message exists), but we must keep the `receive' statement 1236 %% in order to fetch the message from the mailbox. 1237 i_receive_1(E, Cs, T, B, S3) 1238 end. 1239 1240i_receive_1(E, Cs, T, B, S) -> 1241 %% Here, we just add the base sizes for the receive-expression 1242 %% itself and for each remaining clause; cf. `case'. 1243 N = weight('receive') + weight(clause) * length(Cs), 1244 {update_c_receive(E, Cs, T, B), count_size(N, S)}. 1245 1246%% A module definition is like a `letrec', with some add-ons (export and 1247%% attribute declarations) but without an explicit body. Actually, the 1248%% exporting of function names has the same effect as if there was a 1249%% body consisting of the list of references to the exported functions. 1250%% Thus, the exported functions are exactly those which can be 1251%% referenced from outside the module. 1252 1253i_module(E, Ctxt, Ren, Env, S) -> 1254 %% Cf. `i_letrec'. Note that we pass a dummy constant value for the 1255 %% "body" parameter. 1256 {Es, _, Xs1, S1} = i_letrec(module_defs(E), void(), 1257 module_exports(E), Ctxt, Ren, Env, S), 1258 %% Sanity check: 1259 case Es of 1260 [] -> 1261 report_warning("no function definitions remaining " 1262 "in module `~s'.\n", 1263 [atom_name(module_name(E))]); 1264 _ -> 1265 ok 1266 end, 1267 E1 = update_c_module(E, module_name(E), Xs1, module_attrs(E), Es), 1268 {E1, count_size(weight(module), S1)}. 1269 1270%% Binary-syntax expressions are too complicated to do anything 1271%% interesting with here - that is beyond the scope of this program; 1272%% also, their construction could have side effects, so even in effect 1273%% context we can't remove them. (We don't bother to identify cases of 1274%% "safe" unused binaries which could be removed.) 1275 1276i_binary(E, Ren, Env, S) -> 1277 %% Visit the segments for value. 1278 {Es, S1} = mapfoldl(fun (E, S) -> 1279 i_bitstr(E, Ren, Env, S) 1280 end, 1281 S, binary_segments(E)), 1282 S2 = count_size(weight(binary), S1), 1283 {update_c_binary(E, Es), S2}. 1284 1285i_bitstr(E, Ren, Env, S) -> 1286 %% It is not necessary to visit the Unit, Type and Flags fields, 1287 %% since these are always literals. 1288 {Val, S1} = i(bitstr_val(E), value, Ren, Env, S), 1289 {Size, S2} = i(bitstr_size(E), value, Ren, Env, S1), 1290 Unit = bitstr_unit(E), 1291 Type = bitstr_type(E), 1292 Flags = bitstr_flags(E), 1293 S3 = count_size(weight(bitstr), S2), 1294 {update_c_bitstr(E, Val, Size, Unit, Type, Flags), S3}. 1295 1296%% This is a simplified version of `i_pattern', for lists of parameter 1297%% variables only. It does not modify the state. 1298 1299i_params([V | Vs], Ren, Env) -> 1300 Name = ren__map(var_name(V), Ren), 1301 case env__lookup(Name, Env) of 1302 {ok, R} -> 1303 [ref_to_var(R) | i_params(Vs, Ren, Env)]; 1304 error -> 1305 report_internal_error("variable `~w' not bound " 1306 "in pattern.\n", [Name]), 1307 exit(error) 1308 end; 1309i_params([], _, _) -> 1310 []. 1311 1312%% For ordinary patterns, we just visit to rename variables and count 1313%% the size/cost. All occurring binding instances of variables should 1314%% already have been added to the renaming and environment; however, to 1315%% handle the size expressions of binary-syntax patterns, we must pass 1316%% the renaming and environment of the containing expression 1317 1318i_pattern(E, Ren, Env, Ren0, Env0, S) -> 1319 case type(E) of 1320 var -> 1321 %% Count no size. 1322 Name = ren__map(var_name(E), Ren), 1323 case env__lookup(Name, Env) of 1324 {ok, R} -> 1325 {ref_to_var(R), S}; 1326 error -> 1327 report_internal_error("variable `~w' not bound " 1328 "in pattern.\n", [Name]), 1329 exit(error) 1330 end; 1331 alias -> 1332 %% Count no size. 1333 V = alias_var(E), 1334 Name = ren__map(var_name(V), Ren), 1335 case env__lookup(Name, Env) of 1336 {ok, R} -> 1337 %% Visit the subpattern and recompose. 1338 V1 = ref_to_var(R), 1339 {P, S1} = i_pattern(alias_pat(E), Ren, Env, Ren0, 1340 Env0, S), 1341 {update_c_alias(E, V1, P), S1}; 1342 error -> 1343 report_internal_error("variable `~w' not bound " 1344 "in pattern.\n", [Name]), 1345 exit(error) 1346 end; 1347 binary -> 1348 {Es, S1} = mapfoldl(fun (E, S) -> 1349 i_bitstr_pattern(E, Ren, Env, 1350 Ren0, Env0, S) 1351 end, 1352 S, binary_segments(E)), 1353 S2 = count_size(weight(binary), S1), 1354 {update_c_binary(E, Es), S2}; 1355 _ -> 1356 case is_literal(E) of 1357 true -> 1358 {E, count_size(weight(literal), S)}; 1359 false -> 1360 {Es1, S1} = mapfoldl(fun (E, S) -> 1361 i_pattern(E, Ren, Env, 1362 Ren0, Env0, 1363 S) 1364 end, 1365 S, data_es(E)), 1366 %% We assume that in general, the elements of the 1367 %% constructor will all be fetched. 1368 N = weight(data) + length(Es1) * weight(element), 1369 S2 = count_size(N, S1), 1370 {update_data(E, data_type(E), Es1), S2} 1371 end 1372 end. 1373 1374i_bitstr_pattern(E, Ren, Env, Ren0, Env0, S) -> 1375 %% It is not necessary to visit the Unit, Type and Flags fields, 1376 %% since these are always literals. The Value field is a limited 1377 %% pattern - either a literal or an unbound variable. The Size field 1378 %% is a limited expression - either a literal or a variable bound in 1379 %% the environment of the containing expression. 1380 {Val, S1} = i_pattern(bitstr_val(E), Ren, Env, Ren0, Env0, S), 1381 {Size, S2} = i(bitstr_size(E), value, Ren0, Env0, S1), 1382 Unit = bitstr_unit(E), 1383 Type = bitstr_type(E), 1384 Flags = bitstr_flags(E), 1385 S3 = count_size(weight(bitstr), S2), 1386 {update_c_bitstr(E, Val, Size, Unit, Type, Flags), S3}. 1387 1388 1389%% --------------------------------------------------------------------- 1390%% Other central inlining functions 1391 1392%% It is assumed here that `E' is a fun-expression and the context is an 1393%% app-structure. If the inlining might be aborted for some reason, a 1394%% corresponding catch should have been set up before entering `inline'. 1395%% 1396%% Note: if the inlined body is a lambda abstraction, and the 1397%% surrounding context of the app-context is also an app-context, the 1398%% `inlined' flag of the outermost context will be set before that of 1399%% the inner context is set. E.g.: `let F = fun (X) -> fun (Y) -> E in 1400%% apply apply F(A)(B)' will propagate the body of F, which is a lambda 1401%% abstraction, into the outer application context, which will be 1402%% inlined to produce expression `E', and the flag of the outer context 1403%% will be set. Upon return, the flag of the inner context will also be 1404%% set. However, the flags are then tested in innermost-first order. 1405%% Thus, if some inlining attempt is aborted, the `inlined' flags of any 1406%% nested app-contexts must be cleared. 1407%% 1408%% This implementation does nothing to handle inlining of calls to 1409%% recursive functions in a smart way. This means that as long as the 1410%% size and effort counters do not prevent it, the function body will be 1411%% inlined (i.e., the first iteration will be unrolled), and the 1412%% recursive calls will be residualized. 1413 1414inline(E, #app{opnds = Opnds, ctxt = Ctxt, loc = L}, Ren, Env, S) -> 1415 %% Check that the arities match: 1416 Vs = fun_vars(E), 1417 if length(Opnds) /= length(Vs) -> 1418 report_error("function called with wrong number " 1419 "of arguments!\n"), 1420 %% TODO: should really just residualise the call... 1421 exit(error); 1422 true -> 1423 ok 1424 end, 1425 %% Create local bindings for the parameters to their respective 1426 %% operand structures from the app-structure, and visit the body in 1427 %% the context saved in the structure. 1428 {Rs, Ren1, Env1, S1} = bind_locals(Vs, Opnds, Ren, Env, S), 1429 {E1, S2} = i(fun_body(E), Ctxt, Ren1, Env1, S1), 1430 1431 %% Create necessary bindings and/or set flags. 1432 {E2, S3} = make_let_bindings(Rs, E1, S2), 1433 1434 %% Lastly, flag the application as inlined, since the inlining 1435 %% attempt was not aborted before we reached this point. 1436 {E2, st__set_app_inlined(L, S3)}. 1437 1438%% For the (possibly renamed) argument variables to an inlined call, 1439%% either create `let' bindings for them, if they are still referenced 1440%% in the residual expression (in C/Lisp, also if they are assigned to), 1441%% or otherwise (if they are not referenced or assigned) mark them for 1442%% evaluation for side effects. 1443 1444make_let_bindings([R | Rs], E, S) -> 1445 {E1, S1} = make_let_bindings(Rs, E, S), 1446 make_let_binding(R, E1, S1); 1447make_let_bindings([], E, S) -> 1448 {E, S}. 1449 1450make_let_binding(R, E, S) -> 1451 %% The `referenced' flag is conservatively computed. We therefore 1452 %% first check some simple cases where parameter R is definitely not 1453 %% referenced in the resulting body E. 1454 case is_literal(E) of 1455 true -> 1456 %% A constant contains no variable references. 1457 make_let_binding_1(R, E, S); 1458 false -> 1459 case is_c_var(E) of 1460 true -> 1461 case var_name(E) =:= R#ref.name of 1462 true -> 1463 %% The body is simply the parameter variable 1464 %% itself. Visit the operand for value and 1465 %% substitute the result for the body. 1466 visit_and_count_size(R#ref.opnd, S); 1467 false -> 1468 %% Not the same variable, so the parameter 1469 %% is not referenced at all. 1470 make_let_binding_1(R, E, S) 1471 end; 1472 false -> 1473 %% Proceed to check the `referenced' flag. 1474 case st__get_var_referenced(R#ref.loc, S) of 1475 true -> 1476 %% The parameter is probably referenced in 1477 %% the residual code (although it might not 1478 %% be). Visit the operand for value and 1479 %% create a let-binding. 1480 {E1, S1} = visit_and_count_size(R#ref.opnd, 1481 S), 1482 S2 = count_size(weight('let'), S1), 1483 {c_let([ref_to_var(R)], E1, E), S2}; 1484 false -> 1485 %% The parameter is definitely not 1486 %% referenced. 1487 make_let_binding_1(R, E, S) 1488 end 1489 end 1490 end. 1491 1492%% This marks the operand for evaluation for effect. 1493 1494make_let_binding_1(R, E, S) -> 1495 Opnd = R#ref.opnd, 1496 {E, st__set_opnd_effect(Opnd#opnd.loc, S)}. 1497 1498%% Here, `R' is the ref-structure which is the target of the copy 1499%% propagation, and `Opnd' is a visited operand structure, to be 1500%% propagated through `R' if possible - if not, `R' is residualised. 1501%% `Opnd' is normally the operand that `R' is bound to, and `E' is the 1502%% result of visiting `Opnd' for value; we pass this as an argument so 1503%% we don't have to fetch it multiple times (because we don't have 1504%% constant time access). 1505%% 1506%% We also pass the environment of the site of the variable reference, 1507%% for use when inlining a propagated fun-expression. In the original 1508%% algorithm by Waddell, the environment used for inlining such cases is 1509%% the identity mapping, because the fun-expression body has already 1510%% been visited for value, and their algorithm combines renaming of 1511%% source-code variables with the looking up of information about 1512%% residual-code variables. We, however, need to check the environment 1513%% of the call site when creating new non-shadowed variables, but we 1514%% must avoid repeated renaming. We therefore separate the renaming and 1515%% the environment (as in the renaming algorithm of Peyton-Jones and 1516%% Marlow). This also makes our implementation more general, compared to 1517%% the original algorithm, because we do not give up on propagating 1518%% variables that were free in the fun-body. 1519%% 1520%% Example: 1521%% 1522%% let F = fun (X) -> {'foo', X} in 1523%% let G = fun (H) -> apply H(F) % F is free in the fun G 1524%% in apply G(fun (F) -> apply F(42)) 1525%% => 1526%% let F = fun (X) -> {'foo', X} in 1527%% apply (fun (H) -> apply H(F))(fun (F) -> apply F(42)) 1528%% => 1529%% let F = fun (X) -> {'foo', X} in 1530%% apply (fun (F) -> apply F(42))(F) 1531%% => 1532%% let F = fun (X) -> {'foo', X} in 1533%% apply F(42) 1534%% => 1535%% apply (fun (X) -> {'foo', X})(2) 1536%% => 1537%% {'foo', 42} 1538%% 1539%% The original algorithm would give up at stage 4, because F was free 1540%% in the propagated fun-expression. Our version inlines this example 1541%% completely. 1542 1543copy(R, Opnd, E, Ctxt, Env, S) -> 1544 case is_c_var(E) of 1545 true -> 1546 %% The operand reduces to another variable - get its 1547 %% ref-structure and attempt to propagate further. 1548 copy_var(env__get(var_name(E), Opnd#opnd.env), Ctxt, Env, 1549 S); 1550 false -> 1551 %% Apart from variables and functional values (the latter 1552 %% are handled by `copy_1' below), only constant literals 1553 %% are copyable in general; other things, including e.g. 1554 %% tuples `{foo, X}', could cause duplication of work, and 1555 %% are not copy propagated. 1556 case is_literal(E) of 1557 true -> 1558 {E, count_size(weight(literal), S)}; 1559 false -> 1560 copy_1(R, Opnd, E, Ctxt, Env, S) 1561 end 1562 end. 1563 1564copy_var(R, Ctxt, Env, S) -> 1565 %% (In Lisp or C, if this other variable might be assigned to, we 1566 %% should residualize the "parent" instead, so we don't bypass any 1567 %% destructive updates.) 1568 case R#ref.opnd of 1569 undefined -> 1570 %% This variable is not bound to an expression, so just 1571 %% residualize it. 1572 residualize_var(R, S); 1573 Opnd -> 1574 %% Note that because operands are always visited before 1575 %% copied, all copyable operand expressions will be 1576 %% propagated through any number of bindings. If `R' was 1577 %% bound to a constant literal, we would never have reached 1578 %% this point. 1579 case st__lookup_opnd_cache(Opnd#opnd.loc, S) of 1580 error -> 1581 %% The result for this operand is not yet ready 1582 %% (which should mean that it is a recursive 1583 %% reference). Thus, we must residualise the 1584 %% variable. 1585 residualize_var(R, S); 1586 {ok, #cache{expr = E1}} -> 1587 %% The result for the operand is ready, so we can 1588 %% proceed to propagate it. 1589 copy_1(R, Opnd, E1, Ctxt, Env, S) 1590 end 1591 end. 1592 1593copy_1(R, Opnd, E, Ctxt, Env, S) -> 1594 %% Fun-expression (lambdas) are a bit special; they are copyable, 1595 %% but should preferably not be duplicated, so they should not be 1596 %% copy propagated except into application contexts, where they can 1597 %% be inlined. 1598 case is_c_fun(E) of 1599 true -> 1600 case Ctxt of 1601 #app{} -> 1602 %% First test if the operand is "outer-pending"; if 1603 %% so, don't inline. 1604 case st__test_outer_pending(Opnd#opnd.loc, S) of 1605 false -> 1606 copy_inline(R, Opnd, E, Ctxt, Env, S); 1607 true -> 1608 %% Cyclic reference forced inlining to stop 1609 %% (avoiding infinite unfolding). 1610 residualize_var(R, S) 1611 end; 1612 _ -> 1613 residualize_var(R, S) 1614 end; 1615 false -> 1616 %% We have no other cases to handle here 1617 residualize_var(R, S) 1618 end. 1619 1620%% This inlines a function value that was propagated to an application 1621%% context. The inlining is done with an identity renaming (since the 1622%% expression is already visited) but in the environment of the call 1623%% site (which is OK because of the no-shadowing strategy for renaming, 1624%% and because the domain of our environments are the residual-program 1625%% variables instead of the source-program variables). Note that we must 1626%% first set the "outer-pending" flag, and clear it afterwards. 1627 1628copy_inline(R, Opnd, E, Ctxt, Env, S) -> 1629 S1 = st__mark_outer_pending(Opnd#opnd.loc, S), 1630 case catch {ok, copy_inline_1(R, E, Ctxt, Env, S1)} of 1631 {ok, {E1, S2}} -> 1632 {E1, st__clear_outer_pending(Opnd#opnd.loc, S2)}; 1633 {'EXIT', X} -> 1634 exit(X); 1635 X -> 1636 %% If we use destructive update for the `outer-pending' 1637 %% flag, we must make sure to clear it upon a nonlocal 1638 %% return. 1639 st__clear_outer_pending(Opnd#opnd.loc, S1), 1640 throw(X) 1641 end. 1642 1643%% If the current effort counter was passive, we use a new active effort 1644%% counter with the inherited limit for this particular inlining. 1645 1646copy_inline_1(R, E, Ctxt, Env, S) -> 1647 case effort_is_active(S) of 1648 true -> 1649 copy_inline_2(R, E, Ctxt, Env, S); 1650 false -> 1651 S1 = new_active_effort(get_effort_limit(S), S), 1652 case catch {ok, copy_inline_2(R, E, Ctxt, Env, S1)} of 1653 {ok, {E1, S2}} -> 1654 %% Revert to the old effort counter. 1655 {E1, revert_effort(S, S2)}; 1656 {counter_exceeded, effort, _} -> 1657 %% Aborted this inlining attempt because too much 1658 %% effort was spent. Residualize the variable and 1659 %% revert to the previous state. 1660 residualize_var(R, S); 1661 {'EXIT', X} -> 1662 exit(X); 1663 X -> 1664 throw(X) 1665 end 1666 end. 1667 1668%% Regardless of whether the current size counter is active or not, we 1669%% use a new active size counter for each inlining. If the current 1670%% counter was passive, the new counter gets the inherited size limit; 1671%% if it was active, the size limit of the new counter will be equal to 1672%% the remaining budget of the current counter (which itself is not 1673%% affected by the inlining). This distributes the size budget more 1674%% evenly over "inlinings within inlinings", so that the whole size 1675%% budget is not spent on the first few call sites (in an inlined 1676%% function body) forcing the remaining call sites to be residualised. 1677 1678copy_inline_2(R, E, Ctxt, Env, S) -> 1679 Limit = case size_is_active(S) of 1680 true -> 1681 get_size_limit(S) - get_size_value(S); 1682 false -> 1683 get_size_limit(S) 1684 end, 1685 %% Add the cost of the application to the new size limit, so we 1686 %% always inline functions that are small enough, even if `Limit' is 1687 %% close to zero at this point. (This is an extension to the 1688 %% original algorithm.) 1689 S1 = new_active_size(Limit + apply_size(length(Ctxt#app.opnds)), S), 1690 case catch {ok, inline(E, Ctxt, ren__identity(), Env, S1)} of 1691 {ok, {E1, S2}} -> 1692 %% Revert to the old size counter. 1693 {E1, revert_size(S, S2)}; 1694 {counter_exceeded, size, S2} -> 1695 %% Aborted this inlining attempt because it got too big. 1696 %% Residualize the variable and revert to the old size 1697 %% counter. (It is important that we do not also revert the 1698 %% effort counter here. Because the effort and size counters 1699 %% are always set up together, we know that the effort 1700 %% counter returned in S2 is the same that was passed to 1701 %% `inline'.) 1702 S3 = revert_size(S, S2), 1703 %% If we use destructive update for the `inlined' flag, we 1704 %% must make sure to clear the flags of any nested 1705 %% app-contexts upon aborting; see `inline' for details. 1706 reset_nested_apps(Ctxt, S3), % for effect 1707 residualize_var(R, S3); 1708 {'EXIT', X} -> 1709 exit(X); 1710 X -> 1711 throw(X) 1712 end. 1713 1714reset_nested_apps(#app{ctxt = Ctxt, loc = L}, S) -> 1715 reset_nested_apps(Ctxt, st__clear_app_inlined(L, S)); 1716reset_nested_apps(_, S) -> 1717 S. 1718 1719 1720%% --------------------------------------------------------------------- 1721%% Support functions 1722 1723new_var(Env) -> 1724 Name = env__new_vname(Env), 1725 c_var(Name). 1726 1727residualize_var(R, S) -> 1728 S1 = count_size(weight(var), S), 1729 {ref_to_var(R), st__set_var_referenced(R#ref.loc, S1)}. 1730 1731%% This function returns the value-producing subexpression of any 1732%% expression. (Except for sequencing expressions, this is the 1733%% expression itself.) 1734 1735result(E) -> 1736 case is_c_seq(E) of 1737 true -> 1738 %% Also see `make_seq', which is used in all places to build 1739 %% sequences so that they are always nested in the first 1740 %% position. 1741 seq_body(E); 1742 false -> 1743 E 1744 end. 1745 1746%% This function rewrites E to `do A1 E' if A is `do A1 A2', and 1747%% otherwise returns E unchanged. 1748 1749hoist_effects(A, E) -> 1750 case type(A) of 1751 seq -> make_seq(seq_arg(A), E); 1752 _ -> E 1753 end. 1754 1755%% This "build sequencing expression" operation assures that sequences 1756%% are always nested in the first position, which makes it easy to find 1757%% the actual value-producing expression of a sequence (cf. `result'). 1758 1759make_seq(E1, E2) -> 1760 case is_safe(E1) of 1761 true -> 1762 %% The first expression can safely be dropped. 1763 E2; 1764 false -> 1765 %% If `E1' is a sequence whose final expression has no side 1766 %% effects, then we can lose *that* expression when we 1767 %% compose the new sequence, since its value will not be 1768 %% used. 1769 E3 = case is_c_seq(E1) of 1770 true -> 1771 case is_safe(seq_body(E1)) of 1772 true -> 1773 %% Drop the final expression. 1774 seq_arg(E1); 1775 false -> 1776 E1 1777 end; 1778 false -> 1779 E1 1780 end, 1781 case is_c_seq(E2) of 1782 true -> 1783 %% `E2' is a sequence (E2' E2''), so we must 1784 %% rearrange the nesting to ((E1, E2') E2''), to 1785 %% preserve the invariant. Annotations on `E2' are 1786 %% lost. 1787 c_seq(c_seq(E3, seq_arg(E2)), seq_body(E2)); 1788 false -> 1789 c_seq(E3, E2) 1790 end 1791 end. 1792 1793%% Currently, safe expressions include variables, lambda expressions, 1794%% constructors with safe subexpressions (this includes atoms, integers, 1795%% empty lists, etc.), seq-, let- and letrec-expressions with safe 1796%% subexpressions, try- and catch-expressions with safe subexpressions 1797%% and calls to safe functions with safe argument subexpressions. 1798%% Binaries seem too tricky to be considered. 1799 1800is_safe(E) -> 1801 case is_data(E) of 1802 true -> 1803 is_safe_list(data_es(E)); 1804 false -> 1805 case type(E) of 1806 var -> 1807 true; 1808 'fun' -> 1809 true; 1810 values -> 1811 is_safe_list(values_es(E)); 1812 'seq' -> 1813 case is_safe(seq_arg(E)) of 1814 true -> 1815 is_safe(seq_body(E)); 1816 false -> 1817 false 1818 end; 1819 'let' -> 1820 case is_safe(let_arg(E)) of 1821 true -> 1822 is_safe(let_body(E)); 1823 false -> 1824 false 1825 end; 1826 letrec -> 1827 is_safe(letrec_body(E)); 1828 'try' -> 1829 %% If the argument expression is not safe, it could 1830 %% be modifying the state; thus, even if the body is 1831 %% safe, the try-expression as a whole would not be. 1832 %% If the argument is safe, the handler is not used. 1833 case is_safe(try_arg(E)) of 1834 true -> 1835 is_safe(try_body(E)); 1836 false -> 1837 false 1838 end; 1839 'catch' -> 1840 is_safe(catch_body(E)); 1841 call -> 1842 M = call_module(E), 1843 F = call_name(E), 1844 case is_c_atom(M) and is_c_atom(F) of 1845 true -> 1846 As = call_args(E), 1847 case is_safe_list(As) of 1848 true -> 1849 is_safe_call(atom_val(M), 1850 atom_val(F), 1851 length(As)); 1852 false -> 1853 false 1854 end; 1855 false -> 1856 false 1857 end; 1858 _ -> 1859 false 1860 end 1861 end. 1862 1863is_safe_list([E | Es]) -> 1864 case is_safe(E) of 1865 true -> 1866 is_safe_list(Es); 1867 false -> 1868 false 1869 end; 1870is_safe_list([]) -> 1871 true. 1872 1873is_safe_call(M, F, A) -> 1874 erl_bifs:is_safe(M, F, A). 1875 1876%% When setting up local variables, we only create new names if we have 1877%% to, according to the "no-shadowing" strategy. 1878 1879make_locals(Vs, Ren, Env) -> 1880 make_locals(Vs, [], Ren, Env). 1881 1882make_locals([V | Vs], As, Ren, Env) -> 1883 Name = var_name(V), 1884 case env__is_defined(Name, Env) of 1885 false -> 1886 %% The variable need not be renamed. Just make sure that the 1887 %% renaming will map it to itself. 1888 Name1 = Name, 1889 Ren1 = ren__add_identity(Name, Ren); 1890 true -> 1891 %% The variable must be renamed to maintain the no-shadowing 1892 %% invariant. Do the right thing for function variables. 1893 Name1 = case Name of 1894 {A, N} -> 1895 env__new_fname(A, N, Env); 1896 _ -> 1897 env__new_vname(Env) 1898 end, 1899 Ren1 = ren__add(Name, Name1, Ren) 1900 end, 1901 %% This temporary binding is added for correct new-key generation. 1902 Env1 = env__bind(Name1, dummy, Env), 1903 make_locals(Vs, [Name1 | As], Ren1, Env1); 1904make_locals([], As, Ren, Env) -> 1905 {reverse(As), Ren, Env}. 1906 1907%% This adds let-bindings for the source code variables in `Es' to the 1908%% environment `Env'. 1909%% 1910%% Note that we always assign a new state location for the 1911%% residual-program variable, since we cannot know when a location for a 1912%% particular variable in the source code can be reused. 1913 1914bind_locals(Vs, Ren, Env, S) -> 1915 Opnds = lists:duplicate(length(Vs), undefined), 1916 bind_locals(Vs, Opnds, Ren, Env, S). 1917 1918bind_locals(Vs, Opnds, Ren, Env, S) -> 1919 {Ns, Ren1, Env1} = make_locals(Vs, Ren, Env), 1920 {Rs, Env2, S1} = bind_locals_1(Ns, Opnds, [], Env1, S), 1921 {Rs, Ren1, Env2, S1}. 1922 1923%% Note that the `Vs' are currently not used for anything except the 1924%% number of variables. If we were maintaining "source-referenced" 1925%% flags, then the flag in the new variable should be initialized to the 1926%% current value of the (residual-) referenced-flag of the "parent". 1927 1928bind_locals_1([N | Ns], [Opnd | Opnds], Rs, Env, S) -> 1929 {R, S1} = new_ref(N, Opnd, S), 1930 Env1 = env__bind(N, R, Env), 1931 bind_locals_1(Ns, Opnds, [R | Rs], Env1, S1); 1932bind_locals_1([], [], Rs, Env, S) -> 1933 {lists:reverse(Rs), Env, S}. 1934 1935new_refs(Ns, Opnds, S) -> 1936 new_refs(Ns, Opnds, [], S). 1937 1938new_refs([N | Ns], [Opnd | Opnds], Rs, S) -> 1939 {R, S1} = new_ref(N, Opnd, S), 1940 new_refs(Ns, Opnds, [R | Rs], S1); 1941new_refs([], [], Rs, S) -> 1942 {lists:reverse(Rs), S}. 1943 1944new_ref(N, Opnd, S) -> 1945 {L, S1} = st__new_ref_loc(S), 1946 {#ref{name = N, opnd = Opnd, loc = L}, S1}. 1947 1948%% This adds recursive bindings for the source code variables in `Es' to 1949%% the environment `Env'. Note that recursive binding of a set of 1950%% variables is an atomic operation on the environment - they cannot be 1951%% added one at a time. 1952 1953bind_recursive(Vs, Opnds, Ren, Env, S) -> 1954 {Ns, Ren1, Env1} = make_locals(Vs, Ren, Env), 1955 {Rs, S1} = new_refs(Ns, Opnds, S), 1956 1957 %% When this fun-expression is evaluated, it updates the operand 1958 %% structure in the ref-structure to contain the recursively defined 1959 %% environment and the correct renaming. 1960 Fun = fun (R, Env) -> 1961 Opnd = R#ref.opnd, 1962 R#ref{opnd = Opnd#opnd{ren = Ren1, env = Env}} 1963 end, 1964 {Rs, Ren1, env__bind_recursive(Ns, Rs, Fun, Env1), S1}. 1965 1966safe_context(Ctxt) -> 1967 case Ctxt of 1968 #app{} -> 1969 value; 1970 _ -> 1971 Ctxt 1972 end. 1973 1974%% Note that the name of a variable encodes its type: a "plain" variable 1975%% or a function variable. The latter kind also contains an arity number 1976%% which should be preserved upon renaming. 1977 1978ref_to_var(#ref{name = Name}) -> 1979 %% If we were maintaining "source-referenced" flags, the annotation 1980 %% `add_ann([#source_ref{loc = L}], E)' should also be done here, to 1981 %% make the algorithm reapplicable. This is however not necessary 1982 %% since there are no destructive variable assignments in Erlang. 1983 c_var(Name). 1984 1985%% Including the effort counter of the call site assures that the cost 1986%% of processing an operand via `visit' is charged to the correct 1987%% counter. In particular, if the effort counter of the call site was 1988%% passive, the operands will also be processed with a passive counter. 1989 1990make_opnd(E, Ren, Env, S) -> 1991 {L, S1} = st__new_opnd_loc(S), 1992 C = st__get_effort(S1), 1993 Opnd = #opnd{expr = E, ren = Ren, env = Env, loc = L, effort = C}, 1994 {Opnd, S1}. 1995 1996keep_referenced(Rs, S) -> 1997 [R || R <- Rs, st__get_var_referenced(R#ref.loc, S)]. 1998 1999residualize_operands(Opnds, E, S) -> 2000 foldr(fun (Opnd, {E, S}) -> residualize_operand(Opnd, E, S) end, 2001 {E, S}, Opnds). 2002 2003%% This is the only case where an operand expression can be visited in 2004%% `effect' context instead of `value' context. 2005 2006residualize_operand(Opnd, E, S) -> 2007 case st__get_opnd_effect(Opnd#opnd.loc, S) of 2008 true -> 2009 %% The operand has not been visited, so we do that now, but 2010 %% in `effect' context. (Waddell's algorithm does some stuff 2011 %% here to account specially for the operand size, which 2012 %% appears unnecessary.) 2013 {E1, S1} = i(Opnd#opnd.expr, effect, Opnd#opnd.ren, 2014 Opnd#opnd.env, S), 2015 {make_seq(E1, E), S1}; 2016 false -> 2017 {E, S} 2018 end. 2019 2020%% The `visit' function always visits the operand expression in `value' 2021%% context (`residualize_operand' visits an unreferenced operand 2022%% expression in `effect' context when necessary). A new passive size 2023%% counter is used for visiting the operand, the final value of which is 2024%% then cached along with the resulting expression. 2025%% 2026%% Note that the effort counter of the call site, included in the 2027%% operand structure, is not a shared object. Thus, the effort budget is 2028%% actually reused over all occurrences of the operands of a single 2029%% application. This does not appear to be a problem; just a 2030%% modification of the algorithm. 2031 2032visit(Opnd, S) -> 2033 {C, S1} = visit_1(Opnd, S), 2034 {C#cache.expr, S1}. 2035 2036visit_and_count_size(Opnd, S) -> 2037 {C, S1} = visit_1(Opnd, S), 2038 {C#cache.expr, count_size(C#cache.size, S1)}. 2039 2040visit_1(Opnd, S) -> 2041 case st__lookup_opnd_cache(Opnd#opnd.loc, S) of 2042 error -> 2043 %% Use a new, passive, size counter for visiting operands, 2044 %% and use the effort counter of the context of the operand. 2045 %% It turns out that if the latter is active, it must be the 2046 %% same object as the one currently used, and if it is 2047 %% passive, it does not matter if it is the same object as 2048 %% any other counter. 2049 Effort = Opnd#opnd.effort, 2050 Active = counter__is_active(Effort), 2051 S1 = case Active of 2052 true -> 2053 S; % don't change effort counter 2054 false -> 2055 st__set_effort(Effort, S) 2056 end, 2057 S2 = new_passive_size(get_size_limit(S1), S1), 2058 2059 %% Visit the expression and cache the result, along with the 2060 %% final value of the size counter. 2061 {E, S3} = i(Opnd#opnd.expr, value, Opnd#opnd.ren, 2062 Opnd#opnd.env, S2), 2063 Size = get_size_value(S3), 2064 C = #cache{expr = E, size = Size}, 2065 S4 = revert_size(S, st__set_opnd_cache(Opnd#opnd.loc, C, 2066 S3)), 2067 case Active of 2068 true -> 2069 {C, S4}; % keep using the same effort counter 2070 false -> 2071 {C, revert_effort(S, S4)} 2072 end; 2073 {ok, C} -> 2074 {C, S} 2075 end. 2076 2077%% Create a pattern matching template for an expression. A template 2078%% contains only data constructors (including atomic ones) and 2079%% variables, and compound literals are not folded into a single node. 2080%% Each node in the template is annotated with the variable which holds 2081%% the corresponding subexpression; these are new, unique variables not 2082%% existing in the given `Env'. Returns `{Template, Variables, NewEnv}', 2083%% where `Variables' is the list of all variables corresponding to nodes 2084%% in the template *listed in reverse dependency order*, and `NewEnv' is 2085%% `Env' augmented with mappings from the variable names to 2086%% subexpressions of `E' (not #ref{} structures!) rewritten so that no 2087%% computations are duplicated. `Variables' is guaranteed to be nonempty 2088%% - at least the root node will always be bound to a new variable. 2089 2090make_template(E, Env) -> 2091 make_template(E, [], Env). 2092 2093make_template(E, Vs0, Env0) -> 2094 case is_data(E) of 2095 true -> 2096 {Ts, {Vs1, Env1}} = mapfoldl( 2097 fun (E, {Vs0, Env0}) -> 2098 {T, Vs1, Env1} = 2099 make_template(E, Vs0, 2100 Env0), 2101 {T, {Vs1, Env1}} 2102 end, 2103 {Vs0, Env0}, data_es(E)), 2104 T = make_data_skel(data_type(E), Ts), 2105 E1 = update_data(E, data_type(E), 2106 [hd(get_ann(T)) || T <- Ts]), 2107 V = new_var(Env1), 2108 Env2 = env__bind(var_name(V), E1, Env1), 2109 {set_ann(T, [V]), [V | Vs1], Env2}; 2110 false -> 2111 case type(E) of 2112 seq -> 2113 %% For a sequencing, we can rebind the variable used 2114 %% for the body, and pass on the template as it is. 2115 {T, Vs1, Env1} = make_template(seq_body(E), Vs0, 2116 Env0), 2117 V = var_name(hd(get_ann(T))), 2118 E1 = update_c_seq(E, seq_arg(E), env__get(V, Env1)), 2119 Env2 = env__bind(V, E1, Env1), 2120 {T, Vs1, Env2}; 2121 _ -> 2122 V = new_var(Env0), 2123 Env1 = env__bind(var_name(V), E, Env0), 2124 {set_ann(V, [V]), [V | Vs0], Env1} 2125 end 2126 end. 2127 2128%% Two clauses are equivalent if their bodies are equivalent expressions 2129%% given that the respective pattern variables are local. 2130 2131equivalent_clauses([]) -> 2132 true; 2133equivalent_clauses([C | Cs]) -> 2134 Env = cerl_trees:variables(c_values(clause_pats(C))), 2135 equivalent_clauses_1(clause_body(C), Cs, Env). 2136 2137equivalent_clauses_1(E, [C | Cs], Env) -> 2138 Env1 = cerl_trees:variables(c_values(clause_pats(C))), 2139 case equivalent(E, clause_body(C), ordsets:union(Env, Env1)) of 2140 true -> 2141 equivalent_clauses_1(E, Cs, Env); 2142 false -> 2143 false 2144 end; 2145equivalent_clauses_1(_, [], _Env) -> 2146 true. 2147 2148%% Two expressions are equivalent if and only if they yield the same 2149%% value and has the same side effects in the same order. Currently, we 2150%% only accept equality between constructors (constants) and nonlocal 2151%% variables, since this should cover most cases of interest. If a 2152%% variable is locally bound in one expression, it cannot be equivalent 2153%% to one with the same name in the other expression, so we need not 2154%% keep track of two environments. 2155 2156equivalent(E1, E2, Env) -> 2157 case is_data(E1) of 2158 true -> 2159 case is_data(E2) of 2160 true -> 2161 T1 = {data_type(E1), data_arity(E1)}, 2162 T2 = {data_type(E2), data_arity(E2)}, 2163 %% Note that we must test for exact equality. 2164 if T1 =:= T2 -> 2165 equivalent_lists(data_es(E1), data_es(E2), 2166 Env); 2167 true -> 2168 false 2169 end; 2170 false -> 2171 false 2172 end; 2173 false -> 2174 case type(E1) of 2175 var -> 2176 case is_c_var(E2) of 2177 true -> 2178 N1 = var_name(E1), 2179 N2 = var_name(E2), 2180 if N1 =:= N2 -> 2181 not ordsets:is_element(N1, Env); 2182 true -> 2183 false 2184 end; 2185 false -> 2186 false 2187 end; 2188 _ -> 2189 %% Other constructs are not being considered. 2190 false 2191 end 2192 end. 2193 2194equivalent_lists([E1 | Es1], [E2 | Es2], Env) -> 2195 equivalent(E1, E2, Env) and equivalent_lists(Es1, Es2, Env); 2196equivalent_lists([], [], _) -> 2197 true; 2198equivalent_lists(_, _, _) -> 2199 false. 2200 2201%% Return `false' or `{true, EffectExpr, ValueExpr}'. The environment is 2202%% passed for new-variable generation. 2203 2204reduce_bif_call(M, F, As, Env) -> 2205 reduce_bif_call_1(M, F, length(As), As, Env). 2206 2207reduce_bif_call_1(erlang, element, 2, [X, Y], _Env) -> 2208 case is_c_int(X) and is_c_tuple(Y) of 2209 true -> 2210 %% We are free to change the relative evaluation order of 2211 %% the elements, so lifting out a particular element is OK. 2212 T = list_to_tuple(tuple_es(Y)), 2213 N = int_val(X), 2214 if integer(N), N > 0, N =< size(T) -> 2215 E = element(N, T), 2216 Es = tuple_to_list(setelement(N, T, void())), 2217 {true, make_seq(c_tuple(Es), E)}; 2218 true -> 2219 false 2220 end; 2221 false -> 2222 false 2223 end; 2224reduce_bif_call_1(erlang, hd, 1, [X], _Env) -> 2225 case is_c_cons(X) of 2226 true -> 2227 %% Cf. `element/2' above. 2228 {true, make_seq(cons_tl(X), cons_hd(X))}; 2229 false -> 2230 false 2231 end; 2232reduce_bif_call_1(erlang, length, 1, [X], _Env) -> 2233 case is_c_list(X) of 2234 true -> 2235 %% Cf. `erlang:size/1' below. 2236 {true, make_seq(X, c_int(list_length(X)))}; 2237 false -> 2238 false 2239 end; 2240reduce_bif_call_1(erlang, list_to_tuple, 1, [X], _Env) -> 2241 case is_c_list(X) of 2242 true -> 2243 %% This does not actually preserve all the evaluation order 2244 %% constraints of the list, but I don't imagine that it will 2245 %% be a problem. 2246 {true, c_tuple(list_elements(X))}; 2247 false -> 2248 false 2249 end; 2250reduce_bif_call_1(erlang, setelement, 3, [X, Y, Z], Env) -> 2251 case is_c_int(X) and is_c_tuple(Y) of 2252 true -> 2253 %% Here, unless `Z' is a simple expression, we must bind it 2254 %% to a new variable, because in that case, `Z' must be 2255 %% evaluated before any part of `Y'. 2256 T = list_to_tuple(tuple_es(Y)), 2257 N = int_val(X), 2258 if integer(N), N > 0, N =< size(T) -> 2259 E = element(N, T), 2260 case is_simple(Z) of 2261 true -> 2262 Es = tuple_to_list(setelement(N, T, Z)), 2263 {true, make_seq(E, c_tuple(Es))}; 2264 false -> 2265 V = new_var(Env), 2266 Es = tuple_to_list(setelement(N, T, V)), 2267 E1 = make_seq(E, c_tuple(Es)), 2268 {true, c_let([V], Z, E1)} 2269 end; 2270 true -> 2271 false 2272 end; 2273 false -> 2274 false 2275 end; 2276reduce_bif_call_1(erlang, size, 1, [X], _Env) -> 2277 case is_c_tuple(X) of 2278 true -> 2279 %% Just evaluate the tuple for effect and use the size (the 2280 %% arity) as the result. 2281 {true, make_seq(X, c_int(tuple_arity(X)))}; 2282 false -> 2283 false 2284 end; 2285reduce_bif_call_1(erlang, tl, 1, [X], _Env) -> 2286 case is_c_cons(X) of 2287 true -> 2288 %% Cf. `element/2' above. 2289 {true, make_seq(cons_hd(X), cons_tl(X))}; 2290 false -> 2291 false 2292 end; 2293reduce_bif_call_1(erlang, tuple_to_list, 1, [X], _Env) -> 2294 case is_c_tuple(X) of 2295 true -> 2296 %% This actually introduces slightly stronger constraints on 2297 %% the evaluation order of the subexpressions. 2298 {true, make_list(tuple_es(X))}; 2299 false -> 2300 false 2301 end; 2302reduce_bif_call_1(_M, _F, _A, _As, _Env) -> 2303 false. 2304 2305effort_is_active(S) -> 2306 counter__is_active(st__get_effort(S)). 2307 2308size_is_active(S) -> 2309 counter__is_active(st__get_size(S)). 2310 2311get_effort_limit(S) -> 2312 counter__limit(st__get_effort(S)). 2313 2314new_active_effort(Limit, S) -> 2315 st__set_effort(counter__new_active(Limit), S). 2316 2317revert_effort(S1, S2) -> 2318 st__set_effort(st__get_effort(S1), S2). 2319 2320new_active_size(Limit, S) -> 2321 st__set_size(counter__new_active(Limit), S). 2322 2323new_passive_size(Limit, S) -> 2324 st__set_size(counter__new_passive(Limit), S). 2325 2326revert_size(S1, S2) -> 2327 st__set_size(st__get_size(S1), S2). 2328 2329count_effort(N, S) -> 2330 C = st__get_effort(S), 2331 C1 = counter__add(N, C, effort, S), 2332 case debug_counters() of 2333 true -> 2334 case counter__is_active(C1) of 2335 true -> 2336 V = counter__value(C1), 2337 case V > get(counter_effort_max) of 2338 true -> 2339 put(counter_effort_max, V); 2340 false -> 2341 ok 2342 end; 2343 false -> 2344 ok 2345 end; 2346 _ -> 2347 ok 2348 end, 2349 st__set_effort(C1, S). 2350 2351count_size(N, S) -> 2352 C = st__get_size(S), 2353 C1 = counter__add(N, C, size, S), 2354 case debug_counters() of 2355 true -> 2356 case counter__is_active(C1) of 2357 true -> 2358 V = counter__value(C1), 2359 case V > get(counter_size_max) of 2360 true -> 2361 put(counter_size_max, V); 2362 false -> 2363 ok 2364 end; 2365 false -> 2366 ok 2367 end; 2368 _ -> 2369 ok 2370 end, 2371 st__set_size(C1, S). 2372 2373get_size_value(S) -> 2374 counter__value(st__get_size(S)). 2375 2376get_size_limit(S) -> 2377 counter__limit(st__get_size(S)). 2378 2379kill_id_anns([{'id',_} | As]) -> 2380 kill_id_anns(As); 2381kill_id_anns([A | As]) -> 2382 [A | kill_id_anns(As)]; 2383kill_id_anns([]) -> 2384 []. 2385 2386 2387%% ===================================================================== 2388%% General utilities 2389 2390max(X, Y) when X > Y -> X; 2391max(_, Y) -> Y. 2392 2393%% The atom `ok', is widely used in Erlang for "void" values. 2394 2395void() -> abstract(ok). 2396 2397is_simple(E) -> 2398 case type(E) of 2399 literal -> true; 2400 var -> true; 2401 'fun' -> true; 2402 _ -> false 2403 end. 2404 2405get_components(N, E) -> 2406 case type(E) of 2407 values -> 2408 Es = values_es(E), 2409 if length(Es) == N -> 2410 {true, Es}; 2411 true -> 2412 false 2413 end; 2414 _ when N == 1 -> 2415 {true, [E]}; 2416 _ -> 2417 false 2418 end. 2419 2420all_static([E | Es]) -> 2421 case is_literal(result(E)) of 2422 true -> 2423 all_static(Es); 2424 false -> 2425 false 2426 end; 2427all_static([]) -> 2428 true. 2429 2430set_clause_bodies([C | Cs], B) -> 2431 [update_c_clause(C, clause_pats(C), clause_guard(C), B) 2432 | set_clause_bodies(Cs, B)]; 2433set_clause_bodies([], _) -> 2434 []. 2435 2436filename([C | T]) when integer(C), C > 0, C =< 255 -> 2437 [C | filename(T)]; 2438filename([H|T]) -> 2439 filename(H) ++ filename(T); 2440filename([]) -> 2441 []; 2442filename(N) when atom(N) -> 2443 atom_to_list(N); 2444filename(N) -> 2445 report_error("bad filename: `~P'.", [N, 25]), 2446 exit(error). 2447 2448 2449%% ===================================================================== 2450%% Abstract datatype: renaming() 2451 2452ren__identity() -> 2453 dict:new(). 2454 2455ren__add(X, Y, Ren) -> 2456 dict:store(X, Y, Ren). 2457 2458ren__map(X, Ren) -> 2459 case dict:find(X, Ren) of 2460 {ok, Y} -> 2461 Y; 2462 error -> 2463 X 2464 end. 2465 2466ren__add_identity(X, Ren) -> 2467 dict:erase(X, Ren). 2468 2469 2470%% ===================================================================== 2471%% Abstract datatype: environment() 2472 2473env__empty() -> 2474 rec_env:empty(). 2475 2476env__bind(Key, Val, Env) -> 2477 rec_env:bind(Key, Val, Env). 2478 2479%% `Es' should have type `[{Key, Val}]', and `Fun' should have type 2480%% `(Val, Env) -> T', mapping a value together with the recursive 2481%% environment itself to some term `T' to be returned when the entry is 2482%% looked up. 2483 2484env__bind_recursive(Ks, Vs, F, Env) -> 2485 rec_env:bind_recursive(Ks, Vs, F, Env). 2486 2487env__lookup(Key, Env) -> 2488 rec_env:lookup(Key, Env). 2489 2490env__get(Key, Env) -> 2491 rec_env:get(Key, Env). 2492 2493env__is_defined(Key, Env) -> 2494 rec_env:is_defined(Key, Env). 2495 2496env__new_vname(Env) -> 2497 rec_env:new_key(Env). 2498 2499env__new_fname(A, N, Env) -> 2500 rec_env:new_key(fun (X) -> 2501 S = integer_to_list(X), 2502 {list_to_atom(atom_to_list(A) ++ "_" ++ S), 2503 N} 2504 end, Env). 2505 2506 2507%% ===================================================================== 2508%% Abstract datatype: state() 2509 2510-record(state, {free, % next free location 2511 size, % size counter 2512 effort, % effort counter 2513 cache, % operand expression cache 2514 var_flags, % flags for variables (#ref-structures) 2515 opnd_flags, % flags for operands 2516 app_flags}). % flags for #app-structures 2517 2518%% Note that we do not have a `var_assigned' flag, since there is no 2519%% destructive assignment in Erlang. In the original algorithm, the 2520%% "residual-referenced"-flags of the previous inlining pass (or 2521%% initialization pass) are used as the "source-referenced"-flags for 2522%% the subsequent pass. The latter may then be used as a safe 2523%% approximation whenever we need to base a decision on whether or not a 2524%% particular variable or function variable could be referenced in the 2525%% program being generated, and computation of the new 2526%% "residual-referenced" flag for that variable is not yet finished. In 2527%% the present algorithm, this can only happen in the presence of 2528%% variable assignments, which do not exist in Erlang. Therefore, we do 2529%% not keep "source-referenced" flags for residual-code references in 2530%% our implementation. 2531%% 2532%% The "inner-pending" flag tells us whether we are already in the 2533%% process of visiting a particular operand, and the "outer-pending" 2534%% flag whether we are in the process of inlining a propagated 2535%% functional value. The "pending flags" are really counters limiting 2536%% the number of times an operand may be inlined recursively, causing 2537%% loop unrolling; however, unrolling more than one iteration does not 2538%% work offhand in the present implementation. (TODO: find out why.) 2539%% Note that the initial value must be greater than zero in order for 2540%% any inlining at all to be done. 2541 2542%% Flags are stored in ETS-tables, one table for each class. The second 2543%% element in each stored tuple is the key (the "label"). 2544 2545-record(var_flags, {lab, referenced = false}). 2546-record(opnd_flags, {lab, inner_pending = 1, outer_pending = 1, 2547 effect = false}). 2548-record(app_flags, {lab, inlined = false}). 2549 2550st__new(Effort, Size) -> 2551 #state{free = 0, 2552 size = counter__new_passive(Size), 2553 effort = counter__new_passive(Effort), 2554 cache = dict:new(), 2555 var_flags = ets:new(var, [set, private, {keypos, 2}]), 2556 opnd_flags = ets:new(opnd, [set, private, {keypos, 2}]), 2557 app_flags = ets:new(app, [set, private, {keypos, 2}])}. 2558 2559st__new_loc(S) -> 2560 N = S#state.free, 2561 {N, S#state{free = N + 1}}. 2562 2563st__get_effort(S) -> 2564 S#state.effort. 2565 2566st__set_effort(C, S) -> 2567 S#state{effort = C}. 2568 2569st__get_size(S) -> 2570 S#state.size. 2571 2572st__set_size(C, S) -> 2573 S#state{size = C}. 2574 2575st__set_var_referenced(L, S) -> 2576 T = S#state.var_flags, 2577 [F] = ets:lookup(T, L), 2578 ets:insert(T, F#var_flags{referenced = true}), 2579 S. 2580 2581st__get_var_referenced(L, S) -> 2582 ets:lookup_element(S#state.var_flags, L, #var_flags.referenced). 2583 2584st__lookup_opnd_cache(L, S) -> 2585 dict:find(L, S#state.cache). 2586 2587%% Note that setting the cache should only be done once. 2588 2589st__set_opnd_cache(L, C, S) -> 2590 S#state{cache = dict:store(L, C, S#state.cache)}. 2591 2592st__set_opnd_effect(L, S) -> 2593 T = S#state.opnd_flags, 2594 [F] = ets:lookup(T, L), 2595 ets:insert(T, F#opnd_flags{effect = true}), 2596 S. 2597 2598st__get_opnd_effect(L, S) -> 2599 ets:lookup_element(S#state.opnd_flags, L, #opnd_flags.effect). 2600 2601st__set_app_inlined(L, S) -> 2602 T = S#state.app_flags, 2603 [F] = ets:lookup(T, L), 2604 ets:insert(T, F#app_flags{inlined = true}), 2605 S. 2606 2607st__clear_app_inlined(L, S) -> 2608 T = S#state.app_flags, 2609 [F] = ets:lookup(T, L), 2610 ets:insert(T, F#app_flags{inlined = false}), 2611 S. 2612 2613st__get_app_inlined(L, S) -> 2614 ets:lookup_element(S#state.app_flags, L, #app_flags.inlined). 2615 2616%% The pending-flags are initialized by `st__new_opnd_loc' below. 2617 2618st__test_inner_pending(L, S) -> 2619 T = S#state.opnd_flags, 2620 P = ets:lookup_element(T, L, #opnd_flags.inner_pending), 2621 P =< 0. 2622 2623st__mark_inner_pending(L, S) -> 2624 ets:update_counter(S#state.opnd_flags, L, 2625 {#opnd_flags.inner_pending, -1}), 2626 S. 2627 2628st__clear_inner_pending(L, S) -> 2629 ets:update_counter(S#state.opnd_flags, L, 2630 {#opnd_flags.inner_pending, 1}), 2631 S. 2632 2633st__test_outer_pending(L, S) -> 2634 T = S#state.opnd_flags, 2635 P = ets:lookup_element(T, L, #opnd_flags.outer_pending), 2636 P =< 0. 2637 2638st__mark_outer_pending(L, S) -> 2639 ets:update_counter(S#state.opnd_flags, L, 2640 {#opnd_flags.outer_pending, -1}), 2641 S. 2642 2643st__clear_outer_pending(L, S) -> 2644 ets:update_counter(S#state.opnd_flags, L, 2645 {#opnd_flags.outer_pending, 1}), 2646 S. 2647 2648st__new_app_loc(S) -> 2649 V = {L, _S1} = st__new_loc(S), 2650 ets:insert(S#state.app_flags, #app_flags{lab = L}), 2651 V. 2652 2653st__new_ref_loc(S) -> 2654 V = {L, _S1} = st__new_loc(S), 2655 ets:insert(S#state.var_flags, #var_flags{lab = L}), 2656 V. 2657 2658st__new_opnd_loc(S) -> 2659 V = {L, _S1} = st__new_loc(S), 2660 ets:insert(S#state.opnd_flags, #opnd_flags{lab = L}), 2661 V. 2662 2663 2664%% ===================================================================== 2665%% Abstract datatype: counter() 2666%% 2667%% `counter__add' throws `{counter_exceeded, Type, Data}' if the 2668%% resulting counter value would exceed the limit for the counter in 2669%% question (`Type' and `Data' are given by the user). 2670 2671-record(counter, {active, value, limit}). 2672 2673counter__new_passive(Limit) when Limit > 0 -> 2674 {0, Limit}. 2675 2676counter__new_active(Limit) when Limit > 0 -> 2677 {Limit, Limit}. 2678 2679%% Active counters have values > 0 internally; passive counters start at 2680%% zero. The 'limit' field is only accessed by the 'counter__limit' 2681%% function. 2682 2683counter__is_active({C, _}) -> 2684 C > 0. 2685 2686counter__limit({_, L}) -> 2687 L. 2688 2689counter__value({N, L}) -> 2690 if N > 0 -> 2691 L - N; 2692 true -> 2693 -N 2694 end. 2695 2696counter__add(N, {V, L}, Type, Data) -> 2697 N1 = V - N, 2698 if V > 0, N1 =< 0 -> 2699 case debug_counters() of 2700 true -> 2701 case Type of 2702 effort -> 2703 put(counter_effort_triggers, 2704 get(counter_effort_triggers) + 1); 2705 size -> 2706 put(counter_size_triggers, 2707 get(counter_size_triggers) + 1) 2708 end; 2709 _ -> 2710 ok 2711 end, 2712 throw({counter_exceeded, Type, Data}); 2713 true -> 2714 {N1, L} 2715 end. 2716 2717 2718%% ===================================================================== 2719%% Reporting 2720 2721% report_internal_error(S) -> 2722% report_internal_error(S, []). 2723 2724report_internal_error(S, Vs) -> 2725 report_error("internal error: " ++ S, Vs). 2726 2727report_error(D) -> 2728 report_error(D, []). 2729 2730report_error({F, L, D}, Vs) -> 2731 report({F, L, {error, D}}, Vs); 2732report_error(D, Vs) -> 2733 report({error, D}, Vs). 2734 2735report_warning(D) -> 2736 report_warning(D, []). 2737 2738report_warning({F, L, D}, Vs) -> 2739 report({F, L, {warning, D}}, Vs); 2740report_warning(D, Vs) -> 2741 report({warning, D}, Vs). 2742 2743report(D, Vs) -> 2744 io:put_chars(format(D, Vs)). 2745 2746format({error, D}, Vs) -> 2747 ["error: ", format(D, Vs)]; 2748format({warning, D}, Vs) -> 2749 ["warning: ", format(D, Vs)]; 2750format({"", L, D}, Vs) when integer(L), L > 0 -> 2751 [io_lib:fwrite("~w: ", [L]), format(D, Vs)]; 2752format({"", _L, D}, Vs) -> 2753 format(D, Vs); 2754format({F, L, D}, Vs) when integer(L), L > 0 -> 2755 [io_lib:fwrite("~s:~w: ", [filename(F), L]), format(D, Vs)]; 2756format({F, _L, D}, Vs) -> 2757 [io_lib:fwrite("~s: ", [filename(F)]), format(D, Vs)]; 2758format(S, Vs) when list(S) -> 2759 [io_lib:fwrite(S, Vs), $\n]. 2760 2761 2762%% ===================================================================== 2763