1%% ``Licensed under the Apache License, Version 2.0 (the "License");
2%% you may not use this file except in compliance with the License.
3%% You may obtain a copy of the License at
4%%
5%%     http://www.apache.org/licenses/LICENSE-2.0
6%%
7%% Unless required by applicable law or agreed to in writing, software
8%% distributed under the License is distributed on an "AS IS" BASIS,
9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10%% See the License for the specific language governing permissions and
11%% limitations under the License.
12%%
13%% The Initial Developer of the Original Code is Richard Carlsson.
14%% Copyright (C) 1999-2002 Richard Carlsson.
15%% Portions created by Ericsson are Copyright 2001, Ericsson Utvecklings
16%% AB. All Rights Reserved.''
17%%
18%%     $Id: cerl_inline.erl,v 1.1 2008/12/17 09:53:41 mikpe Exp $
19%%
20%% Core Erlang inliner.
21%% =====================================================================
22%%
23%% This is an implementation of the algorithm by Waddell and Dybvig
24%% ("Fast and Effective Procedure Inlining", International Static
25%% Analysis Symposium 1997), adapted to the Core Erlang language.
26%%
27%% Instead of always renaming variables and function variables, this
28%% implementation uses the "no-shadowing strategy" of Peyton Jones and
29%% Marlow ("Secrets of the Glasgow Haskell Compiler Inliner", 1999).
30%%
31%% =====================================================================
32
33%% TODO: inline single-source-reference operands without size limit.
34
35-module(cerl_inline).
36
37-export([core_transform/2, transform/1, transform/2]).
38
39-import(cerl, [abstract/1, alias_pat/1, alias_var/1, apply_args/1,
40	       apply_op/1, atom_name/1, atom_val/1, bitstr_val/1,
41	       bitstr_size/1, bitstr_unit/1, bitstr_type/1,
42	       bitstr_flags/1, binary_segments/1, update_c_alias/3,
43	       update_c_apply/3, update_c_binary/2, update_c_bitstr/6,
44	       update_c_call/4, update_c_case/3, update_c_catch/2,
45	       update_c_clause/4, c_fun/2, c_int/1, c_let/3,
46	       update_c_let/4, update_c_letrec/3, update_c_module/5,
47	       update_c_primop/3, update_c_receive/4, update_c_seq/3,
48	       c_seq/2, update_c_try/6, c_tuple/1, update_c_values/2,
49	       c_values/1, c_var/1, call_args/1, call_module/1,
50	       call_name/1, case_arity/1, case_arg/1, case_clauses/1,
51	       catch_body/1, clause_body/1, clause_guard/1,
52	       clause_pats/1, clause_vars/1, concrete/1, cons_hd/1,
53	       cons_tl/1, data_arity/1, data_es/1, data_type/1,
54	       fun_body/1, fun_vars/1, get_ann/1, int_val/1,
55	       is_c_atom/1, is_c_cons/1, is_c_fun/1, is_c_int/1,
56	       is_c_list/1, is_c_seq/1, is_c_tuple/1, is_c_var/1,
57	       is_data/1, is_literal/1, is_literal_term/1, let_arg/1,
58	       let_body/1, let_vars/1, letrec_body/1, letrec_defs/1,
59	       list_length/1, list_elements/1, update_data/3,
60	       make_list/1, make_data_skel/2, module_attrs/1,
61	       module_defs/1, module_exports/1, module_name/1,
62	       primop_args/1, primop_name/1, receive_action/1,
63	       receive_clauses/1, receive_timeout/1, seq_arg/1,
64	       seq_body/1, set_ann/2, try_arg/1, try_body/1, try_vars/1,
65	       try_evars/1, try_handler/1, tuple_es/1, tuple_arity/1,
66	       type/1, values_es/1, var_name/1]).
67
68-import(lists, [foldl/3, foldr/3, mapfoldl/3, reverse/1]).
69
70%%
71%% Constants
72%%
73
74debug_runtime() -> false.
75debug_counters() -> false.
76
77%% Normal execution times for inlining are between 0.1 and 0.3 seconds
78%% (on the author's current equipment). The default effort limit of 150
79%% is high enough that most normal programs never hit the limit even
80%% once, and for difficult programs, it generally keeps the execution
81%% times below 2-5 seconds. Using an effort counter of 1000 will thus
82%% have no further effect on most programs, but some programs may take
83%% as much as 10 seconds or more. Effort counts larger than 2500 have
84%% never been observed even on very ill-conditioned programs.
85%%
86%% Size limits between 6 and 18 tend to actually shrink the code,
87%% because of the simplifications made possible by inlining. A limit of
88%% 16 seems to be optimal for this purpose, often shrinking the
89%% executable code by up to 10%. Size limits between 18 and 30 generally
90%% give the same code size as if no inlining was done (i.e., code
91%% duplication balances out the simplifications at these levels). A size
92%% limit between 1 and 5 tends to inline small functions and propagate
93%% constants, but does not cause much simplifications do be done, so the
94%% net effect will be a slight increase in code size. For size limits
95%% above 30, the executable code size tends to increase with about 10%
96%% per 100 units, with some variations depending on the sizes of
97%% functions in the source code.
98%%
99%% Typically, about 90% of the maximum speedup achievable is already
100%% reached using a size limit of 30, and 98% is reached at limits around
101%% 100-150; there is rarely any point in letting the code size increase
102%% by more than 10-15%. If too large functions are inlined, cache
103%% effects will slow the program down.
104
105default_effort() -> 150.
106default_size() -> 24.
107
108%% Base costs/weights for different kinds of expressions. If these are
109%% modified, the size limits above may have to be adjusted.
110
111weight(var) -> 0;	% We count no cost for variable accesses.
112weight(values) -> 0;	% Value aggregates have no cost in themselves.
113weight(literal) -> 1;	% We assume efficient handling of constants.
114weight(data) -> 1;	% Base cost; add 1 per element.
115weight(element) -> 1;   % Cost of storing/fetching an element.
116weight(argument) -> 1;  % Cost of passing a function argument.
117weight('fun') -> 6;	% Base cost + average number of free vars.
118weight('let') -> 0;	% Count no cost for let-bindings.
119weight(letrec) -> 0;    % Like a let-binding.
120weight('case') -> 0;	% Case switches have no base cost.
121weight(clause) -> 1;    % Count one jump at the end of each clause body.
122weight('receive') -> 9;	% Initialization/cleanup cost.
123weight('try') -> 1;	% Assume efficient implementation.
124weight('catch') -> 1;	% See `try'.
125weight(apply) -> 3;     % Average base cost: call/return.
126weight(call) -> 3;      % Assume remote-calls as efficient as `apply'.
127weight(primop) -> 2;    % Assume more efficient than `apply'.
128weight(binary) -> 4;    % Initialisation base cost.
129weight(bitstr) -> 3;   % Coding/decoding a value; like a primop.
130weight(module) -> 1.    % Like a letrec with a constant body
131
132%% These "reference" structures are used for variables and function
133%% variables. They keep track of the variable name, any bound operand,
134%% and the associated store location.
135
136-record(ref, {name, opnd, loc}).
137
138%% Operand structures contain the operand expression, the renaming and
139%% environment, the state location, and the effort counter at the call
140%% site (cf. `visit').
141
142-record(opnd, {expr, ren, env, loc, effort}).
143
144%% Since expressions are only visited in `effect' context when they are
145%% not bound to a referenced variable, only expressions visited in
146%% 'value' context are cached.
147
148-record(cache, {expr, size}).
149
150%% The context flags for an application structure are kept separate from
151%% the structure itself. Note that the original algorithm had exactly
152%% one operand in each application context structure, while we can have
153%% several, or none.
154
155-record(app, {opnds, ctxt, loc}).
156
157
158%%
159%% Interface functions
160%%
161
162%% Use compile option `{core_transform, inline}' to insert this as a
163%% compilation pass.
164
165core_transform(Code, Opts) ->
166    cerl:to_records(transform(cerl:from_records(Code), Opts)).
167
168transform(Tree) ->
169    transform(Tree, []).
170
171transform(Tree, Opts) ->
172    main(Tree, value, Opts).
173
174main(Tree, Ctxt, Opts) ->
175    %% We spawn a new process to do the work, so we don't have to worry
176    %% about cluttering the process dictionary with debugging info, or
177    %% proper deallocation of ets-tables.
178    Opts1 = Opts ++ [{inline_size, default_size()},
179		     {inline_effort, default_effort()}],
180    Reply = self(),
181    Pid = spawn_link(fun () -> start(Reply, Tree, Ctxt, Opts1) end),
182    receive
183        {Pid1, Tree1} when Pid1 == Pid ->
184            Tree1
185    end.
186
187start(Reply, Tree, Ctxt, Opts) ->
188    init_debug(),
189    case debug_runtime() of
190        true ->
191            put(inline_start_time,
192                element(1, erlang:statistics(runtime)));
193        _ ->
194            ok
195    end,
196    Size = max(1, proplists:get_value(inline_size, Opts)),
197    Effort = max(1, proplists:get_value(inline_effort, Opts)),
198    case proplists:get_bool(verbose, Opts) of
199	true ->
200	    io:fwrite("Inlining: inline_size=~w inline_effort=~w\n",
201		      [Size, Effort]);
202	false ->
203	    ok
204    end,
205
206    %% Note that the counters of the new state are passive.
207    S = st__new(Effort, Size),
208
209%%% Initialization is not needed at present. Note that the code in
210%%% `inline_init' is not up-to-date with this module.
211%%%     {Tree1, S1} = inline_init:init(Tree, S),
212%%%     {Tree2, _S2} = i(Tree1, Ctxt, S1),
213    {Tree2, _S2} = i(Tree, Ctxt, S),
214    report_debug(),
215    Reply ! {self(), Tree2}.
216
217init_debug() ->
218    case debug_counters() of
219        true ->
220            put(counter_effort_triggers, 0),
221            put(counter_effort_max, 0),
222            put(counter_size_triggers, 0),
223            put(counter_size_max, 0);
224        _ ->
225            ok
226    end.
227
228report_debug() ->
229    case debug_runtime() of
230        true ->
231            {Time, _} = erlang:statistics(runtime),
232            report("Total run time for inlining: ~.2.0f s.\n",
233		   [(Time - get(inline_start_time))/1000]);
234        _ ->
235            ok
236    end,
237    case debug_counters() of
238        true ->
239            counter_stats();
240        _ ->
241            ok
242    end.
243
244counter_stats() ->
245    T1 = get(counter_effort_triggers),
246    T2 = get(counter_size_triggers),
247    E = get(counter_effort_max),
248    S = get(counter_size_max),
249    M1 = io_lib:fwrite("\tNumber of triggered "
250                       "effort counters: ~p.\n", [T1]),
251    M2 = io_lib:fwrite("\tNumber of triggered "
252                       "size counters: ~p.\n", [T2]),
253    M3 = io_lib:fwrite("\tLargest active effort counter: ~p.\n",
254                       [E]),
255    M4 = io_lib:fwrite("\tLargest active size counter: ~p.\n",
256                       [S]),
257    report("Counter statistics:\n~s", [[M1, M2, M3, M4]]).
258
259
260%% =====================================================================
261%% The main inlining function
262%%
263%% i(E :: coreErlang(),
264%%   Ctxt :: value | effect | #app{}
265%%   Ren :: renaming(),
266%%   Env :: environment(),
267%%   S :: state())
268%%   -> {E', S'}
269%%
270%% Note: It is expected that the input source code ('E') does not
271%% contain free variables. If it does, there is a risk of accidental
272%% name capture, in case a generated "new" variable name happens to be
273%% the same as the name of a variable that is free further below in the
274%% tree; the algorithm only consults the current environment to check if
275%% a name already exists.
276%%
277%% The renaming maps names of source-code variable and function
278%% variables to new names as necessary to avoid clashes, according to
279%% the "no-shadowing" strategy. The environment maps *residual-code*
280%% variables and function variables to operands and global information.
281%% Separating the renaming from the environment, and using the
282%% residual-code variables instead of the source-code variables as its
283%% domain, improves the behaviour of the algorithm when code needs to be
284%% traversed more than once.
285%%
286%% Note that there is no such thing as a `test' context for expressions
287%% in (Core) Erlang (see `i_case' below for details).
288
289i(E, Ctxt, S) ->
290    i(E, Ctxt, ren__identity(), env__empty(), S).
291
292i(E, Ctxt, Ren, Env, S0) ->
293    %% Count one unit of effort on each pass.
294    S = count_effort(1, S0),
295    case is_data(E) of
296        true ->
297            i_data(E, Ctxt, Ren, Env, S);
298        false ->
299            case type(E) of
300                var ->
301                    i_var(E, Ctxt, Ren, Env, S);
302                values ->
303                    i_values(E, Ctxt, Ren, Env, S);
304                'fun' ->
305                    i_fun(E, Ctxt, Ren, Env, S);
306                seq ->
307                    i_seq(E, Ctxt, Ren, Env, S);
308                'let' ->
309                    i_let(E, Ctxt, Ren, Env, S);
310                letrec ->
311                    i_letrec(E, Ctxt, Ren, Env, S);
312                'case' ->
313                    i_case(E, Ctxt, Ren, Env, S);
314                'receive' ->
315                    i_receive(E, Ctxt, Ren, Env, S);
316                apply ->
317                    i_apply(E, Ctxt, Ren, Env, S);
318                call ->
319                    i_call(E, Ctxt, Ren, Env, S);
320                primop ->
321                    i_primop(E, Ren, Env, S);
322                'try' ->
323                    i_try(E, Ctxt, Ren, Env, S);
324                'catch' ->
325                    i_catch(E, Ctxt, Ren, Env, S);
326		binary ->
327		    i_binary(E, Ren, Env, S);
328                module ->
329                    i_module(E, Ctxt, Ren, Env, S)
330            end
331    end.
332
333i_data(E, Ctxt, Ren, Env, S) ->
334    case is_literal(E) of
335        true ->
336            %% This is the `(const c)' case of the original algorithm:
337            %% literal terms which (regardless of size) do not need to
338            %% be constructed dynamically at runtime - boldly assuming
339            %% that the compiler/runtime system can handle this.
340            case Ctxt of
341                effect ->
342                    %% Reduce useless constants to a simple value.
343                    {void(), count_size(weight(literal), S)};
344                _ ->
345                    %% (In Erlang, we cannot set all non-`false'
346                    %% constants to `true' in a `test' context, like we
347                    %% could do in Lisp or C, so the above is the only
348                    %% special case to be handled here.)
349                    {E, count_size(weight(literal), S)}
350            end;
351        false ->
352            %% Data constructors are like to calls to safe built-in
353            %% functions, for which we can "decide to inline"
354            %% immediately; there is no need to create operand
355            %% structures. In `effect' context, we can simply make a
356            %% sequence of the argument expressions, also visited in
357            %% `effect' context. In all other cases, the arguments are
358            %% visited for value.
359            case Ctxt of
360                effect ->
361                    %% Note that this will count the sizes of the
362                    %% subexpressions, even though some or all of them
363                    %% might be discarded by the sequencing afterwards.
364                    {Es1, S1} = mapfoldl(fun (E, S) ->
365						 i(E, effect, Ren, Env,
366						   S)
367					 end,
368					 S, data_es(E)),
369                    E1 = foldl(fun (E1, E2) -> make_seq(E1, E2) end,
370			       void(), Es1),
371                    {E1, S1};
372                _ ->
373                    {Es1, S1} = mapfoldl(fun (E, S) ->
374						 i(E, value, Ren, Env,
375						   S)
376					 end,
377					 S, data_es(E)),
378                    %% The total size/cost is the base cost for a data
379                    %% constructor plus the cost for storing each
380                    %% element.
381                    N = weight(data) + length(Es1) * weight(element),
382                    S2 = count_size(N, S1),
383                    {update_data(E, data_type(E), Es1), S2}
384            end
385    end.
386
387%% This is the `(ref x)' (variable use) case of the original algorithm.
388%% Note that binding occurrences are always handled in the respective
389%% cases of the binding constructs.
390
391i_var(E, Ctxt, Ren, Env, S) ->
392    case Ctxt of
393        effect ->
394            %% Reduce useless variable references to a simple constant.
395	    %% This also avoids useless visiting of bound operands.
396            {void(), count_size(weight(literal), S)};
397        _ ->
398	    Name = var_name(E),
399            case env__lookup(ren__map(Name, Ren), Env) of
400                {ok, R} ->
401                    case R#ref.opnd of
402                        undefined ->
403                            %% The variable is not associated with an
404                            %% argument expression; just residualize it.
405                            residualize_var(R, S);
406                        Opnd ->
407			    i_var_1(R, Opnd, Ctxt, Env, S)
408                    end;
409                error ->
410                    %% The variable is unbound. (It has not been
411                    %% accidentally captured, however, or it would have
412                    %% been in the environment.) We leave it as it is,
413                    %% without any warning.
414		    {E, count_size(weight(var), S)}
415            end
416    end.
417
418%% This first visits the bound operand and then does copy propagation.
419%% Note that we must first set the "inner-pending" flag, and clear the
420%% flag afterwards.
421
422i_var_1(R, Opnd, Ctxt, Env, S) ->
423    %% If the operand is already "inner-pending", it is residualised.
424    %% (In Lisp/C, if the variable might be assigned to, it should also
425    %% be residualised.)
426    L = Opnd#opnd.loc,
427    case st__test_inner_pending(L, S) of
428	true ->
429	    residualize_var(R, S);
430	false ->
431	    S1 = st__mark_inner_pending(L, S),
432	    case catch {ok, visit(Opnd, S1)} of
433		{ok, {E, S2}} ->
434		    %% Note that we pass the current environment and
435		    %% context to `copy', but not the current renaming.
436		    S3 = st__clear_inner_pending(L, S2),
437		    copy(R, Opnd, E, Ctxt, Env, S3);
438		{'EXIT', X} ->
439		    exit(X);
440		X ->
441		    %% If we use destructive update for the
442		    %% `inner-pending' flag, we must make sure to clear
443		    %% it also if we make a nonlocal return.
444		    st__clear_inner_pending(Opnd#opnd.loc, S1),
445		    throw(X)
446	    end
447    end.
448
449%% A multiple-value aggregate `<e1, ..., en>'. This is very much like a
450%% tuple data constructor `{e1, ..., en}'; cf. `i_data' for details.
451
452i_values(E, Ctxt, Ren, Env, S) ->
453    case values_es(E) of
454	[E1] ->
455	    %% Single-value aggregates can be dropped; they are simply
456	    %% notation.
457	    i(E1, Ctxt, Ren, Env, S);
458	Es ->
459	    %% In `effect' context, we can simply make a sequence of the
460	    %% argument expressions, also visited in `effect' context.
461	    %% In all other cases, the arguments are visited for value.
462	    case Ctxt of
463		effect ->
464		    {Es1, S1} =
465			mapfoldl(fun (E, S) ->
466					 i(E, effect, Ren, Env, S)
467				 end,
468				 S, Es),
469		    E1 = foldl(fun (E1, E2) ->
470				       make_seq(E1, E2)
471			       end,
472			       void(), Es1),
473		    {E1, S1};    % drop annotations on E
474		_ ->
475		    {Es1, S1} = mapfoldl(fun (E, S) ->
476						 i(E, value, Ren, Env,
477						   S)
478					 end,
479					 S, Es),
480		    %% Aggregating values does not write them to memory,
481		    %% so we count no extra cost per element.
482		    S2 = count_size(weight(values), S1),
483		    {update_c_values(E, Es1), S2}
484	    end
485    end.
486
487%% A let-expression `let <v1,...,vn> = e0 in e1' is semantically
488%% equivalent to a case-expression `case e0 of <v1,...,vn> when 'true'
489%% -> e1 end'. As a special case, `let <v> = e0 in e1' is also
490%% equivalent to `apply fun (v) -> e0 (e1)'. However, for efficiency,
491%% and in order to allow the handling of `case' clauses to introduce new
492%% let-expressions without entering an infinite rewrite loop, we handle
493%% these directly.
494
495%%% %% Rewriting a `let' to an equivalent expression.
496%%% i_let(E, Ctxt, Ren, Env, S) ->
497%%%     case let_vars(E) of
498%%% 	[V] ->
499%%%  	    E1 = update_c_apply(E, c_fun([V], let_body(E)), [let_arg(E)]),
500%%%  	    i(E1, Ctxt, Ren, Env, S);
501%%% 	Vs ->
502%%%  	    C = c_clause(Vs, abstract(true), let_body(E)),
503%%%  	    E1 = update_c_case(E, let_arg(E), [C]),
504%%%  	    i(E1, Ctxt, Ren, Env, S)
505%%%     end.
506
507i_let(E, Ctxt, Ren, Env, S) ->
508    case let_vars(E) of
509	[V] ->
510	    i_let_1(V, E, Ctxt, Ren, Env, S);
511	Vs ->
512	    %% Visit the argument expression in `value' context, to
513	    %% simplify it as far as possible.
514	    {A, S1} = i(let_arg(E), value, Ren, Env, S),
515	    case get_components(length(Vs), result(A)) of
516		{true, As} ->
517		    %% Note that only the components of the result of
518		    %% `A' are passed on; any effects are hoisted.
519		    {E1, S2} = i_let_2(Vs, As, E, Ctxt, Ren, Env, S1),
520		    {hoist_effects(A, E1), S2};
521		false ->
522		    %% We cannot do anything with this `let', since the
523		    %% variables cannot be matched against the argument
524		    %% components. Just visit the variables for renaming
525		    %% and visit the body for value (cf. `i_fun').
526		    {_, Ren1, Env1, S2} = bind_locals(Vs, Ren, Env, S1),
527		    Vs1 = i_params(Vs, Ren1, Env1),
528		    %% The body is always visited for value here.
529		    {B, S3} = i(let_body(E), value, Ren1, Env1, S2),
530		    S4 = count_size(weight('let'), S3),
531		    {update_c_let(E, Vs1, A, B), S4}
532	    end
533    end.
534
535%% Single-variable `let' binding.
536
537i_let_1(V, E, Ctxt, Ren, Env, S) ->
538    %% Make an operand structure for the argument expression, create a
539    %% local binding from the parameter to the operand structure, and
540    %% visit the body. Finally create necessary bindings and/or set
541    %% flags.
542    {Opnd, S1} = make_opnd(let_arg(E), Ren, Env, S),
543    {[R], Ren1, Env1, S2} = bind_locals([V], [Opnd], Ren, Env, S1),
544    {E1, S3} = i(let_body(E), Ctxt, Ren1, Env1, S2),
545    i_let_3([R], [Opnd], E1, S3).
546
547%% Multi-variable `let' binding.
548
549i_let_2(Vs, As, E, Ctxt, Ren, Env, S) ->
550    %% Make operand structures for the argument components. Note that
551    %% since the argument has already been visited at this point, we use
552    %% the identity renaming for the operands.
553    {Opnds, S1} = mapfoldl(fun (E, S) ->
554                                   make_opnd(E, ren__identity(), Env, S)
555                           end,
556                           S, As),
557    %% Create local bindings from the parameters to their respective
558    %% operand structures, and visit the body.
559    {Rs, Ren1, Env1, S2} = bind_locals(Vs, Opnds, Ren, Env, S1),
560    {E1, S3} = i(let_body(E), Ctxt, Ren1, Env1, S2),
561    i_let_3(Rs, Opnds, E1, S3).
562
563i_let_3(Rs, Opnds, E, S) ->
564    %% Create necessary bindings and/or set flags.
565    {E1, S1} = make_let_bindings(Rs, E, S),
566
567    %% We must also create evaluation for effect, for any unused
568    %% operands, as after an application expression.
569    residualize_operands(Opnds, E1, S1).
570
571%% A sequence `do e1 e2', written `(seq e1 e2)' in the original
572%% algorithm, where `e1' is evaluated for effect only (since its value
573%% is not used), and `e2' yields the final value. Note that we use
574%% `make_seq' to recompose the sequence after visiting the parts.
575
576i_seq(E, Ctxt, Ren, Env, S) ->
577    {E1, S1} = i(seq_arg(E), effect, Ren, Env, S),
578    {E2, S2} = i(seq_body(E), Ctxt, Ren, Env, S1),
579    %% A sequence has no cost in itself.
580    {make_seq(E1, E2), S2}.
581
582
583%% The `case' switch of Core Erlang is rather different from the boolean
584%% `(if e1 e2 e3)' case of the original algorithm, but the central idea
585%% is the same: if, given the simplified switch expression (which is
586%% visited in `value' context - a boolean `test' context would not be
587%% generally useful), there is a clause which could definitely be
588%% selected, such that no clause before it can possibly be selected,
589%% then we can eliminate all other clauses. (And even if this is not the
590%% case, some clauses can often be eliminated.) Furthermore, if a clause
591%% can be selected, we can replace the case-expression (including the
592%% switch expression) with the body of the clause and a set of zero or
593%% more let-bindings of subexpressions of the switch expression. (In the
594%% simplest case, the switch expression is evaluated only for effect.)
595
596i_case(E, Ctxt, Ren, Env, S) ->
597    %% First visit the switch expression in `value' context, to simplify
598    %% it as far as possible. Note that only the result part is passed
599    %% on to the clause matching below; any effects are hoisted.
600    {A, S1} = i(case_arg(E), value, Ren, Env, S),
601    A1 = result(A),
602
603    %% Propagating an application context into the branches could cause
604    %% the arguments of the application to be evaluated *after* the
605    %% switch expression, but *before* the body of the selected clause.
606    %% Such interleaving is not allowed in general, and it does not seem
607    %% worthwile to make a more powerful transformation here. Therefore,
608    %% the clause bodies are conservatively visited for value if the
609    %% context is `application'.
610    Ctxt1 = safe_context(Ctxt),
611    {E1, S2} = case get_components(case_arity(E), A1) of
612		   {true, As} ->
613		       i_case_1(As, E, Ctxt1, Ren, Env, S1);
614		   false ->
615		       i_case_1([], E, Ctxt1, Ren, Env, S1)
616	       end,
617    {hoist_effects(A, E1), S2}.
618
619i_case_1(As, E, Ctxt, Ren, Env, S) ->
620    case i_clauses(As, case_clauses(E), Ctxt, Ren, Env, S) of
621        {false, {As1, Vs, Env1, Cs}, S1} ->
622            %% We still have a list of clauses. Sanity check:
623            if Cs == [] ->
624                    report_warning("empty list of clauses "
625				   "in residual program!.\n");
626               true ->
627                    ok
628            end,
629	    {A, S2} = i(c_values(As1), value, ren__identity(), Env1,
630			S1),
631	    {E1, S3} = i_case_2(Cs, A, E, S2),
632	    i_case_3(Vs, Env1, E1, S3);
633        {true, {_, Vs, Env1, [C]}, S1} ->
634            %% A single clause was selected; we just take the body.
635	    i_case_3(Vs, Env1, clause_body(C), S1)
636    end.
637
638%% Check if all clause bodies are actually equivalent expressions that
639%% do not depent on pattern variables (this sometimes occurs as a
640%% consequence of inlining, e.g., all branches might yield 'true'), and
641%% if so, replace the `case' with a sequence, first evaluating the
642%% clause selection for effect, then evaluating one of the clause bodies
643%% for its value. (Unless the switch contains a catch-all clause, the
644%% clause selection must be evaluated for effect, since there is no
645%% guarantee that any of the clauses will actually match. Assuming that
646%% some clause always matches could make an undefined program produce a
647%% value.) This makes the final size less than what was accounted for
648%% when visiting the clauses, but currently we don't try to adjust for
649%% this.
650
651i_case_2(Cs, A, E, S) ->
652    case equivalent_clauses(Cs) of
653	false ->
654	    %% Count the base sizes for the remaining clauses; pattern
655	    %% and guard sizes are already counted.
656	    N = weight('case') + weight(clause) * length(Cs),
657	    S1 = count_size(N, S),
658	    {update_c_case(E, A, Cs), S1};
659	true ->
660	    case cerl_clauses:any_catchall(Cs) of
661		true ->
662		    %% We know that some clause must be selected, so we
663		    %% can drop all the testing as well.
664		    E1 = make_seq(A, clause_body(hd(Cs))),
665		    {E1, S};
666		false ->
667		    %% The clause selection must be performed for
668		    %% effect.
669		    E1 = update_c_case(E, A,
670				       set_clause_bodies(Cs, void())),
671		    {make_seq(E1, clause_body(hd(Cs))), S}
672	    end
673    end.
674
675i_case_3(Vs, Env, E, S) ->
676    %% For the variables bound to the switch expression subexpressions,
677    %% make let bindings or create evaluation for effect.
678    Rs = [env__get(var_name(V), Env) || V <- Vs],
679    {E1, S1} = make_let_bindings(Rs, E, S),
680    Opnds = [R#ref.opnd || R <- Rs],
681    residualize_operands(Opnds, E1, S1).
682
683%% This function takes a sequence of switch expressions `Es' (which can
684%% be the empty list if these are unknown) and a list `Cs' of clauses,
685%% and returns `{Match, {As, Vs, Env1, Cs1}, S1}' where `As' is a list
686%% of residual switch expressions, `Vs' the list of variables used in
687%% the templates, `Env1' the environment for the templates, and `Cs1'
688%% the list of residual clauses. `Match' is `true' if some clause could
689%% be shown to definitely match (in this case, `Cs1' contains exactly
690%% one element), and `false' otherwise. `S1' is the new state. The given
691%% `Ctxt' is the context to be used for visiting the body of clauses.
692%%
693%% Visiting a clause basically amounts to extending the environment for
694%% all variables in the pattern, as for a `fun' (cf. `i_fun'),
695%% propagating match information if possible, and visiting the guard and
696%% body in the new environment.
697%%
698%% To make it cheaper to do handle a set of clauses, and to avoid
699%% unnecessarily exceeding the size limit, we avoid visiting the bodies
700%% of clauses which are subsequently removed, by dividing the visiting
701%% of a clause into two stages: first construct the environment(s) and
702%% visit the pattern (for renaming) and the guard (for value), then
703%% reduce the switch as much as possible, and lastly visit the body.
704
705i_clauses(Cs, Ctxt, Ren, Env, S) ->
706    i_clauses([], Cs, Ctxt, Ren, Env, S).
707
708i_clauses(Es, Cs, Ctxt, Ren, Env, S) ->
709    %% Create templates for the switch expressions.
710    {Ts, {Vs, Env0}} = mapfoldl(fun (E, {Vs, Env}) ->
711					{T, Vs1, Env1} =
712					    make_template(E, Env),
713					{T, {Vs1 ++ Vs, Env1}}
714				end,
715				{[], Env}, Es),
716
717    %% Make operand structures for the switch subexpression templates
718    %% (found in `Env0') and add proper ref-structure bindings to the
719    %% environment. Since the subexpressions in general can be
720    %% interdependent (Vs is in reverse-dependency order), the
721    %% environment (and renaming) must be created incrementally. Note
722    %% that since the switch expressions have been visited already, the
723    %% identity renaming is used for the operands.
724    Vs1 = lists:reverse(Vs),
725    {Ren1, Env1, S1} =
726	foldl(fun (V, {Ren, Env, S}) ->
727		      E = env__get(var_name(V), Env0),
728		      {Opnd, S_1} = make_opnd(E, ren__identity(), Env,
729					      S),
730		      {_, Ren1, Env1, S_2} = bind_locals([V], [Opnd],
731							 Ren, Env, S_1),
732		      {Ren1, Env1, S_2}
733	      end,
734	      {Ren, Env, S}, Vs1),
735
736    %% First we visit the head of each individual clause, renaming
737    %% pattern variables, inserting let-bindings in the guard and body,
738    %% and visiting the guard. The information used for visiting the
739    %% clause body will be prefixed to the clause annotations.
740    {Cs1, S2} = mapfoldl(fun (C, S) ->
741				 i_clause_head(C, Ts, Ren1, Env1, S)
742			 end,
743			 S1, Cs),
744
745    %% Now that the clause guards have been reduced as far as possible,
746    %% we can attempt to reduce the clauses.
747    As = [hd(get_ann(T)) || T <- Ts],
748    case cerl_clauses:reduce(Cs1, Ts) of
749        {false, Cs2} ->
750            %% We still have one or more clauses (with associated
751            %% extended environments). Their bodies have not yet been
752            %% visited, so we do that (in the respective safe
753            %% environments, adding the sizes of the visited heads to
754            %% the current size counter) and return the final list of
755            %% clauses.
756            {Cs3, S3} = mapfoldl(
757                          fun (C, S) ->
758                                  i_clause_body(C, Ctxt, S)
759                          end,
760                          S2, Cs2),
761            {false, {As, Vs1, Env1, Cs3}, S3};
762        {true, {C, _}} ->
763            %% A clause C could be selected (the bindings have already
764            %% been added to the guard/body). Note that since the clause
765            %% head will probably be discarded, its size is not counted.
766	    {C1, Ren2, Env2, _} = get_clause_extras(C),
767	    {B, S3} = i(clause_body(C), Ctxt, Ren2, Env2, S2),
768	    C2 = update_c_clause(C1, clause_pats(C1), clause_guard(C1), B),
769	    {true, {As, Vs1, Env1, [C2]}, S3}
770    end.
771
772%% This visits the head of a clause, renames pattern variables, inserts
773%% let-bindings in the guard and body, and does inlining on the guard
774%% expression. Returns a list of pairs `{NewClause, Data}', where `Data'
775%% is `{Renaming, Environment, Size}' used for visiting the body of the
776%% new clause.
777
778i_clause_head(C, Ts, Ren, Env, S) ->
779    %% Match the templates against the (non-renamed) patterns to get the
780    %% available information about matching subexpressions. We don't
781    %% care at this point whether an exact match/nomatch is detected.
782    Ps = clause_pats(C),
783    Bs = case cerl_clauses:match_list(Ps, Ts) of
784	     {_, Bs1} -> Bs1;
785	     none -> []
786	 end,
787
788    %% The patterns must be visited for renaming; cf. `i_pattern'. We
789    %% use a passive size counter for visiting the patterns and the
790    %% guard (cf. `visit'), because we do not know at this stage whether
791    %% the clause will be kept or not; the final value of the counter is
792    %% included in the returned value below.
793    {_, Ren1, Env1, S1} = bind_locals(clause_vars(C), Ren, Env, S),
794    S2 = new_passive_size(get_size_limit(S1), S1),
795    {Ps1, S3} = mapfoldl(fun (P, S) ->
796				 i_pattern(P, Ren1, Env1, Ren, Env, S)
797			 end,
798			 S2, Ps),
799
800    %% Rewrite guard and body and visit the guard for value. Discard the
801    %% latter size count if the guard turns out to be a constant.
802    G = add_match_bindings(Bs, clause_guard(C)),
803    B = add_match_bindings(Bs, clause_body(C)),
804    {G1, S4} = i(G, value, Ren1, Env1, S3),
805    S5 = case is_literal(G1) of
806	     true ->
807		 revert_size(S3, S4);
808	     false ->
809		 S4
810	 end,
811
812    %% Revert to the size counter we had on entry to this function. The
813    %% environment and renaming, together with the size of the clause
814    %% head, are prefixed to the annotations for later use.
815    Size = get_size_value(S5),
816    C1 = update_c_clause(C, Ps1, G1, B),
817    {set_clause_extras(C1, Ren1, Env1, Size), revert_size(S, S5)}.
818
819add_match_bindings(Bs, E) ->
820    %% Don't waste time if the variables definitely cannot be used.
821    %% (Most guards are simply `true'.)
822    case is_literal(E) of
823	true ->
824	    E;
825	false ->
826	    Vs = [V || {V, E} <- Bs, E /= any],
827	    Es = [hd(get_ann(E)) || {_V, E} <- Bs, E /= any],
828	    c_let(Vs, c_values(Es), E)
829    end.
830
831i_clause_body(C0, Ctxt, S) ->
832    {C, Ren, Env, Size} = get_clause_extras(C0),
833    S1 = count_size(Size, S),
834    {B, S2} = i(clause_body(C), Ctxt, Ren, Env, S1),
835    C1 = update_c_clause(C, clause_pats(C), clause_guard(C), B),
836    {C1, S2}.
837
838get_clause_extras(C) ->
839    [{Ren, Env, Size} | As] = get_ann(C),
840    {set_ann(C, As), Ren, Env, Size}.
841
842set_clause_extras(C, Ren, Env, Size) ->
843    As = [{Ren, Env, Size} | get_ann(C)],
844    set_ann(C, As).
845
846%% This is the `(lambda x e)' case of the original algorithm. A
847%% `fun' is like a lambda expression, but with a varying number of
848%% parameters; possibly zero.
849
850i_fun(E, Ctxt, Ren, Env, S) ->
851    case Ctxt of
852        effect ->
853            %% Reduce useless `fun' expressions to a simple constant;
854	    %% visiting the body would be a waste of time, and could
855	    %% needlessly mark variables as referenced.
856            {void(), count_size(weight(literal), S)};
857        value ->
858            %% Note that the variables are visited as patterns.
859            Vs = fun_vars(E),
860            {_, Ren1, Env1, S1} = bind_locals(Vs, Ren, Env, S),
861            Vs1 = i_params(Vs, Ren1, Env1),
862
863            %% The body is always visited for value.
864            {B, S2} = i(fun_body(E), value, Ren1, Env1, S1),
865
866	    %% We don't bother to include the exact number of free
867	    %% variables in the cost for creating a fun-value.
868            S3 = count_size(weight('fun'), S2),
869
870	    %% Inlining might have duplicated code, so we must remove
871	    %% any 'id'-annotations from the original fun-expression.
872	    %% (This forces a later stage to invent new id:s.) This is
873	    %% necessary as long as fun:s may still need to be
874	    %% identified the old way. Function variables that are not
875	    %% in application context also have such annotations, but
876	    %% the inlining will currently lose all annotations on
877	    %% variable references (I think), so that's not a problem.
878            {set_ann(c_fun(Vs1, B), kill_id_anns(get_ann(E))), S3};
879        #app{} ->
880            %% An application of a fun-expression (in the source code)
881            %% is handled by going directly to `inline'; this is never
882            %% residualised, and we don't set up new counters here. Note
883            %% that inlining of copy-propagated fun-expressions is done
884            %% in `copy'; not here.
885            inline(E, Ctxt, Ren, Env, S)
886    end.
887
888%% A `letrec' requires a circular environment, but is otherwise like a
889%% `let', i.e. like a direct lambda application. Note that only
890%% fun-expressions (lambda abstractions) may occur in the right-hand
891%% side of each definition.
892
893i_letrec(E, Ctxt, Ren, Env, S) ->
894    %% Note that we pass an empty list for the auto-referenced
895    %% (exported) functions here.
896    {Es, B, _, S1} = i_letrec(letrec_defs(E), letrec_body(E), [], Ctxt,
897			      Ren, Env, S),
898
899    %% If no bindings remain, only the body is returned.
900    case Es of
901        [] ->
902            {B, S1};    % drop annotations on E
903        _ ->
904            S2 = count_size(weight(letrec), S1),
905            {update_c_letrec(E, Es, B), S2}
906    end.
907
908%% The major part of this is shared by letrec-expressions and module
909%% definitions alike.
910
911i_letrec(Es, B, Xs, Ctxt, Ren, Env, S) ->
912    %% First, we create operands with dummy renamings and environments,
913    %% and with fresh store locations for cached expressions and operand
914    %% info.
915    {Opnds, S1} = mapfoldl(fun ({_, E}, S) ->
916                                   make_opnd(E, undefined, undefined, S)
917                           end,
918                           S, Es),
919
920    %% Then we make recursive bindings for the definitions.
921    {Rs, Ren1, Env1, S2} = bind_recursive([F || {F, _} <- Es],
922                                          Opnds, Ren, Env, S1),
923
924    %% For the function variables listed in Xs (none for a
925    %% letrec-expression), we must make sure that the corresponding
926    %% operand expressions are visited and that the definitions are
927    %% marked as referenced; we also need to return the possibly renamed
928    %% function variables.
929    {Xs1, S3} =
930        mapfoldl(
931          fun (X, S) ->
932                  Name = ren__map(var_name(X), Ren1),
933                  case env__lookup(Name, Env1) of
934                      {ok, R} ->
935                          S_1 = i_letrec_export(R, S),
936                          {ref_to_var(R), S_1};
937                      error ->
938                          %% We just skip any exports that are not
939                          %% actually defined here, and generate a
940                          %% warning message.
941                          {N, A} = var_name(X),
942                          report_warning("export `~w'/~w "
943					 "not defined.\n", [N, A]),
944                          {X, S}
945                  end
946          end,
947          S2, Xs),
948
949    %% At last, we can then visit the body.
950    {B1, S4} = i(B, Ctxt, Ren1, Env1, S3),
951
952    %% Finally, we create new letrec-bindings for any and all
953    %% residualised definitions. All referenced functions should have
954    %% been visited; the call to `visit' below is expected to retrieve a
955    %% cached expression.
956    Rs1 = keep_referenced(Rs, S4),
957    {Es1, S5} = mapfoldl(fun (R, S) ->
958				 {E_1, S_1} = visit(R#ref.opnd, S),
959				 {{ref_to_var(R), E_1}, S_1}
960			 end,
961			 S4, Rs1),
962    {Es1, B1, Xs1, S5}.
963
964%% This visits the operand for a function definition exported by a
965%% `letrec' (which is really a `module' module definition, since normal
966%% letrecs have no export declarations). Only the updated state is
967%% returned. We must handle the "inner-pending" flag when doing this;
968%% cf. `i_var'.
969
970i_letrec_export(R, S) ->
971    Opnd = R#ref.opnd,
972    S1 = st__mark_inner_pending(Opnd#opnd.loc, S),
973    {_, S2} = visit(Opnd, S1),
974    {_, S3} = residualize_var(R, st__clear_inner_pending(Opnd#opnd.loc,
975							 S2)),
976    S3.
977
978%% This is the `(call e1 e2)' case of the original algorithm. The only
979%% difference is that we must handle multiple (or no) operand
980%% expressions.
981
982i_apply(E, Ctxt, Ren, Env, S) ->
983    {Opnds, S1} = mapfoldl(fun (E, S) ->
984                                   make_opnd(E, Ren, Env, S)
985                           end,
986                           S, apply_args(E)),
987
988    %% Allocate a new app-context location and set up an application
989    %% context structure containing the surrounding context.
990    {L, S2} = st__new_app_loc(S1),
991    Ctxt1 = #app{opnds = Opnds, ctxt = Ctxt, loc = L},
992
993    %% Visit the operator expression in the new call context.
994    {E1, S3} = i(apply_op(E), Ctxt1, Ren, Env, S2),
995
996    %% Check the "inlined" flag to find out what to do next. (The store
997    %% location could be recycled after the flag has been tested, but
998    %% there is no real advantage to that, because in practice, only
999    %% 4-5% of all created store locations will ever be reused, while
1000    %% there will be a noticeable overhead for managing the free list.)
1001    case st__get_app_inlined(L, S3) of
1002        true ->
1003            %% The application was inlined, so we have the final
1004            %% expression in `E1'. We just have to handle any operands
1005            %% that need to be residualized for effect only (i.e., those
1006            %% the values of which are not used).
1007            residualize_operands(Opnds, E1, S3);
1008        false ->
1009            %% Otherwise, `E1' is the residual operator expression. We
1010            %% make sure all operands are visited, and rebuild the
1011            %% application.
1012            {Es, S4} = mapfoldl(fun (Opnd, S) ->
1013					visit_and_count_size(Opnd, S)
1014				end,
1015				S3, Opnds),
1016	    N = apply_size(length(Es)),
1017            {update_c_apply(E, E1, Es), count_size(N, S4)}
1018    end.
1019
1020apply_size(A) ->
1021    weight(apply) + weight(argument) * A.
1022
1023%% Since it is not the task of this transformation to handle
1024%% cross-module inlining, all inter-module calls are handled by visiting
1025%% the components (the module and function name, and the arguments of
1026%% the call) for value. In `effect' context, if the function itself is
1027%% known to be completely effect free, the call can be discarded and the
1028%% arguments evaluated for effect. Otherwise, if all the visited
1029%% arguments are to constants, and the function is known to be safe to
1030%% execute at compile time, then we try to evaluate the call. If
1031%% evaluation completes normally, the call is replaced by the result;
1032%% otherwise the call is residualised.
1033
1034i_call(E, Ctxt, Ren, Env, S) ->
1035    {M, S1} = i(call_module(E), value, Ren, Env, S),
1036    {F, S2} = i(call_name(E), value, Ren, Env, S1),
1037    As = call_args(E),
1038    Arity = length(As),
1039
1040    %% Check if the name of the called function is static. If so,
1041    %% discard the size counts performed above, since the values will
1042    %% not cause any runtime cost.
1043    Static =  is_c_atom(M) and is_c_atom(F),
1044    S3 = case Static of
1045	     true ->
1046		 revert_size(S, S2);
1047	     false ->
1048		 S2
1049	 end,
1050    case Ctxt of
1051        effect when Static == true ->
1052            case is_safe_call(atom_val(M), atom_val(F), Arity) of
1053                true ->
1054                    %% The result will not be used, and the call is
1055                    %% effect free, so we create a multiple-value
1056                    %% aggregate containing the (not yet visited)
1057                    %% arguments and process that instead.
1058                    i(c_values(As), effect, Ren, Env, S3);
1059                false ->
1060                    %% We are not allowed to simply discard the call,
1061                    %% but we can try to evaluate it.
1062                    i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env,
1063                             S3)
1064            end;
1065        _ ->
1066	    i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, S3)
1067    end.
1068
1069i_call_1(Static, M, F, Arity, As, E, Ctxt, Ren, Env, S) ->
1070    %% Visit the arguments for value.
1071    {As1, S1} = mapfoldl(fun (X, A) -> i(X, value, Ren, Env, A) end,
1072			 S, As),
1073    case Static of
1074	true ->
1075	    case erl_bifs:is_pure(atom_val(M), atom_val(F), Arity) of
1076		true ->
1077		    %% It is allowed to evaluate this at compile time.
1078		    case all_static(As1) of
1079			true ->
1080			    i_call_3(M, F, As1, E, Ctxt, Env, S1);
1081			false ->
1082			    %% See if the call can be rewritten instead.
1083			    i_call_4(M, F, As1, E, Ctxt, Env, S1)
1084		    end;
1085		false ->
1086		    i_call_2(M, F, As1, E, S1)
1087	    end;
1088	false ->
1089	    i_call_2(M, F, As1, E, S1)
1090    end.
1091
1092%% Residualise the call.
1093
1094i_call_2(M, F, As, E, S) ->
1095    N = weight(call) + weight(argument) * length(As),
1096    {update_c_call(E, M, F, As), count_size(N, S)}.
1097
1098%% Attempt to evaluate the call to yield a literal; if that fails, try
1099%% to rewrite the expression.
1100
1101i_call_3(M, F, As, E, Ctxt, Env, S) ->
1102    %% Note that we extract the results of argument expessions here; the
1103    %% expressions could still be sequences with side effects.
1104    Vs = [concrete(result(A)) || A <- As],
1105    case catch {ok, apply(atom_val(M), atom_val(F), Vs)} of
1106	{ok, V} ->
1107	    %% Evaluation completed normally - try to turn the result
1108	    %% back into a syntax tree (representing a literal).
1109	    case is_literal_term(V) of
1110		true ->
1111		    %% Make a sequence of the arguments (as a
1112		    %% multiple-value aggregate) and the final value.
1113		    S1 = count_size(weight(values), S),
1114		    S2 = count_size(weight(literal), S1),
1115		    {make_seq(c_values(As), abstract(V)), S2};
1116		false ->
1117		    %% The result could not be represented as a literal.
1118		    i_call_4(M, F, As, E, Ctxt, Env, S)
1119	    end;
1120	_ ->
1121	    %% The evaluation attempt did not complete normally.
1122	    i_call_4(M, F, As, E, Ctxt, Env, S)
1123    end.
1124
1125%% Rewrite the expression, if possible, otherwise residualise it.
1126
1127i_call_4(M, F, As, E, Ctxt, Env, S) ->
1128    case reduce_bif_call(atom_val(M), atom_val(F), As, Env) of
1129        false ->
1130            %% Nothing more to be done - residualise the call.
1131            i_call_2(M, F, As, E, S);
1132        {true, E1} ->
1133            %% We revisit the result, because the rewriting might have
1134            %% opened possibilities for further inlining. Since the
1135            %% parts have already been visited once, we use the identity
1136            %% renaming here.
1137            i(E1, Ctxt, ren__identity(), Env, S)
1138    end.
1139
1140%% For now, we assume that primops cannot be evaluated at compile time,
1141%% probably being too special. Also, we have no knowledge about their
1142%% side effects.
1143
1144i_primop(E, Ren, Env, S) ->
1145    %% Visit the arguments for value.
1146    {As, S1} = mapfoldl(fun (E, S) ->
1147				i(E, value, Ren, Env, S)
1148			end,
1149			S, primop_args(E)),
1150    N = weight(primop) + weight(argument) * length(As),
1151    {update_c_primop(E, primop_name(E), As), count_size(N, S1)}.
1152
1153%% This is like having an expression with an extra fun-expression
1154%% attached for "exceptional cases"; actually, there are exactly two
1155%% parameter variables for the body, but they are easiest handled as if
1156%% their number might vary, just as for a `fun'.
1157
1158i_try(E, Ctxt, Ren, Env, S) ->
1159    %% The argument expression is evaluated in `value' context, and the
1160    %% surrounding context is propagated into both branches. We do not
1161    %% try to recognize cases when the protected expression will
1162    %% actually raise an exception. Note that the variables are visited
1163    %% as patterns.
1164    {A, S1} = i(try_arg(E), value, Ren, Env, S),
1165    Vs = try_vars(E),
1166    {_, Ren1, Env1, S2} = bind_locals(Vs, Ren, Env, S1),
1167    Vs1 = i_params(Vs, Ren1, Env1),
1168    {B, S3} = i(try_body(E), Ctxt, Ren1, Env1, S2),
1169    case is_safe(A) of
1170	true ->
1171	    %% The `try' wrapper can be dropped in this case. Since the
1172	    %% expressions have been visited already, the identity
1173	    %% renaming is used when we revisit the new let-expression.
1174	    i(c_let(Vs1, A, B), Ctxt, ren__identity(), Env, S3);
1175	false ->
1176	    Evs = try_evars(E),
1177	    {_, Ren2, Env2, S4} = bind_locals(Evs, Ren, Env, S3),
1178	    Evs1 = i_params(Evs, Ren2, Env2),
1179	    {H, S5} = i(try_handler(E), Ctxt, Ren2, Env2, S4),
1180	    S6 = count_size(weight('try'), S5),
1181	    {update_c_try(E, A, Vs1, B, Evs1, H), S6}
1182    end.
1183
1184%% A special case of try-expressions:
1185
1186i_catch(E, Ctxt, Ren, Env, S) ->
1187    %% We cannot propagate application contexts into the catch.
1188    {E1, S1} = i(catch_body(E), safe_context(Ctxt), Ren, Env, S),
1189    case is_safe(E1) of
1190	true ->
1191	    %% The `catch' wrapper can be dropped in this case.
1192	    {E1, S1};
1193	false ->
1194	    S2 = count_size(weight('catch'), S1),
1195	    {update_c_catch(E, E1), S2}
1196    end.
1197
1198%% A receive-expression is very much like a case-expression, with the
1199%% difference that we do not have access to a switch expression, since
1200%% the value being switched on is taken from the mailbox. The fact that
1201%% the receive-expression may iterate over an arbitrary number of
1202%% messages is not of interest to us. All we can do here is to visit its
1203%% subexpressions, and possibly eliminate definitely unselectable
1204%% clauses.
1205
1206i_receive(E, Ctxt, Ren, Env, S) ->
1207    %% We first visit the expiry expression (for value) and the expiry
1208    %% body (in the surrounding context).
1209    {T, S1} = i(receive_timeout(E), value, Ren, Env, S),
1210    {B, S2} = i(receive_action(E), Ctxt, Ren, Env, S1),
1211
1212    %% Then we visit the clauses. Note that application contexts may not
1213    %% in general be propagated into the branches (and the expiry body),
1214    %% because the execution of the `receive' may remove a message from
1215    %% the mailbox as a side effect; the situation is thus analogous to
1216    %% that in a `case' expression.
1217    Ctxt1 = safe_context(Ctxt),
1218    case i_clauses(receive_clauses(E), Ctxt1, Ren, Env, S2) of
1219        {false, {[], _, _, Cs}, S3} ->
1220            %% We still have a list of clauses. If the list is empty,
1221            %% and the expiry expression is the integer zero, the
1222            %% expression reduces to the expiry body.
1223	    if Cs == [] ->
1224		    case is_c_int(T) andalso (int_val(T) == 0) of
1225			true ->
1226			    {B, S3};
1227			false ->
1228			    i_receive_1(E, Cs, T, B, S3)
1229		    end;
1230	       true ->
1231		    i_receive_1(E, Cs, T, B, S3)
1232	    end;
1233        {true, {_, _, _, Cs}, S3} ->
1234	    %% Cs is a single clause that will always be matched (if a
1235	    %% message exists), but we must keep the `receive' statement
1236	    %% in order to fetch the message from the mailbox.
1237	    i_receive_1(E, Cs, T, B, S3)
1238    end.
1239
1240i_receive_1(E, Cs, T, B, S) ->
1241    %% Here, we just add the base sizes for the receive-expression
1242    %% itself and for each remaining clause; cf. `case'.
1243    N = weight('receive') + weight(clause) * length(Cs),
1244    {update_c_receive(E, Cs, T, B), count_size(N, S)}.
1245
1246%% A module definition is like a `letrec', with some add-ons (export and
1247%% attribute declarations) but without an explicit body. Actually, the
1248%% exporting of function names has the same effect as if there was a
1249%% body consisting of the list of references to the exported functions.
1250%% Thus, the exported functions are exactly those which can be
1251%% referenced from outside the module.
1252
1253i_module(E, Ctxt, Ren, Env, S) ->
1254    %% Cf. `i_letrec'. Note that we pass a dummy constant value for the
1255    %% "body" parameter.
1256    {Es, _, Xs1, S1} = i_letrec(module_defs(E), void(),
1257                                module_exports(E), Ctxt, Ren, Env, S),
1258    %% Sanity check:
1259    case Es of
1260        [] ->
1261            report_warning("no function definitions remaining "
1262			   "in module `~s'.\n",
1263			   [atom_name(module_name(E))]);
1264        _ ->
1265            ok
1266    end,
1267    E1 = update_c_module(E, module_name(E), Xs1, module_attrs(E), Es),
1268    {E1, count_size(weight(module), S1)}.
1269
1270%% Binary-syntax expressions are too complicated to do anything
1271%% interesting with here - that is beyond the scope of this program;
1272%% also, their construction could have side effects, so even in effect
1273%% context we can't remove them. (We don't bother to identify cases of
1274%% "safe" unused binaries which could be removed.)
1275
1276i_binary(E, Ren, Env, S) ->
1277    %% Visit the segments for value.
1278    {Es, S1} = mapfoldl(fun (E, S) ->
1279				i_bitstr(E, Ren, Env, S)
1280			end,
1281			S, binary_segments(E)),
1282    S2 = count_size(weight(binary), S1),
1283    {update_c_binary(E, Es), S2}.
1284
1285i_bitstr(E, Ren, Env, S) ->
1286    %% It is not necessary to visit the Unit, Type and Flags fields,
1287    %% since these are always literals.
1288    {Val, S1} = i(bitstr_val(E), value, Ren, Env, S),
1289    {Size, S2} = i(bitstr_size(E), value, Ren, Env, S1),
1290    Unit = bitstr_unit(E),
1291    Type = bitstr_type(E),
1292    Flags = bitstr_flags(E),
1293    S3 = count_size(weight(bitstr), S2),
1294    {update_c_bitstr(E, Val, Size, Unit, Type, Flags), S3}.
1295
1296%% This is a simplified version of `i_pattern', for lists of parameter
1297%% variables only. It does not modify the state.
1298
1299i_params([V | Vs], Ren, Env) ->
1300    Name = ren__map(var_name(V), Ren),
1301    case env__lookup(Name, Env) of
1302	{ok, R} ->
1303	    [ref_to_var(R) | i_params(Vs, Ren, Env)];
1304	error ->
1305	    report_internal_error("variable `~w' not bound "
1306				  "in pattern.\n", [Name]),
1307	    exit(error)
1308    end;
1309i_params([], _, _) ->
1310    [].
1311
1312%% For ordinary patterns, we just visit to rename variables and count
1313%% the size/cost. All occurring binding instances of variables should
1314%% already have been added to the renaming and environment; however, to
1315%% handle the size expressions of binary-syntax patterns, we must pass
1316%% the renaming and environment of the containing expression
1317
1318i_pattern(E, Ren, Env, Ren0, Env0, S) ->
1319    case type(E) of
1320	var ->
1321	    %% Count no size.
1322            Name = ren__map(var_name(E), Ren),
1323            case env__lookup(Name, Env) of
1324                {ok, R} ->
1325                    {ref_to_var(R), S};
1326                error ->
1327                    report_internal_error("variable `~w' not bound "
1328					  "in pattern.\n", [Name]),
1329		    exit(error)
1330            end;
1331	alias ->
1332	    %% Count no size.
1333	    V = alias_var(E),
1334	    Name = ren__map(var_name(V), Ren),
1335	    case env__lookup(Name, Env) of
1336		{ok, R} ->
1337		    %% Visit the subpattern and recompose.
1338		    V1 = ref_to_var(R),
1339		    {P, S1} = i_pattern(alias_pat(E), Ren, Env, Ren0,
1340					Env0, S),
1341		    {update_c_alias(E, V1, P), S1};
1342		error ->
1343		    report_internal_error("variable `~w' not bound "
1344					  "in pattern.\n", [Name]),
1345		    exit(error)
1346	    end;
1347	binary ->
1348	    {Es, S1} = mapfoldl(fun (E, S) ->
1349					i_bitstr_pattern(E, Ren, Env,
1350							  Ren0, Env0, S)
1351				end,
1352				S, binary_segments(E)),
1353	    S2 = count_size(weight(binary), S1),
1354	    {update_c_binary(E, Es), S2};
1355	_ ->
1356	    case is_literal(E) of
1357		true ->
1358                    {E, count_size(weight(literal), S)};
1359		false ->
1360		    {Es1, S1} = mapfoldl(fun (E, S) ->
1361						 i_pattern(E, Ren, Env,
1362							   Ren0, Env0,
1363							   S)
1364					 end,
1365					 S, data_es(E)),
1366		    %% We assume that in general, the elements of the
1367		    %% constructor will all be fetched.
1368		    N = weight(data) + length(Es1) * weight(element),
1369		    S2 = count_size(N, S1),
1370		    {update_data(E, data_type(E), Es1), S2}
1371	    end
1372    end.
1373
1374i_bitstr_pattern(E, Ren, Env, Ren0, Env0, S) ->
1375    %% It is not necessary to visit the Unit, Type and Flags fields,
1376    %% since these are always literals. The Value field is a limited
1377    %% pattern - either a literal or an unbound variable. The Size field
1378    %% is a limited expression - either a literal or a variable bound in
1379    %% the environment of the containing expression.
1380    {Val, S1} = i_pattern(bitstr_val(E), Ren, Env, Ren0, Env0, S),
1381    {Size, S2} = i(bitstr_size(E), value, Ren0, Env0, S1),
1382    Unit = bitstr_unit(E),
1383    Type = bitstr_type(E),
1384    Flags = bitstr_flags(E),
1385    S3 = count_size(weight(bitstr), S2),
1386    {update_c_bitstr(E, Val, Size, Unit, Type, Flags), S3}.
1387
1388
1389%% ---------------------------------------------------------------------
1390%% Other central inlining functions
1391
1392%% It is assumed here that `E' is a fun-expression and the context is an
1393%% app-structure. If the inlining might be aborted for some reason, a
1394%% corresponding catch should have been set up before entering `inline'.
1395%%
1396%% Note: if the inlined body is a lambda abstraction, and the
1397%% surrounding context of the app-context is also an app-context, the
1398%% `inlined' flag of the outermost context will be set before that of
1399%% the inner context is set. E.g.: `let F = fun (X) -> fun (Y) -> E in
1400%% apply apply F(A)(B)' will propagate the body of F, which is a lambda
1401%% abstraction, into the outer application context, which will be
1402%% inlined to produce expression `E', and the flag of the outer context
1403%% will be set. Upon return, the flag of the inner context will also be
1404%% set. However, the flags are then tested in innermost-first order.
1405%% Thus, if some inlining attempt is aborted, the `inlined' flags of any
1406%% nested app-contexts must be cleared.
1407%%
1408%% This implementation does nothing to handle inlining of calls to
1409%% recursive functions in a smart way. This means that as long as the
1410%% size and effort counters do not prevent it, the function body will be
1411%% inlined (i.e., the first iteration will be unrolled), and the
1412%% recursive calls will be residualized.
1413
1414inline(E, #app{opnds = Opnds, ctxt = Ctxt, loc = L}, Ren, Env, S) ->
1415    %% Check that the arities match:
1416    Vs = fun_vars(E),
1417    if length(Opnds) /= length(Vs) ->
1418            report_error("function called with wrong number "
1419			 "of arguments!\n"),
1420	    %% TODO: should really just residualise the call...
1421	    exit(error);
1422       true ->
1423            ok
1424    end,
1425    %% Create local bindings for the parameters to their respective
1426    %% operand structures from the app-structure, and visit the body in
1427    %% the context saved in the structure.
1428    {Rs, Ren1, Env1, S1} = bind_locals(Vs, Opnds, Ren, Env, S),
1429    {E1, S2} = i(fun_body(E), Ctxt, Ren1, Env1, S1),
1430
1431    %% Create necessary bindings and/or set flags.
1432    {E2, S3} = make_let_bindings(Rs, E1, S2),
1433
1434    %% Lastly, flag the application as inlined, since the inlining
1435    %% attempt was not aborted before we reached this point.
1436    {E2, st__set_app_inlined(L, S3)}.
1437
1438%% For the (possibly renamed) argument variables to an inlined call,
1439%% either create `let' bindings for them, if they are still referenced
1440%% in the residual expression (in C/Lisp, also if they are assigned to),
1441%% or otherwise (if they are not referenced or assigned) mark them for
1442%% evaluation for side effects.
1443
1444make_let_bindings([R | Rs], E, S) ->
1445    {E1, S1} = make_let_bindings(Rs, E, S),
1446    make_let_binding(R, E1, S1);
1447make_let_bindings([], E, S) ->
1448    {E, S}.
1449
1450make_let_binding(R, E, S) ->
1451    %% The `referenced' flag is conservatively computed. We therefore
1452    %% first check some simple cases where parameter R is definitely not
1453    %% referenced in the resulting body E.
1454    case is_literal(E) of
1455        true ->
1456            %% A constant contains no variable references.
1457            make_let_binding_1(R, E, S);
1458        false ->
1459            case is_c_var(E) of
1460                true ->
1461                    case var_name(E) =:= R#ref.name of
1462                        true ->
1463                            %% The body is simply the parameter variable
1464                            %% itself. Visit the operand for value and
1465                            %% substitute the result for the body.
1466                            visit_and_count_size(R#ref.opnd, S);
1467                        false ->
1468                            %% Not the same variable, so the parameter
1469                            %% is not referenced at all.
1470                            make_let_binding_1(R, E, S)
1471                    end;
1472                false ->
1473		    %% Proceed to check the `referenced' flag.
1474		    case st__get_var_referenced(R#ref.loc, S) of
1475			true ->
1476			    %% The parameter is probably referenced in
1477			    %% the residual code (although it might not
1478			    %% be). Visit the operand for value and
1479			    %% create a let-binding.
1480			    {E1, S1} = visit_and_count_size(R#ref.opnd,
1481							    S),
1482			    S2 = count_size(weight('let'), S1),
1483			    {c_let([ref_to_var(R)], E1, E), S2};
1484			false ->
1485			    %% The parameter is definitely not
1486			    %% referenced.
1487			    make_let_binding_1(R, E, S)
1488		    end
1489	    end
1490    end.
1491
1492%% This marks the operand for evaluation for effect.
1493
1494make_let_binding_1(R, E, S) ->
1495    Opnd = R#ref.opnd,
1496    {E, st__set_opnd_effect(Opnd#opnd.loc, S)}.
1497
1498%% Here, `R' is the ref-structure which is the target of the copy
1499%% propagation, and `Opnd' is a visited operand structure, to be
1500%% propagated through `R' if possible - if not, `R' is residualised.
1501%% `Opnd' is normally the operand that `R' is bound to, and `E' is the
1502%% result of visiting `Opnd' for value; we pass this as an argument so
1503%% we don't have to fetch it multiple times (because we don't have
1504%% constant time access).
1505%%
1506%% We also pass the environment of the site of the variable reference,
1507%% for use when inlining a propagated fun-expression. In the original
1508%% algorithm by Waddell, the environment used for inlining such cases is
1509%% the identity mapping, because the fun-expression body has already
1510%% been visited for value, and their algorithm combines renaming of
1511%% source-code variables with the looking up of information about
1512%% residual-code variables. We, however, need to check the environment
1513%% of the call site when creating new non-shadowed variables, but we
1514%% must avoid repeated renaming. We therefore separate the renaming and
1515%% the environment (as in the renaming algorithm of Peyton-Jones and
1516%% Marlow). This also makes our implementation more general, compared to
1517%% the original algorithm, because we do not give up on propagating
1518%% variables that were free in the fun-body.
1519%%
1520%%  Example:
1521%%
1522%%	let F = fun (X) -> {'foo', X} in
1523%%	let G = fun (H) -> apply H(F)        % F is free in the fun G
1524%%	in apply G(fun (F) -> apply F(42))
1525%%	  =>
1526%%	let F = fun (X) -> {'foo', X} in
1527%%	apply (fun (H) -> apply H(F))(fun (F) -> apply F(42))
1528%%	  =>
1529%%	let F = fun (X) -> {'foo', X} in
1530%%	apply (fun (F) -> apply F(42))(F)
1531%%	  =>
1532%%	let F = fun (X) -> {'foo', X} in
1533%%	apply F(42)
1534%%	  =>
1535%%	apply (fun (X) -> {'foo', X})(2)
1536%%	  =>
1537%%	{'foo', 42}
1538%%
1539%%  The original algorithm would give up at stage 4, because F was free
1540%%  in the propagated fun-expression. Our version inlines this example
1541%%  completely.
1542
1543copy(R, Opnd, E, Ctxt, Env, S) ->
1544    case is_c_var(E) of
1545        true ->
1546	    %% The operand reduces to another variable - get its
1547	    %% ref-structure and attempt to propagate further.
1548            copy_var(env__get(var_name(E), Opnd#opnd.env), Ctxt, Env,
1549                     S);
1550        false ->
1551            %% Apart from variables and functional values (the latter
1552            %% are handled by `copy_1' below), only constant literals
1553            %% are copyable in general; other things, including e.g.
1554            %% tuples `{foo, X}', could cause duplication of work, and
1555            %% are not copy propagated.
1556            case is_literal(E) of
1557                true ->
1558                    {E, count_size(weight(literal), S)};
1559                false ->
1560                    copy_1(R, Opnd, E, Ctxt, Env, S)
1561            end
1562    end.
1563
1564copy_var(R, Ctxt, Env, S) ->
1565    %% (In Lisp or C, if this other variable might be assigned to, we
1566    %% should residualize the "parent" instead, so we don't bypass any
1567    %% destructive updates.)
1568    case R#ref.opnd of
1569        undefined ->
1570            %% This variable is not bound to an expression, so just
1571            %% residualize it.
1572            residualize_var(R, S);
1573        Opnd ->
1574	    %% Note that because operands are always visited before
1575	    %% copied, all copyable operand expressions will be
1576	    %% propagated through any number of bindings. If `R' was
1577	    %% bound to a constant literal, we would never have reached
1578	    %% this point.
1579            case st__lookup_opnd_cache(Opnd#opnd.loc, S) of
1580                error ->
1581                    %% The result for this operand is not yet ready
1582                    %% (which should mean that it is a recursive
1583                    %% reference). Thus, we must residualise the
1584                    %% variable.
1585                    residualize_var(R, S);
1586                {ok, #cache{expr = E1}} ->
1587                    %% The result for the operand is ready, so we can
1588                    %% proceed to propagate it.
1589                    copy_1(R, Opnd, E1, Ctxt, Env, S)
1590            end
1591    end.
1592
1593copy_1(R, Opnd, E, Ctxt, Env, S) ->
1594    %% Fun-expression (lambdas) are a bit special; they are copyable,
1595    %% but should preferably not be duplicated, so they should not be
1596    %% copy propagated except into application contexts, where they can
1597    %% be inlined.
1598    case is_c_fun(E) of
1599        true ->
1600            case Ctxt of
1601                #app{} ->
1602                    %% First test if the operand is "outer-pending"; if
1603                    %% so, don't inline.
1604                    case st__test_outer_pending(Opnd#opnd.loc, S) of
1605                        false ->
1606                            copy_inline(R, Opnd, E, Ctxt, Env, S);
1607                        true ->
1608                            %% Cyclic reference forced inlining to stop
1609                            %% (avoiding infinite unfolding).
1610                            residualize_var(R, S)
1611                    end;
1612                _ ->
1613                    residualize_var(R, S)
1614            end;
1615        false ->
1616            %% We have no other cases to handle here
1617            residualize_var(R, S)
1618    end.
1619
1620%% This inlines a function value that was propagated to an application
1621%% context. The inlining is done with an identity renaming (since the
1622%% expression is already visited) but in the environment of the call
1623%% site (which is OK because of the no-shadowing strategy for renaming,
1624%% and because the domain of our environments are the residual-program
1625%% variables instead of the source-program variables). Note that we must
1626%% first set the "outer-pending" flag, and clear it afterwards.
1627
1628copy_inline(R, Opnd, E, Ctxt, Env, S) ->
1629    S1 = st__mark_outer_pending(Opnd#opnd.loc, S),
1630    case catch {ok, copy_inline_1(R, E, Ctxt, Env, S1)} of
1631        {ok, {E1, S2}} ->
1632            {E1, st__clear_outer_pending(Opnd#opnd.loc, S2)};
1633        {'EXIT', X} ->
1634            exit(X);
1635        X ->
1636	    %% If we use destructive update for the `outer-pending'
1637	    %% flag, we must make sure to clear it upon a nonlocal
1638	    %% return.
1639	    st__clear_outer_pending(Opnd#opnd.loc, S1),
1640            throw(X)
1641    end.
1642
1643%% If the current effort counter was passive, we use a new active effort
1644%% counter with the inherited limit for this particular inlining.
1645
1646copy_inline_1(R, E, Ctxt, Env, S) ->
1647    case effort_is_active(S) of
1648        true ->
1649            copy_inline_2(R, E, Ctxt, Env, S);
1650        false ->
1651            S1 = new_active_effort(get_effort_limit(S), S),
1652            case catch {ok, copy_inline_2(R, E, Ctxt, Env, S1)} of
1653                {ok, {E1, S2}} ->
1654                    %% Revert to the old effort counter.
1655                    {E1, revert_effort(S, S2)};
1656                {counter_exceeded, effort, _} ->
1657                    %% Aborted this inlining attempt because too much
1658                    %% effort was spent. Residualize the variable and
1659                    %% revert to the previous state.
1660                    residualize_var(R, S);
1661                {'EXIT', X} ->
1662                    exit(X);
1663                X ->
1664                    throw(X)
1665            end
1666    end.
1667
1668%% Regardless of whether the current size counter is active or not, we
1669%% use a new active size counter for each inlining. If the current
1670%% counter was passive, the new counter gets the inherited size limit;
1671%% if it was active, the size limit of the new counter will be equal to
1672%% the remaining budget of the current counter (which itself is not
1673%% affected by the inlining). This distributes the size budget more
1674%% evenly over "inlinings within inlinings", so that the whole size
1675%% budget is not spent on the first few call sites (in an inlined
1676%% function body) forcing the remaining call sites to be residualised.
1677
1678copy_inline_2(R, E, Ctxt, Env, S) ->
1679    Limit = case size_is_active(S) of
1680                true ->
1681                    get_size_limit(S) - get_size_value(S);
1682                false ->
1683                    get_size_limit(S)
1684            end,
1685    %% Add the cost of the application to the new size limit, so we
1686    %% always inline functions that are small enough, even if `Limit' is
1687    %% close to zero at this point. (This is an extension to the
1688    %% original algorithm.)
1689    S1 = new_active_size(Limit + apply_size(length(Ctxt#app.opnds)), S),
1690    case catch {ok, inline(E, Ctxt, ren__identity(), Env, S1)} of
1691        {ok, {E1, S2}} ->
1692            %% Revert to the old size counter.
1693            {E1, revert_size(S, S2)};
1694        {counter_exceeded, size, S2} ->
1695            %% Aborted this inlining attempt because it got too big.
1696            %% Residualize the variable and revert to the old size
1697            %% counter. (It is important that we do not also revert the
1698            %% effort counter here. Because the effort and size counters
1699            %% are always set up together, we know that the effort
1700            %% counter returned in S2 is the same that was passed to
1701            %% `inline'.)
1702	    S3 = revert_size(S, S2),
1703	    %% If we use destructive update for the `inlined' flag, we
1704	    %% must make sure to clear the flags of any nested
1705	    %% app-contexts upon aborting; see `inline' for details.
1706	    reset_nested_apps(Ctxt, S3),    % for effect
1707            residualize_var(R, S3);
1708        {'EXIT', X} ->
1709            exit(X);
1710        X ->
1711            throw(X)
1712    end.
1713
1714reset_nested_apps(#app{ctxt = Ctxt, loc = L}, S) ->
1715    reset_nested_apps(Ctxt, st__clear_app_inlined(L, S));
1716reset_nested_apps(_, S) ->
1717    S.
1718
1719
1720%% ---------------------------------------------------------------------
1721%%	Support functions
1722
1723new_var(Env) ->
1724    Name = env__new_vname(Env),
1725    c_var(Name).
1726
1727residualize_var(R, S) ->
1728    S1 = count_size(weight(var), S),
1729    {ref_to_var(R), st__set_var_referenced(R#ref.loc, S1)}.
1730
1731%% This function returns the value-producing subexpression of any
1732%% expression. (Except for sequencing expressions, this is the
1733%% expression itself.)
1734
1735result(E) ->
1736    case is_c_seq(E) of
1737        true ->
1738            %% Also see `make_seq', which is used in all places to build
1739            %% sequences so that they are always nested in the first
1740            %% position.
1741            seq_body(E);
1742        false ->
1743            E
1744    end.
1745
1746%% This function rewrites E to `do A1 E' if A is `do A1 A2', and
1747%% otherwise returns E unchanged.
1748
1749hoist_effects(A, E) ->
1750    case type(A) of
1751	seq -> make_seq(seq_arg(A), E);
1752	_ -> E
1753    end.
1754
1755%% This "build sequencing expression" operation assures that sequences
1756%% are always nested in the first position, which makes it easy to find
1757%% the actual value-producing expression of a sequence (cf. `result').
1758
1759make_seq(E1, E2) ->
1760    case is_safe(E1) of
1761        true ->
1762            %% The first expression can safely be dropped.
1763            E2;
1764        false ->
1765            %% If `E1' is a sequence whose final expression has no side
1766            %% effects, then we can lose *that* expression when we
1767            %% compose the new sequence, since its value will not be
1768            %% used.
1769            E3 = case is_c_seq(E1) of
1770                     true ->
1771                         case is_safe(seq_body(E1)) of
1772                             true ->
1773                                 %% Drop the final expression.
1774                                 seq_arg(E1);
1775                             false ->
1776                                 E1
1777                         end;
1778                     false ->
1779                         E1
1780                 end,
1781            case is_c_seq(E2) of
1782                true ->
1783                    %% `E2' is a sequence (E2' E2''), so we must
1784                    %% rearrange the nesting to ((E1, E2') E2''), to
1785                    %% preserve the invariant. Annotations on `E2' are
1786                    %% lost.
1787                    c_seq(c_seq(E3, seq_arg(E2)), seq_body(E2));
1788                false ->
1789                    c_seq(E3, E2)
1790            end
1791    end.
1792
1793%% Currently, safe expressions include variables, lambda expressions,
1794%% constructors with safe subexpressions (this includes atoms, integers,
1795%% empty lists, etc.), seq-, let- and letrec-expressions with safe
1796%% subexpressions, try- and catch-expressions with safe subexpressions
1797%% and calls to safe functions with safe argument subexpressions.
1798%% Binaries seem too tricky to be considered.
1799
1800is_safe(E) ->
1801    case is_data(E) of
1802        true ->
1803	    is_safe_list(data_es(E));
1804        false ->
1805            case type(E) of
1806                var ->
1807                    true;
1808                'fun' ->
1809                    true;
1810		values ->
1811		    is_safe_list(values_es(E));
1812                'seq' ->
1813                    case is_safe(seq_arg(E)) of
1814                        true ->
1815                            is_safe(seq_body(E));
1816                        false ->
1817                            false
1818                    end;
1819                'let' ->
1820                    case is_safe(let_arg(E)) of
1821                        true ->
1822                            is_safe(let_body(E));
1823                        false ->
1824                            false
1825                    end;
1826                letrec ->
1827                    is_safe(letrec_body(E));
1828		'try' ->
1829		    %% If the argument expression is not safe, it could
1830		    %% be modifying the state; thus, even if the body is
1831		    %% safe, the try-expression as a whole would not be.
1832		    %% If the argument is safe, the handler is not used.
1833                    case is_safe(try_arg(E)) of
1834                        true ->
1835                            is_safe(try_body(E));
1836                        false ->
1837                            false
1838                    end;
1839		'catch' ->
1840                    is_safe(catch_body(E));
1841		call ->
1842		    M = call_module(E),
1843		    F = call_name(E),
1844		    case is_c_atom(M) and is_c_atom(F) of
1845			true ->
1846			    As = call_args(E),
1847			    case is_safe_list(As) of
1848				true ->
1849				    is_safe_call(atom_val(M),
1850						 atom_val(F),
1851						 length(As));
1852				false ->
1853				    false
1854			    end;
1855			false ->
1856			    false
1857		    end;
1858                _ ->
1859                    false
1860            end
1861    end.
1862
1863is_safe_list([E | Es]) ->
1864    case is_safe(E) of
1865	true ->
1866	    is_safe_list(Es);
1867	false ->
1868	    false
1869    end;
1870is_safe_list([]) ->
1871    true.
1872
1873is_safe_call(M, F, A) ->
1874    erl_bifs:is_safe(M, F, A).
1875
1876%% When setting up local variables, we only create new names if we have
1877%% to, according to the "no-shadowing" strategy.
1878
1879make_locals(Vs, Ren, Env) ->
1880    make_locals(Vs, [], Ren, Env).
1881
1882make_locals([V | Vs], As, Ren, Env) ->
1883    Name = var_name(V),
1884    case env__is_defined(Name, Env) of
1885        false ->
1886            %% The variable need not be renamed. Just make sure that the
1887            %% renaming will map it to itself.
1888            Name1 = Name,
1889            Ren1 = ren__add_identity(Name, Ren);
1890        true ->
1891            %% The variable must be renamed to maintain the no-shadowing
1892            %% invariant. Do the right thing for function variables.
1893            Name1 = case Name of
1894			{A, N} ->
1895			    env__new_fname(A, N, Env);
1896			_ ->
1897			    env__new_vname(Env)
1898		    end,
1899            Ren1 = ren__add(Name, Name1, Ren)
1900    end,
1901    %% This temporary binding is added for correct new-key generation.
1902    Env1 = env__bind(Name1, dummy, Env),
1903    make_locals(Vs, [Name1 | As], Ren1, Env1);
1904make_locals([], As, Ren, Env) ->
1905    {reverse(As), Ren, Env}.
1906
1907%% This adds let-bindings for the source code variables in `Es' to the
1908%% environment `Env'.
1909%%
1910%% Note that we always assign a new state location for the
1911%% residual-program variable, since we cannot know when a location for a
1912%% particular variable in the source code can be reused.
1913
1914bind_locals(Vs, Ren, Env, S) ->
1915    Opnds = lists:duplicate(length(Vs), undefined),
1916    bind_locals(Vs, Opnds, Ren, Env, S).
1917
1918bind_locals(Vs, Opnds, Ren, Env, S) ->
1919    {Ns, Ren1, Env1} = make_locals(Vs, Ren, Env),
1920    {Rs, Env2, S1} = bind_locals_1(Ns, Opnds, [], Env1, S),
1921    {Rs, Ren1, Env2, S1}.
1922
1923%% Note that the `Vs' are currently not used for anything except the
1924%% number of variables. If we were maintaining "source-referenced"
1925%% flags, then the flag in the new variable should be initialized to the
1926%% current value of the (residual-) referenced-flag of the "parent".
1927
1928bind_locals_1([N | Ns], [Opnd | Opnds], Rs, Env, S) ->
1929    {R, S1} = new_ref(N, Opnd, S),
1930    Env1 = env__bind(N, R, Env),
1931    bind_locals_1(Ns, Opnds, [R | Rs], Env1, S1);
1932bind_locals_1([], [], Rs, Env, S) ->
1933    {lists:reverse(Rs), Env, S}.
1934
1935new_refs(Ns, Opnds, S) ->
1936    new_refs(Ns, Opnds, [], S).
1937
1938new_refs([N | Ns], [Opnd | Opnds], Rs, S) ->
1939    {R, S1} = new_ref(N, Opnd, S),
1940    new_refs(Ns, Opnds, [R | Rs], S1);
1941new_refs([], [], Rs, S) ->
1942    {lists:reverse(Rs), S}.
1943
1944new_ref(N, Opnd, S) ->
1945    {L, S1} = st__new_ref_loc(S),
1946    {#ref{name = N, opnd = Opnd, loc = L}, S1}.
1947
1948%% This adds recursive bindings for the source code variables in `Es' to
1949%% the environment `Env'. Note that recursive binding of a set of
1950%% variables is an atomic operation on the environment - they cannot be
1951%% added one at a time.
1952
1953bind_recursive(Vs, Opnds, Ren, Env, S) ->
1954    {Ns, Ren1, Env1} = make_locals(Vs, Ren, Env),
1955    {Rs, S1} = new_refs(Ns, Opnds, S),
1956
1957    %% When this fun-expression is evaluated, it updates the operand
1958    %% structure in the ref-structure to contain the recursively defined
1959    %% environment and the correct renaming.
1960    Fun = fun (R, Env) ->
1961		  Opnd = R#ref.opnd,
1962		  R#ref{opnd = Opnd#opnd{ren = Ren1, env = Env}}
1963	  end,
1964    {Rs, Ren1, env__bind_recursive(Ns, Rs, Fun, Env1), S1}.
1965
1966safe_context(Ctxt) ->
1967    case Ctxt of
1968        #app{} ->
1969            value;
1970        _ ->
1971            Ctxt
1972    end.
1973
1974%% Note that the name of a variable encodes its type: a "plain" variable
1975%% or a function variable. The latter kind also contains an arity number
1976%% which should be preserved upon renaming.
1977
1978ref_to_var(#ref{name = Name}) ->
1979    %% If we were maintaining "source-referenced" flags, the annotation
1980    %% `add_ann([#source_ref{loc = L}], E)' should also be done here, to
1981    %% make the algorithm reapplicable. This is however not necessary
1982    %% since there are no destructive variable assignments in Erlang.
1983    c_var(Name).
1984
1985%% Including the effort counter of the call site assures that the cost
1986%% of processing an operand via `visit' is charged to the correct
1987%% counter. In particular, if the effort counter of the call site was
1988%% passive, the operands will also be processed with a passive counter.
1989
1990make_opnd(E, Ren, Env, S) ->
1991    {L, S1} = st__new_opnd_loc(S),
1992    C = st__get_effort(S1),
1993    Opnd = #opnd{expr = E, ren = Ren, env = Env, loc = L, effort = C},
1994    {Opnd, S1}.
1995
1996keep_referenced(Rs, S) ->
1997    [R || R <- Rs, st__get_var_referenced(R#ref.loc, S)].
1998
1999residualize_operands(Opnds, E, S) ->
2000    foldr(fun (Opnd, {E, S}) -> residualize_operand(Opnd, E, S) end,
2001          {E, S}, Opnds).
2002
2003%% This is the only case where an operand expression can be visited in
2004%% `effect' context instead of `value' context.
2005
2006residualize_operand(Opnd, E, S) ->
2007    case st__get_opnd_effect(Opnd#opnd.loc, S) of
2008        true ->
2009            %% The operand has not been visited, so we do that now, but
2010            %% in `effect' context. (Waddell's algorithm does some stuff
2011            %% here to account specially for the operand size, which
2012            %% appears unnecessary.)
2013            {E1, S1} = i(Opnd#opnd.expr, effect, Opnd#opnd.ren,
2014                         Opnd#opnd.env, S),
2015            {make_seq(E1, E), S1};
2016        false ->
2017            {E, S}
2018    end.
2019
2020%% The `visit' function always visits the operand expression in `value'
2021%% context (`residualize_operand' visits an unreferenced operand
2022%% expression in `effect' context when necessary). A new passive size
2023%% counter is used for visiting the operand, the final value of which is
2024%% then cached along with the resulting expression.
2025%%
2026%% Note that the effort counter of the call site, included in the
2027%% operand structure, is not a shared object. Thus, the effort budget is
2028%% actually reused over all occurrences of the operands of a single
2029%% application. This does not appear to be a problem; just a
2030%% modification of the algorithm.
2031
2032visit(Opnd, S) ->
2033    {C, S1} = visit_1(Opnd, S),
2034    {C#cache.expr, S1}.
2035
2036visit_and_count_size(Opnd, S) ->
2037    {C, S1} = visit_1(Opnd, S),
2038    {C#cache.expr, count_size(C#cache.size, S1)}.
2039
2040visit_1(Opnd, S) ->
2041    case st__lookup_opnd_cache(Opnd#opnd.loc, S) of
2042        error ->
2043            %% Use a new, passive, size counter for visiting operands,
2044            %% and use the effort counter of the context of the operand.
2045            %% It turns out that if the latter is active, it must be the
2046            %% same object as the one currently used, and if it is
2047            %% passive, it does not matter if it is the same object as
2048            %% any other counter.
2049	    Effort = Opnd#opnd.effort,
2050	    Active = counter__is_active(Effort),
2051	    S1 = case Active of
2052		     true ->
2053			 S;    % don't change effort counter
2054		     false ->
2055			 st__set_effort(Effort, S)
2056		 end,
2057	    S2 = new_passive_size(get_size_limit(S1), S1),
2058
2059            %% Visit the expression and cache the result, along with the
2060            %% final value of the size counter.
2061            {E, S3} = i(Opnd#opnd.expr, value, Opnd#opnd.ren,
2062                        Opnd#opnd.env, S2),
2063            Size = get_size_value(S3),
2064            C = #cache{expr = E, size = Size},
2065            S4 = revert_size(S, st__set_opnd_cache(Opnd#opnd.loc, C,
2066						   S3)),
2067	    case Active of
2068		true ->
2069		    {C, S4};  % keep using the same effort counter
2070		false ->
2071		    {C, revert_effort(S, S4)}
2072	    end;
2073	{ok, C} ->
2074            {C, S}
2075    end.
2076
2077%% Create a pattern matching template for an expression. A template
2078%% contains only data constructors (including atomic ones) and
2079%% variables, and compound literals are not folded into a single node.
2080%% Each node in the template is annotated with the variable which holds
2081%% the corresponding subexpression; these are new, unique variables not
2082%% existing in the given `Env'. Returns `{Template, Variables, NewEnv}',
2083%% where `Variables' is the list of all variables corresponding to nodes
2084%% in the template *listed in reverse dependency order*, and `NewEnv' is
2085%% `Env' augmented with mappings from the variable names to
2086%% subexpressions of `E' (not #ref{} structures!) rewritten so that no
2087%% computations are duplicated. `Variables' is guaranteed to be nonempty
2088%% - at least the root node will always be bound to a new variable.
2089
2090make_template(E, Env) ->
2091    make_template(E, [], Env).
2092
2093make_template(E, Vs0, Env0) ->
2094    case is_data(E) of
2095	true ->
2096	    {Ts, {Vs1, Env1}} = mapfoldl(
2097				  fun (E, {Vs0, Env0}) ->
2098					  {T, Vs1, Env1} =
2099					      make_template(E, Vs0,
2100							    Env0),
2101					  {T, {Vs1, Env1}}
2102				  end,
2103				  {Vs0, Env0}, data_es(E)),
2104	    T = make_data_skel(data_type(E), Ts),
2105	    E1 = update_data(E, data_type(E),
2106			     [hd(get_ann(T)) || T <- Ts]),
2107	    V = new_var(Env1),
2108	    Env2 = env__bind(var_name(V), E1, Env1),
2109	    {set_ann(T, [V]), [V | Vs1], Env2};
2110	false ->
2111	    case type(E) of
2112		seq ->
2113		    %% For a sequencing, we can rebind the variable used
2114		    %% for the body, and pass on the template as it is.
2115		    {T, Vs1, Env1} = make_template(seq_body(E), Vs0,
2116						   Env0),
2117		    V = var_name(hd(get_ann(T))),
2118		    E1 = update_c_seq(E, seq_arg(E), env__get(V, Env1)),
2119		    Env2 = env__bind(V, E1, Env1),
2120		    {T, Vs1, Env2};
2121		_ ->
2122		    V = new_var(Env0),
2123		    Env1 = env__bind(var_name(V), E, Env0),
2124		    {set_ann(V, [V]), [V | Vs0], Env1}
2125	    end
2126    end.
2127
2128%% Two clauses are equivalent if their bodies are equivalent expressions
2129%% given that the respective pattern variables are local.
2130
2131equivalent_clauses([]) ->
2132    true;
2133equivalent_clauses([C | Cs]) ->
2134    Env = cerl_trees:variables(c_values(clause_pats(C))),
2135    equivalent_clauses_1(clause_body(C), Cs, Env).
2136
2137equivalent_clauses_1(E, [C | Cs], Env) ->
2138    Env1 = cerl_trees:variables(c_values(clause_pats(C))),
2139    case equivalent(E, clause_body(C), ordsets:union(Env, Env1)) of
2140	true ->
2141	    equivalent_clauses_1(E, Cs, Env);
2142	false ->
2143	    false
2144    end;
2145equivalent_clauses_1(_, [], _Env) ->
2146    true.
2147
2148%% Two expressions are equivalent if and only if they yield the same
2149%% value and has the same side effects in the same order. Currently, we
2150%% only accept equality between constructors (constants) and nonlocal
2151%% variables, since this should cover most cases of interest. If a
2152%% variable is locally bound in one expression, it cannot be equivalent
2153%% to one with the same name in the other expression, so we need not
2154%% keep track of two environments.
2155
2156equivalent(E1, E2, Env) ->
2157    case is_data(E1) of
2158        true ->
2159            case is_data(E2) of
2160                true ->
2161                    T1 = {data_type(E1), data_arity(E1)},
2162                    T2 = {data_type(E2), data_arity(E2)},
2163                    %% Note that we must test for exact equality.
2164                    if T1 =:= T2 ->
2165                            equivalent_lists(data_es(E1), data_es(E2),
2166					     Env);
2167                       true ->
2168                            false
2169                    end;
2170                false ->
2171                    false
2172            end;
2173        false ->
2174	    case type(E1) of
2175		var ->
2176		    case is_c_var(E2) of
2177			true ->
2178			    N1 = var_name(E1),
2179			    N2 = var_name(E2),
2180			    if N1 =:= N2 ->
2181				    not ordsets:is_element(N1, Env);
2182			       true ->
2183				    false
2184			    end;
2185			false ->
2186			    false
2187		    end;
2188		_ ->
2189		    %% Other constructs are not being considered.
2190		    false
2191	    end
2192    end.
2193
2194equivalent_lists([E1 | Es1], [E2 | Es2], Env) ->
2195    equivalent(E1, E2, Env) and equivalent_lists(Es1, Es2, Env);
2196equivalent_lists([], [], _) ->
2197    true;
2198equivalent_lists(_, _, _) ->
2199    false.
2200
2201%% Return `false' or `{true, EffectExpr, ValueExpr}'. The environment is
2202%% passed for new-variable generation.
2203
2204reduce_bif_call(M, F, As, Env) ->
2205    reduce_bif_call_1(M, F, length(As), As, Env).
2206
2207reduce_bif_call_1(erlang, element, 2, [X, Y], _Env) ->
2208    case is_c_int(X) and is_c_tuple(Y) of
2209	true ->
2210	    %% We are free to change the relative evaluation order of
2211	    %% the elements, so lifting out a particular element is OK.
2212	    T = list_to_tuple(tuple_es(Y)),
2213	    N = int_val(X),
2214	    if integer(N), N > 0, N =< size(T) ->
2215		    E = element(N, T),
2216		    Es = tuple_to_list(setelement(N, T, void())),
2217		    {true, make_seq(c_tuple(Es), E)};
2218	       true ->
2219		    false
2220	    end;
2221	false ->
2222	    false
2223    end;
2224reduce_bif_call_1(erlang, hd, 1, [X], _Env) ->
2225    case is_c_cons(X) of
2226	true ->
2227	    %% Cf. `element/2' above.
2228	    {true, make_seq(cons_tl(X), cons_hd(X))};
2229	false ->
2230	    false
2231    end;
2232reduce_bif_call_1(erlang, length, 1, [X], _Env) ->
2233    case is_c_list(X) of
2234	true ->
2235	    %% Cf. `erlang:size/1' below.
2236	    {true, make_seq(X, c_int(list_length(X)))};
2237	false ->
2238	    false
2239    end;
2240reduce_bif_call_1(erlang, list_to_tuple, 1, [X], _Env) ->
2241    case is_c_list(X) of
2242	true ->
2243	    %% This does not actually preserve all the evaluation order
2244	    %% constraints of the list, but I don't imagine that it will
2245	    %% be a problem.
2246	    {true, c_tuple(list_elements(X))};
2247	false ->
2248	    false
2249    end;
2250reduce_bif_call_1(erlang, setelement, 3, [X, Y, Z], Env) ->
2251    case is_c_int(X) and is_c_tuple(Y) of
2252	true ->
2253	    %% Here, unless `Z' is a simple expression, we must bind it
2254	    %% to a new variable, because in that case, `Z' must be
2255	    %% evaluated before any part of `Y'.
2256	    T = list_to_tuple(tuple_es(Y)),
2257	    N = int_val(X),
2258	    if integer(N), N > 0, N =< size(T) ->
2259		    E = element(N, T),
2260		    case is_simple(Z) of
2261			true ->
2262			    Es = tuple_to_list(setelement(N, T, Z)),
2263			    {true, make_seq(E, c_tuple(Es))};
2264			false ->
2265			    V = new_var(Env),
2266			    Es = tuple_to_list(setelement(N, T, V)),
2267			    E1 = make_seq(E, c_tuple(Es)),
2268			    {true, c_let([V], Z, E1)}
2269		    end;
2270	       true ->
2271		    false
2272	    end;
2273	false ->
2274	    false
2275    end;
2276reduce_bif_call_1(erlang, size, 1, [X], _Env) ->
2277    case is_c_tuple(X) of
2278	true ->
2279	    %% Just evaluate the tuple for effect and use the size (the
2280	    %% arity) as the result.
2281	    {true, make_seq(X, c_int(tuple_arity(X)))};
2282	false ->
2283	    false
2284    end;
2285reduce_bif_call_1(erlang, tl, 1, [X], _Env) ->
2286    case is_c_cons(X) of
2287	true ->
2288	    %% Cf. `element/2' above.
2289	    {true, make_seq(cons_hd(X), cons_tl(X))};
2290	false ->
2291	    false
2292    end;
2293reduce_bif_call_1(erlang, tuple_to_list, 1, [X], _Env) ->
2294    case is_c_tuple(X) of
2295	true ->
2296	    %% This actually introduces slightly stronger constraints on
2297	    %% the evaluation order of the subexpressions.
2298	    {true, make_list(tuple_es(X))};
2299	false ->
2300	    false
2301    end;
2302reduce_bif_call_1(_M, _F, _A, _As, _Env) ->
2303    false.
2304
2305effort_is_active(S) ->
2306    counter__is_active(st__get_effort(S)).
2307
2308size_is_active(S) ->
2309    counter__is_active(st__get_size(S)).
2310
2311get_effort_limit(S) ->
2312    counter__limit(st__get_effort(S)).
2313
2314new_active_effort(Limit, S) ->
2315    st__set_effort(counter__new_active(Limit), S).
2316
2317revert_effort(S1, S2) ->
2318    st__set_effort(st__get_effort(S1), S2).
2319
2320new_active_size(Limit, S) ->
2321    st__set_size(counter__new_active(Limit), S).
2322
2323new_passive_size(Limit, S) ->
2324    st__set_size(counter__new_passive(Limit), S).
2325
2326revert_size(S1, S2) ->
2327    st__set_size(st__get_size(S1), S2).
2328
2329count_effort(N, S) ->
2330    C = st__get_effort(S),
2331    C1 = counter__add(N, C, effort, S),
2332    case debug_counters() of
2333        true ->
2334            case counter__is_active(C1) of
2335                true ->
2336                    V = counter__value(C1),
2337                    case V > get(counter_effort_max) of
2338                        true ->
2339                            put(counter_effort_max, V);
2340                        false ->
2341                            ok
2342                    end;
2343                false ->
2344                    ok
2345            end;
2346        _ ->
2347            ok
2348    end,
2349    st__set_effort(C1, S).
2350
2351count_size(N, S) ->
2352    C = st__get_size(S),
2353    C1 = counter__add(N, C, size, S),
2354    case debug_counters() of
2355        true ->
2356            case counter__is_active(C1) of
2357                true ->
2358                    V = counter__value(C1),
2359                    case V > get(counter_size_max) of
2360                        true ->
2361                            put(counter_size_max, V);
2362                        false ->
2363                            ok
2364                    end;
2365                false ->
2366                    ok
2367            end;
2368        _ ->
2369            ok
2370    end,
2371    st__set_size(C1, S).
2372
2373get_size_value(S) ->
2374    counter__value(st__get_size(S)).
2375
2376get_size_limit(S) ->
2377    counter__limit(st__get_size(S)).
2378
2379kill_id_anns([{'id',_} | As]) ->
2380    kill_id_anns(As);
2381kill_id_anns([A | As]) ->
2382    [A | kill_id_anns(As)];
2383kill_id_anns([]) ->
2384    [].
2385
2386
2387%% =====================================================================
2388%% General utilities
2389
2390max(X, Y) when X > Y -> X;
2391max(_, Y) -> Y.
2392
2393%% The atom `ok', is widely used in Erlang for "void" values.
2394
2395void() -> abstract(ok).
2396
2397is_simple(E) ->
2398    case type(E) of
2399	literal -> true;
2400	var -> true;
2401	'fun' -> true;
2402	_ -> false
2403    end.
2404
2405get_components(N, E) ->
2406    case type(E) of
2407	values ->
2408	    Es = values_es(E),
2409	    if length(Es) == N ->
2410		    {true, Es};
2411	       true ->
2412		    false
2413	    end;
2414	_ when N == 1 ->
2415	    {true, [E]};
2416	_ ->
2417	    false
2418    end.
2419
2420all_static([E | Es]) ->
2421    case is_literal(result(E)) of
2422	true ->
2423	    all_static(Es);
2424	false ->
2425	    false
2426    end;
2427all_static([]) ->
2428    true.
2429
2430set_clause_bodies([C | Cs], B) ->
2431    [update_c_clause(C, clause_pats(C), clause_guard(C), B)
2432     | set_clause_bodies(Cs, B)];
2433set_clause_bodies([], _) ->
2434    [].
2435
2436filename([C | T]) when integer(C), C > 0, C =< 255 ->
2437    [C | filename(T)];
2438filename([H|T]) ->
2439    filename(H) ++ filename(T);
2440filename([]) ->
2441    [];
2442filename(N) when atom(N) ->
2443    atom_to_list(N);
2444filename(N) ->
2445    report_error("bad filename: `~P'.", [N, 25]),
2446    exit(error).
2447
2448
2449%% =====================================================================
2450%% Abstract datatype: renaming()
2451
2452ren__identity() ->
2453    dict:new().
2454
2455ren__add(X, Y, Ren) ->
2456    dict:store(X, Y, Ren).
2457
2458ren__map(X, Ren) ->
2459    case dict:find(X, Ren) of
2460	{ok, Y} ->
2461	    Y;
2462	error ->
2463	    X
2464    end.
2465
2466ren__add_identity(X, Ren) ->
2467    dict:erase(X, Ren).
2468
2469
2470%% =====================================================================
2471%% Abstract datatype: environment()
2472
2473env__empty() ->
2474    rec_env:empty().
2475
2476env__bind(Key, Val, Env) ->
2477    rec_env:bind(Key, Val, Env).
2478
2479%% `Es' should have type `[{Key, Val}]', and `Fun' should have type
2480%% `(Val, Env) -> T', mapping a value together with the recursive
2481%% environment itself to some term `T' to be returned when the entry is
2482%% looked up.
2483
2484env__bind_recursive(Ks, Vs, F, Env) ->
2485    rec_env:bind_recursive(Ks, Vs, F, Env).
2486
2487env__lookup(Key, Env) ->
2488    rec_env:lookup(Key, Env).
2489
2490env__get(Key, Env) ->
2491    rec_env:get(Key, Env).
2492
2493env__is_defined(Key, Env) ->
2494    rec_env:is_defined(Key, Env).
2495
2496env__new_vname(Env) ->
2497    rec_env:new_key(Env).
2498
2499env__new_fname(A, N, Env) ->
2500    rec_env:new_key(fun (X) ->
2501			S = integer_to_list(X),
2502			{list_to_atom(atom_to_list(A) ++ "_" ++ S),
2503			 N}
2504		    end, Env).
2505
2506
2507%% =====================================================================
2508%% Abstract datatype: state()
2509
2510-record(state, {free,		% next free location
2511		size,		% size counter
2512		effort,		% effort counter
2513		cache,		% operand expression cache
2514		var_flags,	% flags for variables (#ref-structures)
2515		opnd_flags,	% flags for operands
2516		app_flags}).	% flags for #app-structures
2517
2518%% Note that we do not have a `var_assigned' flag, since there is no
2519%% destructive assignment in Erlang. In the original algorithm, the
2520%% "residual-referenced"-flags of the previous inlining pass (or
2521%% initialization pass) are used as the "source-referenced"-flags for
2522%% the subsequent pass. The latter may then be used as a safe
2523%% approximation whenever we need to base a decision on whether or not a
2524%% particular variable or function variable could be referenced in the
2525%% program being generated, and computation of the new
2526%% "residual-referenced" flag for that variable is not yet finished. In
2527%% the present algorithm, this can only happen in the presence of
2528%% variable assignments, which do not exist in Erlang. Therefore, we do
2529%% not keep "source-referenced" flags for residual-code references in
2530%% our implementation.
2531%%
2532%% The "inner-pending" flag tells us whether we are already in the
2533%% process of visiting a particular operand, and the "outer-pending"
2534%% flag whether we are in the process of inlining a propagated
2535%% functional value. The "pending flags" are really counters limiting
2536%% the number of times an operand may be inlined recursively, causing
2537%% loop unrolling; however, unrolling more than one iteration does not
2538%% work offhand in the present implementation. (TODO: find out why.)
2539%% Note that the initial value must be greater than zero in order for
2540%% any inlining at all to be done.
2541
2542%% Flags are stored in ETS-tables, one table for each class. The second
2543%% element in each stored tuple is the key (the "label").
2544
2545-record(var_flags, {lab, referenced = false}).
2546-record(opnd_flags, {lab, inner_pending = 1, outer_pending = 1,
2547		     effect = false}).
2548-record(app_flags, {lab, inlined = false}).
2549
2550st__new(Effort, Size) ->
2551    #state{free = 0,
2552	   size = counter__new_passive(Size),
2553           effort = counter__new_passive(Effort),
2554	   cache = dict:new(),
2555	   var_flags = ets:new(var, [set, private, {keypos, 2}]),
2556	   opnd_flags = ets:new(opnd, [set, private, {keypos, 2}]),
2557	   app_flags = ets:new(app, [set, private, {keypos, 2}])}.
2558
2559st__new_loc(S) ->
2560    N = S#state.free,
2561    {N, S#state{free = N + 1}}.
2562
2563st__get_effort(S) ->
2564    S#state.effort.
2565
2566st__set_effort(C, S) ->
2567    S#state{effort = C}.
2568
2569st__get_size(S) ->
2570    S#state.size.
2571
2572st__set_size(C, S) ->
2573    S#state{size = C}.
2574
2575st__set_var_referenced(L, S) ->
2576    T = S#state.var_flags,
2577    [F] = ets:lookup(T, L),
2578    ets:insert(T, F#var_flags{referenced = true}),
2579    S.
2580
2581st__get_var_referenced(L, S) ->
2582    ets:lookup_element(S#state.var_flags, L, #var_flags.referenced).
2583
2584st__lookup_opnd_cache(L, S) ->
2585    dict:find(L, S#state.cache).
2586
2587%% Note that setting the cache should only be done once.
2588
2589st__set_opnd_cache(L, C, S) ->
2590    S#state{cache = dict:store(L, C, S#state.cache)}.
2591
2592st__set_opnd_effect(L, S) ->
2593    T = S#state.opnd_flags,
2594    [F] = ets:lookup(T, L),
2595    ets:insert(T, F#opnd_flags{effect = true}),
2596    S.
2597
2598st__get_opnd_effect(L, S) ->
2599    ets:lookup_element(S#state.opnd_flags, L, #opnd_flags.effect).
2600
2601st__set_app_inlined(L, S) ->
2602    T = S#state.app_flags,
2603    [F] = ets:lookup(T, L),
2604    ets:insert(T, F#app_flags{inlined = true}),
2605    S.
2606
2607st__clear_app_inlined(L, S) ->
2608    T = S#state.app_flags,
2609    [F] = ets:lookup(T, L),
2610    ets:insert(T, F#app_flags{inlined = false}),
2611    S.
2612
2613st__get_app_inlined(L, S) ->
2614    ets:lookup_element(S#state.app_flags, L, #app_flags.inlined).
2615
2616%% The pending-flags are initialized by `st__new_opnd_loc' below.
2617
2618st__test_inner_pending(L, S) ->
2619    T = S#state.opnd_flags,
2620    P = ets:lookup_element(T, L, #opnd_flags.inner_pending),
2621    P =< 0.
2622
2623st__mark_inner_pending(L, S) ->
2624    ets:update_counter(S#state.opnd_flags, L,
2625		       {#opnd_flags.inner_pending, -1}),
2626    S.
2627
2628st__clear_inner_pending(L, S) ->
2629    ets:update_counter(S#state.opnd_flags, L,
2630		       {#opnd_flags.inner_pending, 1}),
2631    S.
2632
2633st__test_outer_pending(L, S) ->
2634    T = S#state.opnd_flags,
2635    P = ets:lookup_element(T, L, #opnd_flags.outer_pending),
2636    P =< 0.
2637
2638st__mark_outer_pending(L, S) ->
2639    ets:update_counter(S#state.opnd_flags, L,
2640		       {#opnd_flags.outer_pending, -1}),
2641    S.
2642
2643st__clear_outer_pending(L, S) ->
2644    ets:update_counter(S#state.opnd_flags, L,
2645		       {#opnd_flags.outer_pending, 1}),
2646    S.
2647
2648st__new_app_loc(S) ->
2649    V = {L, _S1} = st__new_loc(S),
2650    ets:insert(S#state.app_flags, #app_flags{lab = L}),
2651    V.
2652
2653st__new_ref_loc(S) ->
2654    V = {L, _S1} = st__new_loc(S),
2655    ets:insert(S#state.var_flags, #var_flags{lab = L}),
2656    V.
2657
2658st__new_opnd_loc(S) ->
2659    V = {L, _S1} = st__new_loc(S),
2660    ets:insert(S#state.opnd_flags, #opnd_flags{lab = L}),
2661    V.
2662
2663
2664%% =====================================================================
2665%% Abstract datatype: counter()
2666%%
2667%% `counter__add' throws `{counter_exceeded, Type, Data}' if the
2668%% resulting counter value would exceed the limit for the counter in
2669%% question (`Type' and `Data' are given by the user).
2670
2671-record(counter, {active, value, limit}).
2672
2673counter__new_passive(Limit) when Limit > 0 ->
2674    {0, Limit}.
2675
2676counter__new_active(Limit) when Limit > 0 ->
2677    {Limit, Limit}.
2678
2679%% Active counters have values > 0 internally; passive counters start at
2680%% zero. The 'limit' field is only accessed by the 'counter__limit'
2681%% function.
2682
2683counter__is_active({C, _}) ->
2684    C > 0.
2685
2686counter__limit({_, L}) ->
2687    L.
2688
2689counter__value({N, L}) ->
2690    if N > 0 ->
2691	    L - N;
2692       true ->
2693            -N
2694    end.
2695
2696counter__add(N, {V, L}, Type, Data) ->
2697    N1 = V - N,
2698    if V > 0, N1 =< 0 ->
2699	    case debug_counters() of
2700		true ->
2701		    case Type of
2702			effort ->
2703			    put(counter_effort_triggers,
2704				get(counter_effort_triggers) + 1);
2705			size ->
2706			    put(counter_size_triggers,
2707				get(counter_size_triggers) + 1)
2708		    end;
2709		_ ->
2710		    ok
2711	    end,
2712	    throw({counter_exceeded, Type, Data});
2713       true ->
2714	    {N1, L}
2715    end.
2716
2717
2718%% =====================================================================
2719%% Reporting
2720
2721% report_internal_error(S) ->
2722%     report_internal_error(S, []).
2723
2724report_internal_error(S, Vs) ->
2725    report_error("internal error: " ++ S, Vs).
2726
2727report_error(D) ->
2728    report_error(D, []).
2729
2730report_error({F, L, D}, Vs) ->
2731    report({F, L, {error, D}}, Vs);
2732report_error(D, Vs) ->
2733    report({error, D}, Vs).
2734
2735report_warning(D) ->
2736    report_warning(D, []).
2737
2738report_warning({F, L, D}, Vs) ->
2739    report({F, L, {warning, D}}, Vs);
2740report_warning(D, Vs) ->
2741    report({warning, D}, Vs).
2742
2743report(D, Vs) ->
2744    io:put_chars(format(D, Vs)).
2745
2746format({error, D}, Vs) ->
2747    ["error: ", format(D, Vs)];
2748format({warning, D}, Vs) ->
2749    ["warning: ", format(D, Vs)];
2750format({"", L, D}, Vs) when integer(L), L > 0 ->
2751    [io_lib:fwrite("~w: ", [L]), format(D, Vs)];
2752format({"", _L, D}, Vs) ->
2753    format(D, Vs);
2754format({F, L, D}, Vs) when integer(L), L > 0 ->
2755    [io_lib:fwrite("~s:~w: ", [filename(F), L]), format(D, Vs)];
2756format({F, _L, D}, Vs) ->
2757    [io_lib:fwrite("~s: ", [filename(F)]), format(D, Vs)];
2758format(S, Vs) when list(S) ->
2759    [io_lib:fwrite(S, Vs), $\n].
2760
2761
2762%% =====================================================================
2763