1%% Licensed under the Apache License, Version 2.0 (the "License");
2%% you may not use this file except in compliance with the License.
3%% You may obtain a copy of the License at
4%%
5%%     http://www.apache.org/licenses/LICENSE-2.0
6%%
7%% Unless required by applicable law or agreed to in writing, software
8%% distributed under the License is distributed on an "AS IS" BASIS,
9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10%% See the License for the specific language governing permissions and
11%% limitations under the License.
12%%
13%% @copyright 1999-2004 Richard Carlsson
14%% @author Richard Carlsson <carlsson.richard@gmail.com>
15%% @doc Abstract environments, supporting self-referential bindings and
16%% automatic new-key generation.
17
18%% The current implementation is based on Erlang standard library maps.
19
20%%% -define(DEBUG, true).
21
22-module(rec_env).
23
24-export([bind/3, bind_list/3, bind_recursive/4, delete/2, empty/0,
25	 get/2, is_defined/2, is_empty/1, keys/1, lookup/2, new_key/1,
26	 new_key/2, new_keys/2, new_keys/3, size/1, to_list/1]).
27
28-export_type([environment/0]).
29
30-ifdef(DEBUG).
31-export([test/1, test_custom/1, test_custom/2]).
32-endif.
33
34-ifdef(DEBUG).
35%% Code for testing:
36%%@hidden
37test(N) ->
38    test_0(integer, N).
39
40%%@hidden
41test_custom(N) ->
42    F = fun (X) -> list_to_atom("X"++integer_to_list(X)) end,
43    test_custom(F, N).
44
45%%@hidden
46test_custom(F, N) ->
47    test_0({custom, F}, N).
48
49test_0(Type, N) ->
50    put(new_key_calls, 0),
51    put(new_key_retries, 0),
52    put(new_key_max, 0),
53    Env = test_1(Type, N, empty()),
54    io:fwrite("\ncalls: ~w.\n", [get(new_key_calls)]),
55    io:fwrite("\nretries: ~w.\n", [get(new_key_retries)]),
56    io:fwrite("\nmax: ~w.\n", [get(new_key_max)]),
57    maps:to_list(element(1,Env)).
58
59test_1(integer = Type, N, Env) when is_integer(N), N > 0 ->
60    Key = new_key(Env),
61    test_1(Type, N - 1, bind(Key, value, Env));
62test_1({custom, F} = Type, N, Env) when is_integer(N), N > 0 ->
63    Key = new_key(F, Env),
64    test_1(Type, N - 1, bind(Key, value, Env));
65test_1(_,0, Env) ->
66    Env.
67-endif.
68%%@clear
69
70
71%% Representation:
72%%
73%%	environment() = [Mapping]
74%%
75%%      Mapping = {map, map()} | {rec, map(), map()}
76%%
77%% An empty environment is a list containing a single `{map, map()}'
78%% element - empty lists are not valid environments. To find a key in an
79%% environment, it is searched for in each mapping in the list, in
80%% order, until it the key is found in some mapping, or the end of the
81%% list is reached. In a 'rec' mapping, we keep the original map
82%% together with a version where entries may have been deleted - this
83%% makes it possible to garbage collect the entire 'rec' mapping when
84%% all its entries are unused (for example, by being shadowed by later
85%% definitions).
86
87
88%% =====================================================================
89%% @type environment(). An abstract environment.
90
91-type mapping()     :: {'map', map()} | {'rec', map(), map()}.
92-type environment() :: [mapping(),...].
93
94%% =====================================================================
95%% @spec empty() -> environment()
96%%
97%% @doc Returns an empty environment.
98
99-spec empty() -> environment().
100
101empty() ->
102    [{map, #{}}].
103
104
105%% =====================================================================
106%% @spec is_empty(Env::environment()) -> boolean()
107%%
108%% @doc Returns <code>true</code> if the environment is empty, otherwise
109%% <code>false</code>.
110
111-spec is_empty(environment()) -> boolean().
112
113is_empty([{map, Map} | Es]) ->
114    N = map_size(Map),
115    if N =/= 0 -> false;
116       Es =:= [] -> true;
117       true -> is_empty(Es)
118    end;
119is_empty([{rec, Map, _} | Es]) ->
120    N = map_size(Map),
121    if N =/= 0 -> false;
122       Es =:= [] -> true;
123       true -> is_empty(Es)
124    end.
125
126
127%% =====================================================================
128%% @spec size(Env::environment()) -> integer()
129%%
130%% @doc Returns the number of entries in an environment.
131
132%% (The name 'size' cannot be used in local calls, since there exists a
133%% built-in function with the same name.)
134
135-spec size(environment()) -> non_neg_integer().
136
137size(Env) ->
138    env_size(Env).
139
140env_size([{map, Map}]) ->
141    map_size(Map);
142env_size([{map, Map} | Env]) ->
143    map_size(Map) + env_size(Env);
144env_size([{rec, Map, _Map0} | Env]) ->
145    map_size(Map) + env_size(Env).
146
147
148%% =====================================================================
149%% @spec is_defined(Key, Env) -> boolean()
150%%
151%%	Key = term()
152%%	Env = environment()
153%%
154%% @doc Returns <code>true</code> if <code>Key</code> is bound in the
155%% environment, otherwise <code>false</code>.
156
157-spec is_defined(term(), environment()) -> boolean().
158
159is_defined(Key, [{map, Map} | Env]) ->
160    case maps:is_key(Key, Map) of
161	true ->
162	    true;
163	false when Env =:= [] ->
164	    false;
165	false ->
166	    is_defined(Key, Env)
167    end;
168is_defined(Key, [{rec, Map, _Map0} | Env]) ->
169    maps:is_key(Key, Map) orelse is_defined(Key, Env).
170
171
172%% =====================================================================
173%% @spec keys(Env::environment()) -> [term()]
174%%
175%% @doc Returns the ordered list of all keys in the environment.
176
177-spec keys(environment()) -> [term()].
178
179keys(Env) ->
180    lists:sort(keys(Env, [])).
181
182keys([{map, Map}], S) ->
183    maps:keys(Map) ++ S;
184keys([{map, Map} | Env], S) ->
185    keys(Env, maps:keys(Map) ++ S);
186keys([{rec, Map, _Map0} | Env], S) ->
187    keys(Env, maps:keys(Map) ++ S).
188
189
190%% =====================================================================
191%% @spec to_list(Env) -> [{Key, Value}]
192%%
193%%	Env = environment()
194%%	Key = term()
195%%	Value = term()
196%%
197%% @doc Returns an ordered list of <code>{Key, Value}</code> pairs for
198%% all keys in <code>Env</code>. <code>Value</code> is the same as that
199%% returned by {@link get/2}.
200
201-spec to_list(environment()) -> [{term(), term()}].
202
203to_list(Env) ->
204    lists:sort(to_list(Env, [])).
205
206to_list([{map, Map}], S) ->
207    maps:to_list(Map) ++ S;
208to_list([{map, Map} | Env], S) ->
209    to_list(Env, maps:to_list(Map) ++ S);
210to_list([{rec, Map, _Map0} | Env], S) ->
211    to_list(Env, maps:to_list(Map) ++ S).
212
213
214%% =====================================================================
215%% @spec bind(Key, Value, Env) -> environment()
216%%
217%%	Key = term()
218%%	Value = term()
219%%	Env = environment()
220%%
221%% @doc Make a nonrecursive entry. This binds <code>Key</code> to
222%% <code>Value</code>. If the key already existed in the environment,
223%% the old entry is replaced.
224
225%% Note that deletion is done to free old bindings so they can be
226%% garbage collected.
227
228-spec bind(term(), term(), environment()) -> environment().
229
230bind(Key, Value, [{map, Map}]) ->
231    [{map, maps:put(Key, Value, Map)}];
232bind(Key, Value, [{map, Map} | Env]) ->
233    [{map, maps:put(Key, Value, Map)} | delete_any(Key, Env)];
234bind(Key, Value, Env) ->
235    [{map, maps:put(Key, Value, #{})} | delete_any(Key, Env)].
236
237
238%% =====================================================================
239%% @spec bind_list(Keys, Values, Env) -> environment()
240%%
241%%	Keys = [term()]
242%%	Values = [term()]
243%%	Env = environment()
244%%
245%% @doc Make N nonrecursive entries. This binds each key in
246%% <code>Keys</code> to the corresponding value in
247%% <code>Values</code>. If some key already existed in the environment,
248%% the previous entry is replaced. If <code>Keys</code> does not have
249%% the same length as <code>Values</code>, an exception is generated.
250
251-spec bind_list([term()], [term()], environment()) -> environment().
252
253bind_list(Ks, Vs, [{map, Map}]) ->
254    [{map, store_list(Ks, Vs, Map)}];
255bind_list(Ks, Vs, [{map, Map} | Env]) ->
256    [{map, store_list(Ks, Vs, Map)} | delete_list(Ks, Env)];
257bind_list(Ks, Vs, Env) ->
258    [{map, store_list(Ks, Vs, #{})} | delete_list(Ks, Env)].
259
260store_list([K | Ks], [V | Vs], Map) ->
261    store_list(Ks, Vs, maps:put(K, V, Map));
262store_list([], _, Map) ->
263    Map.
264
265delete_list([K | Ks], Env) ->
266    delete_list(Ks, delete_any(K, Env));
267delete_list([], Env) ->
268    Env.
269
270%% By not calling `delete' unless we have to, we avoid unnecessary
271%% rewriting of the data.
272
273delete_any(Key, Env) ->
274    case is_defined(Key, Env) of
275	true ->
276	    delete(Key, Env);
277	false ->
278	    Env
279    end.
280
281%% =====================================================================
282%% @spec delete(Key, Env) -> environment()
283%%
284%%	Key = term()
285%%	Env = environment()
286%%
287%% @doc Delete an entry. This removes <code>Key</code> from the
288%% environment.
289
290-spec delete(term(), environment()) -> environment().
291
292delete(Key, [{map, Map} = E | Env]) ->
293    case maps:take(Key, Map) of
294	{_, Map1} ->
295	    [{map, Map1} | Env];
296	error ->
297	    delete_1(Key, Env, E)
298    end;
299delete(Key, [{rec, Map, Map0} = E | Env]) ->
300    case maps:take(Key, Map) of
301	{_, Map1} when map_size(Map1) =:= 0 ->
302	    Env; % the whole {rec,...} is now garbage
303	%% The Map0 component must be preserved as it is until all
304	%% keys in Map have been deleted.
305	{_, Map1} ->
306	    [{rec, Map1, Map0} | Env];
307	error ->
308	    [E | delete(Key, Env)]
309    end.
310
311%% This is just like above, except we pass on the preceding 'map'
312%% mapping in the list to enable merging when removing 'rec' mappings.
313
314delete_1(Key, [{rec, Map, Map0} = E | Env], E1) ->
315    case maps:take(Key, Map) of
316	{_, Map1} when map_size(Map1) =:= 0 ->
317	    concat(E1, Env);
318	{_, Map1} ->
319	    [E1, {rec, Map1, Map0} | Env];
320	error ->
321	    [E1, E | delete(Key, Env)]
322    end.
323
324concat({map, M1}, [{map, M2} | Env]) ->
325    [maps:merge(M2, M1) | Env];
326concat(E1, Env) ->
327    [E1 | Env].
328
329
330%% =====================================================================
331%% @spec bind_recursive(Keys, Values, Fun, Env) -> NewEnv
332%%
333%%	Keys = [term()]
334%%	Values = [term()]
335%%	Fun = (Value, Env) -> term()
336%%	Env = environment()
337%%	NewEnv = environment()
338%%
339%% @doc Make N recursive entries. This binds each key in
340%% <code>Keys</code> to the value of <code>Fun(Value, NewEnv)</code> for
341%% the corresponding <code>Value</code>. If <code>Keys</code> does not
342%% have the same length as <code>Values</code>, an exception is
343%% generated. If some key already existed in the environment, the old
344%% entry is replaced.
345%%
346%% <p>Note: the function <code>Fun</code> is evaluated each time one of
347%% the stored keys is looked up, but only then.</p>
348%%
349%% <p>Examples:
350%%<pre>
351%%    NewEnv = bind_recursive([foo, bar], [1, 2],
352%%	                      fun (V, E) -> V end,
353%%	                      Env)</pre>
354%%
355%% This does nothing interesting; <code>get(foo, NewEnv)</code> yields
356%% <code>1</code> and <code>get(bar, NewEnv)</code> yields
357%% <code>2</code>, but there is more overhead than if the {@link
358%% bind_list/3} function had been used.
359%%
360%% <pre>
361%%    NewEnv = bind_recursive([foo, bar], [1, 2],
362%%                            fun (V, E) -> {V, E} end,
363%%                            Env)</pre>
364%%
365%% Here, however, <code>get(foo, NewEnv)</code> will yield <code>{1,
366%% NewEnv}</code> and <code>get(bar, NewEnv)</code> will yield <code>{2,
367%% NewEnv}</code>, i.e., the environment <code>NewEnv</code> contains
368%% recursive bindings.</p>
369
370-spec bind_recursive([term()], [term()],
371		     fun((term(), environment()) -> term()),
372		     environment()) -> environment().
373
374bind_recursive([], [], _, Env) ->
375    Env;
376bind_recursive(Ks, Vs, F, Env) ->
377    F1 = fun (V) ->
378		 fun (Map) -> F(V, [{rec, Map, Map} | Env]) end
379	 end,
380    Map = bind_recursive_1(Ks, Vs, F1, #{}),
381    [{rec, Map, Map} | Env].
382
383bind_recursive_1([K | Ks], [V | Vs], F, Map) ->
384    bind_recursive_1(Ks, Vs, F, maps:put(K, F(V), Map));
385bind_recursive_1([], [], _, Map) ->
386    Map.
387
388
389%% =====================================================================
390%% @spec lookup(Key, Env) -> error | {ok, Value}
391%%
392%%	Key = term()
393%%	Env = environment()
394%%	Value = term()
395%%
396%% @doc Returns <code>{ok, Value}</code> if <code>Key</code> is bound to
397%% <code>Value</code> in <code>Env</code>, and <code>error</code>
398%% otherwise.
399
400-spec lookup(term(), environment()) -> 'error' | {'ok', term()}.
401
402lookup(Key, [{map, Map} | Env]) ->
403    case maps:find(Key, Map) of
404	{ok, _}=Value ->
405	    Value;
406	error when Env =:= [] ->
407	    error;
408	error ->
409	    lookup(Key, Env)
410    end;
411lookup(Key, [{rec, Map, Map0} | Env]) ->
412    case maps:find(Key, Map) of
413	{ok, F} ->
414	    {ok, F(Map0)};
415	error ->
416	    lookup(Key, Env)
417    end.
418
419
420%% =====================================================================
421%% @spec get(Key, Env) -> Value
422%%
423%%	Key = term()
424%%	Env = environment()
425%%	Value = term()
426%%
427%% @doc Returns the value that <code>Key</code> is bound to in
428%% <code>Env</code>. Throws <code>{undefined, Key}</code> if the key
429%% does not exist in <code>Env</code>.
430
431-spec get(term(), environment()) -> term().
432
433get(Key, Env) ->
434    case lookup(Key, Env) of
435	{ok, Value} -> Value;
436	error -> throw({undefined, Key})
437    end.
438
439
440%% =====================================================================
441%% The key-generating algorithm could possibly be further improved. The
442%% important thing to keep in mind is, that when we need a new key, we
443%% are generally in mid-traversal of a syntax tree, and existing names
444%% in the tree may be closely grouped and evenly distributed or even
445%% forming a compact range (often having been generated by a "gensym",
446%% or by this very algorithm itself). This means that if we generate an
447%% identifier whose value is too close to those already seen (i.e.,
448%% which are in the environment), it is very probable that we will
449%% shadow a not-yet-seen identifier further down in the tree, the result
450%% being that we induce another later renaming, and end up renaming most
451%% of the identifiers, completely contrary to our intention. We need to
452%% generate new identifiers in a way that avoids such systematic
453%% collisions.
454%%
455%% One way of getting a new key to try when the previous attempt failed
456%% is of course to e.g. add one to the last tried value. However, in
457%% general it's a bad idea to try adjacent identifiers: the percentage
458%% of retries will typically increase a lot, so you may lose big on the
459%% extra lookups while gaining only a little from the quicker
460%% computation.
461%%
462%% We want an initial range that is large enough for most typical cases.
463%% If we start with, say, a range of 10, we might quickly use up most of
464%% the values in the range 1-10 (or 1-100) for new top-level variables -
465%% but as we start traversing the syntax tree, it is quite likely that
466%% exactly those variables will be encountered again (this depends on
467%% how the names in the tree were created), and will then need to be
468%% renamed. If we instead begin with a larger range, it is less likely
469%% that any top-level names that we introduce will shadow names that we
470%% will find in the tree. Of course we cannot know how large is large
471%% enough: for any initial range, there is some syntax tree that uses
472%% all the values in that range, and thus any top-level names introduced
473%% will shadow names in the tree. The point is to avoid this happening
474%% all the time - a range of about 1000 seems enough for most programs.
475%%
476%% The following values have been shown to work well:
477
478-define(MINIMUM_RANGE, 1000).
479-define(START_RANGE_FACTOR, 50).
480-define(MAX_RETRIES, 2).      % retries before enlarging range
481-define(ENLARGE_FACTOR, 10).  % range enlargment factor
482
483-ifdef(DEBUG).
484%% If you want to use these process dictionary counters, make sure to
485%% initialise them to zero before you call any of the key-generating
486%% functions.
487%%
488%%	new_key_calls		total number of calls
489%%	new_key_retries		failed key generation attempts
490%%	new_key_max		maximum generated integer value
491%%
492-define(measure_calls(),
493	put(new_key_calls, 1 + get(new_key_calls))).
494-define(measure_max_key(N),
495	case N > get(new_key_max) of
496	    true ->
497		put(new_key_max, N);
498	    false ->
499		ok
500	end).
501-define(measure_retries(N),
502	put(new_key_retries, get(new_key_retries) + N)).
503-else.
504-define(measure_calls(), ok).
505-define(measure_max_key(N), ok).
506-define(measure_retries(N), ok).
507-endif.
508
509
510%% =====================================================================
511%% @spec new_key(Env::environment()) -> integer()
512%%
513%% @doc Returns an integer which is not already used as key in the
514%% environment. New integers are generated using an algorithm which
515%% tries to keep the values randomly distributed within a reasonably
516%% small range relative to the number of entries in the environment.
517%%
518%% <p>This function uses the Erlang standard library module
519%% <code>random</code> to generate new keys.</p>
520%%
521%% <p>Note that only the new key is returned; the environment itself is
522%% not updated by this function.</p>
523
524-spec new_key(environment()) -> integer().
525
526new_key(Env) ->
527    new_key(fun (X) -> X end, Env).
528
529
530%% =====================================================================
531%% @spec new_key(Function, Env) -> term()
532%%
533%%	Function = (integer()) -> term()
534%%	Env = environment()
535%%
536%% @doc Returns a term which is not already used as key in the
537%% environment. The term is generated by applying <code>Function</code>
538%% to an integer generated as in {@link new_key/1}.
539%%
540%% <p>Note that only the generated term is returned; the environment
541%% itself is not updated by this function.</p>
542
543-spec new_key(fun((integer()) -> term()), environment()) -> term().
544
545new_key(F, Env) ->
546    ?measure_calls(),
547    R = start_range(Env),
548    %% io:fwrite("Start range: ~w.\n", [R]),
549    new_key(R, F, Env).
550
551new_key(R, F, Env) ->
552    new_key(generate(R, R), R, 0, F, Env).
553
554new_key(N, R, T, F, Env) when T < ?MAX_RETRIES ->
555    A = F(N),
556    case is_defined(A, Env) of
557	true ->
558	    %% io:fwrite("CLASH: ~w.\n", [A]),
559	    new_key(generate(N, R), R, T + 1, F, Env);
560	false ->
561	    ?measure_max_key(N),
562	    ?measure_retries(T),
563	    %% io:fwrite("New: ~w.\n", [N]),
564	    A
565    end;
566new_key(N, R, _T, F, Env) ->
567    %% Too many retries - enlarge the range and start over.
568    ?measure_retries((_T + 1)),
569    R1 = trunc(R * ?ENLARGE_FACTOR),
570    %% io:fwrite("**NEW RANGE**: ~w.\n", [R1]),
571    new_key(generate(N, R1), R1, 0, F, Env).
572
573start_range(Env) ->
574    erlang:max(env_size(Env) * ?START_RANGE_FACTOR, ?MINIMUM_RANGE).
575
576%% The previous key might or might not be used to compute the next key
577%% to be tried. It is currently not used.
578%%
579%% In order to avoid causing cascading renamings, it is important that
580%% this function does not generate values in order, but
581%% (pseudo-)randomly distributed over the range.
582
583generate(_N, Range) ->
584    %% We must use the same sequence of random variables to ensure
585    %% that two compilations of the same source code generates the
586    %% same BEAM code.
587    case rand:export_seed() of
588	undefined ->
589	    _ = rand:seed(exsplus, {1,42,2053}),
590	    ok;
591	_ ->
592	    ok
593    end,
594    rand:uniform(Range).			% works well
595
596
597%% =====================================================================
598%% @spec new_keys(N, Env) -> [integer()]
599%%
600%%	N = integer()
601%%	Env = environment()
602%%
603%% @doc Returns a list of <code>N</code> distinct integers that are not
604%% already used as keys in the environment. See {@link new_key/1} for
605%% details.
606
607-spec new_keys(integer(), environment()) -> [integer()].
608
609new_keys(N, Env) when is_integer(N) ->
610    new_keys(N, fun (X) -> X end, Env).
611
612
613%% =====================================================================
614%% @spec new_keys(N, Function, Env) -> [term()]
615%%
616%%	    N = integer()
617%%	    Function = (integer()) -> term()
618%%	    Env = environment()
619%%
620%% @doc Returns a list of <code>N</code> distinct terms that are not
621%% already used as keys in the environment. See {@link new_key/3} for
622%% details.
623
624-spec new_keys(integer(), fun((integer()) -> term()), environment()) -> [term()].
625
626new_keys(N, F, Env) when is_integer(N) ->
627    R = start_range(Env),
628    new_keys(N, [], R, F, Env).
629
630new_keys(N, Ks, R, F, Env) when N > 0 ->
631    Key = new_key(R, F, Env),
632    Env1 = bind(Key, true, Env),    % dummy binding
633    new_keys(N - 1, [Key | Ks], R, F, Env1);
634new_keys(0, Ks, _, _, _) ->
635    Ks.
636