1%% =====================================================================
2%% This library is free software; you can redistribute it and/or modify
3%% it under the terms of the GNU Lesser General Public License as
4%% published by the Free Software Foundation; either version 2 of the
5%% License, or (at your option) any later version.
6%%
7%% This library is distributed in the hope that it will be useful, but
8%% WITHOUT ANY WARRANTY; without even the implied warranty of
9%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10%% Lesser General Public License for more details.
11%%
12%% You should have received a copy of the GNU Lesser General Public
13%% License along with this library; if not, write to the Free Software
14%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
15%% USA
16%%
17%% $Id: rec_env.erl,v 1.2 2009/09/17 09:46:19 kostis Exp $
18%%
19%% @author Richard Carlsson <richardc@csd.uu.se>
20%% @copyright 1999-2004 Richard Carlsson
21%% @doc Abstract environments, supporting self-referential bindings and
22%% automatic new-key generation.
23
24%% The current implementation is based on Erlang standard library
25%% dictionaries.
26
27%%% -define(DEBUG, true).
28
29-module(rec_env).
30
31-export([bind/3, bind_list/3, bind_recursive/4, delete/2, empty/0,
32	 get/2, is_defined/2, is_empty/1, keys/1, lookup/2, new_key/1,
33	 new_key/2, new_keys/2, new_keys/3, size/1, to_list/1]).
34
35-ifdef(DEBUG).
36-export([test/1, test_custom/1, test_custom/2]).
37-endif.
38
39-ifdef(DEBUG).
40%% Code for testing:
41%%@hidden
42test(N) ->
43    test_0(integer, N).
44
45%%@hidden
46test_custom(N) ->
47    F = fun (X) -> list_to_atom("X"++integer_to_list(X)) end,
48    test_custom(F, N).
49
50%%@hidden
51test_custom(F, N) ->
52    test_0({custom, F}, N).
53
54test_0(Type, N) ->
55    put(new_key_calls, 0),
56    put(new_key_retries, 0),
57    put(new_key_max, 0),
58    Env = test_1(Type, N, empty()),
59    io:fwrite("\ncalls: ~w.\n", [get(new_key_calls)]),
60    io:fwrite("\nretries: ~w.\n", [get(new_key_retries)]),
61    io:fwrite("\nmax: ~w.\n", [get(new_key_max)]),
62    dict:to_list(element(1,Env)).
63
64test_1(integer = Type, N, Env) when integer(N), N > 0 ->
65    Key = new_key(Env),
66    test_1(Type, N - 1, bind(Key, value, Env));
67test_1({custom, F} = Type, N, Env) when integer(N), N > 0 ->
68    Key = new_key(F, Env),
69    test_1(Type, N - 1, bind(Key, value, Env));
70test_1(_,0, Env) ->
71    Env.
72-endif.
73
74
75%% Representation:
76%%
77%%	environment() = [Mapping]
78%%
79%%      Mapping = {map, Dict} | {rec, Dict, Dict}
80%%	Dict = dict:dictionary()
81%%
82%% An empty environment is a list containing a single `{map, Dict}'
83%% element - empty lists are not valid environments. To find a key in an
84%% environment, it is searched for in each mapping in the list, in
85%% order, until it the key is found in some mapping, or the end of the
86%% list is reached. In a 'rec' mapping, we keep the original dictionary
87%% together with a version where entries may have been deleted - this
88%% makes it possible to garbage collect the entire 'rec' mapping when
89%% all its entries are unused (for example, by being shadowed by later
90%% definitions).
91
92
93
94%% =====================================================================
95%% @type environment(). An abstract environment.
96
97
98%% =====================================================================
99%% @spec empty() -> environment()
100%%
101%% @doc Returns an empty environment.
102
103empty() ->
104    [{map, dict:new()}].
105
106
107%% =====================================================================
108%% @spec is_empty(Env::environment()) -> boolean()
109%%
110%% @doc Returns <code>true</code> if the environment is empty, otherwise
111%% <code>false</code>.
112
113is_empty([{map, Dict} | Es]) ->
114    N = dict:size(Dict),
115    if N /= 0 -> false;
116       Es == [] -> true;
117       true -> is_empty(Es)
118    end;
119is_empty([{rec, Dict, _} | Es]) ->
120    N = dict:size(Dict),
121    if N /= 0 -> false;
122       Es == [] -> true;
123       true -> is_empty(Es)
124    end.
125
126
127%% =====================================================================
128%% @spec size(Env::environment()) -> integer()
129%%
130%% @doc Returns the number of entries in an environment.
131
132%% (The name 'size' cannot be used in local calls, since there exists a
133%% built-in function with the same name.)
134
135size(Env) ->
136    env_size(Env).
137
138env_size([{map, Dict}]) ->
139    dict:size(Dict);
140env_size([{map, Dict} | Env]) ->
141    dict:size(Dict) + env_size(Env);
142env_size([{rec, Dict, _Dict0} | Env]) ->
143    dict:size(Dict) + env_size(Env).
144
145
146%% =====================================================================
147%% @spec is_defined(Key, Env) -> boolean()
148%%
149%%	Key = term()
150%%	Env = environment()
151%%
152%% @doc Returns <code>true</code> if <code>Key</code> is bound in the
153%% environment, otherwise <code>false</code>.
154
155is_defined(Key, [{map, Dict} | Env]) ->
156    case dict:is_key(Key, Dict) of
157	true ->
158	    true;
159	false when Env == [] ->
160	    false;
161	false ->
162	    is_defined(Key, Env)
163    end;
164is_defined(Key, [{rec, Dict, _Dict0} | Env]) ->
165    case dict:is_key(Key, Dict) of
166	true ->
167	    true;
168	false ->
169	    is_defined(Key, Env)
170    end.
171
172
173%% =====================================================================
174%% @spec keys(Env::environment()) -> [term()]
175%%
176%% @doc Returns the ordered list of all keys in the environment.
177
178keys(Env) ->
179    lists:sort(keys(Env, [])).
180
181keys([{map, Dict}], S) ->
182    dict:fetch_keys(Dict) ++ S;
183keys([{map, Dict} | Env], S) ->
184    keys(Env, dict:fetch_keys(Dict) ++ S);
185keys([{rec, Dict, _Dict0} | Env], S) ->
186    keys(Env, dict:fetch_keys(Dict) ++ S).
187
188
189%% =====================================================================
190%% @spec to_list(Env) -> [{Key, Value}]
191%%
192%%	Env = environment()
193%%	Key = term()
194%%	Value = term()
195%%
196%% @doc Returns an ordered list of <code>{Key, Value}</code> pairs for
197%% all keys in <code>Env</code>. <code>Value</code> is the same as that
198%% returned by {@link get/2}.
199
200to_list(Env) ->
201    lists:sort(to_list(Env, [])).
202
203to_list([{map, Dict}], S) ->
204    dict:to_list(Dict) ++ S;
205to_list([{map, Dict} | Env], S) ->
206    to_list(Env, dict:to_list(Dict) ++ S);
207to_list([{rec, Dict, _Dict0} | Env], S) ->
208    to_list(Env, dict:to_list(Dict) ++ S).
209
210
211%% =====================================================================
212%% @spec bind(Key, Value, Env) -> environment()
213%%
214%%	Key = term()
215%%	Value = term()
216%%	Env = environment()
217%%
218%% @doc Make a nonrecursive entry. This binds <code>Key</code> to
219%% <code>Value</code>. If the key already existed in the environment,
220%% the old entry is replaced.
221
222%% Note that deletion is done to free old bindings so they can be
223%% garbage collected.
224
225bind(Key, Value, [{map, Dict}]) ->
226    [{map, dict:store(Key, Value, Dict)}];
227bind(Key, Value, [{map, Dict} | Env]) ->
228    [{map, dict:store(Key, Value, Dict)} | delete_any(Key, Env)];
229bind(Key, Value, Env) ->
230    [{map, dict:store(Key, Value, dict:new())} | delete_any(Key, Env)].
231
232
233%% =====================================================================
234%% @spec bind_list(Keys, Values, Env) -> environment()
235%%
236%%	Keys = [term()]
237%%	Values = [term()]
238%%	Env = environment()
239%%
240%% @doc Make N nonrecursive entries. This binds each key in
241%% <code>Keys</code> to the corresponding value in
242%% <code>Values</code>. If some key already existed in the environment,
243%% the previous entry is replaced. If <code>Keys</code> does not have
244%% the same length as <code>Values</code>, an exception is generated.
245
246bind_list(Ks, Vs, [{map, Dict}]) ->
247    [{map, store_list(Ks, Vs, Dict)}];
248bind_list(Ks, Vs, [{map, Dict} | Env]) ->
249    [{map, store_list(Ks, Vs, Dict)} | delete_list(Ks, Env)];
250bind_list(Ks, Vs, Env) ->
251    [{map, store_list(Ks, Vs, dict:new())} | delete_list(Ks, Env)].
252
253store_list([K | Ks], [V | Vs], Dict) ->
254    store_list(Ks, Vs, dict:store(K, V, Dict));
255store_list([], _, Dict) ->
256    Dict.
257
258delete_list([K | Ks], Env) ->
259    delete_list(Ks, delete_any(K, Env));
260delete_list([], Env) ->
261    Env.
262
263%% By not calling `delete' unless we have to, we avoid unnecessary
264%% rewriting of the data.
265
266delete_any(Key, Env) ->
267    case is_defined(Key, Env) of
268	true ->
269	    delete(Key, Env);
270	false ->
271	    Env
272    end.
273
274%% =====================================================================
275%% @spec delete(Key, Env) -> environment()
276%%
277%%	Key = term()
278%%	Env = environment()
279%%
280%% @doc Delete an entry. This removes <code>Key</code> from the
281%% environment.
282
283delete(Key, [{map, Dict} = E | Env]) ->
284    case dict:is_key(Key, Dict) of
285	true ->
286	    [{map, dict:erase(Key, Dict)} | Env];
287	false ->
288	    delete_1(Key, Env, E)
289    end;
290delete(Key, [{rec, Dict, Dict0} = E | Env]) ->
291    case dict:is_key(Key, Dict) of
292	true ->
293	    %% The Dict0 component must be preserved as it is until all
294	    %% keys in Dict have been deleted.
295	    Dict1 = dict:erase(Key, Dict),
296	    case dict:size(Dict1) of
297		0 ->
298		    Env;    % the whole {rec,...} is now garbage
299		_ ->
300		    [{rec, Dict1, Dict0} | Env]
301	    end;
302	false ->
303	    [E | delete(Key, Env)]
304    end.
305
306%% This is just like above, except we pass on the preceding 'map'
307%% mapping in the list to enable merging when removing 'rec' mappings.
308
309delete_1(Key, [{rec, Dict, Dict0} = E | Env], E1) ->
310    case dict:is_key(Key, Dict) of
311	true ->
312	    Dict1 = dict:erase(Key, Dict),
313	    case dict:size(Dict1) of
314		0 ->
315		    concat(E1, Env);
316		_ ->
317		    [E1, {rec, Dict1, Dict0} | Env]
318	    end;
319	false ->
320	    [E1, E | delete(Key, Env)]
321    end.
322
323concat({map, D1}, [{map, D2} | Env]) ->
324    [dict:merge(fun (_K, V1, _V2) -> V1 end, D1, D2) | Env];
325concat(E1, Env) ->
326    [E1 | Env].
327
328
329%% =====================================================================
330%% @spec bind_recursive(Keys, Values, Fun, Env) -> NewEnv
331%%
332%%	Keys = [term()]
333%%	Values = [term()]
334%%	Fun = (Value, Env) -> term()
335%%	Env = environment()
336%%	NewEnv = environment()
337%%
338%% @doc Make N recursive entries. This binds each key in
339%% <code>Keys</code> to the value of <code>Fun(Value, NewEnv)</code> for
340%% the corresponding <code>Value</code>. If <code>Keys</code> does not
341%% have the same length as <code>Values</code>, an exception is
342%% generated. If some key already existed in the environment, the old
343%% entry is replaced.
344%%
345%% <p>Note: the function <code>Fun</code> is evaluated each time one of
346%% the stored keys is looked up, but only then.</p>
347%%
348%% <p>Examples:
349%%<pre>
350%%    NewEnv = bind_recursive([foo, bar], [1, 2],
351%%	                      fun (V, E) -> V end,
352%%	                      Env)</pre>
353%%
354%% This does nothing interesting; <code>get(foo, NewEnv)</code> yields
355%% <code>1</code> and <code>get(bar, NewEnv)</code> yields
356%% <code>2</code>, but there is more overhead than if the {@link
357%% bind_list/3} function had been used.
358%%
359%% <pre>
360%%    NewEnv = bind_recursive([foo, bar], [1, 2],
361%%                            fun (V, E) -> {V, E} end,
362%%                            Env)</pre>
363%%
364%% Here, however, <code>get(foo, NewEnv)</code> will yield <code>{1,
365%% NewEnv}</code> and <code>get(bar, NewEnv)</code> will yield <code>{2,
366%% NewEnv}</code>, i.e., the environment <code>NewEnv</code> contains
367%% recursive bindings.</p>
368
369bind_recursive([], [], _, Env) ->
370    Env;
371bind_recursive(Ks, Vs, F, Env) ->
372    F1 = fun (V) ->
373		 fun (Dict) -> F(V, [{rec, Dict, Dict} | Env]) end
374	 end,
375    Dict = bind_recursive_1(Ks, Vs, F1, dict:new()),
376    [{rec, Dict, Dict} | Env].
377
378bind_recursive_1([K | Ks], [V | Vs], F, Dict) ->
379    bind_recursive_1(Ks, Vs, F, dict:store(K, F(V), Dict));
380bind_recursive_1([], [], _, Dict) ->
381    Dict.
382
383
384%% =====================================================================
385%% @spec lookup(Key, Env) -> error | {ok, Value}
386%%
387%%	Key = term()
388%%	Env = environment()
389%%	Value = term()
390%%
391%% @doc Returns <code>{ok, Value}</code> if <code>Key</code> is bound to
392%% <code>Value</code> in <code>Env</code>, and <code>error</code>
393%% otherwise.
394
395lookup(Key, [{map, Dict} | Env]) ->
396    case dict:find(Key, Dict) of
397	{ok, _}=Value ->
398	    Value;
399	error when Env == [] ->
400	    error;
401	error ->
402	    lookup(Key, Env)
403    end;
404lookup(Key, [{rec, Dict, Dict0} | Env]) ->
405    case dict:find(Key, Dict) of
406	{ok, F} ->
407	    {ok, F(Dict0)};
408	error ->
409	    lookup(Key, Env)
410    end.
411
412
413%% =====================================================================
414%% @spec get(Key, Env) -> Value
415%%
416%%	Key = term()
417%%	Env = environment()
418%%	Value = term()
419%%
420%% @doc Returns the value that <code>Key</code> is bound to in
421%% <code>Env</code>. Throws <code>{undefined, Key}</code> if the key
422%% does not exist in <code>Env</code>.
423
424get(Key, Env) ->
425    case lookup(Key, Env) of
426	{ok, Value} -> Value;
427	error -> throw({undefined, Key})
428    end.
429
430
431%% =====================================================================
432%% The key-generating algorithm could possibly be further improved. The
433%% important thing to keep in mind is, that when we need a new key, we
434%% are generally in mid-traversal of a syntax tree, and existing names
435%% in the tree may be closely grouped and evenly distributed or even
436%% forming a compact range (often having been generated by a "gensym",
437%% or by this very algorithm itself). This means that if we generate an
438%% identifier whose value is too close to those already seen (i.e.,
439%% which are in the environment), it is very probable that we will
440%% shadow a not-yet-seen identifier further down in the tree, the result
441%% being that we induce another later renaming, and end up renaming most
442%% of the identifiers, completely contrary to our intention. We need to
443%% generate new identifiers in a way that avoids such systematic
444%% collisions.
445%%
446%% One way of getting a new key to try when the previous attempt failed
447%% is of course to e.g. add one to the last tried value. However, in
448%% general it's a bad idea to try adjacent identifiers: the percentage
449%% of retries will typically increase a lot, so you may lose big on the
450%% extra lookups while gaining only a little from the quicker
451%% computation.
452%%
453%% We want an initial range that is large enough for most typical cases.
454%% If we start with, say, a range of 10, we might quickly use up most of
455%% the values in the range 1-10 (or 1-100) for new top-level variables -
456%% but as we start traversing the syntax tree, it is quite likely that
457%% exactly those variables will be encountered again (this depends on
458%% how the names in the tree were created), and will then need to be
459%% renamed. If we instead begin with a larger range, it is less likely
460%% that any top-level names that we introduce will shadow names that we
461%% will find in the tree. Of course we cannot know how large is large
462%% enough: for any initial range, there is some syntax tree that uses
463%% all the values in that range, and thus any top-level names introduced
464%% will shadow names in the tree. The point is to avoid this happening
465%% all the time - a range of about 1000 seems enough for most programs.
466%%
467%% The following values have been shown to work well:
468
469-define(MINIMUM_RANGE, 1000).
470-define(START_RANGE_FACTOR, 50).
471-define(MAX_RETRIES, 2).      % retries before enlarging range
472-define(ENLARGE_FACTOR, 10).  % range enlargement factor
473
474-ifdef(DEBUG).
475%% If you want to use these process dictionary counters, make sure to
476%% initialise them to zero before you call any of the key-generating
477%% functions.
478%%
479%%	new_key_calls		total number of calls
480%%	new_key_retries		failed key generation attempts
481%%	new_key_max		maximum generated integer value
482%%
483-define(measure_calls(),
484	put(new_key_calls, 1 + get(new_key_calls))).
485-define(measure_max_key(N),
486	case N > get(new_key_max) of
487	    true ->
488		put(new_key_max, N);
489	    false ->
490		ok
491	end).
492-define(measure_retries(N),
493	put(new_key_retries, get(new_key_retries) + N)).
494-else.
495-define(measure_calls(), ok).
496-define(measure_max_key(N), ok).
497-define(measure_retries(N), ok).
498-endif.
499
500
501%% =====================================================================
502%% @spec new_key(Env::environment()) -> integer()
503%%
504%% @doc Returns an integer which is not already used as key in the
505%% environment. New integers are generated using an algorithm which
506%% tries to keep the values randomly distributed within a reasonably
507%% small range relative to the number of entries in the environment.
508%%
509%% <p>This function uses the Erlang standard library module
510%% <code>random</code> to generate new keys.</p>
511%%
512%% <p>Note that only the new key is returned; the environment itself is
513%% not updated by this function.</p>
514
515new_key(Env) ->
516    new_key(fun (X) -> X end, Env).
517
518
519%% =====================================================================
520%% @spec new_key(Function, Env) -> term()
521%%
522%%	Function = (integer()) -> term()
523%%	Env = environment()
524%%
525%% @doc Returns a term which is not already used as key in the
526%% environment. The term is generated by applying <code>Function</code>
527%% to an integer generated as in {@link new_key/1}.
528%%
529%% <p>Note that only the generated term is returned; the environment
530%% itself is not updated by this function.</p>
531
532new_key(F, Env) ->
533    ?measure_calls(),
534    R = start_range(Env),
535%%%     io:fwrite("Start range: ~w.\n", [R]),
536    new_key(R, F, Env).
537
538new_key(R, F, Env) ->
539    new_key(generate(R, R), R, 0, F, Env).
540
541new_key(N, R, T, F, Env) when T < ?MAX_RETRIES ->
542    A = F(N),
543    case is_defined(A, Env) of
544	true ->
545%%% 	    io:fwrite("CLASH: ~w.\n", [A]),
546	    new_key(generate(N, R), R, T + 1, F, Env);
547	false ->
548	    ?measure_max_key(N),
549	    ?measure_retries(T),
550%%% 	    io:fwrite("New: ~w.\n", [N]),
551	    A
552    end;
553new_key(N, R, _T, F, Env) ->
554    %% Too many retries - enlarge the range and start over.
555    ?measure_retries((_T + 1)),
556    R1 = trunc(R * ?ENLARGE_FACTOR),
557%%%     io:fwrite("**NEW RANGE**: ~w.\n", [R1]),
558    new_key(generate(N, R1), R1, 0, F, Env).
559
560start_range(Env) ->
561    max(env_size(Env) * ?START_RANGE_FACTOR, ?MINIMUM_RANGE).
562
563max(X, Y) when X > Y -> X;
564max(_, Y) -> Y.
565
566%% The previous key might or might not be used to compute the next key
567%% to be tried. It is currently not used.
568%%
569%% In order to avoid causing cascading renamings, it is important that
570%% this function does not generate values in order, but
571%% (pseudo-)randomly distributed over the range.
572
573generate(_N, Range) ->
574    random:uniform(Range).    % works well
575
576
577%% =====================================================================
578%% @spec new_keys(N, Env) -> [integer()]
579%%
580%%	N = integer()
581%%	Env = environment()
582%%
583%% @doc Returns a list of <code>N</code> distinct integers that are not
584%% already used as keys in the environment. See {@link new_key/1} for
585%% details.
586
587new_keys(N, Env) when integer(N) ->
588    new_keys(N, fun (X) -> X end, Env).
589
590
591%% =====================================================================
592%% @spec new_keys(N, Function, Env) -> [term()]
593%%
594%%	    N = integer()
595%%	    Function = (integer()) -> term()
596%%	    Env = environment()
597%%
598%% @doc Returns a list of <code>N</code> distinct terms that are not
599%% already used as keys in the environment. See {@link new_key/3} for
600%% details.
601
602new_keys(N, F, Env) when integer(N) ->
603    R = start_range(Env),
604    new_keys(N, [], R, F, Env).
605
606new_keys(N, Ks, R, F, Env) when N > 0 ->
607    Key = new_key(R, F, Env),
608    Env1 = bind(Key, true, Env),    % dummy binding
609    new_keys(N - 1, [Key | Ks], R, F, Env1);
610new_keys(0, Ks, _, _, _) ->
611    Ks.
612