1%% ===================================================================== 2%% This library is free software; you can redistribute it and/or modify 3%% it under the terms of the GNU Lesser General Public License as 4%% published by the Free Software Foundation; either version 2 of the 5%% License, or (at your option) any later version. 6%% 7%% This library is distributed in the hope that it will be useful, but 8%% WITHOUT ANY WARRANTY; without even the implied warranty of 9%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10%% Lesser General Public License for more details. 11%% 12%% You should have received a copy of the GNU Lesser General Public 13%% License along with this library; if not, write to the Free Software 14%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 15%% USA 16%% 17%% $Id: rec_env.erl,v 1.2 2009/09/17 09:46:19 kostis Exp $ 18%% 19%% @author Richard Carlsson <richardc@csd.uu.se> 20%% @copyright 1999-2004 Richard Carlsson 21%% @doc Abstract environments, supporting self-referential bindings and 22%% automatic new-key generation. 23 24%% The current implementation is based on Erlang standard library 25%% dictionaries. 26 27%%% -define(DEBUG, true). 28 29-module(rec_env). 30 31-export([bind/3, bind_list/3, bind_recursive/4, delete/2, empty/0, 32 get/2, is_defined/2, is_empty/1, keys/1, lookup/2, new_key/1, 33 new_key/2, new_keys/2, new_keys/3, size/1, to_list/1]). 34 35-ifdef(DEBUG). 36-export([test/1, test_custom/1, test_custom/2]). 37-endif. 38 39-ifdef(DEBUG). 40%% Code for testing: 41%%@hidden 42test(N) -> 43 test_0(integer, N). 44 45%%@hidden 46test_custom(N) -> 47 F = fun (X) -> list_to_atom("X"++integer_to_list(X)) end, 48 test_custom(F, N). 49 50%%@hidden 51test_custom(F, N) -> 52 test_0({custom, F}, N). 53 54test_0(Type, N) -> 55 put(new_key_calls, 0), 56 put(new_key_retries, 0), 57 put(new_key_max, 0), 58 Env = test_1(Type, N, empty()), 59 io:fwrite("\ncalls: ~w.\n", [get(new_key_calls)]), 60 io:fwrite("\nretries: ~w.\n", [get(new_key_retries)]), 61 io:fwrite("\nmax: ~w.\n", [get(new_key_max)]), 62 dict:to_list(element(1,Env)). 63 64test_1(integer = Type, N, Env) when integer(N), N > 0 -> 65 Key = new_key(Env), 66 test_1(Type, N - 1, bind(Key, value, Env)); 67test_1({custom, F} = Type, N, Env) when integer(N), N > 0 -> 68 Key = new_key(F, Env), 69 test_1(Type, N - 1, bind(Key, value, Env)); 70test_1(_,0, Env) -> 71 Env. 72-endif. 73 74 75%% Representation: 76%% 77%% environment() = [Mapping] 78%% 79%% Mapping = {map, Dict} | {rec, Dict, Dict} 80%% Dict = dict:dictionary() 81%% 82%% An empty environment is a list containing a single `{map, Dict}' 83%% element - empty lists are not valid environments. To find a key in an 84%% environment, it is searched for in each mapping in the list, in 85%% order, until it the key is found in some mapping, or the end of the 86%% list is reached. In a 'rec' mapping, we keep the original dictionary 87%% together with a version where entries may have been deleted - this 88%% makes it possible to garbage collect the entire 'rec' mapping when 89%% all its entries are unused (for example, by being shadowed by later 90%% definitions). 91 92 93 94%% ===================================================================== 95%% @type environment(). An abstract environment. 96 97 98%% ===================================================================== 99%% @spec empty() -> environment() 100%% 101%% @doc Returns an empty environment. 102 103empty() -> 104 [{map, dict:new()}]. 105 106 107%% ===================================================================== 108%% @spec is_empty(Env::environment()) -> boolean() 109%% 110%% @doc Returns <code>true</code> if the environment is empty, otherwise 111%% <code>false</code>. 112 113is_empty([{map, Dict} | Es]) -> 114 N = dict:size(Dict), 115 if N /= 0 -> false; 116 Es == [] -> true; 117 true -> is_empty(Es) 118 end; 119is_empty([{rec, Dict, _} | Es]) -> 120 N = dict:size(Dict), 121 if N /= 0 -> false; 122 Es == [] -> true; 123 true -> is_empty(Es) 124 end. 125 126 127%% ===================================================================== 128%% @spec size(Env::environment()) -> integer() 129%% 130%% @doc Returns the number of entries in an environment. 131 132%% (The name 'size' cannot be used in local calls, since there exists a 133%% built-in function with the same name.) 134 135size(Env) -> 136 env_size(Env). 137 138env_size([{map, Dict}]) -> 139 dict:size(Dict); 140env_size([{map, Dict} | Env]) -> 141 dict:size(Dict) + env_size(Env); 142env_size([{rec, Dict, _Dict0} | Env]) -> 143 dict:size(Dict) + env_size(Env). 144 145 146%% ===================================================================== 147%% @spec is_defined(Key, Env) -> boolean() 148%% 149%% Key = term() 150%% Env = environment() 151%% 152%% @doc Returns <code>true</code> if <code>Key</code> is bound in the 153%% environment, otherwise <code>false</code>. 154 155is_defined(Key, [{map, Dict} | Env]) -> 156 case dict:is_key(Key, Dict) of 157 true -> 158 true; 159 false when Env == [] -> 160 false; 161 false -> 162 is_defined(Key, Env) 163 end; 164is_defined(Key, [{rec, Dict, _Dict0} | Env]) -> 165 case dict:is_key(Key, Dict) of 166 true -> 167 true; 168 false -> 169 is_defined(Key, Env) 170 end. 171 172 173%% ===================================================================== 174%% @spec keys(Env::environment()) -> [term()] 175%% 176%% @doc Returns the ordered list of all keys in the environment. 177 178keys(Env) -> 179 lists:sort(keys(Env, [])). 180 181keys([{map, Dict}], S) -> 182 dict:fetch_keys(Dict) ++ S; 183keys([{map, Dict} | Env], S) -> 184 keys(Env, dict:fetch_keys(Dict) ++ S); 185keys([{rec, Dict, _Dict0} | Env], S) -> 186 keys(Env, dict:fetch_keys(Dict) ++ S). 187 188 189%% ===================================================================== 190%% @spec to_list(Env) -> [{Key, Value}] 191%% 192%% Env = environment() 193%% Key = term() 194%% Value = term() 195%% 196%% @doc Returns an ordered list of <code>{Key, Value}</code> pairs for 197%% all keys in <code>Env</code>. <code>Value</code> is the same as that 198%% returned by {@link get/2}. 199 200to_list(Env) -> 201 lists:sort(to_list(Env, [])). 202 203to_list([{map, Dict}], S) -> 204 dict:to_list(Dict) ++ S; 205to_list([{map, Dict} | Env], S) -> 206 to_list(Env, dict:to_list(Dict) ++ S); 207to_list([{rec, Dict, _Dict0} | Env], S) -> 208 to_list(Env, dict:to_list(Dict) ++ S). 209 210 211%% ===================================================================== 212%% @spec bind(Key, Value, Env) -> environment() 213%% 214%% Key = term() 215%% Value = term() 216%% Env = environment() 217%% 218%% @doc Make a nonrecursive entry. This binds <code>Key</code> to 219%% <code>Value</code>. If the key already existed in the environment, 220%% the old entry is replaced. 221 222%% Note that deletion is done to free old bindings so they can be 223%% garbage collected. 224 225bind(Key, Value, [{map, Dict}]) -> 226 [{map, dict:store(Key, Value, Dict)}]; 227bind(Key, Value, [{map, Dict} | Env]) -> 228 [{map, dict:store(Key, Value, Dict)} | delete_any(Key, Env)]; 229bind(Key, Value, Env) -> 230 [{map, dict:store(Key, Value, dict:new())} | delete_any(Key, Env)]. 231 232 233%% ===================================================================== 234%% @spec bind_list(Keys, Values, Env) -> environment() 235%% 236%% Keys = [term()] 237%% Values = [term()] 238%% Env = environment() 239%% 240%% @doc Make N nonrecursive entries. This binds each key in 241%% <code>Keys</code> to the corresponding value in 242%% <code>Values</code>. If some key already existed in the environment, 243%% the previous entry is replaced. If <code>Keys</code> does not have 244%% the same length as <code>Values</code>, an exception is generated. 245 246bind_list(Ks, Vs, [{map, Dict}]) -> 247 [{map, store_list(Ks, Vs, Dict)}]; 248bind_list(Ks, Vs, [{map, Dict} | Env]) -> 249 [{map, store_list(Ks, Vs, Dict)} | delete_list(Ks, Env)]; 250bind_list(Ks, Vs, Env) -> 251 [{map, store_list(Ks, Vs, dict:new())} | delete_list(Ks, Env)]. 252 253store_list([K | Ks], [V | Vs], Dict) -> 254 store_list(Ks, Vs, dict:store(K, V, Dict)); 255store_list([], _, Dict) -> 256 Dict. 257 258delete_list([K | Ks], Env) -> 259 delete_list(Ks, delete_any(K, Env)); 260delete_list([], Env) -> 261 Env. 262 263%% By not calling `delete' unless we have to, we avoid unnecessary 264%% rewriting of the data. 265 266delete_any(Key, Env) -> 267 case is_defined(Key, Env) of 268 true -> 269 delete(Key, Env); 270 false -> 271 Env 272 end. 273 274%% ===================================================================== 275%% @spec delete(Key, Env) -> environment() 276%% 277%% Key = term() 278%% Env = environment() 279%% 280%% @doc Delete an entry. This removes <code>Key</code> from the 281%% environment. 282 283delete(Key, [{map, Dict} = E | Env]) -> 284 case dict:is_key(Key, Dict) of 285 true -> 286 [{map, dict:erase(Key, Dict)} | Env]; 287 false -> 288 delete_1(Key, Env, E) 289 end; 290delete(Key, [{rec, Dict, Dict0} = E | Env]) -> 291 case dict:is_key(Key, Dict) of 292 true -> 293 %% The Dict0 component must be preserved as it is until all 294 %% keys in Dict have been deleted. 295 Dict1 = dict:erase(Key, Dict), 296 case dict:size(Dict1) of 297 0 -> 298 Env; % the whole {rec,...} is now garbage 299 _ -> 300 [{rec, Dict1, Dict0} | Env] 301 end; 302 false -> 303 [E | delete(Key, Env)] 304 end. 305 306%% This is just like above, except we pass on the preceding 'map' 307%% mapping in the list to enable merging when removing 'rec' mappings. 308 309delete_1(Key, [{rec, Dict, Dict0} = E | Env], E1) -> 310 case dict:is_key(Key, Dict) of 311 true -> 312 Dict1 = dict:erase(Key, Dict), 313 case dict:size(Dict1) of 314 0 -> 315 concat(E1, Env); 316 _ -> 317 [E1, {rec, Dict1, Dict0} | Env] 318 end; 319 false -> 320 [E1, E | delete(Key, Env)] 321 end. 322 323concat({map, D1}, [{map, D2} | Env]) -> 324 [dict:merge(fun (_K, V1, _V2) -> V1 end, D1, D2) | Env]; 325concat(E1, Env) -> 326 [E1 | Env]. 327 328 329%% ===================================================================== 330%% @spec bind_recursive(Keys, Values, Fun, Env) -> NewEnv 331%% 332%% Keys = [term()] 333%% Values = [term()] 334%% Fun = (Value, Env) -> term() 335%% Env = environment() 336%% NewEnv = environment() 337%% 338%% @doc Make N recursive entries. This binds each key in 339%% <code>Keys</code> to the value of <code>Fun(Value, NewEnv)</code> for 340%% the corresponding <code>Value</code>. If <code>Keys</code> does not 341%% have the same length as <code>Values</code>, an exception is 342%% generated. If some key already existed in the environment, the old 343%% entry is replaced. 344%% 345%% <p>Note: the function <code>Fun</code> is evaluated each time one of 346%% the stored keys is looked up, but only then.</p> 347%% 348%% <p>Examples: 349%%<pre> 350%% NewEnv = bind_recursive([foo, bar], [1, 2], 351%% fun (V, E) -> V end, 352%% Env)</pre> 353%% 354%% This does nothing interesting; <code>get(foo, NewEnv)</code> yields 355%% <code>1</code> and <code>get(bar, NewEnv)</code> yields 356%% <code>2</code>, but there is more overhead than if the {@link 357%% bind_list/3} function had been used. 358%% 359%% <pre> 360%% NewEnv = bind_recursive([foo, bar], [1, 2], 361%% fun (V, E) -> {V, E} end, 362%% Env)</pre> 363%% 364%% Here, however, <code>get(foo, NewEnv)</code> will yield <code>{1, 365%% NewEnv}</code> and <code>get(bar, NewEnv)</code> will yield <code>{2, 366%% NewEnv}</code>, i.e., the environment <code>NewEnv</code> contains 367%% recursive bindings.</p> 368 369bind_recursive([], [], _, Env) -> 370 Env; 371bind_recursive(Ks, Vs, F, Env) -> 372 F1 = fun (V) -> 373 fun (Dict) -> F(V, [{rec, Dict, Dict} | Env]) end 374 end, 375 Dict = bind_recursive_1(Ks, Vs, F1, dict:new()), 376 [{rec, Dict, Dict} | Env]. 377 378bind_recursive_1([K | Ks], [V | Vs], F, Dict) -> 379 bind_recursive_1(Ks, Vs, F, dict:store(K, F(V), Dict)); 380bind_recursive_1([], [], _, Dict) -> 381 Dict. 382 383 384%% ===================================================================== 385%% @spec lookup(Key, Env) -> error | {ok, Value} 386%% 387%% Key = term() 388%% Env = environment() 389%% Value = term() 390%% 391%% @doc Returns <code>{ok, Value}</code> if <code>Key</code> is bound to 392%% <code>Value</code> in <code>Env</code>, and <code>error</code> 393%% otherwise. 394 395lookup(Key, [{map, Dict} | Env]) -> 396 case dict:find(Key, Dict) of 397 {ok, _}=Value -> 398 Value; 399 error when Env == [] -> 400 error; 401 error -> 402 lookup(Key, Env) 403 end; 404lookup(Key, [{rec, Dict, Dict0} | Env]) -> 405 case dict:find(Key, Dict) of 406 {ok, F} -> 407 {ok, F(Dict0)}; 408 error -> 409 lookup(Key, Env) 410 end. 411 412 413%% ===================================================================== 414%% @spec get(Key, Env) -> Value 415%% 416%% Key = term() 417%% Env = environment() 418%% Value = term() 419%% 420%% @doc Returns the value that <code>Key</code> is bound to in 421%% <code>Env</code>. Throws <code>{undefined, Key}</code> if the key 422%% does not exist in <code>Env</code>. 423 424get(Key, Env) -> 425 case lookup(Key, Env) of 426 {ok, Value} -> Value; 427 error -> throw({undefined, Key}) 428 end. 429 430 431%% ===================================================================== 432%% The key-generating algorithm could possibly be further improved. The 433%% important thing to keep in mind is, that when we need a new key, we 434%% are generally in mid-traversal of a syntax tree, and existing names 435%% in the tree may be closely grouped and evenly distributed or even 436%% forming a compact range (often having been generated by a "gensym", 437%% or by this very algorithm itself). This means that if we generate an 438%% identifier whose value is too close to those already seen (i.e., 439%% which are in the environment), it is very probable that we will 440%% shadow a not-yet-seen identifier further down in the tree, the result 441%% being that we induce another later renaming, and end up renaming most 442%% of the identifiers, completely contrary to our intention. We need to 443%% generate new identifiers in a way that avoids such systematic 444%% collisions. 445%% 446%% One way of getting a new key to try when the previous attempt failed 447%% is of course to e.g. add one to the last tried value. However, in 448%% general it's a bad idea to try adjacent identifiers: the percentage 449%% of retries will typically increase a lot, so you may lose big on the 450%% extra lookups while gaining only a little from the quicker 451%% computation. 452%% 453%% We want an initial range that is large enough for most typical cases. 454%% If we start with, say, a range of 10, we might quickly use up most of 455%% the values in the range 1-10 (or 1-100) for new top-level variables - 456%% but as we start traversing the syntax tree, it is quite likely that 457%% exactly those variables will be encountered again (this depends on 458%% how the names in the tree were created), and will then need to be 459%% renamed. If we instead begin with a larger range, it is less likely 460%% that any top-level names that we introduce will shadow names that we 461%% will find in the tree. Of course we cannot know how large is large 462%% enough: for any initial range, there is some syntax tree that uses 463%% all the values in that range, and thus any top-level names introduced 464%% will shadow names in the tree. The point is to avoid this happening 465%% all the time - a range of about 1000 seems enough for most programs. 466%% 467%% The following values have been shown to work well: 468 469-define(MINIMUM_RANGE, 1000). 470-define(START_RANGE_FACTOR, 50). 471-define(MAX_RETRIES, 2). % retries before enlarging range 472-define(ENLARGE_FACTOR, 10). % range enlargement factor 473 474-ifdef(DEBUG). 475%% If you want to use these process dictionary counters, make sure to 476%% initialise them to zero before you call any of the key-generating 477%% functions. 478%% 479%% new_key_calls total number of calls 480%% new_key_retries failed key generation attempts 481%% new_key_max maximum generated integer value 482%% 483-define(measure_calls(), 484 put(new_key_calls, 1 + get(new_key_calls))). 485-define(measure_max_key(N), 486 case N > get(new_key_max) of 487 true -> 488 put(new_key_max, N); 489 false -> 490 ok 491 end). 492-define(measure_retries(N), 493 put(new_key_retries, get(new_key_retries) + N)). 494-else. 495-define(measure_calls(), ok). 496-define(measure_max_key(N), ok). 497-define(measure_retries(N), ok). 498-endif. 499 500 501%% ===================================================================== 502%% @spec new_key(Env::environment()) -> integer() 503%% 504%% @doc Returns an integer which is not already used as key in the 505%% environment. New integers are generated using an algorithm which 506%% tries to keep the values randomly distributed within a reasonably 507%% small range relative to the number of entries in the environment. 508%% 509%% <p>This function uses the Erlang standard library module 510%% <code>random</code> to generate new keys.</p> 511%% 512%% <p>Note that only the new key is returned; the environment itself is 513%% not updated by this function.</p> 514 515new_key(Env) -> 516 new_key(fun (X) -> X end, Env). 517 518 519%% ===================================================================== 520%% @spec new_key(Function, Env) -> term() 521%% 522%% Function = (integer()) -> term() 523%% Env = environment() 524%% 525%% @doc Returns a term which is not already used as key in the 526%% environment. The term is generated by applying <code>Function</code> 527%% to an integer generated as in {@link new_key/1}. 528%% 529%% <p>Note that only the generated term is returned; the environment 530%% itself is not updated by this function.</p> 531 532new_key(F, Env) -> 533 ?measure_calls(), 534 R = start_range(Env), 535%%% io:fwrite("Start range: ~w.\n", [R]), 536 new_key(R, F, Env). 537 538new_key(R, F, Env) -> 539 new_key(generate(R, R), R, 0, F, Env). 540 541new_key(N, R, T, F, Env) when T < ?MAX_RETRIES -> 542 A = F(N), 543 case is_defined(A, Env) of 544 true -> 545%%% io:fwrite("CLASH: ~w.\n", [A]), 546 new_key(generate(N, R), R, T + 1, F, Env); 547 false -> 548 ?measure_max_key(N), 549 ?measure_retries(T), 550%%% io:fwrite("New: ~w.\n", [N]), 551 A 552 end; 553new_key(N, R, _T, F, Env) -> 554 %% Too many retries - enlarge the range and start over. 555 ?measure_retries((_T + 1)), 556 R1 = trunc(R * ?ENLARGE_FACTOR), 557%%% io:fwrite("**NEW RANGE**: ~w.\n", [R1]), 558 new_key(generate(N, R1), R1, 0, F, Env). 559 560start_range(Env) -> 561 max(env_size(Env) * ?START_RANGE_FACTOR, ?MINIMUM_RANGE). 562 563max(X, Y) when X > Y -> X; 564max(_, Y) -> Y. 565 566%% The previous key might or might not be used to compute the next key 567%% to be tried. It is currently not used. 568%% 569%% In order to avoid causing cascading renamings, it is important that 570%% this function does not generate values in order, but 571%% (pseudo-)randomly distributed over the range. 572 573generate(_N, Range) -> 574 random:uniform(Range). % works well 575 576 577%% ===================================================================== 578%% @spec new_keys(N, Env) -> [integer()] 579%% 580%% N = integer() 581%% Env = environment() 582%% 583%% @doc Returns a list of <code>N</code> distinct integers that are not 584%% already used as keys in the environment. See {@link new_key/1} for 585%% details. 586 587new_keys(N, Env) when integer(N) -> 588 new_keys(N, fun (X) -> X end, Env). 589 590 591%% ===================================================================== 592%% @spec new_keys(N, Function, Env) -> [term()] 593%% 594%% N = integer() 595%% Function = (integer()) -> term() 596%% Env = environment() 597%% 598%% @doc Returns a list of <code>N</code> distinct terms that are not 599%% already used as keys in the environment. See {@link new_key/3} for 600%% details. 601 602new_keys(N, F, Env) when integer(N) -> 603 R = start_range(Env), 604 new_keys(N, [], R, F, Env). 605 606new_keys(N, Ks, R, F, Env) when N > 0 -> 607 Key = new_key(R, F, Env), 608 Env1 = bind(Key, true, Env), % dummy binding 609 new_keys(N - 1, [Key | Ks], R, F, Env1); 610new_keys(0, Ks, _, _, _) -> 611 Ks. 612