1%% Licensed under the Apache License, Version 2.0 (the "License"); 2%% you may not use this file except in compliance with the License. 3%% You may obtain a copy of the License at 4%% 5%% http://www.apache.org/licenses/LICENSE-2.0 6%% 7%% Unless required by applicable law or agreed to in writing, software 8%% distributed under the License is distributed on an "AS IS" BASIS, 9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10%% See the License for the specific language governing permissions and 11%% limitations under the License. 12%% 13%% @copyright 1999-2004 Richard Carlsson 14%% @author Richard Carlsson <carlsson.richard@gmail.com> 15%% @doc Abstract environments, supporting self-referential bindings and 16%% automatic new-key generation. 17 18%% The current implementation is based on Erlang standard library maps. 19 20%%% -define(DEBUG, true). 21 22-module(rec_env). 23 24-export([bind/3, bind_list/3, bind_recursive/4, delete/2, empty/0, 25 get/2, is_defined/2, is_empty/1, keys/1, lookup/2, new_key/1, 26 new_key/2, new_keys/2, new_keys/3, size/1, to_list/1]). 27 28-export_type([environment/0]). 29 30-ifdef(DEBUG). 31-export([test/1, test_custom/1, test_custom/2]). 32-endif. 33 34-ifdef(DEBUG). 35%% Code for testing: 36%%@hidden 37test(N) -> 38 test_0(integer, N). 39 40%%@hidden 41test_custom(N) -> 42 F = fun (X) -> list_to_atom("X"++integer_to_list(X)) end, 43 test_custom(F, N). 44 45%%@hidden 46test_custom(F, N) -> 47 test_0({custom, F}, N). 48 49test_0(Type, N) -> 50 put(new_key_calls, 0), 51 put(new_key_retries, 0), 52 put(new_key_max, 0), 53 Env = test_1(Type, N, empty()), 54 io:fwrite("\ncalls: ~w.\n", [get(new_key_calls)]), 55 io:fwrite("\nretries: ~w.\n", [get(new_key_retries)]), 56 io:fwrite("\nmax: ~w.\n", [get(new_key_max)]), 57 maps:to_list(element(1,Env)). 58 59test_1(integer = Type, N, Env) when is_integer(N), N > 0 -> 60 Key = new_key(Env), 61 test_1(Type, N - 1, bind(Key, value, Env)); 62test_1({custom, F} = Type, N, Env) when is_integer(N), N > 0 -> 63 Key = new_key(F, Env), 64 test_1(Type, N - 1, bind(Key, value, Env)); 65test_1(_,0, Env) -> 66 Env. 67-endif. 68%%@clear 69 70 71%% Representation: 72%% 73%% environment() = [Mapping] 74%% 75%% Mapping = {map, map()} | {rec, map(), map()} 76%% 77%% An empty environment is a list containing a single `{map, map()}' 78%% element - empty lists are not valid environments. To find a key in an 79%% environment, it is searched for in each mapping in the list, in 80%% order, until it the key is found in some mapping, or the end of the 81%% list is reached. In a 'rec' mapping, we keep the original map 82%% together with a version where entries may have been deleted - this 83%% makes it possible to garbage collect the entire 'rec' mapping when 84%% all its entries are unused (for example, by being shadowed by later 85%% definitions). 86 87 88%% ===================================================================== 89%% @type environment(). An abstract environment. 90 91-type mapping() :: {'map', map()} | {'rec', map(), map()}. 92-type environment() :: [mapping(),...]. 93 94%% ===================================================================== 95%% @spec empty() -> environment() 96%% 97%% @doc Returns an empty environment. 98 99-spec empty() -> environment(). 100 101empty() -> 102 [{map, #{}}]. 103 104 105%% ===================================================================== 106%% @spec is_empty(Env::environment()) -> boolean() 107%% 108%% @doc Returns <code>true</code> if the environment is empty, otherwise 109%% <code>false</code>. 110 111-spec is_empty(environment()) -> boolean(). 112 113is_empty([{map, Map} | Es]) -> 114 N = map_size(Map), 115 if N =/= 0 -> false; 116 Es =:= [] -> true; 117 true -> is_empty(Es) 118 end; 119is_empty([{rec, Map, _} | Es]) -> 120 N = map_size(Map), 121 if N =/= 0 -> false; 122 Es =:= [] -> true; 123 true -> is_empty(Es) 124 end. 125 126 127%% ===================================================================== 128%% @spec size(Env::environment()) -> integer() 129%% 130%% @doc Returns the number of entries in an environment. 131 132%% (The name 'size' cannot be used in local calls, since there exists a 133%% built-in function with the same name.) 134 135-spec size(environment()) -> non_neg_integer(). 136 137size(Env) -> 138 env_size(Env). 139 140env_size([{map, Map}]) -> 141 map_size(Map); 142env_size([{map, Map} | Env]) -> 143 map_size(Map) + env_size(Env); 144env_size([{rec, Map, _Map0} | Env]) -> 145 map_size(Map) + env_size(Env). 146 147 148%% ===================================================================== 149%% @spec is_defined(Key, Env) -> boolean() 150%% 151%% Key = term() 152%% Env = environment() 153%% 154%% @doc Returns <code>true</code> if <code>Key</code> is bound in the 155%% environment, otherwise <code>false</code>. 156 157-spec is_defined(term(), environment()) -> boolean(). 158 159is_defined(Key, [{map, Map} | Env]) -> 160 case maps:is_key(Key, Map) of 161 true -> 162 true; 163 false when Env =:= [] -> 164 false; 165 false -> 166 is_defined(Key, Env) 167 end; 168is_defined(Key, [{rec, Map, _Map0} | Env]) -> 169 maps:is_key(Key, Map) orelse is_defined(Key, Env). 170 171 172%% ===================================================================== 173%% @spec keys(Env::environment()) -> [term()] 174%% 175%% @doc Returns the ordered list of all keys in the environment. 176 177-spec keys(environment()) -> [term()]. 178 179keys(Env) -> 180 lists:sort(keys(Env, [])). 181 182keys([{map, Map}], S) -> 183 maps:keys(Map) ++ S; 184keys([{map, Map} | Env], S) -> 185 keys(Env, maps:keys(Map) ++ S); 186keys([{rec, Map, _Map0} | Env], S) -> 187 keys(Env, maps:keys(Map) ++ S). 188 189 190%% ===================================================================== 191%% @spec to_list(Env) -> [{Key, Value}] 192%% 193%% Env = environment() 194%% Key = term() 195%% Value = term() 196%% 197%% @doc Returns an ordered list of <code>{Key, Value}</code> pairs for 198%% all keys in <code>Env</code>. <code>Value</code> is the same as that 199%% returned by {@link get/2}. 200 201-spec to_list(environment()) -> [{term(), term()}]. 202 203to_list(Env) -> 204 lists:sort(to_list(Env, [])). 205 206to_list([{map, Map}], S) -> 207 maps:to_list(Map) ++ S; 208to_list([{map, Map} | Env], S) -> 209 to_list(Env, maps:to_list(Map) ++ S); 210to_list([{rec, Map, _Map0} | Env], S) -> 211 to_list(Env, maps:to_list(Map) ++ S). 212 213 214%% ===================================================================== 215%% @spec bind(Key, Value, Env) -> environment() 216%% 217%% Key = term() 218%% Value = term() 219%% Env = environment() 220%% 221%% @doc Make a nonrecursive entry. This binds <code>Key</code> to 222%% <code>Value</code>. If the key already existed in the environment, 223%% the old entry is replaced. 224 225%% Note that deletion is done to free old bindings so they can be 226%% garbage collected. 227 228-spec bind(term(), term(), environment()) -> environment(). 229 230bind(Key, Value, [{map, Map}]) -> 231 [{map, maps:put(Key, Value, Map)}]; 232bind(Key, Value, [{map, Map} | Env]) -> 233 [{map, maps:put(Key, Value, Map)} | delete_any(Key, Env)]; 234bind(Key, Value, Env) -> 235 [{map, maps:put(Key, Value, #{})} | delete_any(Key, Env)]. 236 237 238%% ===================================================================== 239%% @spec bind_list(Keys, Values, Env) -> environment() 240%% 241%% Keys = [term()] 242%% Values = [term()] 243%% Env = environment() 244%% 245%% @doc Make N nonrecursive entries. This binds each key in 246%% <code>Keys</code> to the corresponding value in 247%% <code>Values</code>. If some key already existed in the environment, 248%% the previous entry is replaced. If <code>Keys</code> does not have 249%% the same length as <code>Values</code>, an exception is generated. 250 251-spec bind_list([term()], [term()], environment()) -> environment(). 252 253bind_list(Ks, Vs, [{map, Map}]) -> 254 [{map, store_list(Ks, Vs, Map)}]; 255bind_list(Ks, Vs, [{map, Map} | Env]) -> 256 [{map, store_list(Ks, Vs, Map)} | delete_list(Ks, Env)]; 257bind_list(Ks, Vs, Env) -> 258 [{map, store_list(Ks, Vs, #{})} | delete_list(Ks, Env)]. 259 260store_list([K | Ks], [V | Vs], Map) -> 261 store_list(Ks, Vs, maps:put(K, V, Map)); 262store_list([], _, Map) -> 263 Map. 264 265delete_list([K | Ks], Env) -> 266 delete_list(Ks, delete_any(K, Env)); 267delete_list([], Env) -> 268 Env. 269 270%% By not calling `delete' unless we have to, we avoid unnecessary 271%% rewriting of the data. 272 273delete_any(Key, Env) -> 274 case is_defined(Key, Env) of 275 true -> 276 delete(Key, Env); 277 false -> 278 Env 279 end. 280 281%% ===================================================================== 282%% @spec delete(Key, Env) -> environment() 283%% 284%% Key = term() 285%% Env = environment() 286%% 287%% @doc Delete an entry. This removes <code>Key</code> from the 288%% environment. 289 290-spec delete(term(), environment()) -> environment(). 291 292delete(Key, [{map, Map} = E | Env]) -> 293 case maps:take(Key, Map) of 294 {_, Map1} -> 295 [{map, Map1} | Env]; 296 error -> 297 delete_1(Key, Env, E) 298 end; 299delete(Key, [{rec, Map, Map0} = E | Env]) -> 300 case maps:take(Key, Map) of 301 {_, Map1} when map_size(Map1) =:= 0 -> 302 Env; % the whole {rec,...} is now garbage 303 %% The Map0 component must be preserved as it is until all 304 %% keys in Map have been deleted. 305 {_, Map1} -> 306 [{rec, Map1, Map0} | Env]; 307 error -> 308 [E | delete(Key, Env)] 309 end. 310 311%% This is just like above, except we pass on the preceding 'map' 312%% mapping in the list to enable merging when removing 'rec' mappings. 313 314delete_1(Key, [{rec, Map, Map0} = E | Env], E1) -> 315 case maps:take(Key, Map) of 316 {_, Map1} when map_size(Map1) =:= 0 -> 317 concat(E1, Env); 318 {_, Map1} -> 319 [E1, {rec, Map1, Map0} | Env]; 320 error -> 321 [E1, E | delete(Key, Env)] 322 end. 323 324concat({map, M1}, [{map, M2} | Env]) -> 325 [maps:merge(M2, M1) | Env]; 326concat(E1, Env) -> 327 [E1 | Env]. 328 329 330%% ===================================================================== 331%% @spec bind_recursive(Keys, Values, Fun, Env) -> NewEnv 332%% 333%% Keys = [term()] 334%% Values = [term()] 335%% Fun = (Value, Env) -> term() 336%% Env = environment() 337%% NewEnv = environment() 338%% 339%% @doc Make N recursive entries. This binds each key in 340%% <code>Keys</code> to the value of <code>Fun(Value, NewEnv)</code> for 341%% the corresponding <code>Value</code>. If <code>Keys</code> does not 342%% have the same length as <code>Values</code>, an exception is 343%% generated. If some key already existed in the environment, the old 344%% entry is replaced. 345%% 346%% <p>Note: the function <code>Fun</code> is evaluated each time one of 347%% the stored keys is looked up, but only then.</p> 348%% 349%% <p>Examples: 350%%<pre> 351%% NewEnv = bind_recursive([foo, bar], [1, 2], 352%% fun (V, E) -> V end, 353%% Env)</pre> 354%% 355%% This does nothing interesting; <code>get(foo, NewEnv)</code> yields 356%% <code>1</code> and <code>get(bar, NewEnv)</code> yields 357%% <code>2</code>, but there is more overhead than if the {@link 358%% bind_list/3} function had been used. 359%% 360%% <pre> 361%% NewEnv = bind_recursive([foo, bar], [1, 2], 362%% fun (V, E) -> {V, E} end, 363%% Env)</pre> 364%% 365%% Here, however, <code>get(foo, NewEnv)</code> will yield <code>{1, 366%% NewEnv}</code> and <code>get(bar, NewEnv)</code> will yield <code>{2, 367%% NewEnv}</code>, i.e., the environment <code>NewEnv</code> contains 368%% recursive bindings.</p> 369 370-spec bind_recursive([term()], [term()], 371 fun((term(), environment()) -> term()), 372 environment()) -> environment(). 373 374bind_recursive([], [], _, Env) -> 375 Env; 376bind_recursive(Ks, Vs, F, Env) -> 377 F1 = fun (V) -> 378 fun (Map) -> F(V, [{rec, Map, Map} | Env]) end 379 end, 380 Map = bind_recursive_1(Ks, Vs, F1, #{}), 381 [{rec, Map, Map} | Env]. 382 383bind_recursive_1([K | Ks], [V | Vs], F, Map) -> 384 bind_recursive_1(Ks, Vs, F, maps:put(K, F(V), Map)); 385bind_recursive_1([], [], _, Map) -> 386 Map. 387 388 389%% ===================================================================== 390%% @spec lookup(Key, Env) -> error | {ok, Value} 391%% 392%% Key = term() 393%% Env = environment() 394%% Value = term() 395%% 396%% @doc Returns <code>{ok, Value}</code> if <code>Key</code> is bound to 397%% <code>Value</code> in <code>Env</code>, and <code>error</code> 398%% otherwise. 399 400-spec lookup(term(), environment()) -> 'error' | {'ok', term()}. 401 402lookup(Key, [{map, Map} | Env]) -> 403 case maps:find(Key, Map) of 404 {ok, _}=Value -> 405 Value; 406 error when Env =:= [] -> 407 error; 408 error -> 409 lookup(Key, Env) 410 end; 411lookup(Key, [{rec, Map, Map0} | Env]) -> 412 case maps:find(Key, Map) of 413 {ok, F} -> 414 {ok, F(Map0)}; 415 error -> 416 lookup(Key, Env) 417 end. 418 419 420%% ===================================================================== 421%% @spec get(Key, Env) -> Value 422%% 423%% Key = term() 424%% Env = environment() 425%% Value = term() 426%% 427%% @doc Returns the value that <code>Key</code> is bound to in 428%% <code>Env</code>. Throws <code>{undefined, Key}</code> if the key 429%% does not exist in <code>Env</code>. 430 431-spec get(term(), environment()) -> term(). 432 433get(Key, Env) -> 434 case lookup(Key, Env) of 435 {ok, Value} -> Value; 436 error -> throw({undefined, Key}) 437 end. 438 439 440%% ===================================================================== 441%% The key-generating algorithm could possibly be further improved. The 442%% important thing to keep in mind is, that when we need a new key, we 443%% are generally in mid-traversal of a syntax tree, and existing names 444%% in the tree may be closely grouped and evenly distributed or even 445%% forming a compact range (often having been generated by a "gensym", 446%% or by this very algorithm itself). This means that if we generate an 447%% identifier whose value is too close to those already seen (i.e., 448%% which are in the environment), it is very probable that we will 449%% shadow a not-yet-seen identifier further down in the tree, the result 450%% being that we induce another later renaming, and end up renaming most 451%% of the identifiers, completely contrary to our intention. We need to 452%% generate new identifiers in a way that avoids such systematic 453%% collisions. 454%% 455%% One way of getting a new key to try when the previous attempt failed 456%% is of course to e.g. add one to the last tried value. However, in 457%% general it's a bad idea to try adjacent identifiers: the percentage 458%% of retries will typically increase a lot, so you may lose big on the 459%% extra lookups while gaining only a little from the quicker 460%% computation. 461%% 462%% We want an initial range that is large enough for most typical cases. 463%% If we start with, say, a range of 10, we might quickly use up most of 464%% the values in the range 1-10 (or 1-100) for new top-level variables - 465%% but as we start traversing the syntax tree, it is quite likely that 466%% exactly those variables will be encountered again (this depends on 467%% how the names in the tree were created), and will then need to be 468%% renamed. If we instead begin with a larger range, it is less likely 469%% that any top-level names that we introduce will shadow names that we 470%% will find in the tree. Of course we cannot know how large is large 471%% enough: for any initial range, there is some syntax tree that uses 472%% all the values in that range, and thus any top-level names introduced 473%% will shadow names in the tree. The point is to avoid this happening 474%% all the time - a range of about 1000 seems enough for most programs. 475%% 476%% The following values have been shown to work well: 477 478-define(MINIMUM_RANGE, 1000). 479-define(START_RANGE_FACTOR, 50). 480-define(MAX_RETRIES, 2). % retries before enlarging range 481-define(ENLARGE_FACTOR, 10). % range enlargment factor 482 483-ifdef(DEBUG). 484%% If you want to use these process dictionary counters, make sure to 485%% initialise them to zero before you call any of the key-generating 486%% functions. 487%% 488%% new_key_calls total number of calls 489%% new_key_retries failed key generation attempts 490%% new_key_max maximum generated integer value 491%% 492-define(measure_calls(), 493 put(new_key_calls, 1 + get(new_key_calls))). 494-define(measure_max_key(N), 495 case N > get(new_key_max) of 496 true -> 497 put(new_key_max, N); 498 false -> 499 ok 500 end). 501-define(measure_retries(N), 502 put(new_key_retries, get(new_key_retries) + N)). 503-else. 504-define(measure_calls(), ok). 505-define(measure_max_key(N), ok). 506-define(measure_retries(N), ok). 507-endif. 508 509 510%% ===================================================================== 511%% @spec new_key(Env::environment()) -> integer() 512%% 513%% @doc Returns an integer which is not already used as key in the 514%% environment. New integers are generated using an algorithm which 515%% tries to keep the values randomly distributed within a reasonably 516%% small range relative to the number of entries in the environment. 517%% 518%% <p>This function uses the Erlang standard library module 519%% <code>random</code> to generate new keys.</p> 520%% 521%% <p>Note that only the new key is returned; the environment itself is 522%% not updated by this function.</p> 523 524-spec new_key(environment()) -> integer(). 525 526new_key(Env) -> 527 new_key(fun (X) -> X end, Env). 528 529 530%% ===================================================================== 531%% @spec new_key(Function, Env) -> term() 532%% 533%% Function = (integer()) -> term() 534%% Env = environment() 535%% 536%% @doc Returns a term which is not already used as key in the 537%% environment. The term is generated by applying <code>Function</code> 538%% to an integer generated as in {@link new_key/1}. 539%% 540%% <p>Note that only the generated term is returned; the environment 541%% itself is not updated by this function.</p> 542 543-spec new_key(fun((integer()) -> term()), environment()) -> term(). 544 545new_key(F, Env) -> 546 ?measure_calls(), 547 R = start_range(Env), 548 %% io:fwrite("Start range: ~w.\n", [R]), 549 new_key(R, F, Env). 550 551new_key(R, F, Env) -> 552 new_key(generate(R, R), R, 0, F, Env). 553 554new_key(N, R, T, F, Env) when T < ?MAX_RETRIES -> 555 A = F(N), 556 case is_defined(A, Env) of 557 true -> 558 %% io:fwrite("CLASH: ~w.\n", [A]), 559 new_key(generate(N, R), R, T + 1, F, Env); 560 false -> 561 ?measure_max_key(N), 562 ?measure_retries(T), 563 %% io:fwrite("New: ~w.\n", [N]), 564 A 565 end; 566new_key(N, R, _T, F, Env) -> 567 %% Too many retries - enlarge the range and start over. 568 ?measure_retries((_T + 1)), 569 R1 = trunc(R * ?ENLARGE_FACTOR), 570 %% io:fwrite("**NEW RANGE**: ~w.\n", [R1]), 571 new_key(generate(N, R1), R1, 0, F, Env). 572 573start_range(Env) -> 574 erlang:max(env_size(Env) * ?START_RANGE_FACTOR, ?MINIMUM_RANGE). 575 576%% The previous key might or might not be used to compute the next key 577%% to be tried. It is currently not used. 578%% 579%% In order to avoid causing cascading renamings, it is important that 580%% this function does not generate values in order, but 581%% (pseudo-)randomly distributed over the range. 582 583generate(_N, Range) -> 584 %% We must use the same sequence of random variables to ensure 585 %% that two compilations of the same source code generates the 586 %% same BEAM code. 587 case rand:export_seed() of 588 undefined -> 589 _ = rand:seed(exsplus, {1,42,2053}), 590 ok; 591 _ -> 592 ok 593 end, 594 rand:uniform(Range). % works well 595 596 597%% ===================================================================== 598%% @spec new_keys(N, Env) -> [integer()] 599%% 600%% N = integer() 601%% Env = environment() 602%% 603%% @doc Returns a list of <code>N</code> distinct integers that are not 604%% already used as keys in the environment. See {@link new_key/1} for 605%% details. 606 607-spec new_keys(integer(), environment()) -> [integer()]. 608 609new_keys(N, Env) when is_integer(N) -> 610 new_keys(N, fun (X) -> X end, Env). 611 612 613%% ===================================================================== 614%% @spec new_keys(N, Function, Env) -> [term()] 615%% 616%% N = integer() 617%% Function = (integer()) -> term() 618%% Env = environment() 619%% 620%% @doc Returns a list of <code>N</code> distinct terms that are not 621%% already used as keys in the environment. See {@link new_key/3} for 622%% details. 623 624-spec new_keys(integer(), fun((integer()) -> term()), environment()) -> [term()]. 625 626new_keys(N, F, Env) when is_integer(N) -> 627 R = start_range(Env), 628 new_keys(N, [], R, F, Env). 629 630new_keys(N, Ks, R, F, Env) when N > 0 -> 631 Key = new_key(R, F, Env), 632 Env1 = bind(Key, true, Env), % dummy binding 633 new_keys(N - 1, [Key | Ks], R, F, Env1); 634new_keys(0, Ks, _, _, _) -> 635 Ks. 636