1%% Licensed under the Apache License, Version 2.0 (the "License"); 2%% you may not use this file except in compliance with the License. 3%% You may obtain a copy of the License at 4%% 5%% http://www.apache.org/licenses/LICENSE-2.0 6%% 7%% Unless required by applicable law or agreed to in writing, software 8%% distributed under the License is distributed on an "AS IS" BASIS, 9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10%% See the License for the specific language governing permissions and 11%% limitations under the License. 12%% 13%% ===================================================================== 14%% Ordered Sets implemented as General Balanced Trees 15%% 16%% Copyright (C) 1999-2001 Richard Carlsson 17%% 18%% An implementation of ordered sets using Prof. Arne Andersson's 19%% General Balanced Trees. This can be much more efficient than using 20%% ordered lists, for larger sets, but depends on the application. See 21%% notes below for details. 22%% --------------------------------------------------------------------- 23%% Notes: 24%% 25%% The complexity on set operations is bounded by either O(|S|) or O(|T| 26%% * log(|S|)), where S is the largest given set, depending on which is 27%% fastest for any particular function call. For operating on sets of 28%% almost equal size, this implementation is about 3 times slower than 29%% using ordered-list sets directly. For sets of very different sizes, 30%% however, this solution can be arbitrarily much faster; in practical 31%% cases, often between 10 and 100 times. This implementation is 32%% particularly suited for ackumulating elements a few at a time, 33%% building up a large set (more than 100-200 elements), and repeatedly 34%% testing for membership in the current set. 35%% 36%% As with normal tree structures, lookup (membership testing), 37%% insertion and deletion have logarithmic complexity. 38%% 39%% Operations: 40%% 41%% - empty(): returns empty set. 42%% 43%% Alias: new(), for compatibility with `sets'. 44%% 45%% - is_empty(S): returns 'true' if S is an empty set, and 'false' 46%% otherwise. 47%% 48%% - size(S): returns the number of nodes in the set as an integer. 49%% Returns 0 (zero) if the set is empty. 50%% 51%% - singleton(X): returns a set containing only the element X. 52%% 53%% - is_member(X, S): returns `true' if element X is a member of set S, 54%% and `false' otherwise. 55%% 56%% Alias: is_element(), for compatibility with `sets'. 57%% 58%% - insert(X, S): inserts element X into set S; returns the new set. 59%% *Assumes that the element is not present in S.* 60%% 61%% - add(X, S): adds element X to set S; returns the new set. If X is 62%% already an element in S, nothing is changed. 63%% 64%% Alias: add_element(), for compatibility with `sets'. 65%% 66%% - delete(X, S): removes element X from set S; returns new set. 67%% Assumes that the element exists in the set. 68%% 69%% - delete_any(X, S): removes key X from set S if the key is present 70%% in the set, otherwise does nothing; returns new set. 71%% 72%% Alias: del_element(), for compatibility with `sets'. 73%% 74%% - balance(S): rebalances the tree representation of S. Note that this 75%% is rarely necessary, but may be motivated when a large number of 76%% elements have been deleted from the tree without further 77%% insertions. Rebalancing could then be forced in order to minimise 78%% lookup times, since deletion only does not rebalance the tree. 79%% 80%% - union(S1, S2): returns a new set that contains each element that is 81%% in either S1 or S2 or both, and no other elements. 82%% 83%% - union(Ss): returns a new set that contains each element that is in 84%% at least one of the sets in the list Ss, and no other elements. 85%% 86%% - intersection(S1, S2): returns a new set that contains each element 87%% that is in both S1 and S2, and no other elements. 88%% 89%% - intersection(Ss): returns a new set that contains each element that 90%% is in all of the sets in the list Ss, and no other elements. 91%% 92%% - is_disjoint(S1, S2): returns `true' if none of the elements in S1 93%% occurs in S2. 94%% 95%% - difference(S1, S2): returns a new set that contains each element in 96%% S1 that is not also in S2, and no other elements. 97%% 98%% Alias: subtract(), for compatibility with `sets'. 99%% 100%% - is_subset(S1, S2): returns `true' if each element in S1 is also a 101%% member of S2, and `false' otherwise. 102%% 103%% - to_list(S): returns an ordered list of all elements in set S. The 104%% list never contains duplicates. 105%% 106%% - from_list(List): creates a set containing all elements in List, 107%% where List may be unordered and contain duplicates. 108%% 109%% - from_ordset(L): turns an ordered-set list L into a set. The list 110%% must not contain duplicates. 111%% 112%% - smallest(S): returns the smallest element in set S. Assumes that 113%% the set S is nonempty. 114%% 115%% - largest(S): returns the largest element in set S. Assumes that the 116%% set S is nonempty. 117%% 118%% - take_smallest(S): returns {X, S1}, where X is the smallest element 119%% in set S, and S1 is the set S with element X deleted. Assumes that 120%% the set S is nonempty. 121%% 122%% - take_largest(S): returns {X, S1}, where X is the largest element in 123%% set S, and S1 is the set S with element X deleted. Assumes that the 124%% set S is nonempty. 125%% 126%% - iterator(S): returns an iterator that can be used for traversing 127%% the entries of set S; see `next'. The implementation of this is 128%% very efficient; traversing the whole set using `next' is only 129%% slightly slower than getting the list of all elements using 130%% `to_list' and traversing that. The main advantage of the iterator 131%% approach is that it does not require the complete list of all 132%% elements to be built in memory at one time. 133%% 134%% - iterator_from(X, S): returns an iterator that can be used for 135%% traversing the elements of set S greater than or equal to X; 136%% see `next'. 137%% 138%% - next(T): returns {X, T1} where X is the smallest element referred 139%% to by the iterator T, and T1 is the new iterator to be used for 140%% traversing the remaining elements, or the atom `none' if no 141%% elements remain. 142%% 143%% - filter(P, S): Filters set S using predicate function P. Included 144%% for compatibility with `sets'. 145%% 146%% - fold(F, A, S): Folds function F over set S with A as the initial 147%% ackumulator. Included for compatibility with `sets'. 148%% 149%% - is_set(S): returns 'true' if S appears to be a set, and 'false' 150%% otherwise. Not recommended; included for compatibility with `sets'. 151 152-module(gb_sets). 153 154-export([empty/0, is_empty/1, size/1, singleton/1, is_member/2, 155 insert/2, add/2, delete/2, delete_any/2, balance/1, union/2, 156 union/1, intersection/2, intersection/1, is_disjoint/2, difference/2, 157 is_subset/2, to_list/1, from_list/1, from_ordset/1, smallest/1, 158 largest/1, take_smallest/1, take_largest/1, iterator/1, 159 iterator_from/2, next/1, filter/2, fold/3, is_set/1]). 160 161%% `sets' compatibility aliases: 162 163-export([new/0, is_element/2, add_element/2, del_element/2, 164 subtract/2]). 165 166%% GB-trees adapted from Sven-Olof Nyström's implementation for 167%% representation of sets. 168%% 169%% Data structures: 170%% - {Size, Tree}, where `Tree' is composed of nodes of the form: 171%% - {Key, Smaller, Bigger}, and the "empty tree" node: 172%% - nil. 173%% 174%% No attempt is made to balance trees after deletions. Since deletions 175%% don't increase the height of a tree, this should be OK. 176%% 177%% Original balance condition h(T) <= ceil(c * log(|T|)) has been 178%% changed to the similar (but not quite equivalent) condition 2 ^ h(T) 179%% <= |T| ^ c. This should also be OK. 180%% 181%% Behaviour is logarithmic (as it should be). 182 183%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 184%% Some macros. 185 186-define(p, 2). % It seems that p = 2 is optimal for sorted keys 187 188-define(pow(A, _), A * A). % correct with exponent as defined above. 189 190-define(div2(X), X bsr 1). 191 192-define(mul2(X), X bsl 1). 193 194%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 195%% Some types. 196 197-export_type([set/0, set/1, iter/0, iter/1]). 198 199-type gb_set_node(Element) :: 'nil' | {Element, _, _}. 200-opaque set(Element) :: {non_neg_integer(), gb_set_node(Element)}. 201-type set() :: set(_). 202-opaque iter(Element) :: [gb_set_node(Element)]. 203-type iter() :: iter(_). 204 205%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 206 207-spec empty() -> Set when 208 Set :: set(). 209 210empty() -> 211 {0, nil}. 212 213-spec new() -> Set when 214 Set :: set(). 215 216new() -> empty(). 217 218-spec is_empty(Set) -> boolean() when 219 Set :: set(). 220 221is_empty({0, nil}) -> 222 true; 223is_empty(_) -> 224 false. 225 226-spec size(Set) -> non_neg_integer() when 227 Set :: set(). 228 229size({Size, _}) -> 230 Size. 231 232-spec singleton(Element) -> set(Element). 233 234singleton(Key) -> 235 {1, {Key, nil, nil}}. 236 237-spec is_element(Element, Set) -> boolean() when 238 Set :: set(Element). 239 240is_element(Key, S) -> 241 is_member(Key, S). 242 243-spec is_member(Element, Set) -> boolean() when 244 Set :: set(Element). 245 246is_member(Key, {_, T}) -> 247 is_member_1(Key, T). 248 249is_member_1(Key, {Key1, Smaller, _}) when Key < Key1 -> 250 is_member_1(Key, Smaller); 251is_member_1(Key, {Key1, _, Bigger}) when Key > Key1 -> 252 is_member_1(Key, Bigger); 253is_member_1(_, {_, _, _}) -> 254 true; 255is_member_1(_, nil) -> 256 false. 257 258-spec insert(Element, Set1) -> Set2 when 259 Set1 :: set(Element), 260 Set2 :: set(Element). 261 262insert(Key, {S, T}) -> 263 S1 = S + 1, 264 {S1, insert_1(Key, T, ?pow(S1, ?p))}. 265 266insert_1(Key, {Key1, Smaller, Bigger}, S) when Key < Key1 -> 267 case insert_1(Key, Smaller, ?div2(S)) of 268 {T1, H1, S1} when is_integer(H1) -> 269 T = {Key1, T1, Bigger}, 270 {H2, S2} = count(Bigger), 271 H = ?mul2(erlang:max(H1, H2)), 272 SS = S1 + S2 + 1, 273 P = ?pow(SS, ?p), 274 if 275 H > P -> 276 balance(T, SS); 277 true -> 278 {T, H, SS} 279 end; 280 T1 -> 281 {Key1, T1, Bigger} 282 end; 283insert_1(Key, {Key1, Smaller, Bigger}, S) when Key > Key1 -> 284 case insert_1(Key, Bigger, ?div2(S)) of 285 {T1, H1, S1} when is_integer(H1) -> 286 T = {Key1, Smaller, T1}, 287 {H2, S2} = count(Smaller), 288 H = ?mul2(erlang:max(H1, H2)), 289 SS = S1 + S2 + 1, 290 P = ?pow(SS, ?p), 291 if 292 H > P -> 293 balance(T, SS); 294 true -> 295 {T, H, SS} 296 end; 297 T1 -> 298 {Key1, Smaller, T1} 299 end; 300insert_1(Key, nil, 0) -> 301 {{Key, nil, nil}, 1, 1}; 302insert_1(Key, nil, _) -> 303 {Key, nil, nil}; 304insert_1(Key, _, _) -> 305 erlang:error({key_exists, Key}). 306 307count({_, nil, nil}) -> 308 {1, 1}; 309count({_, Sm, Bi}) -> 310 {H1, S1} = count(Sm), 311 {H2, S2} = count(Bi), 312 {?mul2(erlang:max(H1, H2)), S1 + S2 + 1}; 313count(nil) -> 314 {1, 0}. 315 316-spec balance(Set1) -> Set2 when 317 Set1 :: set(Element), 318 Set2 :: set(Element). 319 320balance({S, T}) -> 321 {S, balance(T, S)}. 322 323balance(T, S) -> 324 balance_list(to_list_1(T), S). 325 326balance_list(L, S) -> 327 {T, _} = balance_list_1(L, S), 328 T. 329 330balance_list_1(L, S) when S > 1 -> 331 Sm = S - 1, 332 S2 = Sm div 2, 333 S1 = Sm - S2, 334 {T1, [K | L1]} = balance_list_1(L, S1), 335 {T2, L2} = balance_list_1(L1, S2), 336 T = {K, T1, T2}, 337 {T, L2}; 338balance_list_1([Key | L], 1) -> 339 {{Key, nil, nil}, L}; 340balance_list_1(L, 0) -> 341 {nil, L}. 342 343-spec add_element(Element, Set1) -> Set2 when 344 Set1 :: set(Element), 345 Set2 :: set(Element). 346 347add_element(X, S) -> 348 add(X, S). 349 350-spec add(Element, Set1) -> Set2 when 351 Set1 :: set(Element), 352 Set2 :: set(Element). 353 354add(X, S) -> 355 case is_member(X, S) of 356 true -> 357 S; % we don't have to do anything here 358 false -> 359 insert(X, S) 360 end. 361 362-spec from_list(List) -> Set when 363 List :: [Element], 364 Set :: set(Element). 365 366from_list(L) -> 367 from_ordset(ordsets:from_list(L)). 368 369-spec from_ordset(List) -> Set when 370 List :: [Element], 371 Set :: set(Element). 372 373from_ordset(L) -> 374 S = length(L), 375 {S, balance_list(L, S)}. 376 377-spec del_element(Element, Set1) -> Set2 when 378 Set1 :: set(Element), 379 Set2 :: set(Element). 380 381del_element(Key, S) -> 382 delete_any(Key, S). 383 384-spec delete_any(Element, Set1) -> Set2 when 385 Set1 :: set(Element), 386 Set2 :: set(Element). 387 388delete_any(Key, S) -> 389 case is_member(Key, S) of 390 true -> 391 delete(Key, S); 392 false -> 393 S 394 end. 395 396-spec delete(Element, Set1) -> Set2 when 397 Set1 :: set(Element), 398 Set2 :: set(Element). 399 400delete(Key, {S, T}) -> 401 {S - 1, delete_1(Key, T)}. 402 403delete_1(Key, {Key1, Smaller, Larger}) when Key < Key1 -> 404 Smaller1 = delete_1(Key, Smaller), 405 {Key1, Smaller1, Larger}; 406delete_1(Key, {Key1, Smaller, Bigger}) when Key > Key1 -> 407 Bigger1 = delete_1(Key, Bigger), 408 {Key1, Smaller, Bigger1}; 409delete_1(_, {_, Smaller, Larger}) -> 410 merge(Smaller, Larger). 411 412merge(Smaller, nil) -> 413 Smaller; 414merge(nil, Larger) -> 415 Larger; 416merge(Smaller, Larger) -> 417 {Key, Larger1} = take_smallest1(Larger), 418 {Key, Smaller, Larger1}. 419 420-spec take_smallest(Set1) -> {Element, Set2} when 421 Set1 :: set(Element), 422 Set2 :: set(Element). 423 424take_smallest({S, T}) -> 425 {Key, Larger} = take_smallest1(T), 426 {Key, {S - 1, Larger}}. 427 428take_smallest1({Key, nil, Larger}) -> 429 {Key, Larger}; 430take_smallest1({Key, Smaller, Larger}) -> 431 {Key1, Smaller1} = take_smallest1(Smaller), 432 {Key1, {Key, Smaller1, Larger}}. 433 434-spec smallest(Set) -> Element when 435 Set :: set(Element). 436 437smallest({_, T}) -> 438 smallest_1(T). 439 440smallest_1({Key, nil, _Larger}) -> 441 Key; 442smallest_1({_Key, Smaller, _Larger}) -> 443 smallest_1(Smaller). 444 445-spec take_largest(Set1) -> {Element, Set2} when 446 Set1 :: set(Element), 447 Set2 :: set(Element). 448 449take_largest({S, T}) -> 450 {Key, Smaller} = take_largest1(T), 451 {Key, {S - 1, Smaller}}. 452 453take_largest1({Key, Smaller, nil}) -> 454 {Key, Smaller}; 455take_largest1({Key, Smaller, Larger}) -> 456 {Key1, Larger1} = take_largest1(Larger), 457 {Key1, {Key, Smaller, Larger1}}. 458 459-spec largest(Set) -> Element when 460 Set :: set(Element). 461 462largest({_, T}) -> 463 largest_1(T). 464 465largest_1({Key, _Smaller, nil}) -> 466 Key; 467largest_1({_Key, _Smaller, Larger}) -> 468 largest_1(Larger). 469 470-spec to_list(Set) -> List when 471 Set :: set(Element), 472 List :: [Element]. 473 474to_list({_, T}) -> 475 to_list(T, []). 476 477to_list_1(T) -> to_list(T, []). 478 479to_list({Key, Small, Big}, L) -> 480 to_list(Small, [Key | to_list(Big, L)]); 481to_list(nil, L) -> L. 482 483-spec iterator(Set) -> Iter when 484 Set :: set(Element), 485 Iter :: iter(Element). 486 487iterator({_, T}) -> 488 iterator(T, []). 489 490%% The iterator structure is really just a list corresponding to the 491%% call stack of an in-order traversal. This is quite fast. 492 493iterator({_, nil, _} = T, As) -> 494 [T | As]; 495iterator({_, L, _} = T, As) -> 496 iterator(L, [T | As]); 497iterator(nil, As) -> 498 As. 499 500-spec iterator_from(Element, Set) -> Iter when 501 Set :: set(Element), 502 Iter :: iter(Element). 503 504iterator_from(S, {_, T}) -> 505 iterator_from(S, T, []). 506 507iterator_from(S, {K, _, T}, As) when K < S -> 508 iterator_from(S, T, As); 509iterator_from(_, {_, nil, _} = T, As) -> 510 [T | As]; 511iterator_from(S, {_, L, _} = T, As) -> 512 iterator_from(S, L, [T | As]); 513iterator_from(_, nil, As) -> 514 As. 515 516-spec next(Iter1) -> {Element, Iter2} | 'none' when 517 Iter1 :: iter(Element), 518 Iter2 :: iter(Element). 519 520next([{X, _, T} | As]) -> 521 {X, iterator(T, As)}; 522next([]) -> 523 none. 524 525 526%% Set operations: 527 528 529%% If |X| < |Y|, then we traverse the elements of X. The cost for 530%% testing a single random element for membership in a tree S is 531%% proportional to log(|S|); thus, if |Y| / |X| < c * log(|Y|), for some 532%% c, it is more efficient to scan the ordered sequence of elements of Y 533%% while traversing X (under the same ordering) in order to test whether 534%% elements of X are already in Y. Since the `math' module does not have 535%% a `log2'-function, we rewrite the condition to |X| < |Y| * c1 * 536%% ln(|X|), where c1 = c / ln 2. 537 538-define(c, 1.46). % 1 / ln 2; this appears to be best 539 540%% If the sets are not very different in size, i.e., if |Y| / |X| >= c * 541%% log(|Y|), then the fastest way to do union (and the other similar set 542%% operations) is to build the lists of elements, traverse these lists 543%% in parallel while building a reversed ackumulator list, and finally 544%% rebuild the tree directly from the ackumulator. Other methods of 545%% traversing the elements can be devised, but they all have higher 546%% overhead. 547 548-spec union(Set1, Set2) -> Set3 when 549 Set1 :: set(Element), 550 Set2 :: set(Element), 551 Set3 :: set(Element). 552 553union({N1, T1}, {N2, T2}) when N2 < N1 -> 554 union(to_list_1(T2), N2, T1, N1); 555union({N1, T1}, {N2, T2}) -> 556 union(to_list_1(T1), N1, T2, N2). 557 558%% We avoid the expensive mathematical computations if there is little 559%% chance at saving at least the same amount of time by making the right 560%% choice of strategy. Recall that N1 < N2 here. 561 562union(L, N1, T2, N2) when N2 < 10 -> 563 %% Break even is about 7 for N1 = 1 and 10 for N1 = 2 564 union_2(L, to_list_1(T2), N1 + N2); 565union(L, N1, T2, N2) -> 566 X = N1 * round(?c * math:log(N2)), 567 if N2 < X -> 568 union_2(L, to_list_1(T2), N1 + N2); 569 true -> 570 union_1(L, mk_set(N2, T2)) 571 end. 572 573-spec mk_set(non_neg_integer(), gb_set_node(T)) -> set(T). 574 575mk_set(N, T) -> 576 {N, T}. 577 578%% If the length of the list is in proportion with the size of the 579%% target set, this version spends too much time doing lookups, compared 580%% to the below version. 581 582union_1([X | Xs], S) -> 583 union_1(Xs, add(X, S)); 584union_1([], S) -> 585 S. 586 587 588%% If the length of the first list is too small in comparison with the 589%% size of the target set, this version spends too much time scanning 590%% the element list of the target set for possible membership, compared 591%% with the above version. 592 593%% Some notes on sequential scanning of ordered lists 594%% 595%% 1) We want to put the equality case last, if we can assume that the 596%% probability for overlapping elements is relatively low on average. 597%% Doing this also allows us to completely skip the (arithmetic) 598%% equality test, since the term order is arithmetically total. 599%% 600%% 2) We always test for `smaller than' first, i.e., whether the head of 601%% the left list is smaller than the head of the right list, and if the 602%% `greater than' test should instead turn out to be true, we switch 603%% left and right arguments in the recursive call under the assumption 604%% that the same is likely to apply to the next element also, 605%% statistically reducing the number of failed tests and automatically 606%% adapting to cases of lists having very different lengths. This saves 607%% 10-40% of the traversation time compared to a "fixed" strategy, 608%% depending on the sizes and contents of the lists. 609%% 610%% 3) A tail recursive version using `lists:reverse/2' is about 5-10% 611%% faster than a plain recursive version using the stack, for lists of 612%% more than about 20 elements and small stack frames. For very short 613%% lists, however (length < 10), the stack version can be several times 614%% faster. As stack frames grow larger, the advantages of using 615%% `reverse' could get greater. 616 617union_2(Xs, Ys, S) -> 618 union_2(Xs, Ys, [], S). % S is the sum of the sizes here 619 620union_2([X | Xs1], [Y | _] = Ys, As, S) when X < Y -> 621 union_2(Xs1, Ys, [X | As], S); 622union_2([X | _] = Xs, [Y | Ys1], As, S) when X > Y -> 623 union_2(Ys1, Xs, [Y | As], S); 624union_2([X | Xs1], [_ | Ys1], As, S) -> 625 union_2(Xs1, Ys1, [X | As], S - 1); 626union_2([], Ys, As, S) -> 627 {S, balance_revlist(push(Ys, As), S)}; 628union_2(Xs, [], As, S) -> 629 {S, balance_revlist(push(Xs, As), S)}. 630 631push([X | Xs], As) -> 632 push(Xs, [X | As]); 633push([], As) -> 634 As. 635 636balance_revlist(L, S) -> 637 {T, _} = balance_revlist_1(L, S), 638 T. 639 640balance_revlist_1(L, S) when S > 1 -> 641 Sm = S - 1, 642 S2 = Sm div 2, 643 S1 = Sm - S2, 644 {T2, [K | L1]} = balance_revlist_1(L, S1), 645 {T1, L2} = balance_revlist_1(L1, S2), 646 T = {K, T1, T2}, 647 {T, L2}; 648balance_revlist_1([Key | L], 1) -> 649 {{Key, nil, nil}, L}; 650balance_revlist_1(L, 0) -> 651 {nil, L}. 652 653-spec union(SetList) -> Set when 654 SetList :: [set(Element),...], 655 Set :: set(Element). 656 657union([S | Ss]) -> 658 union_list(S, Ss); 659union([]) -> empty(). 660 661union_list(S, [S1 | Ss]) -> 662 union_list(union(S, S1), Ss); 663union_list(S, []) -> S. 664 665 666%% The rest is modelled on the above. 667 668-spec intersection(Set1, Set2) -> Set3 when 669 Set1 :: set(Element), 670 Set2 :: set(Element), 671 Set3 :: set(Element). 672 673intersection({N1, T1}, {N2, T2}) when N2 < N1 -> 674 intersection(to_list_1(T2), N2, T1, N1); 675intersection({N1, T1}, {N2, T2}) -> 676 intersection(to_list_1(T1), N1, T2, N2). 677 678intersection(L, _N1, T2, N2) when N2 < 10 -> 679 intersection_2(L, to_list_1(T2)); 680intersection(L, N1, T2, N2) -> 681 X = N1 * round(?c * math:log(N2)), 682 if N2 < X -> 683 intersection_2(L, to_list_1(T2)); 684 true -> 685 intersection_1(L, T2) 686 end. 687 688%% We collect the intersecting elements in an accumulator list and count 689%% them at the same time so we can balance the list afterwards. 690 691intersection_1(Xs, T) -> 692 intersection_1(Xs, T, [], 0). 693 694intersection_1([X | Xs], T, As, N) -> 695 case is_member_1(X, T) of 696 true -> 697 intersection_1(Xs, T, [X | As], N + 1); 698 false -> 699 intersection_1(Xs, T, As, N) 700 end; 701intersection_1([], _, As, N) -> 702 {N, balance_revlist(As, N)}. 703 704 705intersection_2(Xs, Ys) -> 706 intersection_2(Xs, Ys, [], 0). 707 708intersection_2([X | Xs1], [Y | _] = Ys, As, S) when X < Y -> 709 intersection_2(Xs1, Ys, As, S); 710intersection_2([X | _] = Xs, [Y | Ys1], As, S) when X > Y -> 711 intersection_2(Ys1, Xs, As, S); 712intersection_2([X | Xs1], [_ | Ys1], As, S) -> 713 intersection_2(Xs1, Ys1, [X | As], S + 1); 714intersection_2([], _, As, S) -> 715 {S, balance_revlist(As, S)}; 716intersection_2(_, [], As, S) -> 717 {S, balance_revlist(As, S)}. 718 719-spec intersection(SetList) -> Set when 720 SetList :: [set(Element),...], 721 Set :: set(Element). 722 723intersection([S | Ss]) -> 724 intersection_list(S, Ss). 725 726intersection_list(S, [S1 | Ss]) -> 727 intersection_list(intersection(S, S1), Ss); 728intersection_list(S, []) -> S. 729 730-spec is_disjoint(Set1, Set2) -> boolean() when 731 Set1 :: set(Element), 732 Set2 :: set(Element). 733 734is_disjoint({N1, T1}, {N2, T2}) when N1 < N2 -> 735 is_disjoint_1(T1, T2); 736is_disjoint({_, T1}, {_, T2}) -> 737 is_disjoint_1(T2, T1). 738 739is_disjoint_1({K1, Smaller1, Bigger}, {K2, Smaller2, _}=Tree) when K1 < K2 -> 740 not is_member_1(K1, Smaller2) andalso 741 is_disjoint_1(Smaller1, Smaller2) andalso 742 is_disjoint_1(Bigger, Tree); 743is_disjoint_1({K1, Smaller, Bigger1}, {K2, _, Bigger2}=Tree) when K1 > K2 -> 744 not is_member_1(K1, Bigger2) andalso 745 is_disjoint_1(Bigger1, Bigger2) andalso 746 is_disjoint_1(Smaller, Tree); 747is_disjoint_1({_K1, _, _}, {_K2, _, _}) -> %K1 == K2 748 false; 749is_disjoint_1(nil, _) -> 750 true; 751is_disjoint_1(_, nil) -> 752 true. 753 754%% Note that difference is not symmetric. We don't use `delete' here, 755%% since the GB-trees implementation does not rebalance after deletion 756%% and so we could end up with very unbalanced trees indeed depending on 757%% the sets. Therefore, we always build a new tree, and thus we need to 758%% traverse the whole element list of the left operand. 759 760-spec subtract(Set1, Set2) -> Set3 when 761 Set1 :: set(Element), 762 Set2 :: set(Element), 763 Set3 :: set(Element). 764 765subtract(S1, S2) -> 766 difference(S1, S2). 767 768-spec difference(Set1, Set2) -> Set3 when 769 Set1 :: set(Element), 770 Set2 :: set(Element), 771 Set3 :: set(Element). 772 773difference({N1, T1}, {N2, T2}) -> 774 difference(to_list_1(T1), N1, T2, N2). 775 776difference(L, N1, T2, N2) when N2 < 10 -> 777 difference_2(L, to_list_1(T2), N1); 778difference(L, N1, T2, N2) -> 779 X = N1 * round(?c * math:log(N2)), 780 if N2 < X -> 781 difference_2(L, to_list_1(T2), N1); 782 true -> 783 difference_1(L, T2) 784 end. 785 786 787difference_1(Xs, T) -> 788 difference_1(Xs, T, [], 0). 789 790difference_1([X | Xs], T, As, N) -> 791 case is_member_1(X, T) of 792 true -> 793 difference_1(Xs, T, As, N); 794 false -> 795 difference_1(Xs, T, [X | As], N + 1) 796 end; 797difference_1([], _, As, N) -> 798 {N, balance_revlist(As, N)}. 799 800 801difference_2(Xs, Ys, S) -> 802 difference_2(Xs, Ys, [], S). % S is the size of the left set 803 804difference_2([X | Xs1], [Y | _] = Ys, As, S) when X < Y -> 805 difference_2(Xs1, Ys, [X | As], S); 806difference_2([X | _] = Xs, [Y | Ys1], As, S) when X > Y -> 807 difference_2(Xs, Ys1, As, S); 808difference_2([_X | Xs1], [_Y | Ys1], As, S) -> 809 difference_2(Xs1, Ys1, As, S - 1); 810difference_2([], _Ys, As, S) -> 811 {S, balance_revlist(As, S)}; 812difference_2(Xs, [], As, S) -> 813 {S, balance_revlist(push(Xs, As), S)}. 814 815 816%% Subset testing is much the same thing as set difference, but 817%% without the construction of a new set. 818 819-spec is_subset(Set1, Set2) -> boolean() when 820 Set1 :: set(Element), 821 Set2 :: set(Element). 822 823is_subset({N1, T1}, {N2, T2}) -> 824 is_subset(to_list_1(T1), N1, T2, N2). 825 826is_subset(L, _N1, T2, N2) when N2 < 10 -> 827 is_subset_2(L, to_list_1(T2)); 828is_subset(L, N1, T2, N2) -> 829 X = N1 * round(?c * math:log(N2)), 830 if N2 < X -> 831 is_subset_2(L, to_list_1(T2)); 832 true -> 833 is_subset_1(L, T2) 834 end. 835 836 837is_subset_1([X | Xs], T) -> 838 case is_member_1(X, T) of 839 true -> 840 is_subset_1(Xs, T); 841 false -> 842 false 843 end; 844is_subset_1([], _) -> 845 true. 846 847 848is_subset_2([X | _], [Y | _]) when X < Y -> 849 false; 850is_subset_2([X | _] = Xs, [Y | Ys1]) when X > Y -> 851 is_subset_2(Xs, Ys1); 852is_subset_2([_ | Xs1], [_ | Ys1]) -> 853 is_subset_2(Xs1, Ys1); 854is_subset_2([], _) -> 855 true; 856is_subset_2(_, []) -> 857 false. 858 859 860%% For compatibility with `sets': 861 862-spec is_set(Term) -> boolean() when 863 Term :: term(). 864 865is_set({0, nil}) -> true; 866is_set({N, {_, _, _}}) when is_integer(N), N >= 0 -> true; 867is_set(_) -> false. 868 869-spec filter(Pred, Set1) -> Set2 when 870 Pred :: fun((Element) -> boolean()), 871 Set1 :: set(Element), 872 Set2 :: set(Element). 873 874filter(F, S) -> 875 from_ordset([X || X <- to_list(S), F(X)]). 876 877-spec fold(Function, Acc0, Set) -> Acc1 when 878 Function :: fun((Element, AccIn) -> AccOut), 879 Acc0 :: Acc, 880 Acc1 :: Acc, 881 AccIn :: Acc, 882 AccOut :: Acc, 883 Set :: set(Element). 884 885fold(F, A, {_, T}) when is_function(F, 2) -> 886 fold_1(F, A, T). 887 888fold_1(F, Acc0, {Key, Small, Big}) -> 889 Acc1 = fold_1(F, Acc0, Small), 890 Acc = F(Key, Acc1), 891 fold_1(F, Acc, Big); 892fold_1(_, Acc, _) -> 893 Acc. 894