1%% Licensed under the Apache License, Version 2.0 (the "License");
2%% you may not use this file except in compliance with the License.
3%% You may obtain a copy of the License at
4%%
5%%     http://www.apache.org/licenses/LICENSE-2.0
6%%
7%% Unless required by applicable law or agreed to in writing, software
8%% distributed under the License is distributed on an "AS IS" BASIS,
9%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10%% See the License for the specific language governing permissions and
11%% limitations under the License.
12%%
13%% =====================================================================
14%% Ordered Sets implemented as General Balanced Trees
15%%
16%% Copyright (C) 1999-2001 Richard Carlsson
17%%
18%% An implementation of ordered sets using Prof. Arne Andersson's
19%% General Balanced Trees. This can be much more efficient than using
20%% ordered lists, for larger sets, but depends on the application. See
21%% notes below for details.
22%% ---------------------------------------------------------------------
23%% Notes:
24%%
25%% The complexity on set operations is bounded by either O(|S|) or O(|T|
26%% * log(|S|)), where S is the largest given set, depending on which is
27%% fastest for any particular function call. For operating on sets of
28%% almost equal size, this implementation is about 3 times slower than
29%% using ordered-list sets directly. For sets of very different sizes,
30%% however, this solution can be arbitrarily much faster; in practical
31%% cases, often between 10 and 100 times. This implementation is
32%% particularly suited for ackumulating elements a few at a time,
33%% building up a large set (more than 100-200 elements), and repeatedly
34%% testing for membership in the current set.
35%%
36%% As with normal tree structures, lookup (membership testing),
37%% insertion and deletion have logarithmic complexity.
38%%
39%% Operations:
40%%
41%% - empty(): returns empty set.
42%%
43%%   Alias: new(), for compatibility with `sets'.
44%%
45%% - is_empty(S): returns 'true' if S is an empty set, and 'false'
46%%   otherwise.
47%%
48%% - size(S): returns the number of nodes in the set as an integer.
49%%   Returns 0 (zero) if the set is empty.
50%%
51%% - singleton(X): returns a set containing only the element X.
52%%
53%% - is_member(X, S): returns `true' if element X is a member of set S,
54%%   and `false' otherwise.
55%%
56%%   Alias: is_element(), for compatibility with `sets'.
57%%
58%% - insert(X, S): inserts element X into set S; returns the new set.
59%%   *Assumes that the element is not present in S.*
60%%
61%% - add(X, S): adds element X to set S; returns the new set. If X is
62%%   already an element in S, nothing is changed.
63%%
64%%   Alias: add_element(), for compatibility with `sets'.
65%%
66%% - delete(X, S): removes element X from set S; returns new set.
67%%   Assumes that the element exists in the set.
68%%
69%% - delete_any(X, S): removes key X from set S if the key is present
70%%   in the set, otherwise does nothing; returns new set.
71%%
72%%   Alias: del_element(), for compatibility with `sets'.
73%%
74%% - balance(S): rebalances the tree representation of S. Note that this
75%%   is rarely necessary, but may be motivated when a large number of
76%%   elements have been deleted from the tree without further
77%%   insertions. Rebalancing could then be forced in order to minimise
78%%   lookup times, since deletion only does not rebalance the tree.
79%%
80%% - union(S1, S2): returns a new set that contains each element that is
81%%   in either S1 or S2 or both, and no other elements.
82%%
83%% - union(Ss): returns a new set that contains each element that is in
84%%   at least one of the sets in the list Ss, and no other elements.
85%%
86%% - intersection(S1, S2): returns a new set that contains each element
87%%   that is in both S1 and S2, and no other elements.
88%%
89%% - intersection(Ss): returns a new set that contains each element that
90%%   is in all of the sets in the list Ss, and no other elements.
91%%
92%% - is_disjoint(S1, S2): returns `true' if none of the elements in S1
93%%   occurs in S2.
94%%
95%% - difference(S1, S2): returns a new set that contains each element in
96%%   S1 that is not also in S2, and no other elements.
97%%
98%%   Alias: subtract(), for compatibility with `sets'.
99%%
100%% - is_subset(S1, S2): returns `true' if each element in S1 is also a
101%%   member of S2, and `false' otherwise.
102%%
103%% - to_list(S): returns an ordered list of all elements in set S. The
104%%   list never contains duplicates.
105%%
106%% - from_list(List): creates a set containing all elements in List,
107%%   where List may be unordered and contain duplicates.
108%%
109%% - from_ordset(L): turns an ordered-set list L into a set. The list
110%%   must not contain duplicates.
111%%
112%% - smallest(S): returns the smallest element in set S. Assumes that
113%%   the set S is nonempty.
114%%
115%% - largest(S): returns the largest element in set S. Assumes that the
116%%   set S is nonempty.
117%%
118%% - take_smallest(S): returns {X, S1}, where X is the smallest element
119%%   in set S, and S1 is the set S with element X deleted. Assumes that
120%%   the set S is nonempty.
121%%
122%% - take_largest(S): returns {X, S1}, where X is the largest element in
123%%   set S, and S1 is the set S with element X deleted. Assumes that the
124%%   set S is nonempty.
125%%
126%% - iterator(S): returns an iterator that can be used for traversing
127%%   the entries of set S; see `next'. The implementation of this is
128%%   very efficient; traversing the whole set using `next' is only
129%%   slightly slower than getting the list of all elements using
130%%   `to_list' and traversing that. The main advantage of the iterator
131%%   approach is that it does not require the complete list of all
132%%   elements to be built in memory at one time.
133%%
134%% - iterator_from(X, S): returns an iterator that can be used for
135%%   traversing the elements of set S greater than or equal to X;
136%%   see `next'.
137%%
138%% - next(T): returns {X, T1} where X is the smallest element referred
139%%   to by the iterator T, and T1 is the new iterator to be used for
140%%   traversing the remaining elements, or the atom `none' if no
141%%   elements remain.
142%%
143%% - filter(P, S): Filters set S using predicate function P. Included
144%%   for compatibility with `sets'.
145%%
146%% - fold(F, A, S): Folds function F over set S with A as the initial
147%%   ackumulator. Included for compatibility with `sets'.
148%%
149%% - is_set(S): returns 'true' if S appears to be a set, and 'false'
150%%   otherwise. Not recommended; included for compatibility with `sets'.
151
152-module(gb_sets).
153
154-export([empty/0, is_empty/1, size/1, singleton/1, is_member/2,
155	 insert/2, add/2, delete/2, delete_any/2, balance/1, union/2,
156	 union/1, intersection/2, intersection/1, is_disjoint/2, difference/2,
157	 is_subset/2, to_list/1, from_list/1, from_ordset/1, smallest/1,
158	 largest/1, take_smallest/1, take_largest/1, iterator/1,
159         iterator_from/2, next/1, filter/2, fold/3, is_set/1]).
160
161%% `sets' compatibility aliases:
162
163-export([new/0, is_element/2, add_element/2, del_element/2,
164	 subtract/2]).
165
166%% GB-trees adapted from Sven-Olof Nyström's implementation for
167%% representation of sets.
168%%
169%% Data structures:
170%% - {Size, Tree}, where `Tree' is composed of nodes of the form:
171%% - {Key, Smaller, Bigger}, and the "empty tree" node:
172%% - nil.
173%%
174%% No attempt is made to balance trees after deletions. Since deletions
175%% don't increase the height of a tree, this should be OK.
176%%
177%% Original balance condition h(T) <= ceil(c * log(|T|)) has been
178%% changed to the similar (but not quite equivalent) condition 2 ^ h(T)
179%% <= |T| ^ c. This should also be OK.
180%%
181%% Behaviour is logarithmic (as it should be).
182
183%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
184%% Some macros.
185
186-define(p, 2). % It seems that p = 2 is optimal for sorted keys
187
188-define(pow(A, _), A * A). % correct with exponent as defined above.
189
190-define(div2(X), X bsr 1).
191
192-define(mul2(X), X bsl 1).
193
194%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
195%% Some types.
196
197-export_type([set/0, set/1, iter/0, iter/1]).
198
199-type gb_set_node(Element) :: 'nil' | {Element, _, _}.
200-opaque set(Element) :: {non_neg_integer(), gb_set_node(Element)}.
201-type set() :: set(_).
202-opaque iter(Element) :: [gb_set_node(Element)].
203-type iter() :: iter(_).
204
205%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
206
207-spec empty() -> Set when
208      Set :: set().
209
210empty() ->
211    {0, nil}.
212
213-spec new() -> Set when
214      Set :: set().
215
216new() -> empty().
217
218-spec is_empty(Set) -> boolean() when
219      Set :: set().
220
221is_empty({0, nil}) ->
222    true;
223is_empty(_) ->
224    false.
225
226-spec size(Set) -> non_neg_integer() when
227      Set :: set().
228
229size({Size, _}) ->
230    Size.
231
232-spec singleton(Element) -> set(Element).
233
234singleton(Key) ->
235    {1, {Key, nil, nil}}.
236
237-spec is_element(Element, Set) -> boolean() when
238      Set :: set(Element).
239
240is_element(Key, S) ->
241    is_member(Key, S).
242
243-spec is_member(Element, Set) -> boolean() when
244      Set :: set(Element).
245
246is_member(Key, {_, T}) ->
247    is_member_1(Key, T).
248
249is_member_1(Key, {Key1, Smaller, _}) when Key < Key1 ->
250    is_member_1(Key, Smaller);
251is_member_1(Key, {Key1, _, Bigger}) when Key > Key1 ->
252    is_member_1(Key, Bigger);
253is_member_1(_, {_, _, _}) ->
254    true;
255is_member_1(_, nil) ->
256    false.
257
258-spec insert(Element, Set1) -> Set2 when
259      Set1 :: set(Element),
260      Set2 :: set(Element).
261
262insert(Key, {S, T}) ->
263    S1 = S + 1,
264    {S1, insert_1(Key, T, ?pow(S1, ?p))}.
265
266insert_1(Key, {Key1, Smaller, Bigger}, S) when Key < Key1 ->
267    case insert_1(Key, Smaller, ?div2(S)) of
268	{T1, H1, S1} when is_integer(H1) ->
269	    T = {Key1, T1, Bigger},
270	    {H2, S2} = count(Bigger),
271	    H = ?mul2(erlang:max(H1, H2)),
272	    SS = S1 + S2 + 1,
273	    P = ?pow(SS, ?p),
274	    if
275		H > P ->
276		    balance(T, SS);
277		true ->
278		    {T, H, SS}
279	    end;
280	T1 ->
281	    {Key1, T1, Bigger}
282    end;
283insert_1(Key, {Key1, Smaller, Bigger}, S) when Key > Key1 ->
284    case insert_1(Key, Bigger, ?div2(S)) of
285	{T1, H1, S1} when is_integer(H1) ->
286	    T = {Key1, Smaller, T1},
287	    {H2, S2} = count(Smaller),
288	    H = ?mul2(erlang:max(H1, H2)),
289	    SS = S1 + S2 + 1,
290	    P = ?pow(SS, ?p),
291	    if
292		H > P ->
293		    balance(T, SS);
294		true ->
295		    {T, H, SS}
296	    end;
297	T1 ->
298	    {Key1, Smaller, T1}
299    end;
300insert_1(Key, nil, 0) ->
301    {{Key, nil, nil}, 1, 1};
302insert_1(Key, nil, _) ->
303    {Key, nil, nil};
304insert_1(Key, _, _) ->
305    erlang:error({key_exists, Key}).
306
307count({_, nil, nil}) ->
308    {1, 1};
309count({_, Sm, Bi}) ->
310    {H1, S1} = count(Sm),
311    {H2, S2} = count(Bi),
312    {?mul2(erlang:max(H1, H2)), S1 + S2 + 1};
313count(nil) ->
314    {1, 0}.
315
316-spec balance(Set1) -> Set2 when
317      Set1 :: set(Element),
318      Set2 :: set(Element).
319
320balance({S, T}) ->
321    {S, balance(T, S)}.
322
323balance(T, S) ->
324    balance_list(to_list_1(T), S).
325
326balance_list(L, S) ->
327    {T, _} = balance_list_1(L, S),
328    T.
329
330balance_list_1(L, S) when S > 1 ->
331    Sm = S - 1,
332    S2 = Sm div 2,
333    S1 = Sm - S2,
334    {T1, [K | L1]} = balance_list_1(L, S1),
335    {T2, L2} = balance_list_1(L1, S2),
336    T = {K, T1, T2},
337    {T, L2};
338balance_list_1([Key | L], 1) ->
339    {{Key, nil, nil}, L};
340balance_list_1(L, 0) ->
341    {nil, L}.
342
343-spec add_element(Element, Set1) -> Set2 when
344      Set1 :: set(Element),
345      Set2 :: set(Element).
346
347add_element(X, S) ->
348    add(X, S).
349
350-spec add(Element, Set1) -> Set2 when
351      Set1 :: set(Element),
352      Set2 :: set(Element).
353
354add(X, S) ->
355    case is_member(X, S) of
356	true ->
357	    S;    % we don't have to do anything here
358	false ->
359	    insert(X, S)
360    end.
361
362-spec from_list(List) -> Set when
363      List :: [Element],
364      Set :: set(Element).
365
366from_list(L) ->
367    from_ordset(ordsets:from_list(L)).
368
369-spec from_ordset(List) -> Set when
370      List :: [Element],
371      Set :: set(Element).
372
373from_ordset(L) ->
374    S = length(L),
375    {S, balance_list(L, S)}.
376
377-spec del_element(Element, Set1) -> Set2 when
378      Set1 :: set(Element),
379      Set2 :: set(Element).
380
381del_element(Key, S) ->
382    delete_any(Key, S).
383
384-spec delete_any(Element, Set1) -> Set2 when
385      Set1 :: set(Element),
386      Set2 :: set(Element).
387
388delete_any(Key, S) ->
389    case is_member(Key, S) of
390 	true ->
391 	    delete(Key, S);
392 	false ->
393 	    S
394    end.
395
396-spec delete(Element, Set1) -> Set2 when
397      Set1 :: set(Element),
398      Set2 :: set(Element).
399
400delete(Key, {S, T}) ->
401    {S - 1, delete_1(Key, T)}.
402
403delete_1(Key, {Key1, Smaller, Larger}) when Key < Key1 ->
404    Smaller1 = delete_1(Key, Smaller),
405    {Key1, Smaller1, Larger};
406delete_1(Key, {Key1, Smaller, Bigger}) when Key > Key1 ->
407    Bigger1 = delete_1(Key, Bigger),
408    {Key1, Smaller, Bigger1};
409delete_1(_, {_, Smaller, Larger}) ->
410    merge(Smaller, Larger).
411
412merge(Smaller, nil) ->
413    Smaller;
414merge(nil, Larger) ->
415    Larger;
416merge(Smaller, Larger) ->
417    {Key, Larger1} = take_smallest1(Larger),
418    {Key, Smaller, Larger1}.
419
420-spec take_smallest(Set1) -> {Element, Set2} when
421      Set1 :: set(Element),
422      Set2 :: set(Element).
423
424take_smallest({S, T}) ->
425    {Key, Larger} = take_smallest1(T),
426    {Key, {S - 1, Larger}}.
427
428take_smallest1({Key, nil, Larger}) ->
429    {Key, Larger};
430take_smallest1({Key, Smaller, Larger}) ->
431    {Key1, Smaller1} = take_smallest1(Smaller),
432    {Key1, {Key, Smaller1, Larger}}.
433
434-spec smallest(Set) -> Element when
435      Set :: set(Element).
436
437smallest({_, T}) ->
438    smallest_1(T).
439
440smallest_1({Key, nil, _Larger}) ->
441    Key;
442smallest_1({_Key, Smaller, _Larger}) ->
443    smallest_1(Smaller).
444
445-spec take_largest(Set1) -> {Element, Set2} when
446      Set1 :: set(Element),
447      Set2 :: set(Element).
448
449take_largest({S, T}) ->
450    {Key, Smaller} = take_largest1(T),
451    {Key, {S - 1, Smaller}}.
452
453take_largest1({Key, Smaller, nil}) ->
454    {Key, Smaller};
455take_largest1({Key, Smaller, Larger}) ->
456    {Key1, Larger1} = take_largest1(Larger),
457    {Key1, {Key, Smaller, Larger1}}.
458
459-spec largest(Set) -> Element when
460      Set :: set(Element).
461
462largest({_, T}) ->
463    largest_1(T).
464
465largest_1({Key, _Smaller, nil}) ->
466    Key;
467largest_1({_Key, _Smaller, Larger}) ->
468    largest_1(Larger).
469
470-spec to_list(Set) -> List when
471      Set :: set(Element),
472      List :: [Element].
473
474to_list({_, T}) ->
475    to_list(T, []).
476
477to_list_1(T) -> to_list(T, []).
478
479to_list({Key, Small, Big}, L) ->
480    to_list(Small, [Key | to_list(Big, L)]);
481to_list(nil, L) -> L.
482
483-spec iterator(Set) -> Iter when
484      Set :: set(Element),
485      Iter :: iter(Element).
486
487iterator({_, T}) ->
488    iterator(T, []).
489
490%% The iterator structure is really just a list corresponding to the
491%% call stack of an in-order traversal. This is quite fast.
492
493iterator({_, nil, _} = T, As) ->
494    [T | As];
495iterator({_, L, _} = T, As) ->
496    iterator(L, [T | As]);
497iterator(nil, As) ->
498    As.
499
500-spec iterator_from(Element, Set) -> Iter when
501      Set :: set(Element),
502      Iter :: iter(Element).
503
504iterator_from(S, {_, T}) ->
505    iterator_from(S, T, []).
506
507iterator_from(S, {K, _, T}, As) when K < S ->
508    iterator_from(S, T, As);
509iterator_from(_, {_, nil, _} = T, As) ->
510    [T | As];
511iterator_from(S, {_, L, _} = T, As) ->
512    iterator_from(S, L, [T | As]);
513iterator_from(_, nil, As) ->
514    As.
515
516-spec next(Iter1) -> {Element, Iter2} | 'none' when
517      Iter1 :: iter(Element),
518      Iter2 :: iter(Element).
519
520next([{X, _, T} | As]) ->
521    {X, iterator(T, As)};
522next([]) ->
523    none.
524
525
526%% Set operations:
527
528
529%% If |X| < |Y|, then we traverse the elements of X. The cost for
530%% testing a single random element for membership in a tree S is
531%% proportional to log(|S|); thus, if |Y| / |X| < c * log(|Y|), for some
532%% c, it is more efficient to scan the ordered sequence of elements of Y
533%% while traversing X (under the same ordering) in order to test whether
534%% elements of X are already in Y. Since the `math' module does not have
535%% a `log2'-function, we rewrite the condition to |X| < |Y| * c1 *
536%% ln(|X|), where c1 = c / ln 2.
537
538-define(c, 1.46).    % 1 / ln 2; this appears to be best
539
540%% If the sets are not very different in size, i.e., if |Y| / |X| >= c *
541%% log(|Y|), then the fastest way to do union (and the other similar set
542%% operations) is to build the lists of elements, traverse these lists
543%% in parallel while building a reversed ackumulator list, and finally
544%% rebuild the tree directly from the ackumulator. Other methods of
545%% traversing the elements can be devised, but they all have higher
546%% overhead.
547
548-spec union(Set1, Set2) -> Set3 when
549      Set1 :: set(Element),
550      Set2 :: set(Element),
551      Set3 :: set(Element).
552
553union({N1, T1}, {N2, T2}) when N2 < N1 ->
554    union(to_list_1(T2), N2, T1, N1);
555union({N1, T1}, {N2, T2}) ->
556    union(to_list_1(T1), N1, T2, N2).
557
558%% We avoid the expensive mathematical computations if there is little
559%% chance at saving at least the same amount of time by making the right
560%% choice of strategy. Recall that N1 < N2 here.
561
562union(L, N1, T2, N2) when N2 < 10 ->
563    %% Break even is about 7 for N1 = 1 and 10 for N1 = 2
564    union_2(L, to_list_1(T2), N1 + N2);
565union(L, N1, T2, N2) ->
566    X = N1 * round(?c * math:log(N2)),
567    if N2 < X ->
568	    union_2(L, to_list_1(T2), N1 + N2);
569       true ->
570	    union_1(L, mk_set(N2, T2))
571    end.
572
573-spec mk_set(non_neg_integer(), gb_set_node(T)) -> set(T).
574
575mk_set(N, T) ->
576    {N, T}.
577
578%% If the length of the list is in proportion with the size of the
579%% target set, this version spends too much time doing lookups, compared
580%% to the below version.
581
582union_1([X | Xs], S) ->
583    union_1(Xs, add(X, S));
584union_1([], S) ->
585    S.
586
587
588%% If the length of the first list is too small in comparison with the
589%% size of the target set, this version spends too much time scanning
590%% the element list of the target set for possible membership, compared
591%% with the above version.
592
593%% Some notes on sequential scanning of ordered lists
594%%
595%% 1) We want to put the equality case last, if we can assume that the
596%% probability for overlapping elements is relatively low on average.
597%% Doing this also allows us to completely skip the (arithmetic)
598%% equality test, since the term order is arithmetically total.
599%%
600%% 2) We always test for `smaller than' first, i.e., whether the head of
601%% the left list is smaller than the head of the right list, and if the
602%% `greater than' test should instead turn out to be true, we switch
603%% left and right arguments in the recursive call under the assumption
604%% that the same is likely to apply to the next element also,
605%% statistically reducing the number of failed tests and automatically
606%% adapting to cases of lists having very different lengths. This saves
607%% 10-40% of the traversation time compared to a "fixed" strategy,
608%% depending on the sizes and contents of the lists.
609%%
610%% 3) A tail recursive version using `lists:reverse/2' is about 5-10%
611%% faster than a plain recursive version using the stack, for lists of
612%% more than about 20 elements and small stack frames. For very short
613%% lists, however (length < 10), the stack version can be several times
614%% faster. As stack frames grow larger, the advantages of using
615%% `reverse' could get greater.
616
617union_2(Xs, Ys, S) ->
618    union_2(Xs, Ys, [], S).    % S is the sum of the sizes here
619
620union_2([X | Xs1], [Y | _] = Ys, As, S) when X < Y ->
621    union_2(Xs1, Ys, [X | As], S);
622union_2([X | _] = Xs, [Y | Ys1], As, S) when X > Y ->
623    union_2(Ys1, Xs, [Y | As], S);
624union_2([X | Xs1], [_ | Ys1], As, S) ->
625    union_2(Xs1, Ys1, [X | As], S - 1);
626union_2([], Ys, As, S) ->
627    {S, balance_revlist(push(Ys, As), S)};
628union_2(Xs, [], As, S) ->
629    {S, balance_revlist(push(Xs, As), S)}.
630
631push([X | Xs], As) ->
632    push(Xs, [X | As]);
633push([], As) ->
634    As.
635
636balance_revlist(L, S) ->
637    {T, _} = balance_revlist_1(L, S),
638    T.
639
640balance_revlist_1(L, S) when S > 1 ->
641    Sm = S - 1,
642    S2 = Sm div 2,
643    S1 = Sm - S2,
644    {T2, [K | L1]} = balance_revlist_1(L, S1),
645    {T1, L2} = balance_revlist_1(L1, S2),
646    T = {K, T1, T2},
647    {T, L2};
648balance_revlist_1([Key | L], 1) ->
649    {{Key, nil, nil}, L};
650balance_revlist_1(L, 0) ->
651    {nil, L}.
652
653-spec union(SetList) -> Set when
654      SetList :: [set(Element),...],
655      Set :: set(Element).
656
657union([S | Ss]) ->
658    union_list(S, Ss);
659union([]) -> empty().
660
661union_list(S, [S1 | Ss]) ->
662    union_list(union(S, S1), Ss);
663union_list(S, []) -> S.
664
665
666%% The rest is modelled on the above.
667
668-spec intersection(Set1, Set2) -> Set3 when
669      Set1 :: set(Element),
670      Set2 :: set(Element),
671      Set3 :: set(Element).
672
673intersection({N1, T1}, {N2, T2}) when N2 < N1 ->
674    intersection(to_list_1(T2), N2, T1, N1);
675intersection({N1, T1}, {N2, T2}) ->
676    intersection(to_list_1(T1), N1, T2, N2).
677
678intersection(L, _N1, T2, N2) when N2 < 10 ->
679    intersection_2(L, to_list_1(T2));
680intersection(L, N1, T2, N2) ->
681    X = N1 * round(?c * math:log(N2)),
682    if N2 < X ->
683	    intersection_2(L, to_list_1(T2));
684       true ->
685	    intersection_1(L, T2)
686    end.
687
688%% We collect the intersecting elements in an accumulator list and count
689%% them at the same time so we can balance the list afterwards.
690
691intersection_1(Xs, T) ->
692    intersection_1(Xs, T, [], 0).
693
694intersection_1([X | Xs], T, As, N) ->
695    case is_member_1(X, T) of
696	true ->
697	    intersection_1(Xs, T, [X | As], N + 1);
698	false ->
699	    intersection_1(Xs, T, As, N)
700    end;
701intersection_1([], _, As, N) ->
702    {N, balance_revlist(As, N)}.
703
704
705intersection_2(Xs, Ys) ->
706    intersection_2(Xs, Ys, [], 0).
707
708intersection_2([X | Xs1], [Y | _] = Ys, As, S) when X < Y ->
709    intersection_2(Xs1, Ys, As, S);
710intersection_2([X | _] = Xs, [Y | Ys1], As, S) when X > Y ->
711    intersection_2(Ys1, Xs, As, S);
712intersection_2([X | Xs1], [_ | Ys1], As, S) ->
713    intersection_2(Xs1, Ys1, [X | As], S + 1);
714intersection_2([], _, As, S) ->
715    {S, balance_revlist(As, S)};
716intersection_2(_, [], As, S) ->
717    {S, balance_revlist(As, S)}.
718
719-spec intersection(SetList) -> Set when
720      SetList :: [set(Element),...],
721      Set :: set(Element).
722
723intersection([S | Ss]) ->
724    intersection_list(S, Ss).
725
726intersection_list(S, [S1 | Ss]) ->
727    intersection_list(intersection(S, S1), Ss);
728intersection_list(S, []) -> S.
729
730-spec is_disjoint(Set1, Set2) -> boolean() when
731      Set1 :: set(Element),
732      Set2 :: set(Element).
733
734is_disjoint({N1, T1}, {N2, T2}) when N1 < N2 ->
735    is_disjoint_1(T1, T2);
736is_disjoint({_, T1}, {_, T2}) ->
737    is_disjoint_1(T2, T1).
738
739is_disjoint_1({K1, Smaller1, Bigger}, {K2, Smaller2, _}=Tree) when K1 < K2 ->
740    not is_member_1(K1, Smaller2) andalso
741	is_disjoint_1(Smaller1, Smaller2) andalso
742	is_disjoint_1(Bigger, Tree);
743is_disjoint_1({K1, Smaller, Bigger1}, {K2, _, Bigger2}=Tree) when K1 > K2 ->
744    not is_member_1(K1, Bigger2) andalso
745	is_disjoint_1(Bigger1, Bigger2) andalso
746	is_disjoint_1(Smaller, Tree);
747is_disjoint_1({_K1, _, _}, {_K2, _, _}) ->	%K1 == K2
748    false;
749is_disjoint_1(nil, _) ->
750    true;
751is_disjoint_1(_, nil) ->
752    true.
753
754%% Note that difference is not symmetric. We don't use `delete' here,
755%% since the GB-trees implementation does not rebalance after deletion
756%% and so we could end up with very unbalanced trees indeed depending on
757%% the sets. Therefore, we always build a new tree, and thus we need to
758%% traverse the whole element list of the left operand.
759
760-spec subtract(Set1, Set2) -> Set3 when
761      Set1 :: set(Element),
762      Set2 :: set(Element),
763      Set3 :: set(Element).
764
765subtract(S1, S2) ->
766    difference(S1, S2).
767
768-spec difference(Set1, Set2) -> Set3 when
769      Set1 :: set(Element),
770      Set2 :: set(Element),
771      Set3 :: set(Element).
772
773difference({N1, T1}, {N2, T2}) ->
774    difference(to_list_1(T1), N1, T2, N2).
775
776difference(L, N1, T2, N2) when N2 < 10 ->
777    difference_2(L, to_list_1(T2), N1);
778difference(L, N1, T2, N2) ->
779    X = N1 * round(?c * math:log(N2)),
780    if N2 < X ->
781	    difference_2(L, to_list_1(T2), N1);
782       true ->
783	    difference_1(L, T2)
784    end.
785
786
787difference_1(Xs, T) ->
788    difference_1(Xs, T, [], 0).
789
790difference_1([X | Xs], T, As, N) ->
791    case is_member_1(X, T) of
792	true ->
793	    difference_1(Xs, T, As, N);
794	false ->
795	    difference_1(Xs, T, [X | As], N + 1)
796    end;
797difference_1([], _, As, N) ->
798    {N, balance_revlist(As, N)}.
799
800
801difference_2(Xs, Ys, S) ->
802    difference_2(Xs, Ys, [], S).    % S is the size of the left set
803
804difference_2([X | Xs1], [Y | _] = Ys, As, S) when X < Y ->
805    difference_2(Xs1, Ys, [X | As], S);
806difference_2([X | _] = Xs, [Y | Ys1], As, S) when X > Y ->
807    difference_2(Xs, Ys1, As, S);
808difference_2([_X | Xs1], [_Y | Ys1], As, S) ->
809    difference_2(Xs1, Ys1, As, S - 1);
810difference_2([], _Ys, As, S) ->
811    {S, balance_revlist(As, S)};
812difference_2(Xs, [], As, S) ->
813    {S, balance_revlist(push(Xs, As), S)}.
814
815
816%% Subset testing is much the same thing as set difference, but
817%% without the construction of a new set.
818
819-spec is_subset(Set1, Set2) -> boolean() when
820      Set1 :: set(Element),
821      Set2 :: set(Element).
822
823is_subset({N1, T1}, {N2, T2}) ->
824    is_subset(to_list_1(T1), N1, T2, N2).
825
826is_subset(L, _N1, T2, N2) when N2 < 10 ->
827    is_subset_2(L, to_list_1(T2));
828is_subset(L, N1, T2, N2) ->
829    X = N1 * round(?c * math:log(N2)),
830    if N2 < X ->
831	    is_subset_2(L, to_list_1(T2));
832       true ->
833	    is_subset_1(L, T2)
834    end.
835
836
837is_subset_1([X | Xs], T) ->
838    case is_member_1(X, T) of
839	true ->
840	    is_subset_1(Xs, T);
841	false ->
842	    false
843    end;
844is_subset_1([], _) ->
845    true.
846
847
848is_subset_2([X | _], [Y | _]) when X < Y ->
849    false;
850is_subset_2([X | _] = Xs, [Y | Ys1]) when X > Y ->
851    is_subset_2(Xs, Ys1);
852is_subset_2([_ | Xs1], [_ | Ys1]) ->
853    is_subset_2(Xs1, Ys1);
854is_subset_2([], _) ->
855    true;
856is_subset_2(_, []) ->
857    false.
858
859
860%% For compatibility with `sets':
861
862-spec is_set(Term) -> boolean() when
863      Term :: term().
864
865is_set({0, nil}) -> true;
866is_set({N, {_, _, _}}) when is_integer(N), N >= 0 -> true;
867is_set(_) -> false.
868
869-spec filter(Pred, Set1) -> Set2 when
870      Pred :: fun((Element) -> boolean()),
871      Set1 :: set(Element),
872      Set2 :: set(Element).
873
874filter(F, S) ->
875    from_ordset([X || X <- to_list(S), F(X)]).
876
877-spec fold(Function, Acc0, Set) -> Acc1 when
878      Function :: fun((Element, AccIn) -> AccOut),
879      Acc0 :: Acc,
880      Acc1 :: Acc,
881      AccIn :: Acc,
882      AccOut :: Acc,
883      Set :: set(Element).
884
885fold(F, A, {_, T}) when is_function(F, 2) ->
886    fold_1(F, A, T).
887
888fold_1(F, Acc0, {Key, Small, Big}) ->
889    Acc1 = fold_1(F, Acc0, Small),
890    Acc = F(Key, Acc1),
891    fold_1(F, Acc, Big);
892fold_1(_, Acc, _) ->
893    Acc.
894