1#
2#
3#            Nim's Runtime Library
4#        (c) Copyright 2012 Andreas Rumpf
5#
6#    See the file "copying.txt", included in this
7#    distribution, for details about the copyright.
8#
9
10## The `packedsets` module implements an efficient `Ordinal` set implemented as a
11## `sparse bit set`:idx:.
12##
13## Supports any Ordinal type.
14##
15## .. note:: Currently the assignment operator `=` for `PackedSet[A]`
16##   performs some rather meaningless shallow copy. Since Nim currently does
17##   not allow the assignment operator to be overloaded, use the `assign proc
18##   <#assign,PackedSet[A],PackedSet[A]>`_ to get a deep copy.
19##
20## See also
21## ========
22## * `sets module <sets.html>`_ for more general hash sets
23
24import std/private/since
25import hashes
26
27type
28  BitScalar = uint
29
30const
31  InitIntSetSize = 8              # must be a power of two!
32  TrunkShift = 9
33  BitsPerTrunk = 1 shl TrunkShift # needs to be a power of 2 and
34                                  # divisible by 64
35  TrunkMask = BitsPerTrunk - 1
36  IntsPerTrunk = BitsPerTrunk div (sizeof(BitScalar) * 8)
37  IntShift = 5 + ord(sizeof(BitScalar) == 8) # 5 or 6, depending on int width
38  IntMask = 1 shl IntShift - 1
39
40type
41  Trunk {.acyclic.} = ref object
42    next: Trunk                                 # all nodes are connected with this pointer
43    key: int                                    # start address at bit 0
44    bits: array[0..IntsPerTrunk - 1, BitScalar] # a bit vector
45
46  TrunkSeq = seq[Trunk]
47
48  PackedSet*[A: Ordinal] = object
49    ## An efficient set of `Ordinal` types implemented as a sparse bit set.
50    elems: int           # only valid for small numbers
51    counter, max: int
52    head: Trunk
53    data: TrunkSeq
54    a: array[0..33, int] # profiling shows that 34 elements are enough
55
56proc mustRehash[T](t: T): bool {.inline.} =
57  let length = t.max + 1
58  assert length > t.counter
59  result = (length * 2 < t.counter * 3) or (length - t.counter < 4)
60
61proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
62  const PERTURB_SHIFT = 5
63  var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
64  perturb = cast[Hash](perturb2)
65  result = ((5 * h) + 1 + perturb) and maxHash
66
67proc packedSetGet[A](t: PackedSet[A], key: int): Trunk =
68  var h = key and t.max
69  var perturb = key
70  while t.data[h] != nil:
71    if t.data[h].key == key:
72      return t.data[h]
73    h = nextTry(h, t.max, perturb)
74  result = nil
75
76proc intSetRawInsert[A](t: PackedSet[A], data: var TrunkSeq, desc: Trunk) =
77  var h = desc.key and t.max
78  var perturb = desc.key
79  while data[h] != nil:
80    assert data[h] != desc
81    h = nextTry(h, t.max, perturb)
82  assert data[h] == nil
83  data[h] = desc
84
85proc intSetEnlarge[A](t: var PackedSet[A]) =
86  var n: TrunkSeq
87  var oldMax = t.max
88  t.max = ((t.max + 1) * 2) - 1
89  newSeq(n, t.max + 1)
90  for i in countup(0, oldMax):
91    if t.data[i] != nil: intSetRawInsert(t, n, t.data[i])
92  swap(t.data, n)
93
94proc intSetPut[A](t: var PackedSet[A], key: int): Trunk =
95  var h = key and t.max
96  var perturb = key
97  while t.data[h] != nil:
98    if t.data[h].key == key:
99      return t.data[h]
100    h = nextTry(h, t.max, perturb)
101  if mustRehash(t): intSetEnlarge(t)
102  inc(t.counter)
103  h = key and t.max
104  perturb = key
105  while t.data[h] != nil: h = nextTry(h, t.max, perturb)
106  assert t.data[h] == nil
107  new(result)
108  result.next = t.head
109  result.key = key
110  t.head = result
111  t.data[h] = result
112
113proc bitincl[A](s: var PackedSet[A], key: int) {.inline.} =
114  var ret: Trunk
115  var t = intSetPut(s, key shr TrunkShift)
116  var u = key and TrunkMask
117  t.bits[u shr IntShift] = t.bits[u shr IntShift] or
118      (BitScalar(1) shl (u and IntMask))
119
120proc exclImpl[A](s: var PackedSet[A], key: int) =
121  if s.elems <= s.a.len:
122    for i in 0..<s.elems:
123      if s.a[i] == key:
124        s.a[i] = s.a[s.elems - 1]
125        dec(s.elems)
126        return
127  else:
128    var t = packedSetGet(s, key shr TrunkShift)
129    if t != nil:
130      var u = key and TrunkMask
131      t.bits[u shr IntShift] = t.bits[u shr IntShift] and
132          not(BitScalar(1) shl (u and IntMask))
133
134template dollarImpl(): untyped =
135  result = "{"
136  for key in items(s):
137    if result.len > 1: result.add(", ")
138    result.add $key
139  result.add("}")
140
141iterator items*[A](s: PackedSet[A]): A {.inline.} =
142  ## Iterates over any included element of `s`.
143  if s.elems <= s.a.len:
144    for i in 0..<s.elems:
145      yield A(s.a[i])
146  else:
147    var r = s.head
148    while r != nil:
149      var i = 0
150      while i <= high(r.bits):
151        var w: uint = r.bits[i]
152        # taking a copy of r.bits[i] here is correct, because
153        # modifying operations are not allowed during traversation
154        var j = 0
155        while w != 0: # test all remaining bits for zero
156          if (w and 1) != 0: # the bit is set!
157            yield A((r.key shl TrunkShift) or (i shl IntShift +% j))
158          inc(j)
159          w = w shr 1
160        inc(i)
161      r = r.next
162
163proc initPackedSet*[A]: PackedSet[A] =
164  ## Returns an empty `PackedSet[A]`.
165  ## `A` must be `Ordinal`.
166  ##
167  ## **See also:**
168  ## * `toPackedSet proc <#toPackedSet,openArray[A]>`_
169  runnableExamples:
170    let a = initPackedSet[int]()
171    assert len(a) == 0
172
173    type Id = distinct int
174    var ids = initPackedSet[Id]()
175    ids.incl(3.Id)
176
177  result = PackedSet[A](
178    elems: 0,
179    counter: 0,
180    max: 0,
181    head: nil,
182    data: @[])
183  #  a: array[0..33, int] # profiling shows that 34 elements are enough
184
185proc contains*[A](s: PackedSet[A], key: A): bool =
186  ## Returns true if `key` is in `s`.
187  ##
188  ## This allows the usage of the `in` operator.
189  runnableExamples:
190    type ABCD = enum A, B, C, D
191
192    let a = [1, 3, 5].toPackedSet
193    assert a.contains(3)
194    assert 3 in a
195    assert not a.contains(8)
196    assert 8 notin a
197
198    let letters = [A, C].toPackedSet
199    assert A in letters
200    assert C in letters
201    assert B notin letters
202
203  if s.elems <= s.a.len:
204    for i in 0..<s.elems:
205      if s.a[i] == ord(key): return true
206  else:
207    var t = packedSetGet(s, ord(key) shr TrunkShift)
208    if t != nil:
209      var u = ord(key) and TrunkMask
210      result = (t.bits[u shr IntShift] and
211                (BitScalar(1) shl (u and IntMask))) != 0
212    else:
213      result = false
214
215proc incl*[A](s: var PackedSet[A], key: A) =
216  ## Includes an element `key` in `s`.
217  ##
218  ## This doesn't do anything if `key` is already in `s`.
219  ##
220  ## **See also:**
221  ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element
222  ## * `incl proc <#incl,PackedSet[A],PackedSet[A]>`_ for including a set
223  ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_
224  runnableExamples:
225    var a = initPackedSet[int]()
226    a.incl(3)
227    a.incl(3)
228    assert len(a) == 1
229
230  if s.elems <= s.a.len:
231    for i in 0..<s.elems:
232      if s.a[i] == ord(key): return
233    if s.elems < s.a.len:
234      s.a[s.elems] = ord(key)
235      inc(s.elems)
236      return
237    newSeq(s.data, InitIntSetSize)
238    s.max = InitIntSetSize - 1
239    for i in 0..<s.elems:
240      bitincl(s, s.a[i])
241    s.elems = s.a.len + 1
242    # fall through:
243  bitincl(s, ord(key))
244
245proc incl*[A](s: var PackedSet[A], other: PackedSet[A]) =
246  ## Includes all elements from `other` into `s`.
247  ##
248  ## This is the in-place version of `s + other <#+,PackedSet[A],PackedSet[A]>`_.
249  ##
250  ## **See also:**
251  ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding a set
252  ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element
253  ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_
254  runnableExamples:
255    var a = [1].toPackedSet
256    a.incl([5].toPackedSet)
257    assert len(a) == 2
258    assert 5 in a
259
260  for item in other.items: incl(s, item)
261
262proc toPackedSet*[A](x: openArray[A]): PackedSet[A] {.since: (1, 3).} =
263  ## Creates a new `PackedSet[A]` that contains the elements of `x`.
264  ##
265  ## Duplicates are removed.
266  ##
267  ## **See also:**
268  ## * `initPackedSet proc <#initPackedSet>`_
269  runnableExamples:
270    let a = [5, 6, 7, 8, 8].toPackedSet
271    assert len(a) == 4
272    assert $a == "{5, 6, 7, 8}"
273
274  result = initPackedSet[A]()
275  for item in x:
276    result.incl(item)
277
278proc containsOrIncl*[A](s: var PackedSet[A], key: A): bool =
279  ## Includes `key` in the set `s` and tells if `key` was already in `s`.
280  ##
281  ## The difference with regards to the `incl proc <#incl,PackedSet[A],A>`_ is
282  ## that this proc returns true if `s` already contained `key`. The
283  ## proc will return false if `key` was added as a new value to `s` during
284  ## this call.
285  ##
286  ## **See also:**
287  ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element
288  ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_
289  runnableExamples:
290    var a = initPackedSet[int]()
291    assert a.containsOrIncl(3) == false
292    assert a.containsOrIncl(3) == true
293    assert a.containsOrIncl(4) == false
294
295  if s.elems <= s.a.len:
296    for i in 0..<s.elems:
297      if s.a[i] == ord(key):
298        return true
299    incl(s, key)
300    result = false
301  else:
302    var t = packedSetGet(s, ord(key) shr TrunkShift)
303    if t != nil:
304      var u = ord(key) and TrunkMask
305      result = (t.bits[u shr IntShift] and BitScalar(1) shl (u and IntMask)) != 0
306      if not result:
307        t.bits[u shr IntShift] = t.bits[u shr IntShift] or
308            (BitScalar(1) shl (u and IntMask))
309    else:
310      incl(s, key)
311      result = false
312
313proc excl*[A](s: var PackedSet[A], key: A) =
314  ## Excludes `key` from the set `s`.
315  ##
316  ## This doesn't do anything if `key` is not found in `s`.
317  ##
318  ## **See also:**
319  ## * `incl proc <#incl,PackedSet[A],A>`_ for including an element
320  ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding a set
321  ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_
322  runnableExamples:
323    var a = [3].toPackedSet
324    a.excl(3)
325    a.excl(3)
326    a.excl(99)
327    assert len(a) == 0
328
329  exclImpl[A](s, ord(key))
330
331proc excl*[A](s: var PackedSet[A], other: PackedSet[A]) =
332  ## Excludes all elements from `other` from `s`.
333  ##
334  ## This is the in-place version of `s - other <#-,PackedSet[A],PackedSet[A]>`_.
335  ##
336  ## **See also:**
337  ## * `incl proc <#incl,PackedSet[A],PackedSet[A]>`_ for including a set
338  ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element
339  ## * `missingOrExcl proc <#missingOrExcl,PackedSet[A],A>`_
340  runnableExamples:
341    var a = [1, 5].toPackedSet
342    a.excl([5].toPackedSet)
343    assert len(a) == 1
344    assert 5 notin a
345
346  for item in other.items:
347    excl(s, item)
348
349proc len*[A](s: PackedSet[A]): int {.inline.} =
350  ## Returns the number of elements in `s`.
351  runnableExamples:
352    let a = [1, 3, 5].toPackedSet
353    assert len(a) == 3
354
355  if s.elems < s.a.len:
356    result = s.elems
357  else:
358    result = 0
359    for _ in s.items:
360      # pending bug #11167; when fixed, check each explicit `items` to see if it can be removed
361      inc(result)
362
363proc missingOrExcl*[A](s: var PackedSet[A], key: A): bool =
364  ## Excludes `key` from the set `s` and tells if `key` was already missing from `s`.
365  ##
366  ## The difference with regards to the `excl proc <#excl,PackedSet[A],A>`_ is
367  ## that this proc returns true if `key` was missing from `s`.
368  ## The proc will return false if `key` was in `s` and it was removed
369  ## during this call.
370  ##
371  ## **See also:**
372  ## * `excl proc <#excl,PackedSet[A],A>`_ for excluding an element
373  ## * `excl proc <#excl,PackedSet[A],PackedSet[A]>`_ for excluding a set
374  ## * `containsOrIncl proc <#containsOrIncl,PackedSet[A],A>`_
375  runnableExamples:
376    var a = [5].toPackedSet
377    assert a.missingOrExcl(5) == false
378    assert a.missingOrExcl(5) == true
379
380  var count = s.len
381  exclImpl(s, ord(key))
382  result = count == s.len
383
384proc clear*[A](result: var PackedSet[A]) =
385  ## Clears the `PackedSet[A]` back to an empty state.
386  runnableExamples:
387    var a = [5, 7].toPackedSet
388    clear(a)
389    assert len(a) == 0
390
391  # setLen(result.data, InitIntSetSize)
392  # for i in 0..InitIntSetSize - 1: result.data[i] = nil
393  # result.max = InitIntSetSize - 1
394  result.data = @[]
395  result.max = 0
396  result.counter = 0
397  result.head = nil
398  result.elems = 0
399
400proc isNil*[A](x: PackedSet[A]): bool {.inline.} =
401  ## Returns true if `x` is empty, false otherwise.
402  runnableExamples:
403    var a = initPackedSet[int]()
404    assert a.isNil
405    a.incl(2)
406    assert not a.isNil
407    a.excl(2)
408    assert a.isNil
409
410  x.head.isNil and x.elems == 0
411
412proc assign*[A](dest: var PackedSet[A], src: PackedSet[A]) =
413  ## Copies `src` to `dest`.
414  ## `dest` does not need to be initialized by the `initPackedSet proc <#initPackedSet>`_.
415  runnableExamples:
416    var
417      a = initPackedSet[int]()
418      b = initPackedSet[int]()
419    b.incl(5)
420    b.incl(7)
421    a.assign(b)
422    assert len(a) == 2
423
424  if src.elems <= src.a.len:
425    dest.data = @[]
426    dest.max = 0
427    dest.counter = src.counter
428    dest.head = nil
429    dest.elems = src.elems
430    dest.a = src.a
431  else:
432    dest.counter = src.counter
433    dest.max = src.max
434    dest.elems = src.elems
435    newSeq(dest.data, src.data.len)
436
437    var it = src.head
438    while it != nil:
439      var h = it.key and dest.max
440      var perturb = it.key
441      while dest.data[h] != nil: h = nextTry(h, dest.max, perturb)
442      assert dest.data[h] == nil
443      var n: Trunk
444      new(n)
445      n.next = dest.head
446      n.key = it.key
447      n.bits = it.bits
448      dest.head = n
449      dest.data[h] = n
450      it = it.next
451
452proc union*[A](s1, s2: PackedSet[A]): PackedSet[A] =
453  ## Returns the union of the sets `s1` and `s2`.
454  ##
455  ## The same as `s1 + s2 <#+,PackedSet[A],PackedSet[A]>`_.
456  runnableExamples:
457    let
458      a = [1, 2, 3].toPackedSet
459      b = [3, 4, 5].toPackedSet
460      c = union(a, b)
461    assert c.len == 5
462    assert c == [1, 2, 3, 4, 5].toPackedSet
463
464  result.assign(s1)
465  incl(result, s2)
466
467proc intersection*[A](s1, s2: PackedSet[A]): PackedSet[A] =
468  ## Returns the intersection of the sets `s1` and `s2`.
469  ##
470  ## The same as `s1 * s2 <#*,PackedSet[A],PackedSet[A]>`_.
471  runnableExamples:
472    let
473      a = [1, 2, 3].toPackedSet
474      b = [3, 4, 5].toPackedSet
475      c = intersection(a, b)
476    assert c.len == 1
477    assert c == [3].toPackedSet
478
479  result = initPackedSet[A]()
480  for item in s1.items:
481    if contains(s2, item):
482      incl(result, item)
483
484proc difference*[A](s1, s2: PackedSet[A]): PackedSet[A] =
485  ## Returns the difference of the sets `s1` and `s2`.
486  ##
487  ## The same as `s1 - s2 <#-,PackedSet[A],PackedSet[A]>`_.
488  runnableExamples:
489    let
490      a = [1, 2, 3].toPackedSet
491      b = [3, 4, 5].toPackedSet
492      c = difference(a, b)
493    assert c.len == 2
494    assert c == [1, 2].toPackedSet
495
496  result = initPackedSet[A]()
497  for item in s1.items:
498    if not contains(s2, item):
499      incl(result, item)
500
501proc symmetricDifference*[A](s1, s2: PackedSet[A]): PackedSet[A] =
502  ## Returns the symmetric difference of the sets `s1` and `s2`.
503  runnableExamples:
504    let
505      a = [1, 2, 3].toPackedSet
506      b = [3, 4, 5].toPackedSet
507      c = symmetricDifference(a, b)
508    assert c.len == 4
509    assert c == [1, 2, 4, 5].toPackedSet
510
511  result.assign(s1)
512  for item in s2.items:
513    if containsOrIncl(result, item):
514      excl(result, item)
515
516proc `+`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} =
517  ## Alias for `union(s1, s2) <#union,PackedSet[A],PackedSet[A]>`_.
518  result = union(s1, s2)
519
520proc `*`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} =
521  ## Alias for `intersection(s1, s2) <#intersection,PackedSet[A],PackedSet[A]>`_.
522  result = intersection(s1, s2)
523
524proc `-`*[A](s1, s2: PackedSet[A]): PackedSet[A] {.inline.} =
525  ## Alias for `difference(s1, s2) <#difference,PackedSet[A],PackedSet[A]>`_.
526  result = difference(s1, s2)
527
528proc disjoint*[A](s1, s2: PackedSet[A]): bool =
529  ## Returns true if the sets `s1` and `s2` have no items in common.
530  runnableExamples:
531    let
532      a = [1, 2].toPackedSet
533      b = [2, 3].toPackedSet
534      c = [3, 4].toPackedSet
535    assert disjoint(a, b) == false
536    assert disjoint(a, c) == true
537
538  for item in s1.items:
539    if contains(s2, item):
540      return false
541  return true
542
543proc card*[A](s: PackedSet[A]): int {.inline.} =
544  ## Alias for `len() <#len,PackedSet[A]>`_.
545  ##
546  ## Card stands for the [cardinality](http://en.wikipedia.org/wiki/Cardinality)
547  ## of a set.
548  result = s.len()
549
550proc `<=`*[A](s1, s2: PackedSet[A]): bool =
551  ## Returns true if `s1` is a subset of `s2`.
552  ##
553  ## A subset `s1` has all of its elements in `s2`, but `s2` doesn't necessarily
554  ## have more elements than `s1`. That is, `s1` can be equal to `s2`.
555  runnableExamples:
556    let
557      a = [1].toPackedSet
558      b = [1, 2].toPackedSet
559      c = [1, 3].toPackedSet
560    assert a <= b
561    assert b <= b
562    assert not (c <= b)
563
564  for item in s1.items:
565    if not s2.contains(item):
566      return false
567  return true
568
569proc `<`*[A](s1, s2: PackedSet[A]): bool =
570  ## Returns true if `s1` is a proper subset of `s2`.
571  ##
572  ## A strict or proper subset `s1` has all of its elements in `s2`, but `s2` has
573  ## more elements than `s1`.
574  runnableExamples:
575    let
576      a = [1].toPackedSet
577      b = [1, 2].toPackedSet
578      c = [1, 3].toPackedSet
579    assert a < b
580    assert not (b < b)
581    assert not (c < b)
582
583  return s1 <= s2 and not (s2 <= s1)
584
585proc `==`*[A](s1, s2: PackedSet[A]): bool =
586  ## Returns true if both `s1` and `s2` have the same elements and set size.
587  runnableExamples:
588    assert [1, 2].toPackedSet == [2, 1].toPackedSet
589    assert [1, 2].toPackedSet == [2, 1, 2].toPackedSet
590
591  return s1 <= s2 and s2 <= s1
592
593proc `$`*[A](s: PackedSet[A]): string =
594  ## Converts `s` to a string.
595  runnableExamples:
596    let a = [1, 2, 3].toPackedSet
597    assert $a == "{1, 2, 3}"
598
599  dollarImpl()
600