1;;
2;; Copyright (C) John Cowan 2013. All Rights Reserved.
3;;
4;; Permission is hereby granted, free of charge, to any person obtaining
5;; a copy of this software and associated documentation
6;; files (the "Software"), to deal in the Software without restriction,
7;; including without limitation the rights to use, copy, modify, merge,
8;; publish, distribute, sublicense, and/or sell copies of the Software,
9;; and to permit persons to whom the Software is furnished to do so,
10;; subject to the following conditions:
11;;
12;; The above copyright notice and this permission notice shall be
13;; included in all copies or substantial portions of the Software.
14;;
15;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19;; LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20;; OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21;; WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
23;; Apapted to Gauche by Shiro Kawai
24
25;;;; Implementation of general sets and bags for SRFI 113
26
27;;; A "sob" object is the representation of both sets and bags.
28;;; This allows each set-* and bag-* procedure to be implemented
29;;; using the same code, without having to deal in ugly indirections
30;;; over the field accessors.  There are three fields, "sob-multi?",
31;;; "sob-hash-table", and "sob-comparator."
32
33;;; The value of "sob-multi?" is #t for bags and #f for sets.
34;;; "Sob-hash-table" maps the elements of the sob to the number of times
35;;; the element appears, which is always 1 for a set, any positive value
36;;; for a bag.  "Sob-comparator" is the comparator for the elements of
37;;; the set.
38
39;;; Note that sob-* procedures do not do type checking or (typically) the
40;;; copying required for supporting pure functional update.  These things
41;;; are done by the set-* and bag-* procedures, which are externally
42;;; exposed (but trivial and mostly uncommented below).
43
44
45;;; Shim to convert from SRFI 69 to the future "intermediate hash tables"
46;;; SRFI.  Unfortunately, hash-table-fold is incompatible between the two
47;;; and so is not usable.
48
49(define-module srfi-113
50  (use srfi-114)
51  (use gauche.collection)
52  (use gauche.generator)
53  (export set set-unfold
54          set? set-contains? set-empty? set-disjoint?
55          set-member set-element-comparator
56          set-adjoin set-adjoin! set-replace set-replace!
57          set-delete set-delete! set-delete-all set-delete-all! set-search!
58          set-size set-find set-count set-any? set-every?
59          set-map set-for-each set-fold
60          set-filter set-remove set-remove set-partition
61          set-filter! set-remove! set-partition!
62          set-copy set->list list->set list->set!
63          set=? set<? set>? set<=? set>=?
64          set-union set-intersection set-difference set-xor
65          set-union! set-intersection! set-difference! set-xor!
66          set-comparator
67
68          bag bag-unfold
69          bag? bag-contains? bag-empty? bag-disjoint?
70          bag-member bag-element-comparator
71          bag-adjoin bag-adjoin! bag-replace bag-replace!
72          bag-delete bag-delete! bag-delete-all bag-delete-all! bag-search!
73          bag-size bag-find bag-count bag-any? bag-every?
74          bag-map bag-for-each bag-fold
75          bag-filter bag-remove bag-partition
76          bag-filter! bag-remove! bag-partition!
77          bag-copy bag->list list->bag list->bag!
78          bag=? bag<? bag>? bag<=? bag>=?
79          bag-union bag-intersection bag-difference bag-xor
80          bag-union! bag-intersection! bag-difference! bag-xor!
81          bag-comparator
82
83          bag-sum bag-sum! bag-product bag-product!
84          bag-unique-size bag-element-count bag-for-each-unique bag-fold-unique
85          bag-increment! bag-decrement! bag->set set->bag set->bag!
86          bag->alist alist->bag
87          ))
88(select-module srfi-113)
89
90;; Gauche adaptation
91
92(define hash-table-contains? hash-table-exists?)
93(define (hash-table-for-each proc hash-table) ; argument order reversed
94  ((with-module gauche hash-table-for-each) hash-table proc))
95(define make-hash-table/comparator make-hash-table)
96(define hash-table-size hash-table-num-entries)
97(define hash-table-ref/default hash-table-get)
98(define hash-table-set! hash-table-put!)
99(define hash-table-update!/default hash-table-update!)
100
101(define-class <sob> (<collection>)
102  ((table :init-keyword :table)))
103(define-class <set> (<sob>) ())
104(define-class <bag> (<sob>) ())
105
106(define (raw-make-sob hash-table comparator multi?)
107  (if multi?
108    (make <bag> :table hash-table)
109    (make <set> :table hash-table)))
110
111(define (set? obj) (is-a? obj <set>))
112(define (bag? obj) (is-a? obj <bag>))
113
114;; we assume sob is either <set> or <bag>
115(define (sob-hash-table sob) (slot-ref sob 'table))
116(define (sob-comparator sob) (hash-table-comparator (slot-ref sob'table)))
117(define (sob-multi? sob) (is-a? sob <bag>))
118
119(define-method object-hash ((obj <sob>)) (sob-hash obj))
120(define-method object-equal? ((a <set>) (b <set>)) (sob=? a b))
121(define-method object-equal? ((a <bag>) (b <bag>)) (sob=? a b))
122
123(define-method write-object ((obj <set>) out)
124  (format out "#<set ~a ~aitems>" (debug-label obj)
125          (hash-table-num-entries (sob-hash-table obj))))
126(define-method write-object ((obj <bag>) out)
127  (format out "#<bag ~a ~akinds, ~aitems>" (debug-label obj)
128          (hash-table-num-entries (sob-hash-table obj))
129          (apply + (hash-table-values (sob-hash-table obj)))))
130
131(define-method call-with-iterator ((obj <sob>) proc :allow-other-keys)
132  ($ call-with-iterator (sob-hash-table obj)
133     (^[end? next]
134       (define current (if (end?) #f (next))) ; (<item> . <count>)
135       (define (over?) (or (not current) (and (end?) (zero? (cdr current)))))
136       (define (get) (and (pair? current)
137                          (if (zero? (cdr current))
138                            (and (not (end?))
139                                 (begin (set! current (next))
140                                        (dec! (cdr current))
141                                        (car current)))
142                            (begin (dec! (cdr current))
143                                   (car current)))))
144       (proc over? get))))
145
146;; TODO: To impelment call-with-builder, we need some way to specify
147;; comparator argument for the constructor.
148
149;; The following code is mostly intact from the reference implementation
150;; I commented out some irrelevant definitions, and rewrote early break
151;; pattern (by call/cc) with hash-table-find.
152
153;;; Record definition and core typing/checking procedures
154
155;; (define-record-type sob
156;;   (raw-make-sob hash-table comparator multi?)
157;;   sob?
158;;   (hash-table sob-hash-table)
159;;   (comparator sob-comparator)
160;;   (multi? sob-multi?))
161
162;; (define (set? obj) (and (sob? obj) (not (sob-multi? obj))))
163
164;; (define (bag? obj) (and (sob? obj) (sob-multi? obj)))
165
166(define (check-set obj) (if (not (set? obj)) (error "not a set" obj)))
167
168(define (check-bag obj) (if (not (bag? obj)) (error "not a bag" obj)))
169
170;; These procedures verify that not only are their arguments all sets
171;; or all bags as the case may be, but also share the same comparator.
172
173(define (check-all-sets list)
174  (for-each (lambda (obj) (check-set obj)) list)
175  (sob-check-comparators list))
176
177(define (check-all-bags list)
178  (for-each (lambda (obj) (check-bag obj)) list)
179  (sob-check-comparators list))
180
181(define (sob-check-comparators list)
182  (if (not (null? list))
183      (for-each
184        (lambda (sob)
185          (check-same-comparator (car list) sob))
186        (cdr list))))
187
188;; This procedure is used directly when there are exactly two arguments.
189;; [SK] Gauche's equal? is more permissive than eq? when comparing comparators.
190(define (check-same-comparator a b)
191  (if (not (equal? (sob-comparator a) (sob-comparator b)))
192    (error "different comparators" a b)))
193
194;; This procedure defends against inserting an element
195;; into a sob that violates its constructor, since
196;; typical hash-table implementations don't check for us.
197
198(define (check-element sob element)
199  (comparator-check-type (sob-comparator sob) element))
200
201;;; Constructors
202
203;; Construct an arbitrary empty sob out of nothing.
204
205(define (make-sob comparator multi?)
206  (raw-make-sob (make-hash-table/comparator comparator) comparator multi?))
207
208;; Copy a sob, sharing the constructor.
209
210(define (sob-copy sob)
211  (raw-make-sob (hash-table-copy (sob-hash-table sob))
212            (sob-comparator sob)
213            (sob-multi? sob)))
214
215(define (set-copy set)
216  (check-set set)
217  (sob-copy set))
218
219(define (bag-copy bag)
220  (check-bag bag)
221  (sob-copy bag))
222
223;; Construct an empty sob that shares the constructor of an existing sob.
224
225(define (sob-empty-copy sob)
226  (make-sob (sob-comparator sob) (sob-multi? sob)))
227
228;; Construct a set or a bag and insert elements into it.  These are the
229;; simplest external constructors.
230
231(define (set comparator . elements)
232  (let ((result (make-sob comparator #f)))
233    (for-each (lambda (x) (sob-increment! result x 1)) elements)
234    result))
235
236(define (bag comparator . elements)
237  (let ((result (make-sob comparator #t)))
238    (for-each (lambda (x) (sob-increment! result x 1)) elements)
239    result))
240
241;; The fundamental (as opposed to simplest) constructor: unfold the
242;; results of iterating a function as a set.  In line with SRFI 1,
243;; we provide an opportunity to map the sequence of seeds through a
244;; mapper function.
245
246(define (sob-unfold stop? mapper successor seed comparator multi?)
247  (let ((result (make-sob comparator multi?)))
248    (let loop ((seed seed))
249      (if (stop? seed)
250          result
251          (begin
252            (sob-increment! result (mapper seed) 1)
253            (loop (successor seed)))))))
254
255(define (set-unfold continue? mapper successor seed comparator)
256  (sob-unfold continue? mapper successor seed comparator #f))
257
258(define (bag-unfold continue? mapper successor seed comparator)
259  (sob-unfold continue? mapper successor seed comparator #t))
260
261;;; Predicates
262
263;; Just a wrapper of hash-table-contains?.
264
265(define (sob-contains? sob member)
266  (hash-table-contains? (sob-hash-table sob) member))
267
268(define (set-contains? set member)
269  (check-set set)
270  (sob-contains? set member))
271
272(define (bag-contains? bag member)
273  (check-bag bag)
274  (sob-contains? bag member))
275
276;; A sob is empty if its size is 0.
277
278(define (sob-empty? sob)
279  (= 0 (hash-table-size (sob-hash-table sob))))
280
281(define (set-empty? set)
282  (check-set set)
283  (sob-empty? set))
284
285(define (bag-empty? bag)
286  (check-bag bag)
287  (sob-empty? bag))
288
289;; Two sobs are disjoint if, when looping through one, we can't find
290;; any of its elements in the other.  We have to try both ways:
291;; sob-half-disjoint checks just one direction for simplicity.
292
293(define (sob-half-disjoint? a b)
294  (let ((ha (sob-hash-table a))
295        (hb (sob-hash-table b)))
296    (not (hash-table-find ha (^[k _] (hash-table-contains? hb k))))))
297
298(define (set-disjoint? a b)
299  (check-set a)
300  (check-set b)
301  (check-same-comparator a b)
302  (and (sob-half-disjoint? a b) (sob-half-disjoint? b a)))
303
304(define (bag-disjoint? a b)
305  (check-bag a)
306  (check-bag b)
307  (check-same-comparator a b)
308  (and (sob-half-disjoint? a b) (sob-half-disjoint? b a)))
309
310;; Accessors
311
312;; If two objects are indistinguishable by the comparator's
313;; equality procedure, only one of them will be represented in the sob.
314;; This procedure lets us find out which one it is; it will return
315;; the value stored in the sob that is equal to the element.
316;; Note that we have to search the whole hash table item by item.
317;; The default is returned if there is no such element.
318
319(define (sob-member sob element default)
320  (define (same? a b) (=? (sob-comparator sob) a b))
321  (let1 r (hash-table-find (sob-hash-table sob)
322                           (^[k v] (and (same? k element) (list k))))
323    (if (pair? r) (car r) default)))
324
325(define (set-member set element default)
326  (check-set set)
327  (sob-member set element default))
328
329(define (bag-member bag element default)
330  (check-bag bag)
331  (sob-member bag element default))
332
333;; Retrieve the comparator.
334
335(define (set-element-comparator set)
336  (check-set set)
337  (sob-comparator set))
338
339(define (bag-element-comparator bag)
340  (check-bag bag)
341  (sob-comparator bag))
342
343
344;; Updaters (pure functional and linear update)
345
346;; The primitive operation for adding an element to a sob.
347;; There are a few cases where we bypass this for efficiency.
348
349(define (sob-increment! sob element count)
350  (check-element sob element)
351  (hash-table-update!/default
352    (sob-hash-table sob)
353    element
354    (if (sob-multi? sob)
355      (lambda (value) (+ value count))
356      (lambda (value) 1))
357    0))
358
359;; The primitive operation for removing an element from a sob.  Note this
360;; procedure is incomplete: it allows the count of an element to drop below 1.
361;; Therefore, whenever it is used it is necessary to call sob-cleanup!
362;; to fix things up.  This is done because it is unsafe to remove an
363;; object from a hash table while iterating through it.
364
365(define (sob-decrement! sob element count)
366  (hash-table-update!/default
367    (sob-hash-table sob)
368    element
369    (lambda (value) (- value count))
370    0))
371
372;; This is the cleanup procedure, which happens in two passes: it
373;; iterates through the sob, deciding which elements to remove (those
374;; with non-positive counts), and collecting them in a list.  When the
375;; iteration is done, it is safe to remove the elements using the list,
376;; because we are no longer iterating over the hash table.  It returns
377;; its argument, because it is often tail-called at the end of some
378;; procedure that wants to return the clean sob.
379
380(define (sob-cleanup! sob)
381  (let ((ht (sob-hash-table sob)))
382    (for-each (lambda (key) (hash-table-delete! ht key))
383              (nonpositive-keys ht))
384    sob))
385
386(define (nonpositive-keys ht)
387  (let ((result '()))
388    (hash-table-for-each
389      (lambda (key value)
390        (when (<= value 0)
391          (set! result (cons key result))))
392      ht)
393    result))
394
395;; We expose these for bags but not sets.
396
397(define (bag-increment! bag element count)
398  (check-bag bag)
399  (sob-increment! bag element count)
400  bag)
401
402(define (bag-decrement! bag element count)
403  (check-bag bag)
404  (sob-decrement! bag element count)
405  (sob-cleanup! bag)
406  bag)
407
408;; The primitive operation to add elements from a list.  We expose
409;; this two ways: with a list argument and with multiple arguments.
410
411(define (sob-adjoin-all! sob elements)
412  (for-each
413    (lambda (elem)
414      (sob-increment! sob elem 1))
415    elements))
416
417(define (set-adjoin! set . elements)
418  (check-set set)
419  (sob-adjoin-all! set elements)
420  set)
421
422(define (bag-adjoin! bag . elements)
423  (check-bag bag)
424  (sob-adjoin-all! bag elements)
425  bag)
426
427
428;; These versions copy the set or bag before adjoining.
429
430(define (set-adjoin set . elements)
431  (check-set set)
432  (let ((result (sob-copy set)))
433    (sob-adjoin-all! result elements)
434    result))
435
436(define (bag-adjoin bag . elements)
437  (check-bag bag)
438  (let ((result (sob-copy bag)))
439    (sob-adjoin-all! result elements)
440    result))
441
442;; Given an element which resides in a set, this makes sure that the
443;; specified element is represented by the form given.  Thus if a
444;; sob contains 2 and the equality predicate is =, then calling
445;; (sob-replace! sob 2.0) will replace the 2 with 2.0.  Does nothing
446;; if there is no such element in the sob.
447
448(define (sob-replace! sob element)
449  (let* ((comparator (sob-comparator sob))
450         (= (comparator-equality-predicate comparator))
451         (ht (sob-hash-table sob)))
452    (comparator-check-type comparator element)
453    (or (hash-table-find ht
454                         (^[k v]
455                           (and (= k element)
456                                (begin
457                                  (hash-table-delete! ht k)
458                                  (hash-table-set! ht element v)
459                                  sob))))
460        sob)))
461
462(define (set-replace! set element)
463  (check-set set)
464  (sob-replace! set element)
465  set)
466
467(define (bag-replace! bag element)
468  (check-bag bag)
469  (sob-replace! bag element)
470  bag)
471
472;; Non-destructive versions that copy the set first.  Yes, a little
473;; bit inefficient because it copies the element to be replaced before
474;; actually replacing it.
475
476(define (set-replace set element)
477  (check-set set)
478  (let ((result (sob-copy set)))
479    (sob-replace! result element)
480    result))
481
482(define (bag-replace bag element)
483  (check-bag bag)
484  (let ((result (sob-copy bag)))
485    (sob-replace! result element)
486    result))
487
488;; The primitive operation to delete elemnets from a list.
489;; Like sob-adjoin-all!, this is exposed two ways.  It calls
490;; sob-cleanup! itself, so its callers don't need to (though it is safe
491;; to do so.)
492
493(define (sob-delete-all! sob elements)
494  (for-each (lambda (element) (sob-decrement! sob element 1)) elements)
495  (sob-cleanup! sob)
496  sob)
497
498(define (set-delete! set . elements)
499  (check-set set)
500  (sob-delete-all! set elements))
501
502(define (bag-delete! bag . elements)
503  (check-bag bag)
504  (sob-delete-all! bag elements))
505
506(define (set-delete-all! set elements)
507  (check-set set)
508  (sob-delete-all! set elements))
509
510(define (bag-delete-all! bag elements)
511  (check-bag bag)
512  (sob-delete-all! bag elements))
513
514;; Non-destructive version copy first; this is inefficient.
515
516(define (set-delete set . elements)
517  (check-set set)
518  (sob-delete-all! (sob-copy set) elements))
519
520(define (bag-delete bag . elements)
521  (check-bag bag)
522  (sob-delete-all! (sob-copy bag) elements))
523
524(define (set-delete-all set elements)
525  (check-set set)
526  (sob-delete-all! (sob-copy set) elements))
527
528(define (bag-delete-all bag elements)
529  (check-bag bag)
530  (sob-delete-all! (sob-copy bag) elements))
531
532;; Flag used by sob-search! to represent a missing object.
533
534(define missing (string-copy "missing"))
535
536;; Searches and then dispatches to user-defined procedures on failure
537;; and success, which in turn should reinvoke a procedure to take some
538;; action on the set (insert, ignore, replace, or remove).
539
540(define (sob-search! sob element failure success)
541  (define (insert obj)
542    (sob-increment! sob element 1)
543    (values sob obj))
544  (define (ignore obj)
545    (values sob obj))
546  (define (update new-elem obj)
547    (sob-decrement! sob element 1)
548    (sob-increment! sob new-elem 1)
549    (values (sob-cleanup! sob) obj))
550  (define (remove obj)
551    (sob-decrement! sob element 1)
552    (values (sob-cleanup! sob) obj))
553  (let ((true-element (sob-member sob element missing)))
554    (if (eq? true-element missing)
555      (failure insert ignore)
556      (success true-element update remove))))
557
558(define (set-search! set element failure success)
559  (check-set set)
560  (sob-search! set element failure success))
561
562(define (bag-search! bag element failure success)
563  (check-bag bag)
564  (sob-search! bag element failure success))
565
566;; Return the size of a sob.  If it's a set, we can just use the
567;; number of associations in the hash table, but if it's a bag, we
568;; have to add up the counts.
569
570(define (sob-size sob)
571  (if (sob-multi? sob)
572    (let ((result 0))
573      (hash-table-for-each
574        (lambda (elem count) (set! result (+ count result)))
575        (sob-hash-table sob))
576      result)
577    (hash-table-size (sob-hash-table sob))))
578
579(define (set-size set)
580  (check-set set)
581  (sob-size set))
582
583(define (bag-size bag)
584  (check-bag bag)
585  (sob-size bag))
586
587;; Search a sob to find something that matches a predicate.  You don't
588;; know which element you will get, so this is not as useful as finding
589;; an element in a list or other ordered container.  If it's not there,
590;; call the failure thunk.
591
592(define (sob-find pred sob failure)
593  (let1 r (hash-table-find (sob-hash-table sob)
594                           (^[k _] (and (pred k) (list k))))
595    (if (pair? r) (car r) (failure))))
596
597(define (set-find pred set failure)
598  (check-set set)
599  (sob-find pred set failure))
600
601(define (bag-find pred bag failure)
602  (check-bag bag)
603  (sob-find pred bag failure))
604
605;; Count the number of elements in the sob that satisfy the predicate.
606;; This is a special case of folding.
607
608(define (sob-count pred sob)
609  (sob-fold
610    (lambda (elem total) (if (pred elem) (+ total 1) total))
611    0
612    sob))
613
614(define (set-count pred set)
615  (check-set set)
616  (sob-count pred set))
617
618(define (bag-count pred bag)
619  (check-bag bag)
620  (sob-count pred bag))
621
622;; Check if any of the elements in a sob satisfy a predicate.  Breaks out
623;; early (with call/cc) if a success is found.
624
625(define (sob-any? pred sob)
626  (hash-table-find (sob-hash-table sob) (^[k _] (boolean (pred k)))))
627
628(define (set-any? pred set)
629  (check-set set)
630  (sob-any? pred set))
631
632(define (bag-any? pred bag)
633  (check-bag bag)
634  (sob-any? pred bag))
635
636;; Analogous to set-any?.  Breaks out early if a failure is found.
637
638(define (sob-every? pred sob)
639  (not (hash-table-find (sob-hash-table sob) (^[k _] (not (pred k))))))
640
641(define (set-every? pred set)
642  (check-set set)
643  (sob-every? pred set))
644
645(define (bag-every? pred bag)
646  (check-bag bag)
647  (sob-every? pred bag))
648
649
650;;; Mapping and folding
651
652;; A utility for iterating a command n times.  This is used by sob-for-each
653;; to execute a procedure over the repeated elements in a bag.  Because
654;; of the representation of sets, it works for them too.
655
656(define (do-n-times cmd n)
657  (let loop ((n n))
658    (when (> n 0)
659      (cmd)
660      (loop (- n 1)))))
661
662;; Basic iterator over a sob.
663
664(define (sob-for-each proc sob)
665  (hash-table-for-each
666    (lambda (key value) (do-n-times (lambda () (proc key)) value))
667    (sob-hash-table sob)))
668
669(define (set-for-each proc set)
670  (check-set set)
671  (sob-for-each proc set))
672
673(define (bag-for-each proc bag)
674  (check-bag bag)
675  (sob-for-each proc bag))
676
677;; Fundamental mapping operator.  We map over the associations directly,
678;; because each instance of an element in a bag will be treated identically
679;; anyway; we insert them all at once with sob-increment!.
680
681(define (sob-map comparator proc sob)
682  (let ((result (make-sob comparator (sob-multi? sob))))
683    (hash-table-for-each
684      (lambda (key value) (sob-increment! result (proc key) value))
685      (sob-hash-table sob))
686    result))
687
688(define (set-map comparator proc set)
689  (check-set set)
690  (sob-map comparator proc set))
691
692(define (bag-map comparator proc bag)
693  (check-bag bag)
694  (sob-map comparator proc bag))
695
696;; The fundamental deconstructor.  Note that there are no left vs. right
697;; folds because there is no order.  Each element in a bag is fed into
698;; the fold separately.
699
700(define (sob-fold proc nil sob)
701  (let ((result nil))
702    (sob-for-each
703      (lambda (elem) (set! result (proc elem result)))
704      sob)
705    result))
706
707(define (set-fold proc nil set)
708  (check-set set)
709  (sob-fold proc nil set))
710
711(define (bag-fold proc nil bag)
712  (check-bag bag)
713  (sob-fold proc nil bag))
714
715;; Process every element and copy the ones that satisfy the predicate.
716;; Identical elements are processed all at once.  This is used for both
717;; filter and remove.
718
719(define (sob-filter pred sob)
720  (let ((result (sob-empty-copy sob)))
721    (hash-table-for-each
722      (lambda (key value)
723        (if (pred key) (sob-increment! result key value)))
724      (sob-hash-table sob))
725    result))
726
727(define (set-filter pred set)
728  (check-set set)
729  (sob-filter pred set))
730
731(define (bag-filter pred bag)
732  (check-bag bag)
733  (sob-filter pred bag))
734
735(define (set-remove pred set)
736  (check-set set)
737  (sob-filter (lambda (x) (not (pred x))) set))
738
739(define (bag-remove pred bag)
740  (check-bag bag)
741  (sob-filter (lambda (x) (not (pred x))) bag))
742
743;; Process each element and remove those that don't satisfy the filter.
744;; This does its own cleanup, and is used for both filter! and remove!.
745
746(define (sob-filter! pred sob)
747  (hash-table-for-each
748    (lambda (key value)
749      (if (not (pred key)) (sob-decrement! sob key value)))
750    (sob-hash-table sob))
751  (sob-cleanup! sob))
752
753(define (set-filter! pred set)
754  (check-set set)
755  (sob-filter! pred set))
756
757(define (bag-filter! pred bag)
758  (check-bag bag)
759  (sob-filter! pred bag))
760
761(define (set-remove! pred set)
762  (check-set set)
763  (sob-filter! (lambda (x) (not (pred x))) set))
764
765(define (bag-remove! pred bag)
766  (check-bag bag)
767  (sob-filter! (lambda (x) (not (pred x))) bag))
768
769;; Create two sobs and copy the elements that satisfy the predicate into
770;; one of them, all others into the other.  This is more efficient than
771;; filtering and removing separately.
772
773(define (sob-partition pred sob)
774  (let ((res1 (sob-empty-copy sob))
775        (res2 (sob-empty-copy sob)))
776    (hash-table-for-each
777      (lambda (key value)
778        (if (pred key)
779          (sob-increment! res1 key value)
780          (sob-increment! res2 key value)))
781      (sob-hash-table sob))
782    (values res1 res2)))
783
784(define (set-partition pred set)
785  (check-set set)
786  (sob-partition pred set))
787
788(define (bag-partition pred bag)
789  (check-bag bag)
790  (sob-partition pred bag))
791
792;; Create a sob and iterate through the given sob.  Anything that satisfies
793;; the predicate is left alone; anything that doesn't is removed from the
794;; given sob and added to the new sob.
795
796(define (sob-partition! pred sob)
797  (let ((result (sob-empty-copy sob)))
798    (hash-table-for-each
799      (lambda (key value)
800        (if (not (pred key))
801          (begin
802            (sob-decrement! sob key value)
803            (sob-increment! result key value))))
804      (sob-hash-table sob))
805    (values (sob-cleanup! sob) result)))
806
807(define (set-partition! pred set)
808  (check-set set)
809  (sob-partition! pred set))
810
811(define (bag-partition! pred bag)
812  (check-bag bag)
813  (sob-partition! pred bag))
814
815
816;;; Copying and conversion
817
818;;; Convert a sob to a list; a special case of sob-fold.
819
820(define (sob->list sob)
821  (sob-fold (lambda (elem list) (cons elem list)) '() sob))
822
823(define (set->list set)
824  (check-set set)
825  (sob->list set))
826
827(define (bag->list bag)
828  (check-bag bag)
829  (sob->list bag))
830
831;; Convert a list to a sob.  Probably could be done using unfold, but
832;; since sobs are mutable anyway, it's just as easy to add the elements
833;; by side effect.
834
835(define (list->sob! sob list)
836  (for-each (lambda (elem) (sob-increment! sob elem 1)) list)
837  sob)
838
839(define (list->set comparator list)
840  (list->sob! (make-sob comparator #f) list))
841
842(define (list->bag comparator list)
843  (list->sob! (make-sob comparator #t) list))
844
845(define (list->set! set list)
846  (check-set set)
847  (list->sob! set list))
848
849(define (list->bag! bag list)
850  (check-bag bag)
851  (list->sob! bag list))
852
853
854;;; Subsets
855
856;; All of these procedures follow the same pattern.  The
857;; sob<op>? procedures are case-lambdas that reduce the multi-argument
858;; case to the two-argument case.  As usual, the set<op>? and
859;; bag<op>? procedures are trivial layers over the sob<op>? procedure.
860;; The dyadic-sob<op>? procedures are where it gets interesting, so see
861;; the comments on them.
862
863(define sob=?
864  (case-lambda
865    ((sob) #t)
866    ((sob1 sob2) (dyadic-sob=? sob1 sob2))
867    ((sob1 sob2 . sobs)
868     (and (dyadic-sob=? sob1 sob2)
869          (apply sob=? sob2 sobs)))))
870
871(define (set=? . sets)
872  (check-all-sets sets)
873  (apply sob=? sets))
874
875(define (bag=? . bags)
876  (check-all-bags bags)
877  (apply sob=? bags))
878
879;; First we check that there are the same number of entries in the
880;; hashtables of the two sobs; if that's not true, they can't be equal.
881;; Then we check that for each key, the values are the same (where
882;; being absent counts as a value of 0).  If any values aren't equal,
883;; again they can't be equal.
884
885(define (sob-table-compare op ht1 ht2)
886  (and (op (hash-table-size ht1) (hash-table-size ht2))
887       (not ($ hash-table-find ht1
888               (^[k v] (not (op v (hash-table-ref/default ht2 k 0))))))))
889
890(define (dyadic-sob=? sob1 sob2)
891  (sob-table-compare = (sob-hash-table sob1) (sob-hash-table sob2)))
892
893(define sob<=?
894  (case-lambda
895    ((sob) #t)
896    ((sob1 sob2) (dyadic-sob<=? sob1 sob2))
897    ((sob1 sob2 . sobs)
898     (and (dyadic-sob<=? sob1 sob2)
899          (apply sob<=? sob2 sobs)))))
900
901(define (set<=? . sets)
902  (check-all-sets sets)
903  (apply sob<=? sets))
904
905(define (bag<=? . bags)
906  (check-all-bags bags)
907  (apply sob<=? bags))
908
909;; This is analogous to dyadic-sob=?, except that we have to check
910;; both sobs to make sure each value is <= in order to be sure
911;; that we've traversed all the elements in either sob.
912
913(define (dyadic-sob<=? sob1 sob2)
914  (sob-table-compare <= (sob-hash-table sob1) (sob-hash-table sob2)))
915
916(define sob<?
917  (case-lambda
918    ((sob) #t)
919    ((sob1 sob2) (dyadic-sob<? sob1 sob2))
920    ((sob1 sob2 . sobs)
921     (and (dyadic-sob<? sob1 sob2)
922          (apply sob<? sob2 sobs)))))
923
924(define (set<? . sets)
925  (check-all-sets sets)
926  (apply sob<? sets))
927
928(define (bag<? . bags)
929  (check-all-bags bags)
930  (apply sob<? bags))
931
932
933;; < is a bit complicated - we can't just negate >=, since subset relationship
934;; is partial order.  Neither does it follow the same pattern as = and <=,
935
936(define (dyadic-sob<? sob1 sob2)
937  (let ([ht1 (sob-hash-table sob1)]
938        [ht2 (sob-hash-table sob2)])
939    (and-let* ([small-count
940                (cond [(= (hash-table-size ht1) (hash-table-size ht2)) 0]
941                      [(< (hash-table-size ht1) (hash-table-size ht2)) 1]
942                      [else #f])]
943               [(not ($ hash-table-find ht1
944                        (^[k v]
945                          (let1 v2 (hash-table-ref/default ht2 k 0)
946                            (cond [(< v v2) (inc! small-count) #f]
947                                  [(> v v2) #t]
948                                  [else #f])))))])
949      (positive? small-count))))
950
951(define sob>?
952  (case-lambda
953    ((sob) #t)
954    ((sob1 sob2) (dyadic-sob>? sob1 sob2))
955    ((sob1 sob2 . sobs)
956     (and (dyadic-sob>? sob1 sob2)
957          (apply sob>? sob2 sobs)))))
958
959(define (set>? . sets)
960  (check-all-sets sets)
961  (apply sob>? sets))
962
963(define (bag>? . bags)
964  (check-all-bags bags)
965  (apply sob>? bags))
966
967;; > is the inverse of <.  this is only true dyadically.
968
969(define (dyadic-sob>? sob1 sob2)
970  (dyadic-sob<? sob2 sob1))
971
972(define sob>=?
973  (case-lambda
974    ((sob) #t)
975    ((sob1 sob2) (dyadic-sob>=? sob1 sob2))
976    ((sob1 sob2 . sobs)
977     (and (dyadic-sob>=? sob1 sob2)
978          (apply sob>=? sob2 sobs)))))
979
980(define (set>=? . sets)
981  (check-all-sets sets)
982  (apply sob>=? sets))
983
984(define (bag>=? . bags)
985  (check-all-bags bags)
986  (apply sob>=? bags))
987
988;; Finally, >= is the inverse of <=.
989
990(define (dyadic-sob>=? sob1 sob2)
991  (dyadic-sob<=? sob2 sob1))
992
993
994;;; Set theory operations
995
996;; A trivial helper function which upper-bounds n by one if multi? is false.
997
998(define (max-one n multi?)
999    (if multi? n (if (> n 1) 1 n)))
1000
1001;; The logic of union, intersection, difference, and sum is the same: the
1002;; sob-* and sob-*! procedures do the reduction to the dyadic-sob-*!
1003;; procedures.  The difference is that the sob-* procedures allocate
1004;; an empty copy of the first sob to accumulate the results in, whereas
1005;; the sob-*!  procedures work directly in the first sob.
1006
1007;; Note that there is no set-sum, as it is the same as set-union.
1008
1009(define (sob-union sob1 . sobs)
1010  (if (null? sobs)
1011    sob1
1012    (let ((result (sob-empty-copy sob1)))
1013      (dyadic-sob-union! result sob1 (car sobs))
1014      (for-each
1015       (lambda (sob) (dyadic-sob-union! result result sob))
1016       (cdr sobs))
1017      result)))
1018
1019;; For union, we take the max of the counts of each element found
1020;; in either sob and put that in the result.  On the pass through
1021;; sob2, we know that the intersection is already accounted for,
1022;; so we just copy over things that aren't in sob1.
1023
1024(define (dyadic-sob-union! result sob1 sob2)
1025  (let ((sob1-ht (sob-hash-table sob1))
1026        (sob2-ht (sob-hash-table sob2))
1027        (result-ht (sob-hash-table result)))
1028    (hash-table-for-each
1029      (lambda (key value1)
1030        (let ((value2 (hash-table-ref/default sob2-ht key 0)))
1031          (hash-table-set! result-ht key (max value1 value2))))
1032      sob1-ht)
1033    (hash-table-for-each
1034      (lambda (key value2)
1035        (let ((value1 (hash-table-ref/default sob1-ht key 0)))
1036          (if (= value1 0)
1037              (hash-table-set! result-ht key value2))))
1038      sob2-ht)))
1039
1040(define (set-union . sets)
1041  (check-all-sets sets)
1042  (apply sob-union sets))
1043
1044(define (bag-union . bags)
1045  (check-all-bags bags)
1046  (apply sob-union bags))
1047
1048(define (sob-union! sob1 . sobs)
1049  (for-each
1050   (lambda (sob) (dyadic-sob-union! sob1 sob1 sob))
1051   sobs)
1052  sob1)
1053
1054(define (set-union! . sets)
1055  (check-all-sets sets)
1056  (apply sob-union! sets))
1057
1058(define (bag-union! . bags)
1059  (check-all-bags bags)
1060  (apply sob-union! bags))
1061
1062(define (sob-intersection sob1 . sobs)
1063  (if (null? sobs)
1064    sob1
1065    (let ((result (sob-empty-copy sob1)))
1066      (dyadic-sob-intersection! result sob1 (car sobs))
1067      (for-each
1068       (lambda (sob) (dyadic-sob-intersection! result result sob))
1069       (cdr sobs))
1070      (sob-cleanup! result))))
1071
1072;; For intersection, we compute the min of the counts of each element.
1073;; We only have to scan sob1.  We clean up the result when we are
1074;; done, in case it is the same as sob1.
1075
1076(define (dyadic-sob-intersection! result sob1 sob2)
1077  (let ((sob1-ht (sob-hash-table sob1))
1078        (sob2-ht (sob-hash-table sob2))
1079        (result-ht (sob-hash-table result)))
1080    (hash-table-for-each
1081      (lambda (key value1)
1082        (let ((value2 (hash-table-ref/default sob2-ht key 0)))
1083          (hash-table-set! result-ht key (min value1 value2))))
1084      sob1-ht)))
1085
1086(define (set-intersection . sets)
1087  (check-all-sets sets)
1088  (apply sob-intersection sets))
1089
1090(define (bag-intersection . bags)
1091  (check-all-bags bags)
1092  (apply sob-intersection bags))
1093
1094(define (sob-intersection! sob1 . sobs)
1095  (for-each
1096   (lambda (sob) (dyadic-sob-intersection! sob1 sob1 sob))
1097   sobs)
1098  (sob-cleanup! sob1))
1099
1100(define (set-intersection! . sets)
1101  (check-all-sets sets)
1102  (apply sob-intersection! sets))
1103
1104(define (bag-intersection! . bags)
1105  (check-all-bags bags)
1106  (apply sob-intersection! bags))
1107
1108(define (sob-difference sob1 . sobs)
1109  (if (null? sobs)
1110    sob1
1111    (let ((result (sob-empty-copy sob1)))
1112      (dyadic-sob-difference! result sob1 (car sobs))
1113      (for-each
1114       (lambda (sob) (dyadic-sob-difference! result result sob))
1115       (cdr sobs))
1116      (sob-cleanup! result))))
1117
1118;; For difference, we use (big surprise) the numeric difference, bounded
1119;; by zero.  We only need to scan sob1, but we clean up the result in
1120;; case it is the same as sob1.
1121
1122(define (dyadic-sob-difference! result sob1 sob2)
1123  (let ((sob1-ht (sob-hash-table sob1))
1124        (sob2-ht (sob-hash-table sob2))
1125        (result-ht (sob-hash-table result)))
1126    (hash-table-for-each
1127      (lambda (key value1)
1128        (let ((value2 (hash-table-ref/default sob2-ht key 0)))
1129          (hash-table-set! result-ht key (- value1 value2))))
1130      sob1-ht)))
1131
1132(define (set-difference . sets)
1133  (check-all-sets sets)
1134  (apply sob-difference sets))
1135
1136(define (bag-difference . bags)
1137  (check-all-bags bags)
1138  (apply sob-difference bags))
1139
1140(define (sob-difference! sob1 . sobs)
1141  (for-each
1142   (lambda (sob) (dyadic-sob-difference! sob1 sob1 sob))
1143   sobs)
1144  (sob-cleanup! sob1))
1145
1146(define (set-difference! . sets)
1147  (check-all-sets sets)
1148  (apply sob-difference! sets))
1149
1150(define (bag-difference! . bags)
1151  (check-all-bags bags)
1152  (apply sob-difference! bags))
1153
1154(define (sob-sum sob1 . sobs)
1155  (if (null? sobs)
1156    sob1
1157    (let ((result (sob-empty-copy sob1)))
1158      (dyadic-sob-sum! result sob1 (car sobs))
1159      (for-each
1160       (lambda (sob) (dyadic-sob-sum! result result sob))
1161       (cdr sobs))
1162      result)))
1163
1164;; Sum is just like union, except that we take the sum rather than the max.
1165
1166(define (dyadic-sob-sum! result sob1 sob2)
1167  (let ((sob1-ht (sob-hash-table sob1))
1168        (sob2-ht (sob-hash-table sob2))
1169        (result-ht (sob-hash-table result)))
1170    (hash-table-for-each
1171      (lambda (key value1)
1172        (let ((value2 (hash-table-ref/default sob2-ht key 0)))
1173          (hash-table-set! result-ht key (+ value1 value2))))
1174      sob1-ht)
1175    (hash-table-for-each
1176      (lambda (key value2)
1177        (let ((value1 (hash-table-ref/default sob1-ht key 0)))
1178          (if (= value1 0)
1179              (hash-table-set! result-ht key value2))))
1180      sob2-ht)))
1181
1182
1183;; Sum is defined for bags only; for sets, it is the same as union.
1184
1185(define (bag-sum . bags)
1186  (check-all-bags bags)
1187  (apply sob-sum bags))
1188
1189(define (sob-sum! sob1 . sobs)
1190  (for-each
1191   (lambda (sob) (dyadic-sob-sum! sob1 sob1 sob))
1192   sobs)
1193  sob1)
1194
1195(define (bag-sum! . bags)
1196  (check-all-bags bags)
1197  (apply sob-sum! bags))
1198
1199;; For xor exactly two arguments are required, so the above structures are
1200;; not necessary.  This version accepts a result sob and computes the
1201;; absolute difference between the counts in the first sob and the
1202;; corresponding counts in the second.
1203
1204;; We start by copying the entries in the second sob but not the first
1205;; into the first.  Then we scan the first sob, computing the absolute
1206;; difference of the values and writing them back into the first sob.
1207;; It's essential to scan the second sob first, as we are not going to
1208;; damage it in the process.  (Hat tip: Sam Tobin-Hochstadt.)
1209
1210(define (sob-xor! result sob1 sob2)
1211  (let ((sob1-ht (sob-hash-table sob1))
1212        (sob2-ht (sob-hash-table sob2))
1213        (result-ht (sob-hash-table result)))
1214    (hash-table-for-each
1215      (lambda (key value2)
1216        (let ((value1 (hash-table-ref/default sob1-ht key 0)))
1217          (if (= value1 0)
1218              (hash-table-set! result-ht key value2))))
1219      sob2-ht)
1220    (hash-table-for-each
1221      (lambda (key value1)
1222        (let ((value2 (hash-table-ref/default sob2-ht key 0)))
1223          (hash-table-set! result-ht key (abs (- value1 value2)))))
1224      sob1-ht)
1225    (sob-cleanup! result)))
1226
1227(define (set-xor set1 set2)
1228  (check-set set1)
1229  (check-set set2)
1230  (check-same-comparator set1 set2)
1231  (sob-xor! (sob-empty-copy set1) set1 set2))
1232
1233(define (bag-xor bag1 bag2)
1234  (check-bag bag1)
1235  (check-bag bag2)
1236  (check-same-comparator bag1 bag2)
1237  (sob-xor! (sob-empty-copy bag1) bag1 bag2))
1238
1239(define (set-xor! set1 set2)
1240  (check-set set1)
1241  (check-set set2)
1242  (check-same-comparator set1 set2)
1243  (sob-xor! set1 set1 set2))
1244
1245(define (bag-xor! bag1 bag2)
1246  (check-bag bag1)
1247  (check-bag bag2)
1248  (check-same-comparator bag1 bag2)
1249  (sob-xor! bag1 bag1 bag2))
1250
1251
1252;;; A few bag-specific procedures
1253
1254(define (sob-product! n result sob)
1255  (let ((rht (sob-hash-table result)))
1256    (hash-table-for-each
1257      (lambda (elem count) (hash-table-set! rht elem (* count n)))
1258      (sob-hash-table sob))
1259    result))
1260
1261(define (valid-n n)
1262   (and (integer? n) (exact? n) (positive? n)))
1263
1264(define (bag-product n bag)
1265  (check-bag bag)
1266  (valid-n n)
1267  (sob-product! n (sob-empty-copy bag) bag))
1268
1269(define (bag-product! n bag)
1270  (check-bag bag)
1271  (valid-n n)
1272  (sob-product! n bag bag))
1273
1274(define (bag-unique-size bag)
1275  (check-bag bag)
1276  (hash-table-size (sob-hash-table bag)))
1277
1278(define (bag-element-count bag elem)
1279  (check-bag bag)
1280  (hash-table-ref/default (sob-hash-table bag) elem 0))
1281
1282(define (bag-for-each-unique proc bag)
1283  (check-bag bag)
1284  (hash-table-for-each
1285    (lambda (key value) (proc key value))
1286    (sob-hash-table bag)))
1287
1288(define (bag-fold-unique proc nil bag)
1289  (check-bag bag)
1290  (let ((result nil))
1291    (hash-table-for-each
1292      (lambda (elem count) (set! result (proc elem count result)))
1293      (sob-hash-table bag))
1294    result))
1295
1296(define (bag->set bag)
1297  (check-bag bag)
1298  (let ((result (make-sob (sob-comparator bag) #f)))
1299    (hash-table-for-each
1300      (lambda (key value) (sob-increment! result key value))
1301      (sob-hash-table bag))
1302    result))
1303
1304(define (set->bag set)
1305  (check-set set)
1306  (let ((result (make-sob (sob-comparator set) #t)))
1307    (hash-table-for-each
1308      (lambda (key value) (sob-increment! result key value))
1309      (sob-hash-table set))
1310    result))
1311
1312(define (set->bag! bag set)
1313  (check-bag bag)
1314  (check-set set)
1315  (check-same-comparator set bag)
1316  (hash-table-for-each
1317    (lambda (key value) (sob-increment! bag key value))
1318    (sob-hash-table set))
1319  bag)
1320
1321(define (bag->alist bag)
1322  (check-bag bag)
1323  (bag-fold-unique
1324    (lambda (elem count list) (cons (cons elem count) list))
1325    '()
1326    bag))
1327
1328(define (alist->bag comparator alist)
1329  (let* ((result (bag comparator))
1330         (ht (sob-hash-table result)))
1331    (for-each
1332      (lambda (assoc)
1333        (let ((element (car assoc)))
1334          (if (not (hash-table-contains? ht element))
1335              (sob-increment! result element (cdr assoc)))))
1336      alist)
1337    result))
1338
1339;;; Comparators
1340
1341;; Hash over sobs
1342(define (sob-hash sob)
1343  (let ((hash (comparator-hash-function (sob-comparator sob))))
1344    (sob-fold
1345      (lambda (element result) (logxor (hash element) result))
1346      5381
1347      sob)))
1348
1349;; Set and bag comparator
1350
1351(define set-comparator (make-comparator set? set=? #f sob-hash 'set-comparator))
1352
1353(define bag-comparator (make-comparator bag? bag=? #f sob-hash 'bag-comparator))
1354
1355;;; Register above comparators for use by default-comparator
1356;; (comparator-register-default! set-comparator)
1357;; (comparator-register-default! bag-comparator)
1358
1359;;; Set/bag printer (for debugging)
1360
1361;; (define (sob-print sob port)
1362;;   (display (if (sob-multi? sob) "&bag[" "&set[") port)
1363;;   (sob-for-each
1364;;     (lambda (elem) (display " " port) (write elem port))
1365;;     sob)
1366;;   (display " ]" port))
1367
1368;; ;; Chicken-specific
1369;; (cond-expand
1370;;   (chicken
1371;;     (define-record-printer sob sob-print))
1372;;   (else))
1373