1;;;
2;;; srfi-132 - Sort library
3;;;
4;;;   Copyright (c) 2017-2020  Shiro Kawai  <shiro@acm.org>
5;;;
6;;;   Redistribution and use in source and binary forms, with or without
7;;;   modification, are permitted provided that the following conditions
8;;;   are met:
9;;;
10;;;   1. Redistributions of source code must retain the above copyright
11;;;      notice, this list of conditions and the following disclaimer.
12;;;
13;;;   2. Redistributions in binary form must reproduce the above copyright
14;;;      notice, this list of conditions and the following disclaimer in the
15;;;      documentation and/or other materials provided with the distribution.
16;;;
17;;;   3. Neither the name of the authors nor the names of its contributors
18;;;      may be used to endorse or promote products derived from this
19;;;      software without specific prior written permission.
20;;;
21;;;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22;;;   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23;;;   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24;;;   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25;;;   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26;;;   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27;;;   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28;;;   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29;;;   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30;;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31;;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32;;;
33
34(define-module srfi-132
35  (use gauche.sequence)
36  (use gauche.generator)
37  (use srfi-27)   ; we use random selection in vector-select
38  (use srfi-133)
39  (export list-sorted? vector-sorted?
40          list-sort list-stable-sort
41          list-sort! list-stable-sort!
42          vector-sort vector-stable-sort
43          vector-sort! vector-stable-sort!
44          list-merge list-merge!
45          vector-merge vector-merge!
46          list-delete-neighbor-dups
47          list-delete-neighbor-dups!
48          vector-delete-neighbor-dups
49          vector-delete-neighbor-dups!
50          vector-find-median
51          vector-find-median!
52          vector-select!
53          vector-separate!
54          ))
55(select-module srfi-132)
56
57(define (list-sort < lis) (assume-type lis <list>) (sort lis <))
58(define (list-sort! < lis) (assume-type lis <list>) (sort! lis <))
59(define (list-stable-sort < lis) (assume-type lis <list>) (stable-sort lis <))
60(define (list-stable-sort! < lis) (assume-type lis <list>) (stable-sort! lis <))
61(define (list-sorted? < lis) (assume-type lis <list>) (sorted? lis <))
62(define (list-merge < lis1 lis2) (merge lis1 lis2 <))
63(define (list-merge! < lis1 lis2) (merge! lis1 lis2 <))
64
65;; NB: We could get range-restricted linear-update version more efficient.
66
67(define-inline (%check-range v start end)
68  (assume-type start <integer>)
69  (assume-type end <integer>)
70  (unless (<= 0 start end (vector-length v))
71    (errorf "Start/end arguments must be nonnegative exact integers, \
72             and must be (<= 0 start end (- (vector-length v) 1)). \
73             We got (start end): (~s ~s)" start end)))
74
75(define (%vector-sorter %sort!)
76  (^[< v :optional (s 0) (e (vector-length v))]
77    (assume-type v <vector>)
78    (%check-range v s e)
79    (%sort! (subseq v s e) <)))
80
81(define (%vector-sorter! %sort!)
82  (^[< v :optional (s 0) (e (vector-length v))]
83    (assume-type v <vector>)
84    (%check-range v s e)
85    (if (and (= s 0) (= e (vector-length v)))
86      (%sort! v <)
87      (set! (subseq v s e) (%sort! (subseq v s e) <)))
88    (undefined)))
89
90(define vector-sort (%vector-sorter sort!))
91(define vector-sort! (%vector-sorter! sort!))
92(define vector-stable-sort (%vector-sorter stable-sort!))
93(define vector-stable-sort! (%vector-sorter! stable-sort!))
94
95(define (%maybe-subseq v s e)
96  (if (and (= s 0) (= e (vector-length v)))
97    v
98    (subseq v s e)))
99
100(define (vector-sorted? < v :optional (s 0) (e (vector-length v)))
101  (assume-type v <vector>)
102  (%check-range v s e)
103  (sorted? (%maybe-subseq v s e) <))
104
105(define (%vector-merge! < dst start v1 v2)
106  (let ([len1 (vector-length v1)]
107        [len2 (vector-length v2)])
108    (cond [(zero? len1) (vector-copy! dst start v2)]
109          [(zero? len2) (vector-copy! dst start v1)]
110          [else (let loop ([e1 (vector-ref v1 0)]
111                           [e2 (vector-ref v2 0)]
112                           [i1 1]
113                           [i2 1]
114                           [d start])
115                  (cond [(< e2 e1)
116                         (vector-set! dst d e2)
117                         (if (= i2 len2)
118                           (vector-copy! dst (+ d 1) v1 (- i1 1))
119                           (loop e1 (vector-ref v2 i2) i1 (+ i2 1) (+ d 1)))]
120                        [else
121                         (vector-set! dst d e1)
122                         (if (= i1 len1)
123                           (vector-copy! dst (+ d 1) v2 (- i2 1))
124                           (loop (vector-ref v1 i1) e2 (+ i1 1) i2 (+ d 1)))]))]
125          )))
126
127(define (vector-merge < v1 v2 :optional (s1 0) (e1 (vector-length v1))
128                                        (s2 0) (e2 (vector-length v2)))
129  (assume-type v1 <vector>)
130  (assume-type v2 <vector>)
131  (%check-range v1 s1 e1)
132  (%check-range v2 s2 e2)
133  (rlet1 vr (make-vector (+ (- e1 s1) (- e2 s2)))
134    (%vector-merge! < vr 0 (%maybe-subseq v1 s1 e1) (%maybe-subseq v2 s2 e2))))
135
136(define (vector-merge! < vr v1 v2 :optional (sr 0)
137                                            (s1 0) (e1 (vector-length v1))
138                                            (s2 0) (e2 (vector-length v2)))
139  (assume-type vr <vector>)
140  (assume-type v1 <vector>)
141  (assume-type v2 <vector>)
142  (%check-range v1 s1 e1)
143  (%check-range v2 s2 e2)
144  (unless (>= (vector-length vr) (+ sr (- e1 s1) (- e2 s2)))
145    (errorf "Destination vector is too short (length=~s, required=~s)"
146            (vector-length vr) (+ sr (- e1 s1) (- e2 s2))))
147  (%vector-merge! < vr sr (%maybe-subseq v1 s1 e1) (%maybe-subseq v2 s2 e2))
148  (undefined))
149
150;; duplicate elimination
151
152(define (list-delete-neighbor-dups = lis)
153  (assume-type lis <list>)
154  (delete-neighbor-dups lis :test =))
155(define (list-delete-neighbor-dups! = lis)
156  (assume-type lis <list>)
157  (delete-neighbor-dups-squeeze! lis :test =))
158(define (vector-delete-neighbor-dups = vec :optional (start 0) (end #f))
159  (assume-type vec <vector>)
160  (delete-neighbor-dups vec :test = :start start :end end))
161(define (vector-delete-neighbor-dups! = vec :optional (start 0) (end #f))
162  (assume-type vec <vector>)
163  (delete-neighbor-dups! vec :test = :start start :end end))
164
165;;;
166;;; Median finding / k-th largest element
167;;;
168
169;; Returns k-th smallest element in v.
170(define (vector-select! elt< v k :optional (start 0) (end (vector-length v)))
171  (assume-type v <vector>)
172  (assume-type k <integer>)
173  (%check-range v start end)
174  (assume (<= start k (- end 1)))
175  (vector-select-1! elt< v k start end))
176
177;; Make initial k element of v contain k-smallest elements, sorted.
178;; we can't use vector-select-1! directly, for partition-in-place! excludes
179;; pivot values.
180(define (vector-separate! elt< v k :optional (start 0) (end (vector-length v)))
181  (assume-type v <vector>)
182  (assume-type k <integer>)
183  (%check-range v start end)
184  (assume (<= start k (- end 1)))
185  (partition-in-place-full! elt< v k start end))
186
187(define (vector-find-median elt< v knil :optional (mean arithmetic-mean))
188  (assume-type v <vector>)
189  (case (vector-length v)
190    [(0) knil]
191    [(1) (vector-ref v 0)]
192    [(2) (mean (vector-ref v 0) (vector-ref v 1))]
193    [else
194     => (^[len]
195          (if (odd? len)
196            (vector-select-1! elt< (vector-copy v) (ash len -1) 0 len)
197            (receive (a b) (vector-select-2! elt< (vector-copy v)
198                                             (- (ash len -1) 1) 0 len)
199              (mean a b))))]))
200
201;; srfi text reads we must leave v sorted.  without that condition,
202;; we could directly use vector-select-[12]!.
203(define (vector-find-median! elt< v knil :optional (mean arithmetic-mean))
204  (assume-type v <vector>)
205  (vector-sort! elt< v)
206  (let1 len (vector-length v)
207    (cond [(zero? len) knil]
208          [(odd? len) (vector-ref v (quotient len 2))]
209          [else (mean (vector-ref v (- (quotient len 2) 1))
210                      (vector-ref v (quotient len 2)))])))
211
212;; Default mean procedure
213(define (arithmetic-mean a b) (/ (+ a b) 2))
214
215;; We use our own random-source to avoid unexpected interference
216(define *random-source*
217  (rlet1 r (make-random-source)
218    (random-source-randomize! r)))
219(define %random-integer
220  (random-source-make-integers *random-source*))
221
222;; Rearrange elements of VEC between start and end, so that all elements
223;; smaller than the pivot are gathered at the front, followed
224;; by elements greater than the pivot.
225;;
226;;  #(G S P G S G G P S S)   ; S:smaller, P:pivot, G:greater
227;;
228;;  to:
229;;            a       b
230;;  #(S S S S G G G G X X)   ; X: don't care
231;;
232;; Returns a and b.
233;;
234;; In the implementation, we use typical two-index scan, where i moves from
235;; start to right, while j moves from the end to left.  Elements
236;; equal to pivot are removed, which is done by shrinking the region
237;; with moving end to left.
238;;
239;; Invariances:
240;;   vec[start] .. vec[i-1] are always smaller than the pivot
241;;   vec[k] .. vec[end-1]   are always greater than the pivot
242;;   start <= i <= k <= end
243;;   vec[end-1] is not equal to pivot (we make it so at the beginning)
244;;
245;;    i                   j E
246;;  #(S G S P G S P G S P G)     ; forward
247;;      i                 j E
248;;  #(S G S P G S P G S P G)     ; backward
249;;      i               j   E
250;;  #(S G S P G S P G S P G)     ; shrink v[j] = v[E-1]
251;;      i               j E
252;;  #(S G S P G S P G S G _)     ; backward
253;;      i             j   E
254;;  #(S G S P G S P G S G _)     ; swap  v[i] <=> v[j]
255;;      i             j   E
256;;  #(S S S P G S P G G G _)     ; forward
257;;        i           j   E
258;;  #(S S S P G S P G G G _)     ; forward
259;;          i         j   E
260;;  #(S S S P G S P G G G _)     ; forward
261;;          i         j   E
262;;  #(S S S P G S P G G G _)     ; shrink v[i] = v[E-1]
263;;          i         j E
264;;  #(S S S G G S P G G _ _)     ; backward
265;;          i       j   E
266;;  #(S S S G G S P G G _ _)     ; backward
267;;          i     j     E
268;;  #(S S S G G S P G G _ _)     ; shrink v[j] = v[E-1]
269;;          i     j   E
270;;  #(S S S G G S G G _ _ _)     ; backward
271;;          i   j     E
272;;  #(S S S G G S G G _ _ _)     ; swap  v[i] <=> v[j]
273;;          i   j     E
274;;  #(S S S S G G G G _ _ _)     ; forward
275;;            i j     E
276;;  #(S S S S G G G G _ _ _)     ; backward
277;;            ij      E
278;;  #(S S S S G G G G _ _ _)     ; end
279
280(define (partition-in-place! elt< pivot vec start end)
281  (define (forward i j end)
282    (cond [(> i j) (values i end)]
283          [(elt< (vector-ref vec i) pivot) (forward (+ i 1) j end)]
284          [(elt< pivot (vector-ref vec i)) (backward i j end)]
285          [else                         ;shrink
286           (vector-set! vec i (vector-ref vec (- end 1))) ; now v[i] > pivot
287           (if (= j (- end 1))
288             (adjust i (- j 1))
289             (backward i j (- end 1)))]))
290  (define (backward i j end)  ; v[i] > pivot
291    (cond [(>= i j) (values i end)]
292          [(elt< (vector-ref vec j) pivot)
293           (vector-swap! vec i j)
294           (forward (+ i 1) (- j 1) end)]
295          [(elt< pivot (vector-ref vec j)) (backward i (- j 1) end)]
296          [else ; shrink
297           (vector-set! vec j (vector-ref vec (- end 1)))
298           (backward i j (- end 1))]))
299  (define (adjust i end-1) ; keep invariance of v[end-1] > pivot.  v[i] > pivot
300    (cond [(> i end-1) (values i i)]
301          [(elt< pivot (vector-ref vec end-1))
302           (backward i end-1 (+ end-1 1))]
303          [(elt< (vector-ref vec end-1) pivot)
304           (vector-swap! vec i end-1)
305           (forward i end-1 (+ end-1 1))]
306          [else (adjust i (- end-1 1))]))
307  ;; We first scan from the end to satisfy the condition that v[end-1] > pivot.
308  (let init ([m (- end 1)])
309    (cond [(> start m) (values start start)]
310          [(elt< pivot (vector-ref vec m))
311           (forward start m (+ m 1))]
312          [(elt< (vector-ref vec m) pivot)
313           ;; We should find at least one element greater than pivot.
314           ;; Scanning vector back, keeping the invariance that
315           ;; elements not equal to the pivot is contained between [start,m]
316           (let init2 ([m m]
317                       [n (- m 1)])
318             (cond [(> start n) (values (+ m 1) (+ m 1))]
319                   [(elt< pivot (vector-ref vec n))
320                    (vector-swap! vec n m)
321                    (forward start m (+ m 1))]
322                   [(elt< (vector-ref vec n) pivot)
323                    (init2 m (- n 1))]
324                   [else
325                    (vector-set! vec n (vector-ref vec m))
326                    (init2 (- m 1) (- n 1))]))]
327          [else (init (- m 1))])))
328
329(define (vector-select-1! elt< vec k start end)
330  (let loop ([k k] [start start] [end end])
331    (define size (- end start))
332    (case size
333      [(1) (vector-ref vec start)] ; k must be 0
334      [(2) (let ([a (vector-ref vec start)]
335                 [b (vector-ref vec (+ start 1))])
336             (if (elt< a b)
337               (if (zero? k) a b)
338               (if (zero? k) b a)))]
339      [else
340       (let* ([ip (%random-integer size)]
341              [pivot (vector-ref vec (+ ip start))])
342         (receive (i j) (partition-in-place! elt< pivot vec start end)
343           (let1 nsmaller (- i start)
344             (if (< k nsmaller)
345               (loop k start i)
346               (let1 nsmaller-or-equal (+ nsmaller (- end j))
347                 (if (< k nsmaller-or-equal)
348                   pivot
349                   (loop (- k nsmaller-or-equal) i j)))))))])))
350
351;; precondition: (- end start) >= 2
352(define (vector-select-2! elt< vec k start end)
353  (let loop ([k k] [start start] [end end])
354    (define size (- end start))
355    (if (= size 2)
356      (let ([a (vector-ref vec start)]
357            [b (vector-ref vec (+ start 1))])
358        (if (elt< a b) (values a b) (values b a)))
359      (let* ([ip (%random-integer size)]
360             [pivot (vector-ref vec (+ ip start))])
361        (receive (i j) (partition-in-place! elt< pivot vec start end)
362          (let1 nsmaller (- i start)
363            (cond [(= (+ k 1) nsmaller)
364                   (values (vector-select-1! elt< vec k start i) pivot)]
365                  [(< k nsmaller) (loop k start i)]
366                  [else
367                   (let1 nsmaller-or-equal (+ nsmaller (- end j))
368                     (cond [(= (+ k 1) nsmaller-or-equal)
369                            (values pivot (vector-select-1! elt< vec 0 i j))]
370                           [(< k nsmaller-or-equal)
371                            (values pivot pivot)]
372                           [else (loop (- k nsmaller-or-equal) i j)]))])))))))
373
374;; Used by vector-separate!.
375(define (partition-in-place-full! elt< vec k start end)
376  (let loop ([k k] [start start] [end end])
377    (define size (- end start))
378    (case size
379      [(1)]
380      [(2) (when (and (>= k 1)
381                      (elt< (vector-ref vec (+ start 1))
382                            (vector-ref vec start)))
383             (vector-swap! vec start (+ start 1)))]
384      [else
385       (let* ([ip (%random-integer size)]
386              [pivot (vector-ref vec (+ ip start))])
387         (receive (i j) (partition-in-place! elt< pivot vec start end)
388           (let1 nsmaller (- i start)
389             (if (< k nsmaller)
390               (begin
391                 ;; recover pivot elements at the end.
392                 (dotimes (t (- end j)) (vector-set! vec (+ j t) pivot))
393                 (loop k start i))
394               (let1 nsmaller-or-equal (+ nsmaller (- end j))
395                 ;; recover pivot elements between smaller and greater elts.
396                 (dotimes (t (- j i))
397                   (vector-set! vec (- end t 1) (vector-ref vec (- j t 1))))
398                 (dotimes (t (- end j))
399                   (vector-set! vec (+ i t) pivot))
400                 (when (> k nsmaller-or-equal)
401                   (loop (- k nsmaller-or-equal)
402                         nsmaller-or-equal
403                         end)))))))])))
404