1;;; File   : sort.scm
2;;; Author : Richard A. O'Keefe (based on Prolog code by D.H.D.Warren)
3;;; Updated: 11 June 1991
4;;; Defines: sorted?, merge, merge!, sort, sort!
5
6;;; --------------------------------------------------------------------
7;   Many Scheme systems provide some kind of sorting functions.  They do
8;   not, however, always provide the _same_ sorting functions, and those
9;   that I have had the opportunity to test provided inefficient ones (a
10;   common blunder is to use quicksort which does not perform well).
11;   Because sort and sort! are not in the standard, there is very little
12;   agreement about what these functions look like.  For example, Dybvig
13;   says that Chez Scheme provides
14;	(merge predicate list1 list2)
15;	(merge! predicate list1 list2)
16;	(sort predicate list)
17;	(sort! predicate list),
18;   while the MIT Scheme 7.1 manual, following Common Lisp, offers
19;	(sort list predicate),
20;   TI PC Scheme offers
21;	(sort! list/vector predicate?)
22;   and Elk offers
23;	(sort list/vector predicate?)
24;	(sort! list/vector predicate?)
25;   Here is a comprehensive catalogue of the variations I have found.
26;   (1) Both sort and sort! may be provided.
27;   (2) sort may be provided without sort!
28;   (3) sort! may be provided without sort
29;   (4) Neither may be provided
30;   ---
31;   (5) The sequence argument may be either a list or a vector.
32;   (6) The sequence argument may only be a list.
33;   (7) The sequence argument may only be a vector.
34;   ---
35;   (8) The comparison function may be expected to behave like <
36;   (9) or it may be expected to behave like <=
37;   ---
38;   (10) The interface may be (sort predicate? sequence)
39;   (11) or (sort sequence predicate?)
40;   (12) or (sort sequence &optional (predicate? <))
41;   ---
42;   (13) The sort may be stable
43;   (14) or it may be unstable.
44;   ---
45;   All of this variation really does not help anybody.  A nice simple
46;   merge sort is both stable and fast (quite a lot faster than `quick'
47;   sort).
48;   I am providing this source code with no restrictions at all on its
49;   use (but please retain D.H.D.Warren's credit for the original idea).
50;   You may have to rename some of these functions in order to use them
51;   in a system which already provides incompatible or inferior sorts.
52;   For each of the functions, only the top-level define needs to be
53;   edited to do that.
54;   I could have given these functions names which would not clash with
55;   any Scheme that I know of, but I would like to encourage implementors
56;   to converge on a single interface, and this may serve as a hint.
57;   The argument order for all functions has been chosen to be as close
58;   to Common Lisp as made sense, in order to avoid NIH-itis.
59;
60;   Each of the five functions has a required *last* parameter which is
61;   a comparison function.  A comparison function f is a function of 2
62;   arguments which acts like <.  For example,
63;	(not (f x x))
64;	(and (f x y) (f y z)) => (f x z)
65;   The standard functions <, >, char<?, char>?, char-ci<?, char-ci>?,
66;   string<?, string>?, string-ci<?, and string-ci>? are suitable for
67;   use as comparison functions.  Think of (less? x y) as saying when
68;   x must *not* precede y.
69;
70;   (sorted? sequence less?)
71;	returns #t when the sequence argument is in non-decreasing order
72;	according to less? (that is, there is no adjacent pair ... x y ...
73;	for which (less? y x))
74;	returns #f when the sequence contains at least one out-of-order pair.
75;	It is an error if the sequence is neither a list nor a vector.
76;
77;   (merge list1 list2 less?)
78;	This merges two lists, producing a completely new list as result.
79;	I gave serious consideration to producing a Common-Lisp-compatible
80;	version.  However, Common Lisp's `sort' is our `sort!' (well, in
81;	fact Common Lisp's `stable-sort' is our `sort!', merge sort is
82;	*fast* as well as stable!) so adapting CL code to Scheme takes a
83;	bit of work anyway.  I did, however, appeal to CL to determine
84;	the *order* of the arguments.
85;
86;   (merge! list1 list2 less?)
87;	merges two lists, re-using the pairs of list1 and list2 to build
88;	the result.  If the code is compiled, and less? constructs no new
89;	pairs, no pairs at all will be allocated.  The first pair of the
90;	result will be either the first pair of list1 or the first pair
91;	of list2, but you can't predict which.
92;
93;	The code of merge and merge! could have been quite a bit simpler,
94;	but they have been coded to reduce the amount of work done per
95;	iteration.  (For example, we only have one null? test per iteration.)
96;
97;   (sort sequence less?)
98;	accepts either a list or a vector, and returns a new sequence which
99;	is sorted.  The new sequence is the same type as the input.  Always
100;	(sorted? (sort sequence less?) less?).
101;	The original sequence is not altered in any way.  The new sequence
102;	shares its _elements_ with the old one; no elements are copied.
103;
104;   (sort! sequence less?)
105;	returns its sorted result in the original boxes.  If the original
106;	sequence is a list, no new storage is allocated at all.  If the
107;	original sequence is a vector, the sorted elements are put back
108;	in the same vector.
109;
110;   Note that these functions do NOT accept a CL-style ":key" argument.
111;   A simple device for obtaining the same expressiveness is to define
112;   (define (keyed less? key) (lambda (x y) (less? (key x) (key y))))
113;   and then, when you would have written
114;	(sort a-sequence #'my-less :key #'my-key)
115;   in Common Lisp, just write
116;	(sort! a-sequence (keyed my-less? my-key))
117;   in Scheme.
118;;; --------------------------------------------------------------------
119
120
121;;; (sorted? sequence less?)
122;;; is true when sequence is a list (x0 x1 ... xm) or a vector #(x0 ... xm)
123;;; such that for all 1 <= i <= m,
124;;;	(not (less? (list-ref list i) (list-ref list (- i 1)))).
125
126(define (sorted? seq less?)
127    (cond
128	((null? seq)
129	    #t)
130	((vector? seq)
131	    (let ((n (vector-length seq)))
132		(if (<= n 1)
133		    #t
134		    (do ((i 1 (+ i 1)))
135			((or (= i n)
136			     (less? (vector-ref seq (- i 1))
137			     	    (vector-ref seq i)))
138			    (= i n)) )) ))
139	(else
140	    (let loop ((last (car seq)) (next (cdr seq)))
141		(or (null? next)
142		    (and (not (less? (car next) last))
143			 (loop (car next) (cdr next)) )) )) ))
144
145
146;;; (merge a b less?)
147;;; takes two lists a and b such that (sorted? a less?) and (sorted? b less?)
148;;; and returns a new list in which the elements of a and b have been stably
149;;; interleaved so that (sorted? (merge a b less?) less?).
150;;; Note:  this does _not_ accept vectors.  See below.
151
152(define (merge a b less?)
153    (cond
154	((null? a) b)
155	((null? b) a)
156	(else (let loop ((x (car a)) (a (cdr a)) (y (car b)) (b (cdr b)))
157	    ;; The loop handles the merging of non-empty lists.  It has
158	    ;; been written this way to save testing and car/cdring.
159	    (if (less? y x)
160		(if (null? b)
161		    (cons y (cons x a))
162		    (cons y (loop x a (car b) (cdr b)) ))
163		;; x <= y
164		(if (null? a)
165		    (cons x (cons y b))
166		    (cons x (loop (car a) (cdr a) y b)) )) )) ))
167
168
169;;; (merge! a b less?)
170;;; takes two sorted lists a and b and smashes their cdr fields to form a
171;;; single sorted list including the elements of both.
172;;; Note:  this does _not_ accept vectors.
173
174(define (merge! a b less?)
175    (define (loop r a b)
176	(if (less? (car b) (car a))
177	    (begin
178		(set-cdr! r b)
179		(if (null? (cdr b))
180		    (set-cdr! b a)
181		    (loop b a (cdr b)) ))
182	    ;; (car a) <= (car b)
183	    (begin
184		(set-cdr! r a)
185		(if (null? (cdr a))
186		    (set-cdr! a b)
187		    (loop a (cdr a) b)) )) )
188    (cond
189	((null? a) b)
190	((null? b) a)
191	((less? (car b) (car a))
192	    (if (null? (cdr b))
193		(set-cdr! b a)
194		(loop b a (cdr b)))
195	    b)
196	(else ; (car a) <= (car b)
197	    (if (null? (cdr a))
198		(set-cdr! a b)
199		(loop a (cdr a) b))
200	    a)))
201
202
203
204;;; (sort! sequence less?)
205;;; sorts the list or vector sequence destructively.  It uses a version
206;;; of merge-sort invented, to the best of my knowledge, by David H. D.
207;;; Warren, and first used in the DEC-10 Prolog system.  R. A. O'Keefe
208;;; adapted it to work destructively in Scheme.
209
210(define (sort! seq less?)
211    (define (step n)
212	(cond
213	    ((> n 2)
214		(let* ((j (quotient n 2))
215		       (a (step j))
216		       (k (- n j))
217		       (b (step k)))
218		    (merge! a b less?)))
219	    ((= n 2)
220		(let ((x (car seq))
221		      (y (cadr seq))
222		      (p seq))
223		    (set! seq (cddr seq))
224		    (if (less? y x) (begin
225			(set-car! p y)
226			(set-car! (cdr p) x)))
227		    (set-cdr! (cdr p) '())
228		    p))
229	    ((= n 1)
230		(let ((p seq))
231		    (set! seq (cdr seq))
232		    (set-cdr! p '())
233		    p))
234	    (else
235		'()) ))
236    (if (vector? seq)
237	(let ((n (vector-length seq)))
238	    (set! seq (vector->list seq))
239	    (do ((p (step n) (cdr p))
240		 (i 0 (+ i 1)))
241		((null? p) vector)
242		(vector-set! vector i (car p)) ))
243	;; otherwise, assume it is a list
244	(step (length seq)) ))
245
246
247;;; (sort sequence less?)
248;;; sorts a vector or list non-destructively.  It does this by sorting a
249;;; copy of the sequence.  My understanding is that the Standard says
250;;; that the result of append is always "newly allocated" except for
251;;; sharing structure with "the last argument", so (append x '()) ought
252;;; to be a standard way of copying a list x.
253
254(define (sort seq less?)
255    (if (vector? seq)
256	(list->vector (sort! (vector->list seq) less?))
257	(sort! (append seq '()) less?)))
258
259;;; eof
260