1 /* Quicksort.
2 *
3 * Differs from stdlib's qsort() in that Easel provides a standardized
4 * and reentrant way to sort arbitrary data arrays/structures by
5 * sorting an array of unique integer identifiers.
6 *
7 * Contents:
8 * 1. Quicksort algorithm
9 * 2. Unit tests
10 * 3. Test driver
11 * 4. Example
12 */
13 #include "esl_config.h"
14
15 #include "easel.h"
16 #include "esl_quicksort.h"
17
18 static void partition(const void *data, int (*comparison)(const void *data, int o1, int o2), int *ord, int lo, int hi);
19
20
21 /*****************************************************************
22 * 1. Quicksort
23 *****************************************************************/
24
25 /* Function: esl_quicksort()
26 * Synopsis: Quicksort algorithm.
27 * Incept: SRE, Wed Nov 11 16:34:05 2015
28 *
29 * Purpose: <data> points to an array or structure that consists of
30 * <n> elements that we can identify with unique integer
31 * identifiers 0..<n-1>. Sort these elements, using a
32 * <*comparison()> function that returns -1, 0, or 1 when
33 * element <o1> should be sorted before, equal to, or after
34 * element <o2>. Caller provides an allocated array
35 * <sorted_at> to hold the result, allocated for at least
36 * <n> integers. The result in <sorted_at> consists of the
37 * ordered array of identifiers. For example,
38 * <sorted_at[0]> is the index of the best (first) element
39 * in the original <data> array, and <sorted_at[n-1]> is
40 * the id of the worst (last) element; <data[sorted_at[0]]>
41 * and <data[sorted_at[n-1]]> are the best and worst
42 * elements themselves. The original <data> array is
43 * unaltered.
44 *
45 * Compared to standard <qsort()>, the main advantage of
46 * <esl_qsort()> is that it is reentrant: it passes an
47 * arbitrary <data> pointer to the comparison function, so
48 * it can sort the elements of arbitrary structures or
49 * arrays without having to globally or statically expose
50 * those data. (A reentrant <qsort_r()> function is
51 * available in the libc of some platforms, but is not
52 * standard POSIX ANSI C, so we cannot rely on it.) The
53 * main disadvantage is that <esl_quicksort()> takes
54 * additional memory (the <sorted_at> array), whereas
55 * <qsort()> sorts in place.
56 *
57 * Args: data - generic pointer to the data to be sorted.
58 * n - <data> consists of <n> elements numbered 0..n-1
59 * comparison() - returns -1,0,1 if o1 is better, equal, worse than o2
60 * sorted_at - sorted array of the data[] idx's 0..n-1.
61 *
62 * Returns: <eslOK> on success.
63 */
64 int
esl_quicksort(const void * data,int n,int (* comparison)(const void * data,int o1,int o2),int * sorted_at)65 esl_quicksort(const void *data, int n, int (*comparison)(const void *data, int o1, int o2), int *sorted_at)
66 {
67 int i;
68 for (i = 0; i < n; i++) sorted_at[i] = i;
69 partition(data, comparison, sorted_at, 0, n-1);
70 return eslOK;
71 }
72
73
74
75 /*
76 * We're sorting a subrange of <ord>, from <ord[lo]..ord[hi]>
77 * Values in <ord> are unique identifiers for <data>.
78 * We're sorting <ord> by data[ord[i]] better than data[ord[i+1]]
79 */
80 static void
partition(const void * data,int (* comparison)(const void * data,int o1,int o2),int * ord,int lo,int hi)81 partition(const void *data, int (*comparison)(const void *data, int o1, int o2), int *ord, int lo, int hi)
82 {
83 int i = lo;
84 int j = hi+1;
85 int swap, pivot;
86
87 /* Select pivot position from lo, hi, lo + (hi-lo)/2 by median-of-three */
88 if (comparison(data, ord[hi], ord[lo]) < 0) { swap = ord[hi]; ord[hi] = ord[lo]; ord[hi] = swap; }
89 pivot = lo + (hi-lo)/2;
90 if (comparison(data, ord[pivot], ord[lo]) < 0) pivot = lo;
91 else if (comparison(data, ord[pivot], ord[hi]) > 0) pivot = hi;
92 swap = ord[pivot]; ord[pivot] = ord[lo]; ord[lo] = swap;
93
94 /* Partition */
95 while (1)
96 {
97 do { i++; } while (i <= hi && comparison(data, ord[i], ord[lo]) < 0);
98 do { j--; } while ( comparison(data, ord[j], ord[lo]) > 0);
99
100 if (j > i) { swap = ord[j]; ord[j] = ord[i]; ord[i] = swap; }
101 else break;
102 }
103 swap = ord[lo]; ord[lo] = ord[j]; ord[j] = swap;
104
105 /* Recurse, doing the smaller partition first */
106 if (j-lo < hi-j) {
107 if (j-lo > 1) partition(data, comparison, ord, lo, j-1);
108 if (hi-j > 1) partition(data, comparison, ord, j+1, hi);
109 } else {
110 if (hi-j > 1) partition(data, comparison, ord, j+1, hi);
111 if (j-lo > 1) partition(data, comparison, ord, lo, j-1);
112 }
113 }
114
115
116
117
118 /*****************************************************************
119 * 2. Unit tests
120 *****************************************************************/
121 #ifdef eslQUICKSORT_TESTDRIVE
122
123 #include "esl_random.h"
124
125 int
sort_floats_ascending(const void * data,int o1,int o2)126 sort_floats_ascending(const void *data, int o1, int o2)
127 {
128 float *x = (float *) data;
129 if (x[o1] < x[o2]) return -1;
130 if (x[o1] > x[o2]) return 1;
131 return 0;
132 }
133
134 static void
utest_floatsort(ESL_RANDOMNESS * rng,int N,int K)135 utest_floatsort(ESL_RANDOMNESS *rng, int N, int K)
136 {
137 char msg[] = "esl_quicksort: float sort test failed";
138 float *x = malloc(sizeof(float) * N);
139 int *sorted_at = malloc(sizeof(int) * N);
140 int i,r;
141
142 for (i = 0; i < N; i++)
143 x[i] = esl_random(rng) * K;
144 esl_quicksort(x, N, sort_floats_ascending, sorted_at);
145
146 for (r = 1; r < N; r++)
147 if (x[sorted_at[r]] < x[sorted_at[r-1]]) esl_fatal(msg);
148
149 free(x);
150 free(sorted_at);
151 }
152
153 #endif /*eslQUICKSORT_TESTDRIVE*/
154
155 /*****************************************************************
156 * 3. Test driver
157 *****************************************************************/
158 #ifdef eslQUICKSORT_TESTDRIVE
159
160 #include "easel.h"
161 #include "esl_getopts.h"
162 #include "esl_random.h"
163 #include "esl_quicksort.h"
164
165 #include <stdio.h>
166
167 static ESL_OPTIONS options[] = {
168 /* name type default env range togs reqs incomp help docgrp */
169 {"-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show help and usage", 0},
170 {"-s", eslARG_INT, "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0},
171
172 { 0,0,0,0,0,0,0,0,0,0},
173 };
174 static char usage[] = "[-options]";
175 static char banner[] = "test driver for quicksort module";
176
177 int
main(int argc,char ** argv)178 main(int argc, char **argv)
179 {
180 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
181 ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
182 int N = 100;
183 int K = 10;
184
185 utest_floatsort(rng, N, K);
186
187 esl_randomness_Destroy(rng);
188 esl_getopts_Destroy(go);
189 return 0;
190 }
191 #endif /*eslQUICKSORT_TESTDRIVE*/
192
193
194 /*****************************************************************
195 * 4. Example
196 *****************************************************************/
197 #ifdef eslQUICKSORT_EXAMPLE
198
199 #include "easel.h"
200 #include "esl_random.h"
201 #include "esl_quicksort.h"
202
203 int
sort_floats_descending(const void * data,int o1,int o2)204 sort_floats_descending(const void *data, int o1, int o2)
205 {
206 float *x = (float *) data;
207 if (x[o1] > x[o2]) return -1;
208 if (x[o1] < x[o2]) return 1;
209 return 0;
210 }
211
212 int
main(int argc,char ** argv)213 main(int argc, char **argv)
214 {
215 ESL_RANDOMNESS *rng = esl_randomness_Create(0);
216 int N = 100;
217 float K = 10.0;
218 float *x = malloc(sizeof(float) * N);
219 int *sorted_at = malloc(sizeof(int) * N);
220 int i,r;
221
222 for (i = 0; i < N; i++)
223 x[i] = esl_random(rng) * K;
224
225 esl_quicksort(x, N, sort_floats_descending, sorted_at);
226
227 for (r = 0; r < N; r++)
228 printf("%.4f\n", x[sorted_at[r]]);
229
230 return 0;
231 }
232
233 #endif /*eslQUICKSORT_EXAMPLE*/
234
235
236