1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Dan Bernstein at New York University. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)radixsort.c 5.12 (Berkeley) 01/14/92"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 #include <limits.h> 17 #include <stdlib.h> 18 #include <stddef.h> 19 #include <string.h> 20 #include <errno.h> 21 22 static void shellsort __P((const u_char **, int, int, const u_char *, int)); 23 24 /* 25 * __rspartition is the cutoff point for a further partitioning instead 26 * of a shellsort. If it changes check __rsshell_increments. Both of 27 * these are exported, as the best values are data dependent. 28 */ 29 #define NPARTITION 30 30 31 int __rspartition = NPARTITION; 32 int __rsshell_increments[] = { 4, 1, 0, 0, 0, 0, 0, 0 }; 33 34 /* 35 * Stackp points to context structures, where each structure schedules a 36 * partitioning. Radixsort exits when the stack is empty. 37 * 38 * If the buckets are placed on the stack randomly, the worst case is when 39 * all the buckets but one contain (__rspartition + 1) elements and the bucket 40 * pushed on the stack last contains the rest of the elements. In this case, 41 * stack growth is bounded by: 42 * 43 * limit = (nelements / (__rspartitions + 1)) - 1; 44 * 45 * This is a very large number, 102,261,125 for the maximum 32-bit signed 46 * integer if NPARTITION is 20. 47 * 48 * By forcing the largest bucket to be pushed on the stack first, the worst 49 * case is when all but two buckets each contain (__rspartition + 1) elements, 50 * with the remaining elements split equally between the first and last 51 * buckets pushed on the stack. In this case, stack growth is bounded by 52 * the recurrence relation: 53 * 54 * nelements_max[1] = (NBINS-1) * (__rspartition + 1); 55 * nelements_max[i] = (NBINS-3) * (__rspartition + 1) + 2 * npartitions[i-1]; 56 * which resolves to: 57 * nelements_max[i] = ((NBINS-2) * (2^i - 1) + 1) * (__rspartitions + 1), 58 * 59 * which yields, for a given nelements, 60 * 61 * i = ceil(log2((((nelements / (__rspartition + 1)) - 1) / (NBINS - 2)) + 1)); 62 * 63 * The bound is: 64 * 65 * limit = i * (NBINS - 1); 66 * 67 * This is a much smaller number, 4845 for the maximum 32-bit signed int. 68 */ 69 #define NBUCKETS (UCHAR_MAX + 1) 70 71 typedef struct _stack { 72 const u_char **bot; 73 int indx, nmemb; 74 } CONTEXT; 75 76 #define STACKPUSH { \ 77 stackp->bot = p; \ 78 stackp->nmemb = nmemb; \ 79 stackp->indx = indx; \ 80 ++stackp; \ 81 } 82 #define STACKPOP { \ 83 if (stackp == stack) \ 84 break; \ 85 --stackp; \ 86 bot = stackp->bot; \ 87 nmemb = stackp->nmemb; \ 88 indx = stackp->indx; \ 89 } 90 /* 91 * A variant of MSD radix sorting; see Knuth Vol. 3, page 177, and 5.2.5, 92 * Ex. 10 and 12. Also, "Three Partition Refinement Algorithms, Paige 93 * and Tarjan, SIAM J. Comput. Vol. 16, No. 6, December 1987. 94 * 95 * This uses a simple sort as soon as a bucket crosses a cutoff point, 96 * rather than sorting the entire list after partitioning is finished. 97 * This should be an advantage. 98 * 99 * This is pure MSD instead of LSD of some number of MSD, switching to 100 * the simple sort as soon as possible. Takes linear time relative to 101 * the number of bytes in the strings. 102 */ 103 int 104 radixsort(l1, nmemb, tab, endbyte) 105 const u_char **l1; 106 register int nmemb; 107 const u_char *tab; 108 u_int endbyte; 109 { 110 register int *cpos, *first, *last, i, indx; 111 register const u_char **bot, **l2, **p, *tr; 112 CONTEXT *stack, *stackp; 113 int c[NBUCKETS + 1], *max, *recd, t1, t2; 114 u_char ltab[NBUCKETS]; 115 116 if (nmemb <= 1) 117 return (0); 118 119 /* 120 * T1 is the constant part of the equation, the number of elements 121 * represented on the stack between the top and bottom entries. 122 */ 123 if (nmemb > __rspartition) 124 t1 = (nmemb / (__rspartition + 1) - 1) / (NBUCKETS - 2) + 1; 125 else 126 t1 = 0; 127 for (i = 0; t1; ++i) 128 t1 /= 2; 129 if (i) { 130 i *= NBUCKETS - 1; 131 if ((stack = stackp = malloc(i * sizeof(CONTEXT))) == NULL) 132 return (-1); 133 } 134 else 135 stack = stackp = NULL; 136 137 /* 138 * There are two arrays, l1 and l2. The data is sorted into the temp 139 * stack, and then copied back. The speedup of using the index to 140 * determine which stack the data is on and simply swapping stacks back 141 * and forth, thus avoiding the copy every iteration, turns out to not 142 * be any faster than the current implementation. 143 */ 144 if ((l2 = malloc(nmemb * sizeof(u_char **))) == NULL) 145 return (-1); 146 /* 147 * Tr references a table of sort weights; multiple entries may 148 * map to the same weight; EOS char must have the lowest weight. 149 */ 150 if (tab) { 151 tr = tab; 152 recd = c + tr[endbyte]; 153 if (recd != c && recd != c + NBUCKETS - 1) { 154 errno = EINVAL; 155 return (-1); 156 } 157 } 158 else { 159 for (t1 = 0, t2 = endbyte; t1 < t2; ++t1) 160 ltab[t1] = t1 + 1; 161 ltab[t2] = 0; 162 for (t1 = endbyte + 1; t1 < NBUCKETS; ++t1) 163 ltab[t1] = t1; 164 tr = ltab; 165 recd = c + tr[endbyte]; 166 } 167 last = c + NBUCKETS; 168 first = c; 169 endbyte = tr[endbyte]; 170 171 /* First sort is entire stack. */ 172 bot = l1; 173 indx = 0; 174 175 for (;;) { 176 /* Clear bucket count array */ 177 bzero(first, sizeof(c[0]) * (last - first + 1)); 178 *recd = 0; 179 180 /* 181 * Compute number of items that sort to the same bucket 182 * for this index. 183 */ 184 first = c + NBUCKETS - 1; 185 last = c; 186 for (p = bot, i = nmemb; --i >= 0;) { 187 ++*(cpos = c + tr[(*p++)[indx]]); 188 if (cpos > last && cpos != recd) 189 last = cpos; 190 if (cpos < first && cpos != recd) 191 first = cpos; 192 } 193 ++last; 194 195 /* 196 * Sum the number of characters into c, dividing the temp 197 * stack into the right number of buckets for this bucket, 198 * this index. C contains the cumulative total of keys 199 * before and included in this bucket, and will later be used 200 * as an index to the bucket. c[NBUCKETS] (or *last) contains 201 * the total number of elements, for determining how many 202 * elements the last bucket contains. At the same time 203 * find the largest bucket so it gets pushed first. 204 */ 205 t1 = (c == recd) ? c[0] : 0; 206 t2 = __rspartition; 207 for (i = last - (cpos = max = first); i-- >= 0;) { 208 if (*cpos > t2) { 209 t2 = *cpos; 210 max = cpos; 211 } 212 t1 = *cpos++ += t1; 213 } 214 if (c != recd) 215 *recd += t1; 216 217 /* 218 * Partition the elements into buckets; c decrements through 219 * the bucket, and ends up pointing to the first element of 220 * the bucket. 221 */ 222 for (i = nmemb; --i >= 0;) { 223 --p; 224 l2[--c[tr[(*p)[indx]]]] = *p; 225 } 226 227 /* Copy the partitioned elements back to user stack */ 228 bcopy(l2, bot, nmemb * sizeof(u_char *)); 229 230 ++indx; 231 /* 232 * Sort buckets as necessary; don't sort the EOS character 233 * bucket c[endbyte] since it is already sorted. *max is 234 * pushed first. 235 */ 236 for (i = last - (cpos = max); --i >= 0;) { 237 if ((nmemb = *(cpos+1) - (t1 = *cpos++)) < 2) 238 continue; 239 p = bot + t1; 240 if (nmemb > __rspartition) 241 STACKPUSH 242 else 243 shellsort(p, indx, nmemb, tr, endbyte); 244 } 245 for (i = max - (cpos = first); --i >= 0;) { 246 if ((nmemb = *(cpos + 1) - (t1 = *cpos++)) < 2) 247 continue; 248 p = bot + t1; 249 if (nmemb > __rspartition) 250 STACKPUSH 251 else 252 shellsort(p, indx, nmemb, tr, endbyte); 253 } 254 /* Break out when stack is empty. */ 255 STACKPOP 256 } 257 free(stack); 258 free(l2); 259 return (0); 260 } 261 262 /* 263 * Shellsort (diminishing increment sort) from Data Structures and 264 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 265 * see also Knuth Vol. 3, page 84. The increments are selected from 266 * formula (8), page 95. Roughly O(N^3/2). 267 */ 268 static void 269 shellsort(p, indx, nmemb, tr, recd) 270 register const u_char **p, *tr; 271 register int indx, nmemb, recd; 272 { 273 register const u_char *s1, *s2; 274 register u_char ch; 275 register int incr, *incrp, t1, t2; 276 277 for (incrp = __rsshell_increments; incr = *incrp++;) 278 for (t1 = incr; t1 < nmemb; ++t1) 279 for (t2 = t1 - incr; t2 >= 0;) { 280 s1 = p[t2] + indx; 281 s2 = p[t2 + incr] + indx; 282 while ((ch = tr[*s1++]) == tr[*s2] && 283 (ch != recd)) 284 ++s2; 285 if (ch > tr[*s2]) { 286 s1 = p[t2]; 287 p[t2] = p[t2 + incr]; 288 p[t2 + incr] = s1; 289 t2 -= incr; 290 } else 291 break; 292 } 293 } 294