1 /* @(#)isort.c 1.20 19/04/03 J. Schilling from cdparanoia-III-alpha9.8 */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static UConst char sccsid[] =
5 "@(#)isort.c 1.20 19/04/03 J. Schilling from cdparanoia-III-alpha9.8";
6
7 #endif
8 /*
9 * CopyPolicy: GNU Lesser General Public License v2.1 applies
10 * Copyright (C) 1997-2001,2008 by Monty (xiphmont@mit.edu)
11 * Copyright (C) 2002-2019 by J. Schilling
12 *
13 * sorted vector abstraction for paranoia
14 *
15 */
16
17 /*
18 * Old isort got a bit complex. This re-constrains complexity to
19 * give a go at speed through a more alpha-6-like mechanism.
20 */
21
22 /*
23 * "Sort" is a bit of a misnomer in this implementation. It's actually
24 * basically a hash table of sample values (with a linked-list collision
25 * resolution), which lets you quickly determine where in a vector a
26 * particular sample value occurs.
27 *
28 * Collisions aren't due to hash collisions, as the table has one bucket
29 * for each possible sample value. Instead, the "collisions" represent
30 * multiple occurrences of a given value.
31 */
32
33 #include <schily/stdlib.h>
34 #include <schily/standard.h>
35 #include <schily/utypes.h>
36 #include <schily/string.h>
37 #include "p_block.h"
38 #include "isort.h"
39 #include "pmalloc.h"
40
41 EXPORT sort_info *sort_alloc __PR((long size));
42 EXPORT void sort_unsortall __PR((sort_info * i));
43 EXPORT void sort_free __PR((sort_info * i));
44 LOCAL void sort_sort __PR((sort_info * i,
45 long sortlo, long sorthi));
46 EXPORT void sort_setup __PR((sort_info * i,
47 Int16_t * vector,
48 long *abspos, long size,
49 long sortlo, long sorthi));
50 EXPORT sort_link *sort_getmatch __PR((sort_info * i,
51 long post, long overlap,
52 int value));
53 EXPORT sort_link *sort_nextmatch __PR((sort_info * i, sort_link * prev));
54
55
56 /*
57 * sort_alloc()
58 *
59 * Allocates and initializes a new, empty sort_info object, which can be
60 * used to index up to (size) samples from a vector.
61 */
62 EXPORT sort_info *
sort_alloc(size)63 sort_alloc(size)
64 long size;
65 {
66 sort_info *ret = _pcalloc(1, sizeof (sort_info));
67
68 DBG_MALLOC_MARK(ret);
69 ret->vector = NULL;
70 ret->sortbegin = -1;
71 ret->size = -1;
72 ret->maxsize = size;
73
74 ret->head = _pcalloc(65536, sizeof (sort_link *));
75 DBG_MALLOC_MARK(ret->head);
76 ret->bucketusage = _pmalloc(65536 * sizeof (long));
77 DBG_MALLOC_MARK(ret->bucketusage);
78 ret->revindex = _pcalloc(size, sizeof (sort_link));
79 DBG_MALLOC_MARK(ret->revindex);
80 ret->lastbucket = 0;
81
82 return (ret);
83 }
84
85 /*
86 * sort_unsortall() (internal)
87 *
88 * This function resets the index for further use with a different vector
89 * or range, without the overhead of an unnecessary free/alloc.
90 */
91 EXPORT void
sort_unsortall(i)92 sort_unsortall(i)
93 sort_info *i;
94 {
95 /*
96 * If there were few enough different samples encountered (and hence few
97 * enough buckets used), we can just zero out those buckets. If there
98 * were many (2000 is picked somewhat arbitrarily), it's faster simply to
99 * zero out all buckets with a memset() rather than walking the data
100 * structure and zeroing them out one by one.
101 */
102 if (i->lastbucket > 2000) { /* a guess */
103 memset(i->head, 0, 65536 * sizeof (sort_link *));
104 } else {
105 long b;
106
107 for (b = 0; b < i->lastbucket; b++)
108 i->head[i->bucketusage[b]] = NULL;
109 }
110
111 i->lastbucket = 0;
112 i->sortbegin = -1;
113
114 /*
115 * Curiously, this function preserves the vector association created
116 * by sort_setup(), but it is used only internally by sort_setup, so
117 * preserving this association is unnecessary.
118 */
119 }
120
121 /*
122 * sort_free()
123 *
124 * Releases all memory consumed by a sort_info object.
125 */
126 EXPORT void
sort_free(i)127 sort_free(i)
128 sort_info *i;
129 {
130 _pfree(i->revindex);
131 _pfree(i->head);
132 _pfree(i->bucketusage);
133 _pfree(i);
134 }
135
136 /*
137 * sort_sort() (internal)
138 *
139 * This function builds the index to allow for fast searching for sample
140 * values within a portion (sortlo - sorthi) of the object's associated
141 * vector. It is called internally and only when needed.
142 */
143 LOCAL void
sort_sort(i,sortlo,sorthi)144 sort_sort(i, sortlo, sorthi)
145 sort_info *i;
146 long sortlo;
147 long sorthi;
148 {
149 long j;
150
151 /*
152 * We walk backward through the range to index because we insert new
153 * samples at the head of each bucket's list. At the end, they'll be
154 * sorted from first to last occurrence.
155 */
156 for (j = sorthi - 1; j >= sortlo; j--) {
157 /*
158 * i->vector[j] = the signed 16-bit sample to index.
159 * hv = pointer to the head of the sorted list of occurences
160 * of this sample
161 * l = the node to associate with this sample
162 *
163 * We add 32768 to convert the signed 16-bit integer to an unsigned
164 * range from 0 to 65535.
165 *
166 * Note that l is located within i->revindex at a position
167 * corresponding to the sample's position in the vector. This allows
168 * ipos() to determine the sample position from a returned sort_link.
169 */
170 sort_link **hv = i->head + i->vector[j] + 32768;
171 sort_link *l = i->revindex + j;
172
173 /*
174 * If this is the first time we've encountered this sample, add its
175 * bucket to the list of buckets used. This list is used only for
176 * resetting the index quickly.
177 */
178 if (*hv == NULL) {
179 i->bucketusage[i->lastbucket] = i->vector[j] + 32768;
180 i->lastbucket++;
181 }
182 /*
183 * Point the new node at the old head, then assign the new node as
184 * the new head.
185 */
186 l->next = *hv;
187 *hv = l;
188 }
189 /*
190 * Mark the index as initialized.
191 */
192 i->sortbegin = 0;
193 }
194
195 /*
196 * sort_setup()
197 *
198 * This function initializes a previously allocated sort_info_t. The
199 * sort_info_t is associated with a vector of samples of length
200 * (size), whose position begins at (*abspos) within the CD's stream
201 * of samples. Only the range of samples between (sortlo, sorthi)
202 * will eventually be indexed for fast searching. (sortlo, sorthi)
203 * are absolute sample positions.
204 *
205 * Note: size *must* be <= i->maxsize given to the preceding sort_alloc(),
206 * but no error checking is done here.
207 */
208 EXPORT void
sort_setup(i,vector,abspos,size,sortlo,sorthi)209 sort_setup(i, vector, abspos, size, sortlo, sorthi)
210 sort_info *i;
211 Int16_t *vector;
212 long *abspos;
213 long size;
214 long sortlo;
215 long sorthi;
216 {
217 /*
218 * Reset the index if it has already been built.
219 */
220 if (i->sortbegin != -1)
221 sort_unsortall(i);
222
223 i->vector = vector;
224 i->size = size;
225 i->abspos = abspos;
226
227 /*
228 * Convert the absolute (sortlo, sorthi) to offsets within the vector.
229 * Note that the index will not be built until sort_getmatch() is called.
230 * Here we're simply hanging on to the range to index until then.
231 */
232 i->lo = min(size, max(sortlo - *abspos, 0));
233 i->hi = max(0, min(sorthi - *abspos, size));
234 }
235
236 /*
237 * sort_getmatch()
238 *
239 * This function returns a sort_link_t pointer which refers to the
240 * first sample equal to (value) in the vector. It only searches for
241 * hits within (overlap) samples of (post), where (post) is an offset
242 * within the vector. The caller can determine the position of the
243 * matched sample using ipos(sort_info *, sort_link *).
244 *
245 * This function returns NULL if no matches were found.
246 */
247 EXPORT sort_link *
sort_getmatch(i,post,overlap,value)248 sort_getmatch(i, post, overlap, value)
249 sort_info *i;
250 long post;
251 long overlap;
252 int value;
253 {
254 sort_link *ret;
255
256 /*
257 * If the vector hasn't been indexed yet, index it now.
258 */
259 if (i->sortbegin == -1)
260 sort_sort(i, i->lo, i->hi);
261 /*
262 * Now we reuse lo and hi
263 *
264 * We'll only return samples within (overlap) samples of (post).
265 * Clamp the boundaries to search to the boundaries of the array,
266 * convert the signed sample to an unsigned offset, and store the
267 * state so that future calls to sort_nextmatch do the right thing.
268 *
269 * Reusing lo and hi this way is awful.
270 */
271 post = max(0, min(i->size, post));
272 i->val = value + 32768;
273 i->lo = max(0, post - overlap); /* absolute position */
274 i->hi = min(i->size, post + overlap); /* absolute position */
275
276 /*
277 * Walk through the linked list of samples with this value, until
278 * we find the first one within the bounds specified. If there
279 * aren't any, return NULL.
280 */
281 ret = i->head[i->val];
282 while (ret) {
283 /*
284 * ipos() calculates the offset (in terms of the original vector)
285 * of this hit.
286 */
287 if (ipos(i, ret) < i->lo) {
288 ret = ret->next;
289 } else {
290 if (ipos(i, ret) >= i->hi)
291 ret = NULL;
292 break;
293 }
294 }
295 /* i->head[i->val]=ret; */
296 return (ret);
297 }
298
299 /*
300 * sort_nextmatch()
301 *
302 * This function returns a sort_link_t pointer which refers to the next sample
303 * matching the criteria previously passed to sort_getmatch(). See
304 * sort_getmatch() for details.
305 *
306 * This function returns NULL if no further matches were found.
307 */
308 EXPORT sort_link *
sort_nextmatch(i,prev)309 sort_nextmatch(i, prev)
310 sort_info *i;
311 sort_link *prev;
312 {
313 sort_link *ret = prev->next;
314
315 /*
316 * If there aren't any more hits, or we've passed the boundary requested
317 * of sort_getmatch(), we're done.
318 */
319 if (!ret || ipos(i, ret) >= i->hi)
320 return (NULL);
321 return (ret);
322 }
323