1 /* @(#)isort.c	1.20 19/04/03 J. Schilling from cdparanoia-III-alpha9.8 */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 "@(#)isort.c	1.20 19/04/03 J. Schilling from cdparanoia-III-alpha9.8";
6 
7 #endif
8 /*
9  * CopyPolicy: GNU Lesser General Public License v2.1 applies
10  * Copyright (C) 1997-2001,2008 by Monty (xiphmont@mit.edu)
11  * Copyright (C) 2002-2019 by J. Schilling
12  *
13  * sorted vector abstraction for paranoia
14  *
15  */
16 
17 /*
18  * Old isort got a bit complex.  This re-constrains complexity to
19  * give a go at speed through a more alpha-6-like mechanism.
20  */
21 
22 /*
23  * "Sort" is a bit of a misnomer in this implementation.  It's actually
24  * basically a hash table of sample values (with a linked-list collision
25  * resolution), which lets you quickly determine where in a vector a
26  * particular sample value occurs.
27  *
28  * Collisions aren't due to hash collisions, as the table has one bucket
29  * for each possible sample value.  Instead, the "collisions" represent
30  * multiple occurrences of a given value.
31  */
32 
33 #include <schily/stdlib.h>
34 #include <schily/standard.h>
35 #include <schily/utypes.h>
36 #include <schily/string.h>
37 #include "p_block.h"
38 #include "isort.h"
39 #include "pmalloc.h"
40 
41 EXPORT	sort_info	*sort_alloc	__PR((long size));
42 EXPORT	void		sort_unsortall	__PR((sort_info * i));
43 EXPORT	void		sort_free	__PR((sort_info * i));
44 LOCAL	void		sort_sort	__PR((sort_info * i,
45 						long sortlo, long sorthi));
46 EXPORT	void		sort_setup	__PR((sort_info * i,
47 						Int16_t * vector,
48 						long *abspos, long size,
49 						long sortlo, long sorthi));
50 EXPORT	sort_link	*sort_getmatch	__PR((sort_info * i,
51 						long post, long overlap,
52 						int value));
53 EXPORT	sort_link	*sort_nextmatch	__PR((sort_info * i, sort_link * prev));
54 
55 
56 /*
57  * sort_alloc()
58  *
59  * Allocates and initializes a new, empty sort_info object, which can be
60  * used to index up to (size) samples from a vector.
61  */
62 EXPORT sort_info *
sort_alloc(size)63 sort_alloc(size)
64 	long	size;
65 {
66 	sort_info	*ret = _pcalloc(1, sizeof (sort_info));
67 
68 	DBG_MALLOC_MARK(ret);
69 	ret->vector = NULL;
70 	ret->sortbegin = -1;
71 	ret->size = -1;
72 	ret->maxsize = size;
73 
74 	ret->head = _pcalloc(65536, sizeof (sort_link *));
75 	DBG_MALLOC_MARK(ret->head);
76 	ret->bucketusage = _pmalloc(65536 * sizeof (long));
77 	DBG_MALLOC_MARK(ret->bucketusage);
78 	ret->revindex = _pcalloc(size, sizeof (sort_link));
79 	DBG_MALLOC_MARK(ret->revindex);
80 	ret->lastbucket = 0;
81 
82 	return (ret);
83 }
84 
85 /*
86  * sort_unsortall() (internal)
87  *
88  * This function resets the index for further use with a different vector
89  * or range, without the overhead of an unnecessary free/alloc.
90  */
91 EXPORT void
sort_unsortall(i)92 sort_unsortall(i)
93 	sort_info	*i;
94 {
95 	/*
96 	 * If there were few enough different samples encountered (and hence few
97 	 * enough buckets used), we can just zero out those buckets.  If there
98 	 * were many (2000 is picked somewhat arbitrarily), it's faster simply to
99 	 * zero out all buckets with a memset() rather than walking the data
100 	 * structure and zeroing them out one by one.
101 	 */
102 	if (i->lastbucket > 2000) {	/* a guess */
103 		memset(i->head, 0, 65536 * sizeof (sort_link *));
104 	} else {
105 		long	b;
106 
107 		for (b = 0; b < i->lastbucket; b++)
108 			i->head[i->bucketusage[b]] = NULL;
109 	}
110 
111 	i->lastbucket = 0;
112 	i->sortbegin = -1;
113 
114 	/*
115 	 * Curiously, this function preserves the vector association created
116 	 * by sort_setup(), but it is used only internally by sort_setup, so
117 	 * preserving this association is unnecessary.
118 	 */
119 }
120 
121 /*
122  * sort_free()
123  *
124  * Releases all memory consumed by a sort_info object.
125  */
126 EXPORT void
sort_free(i)127 sort_free(i)
128 	sort_info	*i;
129 {
130 	_pfree(i->revindex);
131 	_pfree(i->head);
132 	_pfree(i->bucketusage);
133 	_pfree(i);
134 }
135 
136 /*
137  * sort_sort() (internal)
138  *
139  * This function builds the index to allow for fast searching for sample
140  * values within a portion (sortlo - sorthi) of the object's associated
141  * vector.  It is called internally and only when needed.
142  */
143 LOCAL void
sort_sort(i,sortlo,sorthi)144 sort_sort(i, sortlo, sorthi)
145 	sort_info	*i;
146 	long		sortlo;
147 	long		sorthi;
148 {
149 	long	j;
150 
151 	/*
152 	 * We walk backward through the range to index because we insert new
153 	 * samples at the head of each bucket's list.  At the end, they'll be
154 	 * sorted from first to last occurrence.
155 	 */
156 	for (j = sorthi - 1; j >= sortlo; j--) {
157 		/*
158 		 * i->vector[j] = the signed 16-bit sample to index.
159 		 * hv		= pointer to the head of the sorted list of occurences
160 		 *		  of this sample
161 		 * l		= the node to associate with this sample
162 		 *
163 		 * We add 32768 to convert the signed 16-bit integer to an unsigned
164 		 * range from 0 to 65535.
165 		 *
166 		 * Note that l is located within i->revindex at a position
167 		 * corresponding to the sample's position in the vector.  This allows
168 		 * ipos() to determine the sample position from a returned sort_link.
169 		 */
170 		sort_link	**hv = i->head + i->vector[j] + 32768;
171 		sort_link	 *l = i->revindex + j;
172 
173 		/*
174 		 * If this is the first time we've encountered this sample, add its
175 		 * bucket to the list of buckets used.  This list is used only for
176 		 * resetting the index quickly.
177 		 */
178 		if (*hv == NULL) {
179 			i->bucketusage[i->lastbucket] = i->vector[j] + 32768;
180 			i->lastbucket++;
181 		}
182 		/*
183 		 * Point the new node at the old head, then assign the new node as
184 		 * the new head.
185 		 */
186 		l->next = *hv;
187 		*hv = l;
188 	}
189 	/*
190 	 * Mark the index as initialized.
191 	 */
192 	i->sortbegin = 0;
193 }
194 
195 /*
196  * sort_setup()
197  *
198  * This function initializes a previously allocated sort_info_t.  The
199  * sort_info_t is associated with a vector of samples of length
200  * (size), whose position begins at (*abspos) within the CD's stream
201  * of samples.  Only the range of samples between (sortlo, sorthi)
202  * will eventually be indexed for fast searching.  (sortlo, sorthi)
203  * are absolute sample positions.
204  *
205  * Note: size *must* be <= i->maxsize given to the preceding sort_alloc(),
206  * but no error checking is done here.
207  */
208 EXPORT void
sort_setup(i,vector,abspos,size,sortlo,sorthi)209 sort_setup(i, vector, abspos, size, sortlo, sorthi)
210 	sort_info	*i;
211 	Int16_t		*vector;
212 	long		*abspos;
213 	long		size;
214 	long		sortlo;
215 	long		sorthi;
216 {
217 	/*
218 	 * Reset the index if it has already been built.
219 	 */
220 	if (i->sortbegin != -1)
221 		sort_unsortall(i);
222 
223 	i->vector = vector;
224 	i->size = size;
225 	i->abspos = abspos;
226 
227 	/*
228 	 * Convert the absolute (sortlo, sorthi) to offsets within the vector.
229 	 * Note that the index will not be built until sort_getmatch() is called.
230 	 * Here we're simply hanging on to the range to index until then.
231 	 */
232 	i->lo = min(size, max(sortlo - *abspos, 0));
233 	i->hi = max(0, min(sorthi - *abspos, size));
234 }
235 
236 /*
237  * sort_getmatch()
238  *
239  * This function returns a sort_link_t pointer which refers to the
240  * first sample equal to (value) in the vector.  It only searches for
241  * hits within (overlap) samples of (post), where (post) is an offset
242  * within the vector.  The caller can determine the position of the
243  * matched sample using ipos(sort_info *, sort_link *).
244  *
245  * This function returns NULL if no matches were found.
246  */
247 EXPORT sort_link *
sort_getmatch(i,post,overlap,value)248 sort_getmatch(i, post, overlap, value)
249 	sort_info	*i;
250 	long		post;
251 	long		overlap;
252 	int		value;
253 {
254 	sort_link	*ret;
255 
256 	/*
257 	 * If the vector hasn't been indexed yet, index it now.
258 	 */
259 	if (i->sortbegin == -1)
260 		sort_sort(i, i->lo, i->hi);
261 	/*
262 	 * Now we reuse lo and hi
263 	 *
264 	 * We'll only return samples within (overlap) samples of (post).
265 	 * Clamp the boundaries to search to the boundaries of the array,
266 	 * convert the signed sample to an unsigned offset, and store the
267 	 * state so that future calls to sort_nextmatch do the right thing.
268 	 *
269 	 * Reusing lo and hi this way is awful.
270 	 */
271 	post = max(0, min(i->size, post));
272 	i->val = value + 32768;
273 	i->lo = max(0, post - overlap);		/* absolute position */
274 	i->hi = min(i->size, post + overlap);	/* absolute position */
275 
276 	/*
277 	 * Walk through the linked list of samples with this value, until
278 	 * we find the first one within the bounds specified.  If there
279 	 * aren't any, return NULL.
280 	 */
281 	ret = i->head[i->val];
282 	while (ret) {
283 		/*
284 		 * ipos() calculates the offset (in terms of the original vector)
285 		 * of this hit.
286 		 */
287 		if (ipos(i, ret) < i->lo) {
288 			ret = ret->next;
289 		} else {
290 			if (ipos(i, ret) >= i->hi)
291 				ret = NULL;
292 			break;
293 		}
294 	}
295 /*	i->head[i->val]=ret; */
296 	return (ret);
297 }
298 
299 /*
300  * sort_nextmatch()
301  *
302  * This function returns a sort_link_t pointer which refers to the next sample
303  * matching the criteria previously passed to sort_getmatch().  See
304  * sort_getmatch() for details.
305  *
306  * This function returns NULL if no further matches were found.
307  */
308 EXPORT sort_link *
sort_nextmatch(i,prev)309 sort_nextmatch(i, prev)
310 	sort_info	*i;
311 	sort_link	*prev;
312 {
313 	sort_link	*ret = prev->next;
314 
315 	/*
316 	 * If there aren't any more hits, or we've passed the boundary requested
317 	 * of sort_getmatch(), we're done.
318 	 */
319 	if (!ret || ipos(i, ret) >= i->hi)
320 		return (NULL);
321 	return (ret);
322 }
323