1 /* @(#)gap.c	1.18 09/07/11 J. Schilling from cdparanoia-III-alpha9.8 */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 "@(#)gap.c	1.18 09/07/11 J. Schilling from cdparanoia-III-alpha9.8";
6 
7 #endif
8 /*
9  * CopyPolicy: GNU Lesser General Public License v2.1 applies
10  * Copyright (C) 1997-2001,2008 by Monty (xiphmont@mit.edu)
11  * Copyright (C) 2002-2009 by J. Schilling
12  *
13  * Gap analysis support code for paranoia
14  *
15  */
16 
17 #include <schily/standard.h>
18 #include <schily/utypes.h>
19 #include <schily/string.h>
20 #include "p_block.h"
21 #include "cdda_paranoia.h"
22 #include "gap.h"
23 
24 EXPORT long	i_paranoia_overlap_r	__PR((Int16_t * buffA, Int16_t * buffB,
25 						long offsetA, long offsetB));
26 EXPORT long	i_paranoia_overlap_f	__PR((Int16_t * buffA, Int16_t * buffB,
27 						long offsetA, long offsetB,
28 						long sizeA, long sizeB));
29 EXPORT int	i_stutter_or_gap	__PR((Int16_t * A, Int16_t * B,
30 						long offA, long offB,
31 						long gap));
32 EXPORT void	i_analyze_rift_f	__PR((Int16_t * A, Int16_t * B,
33 						long sizeA, long sizeB,
34 						long aoffset, long boffset,
35 						long *matchA, long *matchB,
36 						long *matchC));
37 EXPORT void	i_analyze_rift_r	__PR((Int16_t * A, Int16_t * B,
38 						long sizeA, long sizeB,
39 						long aoffset, long boffset,
40 						long *matchA, long *matchB,
41 						long *matchC));
42 
43 EXPORT void	analyze_rift_silence_f	__PR((Int16_t * A, Int16_t * B,
44 						long sizeA, long sizeB,
45 						long aoffset, long boffset,
46 						long *matchA, long *matchB));
47 
48 /*
49  * Gap analysis code
50  */
51 
52 /*
53  * i_paranoia_overlap_r (internal)
54  *
55  * This function seeks backward through two vectors (starting at the given
56  * offsets) to determine how many consecutive samples agree.  It returns
57  * the number of matching samples, which may be 0.
58  *
59  * Unlike its sibling, i_paranoia_overlap_f, this function doesn't need to
60  * be given the size of the vectors (all vectors stop at offset 0).
61  *
62  * This function is used by i_analyze_rift_r() below to find where a
63  * leading rift ends.
64  */
65 EXPORT long
i_paranoia_overlap_r(buffA,buffB,offsetA,offsetB)66 i_paranoia_overlap_r(buffA, buffB, offsetA, offsetB)
67 	Int16_t	*buffA;
68 	Int16_t	*buffB;
69 	long	offsetA;
70 	long	offsetB;
71 {
72 	long		beginA = offsetA;
73 	long		beginB = offsetB;
74 
75 	/*
76 	 * Start at the given offsets and work our way backwards until we hit
77 	 * the beginning of one of the vectors.
78 	 */
79 	for (; beginA >= 0 && beginB >= 0; beginA--, beginB--)
80 		if (buffA[beginA] != buffB[beginB])
81 			break;
82 
83 	return (offsetA - beginA);
84 }
85 
86 /*
87  * i_paranoia_overlap_f (internal)
88  *
89  * This function seeks forward through two vectors (starting at the given
90  * offsets) to determine how many consecutive samples agree.  It returns
91  * the number of matching samples, which may be 0.
92  *
93  * Unlike its sibling, i_paranoia_overlap_r, this function needs to given
94  * the size of the vectors.
95  *
96  * This function is used by i_analyze_rift_f() below to find where a
97  * trailing rift ends.
98  */
99 EXPORT long
i_paranoia_overlap_f(buffA,buffB,offsetA,offsetB,sizeA,sizeB)100 i_paranoia_overlap_f(buffA, buffB, offsetA, offsetB, sizeA, sizeB)
101 	Int16_t	*buffA;
102 	Int16_t	*buffB;
103 	long	offsetA;
104 	long	offsetB;
105 	long	sizeA;
106 	long	sizeB;
107 {
108 	long		endA = offsetA;
109 	long		endB = offsetB;
110 
111 	/*
112 	 * Start at the given offsets and work our way forward until we hit
113 	 * the end of one of the vectors.
114 	 */
115 	for (; endA < sizeA && endB < sizeB; endA++, endB++)
116 		if (buffA[endA] != buffB[endB])
117 			break;
118 
119 	return (endA - offsetA);
120 }
121 
122 /*
123  * i_stutter_or_gap (internal)
124  *
125  * This function compares (gap) samples of two vectors at the given offsets.
126  * It returns 0 if all the samples are identical, or nonzero if they differ.
127  *
128  * This is used by i_analyze_rift_[rf] below to determine whether a rift
129  * contains samples dropped by the other vector (that should be inserted),
130  * or whether the rift contains a stutter (that should be dropped).  See
131  * i_analyze_rift_[rf] for more details.
132  */
133 EXPORT int
i_stutter_or_gap(A,B,offA,offB,gap)134 i_stutter_or_gap(A, B, offA, offB, gap)
135 	Int16_t	*A;
136 	Int16_t	*B;
137 	long	offA;
138 	long	offB;
139 	long	gap;
140 {
141 	long		a1 = offA;
142 	long		b1 = offB;
143 
144 	/* BEGIN CSTYLED */
145 	/*
146 	 * If the rift was so big that there aren't enough samples in the other
147 	 * vector to compare against the full gap, then just compare what we
148 	 * have available.  E.g.:
149 	 *
150 	 *            (5678)|(newly matching run ...)
151 	 *    (... 12345678)| (345678) |(newly matching run ...)
152 	 *
153 	 * In this case, a1 would be -2, since we'd want to compare 6 samples
154 	 * against a vector that had only 4.  So we start 2 samples later, and
155 	 * compare the 4 available samples.
156 	 *
157 	 * Again, this approach to identifying stutters is simply a heuristic,
158 	 * so this may not produce correct results in all cases.
159 	 */
160 	/* END CSTYLED */
161 	if (a1 < 0) {
162 		/*
163 		 * Note that a1 is negative, so we're increasing b1 and decreasing (gap).
164 		 */
165 		b1 -= a1;
166 		gap += a1;
167 		a1 = 0;
168 	}
169 
170 	/*
171 	 * Note that we don't have an equivalent adjustment for leading rifts.
172 	 * Thus, it's possible for the following memcmp() to run off the end
173 	 * of A.  See the bug note in i_analyze_rift_r().
174 	 *
175 	 * Multiply gap by 2 because samples are 2 bytes long and memcmp compares
176 	 * at the byte level.
177 	 */
178 	return (memcmp(A + a1, B + b1, gap * 2));
179 }
180 
181 /*
182  * riftv is the first value into the rift -> or <-
183  *
184  * i_analyze_rift_f (internal)
185  *
186  * This function examines a trailing rift to see how far forward the rift goes
187  * and to determine what kind of rift it is.  This function is called by
188  * i_stage2_each() when a trailing rift is detected.  (aoffset,boffset) are
189  * the offsets into (A,B) of the first mismatching sample.
190  *
191  * This function returns:
192  *  matchA  > 0 if there are (matchA) samples missing from A
193  *  matchA  < 0 if there are (-matchA) duplicate samples (stuttering) in A
194  *  matchB  > 0 if there are (matchB) samples missing from B
195  *  matchB  < 0 if there are (-matchB) duplicate samples in B
196  *  matchC != 0 if there are (matchC) samples of garbage, after which
197  *              both A and B are in sync again
198  */
199 EXPORT void
i_analyze_rift_f(A,B,sizeA,sizeB,aoffset,boffset,matchA,matchB,matchC)200 i_analyze_rift_f(A, B, sizeA, sizeB, aoffset, boffset, matchA, matchB, matchC)
201 	Int16_t	*A;
202 	Int16_t	*B;
203 	long	sizeA;
204 	long	sizeB;
205 	long	aoffset;
206 	long	boffset;
207 	long	*matchA;
208 	long	*matchB;
209 	long	*matchC;
210 {
211 
212 	long		apast = sizeA - aoffset;
213 	long		bpast = sizeB - boffset;
214 	long		i;
215 
216 	*matchA = 0, *matchB = 0, *matchC = 0;
217 
218 	/* BEGIN CSTYLED */
219 	/*
220 	 * Look forward to see where we regain agreement between vectors
221 	 * A and B (of at least MIN_WORDS_RIFT samples).  We look for one of
222 	 * the following possible matches:
223 	 *
224 	 *                         edge
225 	 *                          v
226 	 * (1)  (... A matching run)|(aoffset matches ...)
227 	 *      (... B matching run)| (rift) |(boffset+i matches ...)
228 	 *
229 	 * (2)  (... A matching run)| (rift) |(aoffset+i matches ...)
230 	 *      (... B matching run)|(boffset matches ...)
231 	 *
232 	 * (3)  (... A matching run)| (rift) |(aoffset+i matches ...)
233 	 *      (... B matching run)| (rift) |(boffset+i matches ...)
234 	 *
235 	 * Anything that doesn't match one of these three is too corrupt to
236 	 * for us to recover from.  E.g.:
237 	 *
238 	 *      (... A matching run)| (rift) |(eventual match ...)
239 	 *      (... B matching run)| (big rift) |(eventual match ...)
240 	 *
241 	 * We won't find the eventual match, since we wouldn't be sure how
242 	 * to fix the rift.
243 	 */
244 	/* END CSTYLED */
245 	for (i = 0; ; i++) {
246 		/*
247 		 * Search for whatever case we hit first, so as to end up with the
248 		 * smallest rift.
249 		 */
250 		if (i < bpast) { /* Don't search for (1) past the end of B */
251 			/*
252 			 * See if we match case (1) above, which either means that A dropped
253 			 * samples at the rift, or that B stuttered.
254 			 */
255 			if (i_paranoia_overlap_f(A, B, aoffset, boffset + i, sizeA, sizeB) >= MIN_WORDS_RIFT) {
256 				*matchA = i;
257 				break;
258 			}
259 		}
260 		if (i < apast) { /* Don't search for (2) or (3) past the beginning of A */
261 			/*
262 			 * See if we match case (2) above, which either means that B dropped
263 			 * samples at the rift, or that A stuttered.
264 			 */
265 			if (i_paranoia_overlap_f(A, B, aoffset + i, boffset, sizeA, sizeB) >= MIN_WORDS_RIFT) {
266 				*matchB = i;
267 				break;
268 			}
269 			if (i < bpast)	/* Don't search for (3) past the end of B */
270 				/*
271 				 * See if we match case (3) above, which means that a fixed-length
272 				 * rift of samples is getting read unreliably.
273 				 */
274 				if (i_paranoia_overlap_f(A, B, aoffset + i, boffset + i, sizeA, sizeB) >= MIN_WORDS_RIFT) {
275 					*matchC = i;
276 					break;
277 				}
278 		} else if (i >= bpast) {
279 			/*
280 			 * Stop searching when we've reached the end of both vectors.
281 			 * In theory we could stop when there aren't MIN_WORDS_RIFT samples
282 			 * left in both vectors, but this case should happen fairly rarely.
283 			 */
284 			break;
285 		}
286 	}
287 
288 	if (*matchA == 0 && *matchB == 0 && *matchC == 0)
289 		return;
290 
291 	if (*matchC)
292 		return;
293 
294 	/* BEGIN CSTYLED */
295 	/*
296 	 * For case (1) or (2), we need to determine whether the rift contains
297 	 * samples dropped by the other vector (that should be inserted), or
298 	 * whether the rift contains a stutter (that should be dropped).  To
299 	 * distinguish, we check the contents of the rift against the good samples
300 	 * just before the rift.  If the contents match, then the rift contains
301 	 * a stutter.
302 	 *
303 	 * A stutter in the second vector:
304 	 *     (...good samples... 1234)|(567 ...newly matched run...)
305 	 *     (...good samples... 1234)| (1234) | (567 ...newly matched run)
306 	 *
307 	 * Samples missing from the first vector:
308 	 *     (...good samples... 1234)|(901 ...newly matched run...)
309 	 *     (...good samples... 1234)| (5678) |(901 ...newly matched run...)
310 	 *
311 	 * Of course, there's no theoretical guarantee that a non-stutter
312 	 * truly represents missing samples, but given that we're dealing with
313 	 * verified fragments in stage 2, we can have some confidence that this
314 	 * is the case.
315 	 */
316 	/* END CSTYLED */
317 	if (*matchA) {
318 		/*
319 		 * For case (1), we need to determine whether A dropped samples at the
320 		 * rift or whether B stuttered.
321 		 *
322 		 * If the rift doesn't match the good samples in A (and hence in B),
323 		 * it's not a stutter, and the rift should be inserted into A.
324 		 */
325 		if (i_stutter_or_gap(A, B, aoffset - *matchA, boffset, *matchA))
326 			return;
327 		*matchB = -*matchA;	/* signify we need to remove n bytes */
328 					/* from B */
329 		*matchA = 0;
330 		return;
331 	} else {
332 		/*
333 		 * Case (2) is the inverse of case (1) above.
334 		 */
335 		if (i_stutter_or_gap(B, A, boffset - *matchB, aoffset, *matchB))
336 			return;
337 		*matchA = -*matchB;
338 		*matchB = 0;
339 		return;
340 	}
341 }
342 
343 /*
344  * riftv must be first even val of rift moving back
345  *
346  * i_analyze_rift_r (internal)
347  *
348  * This function examines a leading rift to see how far back the rift goes
349  * and to determine what kind of rift it is.  This function is called by
350  * i_stage2_each() when a leading rift is detected.  (aoffset,boffset) are
351  * the offsets into (A,B) of the first mismatching sample.
352  *
353  * This function returns:
354  *  matchA  > 0 if there are (matchA) samples missing from A
355  *  matchA  < 0 if there are (-matchA) duplicate samples (stuttering) in A
356  *  matchB  > 0 if there are (matchB) samples missing from B
357  *  matchB  < 0 if there are (-matchB) duplicate samples in B
358  *  matchC != 0 if there are (matchC) samples of garbage, after which
359  *              both A and B are in sync again
360  */
361 EXPORT void
i_analyze_rift_r(A,B,sizeA,sizeB,aoffset,boffset,matchA,matchB,matchC)362 i_analyze_rift_r(A, B, sizeA, sizeB, aoffset, boffset, matchA, matchB, matchC)
363 	Int16_t	*A;
364 	Int16_t	*B;
365 	long	sizeA;
366 	long	sizeB;
367 	long	aoffset;
368 	long	boffset;
369 	long	*matchA;
370 	long	*matchB;
371 	long	*matchC;
372 {
373 
374 	long		apast = aoffset + 1;
375 	long		bpast = boffset + 1;
376 	long		i;
377 
378 	*matchA = 0, *matchB = 0, *matchC = 0;
379 
380 	/* BEGIN CSTYLED */
381 	/*
382 	 * Look backward to see where we regain agreement between vectors
383 	 * A and B (of at least MIN_WORDS_RIFT samples).  We look for one of
384 	 * the following possible matches:
385 	 *
386 	 *                                    edge
387 	 *                                      v
388 	 * (1)             (... aoffset matches)|(A matching run ...)
389 	 *      (... boffset-i matches)| (rift) |(B matching run ...)
390 	 *
391 	 * (2)  (... aoffset-i matches)| (rift) |(A matching run ...)
392 	 *                (... boffset matches)|(B matching run ...)
393 	 *
394 	 * (3)  (... aoffset-i matches)| (rift) |(A matching run ...)
395 	 *      (... boffset-i matches)| (rift) |(B matching run ...)
396 	 *
397 	 * Anything that doesn't match one of these three is too corrupt to
398 	 * for us to recover from.  E.g.:
399 	 *
400 	 *         (... eventual match)| (rift) |(A matching run ...)
401 	 *    (... eventual match) | (big rift) |(B matching run ...)
402 	 *
403 	 * We won't find the eventual match, since we wouldn't be sure how
404 	 * to fix the rift.
405 	 */
406 	/* END CSTYLED */
407 	for (i = 0; ; i++) {
408 		/*
409 		 * Search for whatever case we hit first, so as to end up with the
410 		 * smallest rift.
411 		 */
412 		if (i < bpast) { /* Don't search for (1) past the beginning of B */
413 			/*
414 			 * See if we match case (1) above, which either means that A dropped
415 			 * samples at the rift, or that B stuttered.
416 			 */
417 			if (i_paranoia_overlap_r(A, B, aoffset, boffset - i) >= MIN_WORDS_RIFT) {
418 				*matchA = i;
419 				break;
420 			}
421 		}
422 		if (i < apast) { /* Don't search for (2) or (3) past the beginning of A */
423 			/*
424 			 * See if we match case (2) above, which either means that B dropped
425 			 * samples at the rift, or that A stuttered.
426 			 */
427 			if (i_paranoia_overlap_r(A, B, aoffset - i, boffset) >= MIN_WORDS_RIFT) {
428 				*matchB = i;
429 				break;
430 			}
431 			if (i < bpast) { /* Don't search for (3) past the beginning of B */
432 				/*
433 				 * See if we match case (3) above, which means that a fixed-length
434 				 * rift of samples is getting read unreliably.
435 				 */
436 				if (i_paranoia_overlap_r(A, B, aoffset - i, boffset - i) >= MIN_WORDS_RIFT) {
437 					*matchC = i;
438 					break;
439 				}
440 			}
441 		} else if (i >= bpast) {
442 			/*
443 			 * Stop searching when we've reached the end of both vectors.
444 			 * In theory we could stop when there aren't MIN_WORDS_RIFT samples
445 			 * left in both vectors, but this case should happen fairly rarely.
446 			 */
447 			break;
448 		}
449 		/*
450 		 * Try the search again with a larger tentative rift.
451 		 */
452 	}
453 
454 	if (*matchA == 0 && *matchB == 0 && *matchC == 0)
455 		return;
456 
457 	if (*matchC)
458 		return;
459 
460 	/* BEGIN CSTYLED */
461 	/*
462 	 * For case (1) or (2), we need to determine whether the rift contains
463 	 * samples dropped by the other vector (that should be inserted), or
464 	 * whether the rift contains a stutter (that should be dropped).  To
465 	 * distinguish, we check the contents of the rift against the good samples
466 	 * just after the rift.  If the contents match, then the rift contains
467 	 * a stutter.
468 	 *
469 	 * A stutter in the second vector:
470 	 *              (...newly matched run... 234)|(5678 ...good samples...)
471 	 *     (...newly matched run... 234)| (5678) |(5678 ...good samples...)
472 	 *
473 	 * Samples missing from the first vector:
474 	 *              (...newly matched run... 890)|(5678 ...good samples...)
475 	 *     (...newly matched run... 890)| (1234) |(5678 ...good samples...)
476 	 *
477 	 * Of course, there's no theoretical guarantee that a non-stutter
478 	 * truly represents missing samples, but given that we're dealing with
479 	 * verified fragments in stage 2, we can have some confidence that this
480 	 * is the case.
481 	 */
482 	/* END CSTYLED */
483 	if (*matchA) {
484 		/*
485 		 * For case (1), we need to determine whether A dropped samples at the
486 		 * rift or whether B stuttered.
487 		 *
488 		 * If the rift doesn't match the good samples in A (and hence in B),
489 		 * it's not a stutter, and the rift should be inserted into A.
490 		 *
491 		 * ???BUG??? It's possible for aoffset+1+*matchA to be > sizeA, in
492 		 * which case the comparison in i_stutter_or_gap() will extend beyond
493 		 * the bounds of A.  Thankfully, this isn't writing data and thus
494 		 * trampling memory, but it's still a memory access error that should
495 		 * be fixed.
496 		 *
497 		 * This bug is not fixed yet.
498 		 */
499 		if (i_stutter_or_gap(A, B, aoffset + 1, boffset - *matchA + 1, *matchA))
500 			return;
501 
502 		/*
503 		 * It is a stutter, so we need to signal that we need to remove
504 		 * (matchA) bytes from B.
505 		 */
506 		*matchB = -*matchA;	/* signify we need to remove n bytes */
507 					/* from B */
508 		*matchA = 0;
509 		return;
510 	} else {
511 		/*
512 		 * Case (2) is the inverse of case (1) above.
513 		 */
514 		if (i_stutter_or_gap(B, A, boffset + 1, aoffset - *matchB + 1, *matchB))
515 			return;
516 		*matchA = -*matchB;
517 		*matchB = 0;
518 		return;
519 	}
520 }
521 
522 /*
523  * analyze_rift_silence_f (internal)
524  *
525  * This function examines the fragment and root from the rift onward to
526  * see if they have a rift's worth of silence (or if they end with silence).
527  * It sets (*matchA) to -1 if A's rift is silence, (*matchB) to -1 if B's
528  * rift is silence, and sets them to 0 otherwise.
529  *
530  * Note that, unlike every other function in cdparanoia, this function
531  * considers any repeated value to be silence (which, in effect, it is).
532  * All other functions only consider repeated zeroes to be silence.
533  *
534  * This function is called by i_stage2_each() if it runs into a trailing rift
535  * that i_analyze_rift_f couldn't diagnose.  This checks for another variant:
536  * where one vector has silence and the other doesn't.  We then assume
537  * that the silence (and anything following it) is garbage.
538  *
539  * Note that while this function checks both A and B for silence, the caller
540  * assumes that only one or the other has silence.
541  */
542 EXPORT void
analyze_rift_silence_f(A,B,sizeA,sizeB,aoffset,boffset,matchA,matchB)543 analyze_rift_silence_f(A, B, sizeA, sizeB, aoffset, boffset, matchA, matchB)
544 	Int16_t	*A;
545 	Int16_t	*B;
546 	long	sizeA;
547 	long	sizeB;
548 	long	aoffset;
549 	long	boffset;
550 	long	*matchA;
551 	long	*matchB;
552 {
553 	*matchA = -1;
554 	*matchB = -1;
555 
556 	sizeA = min(sizeA, aoffset + MIN_WORDS_RIFT);
557 	sizeB = min(sizeB, boffset + MIN_WORDS_RIFT);
558 
559 	aoffset++;
560 	boffset++;
561 
562 	/*
563 	 * Check whether A has only "silence" within the search range.  Note
564 	 * that "silence" here is a single, repeated value (zero or not).
565 	 */
566 	while (aoffset < sizeA) {
567 		if (A[aoffset] != A[aoffset - 1]) {
568 			*matchA = 0;
569 			break;
570 		}
571 		aoffset++;
572 	}
573 
574 	/*
575 	 * Check whether B has only "silence" within the search range.  Note
576 	 * that "silence" here is a single, repeated value (zero or not).
577 	 *
578 	 * Also note that while the caller assumes that only matchA or matchB
579 	 * is set, we check both vectors here.
580 	 */
581 	while (boffset < sizeB) {
582 		if (B[boffset] != B[boffset - 1]) {
583 			*matchB = 0;
584 			break;
585 		}
586 		boffset++;
587 	}
588 }
589