1 /* @(#)gap.c 1.18 09/07/11 J. Schilling from cdparanoia-III-alpha9.8 */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static UConst char sccsid[] =
5 "@(#)gap.c 1.18 09/07/11 J. Schilling from cdparanoia-III-alpha9.8";
6
7 #endif
8 /*
9 * CopyPolicy: GNU Lesser General Public License v2.1 applies
10 * Copyright (C) 1997-2001,2008 by Monty (xiphmont@mit.edu)
11 * Copyright (C) 2002-2009 by J. Schilling
12 *
13 * Gap analysis support code for paranoia
14 *
15 */
16
17 #include <schily/standard.h>
18 #include <schily/utypes.h>
19 #include <schily/string.h>
20 #include "p_block.h"
21 #include "cdda_paranoia.h"
22 #include "gap.h"
23
24 EXPORT long i_paranoia_overlap_r __PR((Int16_t * buffA, Int16_t * buffB,
25 long offsetA, long offsetB));
26 EXPORT long i_paranoia_overlap_f __PR((Int16_t * buffA, Int16_t * buffB,
27 long offsetA, long offsetB,
28 long sizeA, long sizeB));
29 EXPORT int i_stutter_or_gap __PR((Int16_t * A, Int16_t * B,
30 long offA, long offB,
31 long gap));
32 EXPORT void i_analyze_rift_f __PR((Int16_t * A, Int16_t * B,
33 long sizeA, long sizeB,
34 long aoffset, long boffset,
35 long *matchA, long *matchB,
36 long *matchC));
37 EXPORT void i_analyze_rift_r __PR((Int16_t * A, Int16_t * B,
38 long sizeA, long sizeB,
39 long aoffset, long boffset,
40 long *matchA, long *matchB,
41 long *matchC));
42
43 EXPORT void analyze_rift_silence_f __PR((Int16_t * A, Int16_t * B,
44 long sizeA, long sizeB,
45 long aoffset, long boffset,
46 long *matchA, long *matchB));
47
48 /*
49 * Gap analysis code
50 */
51
52 /*
53 * i_paranoia_overlap_r (internal)
54 *
55 * This function seeks backward through two vectors (starting at the given
56 * offsets) to determine how many consecutive samples agree. It returns
57 * the number of matching samples, which may be 0.
58 *
59 * Unlike its sibling, i_paranoia_overlap_f, this function doesn't need to
60 * be given the size of the vectors (all vectors stop at offset 0).
61 *
62 * This function is used by i_analyze_rift_r() below to find where a
63 * leading rift ends.
64 */
65 EXPORT long
i_paranoia_overlap_r(buffA,buffB,offsetA,offsetB)66 i_paranoia_overlap_r(buffA, buffB, offsetA, offsetB)
67 Int16_t *buffA;
68 Int16_t *buffB;
69 long offsetA;
70 long offsetB;
71 {
72 long beginA = offsetA;
73 long beginB = offsetB;
74
75 /*
76 * Start at the given offsets and work our way backwards until we hit
77 * the beginning of one of the vectors.
78 */
79 for (; beginA >= 0 && beginB >= 0; beginA--, beginB--)
80 if (buffA[beginA] != buffB[beginB])
81 break;
82
83 return (offsetA - beginA);
84 }
85
86 /*
87 * i_paranoia_overlap_f (internal)
88 *
89 * This function seeks forward through two vectors (starting at the given
90 * offsets) to determine how many consecutive samples agree. It returns
91 * the number of matching samples, which may be 0.
92 *
93 * Unlike its sibling, i_paranoia_overlap_r, this function needs to given
94 * the size of the vectors.
95 *
96 * This function is used by i_analyze_rift_f() below to find where a
97 * trailing rift ends.
98 */
99 EXPORT long
i_paranoia_overlap_f(buffA,buffB,offsetA,offsetB,sizeA,sizeB)100 i_paranoia_overlap_f(buffA, buffB, offsetA, offsetB, sizeA, sizeB)
101 Int16_t *buffA;
102 Int16_t *buffB;
103 long offsetA;
104 long offsetB;
105 long sizeA;
106 long sizeB;
107 {
108 long endA = offsetA;
109 long endB = offsetB;
110
111 /*
112 * Start at the given offsets and work our way forward until we hit
113 * the end of one of the vectors.
114 */
115 for (; endA < sizeA && endB < sizeB; endA++, endB++)
116 if (buffA[endA] != buffB[endB])
117 break;
118
119 return (endA - offsetA);
120 }
121
122 /*
123 * i_stutter_or_gap (internal)
124 *
125 * This function compares (gap) samples of two vectors at the given offsets.
126 * It returns 0 if all the samples are identical, or nonzero if they differ.
127 *
128 * This is used by i_analyze_rift_[rf] below to determine whether a rift
129 * contains samples dropped by the other vector (that should be inserted),
130 * or whether the rift contains a stutter (that should be dropped). See
131 * i_analyze_rift_[rf] for more details.
132 */
133 EXPORT int
i_stutter_or_gap(A,B,offA,offB,gap)134 i_stutter_or_gap(A, B, offA, offB, gap)
135 Int16_t *A;
136 Int16_t *B;
137 long offA;
138 long offB;
139 long gap;
140 {
141 long a1 = offA;
142 long b1 = offB;
143
144 /* BEGIN CSTYLED */
145 /*
146 * If the rift was so big that there aren't enough samples in the other
147 * vector to compare against the full gap, then just compare what we
148 * have available. E.g.:
149 *
150 * (5678)|(newly matching run ...)
151 * (... 12345678)| (345678) |(newly matching run ...)
152 *
153 * In this case, a1 would be -2, since we'd want to compare 6 samples
154 * against a vector that had only 4. So we start 2 samples later, and
155 * compare the 4 available samples.
156 *
157 * Again, this approach to identifying stutters is simply a heuristic,
158 * so this may not produce correct results in all cases.
159 */
160 /* END CSTYLED */
161 if (a1 < 0) {
162 /*
163 * Note that a1 is negative, so we're increasing b1 and decreasing (gap).
164 */
165 b1 -= a1;
166 gap += a1;
167 a1 = 0;
168 }
169
170 /*
171 * Note that we don't have an equivalent adjustment for leading rifts.
172 * Thus, it's possible for the following memcmp() to run off the end
173 * of A. See the bug note in i_analyze_rift_r().
174 *
175 * Multiply gap by 2 because samples are 2 bytes long and memcmp compares
176 * at the byte level.
177 */
178 return (memcmp(A + a1, B + b1, gap * 2));
179 }
180
181 /*
182 * riftv is the first value into the rift -> or <-
183 *
184 * i_analyze_rift_f (internal)
185 *
186 * This function examines a trailing rift to see how far forward the rift goes
187 * and to determine what kind of rift it is. This function is called by
188 * i_stage2_each() when a trailing rift is detected. (aoffset,boffset) are
189 * the offsets into (A,B) of the first mismatching sample.
190 *
191 * This function returns:
192 * matchA > 0 if there are (matchA) samples missing from A
193 * matchA < 0 if there are (-matchA) duplicate samples (stuttering) in A
194 * matchB > 0 if there are (matchB) samples missing from B
195 * matchB < 0 if there are (-matchB) duplicate samples in B
196 * matchC != 0 if there are (matchC) samples of garbage, after which
197 * both A and B are in sync again
198 */
199 EXPORT void
i_analyze_rift_f(A,B,sizeA,sizeB,aoffset,boffset,matchA,matchB,matchC)200 i_analyze_rift_f(A, B, sizeA, sizeB, aoffset, boffset, matchA, matchB, matchC)
201 Int16_t *A;
202 Int16_t *B;
203 long sizeA;
204 long sizeB;
205 long aoffset;
206 long boffset;
207 long *matchA;
208 long *matchB;
209 long *matchC;
210 {
211
212 long apast = sizeA - aoffset;
213 long bpast = sizeB - boffset;
214 long i;
215
216 *matchA = 0, *matchB = 0, *matchC = 0;
217
218 /* BEGIN CSTYLED */
219 /*
220 * Look forward to see where we regain agreement between vectors
221 * A and B (of at least MIN_WORDS_RIFT samples). We look for one of
222 * the following possible matches:
223 *
224 * edge
225 * v
226 * (1) (... A matching run)|(aoffset matches ...)
227 * (... B matching run)| (rift) |(boffset+i matches ...)
228 *
229 * (2) (... A matching run)| (rift) |(aoffset+i matches ...)
230 * (... B matching run)|(boffset matches ...)
231 *
232 * (3) (... A matching run)| (rift) |(aoffset+i matches ...)
233 * (... B matching run)| (rift) |(boffset+i matches ...)
234 *
235 * Anything that doesn't match one of these three is too corrupt to
236 * for us to recover from. E.g.:
237 *
238 * (... A matching run)| (rift) |(eventual match ...)
239 * (... B matching run)| (big rift) |(eventual match ...)
240 *
241 * We won't find the eventual match, since we wouldn't be sure how
242 * to fix the rift.
243 */
244 /* END CSTYLED */
245 for (i = 0; ; i++) {
246 /*
247 * Search for whatever case we hit first, so as to end up with the
248 * smallest rift.
249 */
250 if (i < bpast) { /* Don't search for (1) past the end of B */
251 /*
252 * See if we match case (1) above, which either means that A dropped
253 * samples at the rift, or that B stuttered.
254 */
255 if (i_paranoia_overlap_f(A, B, aoffset, boffset + i, sizeA, sizeB) >= MIN_WORDS_RIFT) {
256 *matchA = i;
257 break;
258 }
259 }
260 if (i < apast) { /* Don't search for (2) or (3) past the beginning of A */
261 /*
262 * See if we match case (2) above, which either means that B dropped
263 * samples at the rift, or that A stuttered.
264 */
265 if (i_paranoia_overlap_f(A, B, aoffset + i, boffset, sizeA, sizeB) >= MIN_WORDS_RIFT) {
266 *matchB = i;
267 break;
268 }
269 if (i < bpast) /* Don't search for (3) past the end of B */
270 /*
271 * See if we match case (3) above, which means that a fixed-length
272 * rift of samples is getting read unreliably.
273 */
274 if (i_paranoia_overlap_f(A, B, aoffset + i, boffset + i, sizeA, sizeB) >= MIN_WORDS_RIFT) {
275 *matchC = i;
276 break;
277 }
278 } else if (i >= bpast) {
279 /*
280 * Stop searching when we've reached the end of both vectors.
281 * In theory we could stop when there aren't MIN_WORDS_RIFT samples
282 * left in both vectors, but this case should happen fairly rarely.
283 */
284 break;
285 }
286 }
287
288 if (*matchA == 0 && *matchB == 0 && *matchC == 0)
289 return;
290
291 if (*matchC)
292 return;
293
294 /* BEGIN CSTYLED */
295 /*
296 * For case (1) or (2), we need to determine whether the rift contains
297 * samples dropped by the other vector (that should be inserted), or
298 * whether the rift contains a stutter (that should be dropped). To
299 * distinguish, we check the contents of the rift against the good samples
300 * just before the rift. If the contents match, then the rift contains
301 * a stutter.
302 *
303 * A stutter in the second vector:
304 * (...good samples... 1234)|(567 ...newly matched run...)
305 * (...good samples... 1234)| (1234) | (567 ...newly matched run)
306 *
307 * Samples missing from the first vector:
308 * (...good samples... 1234)|(901 ...newly matched run...)
309 * (...good samples... 1234)| (5678) |(901 ...newly matched run...)
310 *
311 * Of course, there's no theoretical guarantee that a non-stutter
312 * truly represents missing samples, but given that we're dealing with
313 * verified fragments in stage 2, we can have some confidence that this
314 * is the case.
315 */
316 /* END CSTYLED */
317 if (*matchA) {
318 /*
319 * For case (1), we need to determine whether A dropped samples at the
320 * rift or whether B stuttered.
321 *
322 * If the rift doesn't match the good samples in A (and hence in B),
323 * it's not a stutter, and the rift should be inserted into A.
324 */
325 if (i_stutter_or_gap(A, B, aoffset - *matchA, boffset, *matchA))
326 return;
327 *matchB = -*matchA; /* signify we need to remove n bytes */
328 /* from B */
329 *matchA = 0;
330 return;
331 } else {
332 /*
333 * Case (2) is the inverse of case (1) above.
334 */
335 if (i_stutter_or_gap(B, A, boffset - *matchB, aoffset, *matchB))
336 return;
337 *matchA = -*matchB;
338 *matchB = 0;
339 return;
340 }
341 }
342
343 /*
344 * riftv must be first even val of rift moving back
345 *
346 * i_analyze_rift_r (internal)
347 *
348 * This function examines a leading rift to see how far back the rift goes
349 * and to determine what kind of rift it is. This function is called by
350 * i_stage2_each() when a leading rift is detected. (aoffset,boffset) are
351 * the offsets into (A,B) of the first mismatching sample.
352 *
353 * This function returns:
354 * matchA > 0 if there are (matchA) samples missing from A
355 * matchA < 0 if there are (-matchA) duplicate samples (stuttering) in A
356 * matchB > 0 if there are (matchB) samples missing from B
357 * matchB < 0 if there are (-matchB) duplicate samples in B
358 * matchC != 0 if there are (matchC) samples of garbage, after which
359 * both A and B are in sync again
360 */
361 EXPORT void
i_analyze_rift_r(A,B,sizeA,sizeB,aoffset,boffset,matchA,matchB,matchC)362 i_analyze_rift_r(A, B, sizeA, sizeB, aoffset, boffset, matchA, matchB, matchC)
363 Int16_t *A;
364 Int16_t *B;
365 long sizeA;
366 long sizeB;
367 long aoffset;
368 long boffset;
369 long *matchA;
370 long *matchB;
371 long *matchC;
372 {
373
374 long apast = aoffset + 1;
375 long bpast = boffset + 1;
376 long i;
377
378 *matchA = 0, *matchB = 0, *matchC = 0;
379
380 /* BEGIN CSTYLED */
381 /*
382 * Look backward to see where we regain agreement between vectors
383 * A and B (of at least MIN_WORDS_RIFT samples). We look for one of
384 * the following possible matches:
385 *
386 * edge
387 * v
388 * (1) (... aoffset matches)|(A matching run ...)
389 * (... boffset-i matches)| (rift) |(B matching run ...)
390 *
391 * (2) (... aoffset-i matches)| (rift) |(A matching run ...)
392 * (... boffset matches)|(B matching run ...)
393 *
394 * (3) (... aoffset-i matches)| (rift) |(A matching run ...)
395 * (... boffset-i matches)| (rift) |(B matching run ...)
396 *
397 * Anything that doesn't match one of these three is too corrupt to
398 * for us to recover from. E.g.:
399 *
400 * (... eventual match)| (rift) |(A matching run ...)
401 * (... eventual match) | (big rift) |(B matching run ...)
402 *
403 * We won't find the eventual match, since we wouldn't be sure how
404 * to fix the rift.
405 */
406 /* END CSTYLED */
407 for (i = 0; ; i++) {
408 /*
409 * Search for whatever case we hit first, so as to end up with the
410 * smallest rift.
411 */
412 if (i < bpast) { /* Don't search for (1) past the beginning of B */
413 /*
414 * See if we match case (1) above, which either means that A dropped
415 * samples at the rift, or that B stuttered.
416 */
417 if (i_paranoia_overlap_r(A, B, aoffset, boffset - i) >= MIN_WORDS_RIFT) {
418 *matchA = i;
419 break;
420 }
421 }
422 if (i < apast) { /* Don't search for (2) or (3) past the beginning of A */
423 /*
424 * See if we match case (2) above, which either means that B dropped
425 * samples at the rift, or that A stuttered.
426 */
427 if (i_paranoia_overlap_r(A, B, aoffset - i, boffset) >= MIN_WORDS_RIFT) {
428 *matchB = i;
429 break;
430 }
431 if (i < bpast) { /* Don't search for (3) past the beginning of B */
432 /*
433 * See if we match case (3) above, which means that a fixed-length
434 * rift of samples is getting read unreliably.
435 */
436 if (i_paranoia_overlap_r(A, B, aoffset - i, boffset - i) >= MIN_WORDS_RIFT) {
437 *matchC = i;
438 break;
439 }
440 }
441 } else if (i >= bpast) {
442 /*
443 * Stop searching when we've reached the end of both vectors.
444 * In theory we could stop when there aren't MIN_WORDS_RIFT samples
445 * left in both vectors, but this case should happen fairly rarely.
446 */
447 break;
448 }
449 /*
450 * Try the search again with a larger tentative rift.
451 */
452 }
453
454 if (*matchA == 0 && *matchB == 0 && *matchC == 0)
455 return;
456
457 if (*matchC)
458 return;
459
460 /* BEGIN CSTYLED */
461 /*
462 * For case (1) or (2), we need to determine whether the rift contains
463 * samples dropped by the other vector (that should be inserted), or
464 * whether the rift contains a stutter (that should be dropped). To
465 * distinguish, we check the contents of the rift against the good samples
466 * just after the rift. If the contents match, then the rift contains
467 * a stutter.
468 *
469 * A stutter in the second vector:
470 * (...newly matched run... 234)|(5678 ...good samples...)
471 * (...newly matched run... 234)| (5678) |(5678 ...good samples...)
472 *
473 * Samples missing from the first vector:
474 * (...newly matched run... 890)|(5678 ...good samples...)
475 * (...newly matched run... 890)| (1234) |(5678 ...good samples...)
476 *
477 * Of course, there's no theoretical guarantee that a non-stutter
478 * truly represents missing samples, but given that we're dealing with
479 * verified fragments in stage 2, we can have some confidence that this
480 * is the case.
481 */
482 /* END CSTYLED */
483 if (*matchA) {
484 /*
485 * For case (1), we need to determine whether A dropped samples at the
486 * rift or whether B stuttered.
487 *
488 * If the rift doesn't match the good samples in A (and hence in B),
489 * it's not a stutter, and the rift should be inserted into A.
490 *
491 * ???BUG??? It's possible for aoffset+1+*matchA to be > sizeA, in
492 * which case the comparison in i_stutter_or_gap() will extend beyond
493 * the bounds of A. Thankfully, this isn't writing data and thus
494 * trampling memory, but it's still a memory access error that should
495 * be fixed.
496 *
497 * This bug is not fixed yet.
498 */
499 if (i_stutter_or_gap(A, B, aoffset + 1, boffset - *matchA + 1, *matchA))
500 return;
501
502 /*
503 * It is a stutter, so we need to signal that we need to remove
504 * (matchA) bytes from B.
505 */
506 *matchB = -*matchA; /* signify we need to remove n bytes */
507 /* from B */
508 *matchA = 0;
509 return;
510 } else {
511 /*
512 * Case (2) is the inverse of case (1) above.
513 */
514 if (i_stutter_or_gap(B, A, boffset + 1, aoffset - *matchB + 1, *matchB))
515 return;
516 *matchA = -*matchB;
517 *matchB = 0;
518 return;
519 }
520 }
521
522 /*
523 * analyze_rift_silence_f (internal)
524 *
525 * This function examines the fragment and root from the rift onward to
526 * see if they have a rift's worth of silence (or if they end with silence).
527 * It sets (*matchA) to -1 if A's rift is silence, (*matchB) to -1 if B's
528 * rift is silence, and sets them to 0 otherwise.
529 *
530 * Note that, unlike every other function in cdparanoia, this function
531 * considers any repeated value to be silence (which, in effect, it is).
532 * All other functions only consider repeated zeroes to be silence.
533 *
534 * This function is called by i_stage2_each() if it runs into a trailing rift
535 * that i_analyze_rift_f couldn't diagnose. This checks for another variant:
536 * where one vector has silence and the other doesn't. We then assume
537 * that the silence (and anything following it) is garbage.
538 *
539 * Note that while this function checks both A and B for silence, the caller
540 * assumes that only one or the other has silence.
541 */
542 EXPORT void
analyze_rift_silence_f(A,B,sizeA,sizeB,aoffset,boffset,matchA,matchB)543 analyze_rift_silence_f(A, B, sizeA, sizeB, aoffset, boffset, matchA, matchB)
544 Int16_t *A;
545 Int16_t *B;
546 long sizeA;
547 long sizeB;
548 long aoffset;
549 long boffset;
550 long *matchA;
551 long *matchB;
552 {
553 *matchA = -1;
554 *matchB = -1;
555
556 sizeA = min(sizeA, aoffset + MIN_WORDS_RIFT);
557 sizeB = min(sizeB, boffset + MIN_WORDS_RIFT);
558
559 aoffset++;
560 boffset++;
561
562 /*
563 * Check whether A has only "silence" within the search range. Note
564 * that "silence" here is a single, repeated value (zero or not).
565 */
566 while (aoffset < sizeA) {
567 if (A[aoffset] != A[aoffset - 1]) {
568 *matchA = 0;
569 break;
570 }
571 aoffset++;
572 }
573
574 /*
575 * Check whether B has only "silence" within the search range. Note
576 * that "silence" here is a single, repeated value (zero or not).
577 *
578 * Also note that while the caller assumes that only matchA or matchB
579 * is set, we check both vectors here.
580 */
581 while (boffset < sizeB) {
582 if (B[boffset] != B[boffset - 1]) {
583 *matchB = 0;
584 break;
585 }
586 boffset++;
587 }
588 }
589