1 /*
2   Copyright (C) 2004, 2008, 2011 Rocky Bernstein <rocky@gnu.org>
3   Copyright (C) 1998 Monty xiphmont@mit.edu
4 
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation, either version 3 of the License, or
8   (at your option) any later version.
9 
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14 
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 /***
19  * Gap analysis support code for paranoia
20  *
21  ***/
22 
23 #ifdef HAVE_CONFIG_H
24 # include "config.h"
25 # define __CDIO_CONFIG_H__ 1
26 #endif
27 #include "p_block.h"
28 #include <cdio/paranoia/paranoia.h>
29 #include "gap.h"
30 #include <string.h>
31 
32 /**** Gap analysis code ***************************************************/
33 
34 /* ===========================================================================
35  * i_paranoia_overlap_r (internal)
36  *
37  * This function seeks backward through two vectors (starting at the given
38  * offsets) to determine how many consecutive samples agree.  It returns
39  * the number of matching samples, which may be 0.
40  *
41  * Unlike its sibling, i_paranoia_overlap_f, this function doesn't need to
42  * be given the size of the vectors (all vectors stop at offset 0).
43  *
44  * This function is used by i_analyze_rift_r() below to find where a
45  * leading rift ends.
46  */
47 long int
i_paranoia_overlap_r(int16_t * buffA,int16_t * buffB,long offsetA,long offsetB)48 i_paranoia_overlap_r(int16_t *buffA,int16_t *buffB,
49 			  long offsetA, long offsetB)
50 {
51   long beginA=offsetA;
52   long beginB=offsetB;
53 
54   /* Start at the given offsets and work our way backwards until we hit
55    * the beginning of one of the vectors.
56    */
57   for( ; beginA>=0 && beginB>=0; beginA--,beginB-- )
58     if (buffA[beginA] != buffB[beginB]) break;
59 
60   return(offsetA-beginA);
61 }
62 
63 
64 /* ===========================================================================
65  * i_paranoia_overlap_f (internal)
66  *
67  * This function seeks forward through two vectors (starting at the given
68  * offsets) to determine how many consecutive samples agree.  It returns
69  * the number of matching samples, which may be 0.
70  *
71  * Unlike its sibling, i_paranoia_overlap_r, this function needs to given
72  * the size of the vectors.
73  *
74  * This function is used by i_analyze_rift_f() below to find where a
75  * trailing rift ends.
76  */
77 long int
i_paranoia_overlap_f(int16_t * buffA,int16_t * buffB,long offsetA,long offsetB,long sizeA,long sizeB)78 i_paranoia_overlap_f(int16_t *buffA,int16_t *buffB,
79 		     long offsetA, long offsetB,
80 		     long sizeA,long sizeB)
81 {
82   long endA=offsetA;
83   long endB=offsetB;
84 
85   /* Start at the given offsets and work our way forward until we hit
86    * the end of one of the vectors.
87    */
88   for(;endA<sizeA && endB<sizeB;endA++,endB++)
89     if(buffA[endA]!=buffB[endB])break;
90 
91   return(endA-offsetA);
92 }
93 
94 
95 /* ===========================================================================
96  * i_stutter_or_gap (internal)
97  *
98  * This function compares (gap) samples of two vectors at the given offsets.
99  * It returns 0 if all the samples are identical, or nonzero if they differ.
100  *
101  * This is used by i_analyze_rift_[rf] below to determine whether a rift
102  * contains samples dropped by the other vector (that should be inserted),
103  * or whether the rift contains a stutter (that should be dropped).  See
104  * i_analyze_rift_[rf] for more details.
105  */
106 int
i_stutter_or_gap(int16_t * A,int16_t * B,long offA,long offB,long int gap)107 i_stutter_or_gap(int16_t *A, int16_t *B,long offA, long offB, long int gap)
108 {
109   long a1=offA;
110   long b1=offB;
111 
112   /* If the rift was so big that there aren't enough samples in the other
113    * vector to compare against the full gap, then just compare what we
114    * have available.  E.g.:
115    *
116    *            (5678)|(newly matching run ...)
117    *    (... 12345678)| (345678) |(newly matching run ...)
118    *
119    * In this case, a1 would be -2, since we'd want to compare 6 samples
120    * against a vector that had only 4.  So we start 2 samples later, and
121    * compare the 4 available samples.
122    *
123    * Again, this approach to identifying stutters is simply a heuristic,
124    * so this may not produce correct results in all cases.
125    */
126   if(a1<0){
127     /* Note that a1 is negative, so we're increasing b1 and decreasing (gap).
128      */
129     b1-=a1;
130     gap+=a1;
131     a1=0;
132   }
133 
134   /* Note that we don't have an equivalent adjustment for leading rifts.
135    * Thus, it's possible for the following memcmp() to run off the end
136    * of A.  See the bug note in i_analyze_rift_r().
137    */
138 
139   /* Multiply gap by 2 because samples are 2 bytes long and memcmp compares
140    * at the byte level.
141    */
142   return(memcmp(A+a1,B+b1,gap*2));
143 }
144 
145 /* riftv is the first value into the rift -> or <- */
146 
147 
148 /* ===========================================================================
149  * i_analyze_rift_f (internal)
150  *
151  * This function examines a trailing rift to see how far forward the rift goes
152  * and to determine what kind of rift it is.  This function is called by
153  * i_stage2_each() when a trailing rift is detected.  (aoffset,boffset) are
154  * the offsets into (A,B) of the first mismatching sample.
155  *
156  * This function returns:
157  *  matchA  > 0 if there are (matchA) samples missing from A
158  *  matchA  < 0 if there are (-matchA) duplicate samples (stuttering) in A
159  *  matchB  > 0 if there are (matchB) samples missing from B
160  *  matchB  < 0 if there are (-matchB) duplicate samples in B
161  *  matchC != 0 if there are (matchC) samples of garbage, after which
162  *              both A and B are in sync again
163  */
164 void
i_analyze_rift_f(int16_t * A,int16_t * B,long sizeA,long sizeB,long aoffset,long boffset,long * matchA,long * matchB,long * matchC)165 i_analyze_rift_f(int16_t *A,int16_t *B,
166 		 long sizeA, long sizeB,
167 		 long aoffset, long boffset,
168 		 long *matchA,long *matchB,long *matchC)
169 {
170 
171   long apast=sizeA-aoffset;
172   long bpast=sizeB-boffset;
173   long i;
174 
175   *matchA=0, *matchB=0, *matchC=0;
176 
177   /* Look forward to see where we regain agreement between vectors
178    * A and B (of at least MIN_WORDS_RIFT samples).  We look for one of
179    * the following possible matches:
180    *
181    *                         edge
182    *                          v
183    * (1)  (... A matching run)|(aoffset matches ...)
184    *      (... B matching run)| (rift) |(boffset+i matches ...)
185    *
186    * (2)  (... A matching run)| (rift) |(aoffset+i matches ...)
187    *      (... B matching run)|(boffset matches ...)
188    *
189    * (3)  (... A matching run)| (rift) |(aoffset+i matches ...)
190    *      (... B matching run)| (rift) |(boffset+i matches ...)
191    *
192    * Anything that doesn't match one of these three is too corrupt to
193    * for us to recover from.  E.g.:
194    *
195    *      (... A matching run)| (rift) |(eventual match ...)
196    *      (... B matching run)| (big rift) |(eventual match ...)
197    *
198    * We won't find the eventual match, since we wouldn't be sure how
199    * to fix the rift.
200    */
201 
202   for(i=1;;i++){
203     /* Search for whatever case we hit first, so as to end up with the
204      * smallest rift.
205      */
206 
207     /* Don't search for (1) past the end of B */
208     if (i<bpast)
209 
210       /* See if we match case (1) above, which either means that A dropped
211        * samples at the rift, or that B stuttered.
212        */
213       if(i_paranoia_overlap_f(A,B,aoffset,boffset+i,sizeA,sizeB)>=MIN_WORDS_RIFT){
214 	*matchA=i;
215 	break;
216       }
217 
218     /* Don't search for (2) or (3) past the end of A */
219     if (i<apast) {
220 
221       /* See if we match case (2) above, which either means that B dropped
222        * samples at the rift, or that A stuttered.
223        */
224       if(i_paranoia_overlap_f(A,B,aoffset+i,boffset,sizeA,sizeB)>=MIN_WORDS_RIFT){
225 	*matchB=i;
226 	break;
227       }
228 
229       /* Don't search for (3) past the end of B */
230       if (i<bpast)
231 
232         /* See if we match case (3) above, which means that a fixed-length
233          * rift of samples is getting read unreliably.
234          */
235 	if(i_paranoia_overlap_f(A,B,aoffset+i,boffset+i,sizeA,sizeB)>=MIN_WORDS_RIFT){
236 	  *matchC=i;
237 	  break;
238 	}
239     }else
240 
241       /* Stop searching when we've reached the end of both vectors.
242        * In theory we could stop when there aren't MIN_WORDS_RIFT samples
243        * left in both vectors, but this case should happen fairly rarely.
244        */
245       if(i>=bpast)break;
246 
247     /* Try the search again with a larger tentative rift. */
248   }
249 
250   if(*matchA==0 && *matchB==0 && *matchC==0)return;
251 
252   if(*matchC)return;
253 
254   /* For case (1) or (2), we need to determine whether the rift contains
255    * samples dropped by the other vector (that should be inserted), or
256    * whether the rift contains a stutter (that should be dropped).  To
257    * distinguish, we check the contents of the rift against the good samples
258    * just before the rift.  If the contents match, then the rift contains
259    * a stutter.
260    *
261    * A stutter in the second vector:
262    *     (...good samples... 1234)|(567 ...newly matched run...)
263    *     (...good samples... 1234)| (1234) | (567 ...newly matched run)
264    *
265    * Samples missing from the first vector:
266    *     (...good samples... 1234)|(901 ...newly matched run...)
267    *     (...good samples... 1234)| (5678) |(901 ...newly matched run...)
268    *
269    * Of course, there's no theoretical guarantee that a non-stutter
270    * truly represents missing samples, but given that we're dealing with
271    * verified fragments in stage 2, we can have some confidence that this
272    * is the case.
273    */
274   if(*matchA){
275     /* For case (1), we need to determine whether A dropped samples at the
276      * rift or whether B stuttered.
277      *
278      * If the rift doesn't match the good samples in A (and hence in B),
279      * it's not a stutter, and the rift should be inserted into A.
280      */
281     if(i_stutter_or_gap(A,B,aoffset-*matchA,boffset,*matchA))
282       return;
283 
284     /* It is a stutter, so we need to signal that we need to remove
285      * (matchA) bytes from B.
286      */
287     *matchB = -*matchA;
288     *matchA=0;
289     return;
290 
291   }else{
292     /* Case (2) is the inverse of case (1) above. */
293     if(i_stutter_or_gap(B,A,boffset-*matchB,aoffset,*matchB))
294       return;
295 
296     *matchA = -*matchB;
297     *matchB=0;
298     return;
299   }
300 }
301 
302 
303 /* riftv must be first even val of rift moving back */
304 
305 /* ===========================================================================
306  * i_analyze_rift_r (internal)
307  *
308  * This function examines a leading rift to see how far back the rift goes
309  * and to determine what kind of rift it is.  This function is called by
310  * i_stage2_each() when a leading rift is detected.  (aoffset,boffset) are
311  * the offsets into (A,B) of the first mismatching sample.
312  *
313  * This function returns:
314  *  matchA  > 0 if there are (matchA) samples missing from A
315  *  matchA  < 0 if there are (-matchA) duplicate samples (stuttering) in A
316  *  matchB  > 0 if there are (matchB) samples missing from B
317  *  matchB  < 0 if there are (-matchB) duplicate samples in B
318  *  matchC != 0 if there are (matchC) samples of garbage, after which
319  *              both A and B are in sync again
320  */
321 void
i_analyze_rift_r(int16_t * A,int16_t * B,long sizeA,long sizeB,long aoffset,long boffset,long * matchA,long * matchB,long * matchC)322 i_analyze_rift_r(int16_t *A,int16_t *B,
323 		 long sizeA, long sizeB,
324 		 long aoffset, long boffset,
325 		 long *matchA,long *matchB,long *matchC)
326 {
327 
328   long apast=aoffset+1;
329   long bpast=boffset+1;
330   long i;
331 
332   *matchA=0, *matchB=0, *matchC=0;
333 
334   /* Look backward to see where we regain agreement between vectors
335    * A and B (of at least MIN_WORDS_RIFT samples).  We look for one of
336    * the following possible matches:
337    *
338    *                                    edge
339    *                                      v
340    * (1)             (... aoffset matches)|(A matching run ...)
341    *      (... boffset-i matches)| (rift) |(B matching run ...)
342    *
343    * (2)  (... aoffset-i matches)| (rift) |(A matching run ...)
344    *                (... boffset matches)|(B matching run ...)
345    *
346    * (3)  (... aoffset-i matches)| (rift) |(A matching run ...)
347    *      (... boffset-i matches)| (rift) |(B matching run ...)
348    *
349    * Anything that doesn't match one of these three is too corrupt to
350    * for us to recover from.  E.g.:
351    *
352    *         (... eventual match)| (rift) |(A matching run ...)
353    *    (... eventual match) | (big rift) |(B matching run ...)
354    *
355    * We won't find the eventual match, since we wouldn't be sure how
356    * to fix the rift.
357    */
358 
359   for(i=1;;i++){
360     /* Search for whatever case we hit first, so as to end up with the
361      * smallest rift.
362      */
363 
364     /* Don't search for (1) past the beginning of B */
365     if (i<bpast)
366 
367       /* See if we match case (1) above, which either means that A dropped
368        * samples at the rift, or that B stuttered.
369        */
370       if(i_paranoia_overlap_r(A,B,aoffset,boffset-i)>=MIN_WORDS_RIFT){
371 	*matchA=i;
372 	break;
373       }
374 
375     /* Don't search for (2) or (3) past the beginning of A */
376     if (i<apast) {
377 
378       /* See if we match case (2) above, which either means that B dropped
379        * samples at the rift, or that A stuttered.
380        */
381       if(i_paranoia_overlap_r(A,B,aoffset-i,boffset)>=MIN_WORDS_RIFT){
382 	*matchB=i;
383 	break;
384       }
385 
386       /* Don't search for (3) past the beginning of B */
387       if (i<bpast)
388 
389 	/* See if we match case (3) above, which means that a fixed-length
390 	 * rift of samples is getting read unreliably.
391 	 */
392 	if(i_paranoia_overlap_r(A,B,aoffset-i,boffset-i)>=MIN_WORDS_RIFT){
393 	  *matchC=i;
394 	  break;
395 	}
396     }else
397 
398       /* Stop searching when we've reached the end of both vectors.
399        * In theory we could stop when there aren't MIN_WORDS_RIFT samples
400        * left in both vectors, but this case should happen fairly rarely.
401        */
402       if(i>=bpast)break;
403 
404     /* Try the search again with a larger tentative rift. */
405   }
406 
407   if(*matchA==0 && *matchB==0 && *matchC==0)return;
408 
409   if(*matchC)return;
410 
411   /* For case (1) or (2), we need to determine whether the rift contains
412    * samples dropped by the other vector (that should be inserted), or
413    * whether the rift contains a stutter (that should be dropped).  To
414    * distinguish, we check the contents of the rift against the good samples
415    * just after the rift.  If the contents match, then the rift contains
416    * a stutter.
417    *
418    * A stutter in the second vector:
419    *              (...newly matched run... 234)|(5678 ...good samples...)
420    *     (...newly matched run... 234)| (5678) |(5678 ...good samples...)
421    *
422    * Samples missing from the first vector:
423    *              (...newly matched run... 890)|(5678 ...good samples...)
424    *     (...newly matched run... 890)| (1234) |(5678 ...good samples...)
425    *
426    * Of course, there's no theoretical guarantee that a non-stutter
427    * truly represents missing samples, but given that we're dealing with
428    * verified fragments in stage 2, we can have some confidence that this
429    * is the case.
430    */
431 
432   if(*matchA){
433     /* For case (1), we need to determine whether A dropped samples at the
434      * rift or whether B stuttered.
435      *
436      * If the rift doesn't match the good samples in A (and hence in B),
437      * it's not a stutter, and the rift should be inserted into A.
438      *
439      * ???BUG??? It's possible for aoffset+1+*matchA to be > sizeA, in
440      * which case the comparison in i_stutter_or_gap() will extend beyond
441      * the bounds of A.  Thankfully, this isn't writing data and thus
442      * trampling memory, but it's still a memory access error that should
443      * be fixed.
444      *
445      * This bug is not fixed yet.
446      */
447     if(i_stutter_or_gap(A,B,aoffset+1,boffset-*matchA+1,*matchA))
448       return;
449 
450     /* It is a stutter, so we need to signal that we need to remove
451      * (matchA) bytes from B.
452      */
453     *matchB = -*matchA;
454     *matchA=0;
455     return;
456 
457   }else{
458     /* Case (2) is the inverse of case (1) above. */
459     if(i_stutter_or_gap(B,A,boffset+1,aoffset-*matchB+1,*matchB))
460       return;
461 
462     *matchA = -*matchB;
463     *matchB=0;
464     return;
465   }
466 }
467 
468 
469 /* ===========================================================================
470  * analyze_rift_silence_f (internal)
471  *
472  * This function examines the fragment and root from the rift onward to
473  * see if they have a rift's worth of silence (or if they end with silence).
474  * It sets (*matchA) to -1 if A's rift is silence, (*matchB) to -1 if B's
475  * rift is silence, and sets them to 0 otherwise.
476  *
477  * Note that, unlike every other function in cdparanoia, this function
478  * considers any repeated value to be silence (which, in effect, it is).
479  * All other functions only consider repeated zeroes to be silence.
480  *
481  * ??? Is this function name just a misnomer, as it's really looking for
482  * repeated garbage?
483  *
484  * This function is called by i_stage2_each() if it runs into a trailing rift
485  * that i_analyze_rift_f couldn't diagnose.  This checks for another variant:
486  * where one vector has silence and the other doesn't.  We then assume
487  * that the silence (and anything following it) is garbage.
488  *
489  * Note that while this function checks both A and B for silence, the caller
490  * assumes that only one or the other has silence.
491  */
492 void
analyze_rift_silence_f(int16_t * A,int16_t * B,long sizeA,long sizeB,long aoffset,long boffset,long * matchA,long * matchB)493 analyze_rift_silence_f(int16_t *A,int16_t *B,long sizeA,long sizeB,
494 		       long aoffset, long boffset,
495 		       long *matchA, long *matchB)
496 {
497   *matchA=-1;
498   *matchB=-1;
499 
500   /* Search for MIN_WORDS_RIFT samples, or to the end of the vector,
501    * whichever comes first.
502    */
503   sizeA=min(sizeA,aoffset+MIN_WORDS_RIFT);
504   sizeB=min(sizeB,boffset+MIN_WORDS_RIFT);
505 
506   aoffset++;
507   boffset++;
508 
509   /* Check whether A has only "silence" within the search range.  Note
510    * that "silence" here is a single, repeated value (zero or not).
511    */
512   while(aoffset<sizeA){
513     if(A[aoffset]!=A[aoffset-1]){
514       *matchA=0;
515       break;
516     }
517     aoffset++;
518   }
519 
520   /* Check whether B has only "silence" within the search range.  Note
521    * that "silence" here is a single, repeated value (zero or not).
522    *
523    * Also note that while the caller assumes that only matchA or matchB
524    * is set, we check both vectors here.
525    */
526   while(boffset<sizeB){
527     if(B[boffset]!=B[boffset-1]){
528       *matchB=0;
529       break;
530     }
531     boffset++;
532   }
533 }
534