1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  * Copyright (c) 2008 Beyond Access, Inc.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY
18  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL BEYOND ACCESS, INC.  NOR
21  * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /**
31  * @file yin.c Implementation of pitch extraction.
32  * @author David Huggins-Daines <dhuggins@cs.cmu.edu>
33  */
34 
35 /* This implements part of the YIN algorithm:
36  *
37  * "YIN, a fundamental frequency estimator for speech and music".
38  * Alain de Cheveigné and Hideki Kawahara.  Journal of the Acoustical
39  * Society of America, 111 (4), April 2002.
40  */
41 
42 #include "sphinxbase/prim_type.h"
43 #include "sphinxbase/ckd_alloc.h"
44 #include "sphinxbase/fixpoint.h"
45 
46 #include "sphinxbase/yin.h"
47 
48 #include <stdio.h>
49 #include <string.h>
50 
51 struct yin_s {
52     uint16 frame_size;       /** Size of analysis frame. */
53 #ifndef FIXED_POINT
54     float search_threshold; /**< Threshold for finding period */
55     float search_range;     /**< Range around best local estimate to search */
56 #else
57     uint16 search_threshold; /**< Threshold for finding period, in Q15 */
58     uint16 search_range;     /**< Range around best local estimate to search, in Q15 */
59 #endif
60     uint16 nfr;              /**< Number of frames read so far. */
61 
62     unsigned char wsize;    /**< Size of smoothing window. */
63     unsigned char wstart;   /**< First frame in window. */
64     unsigned char wcur;     /**< Current frame of analysis. */
65     unsigned char endut;    /**< Hoch Hech! Are we at the utterance end? */
66 
67 #ifndef FIXED_POINT
68     float **diff_window;    /**< Window of difference function outputs. */
69 #else
70     fixed32 **diff_window;  /**< Window of difference function outputs. */
71 #endif
72     uint16 *period_window;  /**< Window of best period estimates. */
73     int16 *frame;           /**< Storage for frame */
74 };
75 
76 /**
77  * The core of YIN: cumulative mean normalized difference function.
78  */
79 #ifndef FIXED_POINT
80 static void
cmn_diff(int16 const * signal,float * out_diff,int ndiff)81 cmn_diff(int16 const *signal, float *out_diff, int ndiff)
82 {
83     double cum;
84     int t, j;
85 
86     cum = 0.0f;
87     out_diff[0] = 1.0f;
88 
89     for (t = 1; t < ndiff; ++t) {
90         float dd;
91         dd = 0.0f;
92         for (j = 0; j < ndiff; ++j) {
93              int diff = signal[j] - signal[t + j];
94              dd += (diff * diff);
95         }
96         cum += dd;
97         out_diff[t] = (float)(dd * t / cum);
98     }
99 }
100 #else
101 static void
cmn_diff(int16 const * signal,int32 * out_diff,int ndiff)102 cmn_diff(int16 const *signal, int32 *out_diff, int ndiff)
103 {
104     uint32 cum, cshift;
105     int32 t, tscale;
106 
107     out_diff[0] = 32768;
108     cum = 0;
109     cshift = 0;
110 
111     /* Determine how many bits we can scale t up by below. */
112     for (tscale = 0; tscale < 32; ++tscale)
113         if (ndiff & (1<<(31-tscale)))
114             break;
115     --tscale; /* Avoid teh overflowz. */
116     /* printf("tscale is %d (ndiff - 1) << tscale is %d\n",
117        tscale, (ndiff-1) << tscale); */
118 
119     /* Somewhat elaborate block floating point implementation.
120      * The fp implementation of this is really a lot simpler. */
121     for (t = 1; t < ndiff; ++t) {
122         uint32 dd, dshift, norm;
123         int j;
124 
125         dd = 0;
126         dshift = 0;
127         for (j = 0; j < ndiff; ++j) {
128             int diff = signal[j] - signal[t + j];
129             /* Guard against overflows. */
130             if (dd > (1UL<<tscale)) {
131                 dd >>= 1;
132                 ++dshift;
133             }
134             dd += (diff * diff) >> dshift;
135         }
136         /* Make sure the diffs and cum are shifted to the same
137          * scaling factor (usually dshift will be zero) */
138         if (dshift > cshift) {
139             cum += dd << (dshift-cshift);
140         }
141         else {
142             cum += dd >> (cshift-dshift);
143         }
144 
145         /* Guard against overflows and also ensure that (t<<tscale) > cum. */
146         while (cum > (1UL<<tscale)) {
147             cum >>= 1;
148             ++cshift;
149         }
150         /* Avoid divide-by-zero! */
151         if (cum == 0) cum = 1;
152         /* Calculate the normalizer in high precision. */
153         norm = (t << tscale) / cum;
154         /* Do a long multiply and shift down to Q15. */
155         out_diff[t] = (int32)(((long long)dd * norm)
156                               >> (tscale - 15 + cshift - dshift));
157         /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n",
158            dd, cshift, dshift, (t<<tscale), cum, norm, out_diff[t]); */
159     }
160 }
161 #endif
162 
163 yin_t *
yin_init(int frame_size,float search_threshold,float search_range,int smooth_window)164 yin_init(int frame_size, float search_threshold,
165          float search_range, int smooth_window)
166 {
167     yin_t *pe;
168 
169     pe = ckd_calloc(1, sizeof(*pe));
170     pe->frame_size = frame_size;
171 #ifndef FIXED_POINT
172     pe->search_threshold = search_threshold;
173     pe->search_range = search_range;
174 #else
175     pe->search_threshold = (uint16)(search_threshold * 32768);
176     pe->search_range = (uint16)(search_range * 32768);
177 #endif
178     pe->wsize = smooth_window * 2 + 1;
179     pe->diff_window = ckd_calloc_2d(pe->wsize,
180                                     pe->frame_size / 2,
181                                     sizeof(**pe->diff_window));
182     pe->period_window = ckd_calloc(pe->wsize,
183                                    sizeof(*pe->period_window));
184     pe->frame = ckd_calloc(pe->frame_size, sizeof(*pe->frame));
185     return pe;
186 }
187 
188 void
yin_free(yin_t * pe)189 yin_free(yin_t *pe)
190 {
191     ckd_free_2d(pe->diff_window);
192     ckd_free(pe->period_window);
193     ckd_free(pe);
194 }
195 
196 void
yin_start(yin_t * pe)197 yin_start(yin_t *pe)
198 {
199     /* Reset the circular window pointers. */
200     pe->wstart = pe->endut = 0;
201     pe->nfr = 0;
202 }
203 
204 void
yin_end(yin_t * pe)205 yin_end(yin_t *pe)
206 {
207     pe->endut = 1;
208 }
209 
210 int
211 #ifndef FIXED_POINT
thresholded_search(float * diff_window,float threshold,int start,int end)212 thresholded_search(float *diff_window, float threshold, int start, int end)
213 #else
214 thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end)
215 #endif
216 {
217     int i, argmin;
218 #ifndef FIXED_POINT
219     float min;
220 #else
221     int min;
222 #endif
223 
224     min = diff_window[start];
225     argmin = start;
226     for (i = start + 1; i < end; ++i) {
227 #ifndef FIXED_POINT
228         float diff = diff_window[i];
229 #else
230         int diff = diff_window[i];
231 #endif
232 
233         if (diff < threshold) {
234             min = diff;
235             argmin = i;
236             break;
237         }
238         if (diff < min) {
239             min = diff;
240             argmin = i;
241         }
242     }
243     return argmin;
244 }
245 
246 void
yin_store(yin_t * pe,int16 const * frame)247 yin_store(yin_t *pe, int16 const *frame)
248 {
249     memcpy(pe->frame, frame, pe->frame_size * sizeof(*pe->frame));
250 }
251 
252 void
yin_write(yin_t * pe,int16 const * frame)253 yin_write(yin_t *pe, int16 const *frame)
254 {
255     int outptr, difflen;
256 
257     /* Rotate the window one frame forward. */
258     ++pe->wstart;
259     /* Fill in the frame before wstart. */
260     outptr = pe->wstart - 1;
261     /* Wrap around the window pointer. */
262     if (pe->wstart == pe->wsize)
263         pe->wstart = 0;
264 
265     /* Now calculate normalized difference function. */
266     difflen = pe->frame_size / 2;
267     cmn_diff(frame, pe->diff_window[outptr], difflen);
268 
269     /* Find the first point under threshold.  If not found, then
270      * use the absolute minimum. */
271     pe->period_window[outptr]
272         = thresholded_search(pe->diff_window[outptr],
273                              pe->search_threshold, 0, difflen);
274 
275     /* Increment total number of frames. */
276     ++pe->nfr;
277 }
278 
279 void
yin_write_stored(yin_t * pe)280 yin_write_stored(yin_t *pe)
281 {
282     yin_write(pe, pe->frame);
283 }
284 
285 int
yin_read(yin_t * pe,uint16 * out_period,float * out_bestdiff)286 yin_read(yin_t *pe, uint16 *out_period, float *out_bestdiff)
287 {
288     int wstart, wlen, half_wsize, i;
289     int best, search_width, low_period, high_period;
290 #ifndef FIXED_POINT
291     float best_diff;
292 #else
293     int best_diff;
294 #endif
295 
296     half_wsize = (pe->wsize-1)/2;
297     /* Without any smoothing, just return the current value (don't
298      * need to do anything to the current poitner either). */
299     if (half_wsize == 0) {
300         if (pe->endut)
301             return 0;
302         *out_period = pe->period_window[0];
303 #ifndef FIXED_POINT
304         *out_bestdiff = pe->diff_window[0][pe->period_window[0]];
305 #else
306         *out_bestdiff = pe->diff_window[0][pe->period_window[0]] / 32768.0f;
307 #endif
308         return 1;
309     }
310 
311     /* We can't do anything unless we have at least (wsize-1)/2 + 1
312      * frames, unless we're at the end of the utterance. */
313     if (pe->endut == 0 && pe->nfr < half_wsize + 1) {
314         /* Don't increment the current pointer either. */
315         return 0;
316     }
317 
318     /* Establish the smoothing window. */
319     /* End of utterance. */
320     if (pe->endut) {
321         /* We are done (no more data) when pe->wcur = pe->wstart. */
322         if (pe->wcur == pe->wstart)
323             return 0;
324         /* I.e. pe->wcur (circular minus) half_wsize. */
325         wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize;
326         /* Number of frames from wstart up to pe->wstart. */
327         wlen = pe->wstart - wstart;
328         if (wlen < 0) wlen += pe->wsize;
329         /*printf("ENDUT! ");*/
330     }
331     /* Beginning of utterance. */
332     else if (pe->nfr < pe->wsize) {
333         wstart = 0;
334         wlen = pe->nfr;
335     }
336     /* Normal case, it is what it is. */
337     else {
338         wstart = pe->wstart;
339         wlen = pe->wsize;
340     }
341 
342     /* Now (finally) look for the best local estimate. */
343     /* printf("Searching for local estimate in %d frames around %d\n",
344        wlen, pe->nfr + 1 - wlen); */
345     best = pe->period_window[pe->wcur];
346     best_diff = pe->diff_window[pe->wcur][best];
347     for (i = 0; i < wlen; ++i) {
348         int j = wstart + i;
349 #ifndef FIXED_POINT
350         float diff;
351 #else
352         int diff;
353 #endif
354 
355         j %= pe->wsize;
356         diff = pe->diff_window[j][pe->period_window[j]];
357         /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768,
358            pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */
359         if (diff < best_diff) {
360             best_diff = diff;
361             best = pe->period_window[j];
362         }
363     }
364     /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768,
365        best ? 8000.0/best : 8000.0); */
366     /* If it's the same as the current one then return it. */
367     if (best == pe->period_window[pe->wcur]) {
368         /* Increment the current pointer. */
369         if (++pe->wcur == pe->wsize)
370             pe->wcur = 0;
371         *out_period = best;
372 #ifndef FIXED_POINT
373         *out_bestdiff = best_diff;
374 #else
375         *out_bestdiff = best_diff / 32768.0f;
376 #endif
377         return 1;
378     }
379     /* Otherwise, redo the search inside a narrower range. */
380 #ifndef FIXED_POINT
381     search_width = (int)(best * pe->search_range);
382 #else
383     search_width = best * pe->search_range / 32768;
384 #endif
385     /* printf("Search width = %d * %.2f = %d\n",
386        best, (double)pe->search_range/32768, search_width); */
387     if (search_width == 0) search_width = 1;
388     low_period = best - search_width;
389     high_period = best + search_width;
390     if (low_period < 0) low_period = 0;
391     if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2;
392     /* printf("Searching from %d to %d\n", low_period, high_period); */
393     best = thresholded_search(pe->diff_window[pe->wcur],
394                               pe->search_threshold,
395                               low_period, high_period);
396     best_diff = pe->diff_window[pe->wcur][best];
397 
398     if (out_period)
399         *out_period = (best > 65535) ? 65535 : best;
400     if (out_bestdiff) {
401 #ifndef FIXED_POINT
402         *out_bestdiff = (best_diff > 1.0f) ? 1.0f : best_diff;
403 #else
404         *out_bestdiff = (best_diff > 32768) ? 1.0f : best_diff / 32768.0f;
405 #endif
406     }
407 
408     /* Increment the current pointer. */
409     if (++pe->wcur == pe->wsize)
410         pe->wcur = 0;
411     return 1;
412 }
413