1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  * Copyright (c) 2008 Beyond Access, Inc.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY
18  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL BEYOND ACCESS, INC.  NOR
21  * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /**
31  * @file yin.c Implementation of pitch extraction.
32  * @author David Huggins-Daines <dhuggins@cs.cmu.edu>
33  */
34 
35 /* This implements part of the YIN algorithm:
36  *
37  * "YIN, a fundamental frequency estimator for speech and music".
38  * Alain de Cheveigné and Hideki Kawahara.  Journal of the Acoustical
39  * Society of America, 111 (4), April 2002.
40  */
41 
42 #include "sphinxbase/prim_type.h"
43 #include "sphinxbase/ckd_alloc.h"
44 #include "sphinxbase/fixpoint.h"
45 
46 #include "sphinxbase/yin.h"
47 
48 #include <stdio.h>
49 
50 struct yin_s {
51     uint16 frame_size;       /** Size of analysis frame. */
52     uint16 search_threshold; /**< Threshold for finding period, in Q15 */
53     uint16 search_range;     /**< Range around best local estimate to search, in Q15 */
54     uint16 nfr;              /**< Number of frames read so far. */
55 
56     unsigned char wsize;    /**< Size of smoothing window. */
57     unsigned char wstart;   /**< First frame in window. */
58     unsigned char wcur;     /**< Current frame of analysis. */
59     unsigned char endut;    /**< Hoch Hech! Are we at the utterance end? */
60 
61     fixed32 **diff_window;  /**< Window of difference function outputs. */
62     uint16 *period_window;  /**< Window of best period estimates. */
63 };
64 
65 /**
66  * The core of YIN: cumulative mean normalized difference function.
67  */
68 static void
cmn_diff(int16 const * signal,int32 * out_diff,int ndiff)69 cmn_diff(int16 const *signal, int32 *out_diff, int ndiff)
70 {
71     uint32 cum, cshift;
72     int32 t, tscale;
73 
74     out_diff[0] = 32768;
75     cum = 0;
76     cshift = 0;
77 
78     /* Determine how many bits we can scale t up by below. */
79     for (tscale = 0; tscale < 32; ++tscale)
80         if (ndiff & (1<<(31-tscale)))
81             break;
82     --tscale; /* Avoid teh overflowz. */
83     /* printf("tscale is %d (ndiff - 1) << tscale is %d\n",
84        tscale, (ndiff-1) << tscale); */
85 
86     /* Somewhat elaborate block floating point implementation.
87      * The fp implementation of this is really a lot simpler. */
88     for (t = 1; t < ndiff; ++t) {
89         uint32 dd, dshift, norm;
90         int j;
91 
92         dd = 0;
93         dshift = 0;
94         for (j = 0; j < ndiff; ++j) {
95             int diff = signal[j] - signal[t + j];
96             /* Guard against overflows. */
97             if (dd > (1UL<<tscale)) {
98                 dd >>= 1;
99                 ++dshift;
100             }
101             dd += (diff * diff) >> dshift;
102         }
103         /* Make sure the diffs and cum are shifted to the same
104          * scaling factor (usually dshift will be zero) */
105         if (dshift > cshift) {
106             cum += dd << (dshift-cshift);
107         }
108         else {
109             cum += dd >> (cshift-dshift);
110         }
111 
112         /* Guard against overflows and also ensure that (t<<tscale) > cum. */
113         while (cum > (1UL<<tscale)) {
114             cum >>= 1;
115             ++cshift;
116         }
117         /* Avoid divide-by-zero! */
118         if (cum == 0) cum = 1;
119         /* Calculate the normalizer in high precision. */
120         norm = (t << tscale) / cum;
121         /* Do a long multiply and shift down to Q15. */
122         out_diff[t] = (int32)(((long long)dd * norm)
123                               >> (tscale - 15 + cshift - dshift));
124         /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n",
125            dd, cshift, dshift, (t<<tscale), cum, norm, out_diff[t]); */
126     }
127 }
128 
129 yin_t *
yin_init(int frame_size,float search_threshold,float search_range,int smooth_window)130 yin_init(int frame_size, float search_threshold,
131          float search_range, int smooth_window)
132 {
133     yin_t *pe;
134 
135     pe = ckd_calloc(1, sizeof(*pe));
136     pe->frame_size = frame_size;
137     pe->search_threshold = (uint16)(search_threshold * 32768);
138     pe->search_range = (uint16)(search_range * 32768);
139     pe->wsize = smooth_window * 2 + 1;
140     pe->diff_window = ckd_calloc_2d(pe->wsize,
141                                     pe->frame_size / 2,
142                                     sizeof(**pe->diff_window));
143     pe->period_window = ckd_calloc(pe->wsize,
144                                    sizeof(*pe->period_window));
145     return pe;
146 }
147 
148 void
yin_free(yin_t * pe)149 yin_free(yin_t *pe)
150 {
151     ckd_free_2d(pe->diff_window);
152     ckd_free(pe->period_window);
153     ckd_free(pe);
154 }
155 
156 void
yin_start(yin_t * pe)157 yin_start(yin_t *pe)
158 {
159     /* Reset the circular window pointers. */
160     pe->wstart = pe->endut = 0;
161     pe->nfr = 0;
162 }
163 
164 void
yin_end(yin_t * pe)165 yin_end(yin_t *pe)
166 {
167     pe->endut = 1;
168 }
169 
170 int
thresholded_search(int32 * diff_window,fixed32 threshold,int start,int end)171 thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end)
172 {
173     int i, min, argmin;
174 
175     min = INT_MAX;
176     argmin = 0;
177     for (i = start; i < end; ++i) {
178         int diff = diff_window[i];
179 
180         if (diff < threshold) {
181             min = diff;
182             argmin = i;
183             break;
184         }
185         if (diff < min) {
186             min = diff;
187             argmin = i;
188         }
189     }
190     return argmin;
191 }
192 
193 void
yin_write(yin_t * pe,int16 const * frame)194 yin_write(yin_t *pe, int16 const *frame)
195 {
196     int outptr, difflen;
197 
198     /* Rotate the window one frame forward. */
199     ++pe->wstart;
200     /* Fill in the frame before wstart. */
201     outptr = pe->wstart - 1;
202     /* Wrap around the window pointer. */
203     if (pe->wstart == pe->wsize)
204         pe->wstart = 0;
205 
206     /* Now calculate normalized difference function. */
207     difflen = pe->frame_size / 2;
208     cmn_diff(frame, pe->diff_window[outptr], difflen);
209 
210     /* Find the first point under threshold.  If not found, then
211      * use the absolute minimum. */
212     pe->period_window[outptr]
213         = thresholded_search(pe->diff_window[outptr],
214                              pe->search_threshold, 0, difflen);
215 
216     /* Increment total number of frames. */
217     ++pe->nfr;
218 }
219 
220 int
yin_read(yin_t * pe,uint16 * out_period,uint16 * out_bestdiff)221 yin_read(yin_t *pe, uint16 *out_period, uint16 *out_bestdiff)
222 {
223     int wstart, wlen, half_wsize, i;
224     int best, best_diff, search_width, low_period, high_period;
225 
226     half_wsize = (pe->wsize-1)/2;
227     /* Without any smoothing, just return the current value (don't
228      * need to do anything to the current poitner either). */
229     if (half_wsize == 0) {
230         if (pe->endut)
231             return 0;
232         *out_period = pe->period_window[0];
233         *out_bestdiff = pe->diff_window[0][pe->period_window[0]];
234         return 1;
235     }
236 
237     /* We can't do anything unless we have at least (wsize-1)/2 + 1
238      * frames, unless we're at the end of the utterance. */
239     if (pe->endut == 0 && pe->nfr < half_wsize + 1) {
240         /* Don't increment the current pointer either. */
241         return 0;
242     }
243 
244     /* Establish the smoothing window. */
245     /* End of utterance. */
246     if (pe->endut) {
247         /* We are done (no more data) when pe->wcur = pe->wstart. */
248         if (pe->wcur == pe->wstart)
249             return 0;
250         /* I.e. pe->wcur (circular minus) half_wsize. */
251         wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize;
252         /* Number of frames from wstart up to pe->wstart. */
253         wlen = pe->wstart - wstart;
254         if (wlen < 0) wlen += pe->wsize;
255         /*printf("ENDUT! ");*/
256     }
257     /* Beginning of utterance. */
258     else if (pe->nfr < pe->wsize) {
259         wstart = 0;
260         wlen = pe->nfr;
261     }
262     /* Normal case, it is what it is. */
263     else {
264         wstart = pe->wstart;
265         wlen = pe->wsize;
266     }
267 
268     /* Now (finally) look for the best local estimate. */
269     /* printf("Searching for local estimate in %d frames around %d\n",
270        wlen, pe->nfr + 1 - wlen); */
271     best = pe->period_window[pe->wcur];
272     best_diff = pe->diff_window[pe->wcur][best];
273     for (i = 0; i < wlen; ++i) {
274         int j = wstart + i;
275         int diff;
276 
277         j %= pe->wsize;
278         diff = pe->diff_window[j][pe->period_window[j]];
279         /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768,
280            pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */
281         if (diff < best_diff) {
282             best_diff = diff;
283             best = pe->period_window[j];
284         }
285     }
286     /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768,
287        best ? 8000.0/best : 8000.0); */
288     /* If it's the same as the current one then return it. */
289     if (best == pe->period_window[pe->wcur]) {
290         /* Increment the current pointer. */
291         if (++pe->wcur == pe->wsize)
292             pe->wcur = 0;
293         *out_period = best;
294         *out_bestdiff = best_diff;
295         return 1;
296     }
297     /* Otherwise, redo the search inside a narrower range. */
298     search_width = best * pe->search_range / 32768;
299     /* printf("Search width = %d * %.2f = %d\n",
300        best, (double)pe->search_range/32768, search_width); */
301     if (search_width == 0) search_width = 1;
302     low_period = best - search_width;
303     high_period = best + search_width;
304     if (low_period < 0) low_period = 0;
305     if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2;
306     /* printf("Searching from %d to %d\n", low_period, high_period); */
307     best = thresholded_search(pe->diff_window[pe->wcur],
308                               pe->search_threshold,
309                               low_period, high_period);
310     best_diff = pe->diff_window[pe->wcur][best];
311 
312     if (out_period)
313         *out_period = (best > 65535) ? 65535 : best;
314     if (out_bestdiff)
315         *out_bestdiff = (best_diff > 65535) ? 65535 : best_diff;
316 
317     /* Increment the current pointer. */
318     if (++pe->wcur == pe->wsize)
319         pe->wcur = 0;
320     return 1;
321 }
322