1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3 * Copyright (c) 2008 Beyond Access, Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY
18 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BEYOND ACCESS, INC. NOR
21 * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 /**
31 * @file yin.c Implementation of pitch extraction.
32 * @author David Huggins-Daines <dhuggins@cs.cmu.edu>
33 */
34
35 /* This implements part of the YIN algorithm:
36 *
37 * "YIN, a fundamental frequency estimator for speech and music".
38 * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical
39 * Society of America, 111 (4), April 2002.
40 */
41
42 #include "sphinxbase/prim_type.h"
43 #include "sphinxbase/ckd_alloc.h"
44 #include "sphinxbase/fixpoint.h"
45
46 #include "sphinxbase/yin.h"
47
48 #include <stdio.h>
49 #include <string.h>
50
51 struct yin_s {
52 uint16 frame_size; /** Size of analysis frame. */
53 #ifndef FIXED_POINT
54 float search_threshold; /**< Threshold for finding period */
55 float search_range; /**< Range around best local estimate to search */
56 #else
57 uint16 search_threshold; /**< Threshold for finding period, in Q15 */
58 uint16 search_range; /**< Range around best local estimate to search, in Q15 */
59 #endif
60 uint16 nfr; /**< Number of frames read so far. */
61
62 unsigned char wsize; /**< Size of smoothing window. */
63 unsigned char wstart; /**< First frame in window. */
64 unsigned char wcur; /**< Current frame of analysis. */
65 unsigned char endut; /**< Hoch Hech! Are we at the utterance end? */
66
67 #ifndef FIXED_POINT
68 float **diff_window; /**< Window of difference function outputs. */
69 #else
70 fixed32 **diff_window; /**< Window of difference function outputs. */
71 #endif
72 uint16 *period_window; /**< Window of best period estimates. */
73 int16 *frame; /**< Storage for frame */
74 };
75
76 /**
77 * The core of YIN: cumulative mean normalized difference function.
78 */
79 #ifndef FIXED_POINT
80 static void
cmn_diff(int16 const * signal,float * out_diff,int ndiff)81 cmn_diff(int16 const *signal, float *out_diff, int ndiff)
82 {
83 double cum;
84 int t, j;
85
86 cum = 0.0f;
87 out_diff[0] = 1.0f;
88
89 for (t = 1; t < ndiff; ++t) {
90 float dd;
91 dd = 0.0f;
92 for (j = 0; j < ndiff; ++j) {
93 int diff = signal[j] - signal[t + j];
94 dd += (diff * diff);
95 }
96 cum += dd;
97 out_diff[t] = (float)(dd * t / cum);
98 }
99 }
100 #else
101 static void
cmn_diff(int16 const * signal,int32 * out_diff,int ndiff)102 cmn_diff(int16 const *signal, int32 *out_diff, int ndiff)
103 {
104 uint32 cum, cshift;
105 int32 t, tscale;
106
107 out_diff[0] = 32768;
108 cum = 0;
109 cshift = 0;
110
111 /* Determine how many bits we can scale t up by below. */
112 for (tscale = 0; tscale < 32; ++tscale)
113 if (ndiff & (1<<(31-tscale)))
114 break;
115 --tscale; /* Avoid teh overflowz. */
116 /* printf("tscale is %d (ndiff - 1) << tscale is %d\n",
117 tscale, (ndiff-1) << tscale); */
118
119 /* Somewhat elaborate block floating point implementation.
120 * The fp implementation of this is really a lot simpler. */
121 for (t = 1; t < ndiff; ++t) {
122 uint32 dd, dshift, norm;
123 int j;
124
125 dd = 0;
126 dshift = 0;
127 for (j = 0; j < ndiff; ++j) {
128 int diff = signal[j] - signal[t + j];
129 /* Guard against overflows. */
130 if (dd > (1UL<<tscale)) {
131 dd >>= 1;
132 ++dshift;
133 }
134 dd += (diff * diff) >> dshift;
135 }
136 /* Make sure the diffs and cum are shifted to the same
137 * scaling factor (usually dshift will be zero) */
138 if (dshift > cshift) {
139 cum += dd << (dshift-cshift);
140 }
141 else {
142 cum += dd >> (cshift-dshift);
143 }
144
145 /* Guard against overflows and also ensure that (t<<tscale) > cum. */
146 while (cum > (1UL<<tscale)) {
147 cum >>= 1;
148 ++cshift;
149 }
150 /* Avoid divide-by-zero! */
151 if (cum == 0) cum = 1;
152 /* Calculate the normalizer in high precision. */
153 norm = (t << tscale) / cum;
154 /* Do a long multiply and shift down to Q15. */
155 out_diff[t] = (int32)(((long long)dd * norm)
156 >> (tscale - 15 + cshift - dshift));
157 /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n",
158 dd, cshift, dshift, (t<<tscale), cum, norm, out_diff[t]); */
159 }
160 }
161 #endif
162
163 yin_t *
yin_init(int frame_size,float search_threshold,float search_range,int smooth_window)164 yin_init(int frame_size, float search_threshold,
165 float search_range, int smooth_window)
166 {
167 yin_t *pe;
168
169 pe = ckd_calloc(1, sizeof(*pe));
170 pe->frame_size = frame_size;
171 #ifndef FIXED_POINT
172 pe->search_threshold = search_threshold;
173 pe->search_range = search_range;
174 #else
175 pe->search_threshold = (uint16)(search_threshold * 32768);
176 pe->search_range = (uint16)(search_range * 32768);
177 #endif
178 pe->wsize = smooth_window * 2 + 1;
179 pe->diff_window = ckd_calloc_2d(pe->wsize,
180 pe->frame_size / 2,
181 sizeof(**pe->diff_window));
182 pe->period_window = ckd_calloc(pe->wsize,
183 sizeof(*pe->period_window));
184 pe->frame = ckd_calloc(pe->frame_size, sizeof(*pe->frame));
185 return pe;
186 }
187
188 void
yin_free(yin_t * pe)189 yin_free(yin_t *pe)
190 {
191 ckd_free_2d(pe->diff_window);
192 ckd_free(pe->period_window);
193 ckd_free(pe);
194 }
195
196 void
yin_start(yin_t * pe)197 yin_start(yin_t *pe)
198 {
199 /* Reset the circular window pointers. */
200 pe->wstart = pe->endut = 0;
201 pe->nfr = 0;
202 }
203
204 void
yin_end(yin_t * pe)205 yin_end(yin_t *pe)
206 {
207 pe->endut = 1;
208 }
209
210 int
211 #ifndef FIXED_POINT
thresholded_search(float * diff_window,float threshold,int start,int end)212 thresholded_search(float *diff_window, float threshold, int start, int end)
213 #else
214 thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end)
215 #endif
216 {
217 int i, argmin;
218 #ifndef FIXED_POINT
219 float min;
220 #else
221 int min;
222 #endif
223
224 min = diff_window[start];
225 argmin = start;
226 for (i = start + 1; i < end; ++i) {
227 #ifndef FIXED_POINT
228 float diff = diff_window[i];
229 #else
230 int diff = diff_window[i];
231 #endif
232
233 if (diff < threshold) {
234 min = diff;
235 argmin = i;
236 break;
237 }
238 if (diff < min) {
239 min = diff;
240 argmin = i;
241 }
242 }
243 return argmin;
244 }
245
246 void
yin_store(yin_t * pe,int16 const * frame)247 yin_store(yin_t *pe, int16 const *frame)
248 {
249 memcpy(pe->frame, frame, pe->frame_size * sizeof(*pe->frame));
250 }
251
252 void
yin_write(yin_t * pe,int16 const * frame)253 yin_write(yin_t *pe, int16 const *frame)
254 {
255 int outptr, difflen;
256
257 /* Rotate the window one frame forward. */
258 ++pe->wstart;
259 /* Fill in the frame before wstart. */
260 outptr = pe->wstart - 1;
261 /* Wrap around the window pointer. */
262 if (pe->wstart == pe->wsize)
263 pe->wstart = 0;
264
265 /* Now calculate normalized difference function. */
266 difflen = pe->frame_size / 2;
267 cmn_diff(frame, pe->diff_window[outptr], difflen);
268
269 /* Find the first point under threshold. If not found, then
270 * use the absolute minimum. */
271 pe->period_window[outptr]
272 = thresholded_search(pe->diff_window[outptr],
273 pe->search_threshold, 0, difflen);
274
275 /* Increment total number of frames. */
276 ++pe->nfr;
277 }
278
279 void
yin_write_stored(yin_t * pe)280 yin_write_stored(yin_t *pe)
281 {
282 yin_write(pe, pe->frame);
283 }
284
285 int
yin_read(yin_t * pe,uint16 * out_period,float * out_bestdiff)286 yin_read(yin_t *pe, uint16 *out_period, float *out_bestdiff)
287 {
288 int wstart, wlen, half_wsize, i;
289 int best, search_width, low_period, high_period;
290 #ifndef FIXED_POINT
291 float best_diff;
292 #else
293 int best_diff;
294 #endif
295
296 half_wsize = (pe->wsize-1)/2;
297 /* Without any smoothing, just return the current value (don't
298 * need to do anything to the current poitner either). */
299 if (half_wsize == 0) {
300 if (pe->endut)
301 return 0;
302 *out_period = pe->period_window[0];
303 #ifndef FIXED_POINT
304 *out_bestdiff = pe->diff_window[0][pe->period_window[0]];
305 #else
306 *out_bestdiff = pe->diff_window[0][pe->period_window[0]] / 32768.0f;
307 #endif
308 return 1;
309 }
310
311 /* We can't do anything unless we have at least (wsize-1)/2 + 1
312 * frames, unless we're at the end of the utterance. */
313 if (pe->endut == 0 && pe->nfr < half_wsize + 1) {
314 /* Don't increment the current pointer either. */
315 return 0;
316 }
317
318 /* Establish the smoothing window. */
319 /* End of utterance. */
320 if (pe->endut) {
321 /* We are done (no more data) when pe->wcur = pe->wstart. */
322 if (pe->wcur == pe->wstart)
323 return 0;
324 /* I.e. pe->wcur (circular minus) half_wsize. */
325 wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize;
326 /* Number of frames from wstart up to pe->wstart. */
327 wlen = pe->wstart - wstart;
328 if (wlen < 0) wlen += pe->wsize;
329 /*printf("ENDUT! ");*/
330 }
331 /* Beginning of utterance. */
332 else if (pe->nfr < pe->wsize) {
333 wstart = 0;
334 wlen = pe->nfr;
335 }
336 /* Normal case, it is what it is. */
337 else {
338 wstart = pe->wstart;
339 wlen = pe->wsize;
340 }
341
342 /* Now (finally) look for the best local estimate. */
343 /* printf("Searching for local estimate in %d frames around %d\n",
344 wlen, pe->nfr + 1 - wlen); */
345 best = pe->period_window[pe->wcur];
346 best_diff = pe->diff_window[pe->wcur][best];
347 for (i = 0; i < wlen; ++i) {
348 int j = wstart + i;
349 #ifndef FIXED_POINT
350 float diff;
351 #else
352 int diff;
353 #endif
354
355 j %= pe->wsize;
356 diff = pe->diff_window[j][pe->period_window[j]];
357 /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768,
358 pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */
359 if (diff < best_diff) {
360 best_diff = diff;
361 best = pe->period_window[j];
362 }
363 }
364 /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768,
365 best ? 8000.0/best : 8000.0); */
366 /* If it's the same as the current one then return it. */
367 if (best == pe->period_window[pe->wcur]) {
368 /* Increment the current pointer. */
369 if (++pe->wcur == pe->wsize)
370 pe->wcur = 0;
371 *out_period = best;
372 #ifndef FIXED_POINT
373 *out_bestdiff = best_diff;
374 #else
375 *out_bestdiff = best_diff / 32768.0f;
376 #endif
377 return 1;
378 }
379 /* Otherwise, redo the search inside a narrower range. */
380 #ifndef FIXED_POINT
381 search_width = (int)(best * pe->search_range);
382 #else
383 search_width = best * pe->search_range / 32768;
384 #endif
385 /* printf("Search width = %d * %.2f = %d\n",
386 best, (double)pe->search_range/32768, search_width); */
387 if (search_width == 0) search_width = 1;
388 low_period = best - search_width;
389 high_period = best + search_width;
390 if (low_period < 0) low_period = 0;
391 if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2;
392 /* printf("Searching from %d to %d\n", low_period, high_period); */
393 best = thresholded_search(pe->diff_window[pe->wcur],
394 pe->search_threshold,
395 low_period, high_period);
396 best_diff = pe->diff_window[pe->wcur][best];
397
398 if (out_period)
399 *out_period = (best > 65535) ? 65535 : best;
400 if (out_bestdiff) {
401 #ifndef FIXED_POINT
402 *out_bestdiff = (best_diff > 1.0f) ? 1.0f : best_diff;
403 #else
404 *out_bestdiff = (best_diff > 32768) ? 1.0f : best_diff / 32768.0f;
405 #endif
406 }
407
408 /* Increment the current pointer. */
409 if (++pe->wcur == pe->wsize)
410 pe->wcur = 0;
411 return 1;
412 }
413