1 /*---------------------------------------------------------------------------*\
2 
3   FILE........: phase.c
4   AUTHOR......: David Rowe
5   DATE CREATED: 1/2/09
6 
7   Functions for modelling and synthesising phase.
8 
9 \*---------------------------------------------------------------------------*/
10 
11 /*
12   Copyright (C) 2009 David Rowe
13 
14   All rights reserved.
15 
16   This program is free software; you can redistribute it and/or modify
17   it under the terms of the GNU Lesser General Public License version 2.1, as
18   published by the Free Software Foundation.  This program is
19   distributed in the hope that it will be useful, but WITHOUT ANY
20   WARRANTY; without even the implied warranty of MERCHANTABILITY or
21   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
22   License for more details.
23 
24   You should have received a copy of the GNU Lesser General Public License
25   along with this program; if not,see <http://www.gnu.org/licenses/>.
26 */
27 
28 #include "defines.h"
29 #include "phase.h"
30 #include "kiss_fft.h"
31 #include "comp.h"
32 #include "comp_prim.h"
33 #include "sine.h"
34 
35 #include <assert.h>
36 #include <ctype.h>
37 #include <math.h>
38 #include <string.h>
39 #include <stdlib.h>
40 
41 /*---------------------------------------------------------------------------*\
42 
43   sample_phase()
44 
45   Samples phase at centre of each harmonic from and array of FFT_ENC
46   DFT samples.
47 
48 \*---------------------------------------------------------------------------*/
49 
sample_phase(MODEL * model,COMP H[],COMP A[])50 void sample_phase(MODEL *model,
51                   COMP H[],
52                   COMP A[]        /* LPC analysis filter in freq domain */
53                   )
54 {
55     int   m, b;
56     float r;
57 
58     r = TWO_PI/(FFT_ENC);
59 
60     /* Sample phase at harmonics */
61 
62     for(m=1; m<=model->L; m++) {
63         b = (int)(m*model->Wo/r + 0.5);
64         H[m] = cconj(A[b]);      /* synth filter 1/A is opposite phase to analysis filter */
65     }
66 }
67 
68 
69 /*---------------------------------------------------------------------------*\
70 
71    phase_synth_zero_order()
72 
73    Synthesises phases based on SNR and a rule based approach.  No phase
74    parameters are required apart from the SNR (which can be reduced to a
75    1 bit V/UV decision per frame).
76 
77    The phase of each harmonic is modelled as the phase of a synthesis
78    filter excited by an impulse.  In many Codec 2 modes the synthesis
79    filter is a LPC filter. Unlike the first order model the position
80    of the impulse is not transmitted, so we create an excitation pulse
81    train using a rule based approach.
82 
83    Consider a pulse train with a pulse starting time n=0, with pulses
84    repeated at a rate of Wo, the fundamental frequency.  A pulse train
85    in the time domain is equivalent to harmonics in the frequency
86    domain.  We can make an excitation pulse train using a sum of
87    sinsusoids:
88 
89      for(m=1; m<=L; m++)
90        ex[n] = cos(m*Wo*n)
91 
92    Note: the Octave script ../octave/phase.m is an example of this if
93    you would like to try making a pulse train.
94 
95    The phase of each excitation harmonic is:
96 
97      arg(E[m]) = mWo
98 
99    where E[m] are the complex excitation (freq domain) samples,
100    arg(x), just returns the phase of a complex sample x.
101 
102    As we don't transmit the pulse position for this model, we need to
103    synthesise it.  Now the excitation pulses occur at a rate of Wo.
104    This means the phase of the first harmonic advances by N_SAMP samples
105    over a synthesis frame of N_SAMP samples.  For example if Wo is pi/20
106    (200 Hz), then over a 10ms frame (N_SAMP=80 samples), the phase of the
107    first harmonic would advance (pi/20)*80 = 4*pi or two complete
108    cycles.
109 
110    We generate the excitation phase of the fundamental (first
111    harmonic):
112 
113      arg[E[1]] = Wo*N_SAMP;
114 
115    We then relate the phase of the m-th excitation harmonic to the
116    phase of the fundamental as:
117 
118      arg(E[m]) = m*arg(E[1])
119 
120    This E[m] then gets passed through the LPC synthesis filter to
121    determine the final harmonic phase.
122 
123    Comparing to speech synthesised using original phases:
124 
125    - Through headphones speech synthesised with this model is not as
126      good. Through a loudspeaker it is very close to original phases.
127 
128    - If there are voicing errors, the speech can sound clicky or
129      staticy.  If V speech is mistakenly declared UV, this model tends to
130      synthesise impulses or clicks, as there is usually very little shift or
131      dispersion through the LPC synthesis filter.
132 
133    - When combined with LPC amplitude modelling there is an additional
134      drop in quality.  I am not sure why, theory is interformant energy
135      is raised making any phase errors more obvious.
136 
137    NOTES:
138 
139      1/ This synthesis model is effectively the same as a simple LPC-10
140      vocoders, and yet sounds much better.  Why? Conventional wisdom
141      (AMBE, MELP) says mixed voicing is required for high quality
142      speech.
143 
144      2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE
145      also from MIT) first described this zero phase model, I need to look
146      up the paper.
147 
148      3/ Note that this approach could cause some discontinuities in
149      the phase at the edge of synthesis frames, as no attempt is made
150      to make sure that the phase tracks are continuous (the excitation
151      phases are continuous, but not the final phases after filtering
152      by the LPC spectra).  Technically this is a bad thing.  However
153      this may actually be a good thing, disturbing the phase tracks a
154      bit.  More research needed, e.g. test a synthesis model that adds
155      a small delta-W to make phase tracks line up for voiced
156      harmonics.
157 
158 \*---------------------------------------------------------------------------*/
159 
phase_synth_zero_order(int n_samp,MODEL * model,float * ex_phase,COMP H[])160 void phase_synth_zero_order(
161     int    n_samp,
162     MODEL *model,
163     float *ex_phase,            /* excitation phase of fundamental        */
164     COMP   H[]                  /* L synthesis filter freq domain samples */
165 
166 )
167 {
168     int   m;
169     float new_phi;
170     COMP  Ex[MAX_AMP+1];	  /* excitation samples */
171     COMP  A_[MAX_AMP+1];	  /* synthesised harmonic samples */
172 
173     /*
174        Update excitation fundamental phase track, this sets the position
175        of each pitch pulse during voiced speech.  After much experiment
176        I found that using just this frame's Wo improved quality for UV
177        sounds compared to interpolating two frames Wo like this:
178 
179        ex_phase[0] += (*prev_Wo+model->Wo)*N_SAMP/2;
180     */
181 
182     ex_phase[0] += (model->Wo)*n_samp;
183     ex_phase[0] -= TWO_PI*floorf(ex_phase[0]/TWO_PI + 0.5);
184 
185     for(m=1; m<=model->L; m++) {
186 
187         /* generate excitation */
188 
189         if (model->voiced) {
190 
191             Ex[m].real = cosf(ex_phase[0]*m);
192             Ex[m].imag = sinf(ex_phase[0]*m);
193         }
194         else {
195 
196             /* When a few samples were tested I found that LPC filter
197                phase is not needed in the unvoiced case, but no harm in
198                keeping it.
199             */
200             float phi = TWO_PI*(float)codec2_rand()/CODEC2_RAND_MAX;
201             Ex[m].real = cosf(phi);
202             Ex[m].imag = sinf(phi);
203         }
204 
205         /* filter using LPC filter */
206 
207         A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag;
208         A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag;
209 
210         /* modify sinusoidal phase */
211 
212         new_phi = atan2f(A_[m].imag, A_[m].real+1E-12);
213         model->phi[m] = new_phi;
214     }
215 
216 }
217 
218 
219 /*---------------------------------------------------------------------------*\
220 
221   FUNCTION....: mag_to_phase
222   AUTHOR......: David Rowe
223   DATE CREATED: Jan 2017
224 
225   Algorithm for http://www.dsprelated.com/showcode/20.php ported to C.  See
226   also Octave function mag_to_phase.m
227 
228   Given a magnitude spectrum in dB, returns a minimum-phase phase
229   spectra.
230 
231 \*---------------------------------------------------------------------------*/
232 
mag_to_phase(float phase[],float Gdbfk[],int Nfft,codec2_fft_cfg fft_fwd_cfg,codec2_fft_cfg fft_inv_cfg)233 void mag_to_phase(float phase[],             /* Nfft/2+1 output phase samples in radians       */
234                   float Gdbfk[],             /* Nfft/2+1 postive freq amplitudes samples in dB */
235                   int Nfft,
236                   codec2_fft_cfg fft_fwd_cfg,
237                   codec2_fft_cfg fft_inv_cfg
238                   )
239 {
240     COMP Sdb[Nfft], c[Nfft], cf[Nfft], Cf[Nfft];
241     int  Ns = Nfft/2+1;
242     int  i;
243 
244     /* install negative frequency components, 1/Nfft takes into
245        account kiss fft lack of scaling on ifft */
246 
247     Sdb[0].real = Gdbfk[0];
248     Sdb[0].imag = 0.0;
249     for(i=1; i<Ns; i++) {
250         Sdb[i].real = Sdb[Nfft-i].real = Gdbfk[i];
251         Sdb[i].imag = Sdb[Nfft-i].imag = 0.0;
252     }
253 
254     /* compute real cepstrum from log magnitude spectrum */
255 
256     codec2_fft(fft_inv_cfg, Sdb, c);
257     for(i=0; i<Nfft; i++) {
258         c[i].real /= (float)Nfft;
259         c[i].imag /= (float)Nfft;
260     }
261 
262     /* Fold cepstrum to reflect non-min-phase zeros inside unit circle */
263 
264     cf[0] = c[0];
265     for(i=1; i<Ns-1; i++) {
266         cf[i] = cadd(c[i],c[Nfft-i]);
267     }
268     cf[Ns-1] = c[Ns-1];
269     for(i=Ns; i<Nfft; i++) {
270         cf[i].real = 0.0;
271         cf[i].imag = 0.0;
272     }
273 
274     /* Cf = dB_magnitude + j * minimum_phase */
275 
276     codec2_fft(fft_fwd_cfg, cf, Cf);
277 
278     /*  The maths says we are meant to be using log(x), not 20*log10(x),
279         so we need to scale the phase to account for this:
280         log(x) = 20*log10(x)/scale */
281 
282     float scale = (20.0/logf(10.0));
283 
284     for(i=0; i<Ns; i++) {
285         phase[i] = Cf[i].imag/scale;
286     }
287 
288 
289 }
290