1 /* Copyright (C) 2002 Jean-Marc Valin
2    File: vbr.c
3 
4    VBR-related routines
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions
8    are met:
9 
10    - Redistributions of source code must retain the above copyright
11    notice, this list of conditions and the following disclaimer.
12 
13    - Redistributions in binary form must reproduce the above copyright
14    notice, this list of conditions and the following disclaimer in the
15    documentation and/or other materials provided with the distribution.
16 
17    - Neither the name of the Xiph.org Foundation nor the names of its
18    contributors may be used to endorse or promote products derived from
19    this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
25    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 */
34 
35 #ifdef HAVE_CONFIG_H
36 #include "config.h"
37 #endif
38 
39 #include "vbr.h"
40 #include <math.h>
41 
42 
/* Square of x.  The argument is expanded (and therefore evaluated) twice,
   so it must not have side effects. */
#define sqr(x) ((x)*(x))

/* Energy floor.  It is added to the frame energy before the log is taken,
   seeds the log-energy history and the noise estimate in vbr_init(), and is
   the level a frame's energy must exceed before that frame is allowed to
   update the noise estimate through the "quieter than the noise level" path. */
#define MIN_ENERGY 6000
/* Exponent applied to the frame energy (pow(ener, NOISE_POW)) to obtain the
   compressed energy measure on which the noise level is tracked. */
#define NOISE_POW .3
47 
48 
/* Threshold table with 9 rows of 11 entries.  Each row is labelled with the
   mode / bit-rate it belongs to (note that the last row, 4 kbps, is out of
   bit-rate order).  Entries lie between -1.0 and 11.0.
   NOTE(review): "nb" presumably means narrowband, and the column index is
   presumably the quality setting 0..10 with entries compared against the
   value returned by vbr_analysis() -- confirm against the code that reads
   this table, which is not in this file. */
const float vbr_nb_thresh[9][11]={
   {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /*   CNG   */
   { 3.5,  2.5,  2.0,  1.2,  0.5,  0.0, -0.5, -0.7, -0.8, -0.9, -1.0}, /*  2 kbps */
   {10.0,  6.5,  5.2,  4.5,  3.9,  3.5,  3.0,  2.5,  2.3,  1.8,  1.0}, /*  6 kbps */
   {11.0,  8.8,  7.5,  6.5,  5.0,  3.9,  3.9,  3.9,  3.5,  3.0,  1.0}, /*  8 kbps */
   {11.0, 11.0,  9.9,  9.0,  8.0,  7.0,  6.5,  6.0,  5.0,  4.0,  2.0}, /* 11 kbps */
   {11.0, 11.0, 11.0, 11.0,  9.5,  9.0,  8.0,  7.0,  6.5,  5.0,  3.0}, /* 15 kbps */
   {11.0, 11.0, 11.0, 11.0, 11.0, 11.0,  9.5,  8.5,  8.0,  6.5,  4.0}, /* 18 kbps */
   {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0,  9.8,  7.5,  5.5}, /* 24 kbps */
   { 8.0,  5.0,  3.7,  3.0,  2.5,  2.0,  1.8,  1.5,  1.0,  0.0,  0.0}  /*  4 kbps */
};
60 
61 
/* Threshold table with 5 rows of 11 entries; each row is labelled with the
   mode / bit-rate it belongs to.  The first two rows are -1.0 throughout.
   NOTE(review): "hb" presumably means high-band, and the column index is
   presumably the quality setting 0..10 -- confirm against the code that
   reads this table, which is not in this file. */
const float vbr_hb_thresh[5][11]={
   {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /* silence */
   {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /*  2 kbps */
   {11.0, 11.0,  9.5,  8.5,  7.5,  6.0,  5.0,  3.9,  3.0,  2.0,  1.0}, /*  6 kbps */
   {11.0, 11.0, 11.0, 11.0, 11.0,  9.5,  8.7,  7.8,  7.0,  6.5,  4.0}, /* 10 kbps */
   {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0,  9.8,  7.5,  5.5}  /* 18 kbps */
};
69 
/* Threshold table with 2 rows of 11 entries; each row is labelled with the
   mode / bit-rate it belongs to.
   NOTE(review): "uhb" presumably means ultra-high-band, and the column index
   is presumably the quality setting 0..10 -- confirm against the code that
   reads this table, which is not in this file. */
const float vbr_uhb_thresh[2][11]={
   {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}, /* silence */
   { 3.9,  2.5,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0, -1.0}  /*  2 kbps */
};
74 
/*
  Put a VBR analyser into its start-up state.

  vbr: state to initialise; every field written here is overwritten
       unconditionally, so the previous contents do not matter.

  The noise estimate and the log-energy history are both seeded from
  MIN_ENERGY, so the first frames are judged against the energy floor.
*/
void vbr_init(VBRState *vbr)
{
   int slot;

   /* Log-energy history: every slot starts at the log of the energy floor. */
   for (slot=VBR_MEMORY_SIZE-1;slot>=0;slot--)
      vbr->last_log_energy[slot] = log(MIN_ENERGY);

   /* Noise tracker: a weighted sum and its weight, both starting at a small
      weight of .05; the noise level is their ratio. */
   vbr->noise_accum_count=.05;
   vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
   vbr->consec_noise=0;

   /* Energy tracking. */
   vbr->energy_alpha=.1;
   vbr->average_energy=0;
   vbr->last_energy=1;
   vbr->accum_sum=0;

   /* Pitch and quality memory. */
   vbr->last_quality=0;
   vbr->last_pitch_coef=0;
   vbr->soft_pitch=0;
}
96 
97 
/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  - Attacks (positive energy derivative) should be coded with more bits

  - Stationary voiced segments should receive more bits

  - Segments with (very) low absolute energy should receive fewer bits
    (maybe only shaped noise?)

  - DTX for near-zero energy?

  - Stationary fricative segments should have fewer bits

  - Temporal masking: when the energy slope is decreasing, decrease the
    bit-rate

  - Decrease bit-rate for males (low pitch)?

  - (wideband only) fewer bits in the high-band when the signal is very
    non-stationary (harder to notice high-frequency noise)???

*/
122 
/*
  Analyse one frame of signal, update the VBR state and return a quality
  value for the frame.

  vbr:        analyser state; read and updated (energy averages, noise
              estimate, consecutive-noise counter, pitch/quality memory and
              the log-energy history).
  sig:        frame samples; sig[0..len-1] are read.
  len:        number of samples in sig.  The frame is split at len>>1 into
              a first and a second half for the energy-increase check.
  pitch:      not used by this function.
  pitch_coef: pitch coefficient of the frame; drives the voicing measure and
              the pitch-based quality bonus.

  Returns the quality value, always within [-1, 10].
*/
float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
{
   int i;
   float ener=0, ener1=0, ener2=0;
   float qual=7;
   float log_energy;
   float non_st=0;
   float voicing;
   float pow_ener;

   /* Part of the public signature, but the analysis does not need it. */
   (void)pitch;

   /* Energy of the first and second half of the frame, and of the whole. */
   for (i=0;i<(len>>1);i++)
      ener1 += ((float)sig[i])*sig[i];

   for (i=len>>1;i<len;i++)
      ener2 += ((float)sig[i])*sig[i];
   ener=ener1+ener2;

   /* Non-stationarity: mean squared distance between this frame's log-energy
      and the stored history, scaled by 1/30 and capped at 1. */
   log_energy = log(ener+MIN_ENERGY);
   for (i=0;i<VBR_MEMORY_SIZE;i++)
      non_st += sqr(log_energy-vbr->last_log_energy[i]);
   non_st =  non_st/(30*VBR_MEMORY_SIZE);
   if (non_st>1)
      non_st=1;

   /* Signed, squared distance of the pitch coefficient from .4. */
   voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4);
   vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;
   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
   pow_ener = pow(ener,NOISE_POW);
   /* While the accumulator weight is still below .06 (it starts at .05 and
      every update below raises it above that), reseed the noise sum from the
      current frame.  noise_level was already computed above, so this only
      affects later frames. */
   if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
      vbr->noise_accum = .05*pow_ener;

   /* Noise-like frame: weakly voiced, stationary and close to the noise level. */
   if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
       || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<0 && non_st < .05))
   {
      float tmp;
      vbr->consec_noise++;
      /* Limit a single frame's contribution to 3x the current noise level. */
      if (pow_ener > 3*vbr->noise_level)
         tmp = 3*vbr->noise_level;
      else
         tmp = pow_ener;
      /* Only adapt the noise estimate after 4 consecutive noise-like frames. */
      if (vbr->consec_noise>=4)
      {
         vbr->noise_accum = .95*vbr->noise_accum + .05*tmp;
         vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
      }
   } else {
      vbr->consec_noise=0;
   }

   /* A frame quieter than the noise level (but above the energy floor) always
      pulls the estimate down, whatever the classification above was. */
   if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
   {
      vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
      vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
   }

   /* Checking for very low absolute energy */
   if (ener < 30000)
   {
      qual -= .7;
      if (ener < 10000)
         qual-=.7;
      if (ener < 3000)
         qual-=.7;
   } else {
      /* Log energy ratio against the previous frame (short term) and against
         the running average (long term). */
      float short_diff, long_diff;
      short_diff = log((ener+1)/(1+vbr->last_energy));
      long_diff = log((ener+1)/(1+vbr->average_energy));

      if (long_diff<-5)
         long_diff=-5;
      if (long_diff>2)
         long_diff=2;

      if (long_diff>0)
         qual += .6*long_diff;
      if (long_diff<0)
         qual += .5*long_diff;
      if (short_diff>0)
      {
         if (short_diff>5)
            short_diff=5;
         qual += .5*short_diff;
      }
      /* Checking for energy increases */
      if (ener2 > 1.6*ener1)
         qual += .5;
   }
   vbr->last_energy = ener;
   vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef;
   qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));

   /* Quality may rise immediately but only falls half-way towards the new
      value per frame. */
   if (qual < vbr->last_quality)
      qual = .5*qual + .5*vbr->last_quality;
   if (qual<4)
      qual=4;
   if (qual>10)
      qual=10;

   /* Sustained noise: pin to the lower bound, then keep lowering with the
      log of the run length. */
   if (vbr->consec_noise>=3)
      qual=4;

   if (vbr->consec_noise)
      qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3));
   if (qual<0)
      qual=0;

   if (ener<60000)
   {
      if (vbr->consec_noise>2)
         qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
      if (ener<10000&&vbr->consec_noise>2)
         qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
      if (qual<0)
         qual=0;
      /* A silent frame (ener == 0) would evaluate log(0), a pole error that
         yields -infinity and may raise a floating-point exception.  That
         value would be clamped to -1 just below, so assign the floor
         directly instead of computing it. */
      if (ener>0)
         qual += .3*log(ener/60000.0);
      else
         qual = -1;
   }
   if (qual<-1)
      qual=-1;

   vbr->last_pitch_coef = pitch_coef;
   vbr->last_quality = qual;

   /* Shift the log-energy history by one and store this frame at the front. */
   for (i=VBR_MEMORY_SIZE-1;i>0;i--)
      vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
   vbr->last_log_energy[0] = log_energy;

   return qual;
}
269 
/*
  Counterpart of vbr_init().  The function performs no work and does not
  touch the state it is given.

  vbr: state being destroyed; unused.
*/
void vbr_destroy(VBRState *vbr)
{
   (void)vbr;
}
273