1 /* Copyright (C) 2002 Jean-Marc Valin
2    File: vbr.c
3 
4    VBR-related routines
5 
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions
8    are met:
9 
10    - Redistributions of source code must retain the above copyright
11    notice, this list of conditions and the following disclaimer.
12 
13    - Redistributions in binary form must reproduce the above copyright
14    notice, this list of conditions and the following disclaimer in the
15    documentation and/or other materials provided with the distribution.
16 
17    - Neither the name of the Xiph.org Foundation nor the names of its
18    contributors may be used to endorse or promote products derived from
19    this software without specific prior written permission.
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
25    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 */
34 
35 
36 #include "config.h"
37 
38 
39 #include "vbr.h"
40 #include <math.h>
41 
42 
/* Square of x. The argument is expanded twice, so it must be free of side
   effects. */
#define sqr(x) ((x)*(x))

/* Energy floor: added to the frame energy before its log is taken, used to
   seed the log-energy history in vbr_init(), and used as the minimum frame
   energy for some of the noise-estimate updates in vbr_analysis(). */
#define MIN_ENERGY 6000
/* Exponent applied to an energy value (via pow()) before it is compared with,
   or accumulated into, the noise estimate. */
#define NOISE_POW .3
47 
48 #ifndef DISABLE_VBR
49 
/*
 * Narrowband threshold table: 9 rows of 11 values. Each row is labelled with
 * the mode / bit-rate it belongs to; note that the "4 kbps" row is stored
 * last, i.e. the rows are not in increasing bit-rate order.
 * NOTE(review): the 11 columns presumably correspond to the quality settings
 * 0..10 and the entries to thresholds on the value returned by
 * vbr_analysis() -- nothing in this file reads the table, so confirm against
 * the encoder that does.
 */
const float vbr_nb_thresh[9][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*   CNG   */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /*  2 kbps */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /*  6 kbps */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /*  8 kbps */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* 11 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f}, /* 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f}, /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f}, /* 24 kbps */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /*  4 kbps */
};
61 
62 
/*
 * High-band threshold table: 5 rows of 11 values, one row per labelled
 * mode / bit-rate. The "silence" and "2 kbps" rows are filled with -1.0f.
 * NOTE(review): column meaning is not visible in this file; presumably the
 * same layout as vbr_nb_thresh (one column per quality setting 0..10) --
 * confirm against the code that reads the table.
 */
const float vbr_hb_thresh[5][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*  2 kbps */
   {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f}, /*  6 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f}, /* 10 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}  /* 18 kbps */
};
70 
/*
 * Threshold table with 2 rows of 11 values: a "silence" row filled with
 * -1.0f and a "2 kbps" row.
 * NOTE(review): "uhb" presumably stands for the ultra-wideband high band and
 * the columns for quality settings 0..10 -- neither is established by this
 * file; confirm against the code that reads the table.
 */
const float vbr_uhb_thresh[2][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}  /*  2 kbps */
};
75 
/*
  Put a VBR analyser state into its start-up condition.

  vbr - state to initialise; every field touched here is overwritten.

  The log-energy history is pre-filled with log(MIN_ENERGY), and the noise
  estimate is seeded with a small weight (.05) on pow(MIN_ENERGY, NOISE_POW),
  so that noise_level is well defined before any frame has been analysed.
*/
void vbr_init(VBRState *vbr)
{
   /* Kept in double so the value stored in each slot is converted exactly
      as a direct log() assignment would be. */
   const double floor_log_energy = log(MIN_ENERGY);
   int slot = VBR_MEMORY_SIZE;

   /* Log-energy history: every slot starts at the energy floor */
   while (slot-- > 0)
      vbr->last_log_energy[slot] = floor_log_energy;

   /* Energy tracking */
   vbr->energy_alpha = .1;
   vbr->average_energy = 0;
   vbr->last_energy = 1;
   vbr->accum_sum = 0;

   /* Pitch and quality memory */
   vbr->last_pitch_coef = 0;
   vbr->soft_pitch = 0;
   vbr->last_quality = 0;

   /* Noise estimate: noise_level is the ratio accumulator / weight */
   vbr->consec_noise = 0;
   vbr->noise_accum_count = .05;
   vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
   vbr->noise_level = vbr->noise_accum/vbr->noise_accum_count;
}
97 
98 
99 /*
100   This function should analyse the signal and decide how critical the
101   coding error will be perceptually. The following factors should be
102   taken into account:
103 
104   -Attacks (positive energy derivative) should be coded with more bits
105 
106   -Stationary voiced segments should receive more bits
107 
  -Segments with (very) low absolute energy should receive fewer bits (maybe
  only shaped noise?)
110 
111   -DTX for near-zero energy?
112 
  -Stationary fricative segments should have fewer bits
114 
115   -Temporal masking: when energy slope is decreasing, decrease the bit-rate
116 
117   -Decrease bit-rate for males (low pitch)?
118 
  -(wideband only) fewer bits in the high-band when signal is very
  non-stationary (harder to notice high-frequency noise)???
121 
122 */
123 
/* Sum of the squared samples sig[start] .. sig[end-1], accumulated in single
   precision. Returns 0 when the range is empty. */
static float vbr_segment_energy(const spx_word16_t *sig, int start, int end)
{
   int i;
   float sum=0;

   for (i=start;i<end;i++)
      sum += ((float)sig[i])*sig[i];
   return sum;
}

/*
  Compute the VBR quality measure for one frame and update the analyser state.

  vbr        - analyser state; read and updated (average/last energy, noise
               estimate, consecutive-noise counter, smoothed pitch term,
               last quality, log-energy history)
  sig        - frame samples
  len        - number of samples in sig; the first len>>1 samples and the
               remaining ones are measured separately to detect energy rises
  pitch      - not used by this function
  pitch_coef - pitch coefficient; drives the voicing measure and the smoothed
               soft_pitch term

  Returns the quality measure. It starts at 7, is adjusted by the energy and
  pitch terms, is limited to [4,10], and is then lowered for runs of
  noise-like frames and for low-energy frames. The result is never below -1.
*/
float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
{
   int i;
   float ener, ener1, ener2;
   float qual=7;
   float log_energy;
   float non_st=0;
   float voicing;
   float pow_ener;
   /* log(3 + consec_noise) - log(3); only meaningful when consec_noise != 0 */
   double noise_penalty=0;
   const int half=len>>1;

   (void)pitch;

   /* Energy of each half of the frame, and of the whole frame */
   ener1 = vbr_segment_energy(sig, 0, half);
   ener2 = vbr_segment_energy(sig, half, len);
   ener=ener1+ener2;

   /* Non-stationarity: mean squared distance between this frame's log-energy
      and the stored history, scaled by 1/30 and capped at 1 */
   log_energy = log(ener+MIN_ENERGY);
   for (i=0;i<VBR_MEMORY_SIZE;i++)
      non_st += sqr(log_energy-vbr->last_log_energy[i]);
   non_st =  non_st/(30*VBR_MEMORY_SIZE);
   if (non_st>1)
      non_st=1;

   /* Signed, squared distance of pitch_coef from .4 */
   voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4);
   vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;
   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
   pow_ener = pow(ener,NOISE_POW);
   /* While the noise estimate still has (almost) only its initial weight,
      re-seed it from the current frame */
   if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
      vbr->noise_accum = .05*pow_ener;

   /* Noise-like frame: weakly voiced, stationary and close to the noise level */
   if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
       || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<0 && non_st < .05))
   {
      float tmp;
      vbr->consec_noise++;
      /* Limit the contribution of a single frame to 3x the current level */
      if (pow_ener > 3*vbr->noise_level)
         tmp = 3*vbr->noise_level;
      else
         tmp = pow_ener;
      /* Only adapt once the noise decision has held for several frames */
      if (vbr->consec_noise>=4)
      {
         vbr->noise_accum = .95*vbr->noise_accum + .05*tmp;
         vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
      }
   } else {
      vbr->consec_noise=0;
   }

   /* Frames below the current noise level always pull the estimate down */
   if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
   {
      vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
      vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
   }

   /* Checking for very low absolute energy */
   if (ener < 30000)
   {
      qual -= .7;
      if (ener < 10000)
         qual-=.7;
      if (ener < 3000)
         qual-=.7;
   } else {
      float short_diff, long_diff;
      /* Log energy ratio against the previous frame and the running average */
      short_diff = log((ener+1)/(1+vbr->last_energy));
      long_diff = log((ener+1)/(1+vbr->average_energy));

      if (long_diff<-5)
         long_diff=-5;
      if (long_diff>2)
         long_diff=2;

      if (long_diff>0)
         qual += .6*long_diff;
      if (long_diff<0)
         qual += .5*long_diff;
      if (short_diff>0)
      {
         if (short_diff>5)
            short_diff=5;
         qual += .5*short_diff;
      }
      /* Checking for energy increases */
      if (ener2 > 1.6*ener1)
         qual += .5;
   }
   vbr->last_energy = ener;
   vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef;
   qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));

   /* Quality drops are smoothed: move only half-way towards a lower value */
   if (qual < vbr->last_quality)
      qual = .5*qual + .5*vbr->last_quality;
   if (qual<4)
      qual=4;
   if (qual>10)
      qual=10;

   if (vbr->consec_noise>=3)
      qual=4;

   /* Penalty growing with the log of the length of the current noise run */
   if (vbr->consec_noise)
   {
      noise_penalty = log(3.0 + vbr->consec_noise)-log(3);
      qual -= 1.0 * noise_penalty;
   }
   if (qual<0)
      qual=0;

   if (ener<60000)
   {
      /* consec_noise>2 implies the penalty above has been computed */
      if (vbr->consec_noise>2)
         qual-=0.5*noise_penalty;
      if (ener<10000&&vbr->consec_noise>2)
         qual-=0.5*noise_penalty;
      if (qual<0)
         qual=0;
      qual += .3*log(.0001+ener/60000.0);
   }
   if (qual<-1)
      qual=-1;

   vbr->last_pitch_coef = pitch_coef;
   vbr->last_quality = qual;

   /* Shift the log-energy history by one frame; slot 0 is the newest */
   for (i=VBR_MEMORY_SIZE-1;i>0;i--)
      vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
   vbr->last_log_energy[0] = log_energy;

   return qual;
}
270 
/*
  Counterpart of vbr_init(). vbr_init() acquires no resources, so there is
  nothing to release here and the state is left untouched.
*/
void vbr_destroy(VBRState *vbr)
{
   (void)vbr;
}
274 
275 #endif /* #ifndef DISABLE_VBR */
276