1 /* Copyright (C) 2002 Jean-Marc Valin
2 File: vbr.c
3
4 VBR-related routines
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 - Neither the name of the Xiph.org Foundation nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
25 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33 */
34
35 #ifdef HAVE_CONFIG_H
36 #include "config.h"
37 #endif
38
39 #include "vbr.h"
40 #include <math.h>
41
42
/* Square of x. The argument is expanded twice, so it must not have side effects. */
#define sqr(x) ((x) * (x))

/* Constant added to the frame energy before taking its log, and the level a
   frame must exceed before its energy is folded into the noise estimate. */
#define MIN_ENERGY 6000
/* Exponent applied to the frame energy (pow(ener, NOISE_POW)) before it is
   compared with, or accumulated into, the noise level. */
#define NOISE_POW 0.3
47
48
/*
 * Threshold table with nine rows of eleven entries; each row is labelled
 * with the mode / bit-rate it belongs to.
 * NOTE(review): "nb" presumably means narrowband, the row index presumably
 * matches the encoder's submode id and the column index the VBR quality
 * setting (0..10) -- confirm against the code that reads this table.
 */
const float vbr_nb_thresh[9][11]={
   /* CNG     */ {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0},
   /* 2 kbps  */ { 3.5,  2.5,  2.0,  1.2,  0.5,  0.0, -0.5, -0.7, -0.8, -0.9, -1.0},
   /* 6 kbps  */ {10.0,  6.5,  5.2,  4.5,  3.9,  3.5,  3.0,  2.5,  2.3,  1.8,  1.0},
   /* 8 kbps  */ {11.0,  8.8,  7.5,  6.5,  5.0,  3.9,  3.9,  3.9,  3.5,  3.0,  1.0},
   /* 11 kbps */ {11.0, 11.0,  9.9,  9.0,  8.0,  7.0,  6.5,  6.0,  5.0,  4.0,  2.0},
   /* 15 kbps */ {11.0, 11.0, 11.0, 11.0,  9.5,  9.0,  8.0,  7.0,  6.5,  5.0,  3.0},
   /* 18 kbps */ {11.0, 11.0, 11.0, 11.0, 11.0, 11.0,  9.5,  8.5,  8.0,  6.5,  4.0},
   /* 24 kbps */ {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0,  9.8,  7.5,  5.5},
   /* 4 kbps  */ { 8.0,  5.0,  3.7,  3.0,  2.5,  2.0,  1.8,  1.5,  1.0,  0.0,  0.0}
};
60
61
/*
 * Threshold table with five rows of eleven entries; each row is labelled
 * with the mode / bit-rate it belongs to.
 * NOTE(review): "hb" presumably means high band (wideband extension layer),
 * with the same row/column meaning as the other threshold tables -- confirm
 * against the code that reads this table.
 */
const float vbr_hb_thresh[5][11]={
   /* silence */ {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0},
   /* 2 kbps  */ {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0},
   /* 6 kbps  */ {11.0, 11.0,  9.5,  8.5,  7.5,  6.0,  5.0,  3.9,  3.0,  2.0,  1.0},
   /* 10 kbps */ {11.0, 11.0, 11.0, 11.0, 11.0,  9.5,  8.7,  7.8,  7.0,  6.5,  4.0},
   /* 18 kbps */ {11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0,  9.8,  7.5,  5.5}
};
69
/*
 * Threshold table with two rows of eleven entries; each row is labelled
 * with the mode / bit-rate it belongs to.
 * NOTE(review): "uhb" presumably means ultra-high band, with the same
 * row/column meaning as the other threshold tables -- confirm against the
 * code that reads this table.
 */
const float vbr_uhb_thresh[2][11]={
   /* silence */ {-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0},
   /* 2 kbps  */ { 3.9,  2.5,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0, -1.0}
};
74
/*
 * Put a VBR analysis state into its start-up configuration.
 *
 * vbr: state to initialise; every field assigned below is overwritten.
 *
 * The noise estimate is seeded with a weight of .05 at the level
 * pow(MIN_ENERGY, NOISE_POW), and the whole log-energy history is filled
 * with log(MIN_ENERGY).
 */
void vbr_init(VBRState *vbr)
{
   int slot;
   const double floor_log_energy = log(MIN_ENERGY);

   /* Energy trackers and their smoothing factor. */
   vbr->energy_alpha=.1;
   vbr->average_energy=0;
   vbr->last_energy=1;
   vbr->accum_sum=0;

   /* Pitch / quality memory from the "previous" frame. */
   vbr->last_pitch_coef=0;
   vbr->soft_pitch=0;
   vbr->last_quality=0;

   /* Noise estimate: weighted sum, the matching weight, and their ratio.
      noise_level is derived from the two stored fields, so it is set last. */
   vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
   vbr->noise_accum_count=.05;
   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
   vbr->consec_noise=0;

   /* Log-energy history used by the non-stationarity measure. */
   for (slot=0;slot<VBR_MEMORY_SIZE;slot++)
      vbr->last_log_energy[slot] = floor_log_energy;
}
96
97
/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  -Attacks (positive energy derivative) should be coded with more bits

  -Stationary voiced segments should receive more bits

  -Segments with (very) low absolute energy should receive fewer bits (maybe
   only shaped noise?)

  -DTX for near-zero energy?

  -Stationary fricative segments should have fewer bits

  -Temporal masking: when energy slope is decreasing, decrease the bit-rate

  -Decrease bit-rate for males (low pitch)?

  -(wideband only) fewer bits in the high-band when signal is very
   non-stationary (harder to notice high-frequency noise)???

*/
122
/*
 * Analyse one frame and return the quality value it should be coded at.
 *
 * vbr        : analysis state; read and updated on every call (energy
 *              averages, noise estimate, pitch smoothing, log-energy history).
 * sig        : frame samples; entries 0..len-1 are read.
 * len        : number of samples in sig.
 * pitch      : not used by this implementation; kept so the interface is
 *              unchanged.
 * pitch_coef : pitch coefficient of the frame.  NOTE(review): the .4 pivot
 *              used below suggests a value roughly in 0..1 -- confirm
 *              against the caller.
 *
 * Returns the quality estimate. It starts at 7, is adjusted by the rules
 * in the body and, for finite input, ends up no lower than -1 and no higher
 * than 10.
 */
float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
{
   int i;
   const int half = len>>1;
   float ener, ener1=0, ener2=0;
   float qual=7;
   float log_energy;
   float non_st=0;
   float voicing;
   float pow_ener;
   double noise_penalty;

   (void)pitch;

   /* Energy of each half of the frame; the two halves are compared further
      down to detect an energy rise inside the frame. */
   for (i=0;i<half;i++)
      ener1 += ((float)sig[i])*sig[i];
   for (i=half;i<len;i++)
      ener2 += ((float)sig[i])*sig[i];
   ener = ener1+ener2;

   /* Non-stationarity: mean squared distance between this frame's log-energy
      and the stored history, scaled by 1/30 and capped at 1. */
   log_energy = log(ener+MIN_ENERGY);
   for (i=0;i<VBR_MEMORY_SIZE;i++)
      non_st += sqr(log_energy-vbr->last_log_energy[i]);
   non_st = non_st/(30*VBR_MEMORY_SIZE);
   if (non_st>1)
      non_st=1;

   /* Signed square of (pitch_coef-.4), scaled by 3. */
   voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4);

   vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;

   /* noise_level is refreshed before the accumulator is touched, so the
      comparisons below use the estimate from the previous frames. */
   vbr->noise_level = vbr->noise_accum/vbr->noise_accum_count;
   pow_ener = pow(ener,NOISE_POW);

   /* While the accumulated weight is still below .06, reseed the accumulator
      from any frame whose energy exceeds MIN_ENERGY. */
   if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
      vbr->noise_accum = .05*pow_ener;

   /* Noise decision: weak voicing, low non-stationarity and (except in the
      last clause) an energy close to the current noise level. */
   if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
       || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<0 && non_st < .05))
   {
      float capped;

      vbr->consec_noise++;

      /* Limit the contribution of one frame to 3x the current noise level. */
      if (pow_ener > 3*vbr->noise_level)
         capped = 3*vbr->noise_level;
      else
         capped = pow_ener;

      /* Only adapt once at least four noise frames have occurred in a row. */
      if (vbr->consec_noise>=4)
      {
         vbr->noise_accum = .95*vbr->noise_accum + .05*capped;
         vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
      }
   } else {
      vbr->consec_noise=0;
   }

   /* Any frame below the noise level (and above MIN_ENERGY) also pulls the
      estimate down, regardless of the decision above. */
   if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
   {
      vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
      vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
   }

   if (ener < 30000)
   {
      /* Very low absolute energy: up to three .7 reductions. */
      qual -= .7;
      if (ener < 10000)
         qual-=.7;
      if (ener < 3000)
         qual-=.7;
   } else {
      float short_diff, long_diff;

      /* Log energy ratio against the previous frame and the running average. */
      short_diff = log((ener+1)/(1+vbr->last_energy));
      long_diff = log((ener+1)/(1+vbr->average_energy));

      if (long_diff<-5)
         long_diff=-5;
      if (long_diff>2)
         long_diff=2;

      if (long_diff>0)
         qual += .6*long_diff;
      if (long_diff<0)
         qual += .5*long_diff;
      if (short_diff>0)
      {
         if (short_diff>5)
            short_diff=5;
         qual += .5*short_diff;
      }
      /* Energy increase inside the frame. */
      if (ener2 > 1.6*ener1)
         qual += .5;
   }

   vbr->last_energy = ener;
   vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef;
   qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));

   /* Falling quality is averaged with the previous value; rising quality
      is taken as is. */
   if (qual < vbr->last_quality)
      qual = .5*qual + .5*vbr->last_quality;
   if (qual<4)
      qual=4;
   if (qual>10)
      qual=10;

   /* Three or more consecutive noise frames force the quality to 4 before
      the noise penalties are applied. */
   if (vbr->consec_noise>=3)
      qual=4;

   /* Penalty that grows with the length of the noise run; kept in double so
      the arithmetic matches evaluating the expression at each use. */
   noise_penalty = log(3.0 + vbr->consec_noise)-log(3.0);

   if (vbr->consec_noise)
      qual -= noise_penalty;
   if (qual<0)
      qual=0;

   if (ener<60000)
   {
      if (vbr->consec_noise>2)
      {
         qual -= 0.5*noise_penalty;
         if (ener<10000)
            qual -= 0.5*noise_penalty;
      }
      if (qual<0)
         qual=0;
      /* ener < 60000 here, so this term is never positive. */
      qual += .3*log(ener/60000.0);
   }
   if (qual<-1)
      qual=-1;

   vbr->last_pitch_coef = pitch_coef;
   vbr->last_quality = qual;

   /* Shift the history by one slot and store the newest log-energy at [0]. */
   for (i=VBR_MEMORY_SIZE-1;i>0;i--)
      vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
   vbr->last_log_energy[0] = log_energy;

   return qual;
}
269
/*
 * Counterpart of vbr_init() in name only: this function currently performs
 * no work and ignores its argument.
 */
void vbr_destroy(VBRState *vbr)
{
   (void)vbr;
}
273