1 /***********************************************************************
2 Copyright (c) 2006-2010, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, (subject to the limitations in the disclaimer below)
5 are permitted provided that the following conditions are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Skype Limited, nor the names of specific
12 contributors, may be used to endorse or promote products derived from
13 this software without specific prior written permission.
14 NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
15 BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
16 CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
17 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
18 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
22 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #include "SKP_Silk_main_FIX.h"
29
30 /* Control encoder SNR */
/*
 * Apply the API-level control settings to the encoder state.
 *
 * In order, this function:
 *   1. Runs the internal sampling-rate switching state machine (24/16/12/8 kHz),
 *      resampling the buffered input in psEnc->x_buf when a switch is made.
 *   2. Re-initializes the rate-dependent part of the encoder state if the
 *      internal sampling rate changed.
 *   3. Sets the complexity-dependent parameters.
 *   4. Clamps the target bitrate and translates it into an SNR target.
 *   5. Stores packet size, packet loss, in-band FEC (LBRR) and DTX settings.
 *
 * Returns 0 when every setting was valid. Otherwise returns one of the
 * SKP_SILK_ENC_* error codes; when several checks fail, the code of the last
 * failing check is returned (each check overwrites `ret`). Note that the
 * packet loss and DTX values are stored in the state even when they are
 * reported as out of range.
 */
SKP_int SKP_Silk_control_encoder_FIX(
    SKP_Silk_encoder_state_FIX  *psEnc,             /* I/O  Pointer to Silk encoder state               */
    const SKP_int               API_fs_kHz,         /* I    External (API) sampling rate (kHz)          */
    const SKP_int               PacketSize_ms,      /* I    Packet length (ms)                          */
    SKP_int32                   TargetRate_bps,     /* I    Target max bitrate (bps) (used if SNR_dB == 0) */
    const SKP_int               PacketLoss_perc,    /* I    Packet loss rate (in percent)               */
    const SKP_int               INBandFec_enabled,  /* I    Enable (1) / disable (0) inband FEC         */
    const SKP_int               DTX_enabled,        /* I    Enable / disable DTX                        */
    const SKP_int               InputFramesize_ms,  /* I    Inputframe in ms                            */
    const SKP_int               Complexity          /* I    Complexity (0->low; 1->medium; 2->high)     */
)
{
    SKP_int32 LBRRRate_thres_bps;
    SKP_int   k, fs_kHz, ret = 0;
    SKP_int32 frac_Q6;
    const SKP_int32 *rateTable;

    /* State machine for the SWB/WB switching */
    fs_kHz = psEnc->sCmn.fs_kHz;

    /* Only switch during low speech activity, when no frames are sitting in the payload buffer */
    if( API_fs_kHz == 8 || fs_kHz == 0 || API_fs_kHz < fs_kHz ) {
        /* Switching is not possible, encoder just initialized, or internal mode higher than external */
        fs_kHz = API_fs_kHz;
    } else {

        /* Resample all valid data in x_buf. Resampling the last part gets rid of a click, 5ms after switching */
        /* this is because the same state is used when downsampling in API.c and is then up to date           */
        /* the click immediately after switching is most of the time still there                              */

        if( psEnc->sCmn.fs_kHz == 24 ) {
            /* Accumulate the difference between the target rate and limit */
            if( psEnc->sCmn.fs_kHz_changed == 0 ) {
                psEnc->sCmn.bitrateDiff += SKP_MUL( InputFramesize_ms, TargetRate_bps - SWB2WB_BITRATE_BPS_INITIAL );
            } else {
                psEnc->sCmn.bitrateDiff += SKP_MUL( InputFramesize_ms, TargetRate_bps - SWB2WB_BITRATE_BPS );
            }
            /* Only a rate deficit is accumulated; a surplus never goes above zero */
            psEnc->sCmn.bitrateDiff = SKP_min( psEnc->sCmn.bitrateDiff, 0 );

            /* Check if we should switch from 24 to 16 kHz */
#if SWITCH_TRANSITION_FILTERING
            if( ( psEnc->sCmn.sLP.transition_frame_no == 0 ) &&                         /* Transition phase not active */
                ( psEnc->sCmn.bitrateDiff <= -ACCUM_BITS_DIFF_THRESHOLD || psEnc->sCmn.sSWBdetect.WB_detected == 1 ) &&
                ( psEnc->speech_activity_Q8 < 128 && psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {
                psEnc->sCmn.sLP.transition_frame_no = 1;                                /* Begin transition phase */
                psEnc->sCmn.sLP.mode                = 0;                                /* Switch down */
            }

            if( ( psEnc->sCmn.sLP.transition_frame_no >= TRANSITION_FRAMES_DOWN ) && ( psEnc->sCmn.sLP.mode == 0 ) && /* Transition phase complete, ready to switch */
#else
            if( ( psEnc->sCmn.bitrateDiff <= -ACCUM_BITS_DIFF_THRESHOLD || psEnc->sCmn.sSWBdetect.WB_detected == 1 ) &&
#endif
                ( psEnc->speech_activity_Q8 < 128 && psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {

                SKP_int16 x_buf[    2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];
                SKP_int16 x_bufout[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];

                psEnc->sCmn.bitrateDiff = 0;
                fs_kHz = 16;

                SKP_memcpy( x_buf, psEnc->x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                SKP_memset( psEnc->sCmn.resample24To16state, 0, sizeof( psEnc->sCmn.resample24To16state ) );

#if LOW_COMPLEXITY_ONLY
                {
                    SKP_int16 scratch[ ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) + SigProc_Resample_2_3_coarse_NUM_FIR_COEFS - 1 ];
                    SKP_Silk_resample_2_3_coarse( &x_bufout[ 0 ], psEnc->sCmn.resample24To16state, &x_buf[ 0 ], SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape, (SKP_int16*)scratch );
                }
#else
                SKP_Silk_resample_2_3( &x_bufout[ 0 ], psEnc->sCmn.resample24To16state, &x_buf[ 0 ], SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );
#endif

                /* set the first frame to zero, no performance difference was noticed though (320 samples = 20 ms at 16 kHz) */
                SKP_memset( x_bufout, 0, 320 * sizeof( SKP_int16 ) );
                SKP_memcpy( psEnc->x_buf, x_bufout, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

#if SWITCH_TRANSITION_FILTERING
                psEnc->sCmn.sLP.transition_frame_no = 0;                                /* Transition phase complete */
#endif
            }
        } else if( psEnc->sCmn.fs_kHz == 16 ) {

            /* Check if we should switch from 16 to 24 kHz */
#if SWITCH_TRANSITION_FILTERING
            if( ( psEnc->sCmn.sLP.transition_frame_no == 0 ) &&                         /* No transition phase running, ready to switch */
#else
            if(
#endif
                ( API_fs_kHz > psEnc->sCmn.fs_kHz && TargetRate_bps >= WB2SWB_BITRATE_BPS && psEnc->sCmn.sSWBdetect.WB_detected == 0 ) &&
                ( psEnc->speech_activity_Q8 < 128 && psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {

                SKP_int16 x_buf[          2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];
                SKP_int16 x_bufout[ 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) / 2 ];
                SKP_int32 resample16To24state[ 11 ];

                psEnc->sCmn.bitrateDiff = 0;
                fs_kHz = 24;

                SKP_memcpy( x_buf, psEnc->x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                SKP_memset( resample16To24state, 0, sizeof( resample16To24state ) );

                SKP_Silk_resample_3_2( &x_bufout[ 0 ], resample16To24state, &x_buf[ 0 ], SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );

                /* set the first frame to zero, no performance difference was noticed though (480 samples = 20 ms at 24 kHz) */
                SKP_memset( x_bufout, 0, 480 * sizeof( SKP_int16 ) );
                SKP_memcpy( psEnc->x_buf, x_bufout, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );
#if SWITCH_TRANSITION_FILTERING
                psEnc->sCmn.sLP.mode = 1;                                               /* Switch up */
#endif
            } else {
                /* accumulate the difference between the target rate and limit */
                psEnc->sCmn.bitrateDiff += SKP_MUL( InputFramesize_ms, TargetRate_bps - WB2MB_BITRATE_BPS );
                psEnc->sCmn.bitrateDiff  = SKP_min( psEnc->sCmn.bitrateDiff, 0 );

                /* Check if we should switch from 16 to 12 kHz */
#if SWITCH_TRANSITION_FILTERING
                if( ( psEnc->sCmn.sLP.transition_frame_no == 0 ) &&                     /* Transition phase not active */
                    ( psEnc->sCmn.bitrateDiff <= -ACCUM_BITS_DIFF_THRESHOLD ) &&
                    ( psEnc->speech_activity_Q8 < 128 && psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {
                    psEnc->sCmn.sLP.transition_frame_no = 1;                            /* Begin transition phase */
                    psEnc->sCmn.sLP.mode                = 0;                            /* Switch down */
                }

                if( ( psEnc->sCmn.sLP.transition_frame_no >= TRANSITION_FRAMES_DOWN ) && ( psEnc->sCmn.sLP.mode == 0 ) && /* Transition phase complete, ready to switch */
#else
                if( ( psEnc->sCmn.bitrateDiff <= -ACCUM_BITS_DIFF_THRESHOLD ) &&
#endif
                    ( psEnc->speech_activity_Q8 < 128 && psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {

                    SKP_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];

                    SKP_memcpy( x_buf, psEnc->x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                    psEnc->sCmn.bitrateDiff = 0;
                    fs_kHz = 12;

                    if( API_fs_kHz == 24 ) {

                        /* Intermediate upsampling of x_bufFIX from 16 to 24 kHz */
                        SKP_int16 x_buf24[ 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) / 2 ];
                        SKP_int32 scratch[ 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) ];
                        SKP_int32 resample16To24state[ 11 ];

                        SKP_memset( resample16To24state, 0, sizeof( resample16To24state ) );
                        SKP_Silk_resample_3_2( &x_buf24[ 0 ], resample16To24state, &x_buf[ 0 ], SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );

                        /* Update the state of the resampler used in API.c, from 24 to 12 kHz */
                        SKP_memset( psEnc->sCmn.resample24To12state, 0, sizeof( psEnc->sCmn.resample24To12state ) );
                        SKP_Silk_resample_1_2_coarse( &x_buf24[ 0 ], psEnc->sCmn.resample24To12state, &x_buf[ 0 ], scratch, SKP_RSHIFT( SKP_SMULBB( 3, SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape ), 2 ) );

                        /* set the first frame to zero, no performance difference was noticed though (240 samples = 20 ms at 12 kHz) */
                        SKP_memset( x_buf, 0, 240 * sizeof( SKP_int16 ) );
                        SKP_memcpy( psEnc->x_buf, x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                    } else if( API_fs_kHz == 16 ) {
                        SKP_int16 x_bufout[ 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) / 4 ];
                        SKP_memset( psEnc->sCmn.resample16To12state, 0, sizeof( psEnc->sCmn.resample16To12state ) );

                        SKP_Silk_resample_3_4( &x_bufout[ 0 ], psEnc->sCmn.resample16To12state, &x_buf[ 0 ], SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );

                        /* set the first frame to zero, no performance difference was noticed though (240 samples = 20 ms at 12 kHz) */
                        SKP_memset( x_bufout, 0, 240 * sizeof( SKP_int16 ) );
                        /* x_bufout holds only 3/4 of the x_buf length; copy just that much so we never read past the local array */
                        SKP_memcpy( psEnc->x_buf, x_bufout, sizeof( x_bufout ) );
                    }
#if SWITCH_TRANSITION_FILTERING
                    psEnc->sCmn.sLP.transition_frame_no = 0;                            /* Transition phase complete */
#endif
                }
            }
        } else if( psEnc->sCmn.fs_kHz == 12 ) {

            /* Check if we should switch from 12 to 16 kHz */
#if SWITCH_TRANSITION_FILTERING
            if( ( psEnc->sCmn.sLP.transition_frame_no == 0 ) &&                         /* No transition phase running, ready to switch */
#else
            if(
#endif
                ( API_fs_kHz > psEnc->sCmn.fs_kHz && TargetRate_bps >= MB2WB_BITRATE_BPS ) &&
                ( psEnc->speech_activity_Q8 < 128 && psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {

                SKP_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];

                SKP_memcpy( x_buf, psEnc->x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                psEnc->sCmn.bitrateDiff = 0;
                fs_kHz = 16;

                /* Reset state of the resampler to be used */
                if( API_fs_kHz == 24 ) {

                    SKP_int16 x_bufout[ 2 * 2 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) / 3 ];

                    /* Intermediate upsampling of x_bufFIX from 12 to 24 kHz */
                    SKP_int16 x_buf24[ 2 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) ];
                    SKP_int32 scratch[ 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) ];
                    SKP_int32 resample12To24state[ 6 ];

                    SKP_memset( resample12To24state, 0, sizeof( resample12To24state ) );
                    SKP_Silk_resample_2_1_coarse( &x_buf[ 0 ], resample12To24state, &x_buf24[ 0 ], scratch, SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );

                    SKP_memset( psEnc->sCmn.resample24To16state, 0, sizeof( psEnc->sCmn.resample24To16state ) );

#if LOW_COMPLEXITY_ONLY
                    SKP_assert( sizeof( SKP_int16 ) * ( 2 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) + SigProc_Resample_2_3_coarse_NUM_FIR_COEFS - 1 ) <= sizeof( scratch ) );
                    SKP_Silk_resample_2_3_coarse( &x_bufout[ 0 ], psEnc->sCmn.resample24To16state, &x_buf24[ 0 ], SKP_LSHIFT( SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape, 1 ), (SKP_int16*)scratch );
#else
                    SKP_Silk_resample_2_3( &x_bufout[ 0 ], psEnc->sCmn.resample24To16state, &x_buf24[ 0 ], SKP_LSHIFT( SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape, 1 ) );
#endif

                    /* set the first frame to zero, no performance difference was noticed though (320 samples = 20 ms at 16 kHz) */
                    SKP_memset( x_bufout, 0, 320 * sizeof( SKP_int16 ) );
                    SKP_memcpy( psEnc->x_buf, x_bufout, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );
                }
#if SWITCH_TRANSITION_FILTERING
                psEnc->sCmn.sLP.mode = 1;                                               /* Switch up */
#endif
            } else {
                /* accumulate the difference between the target rate and limit */
                psEnc->sCmn.bitrateDiff += SKP_MUL( InputFramesize_ms, TargetRate_bps - MB2NB_BITRATE_BPS );
                psEnc->sCmn.bitrateDiff  = SKP_min( psEnc->sCmn.bitrateDiff, 0 );

                /* Check if we should switch from 12 to 8 kHz */
#if SWITCH_TRANSITION_FILTERING
                if( ( psEnc->sCmn.sLP.transition_frame_no == 0 ) &&                     /* Transition phase not active */
                    ( psEnc->sCmn.bitrateDiff <= -ACCUM_BITS_DIFF_THRESHOLD ) &&
                    ( psEnc->speech_activity_Q8 < 128 && psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {
                    psEnc->sCmn.sLP.transition_frame_no = 1;                            /* Begin transition phase */
                    psEnc->sCmn.sLP.mode                = 0;                            /* Switch down */
                }

                if( ( psEnc->sCmn.sLP.transition_frame_no >= TRANSITION_FRAMES_DOWN ) && ( psEnc->sCmn.sLP.mode == 0 ) &&
#else
                if( ( psEnc->sCmn.bitrateDiff <= -ACCUM_BITS_DIFF_THRESHOLD ) &&
#endif
                    ( psEnc->speech_activity_Q8 < 128 && psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {

                    SKP_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];

                    SKP_memcpy( x_buf, psEnc->x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                    psEnc->sCmn.bitrateDiff = 0;
                    fs_kHz = 8;

                    if( API_fs_kHz == 24 ) {

                        SKP_int32 scratch[ 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) ];
                        /* Intermediate upsampling of x_buf from 12 to 24 kHz */
                        SKP_int16 x_buf24[ 2 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) ];
                        SKP_int32 resample12To24state[ 6 ];

                        SKP_memset( resample12To24state, 0, sizeof( resample12To24state ) );
                        SKP_Silk_resample_2_1_coarse( &x_buf[ 0 ], resample12To24state, &x_buf24[ 0 ], scratch, SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );

                        /* Update the state of the resampler used in API.c, from 24 to 8 kHz */
                        SKP_memset( psEnc->sCmn.resample24To8state, 0, sizeof( psEnc->sCmn.resample24To8state ) );
                        SKP_Silk_resample_1_3( &x_buf[ 0 ], psEnc->sCmn.resample24To8state, &x_buf24[ 0 ], SKP_LSHIFT( SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape, 1 ) );

                        /* set the first frame to zero, no performance difference was noticed though (160 samples = 20 ms at 8 kHz) */
                        SKP_memset( x_buf, 0, 160 * sizeof( SKP_int16 ) );
                        SKP_memcpy( psEnc->x_buf, x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                    } else if( API_fs_kHz == 16 ) {
                        /* Intermediate upsampling of x_bufFIX from 12 to 16 kHz */
                        /* NOTE(review): x_buf16 is computed but never read afterwards, and the   */
                        /* resampler called is the 3:2 one; x_buf is copied back below without    */
                        /* having been downsampled to 8 kHz. This looks like a missing resampling */
                        /* step -- confirm against the API.c resampler chain before changing.     */
                        SKP_int16 x_buf16[ 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) / 2 ];
                        SKP_int32 resample12To16state[ 11 ];

                        SKP_memset( resample12To16state, 0, sizeof( resample12To16state ) );
                        SKP_Silk_resample_3_2( &x_buf16[ 0 ], resample12To16state, &x_buf[ 0 ], SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );

                        /* set the first frame to zero, no performance difference was noticed though (160 samples = 20 ms at 8 kHz) */
                        SKP_memset( x_buf, 0, 160 * sizeof( SKP_int16 ) );
                        SKP_memcpy( psEnc->x_buf, x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                    } else if( API_fs_kHz == 12 ) {
                        SKP_int16 x_bufout[ 2 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) / 3 ];
                        SKP_memset( psEnc->sCmn.resample12To8state, 0, sizeof( psEnc->sCmn.resample12To8state ) );
#if LOW_COMPLEXITY_ONLY
                        {
                            SKP_int16 scratch[ ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) + SigProc_Resample_2_3_coarse_NUM_FIR_COEFS - 1 ];
                            SKP_Silk_resample_2_3_coarse( &x_bufout[ 0 ], psEnc->sCmn.resample12To8state, &x_buf[ 0 ],
                                SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape, scratch );
                        }
#else
                        SKP_Silk_resample_2_3( &x_bufout[ 0 ], psEnc->sCmn.resample12To8state, &x_buf[ 0 ], SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );
#endif
                        /* set the first frame to zero, no performance difference was noticed though (160 samples = 20 ms at 8 kHz) */
                        SKP_memset( x_bufout, 0, 160 * sizeof( SKP_int16 ) );
                        /* x_bufout holds only 2/3 of the x_buf length; copy just that much so we never read past the local array */
                        SKP_memcpy( psEnc->x_buf, x_bufout, sizeof( x_bufout ) );
                    }
#if SWITCH_TRANSITION_FILTERING
                    psEnc->sCmn.sLP.transition_frame_no = 0;                            /* Transition phase complete */
#endif
                }
            }
        } else if( psEnc->sCmn.fs_kHz == 8 ) {

            /* Check if we should switch from 8 to 12 kHz */
#if SWITCH_TRANSITION_FILTERING
            if( ( psEnc->sCmn.sLP.transition_frame_no == 0 ) &&                         /* No transition phase running, ready to switch */
#else
            if(
#endif
                ( API_fs_kHz > psEnc->sCmn.fs_kHz && TargetRate_bps >= NB2MB_BITRATE_BPS ) &&
                ( psEnc->speech_activity_Q8 < 128 && psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {

                SKP_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];

                SKP_memcpy( x_buf, psEnc->x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                psEnc->sCmn.bitrateDiff = 0;
                fs_kHz = 12;

                /* Reset state of the resampler to be used */
                if( API_fs_kHz == 24 ) {
                    SKP_int16 x_buf24[ 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) ];
                    SKP_int32 scratch[ 3 * 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) / 2 ];
                    SKP_int32 resample8To24state[ 7 ];

                    /* Intermediate upsampling of x_bufFIX from 8 to 24 kHz */
                    SKP_memset( resample8To24state, 0, sizeof( resample8To24state ) );
                    SKP_Silk_resample_3_1( &x_buf24[ 0 ], resample8To24state, &x_buf[ 0 ], SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );

                    SKP_memset( psEnc->sCmn.resample24To12state, 0, sizeof( psEnc->sCmn.resample24To12state ) );

                    SKP_Silk_resample_1_2_coarse( &x_buf24[ 0 ], psEnc->sCmn.resample24To12state, &x_buf[ 0 ], scratch, SKP_RSHIFT( SKP_SMULBB( 3, SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape ), 1 ) );

                    /* set the first frame to zero, no performance difference was noticed though (240 samples = 20 ms at 12 kHz) */
                    SKP_memset( x_buf, 0, 240 * sizeof( SKP_int16 ) );
                    SKP_memcpy( psEnc->x_buf, x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );

                } else if( API_fs_kHz == 16 ) {
                    SKP_int16 x_buf16[ 2 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) ];
                    SKP_int32 scratch[ 3 * ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) ];
                    SKP_int32 resample8To16state[ 6 ];

                    /* Intermediate upsampling of x_bufFIX from 8 to 16 kHz */
                    SKP_memset( resample8To16state, 0, sizeof( resample8To16state ) );
                    SKP_Silk_resample_2_1_coarse( &x_buf[ 0 ], resample8To16state, &x_buf16[ 0 ], scratch, SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape );

                    SKP_memset( psEnc->sCmn.resample16To12state, 0, sizeof( psEnc->sCmn.resample16To12state ) );

                    SKP_Silk_resample_3_4( &x_buf[ 0 ], psEnc->sCmn.resample16To12state, &x_buf16[ 0 ], SKP_LSHIFT( SKP_LSHIFT( psEnc->sCmn.frame_length, 1 ) + psEnc->sCmn.la_shape, 1 ) );

                    /* set the first frame to zero, no performance difference was noticed though (240 samples = 20 ms at 12 kHz) */
                    SKP_memset( x_buf, 0, 240 * sizeof( SKP_int16 ) );
                    SKP_memcpy( psEnc->x_buf, x_buf, ( 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ) * sizeof( SKP_int16 ) );
                }
#if SWITCH_TRANSITION_FILTERING
                psEnc->sCmn.sLP.mode = 1;                                               /* Switch up */
#endif
            }
        } else {
            /* Internal sample frequency not supported! */
            SKP_assert( 0 );
        }
    }

#if SWITCH_TRANSITION_FILTERING
    /* After switching up, stop transition filter during speech inactivity */
    if( ( psEnc->sCmn.sLP.mode == 1 ) &&
        ( psEnc->sCmn.sLP.transition_frame_no >= TRANSITION_FRAMES_UP ) &&
        ( psEnc->speech_activity_Q8 < 128 ) &&
        ( psEnc->sCmn.nFramesInPayloadBuf == 0 ) ) {

        psEnc->sCmn.sLP.transition_frame_no = 0;

        /* Reset transition filter state */
        SKP_memset( psEnc->sCmn.sLP.In_LP_State, 0, 2 * sizeof( SKP_int32 ) );
    }
#endif

    /* Set internal sampling frequency */
    if( psEnc->sCmn.fs_kHz != fs_kHz ) {
        /* reset part of the state */
        SKP_memset( &psEnc->sShape,          0, sizeof( SKP_Silk_shape_state_FIX ) );
        SKP_memset( &psEnc->sPrefilt,        0, sizeof( SKP_Silk_prefilter_state_FIX ) );
        SKP_memset( &psEnc->sNSQ,            0, sizeof( SKP_Silk_nsq_state ) );
        SKP_memset( &psEnc->sPred,           0, sizeof( SKP_Silk_predict_state_FIX ) );
        SKP_memset( psEnc->sNSQ.xq,          0, ( 2 * MAX_FRAME_LENGTH ) * sizeof( SKP_int16 ) );
        SKP_memset( psEnc->sNSQ_LBRR.xq,     0, ( 2 * MAX_FRAME_LENGTH ) * sizeof( SKP_int16 ) );
        SKP_memset( psEnc->sCmn.LBRR_buffer, 0, MAX_LBRR_DELAY * sizeof( SKP_SILK_LBRR_struct ) );
#if SWITCH_TRANSITION_FILTERING
        SKP_memset( psEnc->sCmn.sLP.In_LP_State, 0, 2 * sizeof( SKP_int32 ) );
        if( psEnc->sCmn.sLP.mode == 1 ) {
            /* Begin transition phase */
            psEnc->sCmn.sLP.transition_frame_no = 1;
        } else {
            /* End transition phase */
            psEnc->sCmn.sLP.transition_frame_no = 0;
        }
#endif
        psEnc->sCmn.inputBufIx          = 0;
        psEnc->sCmn.nFramesInPayloadBuf = 0;
        psEnc->sCmn.nBytesInPayloadBuf  = 0;
        psEnc->sCmn.oldest_LBRR_idx     = 0;
        psEnc->sCmn.TargetRate_bps      = 0; /* ensures that psEnc->SNR_dB is recomputed */

        SKP_memset( psEnc->sPred.prev_NLSFq_Q15, 0, MAX_LPC_ORDER * sizeof( SKP_int ) );

        /* Initialize non-zero parameters */
        psEnc->sCmn.prevLag                 = 100;
        psEnc->sCmn.prev_sigtype            = SIG_TYPE_UNVOICED;
        psEnc->sCmn.first_frame_after_reset = 1;
        psEnc->sPrefilt.lagPrev             = 100;
        psEnc->sShape.LastGainIndex         = 1;
        psEnc->sNSQ.lagPrev                 = 100;
        psEnc->sNSQ.prev_inv_gain_Q16       = 65536;
        psEnc->sNSQ_LBRR.prev_inv_gain_Q16  = 65536;
        psEnc->sCmn.fs_kHz = fs_kHz;
        if( psEnc->sCmn.fs_kHz == 8 ) {
            psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER;
            psEnc->sCmn.psNLSF_CB[ 0 ]  = &SKP_Silk_NLSF_CB0_10;
            psEnc->sCmn.psNLSF_CB[ 1 ]  = &SKP_Silk_NLSF_CB1_10;
        } else {
            psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER;
            psEnc->sCmn.psNLSF_CB[ 0 ]  = &SKP_Silk_NLSF_CB0_16;
            psEnc->sCmn.psNLSF_CB[ 1 ]  = &SKP_Silk_NLSF_CB1_16;
        }
        /* All lengths below scale linearly with the new internal rate */
        psEnc->sCmn.frame_length   = SKP_SMULBB( FRAME_LENGTH_MS, fs_kHz );
        psEnc->sCmn.subfr_length   = SKP_DIV32_16( psEnc->sCmn.frame_length, NB_SUBFR );
        psEnc->sCmn.la_pitch       = SKP_SMULBB( LA_PITCH_MS, fs_kHz );
        psEnc->sCmn.la_shape       = SKP_SMULBB( LA_SHAPE_MS, fs_kHz );
        psEnc->sPred.min_pitch_lag = SKP_SMULBB(  3, fs_kHz );
        psEnc->sPred.max_pitch_lag = SKP_SMULBB( 18, fs_kHz );
        psEnc->sPred.pitch_LPC_win_length = SKP_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
        if( psEnc->sCmn.fs_kHz == 24 ) {
            psEnc->mu_LTP_Q8 = MU_LTP_QUANT_SWB_Q8;
        } else if( psEnc->sCmn.fs_kHz == 16 ) {
            psEnc->mu_LTP_Q8 = MU_LTP_QUANT_WB_Q8;
        } else if( psEnc->sCmn.fs_kHz == 12 ) {
            psEnc->mu_LTP_Q8 = MU_LTP_QUANT_MB_Q8;
        } else {
            psEnc->mu_LTP_Q8 = MU_LTP_QUANT_NB_Q8;
        }
        psEnc->sCmn.fs_kHz_changed = 1;

        /* Check that settings are valid */
        SKP_assert( ( psEnc->sCmn.subfr_length * NB_SUBFR ) == psEnc->sCmn.frame_length );
    }

    /* Set encoding complexity */
    if( Complexity == 0 || LOW_COMPLEXITY_ONLY ) {
        /* Low complexity */
        psEnc->sCmn.Complexity                  = 0;
        psEnc->sCmn.pitchEstimationComplexity   = PITCH_EST_COMPLEXITY_LC_MODE;
        psEnc->pitchEstimationThreshold_Q16     = FIND_PITCH_CORRELATION_THRESHOLD_Q16_LC_MODE;
        psEnc->sCmn.pitchEstimationLPCOrder     = 8;
        psEnc->sCmn.shapingLPCOrder             = 12;
        psEnc->sCmn.nStatesDelayedDecision      = 1;
        psEnc->NoiseShapingQuantizer            = SKP_Silk_NSQ;
        psEnc->sCmn.useInterpolatedNLSFs        = 0;
        psEnc->sCmn.LTPQuantLowComplexity       = 1;
        psEnc->sCmn.NLSF_MSVQ_Survivors         = MAX_NLSF_MSVQ_SURVIVORS_LC_MODE;
    } else if( Complexity == 1 ) {
        /* Medium complexity */
        psEnc->sCmn.Complexity                  = 1;
        psEnc->sCmn.pitchEstimationComplexity   = PITCH_EST_COMPLEXITY_MC_MODE;
        psEnc->pitchEstimationThreshold_Q16     = FIND_PITCH_CORRELATION_THRESHOLD_Q16_MC_MODE;
        psEnc->sCmn.pitchEstimationLPCOrder     = 12;
        psEnc->sCmn.shapingLPCOrder             = 16;
        psEnc->sCmn.nStatesDelayedDecision      = 2;
        psEnc->NoiseShapingQuantizer            = SKP_Silk_NSQ_del_dec;
        psEnc->sCmn.useInterpolatedNLSFs        = 0;
        psEnc->sCmn.LTPQuantLowComplexity       = 0;
        psEnc->sCmn.NLSF_MSVQ_Survivors         = MAX_NLSF_MSVQ_SURVIVORS_MC_MODE;
    } else if( Complexity == 2 ) {
        /* High complexity */
        psEnc->sCmn.Complexity                  = 2;
        psEnc->sCmn.pitchEstimationComplexity   = PITCH_EST_COMPLEXITY_HC_MODE;
        psEnc->pitchEstimationThreshold_Q16     = FIND_PITCH_CORRELATION_THRESHOLD_Q16_HC_MODE;
        psEnc->sCmn.pitchEstimationLPCOrder     = 16;
        psEnc->sCmn.shapingLPCOrder             = 16;
        psEnc->sCmn.nStatesDelayedDecision      = 4;
        psEnc->NoiseShapingQuantizer            = SKP_Silk_NSQ_del_dec;
        psEnc->sCmn.useInterpolatedNLSFs        = 1;
        psEnc->sCmn.LTPQuantLowComplexity       = 0;
        psEnc->sCmn.NLSF_MSVQ_Survivors         = MAX_NLSF_MSVQ_SURVIVORS;
    } else {
        /* Unknown setting: complexity parameters in the state are left as they were */
        ret = SKP_SILK_ENC_WRONG_COMPLEXITY_SETTING;
    }

    /* Dont have higher Pitch estimation LPC order than predict LPC order */
    psEnc->sCmn.pitchEstimationLPCOrder = SKP_min_int( psEnc->sCmn.pitchEstimationLPCOrder, psEnc->sCmn.predictLPCOrder );

    SKP_assert( psEnc->sCmn.pitchEstimationLPCOrder <= FIND_PITCH_LPC_ORDER_MAX );
    SKP_assert( psEnc->sCmn.shapingLPCOrder         <= SHAPE_LPC_ORDER_MAX );
    SKP_assert( psEnc->sCmn.nStatesDelayedDecision  <= DEL_DEC_STATES_MAX );

    /* Set bitrate/coding quality */
    TargetRate_bps = SKP_min( TargetRate_bps, 100000 );
    if( psEnc->sCmn.fs_kHz == 8 ) {
        TargetRate_bps = SKP_max( TargetRate_bps, MIN_TARGET_RATE_NB_BPS );
    } else if( psEnc->sCmn.fs_kHz == 12 ) {
        TargetRate_bps = SKP_max( TargetRate_bps, MIN_TARGET_RATE_MB_BPS );
    } else if( psEnc->sCmn.fs_kHz == 16 ) {
        TargetRate_bps = SKP_max( TargetRate_bps, MIN_TARGET_RATE_WB_BPS );
    } else {
        TargetRate_bps = SKP_max( TargetRate_bps, MIN_TARGET_RATE_SWB_BPS );
    }
    if( TargetRate_bps != psEnc->sCmn.TargetRate_bps ) {
        psEnc->sCmn.TargetRate_bps = TargetRate_bps;

        /* if new TargetRate_bps, translate to SNR_dB value */
        if( psEnc->sCmn.fs_kHz == 8 ) {
            rateTable = TargetRate_table_NB;
        } else if( psEnc->sCmn.fs_kHz == 12 ) {
            rateTable = TargetRate_table_MB;
        } else if( psEnc->sCmn.fs_kHz == 16 ) {
            rateTable = TargetRate_table_WB;
        } else {
            rateTable = TargetRate_table_SWB;
        }
        for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) {
            /* find bitrate interval in table and interpolate */
            if( TargetRate_bps < rateTable[ k ] ) {
                frac_Q6 = SKP_DIV32( SKP_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ), rateTable[ k ] - rateTable[ k - 1 ] );
                psEnc->SNR_dB_Q7 = SKP_LSHIFT( SNR_table_Q1[ k - 1 ], 6 ) + SKP_MUL( frac_Q6, SNR_table_Q1[ k ] - SNR_table_Q1[ k - 1 ] );
                break;
            }
        }
        if( k == TARGET_RATE_TAB_SZ ) {
            /* Rate is at or above the last table entry, so no interval matched.    */
            /* Saturate to the last SNR entry rather than silently keeping a stale  */
            /* SNR_dB_Q7 while TargetRate_bps has already been recorded as updated. */
            psEnc->SNR_dB_Q7 = SKP_LSHIFT( SNR_table_Q1[ TARGET_RATE_TAB_SZ - 1 ], 6 );
        }
    }

    /* Set packet size */
    if( ( PacketSize_ms !=  20 ) &&
        ( PacketSize_ms !=  40 ) &&
        ( PacketSize_ms !=  60 ) &&
        ( PacketSize_ms !=  80 ) &&
        ( PacketSize_ms != 100 ) ) {
        ret = SKP_SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
    } else {
        if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) {
            psEnc->sCmn.PacketSize_ms = PacketSize_ms;

            /* Packet length changes. Reset LBRR buffer */
            SKP_Silk_LBRR_reset( &psEnc->sCmn );
        }
    }

    /* Set packet loss rate measured by farend */
    if( ( PacketLoss_perc < 0 ) || ( PacketLoss_perc > 100 ) ) {
        ret = SKP_SILK_ENC_WRONG_LOSS_RATE;
    }
    /* The value is stored even when it was flagged as out of range above */
    psEnc->sCmn.PacketLoss_perc = PacketLoss_perc;

#if USE_LBRR
    if( INBandFec_enabled < 0 || INBandFec_enabled > 1 ) {
        ret = SKP_SILK_ENC_WRONG_INBAND_FEC_SETTING;
    }

    /* Only change settings if first frame in packet */
    if( psEnc->sCmn.nFramesInPayloadBuf == 0 ) {

        psEnc->sCmn.LBRR_enabled = INBandFec_enabled;
        if( psEnc->sCmn.fs_kHz == 8 ) {
            LBRRRate_thres_bps = INBAND_FEC_MIN_RATE_BPS - 9000;
        } else if( psEnc->sCmn.fs_kHz == 12 ) {
            LBRRRate_thres_bps = INBAND_FEC_MIN_RATE_BPS - 6000;
        } else if( psEnc->sCmn.fs_kHz == 16 ) {
            LBRRRate_thres_bps = INBAND_FEC_MIN_RATE_BPS - 3000;
        } else {
            LBRRRate_thres_bps = INBAND_FEC_MIN_RATE_BPS;
        }

        if( psEnc->sCmn.TargetRate_bps >= LBRRRate_thres_bps ) {
            /* Set gain increase / rate reduction for LBRR usage */
            /* Coarse tuned with pesq for now. */
            /* Linear regression coefs G = 8 - 0.5 * loss */
            /* Meaning that at 16% loss main rate and redundant rate is the same, -> G = 0 */
            psEnc->sCmn.LBRR_GainIncreases = SKP_max_int( 8 - SKP_RSHIFT( psEnc->sCmn.PacketLoss_perc, 1 ), 0 );

            /* Set main stream rate compensation */
            if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.PacketLoss_perc > LBRR_LOSS_THRES ) {
                /* Tuned to give approx same mean / weighted bitrate as no inband FEC */
                psEnc->inBandFEC_SNR_comp_Q8 = ( 6 << 8 ) - SKP_LSHIFT( psEnc->sCmn.LBRR_GainIncreases, 7 );
            } else {
                psEnc->inBandFEC_SNR_comp_Q8 = 0;
                psEnc->sCmn.LBRR_enabled     = 0;
            }
        } else {
            /* Target rate too low for redundancy at this sampling rate */
            psEnc->inBandFEC_SNR_comp_Q8 = 0;
            psEnc->sCmn.LBRR_enabled     = 0;
        }
    }
#else
    psEnc->sCmn.LBRR_enabled = 0;
#endif

    /* Set DTX mode */
    if( DTX_enabled < 0 || DTX_enabled > 1 ) {
        ret = SKP_SILK_ENC_WRONG_DTX_SETTING;
    }
    /* The value is stored even when it was flagged as out of range above */
    psEnc->sCmn.useDTX = DTX_enabled;

    return ret;
}
630
631 /* Control low bitrate redundancy usage */
/*
 * Decide, for the current frame, whether low bitrate redundancy (LBRR)
 * data should be added, and store the decision in psEncCtrl->sCmn.LBRR_usage.
 *
 * Redundancy is requested (SKP_SILK_ADD_LBRR_TO_PLUS1) only when all of the
 * following hold; otherwise the usage is SKP_SILK_NO_LBRR:
 *   - LBRR is enabled in the encoder state,
 *   - speech activity is above LBRR_SPEECH_ACTIVITY_THRES_Q8,
 *   - the reported packet loss is above LBRR_LOSS_THRES.
 */
void SKP_Silk_LBRR_ctrl_FIX(
    SKP_Silk_encoder_state_FIX      *psEnc,     /* I/O  encoder state   */
    SKP_Silk_encoder_control_FIX    *psEncCtrl  /* I/O  encoder control */
)
{
    /* Start from "no redundancy" and upgrade only for active, lossy conditions */
    SKP_int usage = SKP_SILK_NO_LBRR;

    if( psEnc->sCmn.LBRR_enabled &&
        psEnc->speech_activity_Q8   > LBRR_SPEECH_ACTIVITY_THRES_Q8 &&
        psEnc->sCmn.PacketLoss_perc > LBRR_LOSS_THRES ) {
        usage = SKP_SILK_ADD_LBRR_TO_PLUS1;
    }

    psEncCtrl->sCmn.LBRR_usage = usage;
}
657