1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_coding/codecs/opus/opus_interface.h"
12
13 #include "rtc_base/checks.h"
14
15 #include <stdlib.h>
16 #include <string.h>
17
/* Compile-time frame-size limits used by the encoder and decoder wrappers. */
enum {
  /* The Opus format allows frames of up to 120 ms. The remote encoder is not
   * under our control, so packets of that size must be accepted, even though
   * NetEq is currently limited to 60 ms on the receive side. */
  kWebRtcOpusMaxDecodeFrameSizeMs = 120,

#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
  /* Longest frame the encoder wrapper accepts: 120 ms. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 120,
#else
  /* Longest frame the encoder wrapper accepts: 60 ms. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 60,
#endif

  /* Upper bound on the per-channel sample count of one frame: 48 samples per
   * millisecond (48 kHz) times the longest decodable frame. */
  kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,

  /* Default frame size in samples for one channel: 20 ms at 48 kHz. */
  kWebRtcOpusDefaultFrameSize = 20 * 48,
};
39
/* Creates an Opus encoder wrapper; the underlying encoder always runs at
 * 48 kHz.
 *
 * inst        - receives the new instance on success; left untouched on
 *               failure.
 * channels    - channel count handed to opus_encoder_create().
 * application - 0 selects OPUS_APPLICATION_VOIP, 1 selects
 *               OPUS_APPLICATION_AUDIO; any other value is rejected.
 *
 * Returns 0 on success, or -1 if |inst| is NULL, |application| is unknown,
 * the wrapper could not be allocated, or Opus failed to create the encoder.
 */
int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
                                 size_t channels,
                                 int32_t application) {
  int opus_app;
  if (!inst)
    return -1;

  switch (application) {
    case 0:
      opus_app = OPUS_APPLICATION_VOIP;
      break;
    case 1:
      opus_app = OPUS_APPLICATION_AUDIO;
      break;
    default:
      return -1;
  }

  OpusEncInst* state = calloc(1, sizeof(*state));
  if (!state) {
    /* Report allocation failure explicitly instead of relying on a
     * debug-only check and then dereferencing a NULL pointer. */
    return -1;
  }

  int error = OPUS_OK;
  state->encoder = opus_encoder_create(48000, (int)channels, opus_app,
                                       &error);
  if (error != OPUS_OK || !state->encoder) {
    /* Releases both the (possibly NULL) encoder and the wrapper. */
    WebRtcOpus_EncoderFree(state);
    return -1;
  }

  state->in_dtx_mode = 0;
  state->channels = channels;

  *inst = state;
  return 0;
}
75
/* Destroys the Opus encoder held by |inst| and frees the wrapper itself.
 * Returns 0 on success, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  opus_encoder_destroy(inst->encoder);
  free(inst);
  return 0;
}
85
/* Encodes |samples| samples from |audio_in| into |encoded|, which can hold
 * |length_encoded_buffer| bytes.
 *
 * Returns the size of the encoded packet in bytes, 0 when the packet is a
 * repeated DTX packet that does not need to be sent, or -1 if |samples|
 * exceeds the maximum encode frame size or opus_encode() does not return a
 * positive length. */
int WebRtcOpus_Encode(OpusEncInst* inst,
                      const int16_t* audio_in,
                      size_t samples,
                      size_t length_encoded_buffer,
                      uint8_t* encoded) {
  const size_t max_samples = 48 * kWebRtcOpusMaxEncodeFrameSizeMs;
  if (samples > max_samples) {
    return -1;
  }

  const int encoded_bytes = opus_encode(inst->encoder,
                                        (const opus_int16*)audio_in,
                                        (int)samples,
                                        encoded,
                                        (opus_int32)length_encoded_buffer);
  if (encoded_bytes <= 0) {
    return -1;
  }

  // A packet of at most two bytes carries nothing but a header, which
  // indicates DTX. Such packets need not be sent in principle, but the first
  // one is transmitted so that the decoder learns the encoder entered DTX.
  const int is_dtx_packet = (encoded_bytes <= 2);
  if (is_dtx_packet && inst->in_dtx_mode) {
    return 0;
  }

  inst->in_dtx_mode = is_dtx_packet;
  return encoded_bytes;
}
122
/* Passes |rate| to the encoder through OPUS_SET_BITRATE.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
}
130
/* Passes |loss_rate| to the encoder through OPUS_SET_PACKET_LOSS_PERC.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder,
                          OPUS_SET_PACKET_LOSS_PERC(loss_rate));
}
139
/* Maps |frequency_hz| onto an Opus bandwidth constant and applies it through
 * OPUS_SET_MAX_BANDWIDTH:
 *   <= 8000  -> narrowband      <= 12000 -> mediumband
 *   <= 16000 -> wideband        <= 24000 -> super-wideband
 *   otherwise fullband.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
  if (!inst) {
    return -1;
  }

  /* Start from the widest setting and narrow it step by step; the tightest
   * limit that still covers |frequency_hz| is the one that sticks. */
  opus_int32 max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
  if (frequency_hz <= 24000) {
    max_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
  }
  if (frequency_hz <= 16000) {
    max_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
  }
  if (frequency_hz <= 12000) {
    max_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
  }
  if (frequency_hz <= 8000) {
    max_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
  }

  return opus_encoder_ctl(inst->encoder,
                          OPUS_SET_MAX_BANDWIDTH(max_bandwidth));
}
160
/* Turns in-band FEC on via OPUS_SET_INBAND_FEC(1).
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
}
168
/* Turns in-band FEC off via OPUS_SET_INBAND_FEC(0).
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
}
176
/* Enables DTX on the encoder, after first forcing the signal type to voice.
 * Returns -1 if |inst| is NULL, the error from the signal-type request if it
 * fails, and otherwise the result of the OPUS_SET_DTX(1) request. */
int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  // The signal type is forced to voice so that Opus does not enter CELT-only
  // mode, which keeps DTX behaving correctly. Without forcing it, DTX
  // currently does not last long during pure silence.
  // TODO(minyue): Remove the signal type forcing when Opus DTX works properly
  // without it.
  int status = opus_encoder_ctl(inst->encoder,
                                OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
  if (status == OPUS_OK) {
    status = opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
  }
  return status;
}
194
/* Disables DTX on the encoder, after first resetting the signal type to
 * OPUS_AUTO.
 * Returns -1 if |inst| is NULL, the error from the signal-type request if it
 * fails, and otherwise the result of the OPUS_SET_DTX(0) request. */
int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  int status = opus_encoder_ctl(inst->encoder,
                                OPUS_SET_SIGNAL(OPUS_AUTO));
  if (status == OPUS_OK) {
    status = opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0));
  }
  return status;
}
206
/* Enables constant bitrate by switching VBR off (OPUS_SET_VBR(0)).
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_VBR(0));
}
214
/* Disables constant bitrate by switching VBR on (OPUS_SET_VBR(1)).
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_VBR(1));
}
222
/* Passes |complexity| to the encoder through OPUS_SET_COMPLEXITY.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
}
230
/* Queries the encoder through OPUS_GET_BANDWIDTH.
 * Returns the reported bandwidth value, or -1 if |inst| is NULL or the
 * query does not return 0. */
int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  int32_t bandwidth;
  const int status =
      opus_encoder_ctl(inst->encoder, OPUS_GET_BANDWIDTH(&bandwidth));
  return (status == 0) ? bandwidth : -1;
}
243
/* Passes |bandwidth| to the encoder through OPUS_SET_BANDWIDTH.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_BANDWIDTH(bandwidth));
}
251
/* Sets the encoder's forced channel count through OPUS_SET_FORCE_CHANNELS.
 * |num_channels| == 0 requests OPUS_AUTO; 1 or 2 is passed through as is.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL or
 * |num_channels| is greater than 2. */
int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) {
  if (!inst) {
    return -1;
  }

  switch (num_channels) {
    case 0:
      return opus_encoder_ctl(inst->encoder,
                              OPUS_SET_FORCE_CHANNELS(OPUS_AUTO));
    case 1:
    case 2:
      return opus_encoder_ctl(inst->encoder,
                              OPUS_SET_FORCE_CHANNELS(num_channels));
    default:
      return -1;
  }
}
265
/* Creates an Opus decoder wrapper; the underlying decoder always runs at
 * 48 kHz.
 *
 * inst     - receives the new instance on success; left untouched on failure.
 * channels - channel count handed to opus_decoder_create().
 *
 * Returns 0 on success, or -1 if |inst| is NULL, the wrapper could not be
 * allocated, or Opus failed to create the decoder. */
int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) {
  if (inst == NULL) {
    return -1;
  }

  /* Zero-initialised wrapper state. */
  OpusDecInst* state = calloc(1, sizeof(OpusDecInst));
  if (state == NULL) {
    return -1;
  }

  int error;
  state->decoder = opus_decoder_create(48000, (int)channels, &error);
  if (error != OPUS_OK || state->decoder == NULL) {
    /* Creation failed: release whatever was obtained, then the wrapper. */
    if (state->decoder) {
      opus_decoder_destroy(state->decoder);
    }
    free(state);
    return -1;
  }

  state->channels = channels;
  state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
  state->in_dtx_mode = 0;
  *inst = state;
  return 0;
}
296
/* Destroys the Opus decoder held by |inst| and frees the wrapper itself.
 * Returns 0 on success, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
  if (!inst) {
    return -1;
  }

  opus_decoder_destroy(inst->decoder);
  free(inst);
  return 0;
}
306
/* Returns the channel count stored in |inst|. |inst| is dereferenced
 * without a NULL check. */
size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
  const size_t channel_count = inst->channels;
  return channel_count;
}
310
/* Resets the decoder: clears the wrapper's DTX flag and issues
 * OPUS_RESET_STATE to the Opus decoder. |inst| is dereferenced without a
 * NULL check. */
void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
  inst->in_dtx_mode = 0;
  opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
}
315
316 /* For decoder to determine if it is to output speech or comfort noise. */
DetermineAudioType(OpusDecInst * inst,size_t encoded_bytes)317 static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) {
318 // Audio type becomes comfort noise if |encoded_byte| is 1 and keeps
319 // to be so if the following |encoded_byte| are 0 or 1.
320 if (encoded_bytes == 0 && inst->in_dtx_mode) {
321 return 2; // Comfort noise.
322 } else if (encoded_bytes == 1 || encoded_bytes == 2) {
323 // TODO(henrik.lundin): There is a slight risk that a 2-byte payload is in
324 // fact a 1-byte TOC with a 1-byte payload. That will be erroneously
325 // interpreted as comfort noise output, but such a payload is probably
326 // faulty anyway.
327 inst->in_dtx_mode = 1;
328 return 2; // Comfort noise.
329 } else {
330 inst->in_dtx_mode = 0;
331 return 0; // Speech.
332 }
333 }
334
335 /* |frame_size| is set to maximum Opus frame size in the normal case, and
336 * is set to the number of samples needed for PLC in case of losses.
337 * It is up to the caller to make sure the value is correct. */
DecodeNative(OpusDecInst * inst,const uint8_t * encoded,size_t encoded_bytes,int frame_size,int16_t * decoded,int16_t * audio_type,int decode_fec)338 static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded,
339 size_t encoded_bytes, int frame_size,
340 int16_t* decoded, int16_t* audio_type, int decode_fec) {
341 int res = opus_decode(inst->decoder, encoded, (opus_int32)encoded_bytes,
342 (opus_int16*)decoded, frame_size, decode_fec);
343
344 if (res <= 0)
345 return -1;
346
347 *audio_type = DetermineAudioType(inst, encoded_bytes);
348
349 return res;
350 }
351
/* Decodes one packet into |decoded| and reports the audio type through
 * |*audio_type|. An empty payload (|encoded_bytes| == 0) is handled by
 * running PLC for one lost frame.
 *
 * Returns the value produced by the decode or PLC call, or -1 on failure.
 * On success the value is also stored in |inst->prev_decoded_samples| for
 * use by later PLC calls. */
int WebRtcOpus_Decode(OpusDecInst* inst, const uint8_t* encoded,
                      size_t encoded_bytes, int16_t* decoded,
                      int16_t* audio_type) {
  int decoded_samples;

  if (encoded_bytes != 0) {
    decoded_samples = DecodeNative(inst,
                                   encoded,
                                   encoded_bytes,
                                   kWebRtcOpusMaxFrameSizePerChannel,
                                   decoded,
                                   audio_type,
                                   0);
  } else {
    /* No payload: classify first, then conceal a single lost frame. */
    *audio_type = DetermineAudioType(inst, encoded_bytes);
    decoded_samples = WebRtcOpus_DecodePlc(inst, decoded, 1);
  }

  if (decoded_samples < 0) {
    return -1;
  }

  /* Remember the frame length so that PLC can reuse it after a loss. */
  inst->prev_decoded_samples = decoded_samples;
  return decoded_samples;
}
378
/* Runs packet loss concealment and writes the generated audio to |decoded|.
 *
 * The number of samples requested is |number_of_lost_frames| times
 * |inst->prev_decoded_samples|, capped at
 * |kWebRtcOpusMaxFrameSizePerChannel|.
 *
 * Returns the value produced by the decoder, or -1 if the requested sample
 * count is not positive or the decoder fails. */
int WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
                         int number_of_lost_frames) {
  int16_t audio_type = 0;

  /* Multiply in 64 bits: doing it in int before clamping would be signed
   * overflow (undefined behaviour) for a large |number_of_lost_frames|. */
  const int64_t requested_samples =
      (int64_t)number_of_lost_frames * inst->prev_decoded_samples;
  if (requested_samples <= 0) {
    /* opus_decode() rejects a non-positive frame size, so fail up front with
     * the same -1 the decode path would have produced. */
    return -1;
  }

  const int plc_samples =
      (requested_samples <= kWebRtcOpusMaxFrameSizePerChannel)
          ? (int)requested_samples
          : kWebRtcOpusMaxFrameSizePerChannel;

  const int decoded_samples = DecodeNative(inst, NULL, 0, plc_samples,
                                           decoded, &audio_type, 0);
  return (decoded_samples < 0) ? -1 : decoded_samples;
}
399
/* Decodes the FEC data carried in |encoded| into |decoded| and reports the
 * audio type through |*audio_type|.
 *
 * Returns 0 if WebRtcOpus_PacketHasFec() does not report FEC for the packet,
 * -1 if decoding fails, and otherwise the value produced by the decoder. */
int WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
                         size_t encoded_bytes, int16_t* decoded,
                         int16_t* audio_type) {
  if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
    return 0;
  }

  /* The frame size requested from the decoder is the packet's own
   * samples-per-frame at 48 kHz. */
  const int fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);

  const int decoded_samples = DecodeNative(inst, encoded, encoded_bytes,
                                           fec_samples, decoded, audio_type,
                                           1);
  return (decoded_samples < 0) ? -1 : decoded_samples;
}
420
/* Estimates the duration of a packet in samples at 48 kHz.
 *
 * For an empty payload the PLC duration is returned, because
 * WebRtcOpus_Decode runs PLC when the payload length is zero. Returns 0 if
 * the frame count cannot be read from the payload or the resulting duration
 * falls outside [120, 5760] samples. */
int WebRtcOpus_DurationEst(OpusDecInst* inst,
                           const uint8_t* payload,
                           size_t payload_length_bytes) {
  if (payload_length_bytes == 0) {
    return WebRtcOpus_PlcDuration(inst);
  }

  const int frames =
      opus_packet_get_nb_frames(payload, (opus_int32)payload_length_bytes);
  if (frames < 0) {
    /* Invalid payload data. */
    return 0;
  }

  const int samples =
      frames * opus_packet_get_samples_per_frame(payload, 48000);
  const int duration_is_valid = (samples >= 120) && (samples <= 5760);
  return duration_is_valid ? samples : 0;
}
443
/* Returns |inst->prev_decoded_samples|, capped at
 * |kWebRtcOpusMaxFrameSizePerChannel|. */
int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
  int plc_samples = inst->prev_decoded_samples;
  if (plc_samples > kWebRtcOpusMaxFrameSizePerChannel) {
    plc_samples = kWebRtcOpusMaxFrameSizePerChannel;
  }
  return plc_samples;
}
452
/* Estimates the duration of the FEC data in a packet, in samples at 48 kHz.
 *
 * Returns 0 if WebRtcOpus_PacketHasFec() does not report FEC for the packet
 * or the packet's samples-per-frame value falls outside [480, 5760]. */
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
                              size_t payload_length_bytes) {
  if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
    return 0;
  }

  const int samples = opus_packet_get_samples_per_frame(payload, 48000);
  const int duration_is_valid = (samples >= 480) && (samples <= 5760);
  return duration_is_valid ? samples : 0;
}
467
WebRtcOpus_PacketHasFec(const uint8_t * payload,size_t payload_length_bytes)468 int WebRtcOpus_PacketHasFec(const uint8_t* payload,
469 size_t payload_length_bytes) {
470 int frames, channels, payload_length_ms;
471 int n;
472 opus_int16 frame_sizes[48];
473 const unsigned char *frame_data[48];
474
475 if (payload == NULL || payload_length_bytes == 0)
476 return 0;
477
478 /* In CELT_ONLY mode, packets should not have FEC. */
479 if (payload[0] & 0x80)
480 return 0;
481
482 payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
483 if (10 > payload_length_ms)
484 payload_length_ms = 10;
485
486 channels = opus_packet_get_nb_channels(payload);
487
488 switch (payload_length_ms) {
489 case 10:
490 case 20: {
491 frames = 1;
492 break;
493 }
494 case 40: {
495 frames = 2;
496 break;
497 }
498 case 60: {
499 frames = 3;
500 break;
501 }
502 default: {
503 return 0; // It is actually even an invalid packet.
504 }
505 }
506
507 /* The following is to parse the LBRR flags. */
508 if (opus_packet_parse(payload, (opus_int32)payload_length_bytes, NULL,
509 frame_data, frame_sizes, NULL) < 0) {
510 return 0;
511 }
512
513 if (frame_sizes[0] <= 1) {
514 return 0;
515 }
516
517 for (n = 0; n < channels; n++) {
518 if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))
519 return 1;
520 }
521
522 return 0;
523 }
524