1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_coding/codecs/opus/opus_interface.h"
12 
13 #include "rtc_base/checks.h"
14 
15 #include <stdlib.h>
16 #include <string.h>
17 
/* Frame-size limits used by the encoder and decoder wrappers in this file.
 * Sample counts are per channel at 48 kHz, i.e. 48 samples per millisecond. */
enum {
#if WEBRTC_OPUS_SUPPORT_120MS_PTIME
  /* Longest frame WebRTC will encode: 120 ms. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 120,
#else
  /* Longest frame WebRTC will encode: 60 ms. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 60,
#endif

  /* The Opus format permits frames of up to 120 ms and the remote sender is
   * outside our control, so packets of that size must be accepted. NetEq is
   * currently limited to 60 ms on the receive side. */
  kWebRtcOpusMaxDecodeFrameSizeMs = 120,

  /* Largest per-channel sample count: the maximum decode frame duration in
   * milliseconds times 48 samples per millisecond. */
  kWebRtcOpusMaxFrameSizePerChannel = kWebRtcOpusMaxDecodeFrameSizeMs * 48,

  /* Default frame size in samples for one channel: 20 ms at 48 kHz. */
  kWebRtcOpusDefaultFrameSize = 20 * 48,
};
39 
/* Allocates an encoder instance and creates the underlying Opus encoder at
 * 48 kHz.
 *
 * inst        - receives the new instance on success; not written on failure.
 * channels    - channel count passed to opus_encoder_create().
 * application - 0 selects OPUS_APPLICATION_VOIP, 1 selects
 *               OPUS_APPLICATION_AUDIO; any other value is rejected.
 *
 * Returns 0 on success. Returns -1 if |inst| is NULL, |application| is
 * unknown, the instance cannot be allocated, or the Opus encoder cannot be
 * created. */
int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
                                 size_t channels,
                                 int32_t application) {
  int opus_app;
  if (!inst)
    return -1;

  switch (application) {
    case 0:
      opus_app = OPUS_APPLICATION_VOIP;
      break;
    case 1:
      opus_app = OPUS_APPLICATION_AUDIO;
      break;
    default:
      return -1;
  }

  OpusEncInst* state = calloc(1, sizeof(*state));
  if (!state) {
    /* A debug-only check is not enough here: when it is compiled out a failed
     * allocation would be dereferenced below. Report the failure instead, as
     * WebRtcOpus_DecoderCreate() does. */
    return -1;
  }

  int error = OPUS_OK;
  state->encoder = opus_encoder_create(48000, (int)channels, opus_app,
                                       &error);
  if (error != OPUS_OK || !state->encoder) {
    /* Releases both the (possibly NULL) encoder and the instance. */
    WebRtcOpus_EncoderFree(state);
    return -1;
  }

  state->in_dtx_mode = 0;
  state->channels = channels;

  *inst = state;
  return 0;
}
75 
/* Destroys the Opus encoder owned by |inst| and frees the instance itself.
 * Returns 0 on success, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  opus_encoder_destroy(inst->encoder);
  free(inst);
  return 0;
}
85 
/* Encodes |samples| samples from |audio_in| into |encoded|, which has room
 * for |length_encoded_buffer| bytes, and tracks the encoder's DTX state.
 *
 * Returns the number of bytes written, 0 when the packet is a repeated DTX
 * packet that need not be sent, or -1 if |samples| exceeds the maximum encode
 * frame size or opus_encode() does not produce any bytes. */
int WebRtcOpus_Encode(OpusEncInst* inst,
                      const int16_t* audio_in,
                      size_t samples,
                      size_t length_encoded_buffer,
                      uint8_t* encoded) {
  if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
    return -1;
  }

  const int encoded_bytes = opus_encode(inst->encoder,
                                        (const opus_int16*)audio_in,
                                        (int)samples,
                                        encoded,
                                        (opus_int32)length_encoded_buffer);
  if (encoded_bytes <= 0) {
    return -1;
  }

  if (encoded_bytes > 2) {
    // A regular packet ends any ongoing DTX period.
    inst->in_dtx_mode = 0;
    return encoded_bytes;
  }

  // One or two bytes means the packet holds nothing but a header, which
  // indicates DTX. Such packets do not strictly need to be sent, but the first
  // one is transmitted so the decoder learns that the encoder entered DTX.
  if (inst->in_dtx_mode) {
    return 0;
  }
  inst->in_dtx_mode = 1;
  return encoded_bytes;
}
122 
/* Applies OPUS_SET_BITRATE(|rate|) to the encoder.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
}
130 
/* Applies OPUS_SET_PACKET_LOSS_PERC(|loss_rate|) to the encoder.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder,
                          OPUS_SET_PACKET_LOSS_PERC(loss_rate));
}
139 
/* Translates a maximum playback rate in Hz into an Opus bandwidth and applies
 * it with OPUS_SET_MAX_BANDWIDTH:
 *   <= 8000  -> narrowband
 *   <= 12000 -> mediumband
 *   <= 16000 -> wideband
 *   <= 24000 -> super-wideband
 *   otherwise   fullband
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
  if (!inst) {
    return -1;
  }

  /* Start from the widest bandwidth and narrow it if the rate allows. */
  opus_int32 max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
  if (frequency_hz <= 8000) {
    max_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
  } else if (frequency_hz <= 12000) {
    max_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
  } else if (frequency_hz <= 16000) {
    max_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
  } else if (frequency_hz <= 24000) {
    max_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
  }

  return opus_encoder_ctl(inst->encoder,
                          OPUS_SET_MAX_BANDWIDTH(max_bandwidth));
}
160 
/* Turns in-band FEC on via OPUS_SET_INBAND_FEC(1).
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
}
168 
/* Turns in-band FEC off via OPUS_SET_INBAND_FEC(0).
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
}
176 
/* Forces the signal type to voice and then enables DTX.
 * Returns -1 if |inst| is NULL, the error from OPUS_SET_SIGNAL if that call
 * fails, and otherwise the result of OPUS_SET_DTX(1). */
int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  // The signal type is forced to voice to keep Opus out of CELT-only mode so
  // that DTX behaves correctly. Without forcing it, DTX currently does not
  // last long during pure silence.
  // TODO(minyue): Remove the signal type forcing when Opus DTX works properly
  // without it.
  const int signal_status = opus_encoder_ctl(
      inst->encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
  if (signal_status != OPUS_OK) {
    return signal_status;
  }

  return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
}
194 
/* Returns the signal type to OPUS_AUTO and then disables DTX.
 * Returns -1 if |inst| is NULL, the error from OPUS_SET_SIGNAL if that call
 * fails, and otherwise the result of OPUS_SET_DTX(0). */
int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  const int signal_status = opus_encoder_ctl(inst->encoder,
                                             OPUS_SET_SIGNAL(OPUS_AUTO));
  if (signal_status != OPUS_OK) {
    return signal_status;
  }

  return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0));
}
206 
/* Enables constant bitrate by turning VBR off (OPUS_SET_VBR(0)).
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_VBR(0));
}
214 
/* Disables constant bitrate by turning VBR on (OPUS_SET_VBR(1)).
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_VBR(1));
}
222 
/* Applies OPUS_SET_COMPLEXITY(|complexity|) to the encoder.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
}
230 
/* Queries the encoder with OPUS_GET_BANDWIDTH.
 * Returns the reported bandwidth value, or -1 if |inst| is NULL or the query
 * does not return 0. */
int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  int32_t bandwidth;
  const int status =
      opus_encoder_ctl(inst->encoder, OPUS_GET_BANDWIDTH(&bandwidth));
  return (status == 0) ? bandwidth : -1;
}
243 
/* Applies OPUS_SET_BANDWIDTH(|bandwidth|) to the encoder.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_BANDWIDTH(bandwidth));
}
251 
/* Forces the encoder's channel count with OPUS_SET_FORCE_CHANNELS.
 * |num_channels| of 0 selects OPUS_AUTO; 1 and 2 are passed through as is.
 * Returns the opus_encoder_ctl() result, or -1 if |inst| is NULL or
 * |num_channels| is greater than 2. */
int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) {
  if (!inst) {
    return -1;
  }

  switch (num_channels) {
    case 0:
      return opus_encoder_ctl(inst->encoder,
                              OPUS_SET_FORCE_CHANNELS(OPUS_AUTO));
    case 1:
    case 2:
      return opus_encoder_ctl(inst->encoder,
                              OPUS_SET_FORCE_CHANNELS(num_channels));
    default:
      return -1;
  }
}
265 
/* Allocates a decoder instance and creates the underlying Opus decoder, which
 * always runs at 48000 Hz.
 *
 * On success |*inst| receives the new instance, with the previous-frame size
 * seeded to kWebRtcOpusDefaultFrameSize and the DTX flag cleared, and 0 is
 * returned. Returns -1 if |inst| is NULL, the allocation fails, or the Opus
 * decoder cannot be created; |*inst| is not written in that case. */
int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) {
  if (inst == NULL) {
    return -1;
  }

  OpusDecInst* state = calloc(1, sizeof(*state));
  if (state == NULL) {
    return -1;
  }

  int error;
  state->decoder = opus_decoder_create(48000, (int)channels, &error);
  if (error != OPUS_OK || state->decoder == NULL) {
    /* Creation failed: release whatever was obtained. */
    if (state->decoder) {
      opus_decoder_destroy(state->decoder);
    }
    free(state);
    return -1;
  }

  state->channels = channels;
  state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
  state->in_dtx_mode = 0;
  *inst = state;
  return 0;
}
296 
/* Destroys the Opus decoder owned by |inst| and frees the instance itself.
 * Returns 0 on success, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
  if (!inst) {
    return -1;
  }

  opus_decoder_destroy(inst->decoder);
  free(inst);
  return 0;
}
306 
/* Returns the channel count stored in |inst|. |inst| must not be NULL; it is
 * dereferenced without a check. */
size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
  const size_t channel_count = inst->channels;
  return channel_count;
}
310 
/* Clears the wrapper's DTX flag and resets the Opus decoder with
 * OPUS_RESET_STATE. The result of the reset request is not inspected.
 * |inst| must not be NULL. */
void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
  inst->in_dtx_mode = 0;
  opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
}
315 
316 /* For decoder to determine if it is to output speech or comfort noise. */
DetermineAudioType(OpusDecInst * inst,size_t encoded_bytes)317 static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) {
318   // Audio type becomes comfort noise if |encoded_byte| is 1 and keeps
319   // to be so if the following |encoded_byte| are 0 or 1.
320   if (encoded_bytes == 0 && inst->in_dtx_mode) {
321     return 2;  // Comfort noise.
322   } else if (encoded_bytes == 1 || encoded_bytes == 2) {
323     // TODO(henrik.lundin): There is a slight risk that a 2-byte payload is in
324     // fact a 1-byte TOC with a 1-byte payload. That will be erroneously
325     // interpreted as comfort noise output, but such a payload is probably
326     // faulty anyway.
327     inst->in_dtx_mode = 1;
328     return 2;  // Comfort noise.
329   } else {
330     inst->in_dtx_mode = 0;
331     return 0;  // Speech.
332   }
333 }
334 
335 /* |frame_size| is set to maximum Opus frame size in the normal case, and
336  * is set to the number of samples needed for PLC in case of losses.
337  * It is up to the caller to make sure the value is correct. */
DecodeNative(OpusDecInst * inst,const uint8_t * encoded,size_t encoded_bytes,int frame_size,int16_t * decoded,int16_t * audio_type,int decode_fec)338 static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded,
339                         size_t encoded_bytes, int frame_size,
340                         int16_t* decoded, int16_t* audio_type, int decode_fec) {
341   int res = opus_decode(inst->decoder, encoded, (opus_int32)encoded_bytes,
342                         (opus_int16*)decoded, frame_size, decode_fec);
343 
344   if (res <= 0)
345     return -1;
346 
347   *audio_type = DetermineAudioType(inst, encoded_bytes);
348 
349   return res;
350 }
351 
/* Decodes one packet into |decoded| and reports its audio type.
 *
 * An empty payload is handled as a single lost frame: the audio type is
 * derived from the DTX state and the samples come from
 * WebRtcOpus_DecodePlc(). Any other payload is decoded with room for the
 * maximum frame size.
 *
 * Returns the number of decoded samples, which is also remembered in |inst|
 * for later PLC use, or -1 on failure. */
int WebRtcOpus_Decode(OpusDecInst* inst, const uint8_t* encoded,
                      size_t encoded_bytes, int16_t* decoded,
                      int16_t* audio_type) {
  int num_samples;

  if (encoded_bytes != 0) {
    num_samples = DecodeNative(inst, encoded, encoded_bytes,
                               kWebRtcOpusMaxFrameSizePerChannel, decoded,
                               audio_type, 0);
  } else {
    *audio_type = DetermineAudioType(inst, encoded_bytes);
    num_samples = WebRtcOpus_DecodePlc(inst, decoded, 1);
  }

  if (num_samples < 0) {
    return -1;
  }

  /* Remember the frame length; PLC uses it when packets are lost. */
  inst->prev_decoded_samples = num_samples;
  return num_samples;
}
378 
/* Produces packet-loss-concealment samples for |number_of_lost_frames| lost
 * frames into |decoded|.
 *
 * The number of samples requested is |number_of_lost_frames| times the
 * previously decoded frame length stored in |inst|, capped at
 * |kWebRtcOpusMaxFrameSizePerChannel|.
 *
 * Returns the number of samples produced, or -1 on failure. */
int WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
                         int number_of_lost_frames) {
  int16_t audio_type = 0;
  int decoded_samples;

  /* Multiply in 64 bits: |number_of_lost_frames| comes from the caller, and a
   * plain int product could overflow (undefined behaviour) before the cap
   * below is applied. */
  int64_t requested_samples =
      (int64_t)number_of_lost_frames * (int64_t)inst->prev_decoded_samples;
  if (requested_samples > kWebRtcOpusMaxFrameSizePerChannel) {
    requested_samples = kWebRtcOpusMaxFrameSizePerChannel;
  } else if (requested_samples < -kWebRtcOpusMaxFrameSizePerChannel) {
    /* A negative request stays negative, so it is still handed on as an
     * invalid frame size; only its magnitude is bounded so that the narrowing
     * conversion below is well defined. */
    requested_samples = -kWebRtcOpusMaxFrameSizePerChannel;
  }
  const int plc_samples = (int)requested_samples;

  /* A NULL payload of length 0 asks the decoder for concealment. */
  decoded_samples = DecodeNative(inst, NULL, 0, plc_samples,
                                 decoded, &audio_type, 0);
  if (decoded_samples < 0) {
    return -1;
  }

  return decoded_samples;
}
399 
/* Decodes the FEC (redundant) data carried in |encoded| into |decoded|.
 *
 * Returns 0 without decoding when WebRtcOpus_PacketHasFec() does not report
 * FEC for the packet, the number of decoded samples on success, or -1 if
 * decoding fails. The frame size requested from the decoder is the packet's
 * samples-per-frame at 48000 Hz. */
int WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
                         size_t encoded_bytes, int16_t* decoded,
                         int16_t* audio_type) {
  if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
    return 0;
  }

  const int fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);
  const int num_samples = DecodeNative(inst, encoded, encoded_bytes,
                                       fec_samples, decoded, audio_type, 1);
  return (num_samples < 0) ? -1 : num_samples;
}
420 
/* Estimates the duration of |payload| in samples at 48000 Hz.
 *
 * For an empty payload the PLC duration is returned, because
 * WebRtcOpus_Decode() runs PLC in that case. Otherwise the result is the
 * packet's frame count times its samples per frame. Returns 0 if the frame
 * count cannot be read or the total lies outside [120, 5760] samples. */
int WebRtcOpus_DurationEst(OpusDecInst* inst,
                           const uint8_t* payload,
                           size_t payload_length_bytes) {
  if (payload_length_bytes == 0) {
    return WebRtcOpus_PlcDuration(inst);
  }

  const int num_frames =
      opus_packet_get_nb_frames(payload, (opus_int32)payload_length_bytes);
  if (num_frames < 0) {
    /* Invalid payload data. */
    return 0;
  }

  const int total_samples =
      num_frames * opus_packet_get_samples_per_frame(payload, 48000);
  const int duration_is_valid = (total_samples >= 120 && total_samples <= 5760);
  /* An out-of-range total means an invalid payload duration. */
  return duration_is_valid ? total_samples : 0;
}
443 
/* Returns the number of samples PLC would produce for one lost frame: the
 * previously decoded frame length stored in |inst|, capped at
 * |kWebRtcOpusMaxFrameSizePerChannel|. */
int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
  const int previous_samples = inst->prev_decoded_samples;
  if (previous_samples > kWebRtcOpusMaxFrameSizePerChannel) {
    return kWebRtcOpusMaxFrameSizePerChannel;
  }
  return previous_samples;
}
452 
/* Estimates the duration in samples at 48000 Hz of the FEC data in |payload|.
 *
 * Returns 0 when WebRtcOpus_PacketHasFec() does not report FEC for the packet
 * or when the packet's samples-per-frame lies outside [480, 5760]; otherwise
 * returns the samples-per-frame value. */
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
                              size_t payload_length_bytes) {
  if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
    return 0;
  }

  const int frame_samples = opus_packet_get_samples_per_frame(payload, 48000);
  const int duration_is_valid = (frame_samples >= 480 && frame_samples <= 5760);
  /* An out-of-range value means an invalid payload duration. */
  return duration_is_valid ? frame_samples : 0;
}
467 
WebRtcOpus_PacketHasFec(const uint8_t * payload,size_t payload_length_bytes)468 int WebRtcOpus_PacketHasFec(const uint8_t* payload,
469                             size_t payload_length_bytes) {
470   int frames, channels, payload_length_ms;
471   int n;
472   opus_int16 frame_sizes[48];
473   const unsigned char *frame_data[48];
474 
475   if (payload == NULL || payload_length_bytes == 0)
476     return 0;
477 
478   /* In CELT_ONLY mode, packets should not have FEC. */
479   if (payload[0] & 0x80)
480     return 0;
481 
482   payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
483   if (10 > payload_length_ms)
484     payload_length_ms = 10;
485 
486   channels = opus_packet_get_nb_channels(payload);
487 
488   switch (payload_length_ms) {
489     case 10:
490     case 20: {
491       frames = 1;
492       break;
493     }
494     case 40: {
495       frames = 2;
496       break;
497     }
498     case 60: {
499       frames = 3;
500       break;
501     }
502     default: {
503       return 0; // It is actually even an invalid packet.
504     }
505   }
506 
507   /* The following is to parse the LBRR flags. */
508   if (opus_packet_parse(payload, (opus_int32)payload_length_bytes, NULL,
509                         frame_data, frame_sizes, NULL) < 0) {
510     return 0;
511   }
512 
513   if (frame_sizes[0] <= 1) {
514     return 0;
515   }
516 
517   for (n = 0; n < channels; n++) {
518     if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))
519       return 1;
520   }
521 
522   return 0;
523 }
524