1 /* SPDX-License-Identifier: GPL-3.0-or-later
2 * Copyright © 2016-2018 The TokTok team.
3 * Copyright © 2013-2015 Tox project.
4 */
5 #ifdef HAVE_CONFIG_H
6 #include "config.h"
7 #endif /* HAVE_CONFIG_H */
8
9 #include "audio.h"
10
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include "rtp.h"
15
16 #include "../toxcore/logger.h"
17 #include "../toxcore/mono_time.h"
18
19 static struct JitterBuffer *jbuf_new(uint32_t capacity);
20 static void jbuf_clear(struct JitterBuffer *q);
21 static void jbuf_free(struct JitterBuffer *q);
22 static int jbuf_write(const Logger *log, struct JitterBuffer *q, struct RTPMessage *m);
23 static struct RTPMessage *jbuf_read(struct JitterBuffer *q, int32_t *success);
24 static OpusEncoder *create_audio_encoder(const Logger *log, int32_t bit_rate, int32_t sampling_rate,
25 int32_t channel_count);
26 static bool reconfigure_audio_encoder(const Logger *log, OpusEncoder **e, int32_t new_br, int32_t new_sr,
27 uint8_t new_ch, int32_t *old_br, int32_t *old_sr, int32_t *old_ch);
28 static bool reconfigure_audio_decoder(ACSession *ac, int32_t sampling_rate, int8_t channels);
29
30
31
ac_new(Mono_Time * mono_time,const Logger * log,ToxAV * av,uint32_t friend_number,toxav_audio_receive_frame_cb * cb,void * cb_data)32 ACSession *ac_new(Mono_Time *mono_time, const Logger *log, ToxAV *av, uint32_t friend_number,
33 toxav_audio_receive_frame_cb *cb, void *cb_data)
34 {
35 ACSession *ac = (ACSession *)calloc(sizeof(ACSession), 1);
36
37 if (!ac) {
38 LOGGER_WARNING(log, "Allocation failed! Application might misbehave!");
39 return nullptr;
40 }
41
42 if (create_recursive_mutex(ac->queue_mutex) != 0) {
43 LOGGER_WARNING(log, "Failed to create recursive mutex!");
44 free(ac);
45 return nullptr;
46 }
47
48 int status;
49 ac->decoder = opus_decoder_create(AUDIO_DECODER_START_SAMPLE_RATE, AUDIO_DECODER_START_CHANNEL_COUNT, &status);
50
51 if (status != OPUS_OK) {
52 LOGGER_ERROR(log, "Error while starting audio decoder: %s", opus_strerror(status));
53 goto BASE_CLEANUP;
54 }
55
56 ac->j_buf = jbuf_new(AUDIO_JITTERBUFFER_COUNT);
57
58 if (ac->j_buf == nullptr) {
59 LOGGER_WARNING(log, "Jitter buffer creaton failed!");
60 opus_decoder_destroy(ac->decoder);
61 goto BASE_CLEANUP;
62 }
63
64 ac->mono_time = mono_time;
65 ac->log = log;
66
67 /* Initialize encoders with default values */
68 ac->encoder = create_audio_encoder(log, AUDIO_START_BITRATE, AUDIO_START_SAMPLE_RATE, AUDIO_START_CHANNEL_COUNT);
69
70 if (ac->encoder == nullptr) {
71 goto DECODER_CLEANUP;
72 }
73
74 ac->le_bit_rate = AUDIO_START_BITRATE;
75 ac->le_sample_rate = AUDIO_START_SAMPLE_RATE;
76 ac->le_channel_count = AUDIO_START_CHANNEL_COUNT;
77
78 ac->ld_channel_count = AUDIO_DECODER_START_CHANNEL_COUNT;
79 ac->ld_sample_rate = AUDIO_DECODER_START_SAMPLE_RATE;
80 ac->ldrts = 0; /* Make it possible to reconfigure straight away */
81
82 /* These need to be set in order to properly
83 * do error correction with opus */
84 ac->lp_frame_duration = AUDIO_MAX_FRAME_DURATION_MS;
85 ac->lp_sampling_rate = AUDIO_DECODER_START_SAMPLE_RATE;
86 ac->lp_channel_count = AUDIO_DECODER_START_CHANNEL_COUNT;
87
88 ac->av = av;
89 ac->friend_number = friend_number;
90 ac->acb = cb;
91 ac->acb_user_data = cb_data;
92
93 return ac;
94
95 DECODER_CLEANUP:
96 opus_decoder_destroy(ac->decoder);
97 jbuf_free((struct JitterBuffer *)ac->j_buf);
98 BASE_CLEANUP:
99 pthread_mutex_destroy(ac->queue_mutex);
100 free(ac);
101 return nullptr;
102 }
103
/**
 * Destroy an audio codec session created by ac_new.
 *
 * Releases the encoder, the decoder, the jitter buffer (including any packets
 * still queued in it) and the queue mutex, then frees the session itself.
 * A null session is ignored.
 */
void ac_kill(ACSession *ac)
{
    if (ac != nullptr) {
        opus_encoder_destroy(ac->encoder);
        opus_decoder_destroy(ac->decoder);
        jbuf_free((struct JitterBuffer *)ac->j_buf);

        pthread_mutex_destroy(ac->queue_mutex);

        /* Log before freeing: the logger pointer lives inside the session. */
        LOGGER_DEBUG(ac->log, "Terminated audio handler: %p", (void *)ac);
        free(ac);
    }
}
119
/**
 * Take the next frame out of the jitter buffer, decode it and hand the PCM
 * data to the receive callback.
 *
 * At most one frame is decoded per call: once a frame (or a concealment frame
 * for a lost packet) has been passed to the decoder the function returns,
 * whether decoding succeeded or not. Only packets that are rejected before
 * decoding (too short, or the decoder could not be reconfigured for them)
 * make the loop move on to the next slot.
 *
 * Locking: the queue mutex is held whenever the jitter buffer is read and is
 * released while decoding and while the callback runs.
 *
 * @param ac Session to service; a null session is ignored.
 */
void ac_iterate(ACSession *ac)
{
    if (ac == nullptr) {
        return;
    }

    /* TODO: fix this and jitter buffering */

    /* Enough space for the maximum frame size (120 ms 48 KHz stereo audio) */
    int16_t *temp_audio_buffer = (int16_t *)malloc(AUDIO_MAX_BUFFER_SIZE_PCM16 * AUDIO_MAX_CHANNEL_COUNT * sizeof(
                                     int16_t));

    if (temp_audio_buffer == nullptr) {
        LOGGER_ERROR(ac->log, "Failed to allocate memory for audio buffer");
        return;
    }

    pthread_mutex_lock(ac->queue_mutex);
    struct JitterBuffer *const j_buf = (struct JitterBuffer *)ac->j_buf;

    int rc = 0;
    struct RTPMessage *msg = jbuf_read(j_buf, &rc);

    /* Invariant: the queue mutex is held every time jbuf_read() runs, i.e.
     * on entry and on every `continue`. */
    for (; msg != nullptr || rc == 2; msg = jbuf_read(j_buf, &rc)) {
        pthread_mutex_unlock(ac->queue_mutex);

        if (rc == 2) {
            /* The packet for this slot was declared lost: ask Opus for a
             * replacement frame with the duration of the last good frame. */
            LOGGER_DEBUG(ac->log, "OPUS correction");
            int fs = (ac->lp_sampling_rate * ac->lp_frame_duration) / 1000;
            rc = opus_decode(ac->decoder, nullptr, 0, temp_audio_buffer, fs, 1);
        } else {
            /* Payload layout: 4 bytes sampling rate (network byte order)
             * followed by the Opus packet. The data comes from the network,
             * so make sure both parts are really there before reading them. */
            if (msg->len <= 4) {
                LOGGER_WARNING(ac->log, "Audio packet too short (%u bytes)", (unsigned int)msg->len);
                free(msg);
                pthread_mutex_lock(ac->queue_mutex);
                continue;
            }

            /* Pick up sampling rate from packet */
            uint32_t rate_be;
            memcpy(&rate_be, msg->data, 4);
            const int32_t packet_rate = (int32_t)net_ntohl(rate_be);

            const int packet_channels = opus_packet_get_nb_channels(msg->data + 4);

            /** NOTE: even though OPUS supports decoding mono frames with stereo decoder and vice versa,
              * it didn't work quite well.
              */
            if (!reconfigure_audio_decoder(ac, packet_rate, packet_channels)) {
                LOGGER_WARNING(ac->log, "Failed to reconfigure decoder!");
                free(msg);
                /* Re-acquire the lock: the loop increment reads the jitter
                 * buffer and the loop/exit path unlocks exactly once. */
                pthread_mutex_lock(ac->queue_mutex);
                continue;
            }

            /* Only remember the packet parameters once the decoder really
             * matches them, so that later loss concealment never computes a
             * frame size from values the decoder rejected. */
            ac->lp_sampling_rate = packet_rate;
            ac->lp_channel_count = packet_channels;

            /*
             * frame_size = opus_decode(dec, packet, len, decoded, max_size, 0);
             * where
             * packet is the byte array containing the compressed data
             * len is the exact number of bytes contained in the packet
             * decoded is the decoded audio data in opus_int16 (or float for opus_decode_float())
             * max_size is the max duration of the frame in samples (per channel) that can fit
             * into the decoded_frame array
             */
            rc = opus_decode(ac->decoder, msg->data + 4, msg->len - 4, temp_audio_buffer,
                             AUDIO_MAX_BUFFER_SIZE_PCM16, 0);
            free(msg);
        }

        if (rc < 0) {
            LOGGER_WARNING(ac->log, "Decoding error: %s", opus_strerror(rc));
        } else if (ac->acb) {
            /* rc is the number of decoded samples per channel. */
            ac->lp_frame_duration = (rc * 1000) / ac->lp_sampling_rate;

            ac->acb(ac->av, ac->friend_number, temp_audio_buffer, rc, ac->lp_channel_count,
                    ac->lp_sampling_rate, ac->acb_user_data);
        }

        free(temp_audio_buffer);

        return;
    }

    pthread_mutex_unlock(ac->queue_mutex);

    free(temp_audio_buffer);
}
212
/**
 * Queue a received RTP audio message in the session's jitter buffer.
 *
 * Ownership of msg is always taken: it is either stored in the jitter buffer
 * or freed before returning.
 *
 * @param mono_time Not used by this function.
 * @param acp       The ACSession, passed as an untyped pointer.
 * @param msg       The message to queue.
 *
 * @return 0 if the message was queued or was a dummy packet that got dropped,
 *         -1 if an argument was null, the payload type was not audio, or the
 *         jitter buffer refused the message.
 */
int ac_queue_message(Mono_Time *mono_time, void *acp, struct RTPMessage *msg)
{
    ACSession *const ac = (ACSession *)acp;

    if (ac == nullptr || msg == nullptr) {
        /* free() accepts a null pointer, so no separate check is needed. */
        free(msg);
        return -1;
    }

    const int payload_type = msg->header.pt & 0x7f;

    if (payload_type == (RTP_TYPE_AUDIO + 2) % 128) {
        LOGGER_WARNING(ac->log, "Got dummy!");
        free(msg);
        return 0;
    }

    if (payload_type != RTP_TYPE_AUDIO % 128) {
        LOGGER_WARNING(ac->log, "Invalid payload type!");
        free(msg);
        return -1;
    }

    /* The jitter buffer is shared with ac_iterate, hence the lock. */
    pthread_mutex_lock(ac->queue_mutex);
    const int write_result = jbuf_write(ac->log, (struct JitterBuffer *)ac->j_buf, msg);
    pthread_mutex_unlock(ac->queue_mutex);

    if (write_result != -1) {
        return 0;
    }

    LOGGER_WARNING(ac->log, "Could not queue the message!");
    free(msg);
    return -1;
}
249
/**
 * Change the bit rate, sampling rate and channel count of the session's
 * encoder.
 *
 * @return 0 on success, -1 if ac is null or the encoder could not be
 *         reconfigured.
 */
int ac_reconfigure_encoder(ACSession *ac, int32_t bit_rate, int32_t sampling_rate, uint8_t channels)
{
    if (ac == nullptr) {
        return -1;
    }

    const bool reconfigured = reconfigure_audio_encoder(ac->log, &ac->encoder,
                              bit_rate, sampling_rate, channels,
                              &ac->le_bit_rate, &ac->le_sample_rate, &ac->le_channel_count);

    return reconfigured ? 0 : -1;
}
262
263
264
/**
 * Ring buffer of RTP messages, indexed by RTP sequence number.
 *
 * A message with sequence number s lives in queue[s % size]. The sequence
 * numbers currently covered by the buffer are [bottom, top).
 */
struct JitterBuffer {
    struct RTPMessage **queue; /* `size` slots; an empty slot holds a null pointer */
    uint32_t size;             /* number of slots in `queue` */
    uint32_t capacity;         /* how far `top` may run ahead of `bottom` before an empty slot counts as lost */
    uint16_t bottom;           /* sequence number of the next message to read */
    uint16_t top;              /* one past the highest sequence number written */
};
272
jbuf_new(uint32_t capacity)273 static struct JitterBuffer *jbuf_new(uint32_t capacity)
274 {
275 unsigned int size = 1;
276
277 while (size <= (capacity * 4)) {
278 size *= 2;
279 }
280
281 struct JitterBuffer *q = (struct JitterBuffer *)calloc(sizeof(struct JitterBuffer), 1);
282
283 if (!q) {
284 return nullptr;
285 }
286
287 q->queue = (struct RTPMessage **)calloc(sizeof(struct RTPMessage *), size);
288
289 if (!q->queue) {
290 free(q);
291 return nullptr;
292 }
293
294 q->size = size;
295 q->capacity = capacity;
296 return q;
297 }
jbuf_clear(struct JitterBuffer * q)298 static void jbuf_clear(struct JitterBuffer *q)
299 {
300 for (; q->bottom != q->top; ++q->bottom) {
301 if (q->queue[q->bottom % q->size]) {
302 free(q->queue[q->bottom % q->size]);
303 q->queue[q->bottom % q->size] = nullptr;
304 }
305 }
306 }
jbuf_free(struct JitterBuffer * q)307 static void jbuf_free(struct JitterBuffer *q)
308 {
309 if (!q) {
310 return;
311 }
312
313 jbuf_clear(q);
314 free(q->queue);
315 free(q);
316 }
jbuf_write(const Logger * log,struct JitterBuffer * q,struct RTPMessage * m)317 static int jbuf_write(const Logger *log, struct JitterBuffer *q, struct RTPMessage *m)
318 {
319 uint16_t sequnum = m->header.sequnum;
320
321 unsigned int num = sequnum % q->size;
322
323 if ((uint32_t)(sequnum - q->bottom) > q->size) {
324 LOGGER_DEBUG(log, "Clearing filled jitter buffer: %p", (void *)q);
325
326 jbuf_clear(q);
327 q->bottom = sequnum - q->capacity;
328 q->queue[num] = m;
329 q->top = sequnum + 1;
330 return 0;
331 }
332
333 if (q->queue[num]) {
334 return -1;
335 }
336
337 q->queue[num] = m;
338
339 if ((sequnum - q->bottom) >= (q->top - q->bottom)) {
340 q->top = sequnum + 1;
341 }
342
343 return 0;
344 }
jbuf_read(struct JitterBuffer * q,int32_t * success)345 static struct RTPMessage *jbuf_read(struct JitterBuffer *q, int32_t *success)
346 {
347 if (q->top == q->bottom) {
348 *success = 0;
349 return nullptr;
350 }
351
352 unsigned int num = q->bottom % q->size;
353
354 if (q->queue[num]) {
355 struct RTPMessage *ret = q->queue[num];
356 q->queue[num] = nullptr;
357 ++q->bottom;
358 *success = 1;
359 return ret;
360 }
361
362 if ((uint32_t)(q->top - q->bottom) > q->capacity) {
363 ++q->bottom;
364 *success = 2;
365 return nullptr;
366 }
367
368 *success = 0;
369 return nullptr;
370 }
create_audio_encoder(const Logger * log,int32_t bit_rate,int32_t sampling_rate,int32_t channel_count)371 static OpusEncoder *create_audio_encoder(const Logger *log, int32_t bit_rate, int32_t sampling_rate,
372 int32_t channel_count)
373 {
374 int status = OPUS_OK;
375 /*
376 * OPUS_APPLICATION_VOIP Process signal for improved speech intelligibility
377 * OPUS_APPLICATION_AUDIO Favor faithfulness to the original input
378 * OPUS_APPLICATION_RESTRICTED_LOWDELAY Configure the minimum possible coding delay
379 */
380 OpusEncoder *rc = opus_encoder_create(sampling_rate, channel_count, OPUS_APPLICATION_VOIP, &status);
381
382 if (status != OPUS_OK) {
383 LOGGER_ERROR(log, "Error while starting audio encoder: %s", opus_strerror(status));
384 return nullptr;
385 }
386
387
388 /*
389 * Rates from 500 to 512000 bits per second are meaningful as well as the special
390 * values OPUS_BITRATE_AUTO and OPUS_BITRATE_MAX. The value OPUS_BITRATE_MAX can
391 * be used to cause the codec to use as much rate as it can, which is useful for
392 * controlling the rate by adjusting the output buffer size.
393 *
394 * Parameters:
395 * `[in]` `x` `opus_int32`: bitrate in bits per second.
396 */
397 status = opus_encoder_ctl(rc, OPUS_SET_BITRATE(bit_rate));
398
399 if (status != OPUS_OK) {
400 LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
401 goto FAILURE;
402 }
403
404
405 /*
406 * Configures the encoder's use of inband forward error correction.
407 * Note:
408 * This is only applicable to the LPC layer
409 * Parameters:
410 * `[in]` `x` `int`: FEC flag, 0 (disabled) is default
411 */
412 /* Enable in-band forward error correction in codec */
413 status = opus_encoder_ctl(rc, OPUS_SET_INBAND_FEC(1));
414
415 if (status != OPUS_OK) {
416 LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
417 goto FAILURE;
418 }
419
420
421 /*
422 * Configures the encoder's expected packet loss percentage.
423 * Higher values with trigger progressively more loss resistant behavior in
424 * the encoder at the expense of quality at a given bitrate in the lossless case,
425 * but greater quality under loss.
426 * Parameters:
427 * `[in]` `x` `int`: Loss percentage in the range 0-100, inclusive.
428 */
429 /* Make codec resistant to up to 10% packet loss
430 * NOTE This could also be adjusted on the fly, rather than hard-coded,
431 * with feedback from the receiving client.
432 */
433 status = opus_encoder_ctl(rc, OPUS_SET_PACKET_LOSS_PERC(AUDIO_OPUS_PACKET_LOSS_PERC));
434
435 if (status != OPUS_OK) {
436 LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
437 goto FAILURE;
438 }
439
440
441 /*
442 * Configures the encoder's computational complexity.
443 *
444 * The supported range is 0-10 inclusive with 10 representing the highest complexity.
445 * The default value is 10.
446 *
447 * Parameters:
448 * `[in]` `x` `int`: 0-10, inclusive
449 */
450 /* Set algorithm to the highest complexity, maximizing compression */
451 status = opus_encoder_ctl(rc, OPUS_SET_COMPLEXITY(AUDIO_OPUS_COMPLEXITY));
452
453 if (status != OPUS_OK) {
454 LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
455 goto FAILURE;
456 }
457
458 return rc;
459
460 FAILURE:
461 opus_encoder_destroy(rc);
462 return nullptr;
463 }
464
/**
 * Bring an encoder in line with new settings.
 *
 * A change of sampling rate or channel count replaces the encoder with a
 * freshly created one; a change of bit rate alone is applied to the existing
 * encoder. The old_* values are updated to the new settings on success.
 *
 * @return true if the encoder now uses the new settings (or nothing had to
 *         change), false if creating the replacement or setting the bit rate
 *         failed.
 */
static bool reconfigure_audio_encoder(const Logger *log, OpusEncoder **e, int32_t new_br, int32_t new_sr,
                                      uint8_t new_ch, int32_t *old_br, int32_t *old_sr, int32_t *old_ch)
{
    /* Values are checked in toxav.c */
    const bool format_changed = *old_sr != new_sr || *old_ch != new_ch;

    if (!format_changed && *old_br == new_br) {
        return true; /* Nothing changed */
    }

    if (format_changed) {
        /* Build the replacement first so the old encoder survives a failure. */
        OpusEncoder *replacement = create_audio_encoder(log, new_br, new_sr, new_ch);

        if (replacement == nullptr) {
            return false;
        }

        opus_encoder_destroy(*e);
        *e = replacement;
    }

    const int status = opus_encoder_ctl(*e, OPUS_SET_BITRATE(new_br));

    if (status != OPUS_OK) {
        LOGGER_ERROR(log, "Error while setting encoder ctl: %s", opus_strerror(status));
        return false;
    }

    *old_sr = new_sr;
    *old_ch = new_ch;
    *old_br = new_br;

    LOGGER_DEBUG(log, "Reconfigured audio encoder br: %d sr: %d cc:%d", new_br, new_sr, new_ch);
    return true;
}
496
/**
 * Make sure the session's decoder matches the given sampling rate and
 * channel count, replacing it with a new decoder if necessary.
 *
 * A replacement is refused while less than 500 clock units (presumably
 * milliseconds - confirm against current_time_monotonic) have passed since
 * the previous one.
 *
 * @return true if the decoder matches the requested format on return, false
 *         if the change was refused or the new decoder could not be created
 *         (the old decoder is kept in both cases).
 */
static bool reconfigure_audio_decoder(ACSession *ac, int32_t sampling_rate, int8_t channels)
{
    if (sampling_rate == ac->ld_sample_rate && channels == ac->ld_channel_count) {
        return true;
    }

    if (current_time_monotonic(ac->mono_time) - ac->ldrts < 500) {
        return false;
    }

    int status;
    OpusDecoder *replacement = opus_decoder_create(sampling_rate, channels, &status);

    if (status != OPUS_OK) {
        LOGGER_ERROR(ac->log, "Error while starting audio decoder(%d %d): %s", sampling_rate, channels, opus_strerror(status));
        return false;
    }

    ac->ld_sample_rate = sampling_rate;
    ac->ld_channel_count = channels;
    ac->ldrts = current_time_monotonic(ac->mono_time);

    /* Swap the decoders only now that the replacement is known to be good. */
    opus_decoder_destroy(ac->decoder);
    ac->decoder = replacement;

    LOGGER_DEBUG(ac->log, "Reconfigured audio decoder sr: %d cc: %d", sampling_rate, channels);
    return true;
}
524