#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#include <libavutil/pixfmt.h>
#include <libavutil/time.h>

#include <SDL.h>
#include <SDL_thread.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
12
/* Should a mono channel be split into two equal stereo channels (true), or
 * should the energy be split onto two stereo channels with 1/2 the energy
 * each (false)?
 */
17 static int audio_equal_mono = 1;
18
19 /* The weight of stereo channels when audio_equal_mono is true. */
20 static double stereo_matrix[] = { 1.0, 1.0 };
21
22 /* The output audio sample rate. */
23 static int audio_sample_rate = 44100;
24
25 static int audio_sample_increase = 44100 / 5;
26 static int audio_target_samples = 44100 * 2;
27
28 const int CHANNELS = 2;
29 const int BPC = 2; // Bytes per channel.
30 const int BPS = 4; // Bytes per sample.
31
32 const int FRAMES = 3;
33
34 // The alignment of each row of pixels.
35 const int ROW_ALIGNMENT = 16;
36
// The number of padding pixels on each side. This has to be greater than 0
// (since Ren'Py needs some padding), FRAME_PADDING * BPS has to be a multiple
// of 16 (alignment issues on ARM NEON), and it has to match the crop in the
// read_video function of renpysound.pyx.
41 const int FRAME_PADDING = ROW_ALIGNMENT / 4;
42
43 const int SPEED = 1;
44
45 // How many seconds early can frames be delivered?
46 static const double frame_early_delivery = .005;
47
48 static SDL_Surface *rgb_surface = NULL;
49 static SDL_Surface *rgba_surface = NULL;
50
51 // http://dranger.com/ffmpeg/
52
53 /*******************************************************************************
54 * SDL_RWops <-> AVIOContext
55 * */
56
rwops_read(void * opaque,uint8_t * buf,int buf_size)57 static int rwops_read(void *opaque, uint8_t *buf, int buf_size) {
58 SDL_RWops *rw = (SDL_RWops *) opaque;
59
60 int rv = rw->read(rw, buf, 1, buf_size);
61 return rv;
62
63 }
64
rwops_write(void * opaque,uint8_t * buf,int buf_size)65 static int rwops_write(void *opaque, uint8_t *buf, int buf_size) {
66 printf("Writing to an SDL_rwops is a really bad idea.\n");
67 return -1;
68 }
69
rwops_seek(void * opaque,int64_t offset,int whence)70 static int64_t rwops_seek(void *opaque, int64_t offset, int whence) {
71 SDL_RWops *rw = (SDL_RWops *) opaque;
72
73 if (whence == AVSEEK_SIZE) {
74 return rw->size(rw);
75 }
76
77 // Ignore flags like AVSEEK_FORCE.
78 whence &= (SEEK_SET | SEEK_CUR | SEEK_END);
79
80 int64_t rv = rw->seek(rw, (int) offset, whence);
81 return rv;
82 }
83
84 #define RWOPS_BUFFER 65536
85
86
rwops_open(SDL_RWops * rw)87 static AVIOContext *rwops_open(SDL_RWops *rw) {
88
89 unsigned char *buffer = av_malloc(RWOPS_BUFFER);
90 if (buffer == NULL) {
91 return NULL;
92 }
93 AVIOContext *rv = avio_alloc_context(
94 buffer,
95 RWOPS_BUFFER,
96 0,
97 rw,
98 rwops_read,
99 rwops_write,
100 rwops_seek);
101 if (rv == NULL) {
102 av_free(buffer);
103 return NULL;
104 }
105
106 return rv;
107 }
108
rwops_close(SDL_RWops * rw)109 static void rwops_close(SDL_RWops *rw) {
110 rw->close(rw);
111 }
112
113 static double current_time = 0;
114
115 typedef struct PacketQueue {
116 AVPacketList *first;
117 AVPacketList *last;
118 } PacketQueue;
119
120 typedef struct FrameQueue {
121 AVFrame *first;
122 AVFrame *last;
123 } FrameQueue;
124
125
126 typedef struct SurfaceQueueEntry {
127 struct SurfaceQueueEntry *next;
128
129 SDL_Surface *surf;
130
131 /* The pts, converted to seconds. */
132 double pts;
133
134 /* The format. This is not refcounted, but it's kept alive by being
135 * the format of one of the sampel surfaces.
136 */
137 SDL_PixelFormat *format;
138
139 /* As with SDL_Surface. */
140 int w, h, pitch;
141 void *pixels;
142
143 } SurfaceQueueEntry;
144
145 typedef struct MediaState {
146
147 /* The next entry in a list of MediaStates */
148 struct MediaState *next;
149
150 /* The thread associated with decoding this media. */
151 SDL_Thread *thread;
152
153 /* The condition and lock. */
154 SDL_cond* cond;
155 SDL_mutex* lock;
156
157 SDL_RWops *rwops;
158 char *filename;
159
160 /*
161 * True if we this stream should have video.
162 */
163 int want_video;
164
165 /* This becomes true once the decode thread has finished initializing
166 * and the readers and writers can do their thing.
167 */
168 int ready; // Lock.
169
170 /* This is set to true when data has been read, in order to ask the
171 * decode thread to produce more data.
172 */
173 int needs_decode; // Lock.
174
175 /*
176 * This is set to true when data has been read, in order to ask the
177 * decode thread to shut down and deallocate all resources.
178 */
179 int quit; // Lock
180
181 /* The number of seconds to skip at the start. */
182 double skip;
183
184 /* These become true when the audio and video finish. */
185 int audio_finished;
186 int video_finished;
187
188 /* Indexes of video and audio streams. */
189 int video_stream;
190 int audio_stream;
191
192 /* The main context. */
193 AVFormatContext *ctx;
194
195 /* Contexts for decoding audio and video streams. */
196 AVCodecContext *video_context;
197 AVCodecContext *audio_context;
198
199 /* Queues of packets going to the audio and video
200 * streams.
201 */
202 PacketQueue video_packet_queue;
203 PacketQueue audio_packet_queue;
204
205
206 /* The total duration of the video. Only used for information purposes. */
207 double total_duration;
208
209 /* Audio Stuff ***********************************************************/
210
211 /* The queue of converted audio frames. */
212 FrameQueue audio_queue; // Lock
213
214 /* The size of the audio queue, and the target size in seconds. */
215 int audio_queue_samples;
216 int audio_queue_target_samples;
217
218 /* A frame used for decoding. */
219 AVFrame *audio_decode_frame;
220
221 /* The audio frame being read from, and the index into the audio frame. */
222 AVFrame *audio_out_frame; // Lock
223 int audio_out_index; // Lock
224
225 SwrContext *swr;
226
227 /* The duration of the audio stream, in samples.
228 * -1 means to play until we run out of data.
229 */
230 int audio_duration;
231
232 /* The number of samples that have been read so far. */
233 int audio_read_samples; // Lock
234
235 /* A frame that video is decoded into. */
236 AVFrame *video_decode_frame;
237
238 /* The video packet we're decoding, and the partial packet. */
239 AVPacket video_pkt;
240 AVPacket video_pkt_tmp;
241
242
243 /* Video Stuff ***********************************************************/
244
245 /* Software rescaling context. */
246 struct SwsContext *sws;
247
248 /* A queue of decoded video frames. */
249 SurfaceQueueEntry *surface_queue; // Lock
250 int surface_queue_size; // Lock
251
252 /* The offset between a pts timestamp and realtime. */
253 double video_pts_offset;
254
255 /* The wall time the last video frame was read. */
256 double video_read_time;
257
258 /* Are frame drops allowed? */
259 int frame_drops;
260
261 /* The time the pause happened, or 0 if we're not paused. */
262 double pause_time;
263
264 /* The offset between now and the time of the current frame, at least for video. */
265 double time_offset;
266
267
268 } MediaState;
269
270 static AVFrame *dequeue_frame(FrameQueue *fq);
271 static void free_packet_queue(PacketQueue *pq);
272 static SurfaceQueueEntry *dequeue_surface(SurfaceQueueEntry **queue);
273
274
275 /* A queue of MediaState objects that are awaiting deallocation.*/
276 static MediaState *deallocate_queue = NULL;
277
/* A mutex that protects deallocate_queue. */
279 SDL_mutex *deallocate_mutex = NULL;
280
281 /* Deallocates a single MediaState. */
deallocate(MediaState * ms)282 static void deallocate(MediaState *ms) {
283
284 while (1) {
285 SurfaceQueueEntry *sqe = dequeue_surface(&ms->surface_queue);
286
287 if (! sqe) {
288 break;
289 }
290
291 if (sqe->pixels) {
292 SDL_free(sqe->pixels);
293 }
294 av_free(sqe);
295 }
296
297 if (ms->sws) {
298 sws_freeContext(ms->sws);
299 }
300
301 if (ms->video_decode_frame) {
302 av_frame_free(&ms->video_decode_frame);
303 }
304
305 av_packet_unref(&ms->video_pkt);
306
307 /* Destroy audio stuff. */
308 if (ms->swr) {
309 swr_free(&ms->swr);
310 }
311
312 if (ms->audio_decode_frame) {
313 av_frame_free(&ms->audio_decode_frame);
314 }
315
316 if (ms->audio_out_frame) {
317 av_frame_free(&ms->audio_out_frame);
318 }
319
320 while (1) {
321 AVFrame *f = dequeue_frame(&ms->audio_queue);
322
323 if (!f) {
324 break;
325 }
326
327 av_frame_free(&f);
328 }
329
330 /* Destroy/Close core stuff. */
331 free_packet_queue(&ms->audio_packet_queue);
332 free_packet_queue(&ms->video_packet_queue);
333
334 if (ms->video_context) {
335 avcodec_free_context(&ms->video_context);
336 }
337 if (ms->audio_context) {
338 avcodec_free_context(&ms->audio_context);
339 }
340
341 if (ms->ctx) {
342
343 if (ms->ctx->pb) {
344 if (ms->ctx->pb->buffer) {
345 av_freep(&ms->ctx->pb->buffer);
346 }
347 av_freep(&ms->ctx->pb);
348 }
349
350 avformat_close_input(&ms->ctx);
351 avformat_free_context(ms->ctx);
352 }
353
354 /* Destroy alloc stuff. */
355 if (ms->cond) {
356 SDL_DestroyCond(ms->cond);
357 }
358 if (ms->lock) {
359 SDL_DestroyMutex(ms->lock);
360 }
361
362 if (ms->rwops) {
363 rwops_close(ms->rwops);
364 }
365
366 if (ms->filename) {
367 av_free(ms->filename);
368 }
369
370 /* Add this MediaState to a queue to have its thread ended, and the MediaState
371 * deactivated.
372 */
373 SDL_LockMutex(deallocate_mutex);
374 ms->next = deallocate_queue;
375 deallocate_queue = ms;
376 SDL_UnlockMutex(deallocate_mutex);
377
378 }
379
380 /* Perform the portion of deallocation that's been deferred to the main thread. */
deallocate_deferred()381 static void deallocate_deferred() {
382
383 SDL_LockMutex(deallocate_mutex);
384
385 while (deallocate_queue) {
386 MediaState *ms = deallocate_queue;
387 deallocate_queue = ms->next;
388
389 if (ms->thread) {
390 SDL_WaitThread(ms->thread, NULL);
391 }
392
393 av_free(ms);
394 }
395
396 SDL_UnlockMutex(deallocate_mutex);
397 }
398
enqueue_frame(FrameQueue * fq,AVFrame * frame)399 static void enqueue_frame(FrameQueue *fq, AVFrame *frame) {
400 frame->opaque = NULL;
401
402 if (fq->first) {
403 fq->last->opaque = frame;
404 fq->last = frame;
405 } else {
406 fq->first = fq->last = frame;
407 }
408 }
409
dequeue_frame(FrameQueue * fq)410 static AVFrame *dequeue_frame(FrameQueue *fq) {
411 if (!fq->first) {
412 return NULL;
413 }
414
415 AVFrame *rv = fq->first;
416 fq->first = (AVFrame *) rv->opaque;
417
418 if (!fq->first) {
419 fq->last = NULL;
420 }
421
422 return rv;
423 }
424
425
enqueue_packet(PacketQueue * pq,AVPacket * pkt)426 static void enqueue_packet(PacketQueue *pq, AVPacket *pkt) {
427 AVPacketList *pl = av_malloc(sizeof(AVPacketList));
428 if (pl == NULL)
429 {
430 return;
431 }
432
433 av_init_packet(&pl->pkt);
434 av_packet_ref(&pl->pkt, pkt);
435
436 pl->next = NULL;
437
438 if (!pq->first) {
439 pq->first = pq->last = pl;
440 } else {
441 pq->last->next = pl;
442 pq->last = pl;
443 }
444 }
445
dequeue_packet(PacketQueue * pq,AVPacket * pkt)446 static int dequeue_packet(PacketQueue *pq, AVPacket *pkt) {
447 if (! pq->first ) {
448 return 0;
449 }
450
451 AVPacketList *pl = pq->first;
452
453 av_packet_move_ref(pkt, &pl->pkt);
454
455 pq->first = pl->next;
456
457 if (!pq->first) {
458 pq->last = NULL;
459 }
460
461 av_free(pl);
462
463 return 1;
464 }
count_packet_queue(PacketQueue * pq)465 static int count_packet_queue(PacketQueue *pq) {
466 AVPacketList *pl = pq->first;
467
468 int rv = 0;
469
470 while (pl) {
471 rv += 1;
472 pl = pl->next;
473 }
474
475 return rv;
476 }
477
free_packet_queue(PacketQueue * pq)478 static void free_packet_queue(PacketQueue *pq) {
479 AVPacket scratch;
480
481 av_init_packet(&scratch);
482
483 while (dequeue_packet(pq, &scratch)) {
484 av_packet_unref(&scratch);
485 }
486 }
487
enqueue_surface(SurfaceQueueEntry ** queue,SurfaceQueueEntry * sqe)488 static void enqueue_surface(SurfaceQueueEntry **queue, SurfaceQueueEntry *sqe) {
489 while (*queue) {
490 queue = &(*queue)->next;
491 }
492
493 *queue = sqe;
494 }
495
496
dequeue_surface(SurfaceQueueEntry ** queue)497 static SurfaceQueueEntry *dequeue_surface(SurfaceQueueEntry **queue) {
498 SurfaceQueueEntry *rv = *queue;
499
500 if (rv) {
501 *queue = rv->next;
502 }
503
504 return rv;
505 }
506
507
508 #if 0
509 static void check_surface_queue(MediaState *ms) {
510
511 SurfaceQueueEntry **queue = &ms->surface_queue;
512
513 int count = 0;
514
515 while (*queue) {
516 count += 1;
517 queue = &(*queue)->next;
518 }
519
520 if (count != ms->surface_queue_size) {
521 abort();
522 }
523
524 }
525 #endif
526
527
528 /**
529 * Reads a packet from one of the queues, filling the other queue if
530 * necessary.
531 */
read_packet(MediaState * ms,PacketQueue * pq,AVPacket * pkt)532 static int read_packet(MediaState *ms, PacketQueue *pq, AVPacket *pkt) {
533 AVPacket scratch;
534
535 av_init_packet(&scratch);
536
537 while (1) {
538 if (dequeue_packet(pq, pkt)) {
539 return 1;
540 }
541
542 if (av_read_frame(ms->ctx, &scratch)) {
543 pkt->data = NULL;
544 pkt->size = 0;
545 return 0;
546 }
547
548 if (scratch.stream_index == ms->video_stream && ! ms->video_finished) {
549 enqueue_packet(&ms->video_packet_queue, &scratch);
550 } else if (scratch.stream_index == ms->audio_stream && ! ms->audio_finished) {
551 enqueue_packet(&ms->audio_packet_queue, &scratch);
552 }
553
554 av_packet_unref(&scratch);
555 }
556 }
557
558
find_context(AVFormatContext * ctx,int index)559 static AVCodecContext *find_context(AVFormatContext *ctx, int index) {
560
561 AVDictionary *opts = NULL;
562
563 if (index == -1) {
564 return NULL;
565 }
566
567 AVCodec *codec = NULL;
568 AVCodecContext *codec_ctx = NULL;
569
570 codec_ctx = avcodec_alloc_context3(NULL);
571
572 if (codec_ctx == NULL) {
573 return NULL;
574 }
575
576 if (avcodec_parameters_to_context(codec_ctx, ctx->streams[index]->codecpar) < 0) {
577 goto fail;
578 }
579
580 codec_ctx->pkt_timebase = ctx->streams[index]->time_base;
581
582 codec = avcodec_find_decoder(codec_ctx->codec_id);
583
584 if (codec == NULL) {
585 goto fail;
586 }
587
588 codec_ctx->codec_id = codec->id;
589
590 av_dict_set(&opts, "threads", "auto", 0);
591 av_dict_set(&opts, "refcounted_frames", "0", 0);
592
593 if (avcodec_open2(codec_ctx, codec, &opts)) {
594 goto fail;
595 }
596
597 return codec_ctx;
598
599 fail:
600
601 av_dict_free(&opts);
602
603 avcodec_free_context(&codec_ctx);
604 return NULL;
605 }
606
607
608 /**
609 * Given a packet, decodes a frame if possible. This is intended to be a drop-in replacement
610 * for the now deprecated avcodec_decode_audio4/video2 APIs.
611 *
612 * \param[in] context The context the decoding is done in.
613 * \param[out] frame A frame that is updated with the decoded data.
614 * \param[out] got_frame Set to 1 if a frame was decoded, 0 if not.
615 * \param[in] pkt The packet data to present.
616 *
617 * Returns pkt->size if the packet was consumed, 0 if not, or < 0 on error (including
618 * end of file.)
619 */
decode_common(AVCodecContext * context,AVFrame * frame,int * got_frame,AVPacket * pkt)620 static int decode_common(AVCodecContext *context, AVFrame *frame, int *got_frame, AVPacket *pkt) {
621
622 int ret;
623 int rv = 0;
624
625 if (pkt) {
626 ret = avcodec_send_packet(context, pkt);
627
628 if (ret >= 0) {
629 rv = pkt->size;
630 } else if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
631 rv = 0;
632 } else {
633 return ret;
634 }
635 }
636
637 ret = avcodec_receive_frame(context, frame);
638
639 if (ret >= 0) {
640 *got_frame = 1;
641 } else if (ret == AVERROR(EAGAIN)) {
642 *got_frame = 0;
643 } else if (ret == AVERROR_EOF) {
644 *got_frame = 0;
645 if (!pkt || pkt->size == 0) {
646 return ret;
647 }
648 } else {
649 *got_frame = 0;
650 return ret;
651 }
652
653 return rv;
654 }
655
656
657 /**
658 * Decodes audio. Returns 0 if no audio was decoded, or 1 if some audio was
659 * decoded.
660 */
decode_audio(MediaState * ms)661 static void decode_audio(MediaState *ms) {
662 AVPacket pkt;
663 AVPacket pkt_temp;
664 AVFrame *converted_frame;
665
666 if (!ms->audio_context) {
667 ms->audio_finished = 1;
668 return;
669 }
670
671 if (ms->audio_decode_frame == NULL) {
672 ms->audio_decode_frame = av_frame_alloc();
673 }
674
675 if (ms->audio_decode_frame == NULL) {
676 ms->audio_finished = 1;
677 return;
678 }
679
680 av_init_packet(&pkt);
681
682 double timebase = av_q2d(ms->ctx->streams[ms->audio_stream]->time_base);
683
684 if (ms->audio_queue_target_samples < audio_target_samples) {
685 ms->audio_queue_target_samples += audio_sample_increase;
686 }
687
688 while (ms->audio_queue_samples < ms->audio_queue_target_samples) {
689
690 read_packet(ms, &ms->audio_packet_queue, &pkt);
691
692 pkt_temp = pkt;
693
694 do {
695 int got_frame;
696 int read_size = decode_common(ms->audio_context, ms->audio_decode_frame, &got_frame, &pkt_temp);
697
698 if (read_size < 0) {
699
700 if (pkt.data) {
701 av_packet_unref(&pkt);
702 }
703
704 ms->audio_finished = 1;
705 return;
706 }
707
708 pkt_temp.data += read_size;
709 pkt_temp.size -= read_size;
710
711 if (!got_frame) {
712 if (pkt.data == NULL) {
713 ms->audio_finished = 1;
714 av_packet_unref(&pkt);
715 return;
716 }
717
718 break;
719 }
720
721 converted_frame = av_frame_alloc();
722
723 if (converted_frame == NULL) {
724 ms->audio_finished = 1;
725 return;
726 }
727
728 converted_frame->sample_rate = audio_sample_rate;
729 converted_frame->channel_layout = AV_CH_LAYOUT_STEREO;
730 converted_frame->format = AV_SAMPLE_FMT_S16;
731
732 if (!ms->audio_decode_frame->channel_layout) {
733 ms->audio_decode_frame->channel_layout = av_get_default_channel_layout(ms->audio_decode_frame->channels);
734
735 if (audio_equal_mono && (ms->audio_decode_frame->channels == 1)) {
736 swr_alloc_set_opts(
737 ms->swr,
738 converted_frame->channel_layout,
739 converted_frame->format,
740 converted_frame->sample_rate,
741 ms->audio_decode_frame->channel_layout,
742 ms->audio_decode_frame->format,
743 ms->audio_decode_frame->sample_rate,
744 0,
745 NULL);
746
747 swr_set_matrix(ms->swr, stereo_matrix, 1);
748 }
749 }
750
751 if(swr_convert_frame(ms->swr, converted_frame, ms->audio_decode_frame)) {
752 av_frame_free(&converted_frame);
753 continue;
754 }
755
756 double start = ms->audio_decode_frame->best_effort_timestamp * timebase;
757 double end = start + 1.0 * converted_frame->nb_samples / audio_sample_rate;
758
759 SDL_LockMutex(ms->lock);
760
761 if (start >= ms->skip) {
762
763 // Normal case, queue the frame.
764 ms->audio_queue_samples += converted_frame->nb_samples;
765 enqueue_frame(&ms->audio_queue, converted_frame);
766
767 } else if (end < ms->skip) {
768 // Totally before, drop the frame.
769 av_frame_free(&converted_frame);
770
771 } else {
772 // The frame straddles skip, so we queue the (necessarily single)
773 // frame and set the index into the frame.
774 ms->audio_out_frame = converted_frame;
775 ms->audio_out_index = BPS * (int) ((ms->skip - start) * audio_sample_rate);
776
777 }
778
779 SDL_UnlockMutex(ms->lock);
780
781 } while (pkt_temp.size);
782
783 if (pkt.data) {
784 av_packet_unref(&pkt);
785 }
786 }
787
788 return;
789
790 }
791
get_pixel_format(SDL_Surface * surf)792 static enum AVPixelFormat get_pixel_format(SDL_Surface *surf) {
793 uint32_t pixel;
794 uint8_t *bytes = (uint8_t *) &pixel;
795
796 pixel = SDL_MapRGBA(surf->format, 1, 2, 3, 4);
797
798 enum AVPixelFormat fmt;
799
800 if ((bytes[0] == 4 || bytes[0] == 0) && bytes[1] == 1) {
801 fmt = AV_PIX_FMT_ARGB;
802 } else if ((bytes[0] == 4 || bytes[0] == 0) && bytes[1] == 3) {
803 fmt = AV_PIX_FMT_ABGR;
804 } else if (bytes[0] == 1) {
805 fmt = AV_PIX_FMT_RGBA;
806 } else {
807 fmt = AV_PIX_FMT_BGRA;
808 }
809
810 return fmt;
811 }
812
813
814
decode_video_frame(MediaState * ms)815 static SurfaceQueueEntry *decode_video_frame(MediaState *ms) {
816
817 while (1) {
818
819 if (! ms->video_pkt_tmp.size) {
820 av_packet_unref(&ms->video_pkt);
821 read_packet(ms, &ms->video_packet_queue, &ms->video_pkt);
822 ms->video_pkt_tmp = ms->video_pkt;
823 }
824
825 int got_frame = 0;
826 int read_size = decode_common(ms->video_context, ms->video_decode_frame, &got_frame, &ms->video_pkt_tmp);
827
828 if (read_size < 0) {
829 ms->video_finished = 1;
830 return NULL;
831 }
832
833 ms->video_pkt_tmp.data += read_size;
834 ms->video_pkt_tmp.size -= read_size;
835
836 if (got_frame) {
837 break;
838 }
839
840 if (!got_frame && !ms->video_pkt.size) {
841 ms->video_finished = 1;
842 return NULL;
843 }
844
845 }
846
847 double pts = ms->video_decode_frame->best_effort_timestamp * av_q2d(ms->ctx->streams[ms->video_stream]->time_base);
848
849 if (pts < ms->skip) {
850 return NULL;
851 }
852
853 // If we're behind on decoding the frame, drop it.
854 if (ms->video_pts_offset && (ms->video_pts_offset + pts < ms->video_read_time)) {
855
856 // If we're 5s behind, give up on video for the time being, so we don't
857 // blow out memory.
858 if (ms->video_pts_offset + pts < ms->video_read_time - 5.0) {
859 ms->video_finished = 1;
860 }
861
862 if (ms->frame_drops) {
863 return NULL;
864 }
865 }
866
867 SDL_Surface *sample = rgba_surface;
868
869 ms->sws = sws_getCachedContext(
870 ms->sws,
871
872 ms->video_decode_frame->width,
873 ms->video_decode_frame->height,
874 ms->video_decode_frame->format,
875
876 ms->video_decode_frame->width,
877 ms->video_decode_frame->height,
878 get_pixel_format(rgba_surface),
879
880 SWS_POINT,
881
882 NULL,
883 NULL,
884 NULL
885 );
886
887 if (!ms->sws) {
888 ms->video_finished = 1;
889 return NULL;
890 }
891
892 SurfaceQueueEntry *rv = av_malloc(sizeof(SurfaceQueueEntry));
893 if (rv == NULL) {
894 ms->video_finished = 1;
895 return NULL;
896 }
897 rv->w = ms->video_decode_frame->width + FRAME_PADDING * 2;
898 rv->h = ms->video_decode_frame->height + FRAME_PADDING * 2;
899
900 rv->pitch = rv->w * sample->format->BytesPerPixel;
901
902 if (rv->pitch % ROW_ALIGNMENT) {
903 rv->pitch += ROW_ALIGNMENT - (rv->pitch % ROW_ALIGNMENT);
904 }
905
906 #if defined(_WIN32)
907 rv->pixels = SDL_calloc(rv->pitch * rv->h, 1);
908 #else
909 posix_memalign(&rv->pixels, ROW_ALIGNMENT, rv->pitch * rv->h);
910 #endif
911
912 memset(rv->pixels, 0, rv->pitch * rv->h);
913
914 rv->format = sample->format;
915 rv->next = NULL;
916 rv->pts = pts;
917
918 uint8_t *surf_pixels = (uint8_t *) rv->pixels;
919 uint8_t *surf_data[] = { &surf_pixels[FRAME_PADDING * rv->pitch + FRAME_PADDING * sample->format->BytesPerPixel] };
920 int surf_linesize[] = { rv->pitch };
921
922 sws_scale(
923 ms->sws,
924
925 (const uint8_t * const *) ms->video_decode_frame->data,
926 ms->video_decode_frame->linesize,
927
928 0,
929 ms->video_decode_frame->height,
930
931 surf_data,
932 surf_linesize
933 );
934
935 return rv;
936 }
937
938
decode_video(MediaState * ms)939 static void decode_video(MediaState *ms) {
940 if (!ms->video_context) {
941 ms->video_finished = 1;
942 return;
943 }
944
945 if (!ms->video_decode_frame) {
946 ms->video_decode_frame = av_frame_alloc();
947 }
948
949 if (!ms->video_decode_frame) {
950 ms->video_finished = 1;
951 return;
952 }
953
954 SDL_LockMutex(ms->lock);
955
956 if (!ms->video_finished && (ms->surface_queue_size < FRAMES)) {
957
958 SDL_UnlockMutex(ms->lock);
959
960 SurfaceQueueEntry *sqe = decode_video_frame(ms);
961
962 SDL_LockMutex(ms->lock);
963
964 if (sqe) {
965 enqueue_surface(&ms->surface_queue, sqe);
966 ms->surface_queue_size += 1;
967 }
968 }
969
970 if (!ms->video_finished && (ms->surface_queue_size < FRAMES)) {
971 ms->needs_decode = 1;
972 }
973
974 SDL_UnlockMutex(ms->lock);
975 }
976
977
978 static int decode_sync_start(void *arg);
979 void media_read_sync(struct MediaState *ms);
980 void media_read_sync_finish(struct MediaState *ms);
981
982
983 /**
984 * Returns 1 if there is a video frame ready on this channel, or 0 otherwise.
985 */
media_video_ready(struct MediaState * ms)986 int media_video_ready(struct MediaState *ms) {
987
988 int consumed = 0;
989 int rv = 0;
990
991 if (ms->video_stream == -1) {
992 return 1;
993 }
994
995 SDL_LockMutex(ms->lock);
996
997 if (!ms->ready) {
998 goto done;
999 }
1000
1001 if (ms->pause_time > 0) {
1002 goto done;
1003 }
1004
1005 double offset_time = current_time - ms->time_offset;
1006
1007 /*
1008 * If we have an obsolete frame, drop it.
1009 */
1010 if (ms->video_pts_offset) {
1011 while (ms->surface_queue) {
1012
1013 /* The PTS is greater that the last frame read, so we're good. */
1014 if (ms->surface_queue->pts + ms->video_pts_offset >= ms->video_read_time) {
1015 break;
1016 }
1017
1018 /* Otherwise, drop it without display. */
1019 SurfaceQueueEntry *sqe = dequeue_surface(&ms->surface_queue);
1020 ms->surface_queue_size -= 1;
1021
1022 SDL_free(sqe->pixels);
1023 av_free(sqe);
1024
1025 consumed = 1;
1026 }
1027 }
1028
1029
1030 /*
1031 * Otherwise, check to see if we have a frame with a PTS that has passed.
1032 */
1033
1034 if (ms->surface_queue) {
1035 if (ms->video_pts_offset) {
1036 if (ms->surface_queue->pts + ms->video_pts_offset <= offset_time + frame_early_delivery) {
1037 rv = 1;
1038 }
1039 } else {
1040 rv = 1;
1041 }
1042 }
1043
1044 done:
1045
1046 /* Only signal if we've consumed something. */
1047 if (consumed) {
1048 ms->needs_decode = 1;
1049 SDL_CondBroadcast(ms->cond);
1050 }
1051
1052 SDL_UnlockMutex(ms->lock);
1053
1054 return rv;
1055 }
1056
1057
media_read_video(MediaState * ms)1058 SDL_Surface *media_read_video(MediaState *ms) {
1059
1060 SDL_Surface *rv = NULL;
1061 SurfaceQueueEntry *sqe = NULL;
1062
1063 if (ms->video_stream == -1) {
1064 return NULL;
1065 }
1066
1067 double offset_time = current_time - ms->time_offset;
1068
1069 SDL_LockMutex(ms->lock);
1070
1071 #ifndef __EMSCRIPTEN__
1072 while (!ms->ready) {
1073 SDL_CondWait(ms->cond, ms->lock);
1074 }
1075 #endif
1076
1077 if (ms->pause_time > 0) {
1078 goto done;
1079 }
1080
1081 if (!ms->surface_queue_size) {
1082 goto done;
1083 }
1084
1085 if (ms->video_pts_offset == 0.0) {
1086 ms->video_pts_offset = offset_time - ms->surface_queue->pts;
1087 }
1088
1089 if (ms->surface_queue->pts + ms->video_pts_offset <= offset_time + frame_early_delivery) {
1090 sqe = dequeue_surface(&ms->surface_queue);
1091 ms->surface_queue_size -= 1;
1092
1093 }
1094
1095 done:
1096
1097 /* Only signal if we've consumed something. */
1098 if (sqe) {
1099 ms->needs_decode = 1;
1100 ms->video_read_time = offset_time;
1101 SDL_CondBroadcast(ms->cond);
1102 }
1103
1104 SDL_UnlockMutex(ms->lock);
1105
1106 if (sqe) {
1107 rv = SDL_CreateRGBSurfaceFrom(
1108 sqe->pixels,
1109 sqe->w,
1110 sqe->h,
1111 sqe->format->BitsPerPixel,
1112 sqe->pitch,
1113 sqe->format->Rmask,
1114 sqe->format->Gmask,
1115 sqe->format->Bmask,
1116 sqe->format->Amask
1117 );
1118
1119 /* Force SDL to take over management of pixels. */
1120 rv->flags &= ~SDL_PREALLOC;
1121 av_free(sqe);
1122 }
1123
1124 return rv;
1125 }
1126
1127
decode_thread(void * arg)1128 static int decode_thread(void *arg) {
1129 MediaState *ms = (MediaState *) arg;
1130
1131 int err;
1132
1133 AVFormatContext *ctx = avformat_alloc_context();
1134 if (ctx == NULL) {
1135 goto finish;
1136 }
1137 ms->ctx = ctx;
1138
1139 AVIOContext *io_context = rwops_open(ms->rwops);
1140 if (io_context == NULL) {
1141 goto finish;
1142 }
1143 ctx->pb = io_context;
1144
1145 err = avformat_open_input(&ctx, ms->filename, NULL, NULL);
1146 if (err) {
1147 avformat_free_context(ctx);
1148 ms->ctx = NULL;
1149 goto finish;
1150 }
1151
1152 err = avformat_find_stream_info(ctx, NULL);
1153 if (err) {
1154 goto finish;
1155 }
1156
1157
1158 ms->video_stream = -1;
1159 ms->audio_stream = -1;
1160
1161 for (int i = 0; i < ctx->nb_streams; i++) {
1162 if (ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
1163 if (ms->want_video && ms->video_stream == -1) {
1164 ms->video_stream = i;
1165 }
1166 }
1167
1168 if (ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
1169 if (ms->audio_stream == -1) {
1170 ms->audio_stream = i;
1171 }
1172 }
1173 }
1174
1175 ms->video_context = find_context(ctx, ms->video_stream);
1176 ms->audio_context = find_context(ctx, ms->audio_stream);
1177
1178 ms->swr = swr_alloc();
1179 if (ms->swr == NULL) {
1180 goto finish;
1181 }
1182
1183 av_init_packet(&ms->video_pkt);
1184
1185 // Compute the number of samples we need to play back.
1186 if (ms->audio_duration < 0) {
1187 if (av_fmt_ctx_get_duration_estimation_method(ctx) != AVFMT_DURATION_FROM_BITRATE) {
1188
1189 long long duration = ((long long) ctx->duration) * audio_sample_rate;
1190 ms->audio_duration = (unsigned int) (duration / AV_TIME_BASE);
1191
1192 ms->total_duration = 1.0 * ctx->duration / AV_TIME_BASE;
1193
1194 // Check that the duration is reasonable (between 0s and 3600s). If not,
1195 // reject it.
1196 if (ms->audio_duration < 0 || ms->audio_duration > 3600 * audio_sample_rate) {
1197 ms->audio_duration = -1;
1198 }
1199
1200 ms->audio_duration -= (unsigned int) (ms->skip * audio_sample_rate);
1201
1202
1203 } else {
1204 ms->audio_duration = -1;
1205 }
1206 }
1207
1208 if (ms->skip != 0.0) {
1209 av_seek_frame(ctx, -1, (int64_t) (ms->skip * AV_TIME_BASE), AVSEEK_FLAG_BACKWARD);
1210 }
1211
1212 while (!ms->quit) {
1213
1214 if (! ms->audio_finished) {
1215 decode_audio(ms);
1216 }
1217
1218 if (! ms->video_finished) {
1219 decode_video(ms);
1220 }
1221
1222 SDL_LockMutex(ms->lock);
1223
1224 if (!ms->ready) {
1225 ms->ready = 1;
1226 SDL_CondBroadcast(ms->cond);
1227 }
1228
1229 if (!(ms->needs_decode || ms->quit)) {
1230 SDL_CondWait(ms->cond, ms->lock);
1231 }
1232
1233 ms->needs_decode = 0;
1234
1235 SDL_UnlockMutex(ms->lock);
1236 }
1237
1238
1239 finish:
1240 /* Data used by the decoder should be freed here, while data shared with
1241 * the readers should be freed in media_close.
1242 */
1243
1244 SDL_LockMutex(ms->lock);
1245
1246 /* Ensures that every stream becomes ready. */
1247 if (!ms->ready) {
1248 ms->ready = 1;
1249 SDL_CondBroadcast(ms->cond);
1250 }
1251
1252 while (!ms->quit) {
1253 SDL_CondWait(ms->cond, ms->lock);
1254 }
1255
1256 SDL_UnlockMutex(ms->lock);
1257
1258 deallocate(ms);
1259
1260 return 0;
1261 }
1262
1263
media_read_sync_finish(struct MediaState * ms)1264 void media_read_sync_finish(struct MediaState *ms) {
1265 // copy/paste from end of decode_thread
1266
1267 /* Data used by the decoder should be freed here, while data shared with
1268 * the readers should be freed in media_close.
1269 */
1270
1271 SDL_LockMutex(ms->lock);
1272
1273 /* Ensures that every stream becomes ready. */
1274 if (!ms->ready) {
1275 ms->ready = 1;
1276 SDL_CondBroadcast(ms->cond);
1277 }
1278
1279 while (!ms->quit) {
1280 /* SDL_CondWait(ms->cond, ms->lock); */
1281 }
1282
1283 SDL_UnlockMutex(ms->lock);
1284
1285 deallocate(ms);
1286 }
1287
1288
decode_sync_start(void * arg)1289 static int decode_sync_start(void *arg) {
1290 // copy/paste from start of decode_thread
1291 MediaState *ms = (MediaState *) arg;
1292
1293 int err;
1294
1295 AVFormatContext *ctx = avformat_alloc_context();
1296 if (ctx == NULL) {
1297 media_read_sync_finish(ms);
1298 }
1299 ms->ctx = ctx;
1300
1301 AVIOContext *io_context = rwops_open(ms->rwops);
1302 if (io_context == NULL) {
1303 media_read_sync_finish(ms);
1304 }
1305 ctx->pb = io_context;
1306
1307 err = avformat_open_input(&ctx, ms->filename, NULL, NULL);
1308 if (err) {
1309 avformat_free_context(ctx);
1310 ms->ctx = NULL;
1311 media_read_sync_finish(ms);
1312 }
1313
1314 err = avformat_find_stream_info(ctx, NULL);
1315 if (err) {
1316 media_read_sync_finish(ms);
1317 }
1318
1319
1320 ms->video_stream = -1;
1321 ms->audio_stream = -1;
1322
1323 for (int i = 0; i < ctx->nb_streams; i++) {
1324 if (ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
1325 if (ms->want_video && ms->video_stream == -1) {
1326 ms->video_stream = i;
1327 }
1328 }
1329
1330 if (ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
1331 if (ms->audio_stream == -1) {
1332 ms->audio_stream = i;
1333 }
1334 }
1335 }
1336
1337 ms->video_context = find_context(ctx, ms->video_stream);
1338 ms->audio_context = find_context(ctx, ms->audio_stream);
1339
1340 ms->swr = swr_alloc();
1341 if (ms->swr == NULL) {
1342 media_read_sync_finish(ms);
1343 }
1344
1345 av_init_packet(&ms->video_pkt);
1346
1347 // Compute the number of samples we need to play back.
1348 if (ms->audio_duration < 0) {
1349 if (av_fmt_ctx_get_duration_estimation_method(ctx) != AVFMT_DURATION_FROM_BITRATE) {
1350
1351 long long duration = ((long long) ctx->duration) * audio_sample_rate;
1352 ms->audio_duration = (unsigned int) (duration / AV_TIME_BASE);
1353
1354 ms->total_duration = 1.0 * ctx->duration / AV_TIME_BASE;
1355
1356 // Check that the duration is reasonable (between 0s and 3600s). If not,
1357 // reject it.
1358 if (ms->audio_duration < 0 || ms->audio_duration > 3600 * audio_sample_rate) {
1359 ms->audio_duration = -1;
1360 }
1361
1362 ms->audio_duration -= (unsigned int) (ms->skip * audio_sample_rate);
1363
1364
1365 } else {
1366 ms->audio_duration = -1;
1367 }
1368 }
1369
1370 if (ms->skip != 0.0) {
1371 av_seek_frame(ctx, -1, (int64_t) (ms->skip * AV_TIME_BASE), AVSEEK_FLAG_BACKWARD);
1372 }
1373
1374 // [snip!]
1375
1376 return 0;
1377 }
1378
1379
media_read_sync(struct MediaState * ms)1380 void media_read_sync(struct MediaState *ms) {
1381 // copy/paste from middle of decode_thread
1382 // printf("---* media_read_sync %p\n", ms);
1383
1384 //while (!ms->quit) {
1385 if (!ms->quit) {
1386 // printf(" audio_finished: %d, video_finished: %d\n", ms->audio_finished, ms->video_finished);
1387 if (! ms->audio_finished) {
1388 decode_audio(ms);
1389 }
1390
1391 if (! ms->video_finished) {
1392 decode_video(ms);
1393 }
1394
1395 SDL_LockMutex(ms->lock);
1396
1397 if (!ms->ready) {
1398 ms->ready = 1;
1399 SDL_CondBroadcast(ms->cond);
1400 }
1401
1402 if (!(ms->needs_decode || ms->quit)) {
1403 /* SDL_CondWait(ms->cond, ms->lock); */
1404 }
1405
1406 ms->needs_decode = 0;
1407
1408 SDL_UnlockMutex(ms->lock);
1409 }
1410 }
1411
1412
/**
 * Fills `stream` with up to `len` bytes of decoded audio, returning the
 * number of bytes written. Samples are 2-channel, 2-bytes-per-channel
 * (BPS = 4 bytes per sample). If the stream is not ready yet, `stream` is
 * filled with silence and `len` is returned.
 */
int media_read_audio(struct MediaState *ms, Uint8 *stream, int len) {
#ifdef __EMSCRIPTEN__
    /* No decoder thread on Emscripten - run one synchronous decode step. */
    media_read_sync(ms);
#endif

    SDL_LockMutex(ms->lock);

    /* Not ready yet - deliver silence rather than stale data. */
    if(!ms->ready) {
        SDL_UnlockMutex(ms->lock);
        memset(stream, 0, len);
        return len;
    }

    int rv = 0;

    /* When a fixed duration is known, clamp the request to what remains,
     * and mark the audio finished once nothing remains. */
    if (ms->audio_duration >= 0) {
        unsigned int remaining = (ms->audio_duration - ms->audio_read_samples) * BPS;
        if (len > remaining) {
            len = remaining;
        }

        if (!remaining) {
            ms->audio_finished = 1;
        }

    }

    /* Copy bytes out of the queued frames until the request is satisfied
     * or the queue runs dry. */
    while (len) {

        if (!ms->audio_out_frame) {
            ms->audio_out_frame = dequeue_frame(&ms->audio_queue);
            /* audio_out_index tracks how far into the current frame's data
             * we have already copied. */
            ms->audio_out_index = 0;
        }

        if (!ms->audio_out_frame) {
            /* Queue underrun - return what we have so far. */
            break;
        }

        AVFrame *f = ms->audio_out_frame;

        int avail = f->nb_samples * BPS - ms->audio_out_index;
        int count;

        if (len > avail) {
            count = avail;
        } else {
            count = len;
        }

        memcpy(stream, &f->data[0][ms->audio_out_index], count);

        ms->audio_out_index += count;

        ms->audio_read_samples += count / BPS;
        ms->audio_queue_samples -= count / BPS;

        rv += count;
        len -= count;
        stream += count;

        /* Frame fully consumed - free it so the next one gets dequeued. */
        if (ms->audio_out_index >= f->nb_samples * BPS) {
            av_frame_free(&ms->audio_out_frame);
            ms->audio_out_index = 0;
        }
    }

    /* Only signal if we've consumed something. */
    if (rv) {
        ms->needs_decode = 1;
        SDL_CondBroadcast(ms->cond);
    }

    SDL_UnlockMutex(ms->lock);

    /* With a known duration, pad any unfilled remainder of the request with
     * silence and account for it as read, so time keeps advancing.
     * NOTE(review): this reads/writes ms->audio_duration and
     * ms->audio_read_samples after the mutex is released - presumably safe
     * because only this callback advances audio_read_samples, but confirm. */
    if (ms->audio_duration >= 0) {
        if ((ms->audio_duration - ms->audio_read_samples) * BPS < len) {
            len = (ms->audio_duration - ms->audio_read_samples) * BPS;
        }

        memset(stream, 0, len);
        ms->audio_read_samples += len / BPS;
        rv += len;
    }

    return rv;
}
1499
/**
 * Blocks until the decoder has marked the media ready. A no-op on
 * Emscripten, where decoding runs synchronously on the calling thread.
 */
void media_wait_ready(struct MediaState *ms) {
#ifndef __EMSCRIPTEN__
    SDL_LockMutex(ms->lock);

    /* Loop guards against spurious wakeups; ms->ready is set (with a cond
     * broadcast) by the decoder. */
    while (!ms->ready) {
        SDL_CondWait(ms->cond, ms->lock);
    }

    SDL_UnlockMutex(ms->lock);
#endif
}
1511
1512
/**
 * Returns the media's total duration, in seconds, as recorded when the
 * container was opened.
 */
double media_duration(MediaState *ms) {
    double seconds = ms->total_duration;
    return seconds;
}
1516
/**
 * Starts decoding the media. On Emscripten, decoding happens synchronously
 * on the calling thread; elsewhere, a decoder thread is spawned and stored
 * in ms->thread (NULL if thread creation failed - media_close checks it).
 */
void media_start(MediaState *ms) {

#ifdef __EMSCRIPTEN__
    decode_sync_start(ms);
#else

    char buf[1024];

    /* Name the thread after the file, to aid debugging.
     * Fix: use sizeof(buf) rather than repeating the magic 1024, so the
     * bound always tracks the buffer size. */
    snprintf(buf, sizeof(buf), "decode: %s", ms->filename);
    SDL_Thread *t = SDL_CreateThread(decode_thread, buf, (void *) ms);
    ms->thread = t;
#endif
}
1530
1531
media_open(SDL_RWops * rwops,const char * filename)1532 MediaState *media_open(SDL_RWops *rwops, const char *filename) {
1533
1534 deallocate_deferred();
1535
1536 MediaState *ms = av_calloc(1, sizeof(MediaState));
1537 if (ms == NULL) {
1538 return NULL;
1539 }
1540
1541 ms->filename = av_strdup(filename);
1542 if (ms->filename == NULL) {
1543 deallocate(ms);
1544 return NULL;
1545 }
1546 ms->rwops = rwops;
1547
1548 #ifndef __EMSCRIPTEN__
1549 ms->cond = SDL_CreateCond();
1550 if (ms->cond == NULL) {
1551 deallocate(ms);
1552 return NULL;
1553 }
1554 ms->lock = SDL_CreateMutex();
1555 if (ms->lock == NULL) {
1556 deallocate(ms);
1557 return NULL;
1558 }
1559 #endif
1560
1561 ms->audio_duration = -1;
1562 ms->frame_drops = 1;
1563
1564 return ms;
1565 }
1566
/**
 * Sets the start and end of the stream. This must be called before
 * media_start.
 *
 * start
 *     The time in the stream at which the media starts playing.
 * end
 *     If not negative, the time at which the stream is forced to end if it
 *     has not already. If negative, the stream plays until its natural end.
 *     (NOTE(review): the code checks `end >= 0`, so an end of exactly 0
 *     forces an immediate end; the previous text claimed 0 meant natural
 *     end - confirm which behavior callers rely on.)
 */
void media_start_end(MediaState *ms, double start, double end) {
    ms->skip = start;

    /* A negative end leaves audio_duration untouched (natural end). An
     * end before the start yields a zero-length stream. */
    if (end >= 0) {
        ms->audio_duration = (end < start)
            ? 0
            : (int) ((end - start) * audio_sample_rate);
    }
}
1588
1589 /**
1590 * Marks the channel as having video.
1591 */
void media_want_video(MediaState *ms, int video) {
    ms->want_video = 1;

    /* A `video` value of 2 disables frame dropping; anything else keeps
     * it enabled. */
    ms->frame_drops = (video == 2) ? 0 : 1;
}
1596
/**
 * Pauses or unpauses the media. Pausing records when the pause began;
 * unpausing credits the paused interval to the stream's time offset.
 */
void media_pause(MediaState *ms, int pause) {
    if (pause) {
        /* Record the pause start, unless we're already paused. */
        if (ms->pause_time == 0) {
            ms->pause_time = current_time;
        }
    } else if (ms->pause_time > 0) {
        /* Resume: shift the clock by however long we were paused. */
        ms->time_offset += current_time - ms->pause_time;
        ms->pause_time = 0;
    }
}
1605
/**
 * Closes the media. If a decoder thread exists, it is asked to quit and
 * owns the deallocation of ms; otherwise ms is deallocated here.
 */
void media_close(MediaState *ms) {

    /* No decoder thread was ever created, so nothing else owns ms. */
    if (!ms->thread) {
        deallocate(ms);
        return;
    }

    /* Tell the decoder to terminate. It will deallocate everything for us. */
    SDL_LockMutex(ms->lock);
    ms->quit = 1;

#ifdef __EMSCRIPTEN__
    /* Synchronous build: finish and deallocate inline. NOTE(review): this
     * deallocates ms, yet ms->cond/ms->lock are used just below - and on
     * Emscripten media_open never creates cond/lock, and ms->thread is not
     * set by media_start, so it is unclear this path is ever reached.
     * Confirm against the Emscripten build. */
    media_read_sync_finish(ms);
#endif

    SDL_CondBroadcast(ms->cond);
    SDL_UnlockMutex(ms->lock);

}
1625
media_advance_time(void)1626 void media_advance_time(void) {
1627 current_time = SPEED * av_gettime() * 1e-6;
1628 }
1629
/**
 * Records the sample surfaces used as templates for RGB and RGBA output.
 */
void media_sample_surfaces(SDL_Surface *rgb, SDL_Surface *rgba) {
    rgba_surface = rgba;
    rgb_surface = rgb;
}
1634
media_init(int rate,int status,int equal_mono)1635 void media_init(int rate, int status, int equal_mono) {
1636
1637 deallocate_mutex = SDL_CreateMutex();
1638
1639 audio_sample_rate = rate / SPEED;
1640 audio_equal_mono = equal_mono;
1641
1642 if (status) {
1643 av_log_set_level(AV_LOG_INFO);
1644 } else {
1645 av_log_set_level(AV_LOG_ERROR);
1646 }
1647
1648 }
1649
1650
1651