1 /*****************************************************************************
2  * libavsmash_video.c / libavsmash_video.cpp
3  *****************************************************************************
4  * Copyright (C) 2012-2015 L-SMASH Works project
5  *
6  * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
7  *
8  * Permission to use, copy, modify, and/or distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  *****************************************************************************/
20 
21 /* This file is available under an ISC license. */
22 
23 #include "cpp_compat.h"
24 
25 #include <inttypes.h>
26 #include <float.h>
27 
28 #ifdef __cplusplus
29 extern "C"
30 {
31 #endif  /* __cplusplus */
32 #include <lsmash.h>
33 #include <libavformat/avformat.h>
34 #include <libavcodec/avcodec.h>
35 #ifdef __cplusplus
36 }
37 #endif  /* __cplusplus */
38 
39 #include "utils.h"
40 #include "video_output.h"
41 #include "libavsmash.h"
42 #include "libavsmash_video.h"
43 #include "libavsmash_video_internal.h"
44 #include "decode.h"
45 
46 /*****************************************************************************
47  * Allocators / Deallocators
48  *****************************************************************************/
libavsmash_video_alloc_decode_handler(void)49 libavsmash_video_decode_handler_t *libavsmash_video_alloc_decode_handler
50 (
51     void
52 )
53 {
54     libavsmash_video_decode_handler_t *vdhp = (libavsmash_video_decode_handler_t *)lw_malloc_zero( sizeof(libavsmash_video_decode_handler_t) );
55     if( !vdhp )
56         return NULL;
57     vdhp->frame_buffer = av_frame_alloc();
58     if( !vdhp->frame_buffer )
59     {
60         libavsmash_video_free_decode_handler( vdhp );
61         return NULL;
62     }
63     return vdhp;
64 }
65 
libavsmash_video_alloc_output_handler(void)66 libavsmash_video_output_handler_t *libavsmash_video_alloc_output_handler
67 (
68     void
69 )
70 {
71     return (libavsmash_video_output_handler_t *)lw_malloc_zero( sizeof(libavsmash_video_output_handler_t) );
72 }
73 
/* Release a decode handler and everything it owns: the keyframe list,
 * the order converter, both frames and the codec configuration.
 * A NULL handler is accepted and ignored. */
void libavsmash_video_free_decode_handler
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( vdhp != NULL )
    {
        lw_freep( &vdhp->keyframe_list );
        lw_freep( &vdhp->order_converter );
        av_frame_free( &vdhp->frame_buffer );
        av_frame_free( &vdhp->first_valid_frame );
        cleanup_configuration( &vdhp->config );
        lw_free( vdhp );
    }
}
88 
/* Clean up and release an output handler.
 * A NULL handler is accepted and ignored. */
void libavsmash_video_free_output_handler
(
    libavsmash_video_output_handler_t *vohp
)
{
    if( vohp != NULL )
    {
        lw_cleanup_video_output_handler( vohp );
        lw_free( vohp );
    }
}
99 
/* Release the decode handler referenced by *vdhpp and reset *vdhpp to NULL.
 * Nothing happens when vdhpp or *vdhpp is NULL. */
void libavsmash_video_free_decode_handler_ptr
(
    libavsmash_video_decode_handler_t **vdhpp
)
{
    if( vdhpp != NULL && *vdhpp != NULL )
    {
        libavsmash_video_free_decode_handler( *vdhpp );
        *vdhpp = NULL;
    }
}
110 
/* Release the output handler referenced by *vohpp and reset *vohpp to NULL.
 * Nothing happens when vohpp or *vohpp is NULL. */
void libavsmash_video_free_output_handler_ptr
(
    libavsmash_video_output_handler_t **vohpp
)
{
    if( vohpp != NULL && *vohpp != NULL )
    {
        libavsmash_video_free_output_handler( *vohpp );
        *vohpp = NULL;
    }
}
121 
122 /*****************************************************************************
123  * Setters
124  *****************************************************************************/
/* Store the L-SMASH root handle in the decode handler.
 * Only the pointer is stored.
 * Does nothing when vdhp is NULL, matching the NULL tolerance of the getters. */
void libavsmash_video_set_root
(
    libavsmash_video_decode_handler_t *vdhp,
    lsmash_root_t                     *root
)
{
    if( !vdhp )
        return;
    vdhp->root = root;
}
133 
/* Store the track ID used for all subsequent L-SMASH timeline queries.
 * Does nothing when vdhp is NULL, matching the NULL tolerance of the getters. */
void libavsmash_video_set_track_id
(
    libavsmash_video_decode_handler_t *vdhp,
    uint32_t                           track_id
)
{
    if( !vdhp )
        return;
    vdhp->track_id = track_id;
}
142 
/* Store the forward seek threshold in the decode handler.
 * Does nothing when vdhp is NULL, matching the NULL tolerance of the getters. */
void libavsmash_video_set_forward_seek_threshold
(
    libavsmash_video_decode_handler_t *vdhp,
    uint32_t                           forward_seek_threshold
)
{
    if( !vdhp )
        return;
    vdhp->forward_seek_threshold = forward_seek_threshold;
}
151 
/* Store the seek mode in the decode handler.
 * Does nothing when vdhp is NULL, matching the NULL tolerance of the getters. */
void libavsmash_video_set_seek_mode
(
    libavsmash_video_decode_handler_t *vdhp,
    int                                seek_mode
)
{
    if( !vdhp )
        return;
    vdhp->seek_mode = seek_mode;
}
160 
/* Store the list of preferred decoder names in the codec configuration.
 * Only the pointer is stored; the array itself is not copied here.
 * Does nothing when vdhp is NULL, matching the NULL tolerance of the getters. */
void libavsmash_video_set_preferred_decoder_names
(
    libavsmash_video_decode_handler_t *vdhp,
    const char                       **preferred_decoder_names
)
{
    if( !vdhp )
        return;
    vdhp->config.preferred_decoder_names = preferred_decoder_names;
}
169 
/* Copy the log handler *lh by value into the codec configuration.
 * Does nothing when vdhp or lh is NULL, so a missing handler can no longer
 * cause a NULL dereference. */
void libavsmash_video_set_log_handler
(
    libavsmash_video_decode_handler_t *vdhp,
    lw_log_handler_t                  *lh
)
{
    if( !vdhp || !lh )
        return;
    vdhp->config.lh = *lh;
}
178 
/* Remember the codec context's current get_buffer2 callback in the
 * codec configuration.
 * Does nothing when vdhp is NULL or no codec context is attached
 * (config.ctx is NULL), instead of dereferencing a NULL context. */
void libavsmash_video_set_get_buffer_func
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp || !vdhp->config.ctx )
        return;
    vdhp->config.get_buffer = vdhp->config.ctx->get_buffer2;
}
186 
187 /*****************************************************************************
188  * Getters
189  *****************************************************************************/
libavsmash_video_get_root(libavsmash_video_decode_handler_t * vdhp)190 lsmash_root_t *libavsmash_video_get_root
191 (
192     libavsmash_video_decode_handler_t *vdhp
193 )
194 {
195     return vdhp ? vdhp->root : NULL;
196 }
197 
/* Return the stored track ID, or 0 when vdhp is NULL. */
uint32_t libavsmash_video_get_track_id
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return 0;
    return vdhp->track_id;
}
205 
/* Return the stored forward seek threshold, or 0 when vdhp is NULL. */
uint32_t libavsmash_video_get_forward_seek_threshold
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return 0;
    return vdhp->forward_seek_threshold;
}
213 
/* Return the stored seek mode, or -1 when vdhp is NULL. */
int libavsmash_video_get_seek_mode
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return -1;
    return vdhp->seek_mode;
}
221 
/* Return the preferred decoder name list held by the codec configuration,
 * or NULL when vdhp is NULL. */
const char **libavsmash_video_get_preferred_decoder_names
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return NULL;
    return vdhp->config.preferred_decoder_names;
}
229 
/* Return the error state of the codec configuration, or -1 when vdhp is NULL. */
int libavsmash_video_get_error
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return -1;
    return vdhp->config.error;
}
237 
libavsmash_video_get_log_handler(libavsmash_video_decode_handler_t * vdhp)238 lw_log_handler_t *libavsmash_video_get_log_handler
239 (
240     libavsmash_video_decode_handler_t *vdhp
241 )
242 {
243     return vdhp ? &vdhp->config.lh : NULL;
244 }
245 
libavsmash_video_get_codec_context(libavsmash_video_decode_handler_t * vdhp)246 AVCodecContext *libavsmash_video_get_codec_context
247 (
248     libavsmash_video_decode_handler_t *vdhp
249 )
250 {
251     return vdhp ? vdhp->config.ctx : NULL;
252 }
253 
/* Return config.prefer.width, or 0 when vdhp is NULL. */
int libavsmash_video_get_max_width
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return 0;
    return vdhp->config.prefer.width;
}
261 
/* Return config.prefer.height, or 0 when vdhp is NULL. */
int libavsmash_video_get_max_height
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return 0;
    return vdhp->config.prefer.height;
}
269 
libavsmash_video_get_frame_buffer(libavsmash_video_decode_handler_t * vdhp)270 AVFrame *libavsmash_video_get_frame_buffer
271 (
272     libavsmash_video_decode_handler_t *vdhp
273 )
274 {
275     return vdhp ? vdhp->frame_buffer : NULL;
276 }
277 
/* Return the cached sample count, or 0 when vdhp is NULL. */
uint32_t libavsmash_video_get_sample_count
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return 0;
    return vdhp->sample_count;
}
285 
/* Return the cached media timescale, or 0 when vdhp is NULL. */
uint32_t libavsmash_video_get_media_timescale
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return 0;
    return vdhp->media_timescale;
}
293 
/* Return the cached media duration, or 0 when vdhp is NULL. */
uint64_t libavsmash_video_get_media_duration
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return 0;
    return vdhp->media_duration;
}
301 
/* Return the stored minimum CTS, or 0 when vdhp is NULL. */
uint64_t libavsmash_video_get_min_cts
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return 0;
    return vdhp->min_cts;
}
309 
310 /*****************************************************************************
311  * Fetchers
312  *****************************************************************************/
/* Query L-SMASH for the number of samples in the track's media timeline,
 * cache it in vdhp->sample_count and return it. Returns 0 when vdhp is NULL. */
static uint32_t libavsmash_video_fetch_sample_count
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( vdhp == NULL )
        return 0;
    const uint32_t count = lsmash_get_sample_count_in_media_timeline( vdhp->root, vdhp->track_id );
    vdhp->sample_count = count;
    return vdhp->sample_count;
}
323 
/* Query L-SMASH for the track's media parameters, cache the timescale in
 * vdhp->media_timescale and return it.
 * Returns 0 when vdhp is NULL or the query fails; on a failed query the
 * cached value is left untouched. */
static uint32_t libavsmash_video_fetch_media_timescale
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( vdhp == NULL )
        return 0;
    lsmash_media_parameters_t params;
    lsmash_initialize_media_parameters( &params );
    if( lsmash_get_media_parameters( vdhp->root, vdhp->track_id, &params ) >= 0 )
    {
        vdhp->media_timescale = params.timescale;
        return vdhp->media_timescale;
    }
    return 0;
}
338 
/* Query L-SMASH for the media duration of the track's media timeline,
 * cache it in vdhp->media_duration and return it. Returns 0 when vdhp is NULL. */
static uint64_t libavsmash_video_fetch_media_duration
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( vdhp == NULL )
        return 0;
    const uint64_t duration = lsmash_get_media_duration_from_media_timeline( vdhp->root, vdhp->track_id );
    vdhp->media_duration = duration;
    return vdhp->media_duration;
}
349 
350 /*****************************************************************************
351  * Others
352  *****************************************************************************/
/* Select the track_number-th video track of the stored root.
 * On success the track ID is stored in the handler, the sample count,
 * media duration and media timescale caches are refreshed, and 0 is returned.
 * Returns -1 when no matching track is found (track ID 0). */
int libavsmash_video_get_track
(
    libavsmash_video_decode_handler_t *vdhp,
    uint32_t                           track_number
)
{
    lw_log_handler_t *lhp  = libavsmash_video_get_log_handler( vdhp );
    lsmash_root_t    *root = libavsmash_video_get_root( vdhp );
    const uint32_t track_id =
        libavsmash_get_track_by_media_type( root, ISOM_MEDIA_HANDLER_TYPE_VIDEO_TRACK, track_number, lhp );
    if( track_id != 0 )
    {
        libavsmash_video_set_track_id( vdhp, track_id );
        /* The fetchers cache their results in the handler; the return values are not needed here. */
        (void)libavsmash_video_fetch_sample_count   ( vdhp );
        (void)libavsmash_video_fetch_media_duration ( vdhp );
        (void)libavsmash_video_fetch_media_timescale( vdhp );
        return 0;
    }
    return -1;
}
376 
/* Prepare the codec configuration for decoding:
 *   1. collect the sample summaries of the track,
 *   2. pick the first video stream of format_ctx,
 *   3. find and open a decoder for it using 'threads' threads,
 *   4. hand over to initialize_decoder_configuration().
 * Returns the result of step 4, or -1 when an earlier step fails.
 * Failures of steps 2 and 3 are reported through the handler's log handler
 * at LW_LOG_FATAL level. */
int libavsmash_video_initialize_decoder_configuration
(
    libavsmash_video_decode_handler_t *vdhp,
    AVFormatContext                   *format_ctx,
    int                                threads
)
{
    const char *failure = NULL;
    if( libavsmash_video_get_summaries( vdhp ) < 0 )
        return -1;
    /* libavformat */
    const uint32_t wanted_type  = AVMEDIA_TYPE_VIDEO;
    uint32_t       stream_index = 0;
    while( stream_index < format_ctx->nb_streams
        && format_ctx->streams[stream_index]->codecpar->codec_type != wanted_type )
        ++stream_index;
    if( stream_index == format_ctx->nb_streams )
        failure = "Failed to find stream by libavformat.\n";
    else
    {
        /* libavcodec */
        AVCodecParameters *codecpar = format_ctx->streams[stream_index]->codecpar;
        if( libavsmash_find_and_open_decoder( &vdhp->config, codecpar, threads, 1 ) < 0 )
            failure = "Failed to find and open the video decoder.\n";
    }
    if( failure == NULL )
        return initialize_decoder_configuration( vdhp->root, vdhp->track_id, &vdhp->config );
    lw_log_show( libavsmash_video_get_log_handler( vdhp ), LW_LOG_FATAL, "%s", failure );
    return -1;
}
409 
/* Collect the sample summaries of the handler's track into its codec
 * configuration. Returns the result of get_summaries() unchanged
 * (callers in this file treat a negative value as failure). */
int libavsmash_video_get_summaries
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    codec_configuration_t *config = &vdhp->config;
    const int result = get_summaries( vdhp->root, vdhp->track_id, config );
    return result;
}
417 
/* Force a seek before the next reading by moving last_sample_number
 * past the last sample of the track.
 * Does nothing when vdhp is NULL, like libavsmash_video_close_codec_context(). */
void libavsmash_video_force_seek
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return;
    /* Force seek before the next reading. */
    vdhp->last_sample_number = vdhp->sample_count + 1;
}
426 
/* Map a sample number in composition order to its number in decoding order.
 * Without a converter table the number is returned unchanged.
 * The index is not range-checked here. */
static inline uint32_t get_decoding_sample_number
(
    order_converter_t *order_converter,
    uint32_t           composition_sample_number
)
{
    if( !order_converter )
        return composition_sample_number;
    return order_converter[composition_sample_number].composition_to_decoding;
}
437 
/* Translate a composition-order sample number into the corresponding
 * decoding-order sample number using the handler's order converter.
 * When the handler has no converter the number is returned unchanged.
 * composition_sample_number is used as a table index without a range check. */
uint32_t libavsmash_video_get_coded_sample_number
(
    libavsmash_video_decode_handler_t *vdhp,
    uint32_t                           composition_sample_number
)
{
    order_converter_t *converter = vdhp->order_converter;
    if( !converter )
        return composition_sample_number;
    return converter[composition_sample_number].composition_to_decoding;
}
446 
/* Look up the CTS of a sample (given by its decoding-order number) in the
 * track's media timeline and store it in *cts.
 * Returns the result of lsmash_get_cts_from_media_timeline() unchanged. */
int libavsmash_video_get_cts
(
    libavsmash_video_decode_handler_t *vdhp,
    uint32_t                           coded_sample_number,
    uint64_t                          *cts
)
{
    lsmash_root_t *root     = vdhp->root;
    const uint32_t track_id = vdhp->track_id;
    return lsmash_get_cts_from_media_timeline( root, track_id, coded_sample_number, cts );
}
456 
/* Look up the sample delta of a sample (given by its decoding-order number)
 * in the track's media timeline and store it in *sample_duration.
 * Returns the result of lsmash_get_sample_delta_from_media_timeline() unchanged. */
int libavsmash_video_get_sample_duration
(
    libavsmash_video_decode_handler_t *vdhp,
    uint32_t                           coded_sample_number,
    uint32_t                          *sample_duration
)
{
    lsmash_root_t *root     = vdhp->root;
    const uint32_t track_id = vdhp->track_id;
    return lsmash_get_sample_delta_from_media_timeline( root, track_id, coded_sample_number, sample_duration );
}
466 
/* Reset the error state of the codec configuration to 0.
 * Does nothing when vdhp is NULL, like libavsmash_video_close_codec_context(). */
void libavsmash_video_clear_error
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( !vdhp )
        return;
    vdhp->config.error = 0;
}
474 
/* Free the codec context held by the codec configuration, if any.
 * avcodec_free_context() receives the address of config.ctx.
 * NULL handlers and handlers without a context are ignored. */
void libavsmash_video_close_codec_context
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    if( vdhp && vdhp->config.ctx )
        avcodec_free_context( &vdhp->config.ctx );
}
484 
/* Derive the output framerate and frame count from the track's timestamps.
 *
 *   vdhp          : decode handler; root, track_id and sample_count are read.
 *   vohp          : output handler; vfr2cfr, cfr_num and cfr_den are read,
 *                   frame_count is written.
 *   framerate_num : receives the framerate numerator.
 *   framerate_den : receives the framerate denominator.
 *
 * Returns 0 on success and -1 when the timestamps cannot be obtained or analysed.
 * Side effects:
 *   - allocates vdhp->order_converter when the track has a composition delay,
 *   - always updates vdhp->min_cts and stores the result of that CTS lookup
 *     in vdhp->config.error.
 * A detected CTS duplication returns 0 without writing the framerate
 * (unless vohp->vfr2cfr overrides it). */
int libavsmash_video_setup_timestamp_info
(
    libavsmash_video_decode_handler_t *vdhp,
    libavsmash_video_output_handler_t *vohp,
    int64_t                           *framerate_num,
    int64_t                           *framerate_den
)
{
    int err = -1;
    uint64_t media_timescale = lsmash_get_media_timescale( vdhp->root, vdhp->track_id );
    uint64_t media_duration  = lsmash_get_media_duration_from_media_timeline( vdhp->root, vdhp->track_id );
    if( media_duration == 0 )
        media_duration = INT32_MAX;
    /* reduce_fraction() receives its operands by pointer and is relied upon to rewrite them in place.
     * Keep the unreduced values so that the VFR->CFR frame count below is computed from the
     * real duration in seconds, not from a timescale reduced against an unrelated value. */
    const uint64_t unreduced_media_timescale = media_timescale;
    const uint64_t unreduced_media_duration  = media_duration;
    if( vdhp->sample_count == 1 )
    {
        /* Calculate average framerate. */
        reduce_fraction( &media_timescale, &media_duration );
        *framerate_num = (int64_t)media_timescale;
        *framerate_den = (int64_t)media_duration;
        err = 0;
        goto setup_finish;
    }
    lw_log_handler_t *lhp = &vdhp->config.lh;
    lsmash_media_ts_list_t ts_list;
    if( lsmash_get_media_timestamps( vdhp->root, vdhp->track_id, &ts_list ) < 0 )
    {
        lw_log_show( lhp, LW_LOG_ERROR, "Failed to get timestamps." );
        goto setup_finish;
    }
    if( ts_list.sample_count != vdhp->sample_count )
    {
        /* The list was obtained successfully, so it must be released on this path too. */
        lsmash_delete_media_timestamps( &ts_list );
        lw_log_show( lhp, LW_LOG_ERROR, "Failed to count number of video samples." );
        goto setup_finish;
    }
    if( ts_list.sample_count < 2 )
    {
        /* The framerate calculation below reads timestamp[0] and timestamp[1]. */
        lsmash_delete_media_timestamps( &ts_list );
        lw_log_show( lhp, LW_LOG_ERROR, "Failed to calculate framerate: too few video samples." );
        goto setup_finish;
    }
    uint32_t composition_sample_delay;
    if( lsmash_get_max_sample_delay( &ts_list, &composition_sample_delay ) < 0 )
    {
        lsmash_delete_media_timestamps( &ts_list );
        lw_log_show( lhp, LW_LOG_ERROR, "Failed to get composition delay." );
        goto setup_finish;
    }
    if( composition_sample_delay )
    {
        /* Consider composition order for keyframe detection.
         * Note: sample number for L-SMASH is 1-origin. */
        vdhp->order_converter = (order_converter_t *)lw_malloc_zero( (ts_list.sample_count + 1) * sizeof(order_converter_t) );
        if( !vdhp->order_converter )
        {
            lsmash_delete_media_timestamps( &ts_list );
            lw_log_show( lhp, LW_LOG_ERROR, "Failed to allocate memory." );
            goto setup_finish;
        }
        /* Tag every entry with its decoding-order number, then sort into composition order:
         * afterwards entry i holds the decoding number of the (i + 1)-th composed sample. */
        for( uint32_t i = 0; i < ts_list.sample_count; i++ )
            ts_list.timestamp[i].dts = i + 1;
        lsmash_sort_timestamps_composition_order( &ts_list );
        for( uint32_t i = 0; i < ts_list.sample_count; i++ )
            vdhp->order_converter[i + 1].composition_to_decoding = (uint32_t)ts_list.timestamp[i].dts;
    }
    /* Calculate average framerate. */
    uint64_t largest_cts          = ts_list.timestamp[0].cts;
    uint64_t second_largest_cts   = 0;
    uint64_t first_duration       = ts_list.timestamp[1].cts - ts_list.timestamp[0].cts;
    uint64_t composition_timebase = first_duration;
    int      strict_cfr           = 1;
    for( uint32_t i = 1; i < ts_list.sample_count; i++ )
    {
        uint64_t duration = ts_list.timestamp[i].cts - ts_list.timestamp[i - 1].cts;
        if( duration == 0 )
        {
            lsmash_delete_media_timestamps( &ts_list );
            lw_log_show( lhp, LW_LOG_WARNING, "Detected CTS duplication at frame %" PRIu32, i );
            err = 0;
            goto setup_finish;
        }
        if( strict_cfr && duration != first_duration )
            strict_cfr = 0;
        composition_timebase = get_gcd( composition_timebase, duration );
        second_largest_cts   = largest_cts;
        largest_cts          = ts_list.timestamp[i].cts;
    }
    uint64_t reduce = reduce_fraction( &media_timescale, &composition_timebase );
    /* The duration of the last sample is assumed to equal the last CTS interval. */
    uint64_t composition_duration = ((largest_cts - ts_list.timestamp[0].cts) + (largest_cts - second_largest_cts)) / reduce;
    lsmash_delete_media_timestamps( &ts_list );
    double avg_frame_rate = (vdhp->sample_count * ((double)media_timescale / composition_duration));
    if( strict_cfr || !lw_try_rational_framerate( avg_frame_rate, framerate_num, framerate_den, composition_timebase ) )
    {
        uint64_t num = (uint64_t)(avg_frame_rate * composition_timebase + 0.5);
        uint64_t den = composition_timebase;
        if( num && den )
            reduce_fraction( &num, &den );
        else
        {
            num = 1;
            den = 1;
        }
        *framerate_num = (int64_t)num;
        *framerate_den = (int64_t)den;
    }
    err = 0;
setup_finish:;
    if( vohp->vfr2cfr )
    {
        /* Override average framerate by specified output constant framerate. */
        *framerate_num = (int64_t)vohp->cfr_num;
        *framerate_den = (int64_t)vohp->cfr_den;
        vohp->frame_count = ((double)vohp->cfr_num / vohp->cfr_den)
                          * ((double)unreduced_media_duration / unreduced_media_timescale)
                          + 0.5;
    }
    else
        vohp->frame_count = libavsmash_video_get_sample_count( vdhp );
    /* The first sample in composition order carries the minimum CTS. */
    uint32_t min_cts_sample_number = get_decoding_sample_number( vdhp->order_converter, 1 );
    vdhp->config.error = lsmash_get_cts_from_media_timeline( vdhp->root, vdhp->track_id, min_cts_sample_number, &vdhp->min_cts );
    return err;
}
600 
/* Fetch one sample (decoding-order number sample_number) and feed it to the decoder.
 *
 *   picture     : frame to decode into; it is unreferenced first and its pts is
 *                 overwritten with the pts of the fetched packet.
 *   got_picture : forwarded to decode_video_packet().
 *
 * Returns the non-zero result of get_sample() when the sample could not be fetched
 * (in that case neither picture nor *got_picture is touched), -1 when decoding
 * fails, and 0 otherwise.
 * When the sample is flagged as random accessible, vdhp->last_rap_number is updated. */
static int decode_video_sample
(
    libavsmash_video_decode_handler_t *vdhp,
    AVFrame                           *picture,
    int                               *got_picture,
    uint32_t                           sample_number
)
{
    codec_configuration_t *config = &vdhp->config;
    AVPacket packet = { 0 };
    const int fetch_status = get_sample( vdhp->root, vdhp->track_id, sample_number, config, &packet );
    if( fetch_status != 0 )
        return fetch_status;
    /* At this point packet.flags is compared against the L-SMASH random access flags; translate to libavcodec flags. */
    const int is_random_access = (packet.flags != ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE);
    packet.flags = is_random_access ? AV_PKT_FLAG_KEY : 0;
    if( is_random_access )
        vdhp->last_rap_number = sample_number;
    av_frame_unref( picture );
    const uint64_t cts = packet.pts;
    const int decode_status = decode_video_packet( config->ctx, picture, got_picture, &packet );
    /* The pts is stamped even when decoding failed. */
    picture->pts = cts;
    if( decode_status >= 0 )
        return 0;
    lw_log_show( &config->lh, LW_LOG_WARNING, "Failed to decode a video frame." );
    return -1;
}
632 
/* Determine the sample from which decoding has to start in order to reach
 * the requested sample.
 *
 *   composition_sample_number : requested sample, in composition order.
 *   decoding_sample_number    : the same sample in decoding order, or 0 to have it
 *                               derived from composition_sample_number.
 *   rap_number                : receives the decoding-order number of the starting sample.
 *
 * Returns non-zero when the closest random accessible point is flagged as a
 * gradual decoder refresh (roll recovery), 0 otherwise. When sample information
 * cannot be read, *rap_number falls back to vdhp->last_rap_number and 0 is returned. */
static int find_random_accessible_point
(
    libavsmash_video_decode_handler_t *vdhp,
    uint32_t                           composition_sample_number,
    uint32_t                           decoding_sample_number,
    uint32_t                          *rap_number
)
{
    if( decoding_sample_number == 0 )
        decoding_sample_number = get_decoding_sample_number( vdhp->order_converter, composition_sample_number );
    /* Initialized because the lookup below may fail without writing its outputs,
     * and all three are read afterwards. */
    lsmash_random_access_flag ra_flags = ISOM_SAMPLE_RANDOM_ACCESS_FLAG_NONE;
    uint32_t distance = 0;  /* distance from the closest random accessible point to the previous. */
    uint32_t number_of_leadings = 0;
    if( lsmash_get_closest_random_accessible_point_detail_from_media_timeline( vdhp->root, vdhp->track_id,
                                                                               decoding_sample_number, rap_number,
                                                                               &ra_flags, &number_of_leadings, &distance ) < 0 )
        *rap_number = 1;
    int roll_recovery = !!(ra_flags & ISOM_SAMPLE_RANDOM_ACCESS_FLAG_GDR);
    int is_leading    = number_of_leadings && (decoding_sample_number - *rap_number <= number_of_leadings);
    /* Roll recovery and leading samples need decoding to start from the previous point. */
    if( (roll_recovery || is_leading) && *rap_number > distance )
        *rap_number -= distance;
    /* Check whether random accessible point has the same decoder configuration or not. */
    decoding_sample_number = get_decoding_sample_number( vdhp->order_converter, composition_sample_number );
    do
    {
        lsmash_sample_t sample;
        lsmash_sample_t rap_sample;
        if( lsmash_get_sample_info_from_media_timeline( vdhp->root, vdhp->track_id, decoding_sample_number, &sample ) < 0
         || lsmash_get_sample_info_from_media_timeline( vdhp->root, vdhp->track_id, *rap_number, &rap_sample ) < 0 )
        {
            /* Fatal error. */
            *rap_number = vdhp->last_rap_number;
            return 0;
        }
        if( sample.index == rap_sample.index )
            break;
        /* The configurations differ: walk backwards from the requested sample. */
        uint32_t sample_index = sample.index;
        for( uint32_t i = decoding_sample_number - 1; i; i-- )
        {
            if( lsmash_get_sample_info_from_media_timeline( vdhp->root, vdhp->track_id, i, &sample ) < 0 )
            {
                /* Fatal error. */
                *rap_number = vdhp->last_rap_number;
                return 0;
            }
            if( sample.index != sample_index )
            {
                if( distance )
                {
                    *rap_number += distance;
                    distance = 0;
                    /* Note: this continues the enclosing for loop, not the do-while. */
                    continue;
                }
                else
                    *rap_number = i + 1;
            }
        }
        break;
    } while( 1 );
    return roll_recovery;
}
694 
695 /* This function returns the number of the next sample. */
seek_video(libavsmash_video_decode_handler_t * vdhp,AVFrame * picture,uint32_t composition_sample_number,uint32_t rap_number,int error_ignorance)696 static uint32_t seek_video
697 (
698     libavsmash_video_decode_handler_t *vdhp,
699     AVFrame                           *picture,
700     uint32_t                           composition_sample_number,
701     uint32_t                           rap_number,
702     int                                error_ignorance
703 )
704 {
705     /* Prepare to decode from random accessible sample. */
706     codec_configuration_t *config = &vdhp->config;
707     if( config->update_pending )
708         /* Update the decoder configuration. */
709         update_configuration( vdhp->root, vdhp->track_id, config );
710     else
711         libavsmash_flush_buffers( config );
712     if( config->error )
713         return 0;
714     int got_picture;
715     int output_ready = 0;
716     uint64_t rap_cts = 0;
717     uint32_t i;
718     uint32_t decoder_delay = get_decoder_delay( config->ctx );
719     uint32_t goal = composition_sample_number + decoder_delay;
720     for( i = rap_number; i < goal; i++ )
721     {
722         if( config->index == config->queue.index )
723             config->delay_count = MIN( decoder_delay, i - rap_number );
724         int ret = decode_video_sample( vdhp, picture, &got_picture, i );
725         if( got_picture )
726         {
727             output_ready = 1;
728             if( decoder_delay > config->delay_count )
729             {
730                 /* Shorten the distance to the goal if we got a frame earlier than expected. */
731                 uint32_t new_decoder_delay = config->delay_count;
732                 goal -= decoder_delay - new_decoder_delay;
733                 decoder_delay = new_decoder_delay;
734             }
735         }
736         else if( output_ready )
737         {
738             /* More input samples are required to output and the goal become more distant. */
739             ++decoder_delay;
740             ++goal;
741         }
742         /* Some decoders return -1 when feeding a leading sample.
743          * We don't consider as an error if the return value -1 is caused by a leading sample since it's not fatal at all. */
744         if( i == vdhp->last_rap_number )
745             rap_cts = picture->pts;
746         if( ret == -1 && (uint64_t)picture->pts >= rap_cts && !error_ignorance )
747         {
748             lw_log_show( &config->lh, LW_LOG_WARNING, "Failed to decode a video frame." );
749             return 0;
750         }
751         else if( ret >= 1 )
752             /* No decoding occurs. */
753             break;
754     }
755     if( config->index == config->queue.index )
756         config->delay_count = MIN( decoder_delay, i - rap_number );
757     return i;
758 }
759 
/* Decode samples from 'current' up to and including 'goal' into picture.
 * Each decoded sample that yields no picture pushes the goal one sample further
 * and increments config->delay_count. Once the input is exhausted and the
 * decoder reports a delay, the decoder is drained with empty packets.
 * Returns 0 when a picture is available at the end, -1 otherwise
 * (including decoding failures). */
static int get_picture
(
    libavsmash_video_decode_handler_t *vdhp,
    AVFrame                           *picture,
    uint32_t                           current,
    uint32_t                           goal
)
{
    codec_configuration_t *config = &vdhp->config;
    /* Nothing to decode means the picture is considered already obtained. */
    int got_picture = (current > goal);
    while( current <= goal )
    {
        const int status = decode_video_sample( vdhp, picture, &got_picture, current );
        if( status == -1 )
            return -1;
        if( status == 1 )
            /* Sample doesn't exist. */
            break;
        current += 1;
        if( config->update_pending )
            /* A new decoder configuration is needed. Anyway, stop getting picture. */
            break;
        if( got_picture )
            continue;
        /* Fundamental seek operations after the decoder initialization are already done, but
         * more input samples are required to output and the goal becomes more distant. */
        config->delay_count += 1;
        goal                += 1;
    }
    /* Flush the last frames. */
    if( current > vdhp->sample_count && get_decoder_delay( config->ctx ) )
        for( ; current <= goal; ++current )
        {
            /* An empty packet (no data, zero size) asks the decoder for its delayed frames. */
            AVPacket pkt = { 0 };
            av_init_packet( &pkt );
            pkt.data = NULL;
            pkt.size = 0;
            av_frame_unref( picture );
            if( decode_video_packet( config->ctx, picture, &got_picture, &pkt ) < 0 )
            {
                lw_log_show( &config->lh, LW_LOG_WARNING, "Failed to decode and flush a video frame." );
                return -1;
            }
        }
    if( got_picture )
        return 0;
    return -1;
}
808 
/* Get the picture of the composition sample 'sample_number' into 'picture'.
 *
 * - If the request precedes the first valid frame number, or the track has only one sample,
 *   'picture' becomes a new reference to the cached first valid frame.
 * - Otherwise decoding either continues from the last delivered sample (short forward requests)
 *   or restarts from a random accessible point (RAP) found for the requested sample.
 * - When decoding fails, it is retried from earlier RAPs up to MAX_ERROR_COUNT times, and then
 *   once more with the local seek mode raised to SEEK_MODE_AGGRESSIVE.
 *
 * On success, the width/height of the decoder context are clamped to the sizes stored in the
 * extended summary of the active decoder configuration entry.
 * Return 0 if successful.
 * Return -1 otherwise, after logging a warning. */
static int get_requested_picture
(
    libavsmash_video_decode_handler_t *vdhp,
    AVFrame                           *picture,
    uint32_t                           sample_number
)
{
#define MAX_ERROR_COUNT 3       /* arbitrary */
    codec_configuration_t *config = &vdhp->config;
    uint32_t config_index;  /* index of the decoder configuration entry used for the size clamp at return_frame */
    if( sample_number < vdhp->first_valid_frame_number || vdhp->sample_count == 1 )
    {
        /* Get the index of the decoder configuration. */
        lsmash_sample_t sample;
        uint32_t decoding_sample_number = get_decoding_sample_number( vdhp->order_converter, vdhp->first_valid_frame_number );
        if( lsmash_get_sample_info_from_media_timeline( vdhp->root, vdhp->track_id, decoding_sample_number, &sample ) < 0 )
            goto video_fail;
        config_index = sample.index;
        /* Copy the first valid video frame data. */
        av_frame_unref( picture );
        if( av_frame_ref( picture, vdhp->first_valid_frame ) < 0 )
            goto video_fail;
        /* Force seeking at the next access for valid video frame.
         * With the last sample number placed past the sample count, a later request that does not exceed
         * the sample count can never satisfy 'sample_number > last_sample_number' below. */
        vdhp->last_sample_number = vdhp->sample_count + 1;
        goto return_frame;
    }
    uint32_t start_number;  /* number of sample, for normal decoding, where decoding starts excluding decoding delay */
    uint32_t rap_number;    /* number of sample, for seeking, where decoding starts excluding decoding delay */
    /* Work on a local copy so that the escalation to the aggressive mode below lasts only for this call. */
    int seek_mode = vdhp->seek_mode;
    int roll_recovery = 0;
    if( sample_number > vdhp->last_sample_number
     && sample_number <= vdhp->last_sample_number + vdhp->forward_seek_threshold )
    {
        /* Short forward request: keep decoding from the sample following the last one fed to the decoder. */
        start_number = vdhp->last_sample_number + 1 + config->delay_count;
        rap_number   = vdhp->last_rap_number;
    }
    else
    {
        roll_recovery = find_random_accessible_point( vdhp, sample_number, 0, &rap_number );
        if( rap_number == vdhp->last_rap_number && sample_number > vdhp->last_sample_number )
        {
            /* Forward request that resolves to the same RAP as the last time: no need to seek, just keep decoding. */
            roll_recovery = 0;
            start_number  = vdhp->last_sample_number + 1 + config->delay_count;
        }
        else
        {
            /* Require starting to decode from random accessible sample. */
            vdhp->last_rap_number = rap_number;
            start_number = seek_video( vdhp, picture, sample_number, rap_number, roll_recovery || seek_mode != SEEK_MODE_NORMAL );
        }
    }
    /* Get the desired picture. */
    int error_count = 0;
    /* The conditions are evaluated left to right: get_picture() is attempted only when the seek
     * succeeded and no update of the decoder configuration is pending. */
    while( start_number == 0    /* Failed to seek. */
     || config->update_pending  /* Need to update the decoder configuration to decode pictures. */
     || get_picture( vdhp, picture, start_number, sample_number + config->delay_count ) < 0 )
    {
        if( config->update_pending )
        {
            /* Look up the RAP for the requested sample again before seeking with the pending update. */
            roll_recovery = find_random_accessible_point( vdhp, sample_number, 0, &rap_number );
            vdhp->last_rap_number = rap_number;
        }
        else
        {
            /* Failed to get the desired picture. */
            if( config->error || seek_mode == SEEK_MODE_AGGRESSIVE )
                goto video_fail;
            if( ++error_count > MAX_ERROR_COUNT || rap_number <= 1 )
            {
                /* Either too many retries or there is no earlier RAP to fall back to. */
                if( seek_mode == SEEK_MODE_UNSAFE )
                    goto video_fail;
                /* Retry to decode from the same random accessible sample with error ignorance. */
                seek_mode = SEEK_MODE_AGGRESSIVE;
            }
            else
            {
                /* Retry to decode from more past random accessible sample. */
                roll_recovery = find_random_accessible_point( vdhp, sample_number, rap_number - 1, &rap_number );
                if( vdhp->last_rap_number == rap_number )
                    /* The search gave the same RAP as before; give up. */
                    goto video_fail;
                vdhp->last_rap_number = rap_number;
            }
        }
        start_number = seek_video( vdhp, picture, sample_number, rap_number, roll_recovery || seek_mode != SEEK_MODE_NORMAL );
    }
    vdhp->last_sample_number = sample_number;
    config_index = config->index;
return_frame:;
    /* Don't exceed the maximum presentation size specified for each sequence.
     * NOTE(review): 'config_index - 1' assumes the configuration index is 1-origin; confirm that it can never be 0 here. */
    extended_summary_t *extended = &config->entries[ config_index - 1 ].extended;
    if( config->ctx->width > extended->width )
        config->ctx->width = extended->width;
    if( config->ctx->height > extended->height )
        config->ctx->height = extended->height;
    return 0;
video_fail:
    /* fatal error of decoding */
    lw_log_show( &config->lh, LW_LOG_WARNING, "Couldn't read video frame." );
    return -1;
#undef MAX_ERROR_COUNT
}
910 
/* Map an output frame number of the constant frame rate timeline (cfr_num / cfr_den)
 * to the composition sample number of the input whose timestamp fits it best.
 * The search starts at the last delivered sample, walks backward if the target time precedes it,
 * and then walks forward until a sample at or after the target time is found.
 * Return the chosen sample number.
 * Return 0 if the sample information is unavailable or the backward search runs out of samples. */
static uint32_t libavsmash_vfr2cfr
(
    libavsmash_video_decode_handler_t *vdhp,
    libavsmash_video_output_handler_t *vohp,
    uint32_t                           sample_number
)
{
    lsmash_sample_t sample;
    /* Timestamp where the requested output frame starts. */
    double target_pts  = (double)((uint64_t)(sample_number - 1) * vohp->cfr_den) / vohp->cfr_num;
    /* Without a usable last sample, start from "infinitely late" so that the backward search runs. */
    double current_pts = DBL_MAX;
    if( vdhp->last_sample_number <= vdhp->sample_count )
    {
        uint32_t last_decoding_sample_number = get_decoding_sample_number( vdhp->order_converter, vdhp->last_sample_number );
        if( lsmash_get_sample_info_from_media_timeline( vdhp->root, vdhp->track_id, last_decoding_sample_number, &sample ) < 0 )
            return 0;
        current_pts = (double)(sample.cts - vdhp->min_cts) / vdhp->media_timescale;
        if( target_pts == current_pts )
            /* The last delivered sample matches the target exactly. */
            return vdhp->last_sample_number;
    }
    uint32_t composition_sample_number = vdhp->last_sample_number;
    double   prev_pts = current_pts;
    if( target_pts < current_pts )
    {
        /* Go back until a sample which is not later than the target is reached. */
        while( --composition_sample_number )
        {
            uint32_t decoding_sample_number = get_decoding_sample_number( vdhp->order_converter, composition_sample_number );
            if( lsmash_get_sample_info_from_media_timeline( vdhp->root, vdhp->track_id, decoding_sample_number, &sample ) < 0 )
                return 0;
            current_pts = (double)(sample.cts - vdhp->min_cts) / vdhp->media_timescale;
            prev_pts = current_pts;
            if( current_pts <= target_pts )
                break;
        }
        if( composition_sample_number == 0 )
            return 0;
    }
    /* Timestamp where the next output frame starts. */
    double next_target_pts = (double)((uint64_t)sample_number * vohp->cfr_den) / vohp->cfr_num;
    /* Go forward until a sample which is not earlier than the target is reached. */
    for( ++composition_sample_number; composition_sample_number <= vdhp->sample_count; ++composition_sample_number )
    {
        uint32_t decoding_sample_number = get_decoding_sample_number( vdhp->order_converter, composition_sample_number );
        if( lsmash_get_sample_info_from_media_timeline( vdhp->root, vdhp->track_id, decoding_sample_number, &sample ) < 0 )
            return 0;
        current_pts = (double)(sample.cts - vdhp->min_cts) / vdhp->media_timescale;
        if( current_pts >= target_pts )
        {
            /* The previous sample is output in any of the following cases, checked in this order:
             *   1. No input samples exist between the current target and the next target.
             *   2. The current sample lies in the latter half of the interval and so should be
             *      a candidate for the next target.
             *   3. The previous sample is at least as near to the target as the current sample.
             * Otherwise the current sample is the nearest one and is output. */
            if( current_pts > next_target_pts
             || current_pts > (next_target_pts + target_pts) / 2
             || current_pts - target_pts >= target_pts - prev_pts )
                return composition_sample_number - 1;
            return composition_sample_number;
        }
        prev_pts = current_pts;
    }
    /* Every remaining sample is earlier than the target. Output the last sample. */
    return vdhp->sample_count;
}
988 
989 /* Return 0 if successful.
990  * Return 1 if the same frame was requested at the last call.
991  * Return a negative value otherwise. */
libavsmash_video_get_frame(libavsmash_video_decode_handler_t * vdhp,libavsmash_video_output_handler_t * vohp,uint32_t sample_number)992 int libavsmash_video_get_frame
993 (
994     libavsmash_video_decode_handler_t *vdhp,
995     libavsmash_video_output_handler_t *vohp,
996     uint32_t                           sample_number
997 )
998 {
999     if( vohp->vfr2cfr )
1000     {
1001         sample_number = libavsmash_vfr2cfr( vdhp, vohp, sample_number );
1002         if( sample_number == 0 )
1003             return -1;
1004     }
1005     if( sample_number == vdhp->last_sample_number )
1006         return 1;
1007     int ret;
1008     if( (ret = get_requested_picture( vdhp, vdhp->frame_buffer, sample_number )) < 0
1009      || (ret = update_scaler_configuration_if_needed( &vohp->scaler, &vdhp->config.lh, vdhp->frame_buffer )) < 0 )
1010         return ret;
1011     return 0;
1012 }
1013 
/* Decode from the first sample until the decoder outputs a picture for the first time
 * and record the number of the first valid frame.
 * Each sample carrying data which is consumed without outputting a picture increments
 * the delay count of the decoder configuration.
 * The picture is kept as the first valid frame only when its number is greater than 1
 * or the track consists of a single sample.
 * Return 0 if successful, including the case where no picture is output at all.
 * Return -1 if cloning the first valid frame fails. */
int libavsmash_video_find_first_valid_frame
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    codec_configuration_t *config = &vdhp->config;
    /* Samples beyond the sample count are tried as well, as many as the decoder delay. */
    for( uint32_t sample_number = 1; sample_number <= vdhp->sample_count + get_decoder_delay( config->ctx ); sample_number++ )
    {
        AVPacket pkt = { 0 };
        get_sample( vdhp->root, vdhp->track_id, sample_number, config, &pkt );
        av_frame_unref( vdhp->frame_buffer );
        int got_picture;
        int decoded = decode_video_packet( config->ctx, vdhp->frame_buffer, &got_picture, &pkt ) >= 0 && got_picture;
        if( !decoded )
        {
            /* Only packets which actually carry data count toward the delay. */
            if( pkt.data )
                ++ config->delay_count;
            continue;
        }
        /* The output picture belongs to an earlier sample by the amount of the delay. */
        vdhp->first_valid_frame_number = sample_number - MIN( get_decoder_delay( config->ctx ), config->delay_count );
        if( vdhp->first_valid_frame_number <= 1 && vdhp->sample_count != 1 )
            /* No need to keep the picture. */
            return 0;
        vdhp->first_valid_frame = av_frame_clone( vdhp->frame_buffer );
        if( !vdhp->first_valid_frame )
            return -1;
        av_frame_unref( vdhp->frame_buffer );
        return 0;
    }
    return 0;
}
1043 
/* Allocate and fill the keyframe list of the decode handler.
 * The list is indexed by composition sample number (1-origin, element 0 is left zero);
 * an element is set to 1 when the sample itself is its closest random accessible point.
 * Samples for which the random accessible point can't be queried are left as 0.
 * Return 0 if successful.
 * Return -1 if the allocation fails. */
int libavsmash_video_create_keyframe_list
(
    libavsmash_video_decode_handler_t *vdhp
)
{
    /* One extra element so that sample numbers can be used as indexes directly. */
    vdhp->keyframe_list = (uint8_t *)lw_malloc_zero( (vdhp->sample_count + 1) * sizeof(uint8_t) );
    if( !vdhp->keyframe_list )
        return -1;
    for( uint32_t number = 1; number <= vdhp->sample_count; number++ )
    {
        /* The media timeline is queried in decoding order. */
        uint32_t rap_number;
        uint32_t decoding_sample_number = get_decoding_sample_number( vdhp->order_converter, number );
        if( lsmash_get_closest_random_accessible_point_from_media_timeline( vdhp->root,
                                                                            vdhp->track_id,
                                                                            decoding_sample_number, &rap_number ) >= 0
         && decoding_sample_number == rap_number )
            vdhp->keyframe_list[number] = 1;
    }
    return 0;
}
1065 
/* Tell whether the requested sample is a keyframe according to the keyframe list.
 * If VFR->CFR conversion is enabled, 'sample_number' is first mapped onto an input sample number.
 * Return 1 if the sample is marked as a keyframe.
 * Return 0 otherwise, including when the keyframe list has not been created or
 * the sample number lies outside the list. */
int libavsmash_video_is_keyframe
(
    libavsmash_video_decode_handler_t *vdhp,
    libavsmash_video_output_handler_t *vohp,
    uint32_t                           sample_number
)
{
    if( vohp->vfr2cfr )
        sample_number = libavsmash_vfr2cfr( vdhp, vohp, sample_number );
    /* The list holds sample_count + 1 elements; never read beyond it or through a null pointer. */
    if( !vdhp->keyframe_list || sample_number > vdhp->sample_count )
        return 0;
    return vdhp->keyframe_list[sample_number];
}
1077