1 /*****************************************************************************
2  * libavsmash_source.cpp
3  *****************************************************************************
4  * Copyright (C) 2012-2015 L-SMASH Works project
5  *
6  * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
7  *
8  * Permission to use, copy, modify, and/or distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  *****************************************************************************/
20 
21 /* This file is available under an ISC license.
22  * However, when distributing its binary file, it will be under LGPL or GPL. */
23 
24 #include "lsmashsource.h"
25 
26 extern "C"
27 {
28 /* L-SMASH */
29 #include <lsmash.h>                 /* Demuxer */
30 
31 /* Libav
32  * The binary file will be LGPLed or GPLed. */
33 #include <libavformat/avformat.h>       /* Codec specific info importer */
34 #include <libavcodec/avcodec.h>         /* Decoder */
35 #include <libswscale/swscale.h>         /* Colorspace converter */
36 #include <libavresample/avresample.h>   /* Audio resampler */
37 #include <libavutil/imgutils.h>
38 #include <libavutil/mem.h>
39 #include <libavutil/opt.h>
40 }
41 
42 #include "video_output.h"
43 #include "audio_output.h"
44 #include "libavsmash_source.h"
45 
/* Filter names assigned to the 'name' field of the audio and video log handlers. */
static const char func_name_audio_source[] = "LSMASHAudioSource";
static const char func_name_video_source[] = "LSMASHVideoSource";
48 
open_file(const char * source,IScriptEnvironment * env)49 uint32_t LSMASHVideoSource::open_file
50 (
51     const char                        *source,
52     IScriptEnvironment                *env
53 )
54 {
55     libavsmash_video_decode_handler_t *vdhp = this->vdhp.get();
56     lw_log_handler_t *lhp = libavsmash_video_get_log_handler( vdhp );
57     lhp->name     = func_name_video_source;
58     lhp->level    = LW_LOG_FATAL;
59     lhp->priv     = env;
60     lhp->show_log = throw_error;
61     lsmash_movie_parameters_t movie_param;
62     AVFormatContext *format_ctx = nullptr;
63     lsmash_root_t *root = libavsmash_open_file( &format_ctx, source, &file_param, &movie_param, lhp );
64     this->format_ctx.reset( format_ctx );
65     libavsmash_video_set_root( vdhp, root );
66     return movie_param.number_of_tracks;
67 }
68 
get_video_track(const char * source,uint32_t track_number,IScriptEnvironment * env)69 void LSMASHVideoSource::get_video_track
70 (
71     const char                        *source,
72     uint32_t                           track_number,
73     IScriptEnvironment                *env
74 )
75 {
76     libavsmash_video_decode_handler_t *vdhp = this->vdhp.get();
77     libavsmash_video_output_handler_t *vohp = this->vohp.get();
78     uint32_t number_of_tracks = open_file( source, env );
79     if( track_number && track_number > number_of_tracks )
80         env->ThrowError( "LSMASHVideoSource: the number of tracks equals %I32u.", number_of_tracks );
81     (void)libavsmash_video_get_track( vdhp, track_number );
82 }
83 
prepare_video_decoding(libavsmash_video_decode_handler_t * vdhp,libavsmash_video_output_handler_t * vohp,AVFormatContext * format_ctx,int threads,int direct_rendering,int stacked_format,enum AVPixelFormat pixel_format,VideoInfo & vi,IScriptEnvironment * env)84 static void prepare_video_decoding
85 (
86     libavsmash_video_decode_handler_t *vdhp,
87     libavsmash_video_output_handler_t *vohp,
88     AVFormatContext                   *format_ctx,
89     int                                threads,
90     int                                direct_rendering,
91     int                                stacked_format,
92     enum AVPixelFormat                 pixel_format,
93     VideoInfo                         &vi,
94     IScriptEnvironment                *env
95 )
96 {
97     /* Initialize the video decoder configuration. */
98     if( libavsmash_video_initialize_decoder_configuration( vdhp, format_ctx, threads ) < 0 )
99         env->ThrowError( "LSMASHVideoSource: failed to initialize the decoder configuration." );
100     /* Set up output format. */
101     AVCodecContext *ctx = libavsmash_video_get_codec_context( vdhp );
102     int max_width  = libavsmash_video_get_max_width ( vdhp );
103     int max_height = libavsmash_video_get_max_height( vdhp );
104     as_setup_video_rendering( vohp, ctx, "LSMASHVideoSource",
105                               direct_rendering, stacked_format, pixel_format,
106                               max_width, max_height );
107     libavsmash_video_set_get_buffer_func( vdhp );
108     /* Calculate average framerate. */
109     int64_t fps_num = 25;
110     int64_t fps_den = 1;
111     libavsmash_video_setup_timestamp_info( vdhp, vohp, &fps_num, &fps_den );
112     if( vohp->vfr2cfr )
113     {
114         if( libavsmash_video_get_error( vdhp ) )
115             env->ThrowError( "LSMASHVideoSource: failed to get the minimum CTS of video stream." );
116     }
117     else
118         libavsmash_video_clear_error( vdhp );
119     /* Find the first valid video sample. */
120     if( libavsmash_video_find_first_valid_frame( vdhp ) < 0 )
121         env->ThrowError( "LSMASHVideoSource: failed to find the first valid video frame." );
122     /* Setup filter specific info. */
123     vi.fps_numerator   = (unsigned int)fps_num;
124     vi.fps_denominator = (unsigned int)fps_den;
125     vi.num_frames      = vohp->frame_count;
126     /* Force seeking at the first reading. */
127     libavsmash_video_force_seek( vdhp );
128 }
129 
LSMASHVideoSource(const char * source,uint32_t track_number,int threads,int seek_mode,uint32_t forward_seek_threshold,int direct_rendering,int fps_num,int fps_den,int stacked_format,enum AVPixelFormat pixel_format,const char * preferred_decoder_names,IScriptEnvironment * env)130 LSMASHVideoSource::LSMASHVideoSource
131 (
132     const char         *source,
133     uint32_t            track_number,
134     int                 threads,
135     int                 seek_mode,
136     uint32_t            forward_seek_threshold,
137     int                 direct_rendering,
138     int                 fps_num,
139     int                 fps_den,
140     int                 stacked_format,
141     enum AVPixelFormat  pixel_format,
142     const char         *preferred_decoder_names,
143     IScriptEnvironment *env
144 ) : LSMASHVideoSource{}
145 {
146     memset( &vi,  0, sizeof(VideoInfo) );
147     libavsmash_video_decode_handler_t *vdhp = this->vdhp.get();
148     libavsmash_video_output_handler_t *vohp = this->vohp.get();
149     set_preferred_decoder_names( preferred_decoder_names );
150     libavsmash_video_set_seek_mode              ( vdhp, seek_mode );
151     libavsmash_video_set_forward_seek_threshold ( vdhp, forward_seek_threshold );
152     libavsmash_video_set_preferred_decoder_names( vdhp, tokenize_preferred_decoder_names() );
153     vohp->vfr2cfr = (fps_num > 0 && fps_den > 0);
154     vohp->cfr_num = (uint32_t)fps_num;
155     vohp->cfr_den = (uint32_t)fps_den;
156     as_video_output_handler_t *as_vohp = (as_video_output_handler_t *)lw_malloc_zero( sizeof(as_video_output_handler_t) );
157     if( as_vohp == nullptr )
158         env->ThrowError( "LSMASHVideoSource: failed to allocate the AviSynth video output handler." );
159     as_vohp->vi  = &vi;
160     as_vohp->env = env;
161     vohp->private_handler      = as_vohp;
162     vohp->free_private_handler = as_free_video_output_handler;
163     get_video_track( source, track_number, env );
164     prepare_video_decoding( vdhp, vohp, format_ctx.get(), threads, direct_rendering, stacked_format, pixel_format, vi, env );
165     lsmash_discard_boxes( libavsmash_video_get_root( vdhp ) );
166 }
167 
~LSMASHVideoSource()168 LSMASHVideoSource::~LSMASHVideoSource()
169 {
170     libavsmash_video_decode_handler_t *vdhp = this->vdhp.get();
171     lsmash_root_t *root = libavsmash_video_get_root( vdhp );
172     lw_free( libavsmash_video_get_preferred_decoder_names( vdhp ) );
173     lsmash_close_file( &file_param );
174     lsmash_destroy_root( root );
175 }
176 
GetFrame(int n,IScriptEnvironment * env)177 PVideoFrame __stdcall LSMASHVideoSource::GetFrame( int n, IScriptEnvironment *env )
178 {
179     uint32_t sample_number = n + 1;     /* For L-SMASH, sample_number is 1-origin. */
180     libavsmash_video_decode_handler_t *vdhp = this->vdhp.get();
181     libavsmash_video_output_handler_t *vohp = this->vohp.get();
182     lw_log_handler_t *lhp = libavsmash_video_get_log_handler( vdhp );
183     lhp->priv = env;
184     if( libavsmash_video_get_error( vdhp )
185      || libavsmash_video_get_frame( vdhp, vohp, sample_number ) < 0 )
186         return env->NewVideoFrame( vi );
187     AVFrame    *av_frame = libavsmash_video_get_frame_buffer( vdhp );
188     PVideoFrame as_frame;
189     if( make_frame( vohp, av_frame, as_frame, env ) < 0 )
190         env->ThrowError( "LSMASHVideoSource: failed to make a frame." );
191     return as_frame;
192 }
193 
open_file(const char * source,IScriptEnvironment * env)194 uint32_t LSMASHAudioSource::open_file( const char *source, IScriptEnvironment *env )
195 {
196     libavsmash_audio_decode_handler_t *adhp = this->adhp.get();
197     lw_log_handler_t *lhp = libavsmash_audio_get_log_handler( adhp );
198     lhp->name     = func_name_audio_source;
199     lhp->level    = LW_LOG_FATAL;
200     lhp->priv     = env;
201     lhp->show_log = throw_error;
202     lsmash_movie_parameters_t movie_param;
203     AVFormatContext *format_ctx = nullptr;
204     lsmash_root_t *root = libavsmash_open_file( &format_ctx, source, &file_param, &movie_param, lhp );
205     this->format_ctx.reset( format_ctx );
206     libavsmash_audio_set_root( adhp, root );
207     return movie_param.number_of_tracks;
208 }
209 
get_start_time(lsmash_root_t * root,uint32_t track_ID)210 static int64_t get_start_time( lsmash_root_t *root, uint32_t track_ID )
211 {
212     /* Consider start time of this media if any non-empty edit is present. */
213     uint32_t edit_count = lsmash_count_explicit_timeline_map( root, track_ID );
214     for( uint32_t edit_number = 1; edit_number <= edit_count; edit_number++ )
215     {
216         lsmash_edit_t edit;
217         if( lsmash_get_explicit_timeline_map( root, track_ID, edit_number, &edit ) )
218             return 0;
219         if( edit.duration == 0 )
220             return 0;   /* no edits */
221         if( edit.start_time >= 0 )
222             return edit.start_time;
223     }
224     return 0;
225 }
226 
/* Copy 'length' bytes from 'src' into a newly allocated, NUL-terminated buffer.
 *  src    : start of the bytes to copy; it does not need to be NUL-terminated.
 *  length : number of bytes to copy.
 * Returns the new buffer, which the caller must release with delete [], or nullptr if
 * the allocation fails. */
static char *duplicate_as_string( void *src, size_t length )
{
    char *dst = nullptr;
    /* A plain new[] reports failure by throwing, so a null test on its result can never
     * fire. Turn the failure into the nullptr return that the callers check for. */
    try
    {
        dst = new char[length + 1];
    }
    catch( ... )
    {
        return nullptr;
    }
    memcpy( dst, src, length );
    dst[length] = '\0';
    return dst;
}
236 
get_audio_track(const char * source,uint32_t track_number,IScriptEnvironment * env)237 void LSMASHAudioSource::get_audio_track( const char *source, uint32_t track_number, IScriptEnvironment *env )
238 {
239     libavsmash_audio_decode_handler_t *adhp = this->adhp.get();
240     uint32_t number_of_tracks = open_file( source, env );
241     if( track_number && track_number > number_of_tracks )
242         env->ThrowError( "LSMASHAudioSource: the number of tracks equals %I32u.", number_of_tracks );
243     /* L-SMASH */
244     (void)libavsmash_audio_get_track( adhp, track_number );
245 }
246 
count_output_audio_samples(libavsmash_audio_decode_handler_t * adhp,libavsmash_audio_output_handler_t * aohp,bool skip_priming,VideoInfo & vi,IScriptEnvironment * env)247 static void count_output_audio_samples
248 (
249     libavsmash_audio_decode_handler_t *adhp,
250     libavsmash_audio_output_handler_t *aohp,
251     bool                               skip_priming,
252     VideoInfo                         &vi,
253     IScriptEnvironment                *env
254 )
255 {
256     lsmash_root_t *root = libavsmash_audio_get_root( adhp );
257     uint32_t track_id   = libavsmash_audio_get_track_id( adhp );
258     uint64_t start_time = 0;
259     if( skip_priming )
260     {
261         uint32_t media_timescale = libavsmash_audio_get_media_timescale( adhp );
262         uint32_t itunes_metadata_count = lsmash_count_itunes_metadata( root );
263         for( uint32_t i = 1; i <= itunes_metadata_count; i++ )
264         {
265             lsmash_itunes_metadata_t metadata;
266             if( lsmash_get_itunes_metadata( root, i, &metadata ) < 0 )
267                 continue;
268             if( metadata.item != ITUNES_METADATA_ITEM_CUSTOM
269              || (metadata.type != ITUNES_METADATA_TYPE_STRING && metadata.type != ITUNES_METADATA_TYPE_BINARY)
270              || !metadata.meaning || !metadata.name
271              || memcmp( "com.apple.iTunes", metadata.meaning, strlen( metadata.meaning ) )
272              || memcmp( "iTunSMPB", metadata.name, strlen( metadata.name ) ) )
273             {
274                 lsmash_cleanup_itunes_metadata( &metadata );
275                 continue;
276             }
277             char *value = nullptr;
278             if( metadata.type == ITUNES_METADATA_TYPE_STRING )
279             {
280                 size_t length = strlen( metadata.value.string );
281                 if( length >= 116 )
282                     value = duplicate_as_string( metadata.value.string, length );
283             }
284             else    /* metadata.type == ITUNES_METADATA_TYPE_BINARY */
285             {
286                 if( metadata.value.binary.size >= 116 )
287                     value = duplicate_as_string( metadata.value.binary.data, metadata.value.binary.size );
288             }
289             lsmash_cleanup_itunes_metadata( &metadata );
290             if( !value )
291                 continue;
292             uint32_t dummy[9];
293             uint32_t priming_samples;
294             uint32_t padding;
295             uint64_t duration;
296             if( 12 != sscanf( value, " %I32x %I32x %I32x %I64x %I32x %I32x %I32x %I32x %I32x %I32x %I32x %I32x",
297                               &dummy[0], &priming_samples, &padding, &duration, &dummy[1], &dummy[2],
298                               &dummy[3], &dummy[4], &dummy[5], &dummy[6], &dummy[7], &dummy[8] ) )
299             {
300                 delete [] value;
301                 continue;
302             }
303             delete [] value;
304             libavsmash_audio_set_implicit_preroll( adhp );
305             start_time = av_rescale( priming_samples, media_timescale, aohp->output_sample_rate );
306             aohp->skip_decoded_samples = priming_samples;
307             // vi.num_audio_samples = duration + priming_samples;
308             break;
309         }
310         if( aohp->skip_decoded_samples == 0 )
311         {
312             uint32_t ctd_shift;
313             if( lsmash_get_composition_to_decode_shift_from_media_timeline( root, track_id, &ctd_shift ) )
314                 env->ThrowError( "LSMASHAudioSource: failed to get the timeline shift." );
315             start_time = ctd_shift + get_start_time( root, track_id );
316             aohp->skip_decoded_samples = av_rescale( start_time, aohp->output_sample_rate, media_timescale );
317         }
318     }
319     vi.num_audio_samples = libavsmash_audio_count_overall_pcm_samples( adhp, aohp->output_sample_rate, start_time );
320     if( vi.num_audio_samples == 0 )
321         env->ThrowError( "LSMASHAudioSource: no valid audio frame." );
322 }
323 
prepare_audio_decoding(libavsmash_audio_decode_handler_t * adhp,libavsmash_audio_output_handler_t * aohp,AVFormatContext * format_ctx,uint64_t channel_layout,int sample_rate,bool skip_priming,VideoInfo & vi,IScriptEnvironment * env)324 static void prepare_audio_decoding
325 (
326     libavsmash_audio_decode_handler_t *adhp,
327     libavsmash_audio_output_handler_t *aohp,
328     AVFormatContext                   *format_ctx,
329     uint64_t                           channel_layout,
330     int                                sample_rate,
331     bool                               skip_priming,
332     VideoInfo                         &vi,
333     IScriptEnvironment                *env
334 )
335 {
336     /* Initialize the audio decoder configuration. */
337     if( libavsmash_audio_initialize_decoder_configuration( adhp, format_ctx, 0 ) < 0 )
338         env->ThrowError( "LSMASHAudioSource: failed to initialize the decoder configuration." );
339     aohp->output_channel_layout  = libavsmash_audio_get_best_used_channel_layout ( adhp );
340     aohp->output_sample_format   = libavsmash_audio_get_best_used_sample_format  ( adhp );
341     aohp->output_sample_rate     = libavsmash_audio_get_best_used_sample_rate    ( adhp );
342     aohp->output_bits_per_sample = libavsmash_audio_get_best_used_bits_per_sample( adhp );
343     AVCodecContext *ctx = libavsmash_audio_get_codec_context( adhp );
344     as_setup_audio_rendering( aohp, ctx, &vi, env, "LSMASHAudioSource", channel_layout, sample_rate );
345     count_output_audio_samples( adhp, aohp, skip_priming, vi, env );
346     /* Force seeking at the first reading. */
347     libavsmash_audio_force_seek( adhp );
348 }
349 
LSMASHAudioSource(const char * source,uint32_t track_number,bool skip_priming,uint64_t channel_layout,int sample_rate,const char * preferred_decoder_names,IScriptEnvironment * env)350 LSMASHAudioSource::LSMASHAudioSource
351 (
352     const char         *source,
353     uint32_t            track_number,
354     bool                skip_priming,
355     uint64_t            channel_layout,
356     int                 sample_rate,
357     const char         *preferred_decoder_names,
358     IScriptEnvironment *env
359 ) : LSMASHAudioSource{}
360 {
361     memset( &vi,  0, sizeof(VideoInfo) );
362     libavsmash_audio_decode_handler_t *adhp = this->adhp.get();
363     libavsmash_audio_output_handler_t *aohp = this->aohp.get();
364     set_preferred_decoder_names( preferred_decoder_names );
365     libavsmash_audio_set_preferred_decoder_names( adhp, tokenize_preferred_decoder_names() );
366     get_audio_track( source, track_number, env );
367     prepare_audio_decoding( adhp, aohp, format_ctx.get(), channel_layout, sample_rate, skip_priming, vi, env );
368     lsmash_discard_boxes( libavsmash_audio_get_root( adhp ) );
369 }
370 
~LSMASHAudioSource()371 LSMASHAudioSource::~LSMASHAudioSource()
372 {
373     libavsmash_audio_decode_handler_t *adhp = this->adhp.get();
374     lsmash_root_t *root = libavsmash_audio_get_root( adhp );
375     lw_free( libavsmash_audio_get_preferred_decoder_names( adhp ) );
376     lsmash_close_file( &file_param );
377     lsmash_destroy_root( root );
378 }
379 
GetAudio(void * buf,__int64 start,__int64 wanted_length,IScriptEnvironment * env)380 void __stdcall LSMASHAudioSource::GetAudio( void *buf, __int64 start, __int64 wanted_length, IScriptEnvironment *env )
381 {
382     libavsmash_audio_decode_handler_t *adhp = this->adhp.get();
383     libavsmash_audio_output_handler_t *aohp = this->aohp.get();
384     lw_log_handler_t *lhp = libavsmash_audio_get_log_handler( adhp );
385     lhp->priv = env;
386     return (void)libavsmash_audio_get_pcm_samples( adhp, aohp, buf, start, wanted_length );
387 }
388 
CreateLSMASHVideoSource(AVSValue args,void * user_data,IScriptEnvironment * env)389 AVSValue __cdecl CreateLSMASHVideoSource( AVSValue args, void *user_data, IScriptEnvironment *env )
390 {
391 #ifdef NDEBUG
392     av_log_set_level( AV_LOG_QUIET );
393 #endif
394     const char *source                  = args[0].AsString();
395     uint32_t    track_number            = args[1].AsInt( 0 );
396     int         threads                 = args[2].AsInt( 0 );
397     int         seek_mode               = args[3].AsInt( 0 );
398     uint32_t    forward_seek_threshold  = args[4].AsInt( 10 );
399     int         direct_rendering        = args[5].AsBool( false ) ? 1 : 0;
400     int         fps_num                 = args[6].AsInt( 0 );
401     int         fps_den                 = args[7].AsInt( 1 );
402     int         stacked_format          = args[8].AsBool( false ) ? 1 : 0;
403     enum AVPixelFormat pixel_format     = get_av_output_pixel_format( args[9].AsString( nullptr ) );
404     const char *preferred_decoder_names = args[10].AsString( nullptr );
405     threads                = threads >= 0 ? threads : 0;
406     seek_mode              = CLIP_VALUE( seek_mode, 0, 2 );
407     forward_seek_threshold = CLIP_VALUE( forward_seek_threshold, 1, 999 );
408     direct_rendering      &= (pixel_format == AV_PIX_FMT_NONE);
409     return new LSMASHVideoSource( source, track_number, threads, seek_mode, forward_seek_threshold,
410                                   direct_rendering, fps_num, fps_den, stacked_format, pixel_format, preferred_decoder_names, env );
411 }
412 
CreateLSMASHAudioSource(AVSValue args,void * user_data,IScriptEnvironment * env)413 AVSValue __cdecl CreateLSMASHAudioSource( AVSValue args, void *user_data, IScriptEnvironment *env )
414 {
415 #ifdef NDEBUG
416     av_log_set_level( AV_LOG_QUIET );
417 #endif
418     const char *source                  = args[0].AsString();
419     uint32_t    track_number            = args[1].AsInt( 0 );
420     bool        skip_priming            = args[2].AsBool( true );
421     const char *layout_string           = args[3].AsString( nullptr );
422     int         sample_rate             = args[4].AsInt( 0 );
423     const char *preferred_decoder_names = args[5].AsString( nullptr );
424     uint64_t channel_layout = layout_string ? av_get_channel_layout( layout_string ) : 0;
425     return new LSMASHAudioSource( source, track_number, skip_priming,
426                                   channel_layout, sample_rate, preferred_decoder_names, env );
427 }
428