1 /*
2  * transition_mix.c -- mix two audio streams
3  * Copyright (C) 2003-2020 Meltytech, LLC
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 
20 #include <framework/mlt_transition.h>
21 #include <framework/mlt_frame.h>
22 #include <framework/mlt_log.h>
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <math.h>
28 
/* Largest number of interleaved channels that will be mixed. */
#define MAX_CHANNELS ( 6 )

/* Number of sample frames used to size the carry-over buffers. */
#define MAX_SAMPLES ( 192000 )

/* Bytes occupied by `samples` frames of `channels` interleaved floats. */
#define SAMPLE_BYTES( samples, channels ) \
	( (samples) * (channels) * sizeof( float ) )

/* Size in bytes of one carry-over buffer. */
#define MAX_BYTES SAMPLE_BYTES( MAX_SAMPLES, MAX_CHANNELS )
33 
34 typedef struct transition_mix_s
35 {
36 	mlt_transition parent;
37 	float src_buffer[MAX_SAMPLES *  MAX_CHANNELS];
38 	float dest_buffer[MAX_SAMPLES * MAX_CHANNELS];
39 	int src_buffer_count;
40 	int dest_buffer_count;
41 	mlt_position previous_frame_a;
42 	mlt_position previous_frame_b;
43 } *transition_mix;
44 
/** Cross-fade buffer B into buffer A, in place.
 *
 * Each output sample is weight * b + (1 - weight) * a, where the weight moves
 * linearly from weight_start towards weight_end across the sample frames.
 *
 * \param weight_start weight of B for the first sample frame
 * \param weight_end   weight the ramp heads towards
 * \param buffer_a     interleaved input A; receives the result
 * \param buffer_b     interleaved input B
 * \param channels_a   channels per frame in buffer_a
 * \param channels_b   channels per frame in buffer_b
 * \param channels_out number of leading channels of each frame to mix
 * \param samples      number of sample frames to mix
 */
static void mix_audio( double weight_start, double weight_end, float *buffer_a,
	float *buffer_b, int channels_a, int channels_b, int channels_out, int samples )
{
	// The weight advances by a fixed amount after every sample frame.
	const double step = ( weight_end - weight_start ) / samples;
	double weight = weight_start;

	for ( int frame = 0; frame < samples; frame++ )
	{
		for ( int ch = 0; ch < channels_out; ch++ )
		{
			float *dst = &buffer_a[ frame * channels_a + ch ];
			const double a = (double) *dst;
			const double b = (double) buffer_b[ frame * channels_b + ch ];
			const double blended = weight * b + (1.0 - weight) * a;

			*dst = blended;
		}
		weight += step;
	}
}
67 
/** Add a weighted buffer B onto buffer A, in place.
 *
 * Each output sample is weight * b + a, where the weight moves linearly from
 * weight_start towards weight_end across the sample frames. Buffer A is never
 * attenuated.
 *
 * \param weight_start weight of B for the first sample frame
 * \param weight_end   weight the ramp heads towards
 * \param buffer_a     interleaved input A; receives the result
 * \param buffer_b     interleaved input B
 * \param channels_a   channels per frame in buffer_a
 * \param channels_b   channels per frame in buffer_b
 * \param channels_out number of leading channels of each frame to sum
 * \param samples      number of sample frames to sum
 */
static void sum_audio( double weight_start, double weight_end, float *buffer_a,
	float *buffer_b, int channels_a, int channels_b, int channels_out, int samples )
{
	const double ramp_step = ( weight_end - weight_start ) / samples;
	double gain_b = weight_start;
	int frame = 0;

	while ( frame < samples )
	{
		int ch;

		for ( ch = 0; ch < channels_out; ch++ )
		{
			const int index_a = frame * channels_a + ch;
			const int index_b = frame * channels_b + ch;
			const double a = (double) buffer_a[ index_a ];
			const double b = (double) buffer_b[ index_b ];

			buffer_a[ index_a ] = gain_b * b + a;
		}

		// Move along the ramp once per sample frame.
		gain_b += ramp_step;
		frame++;
	}
}
89 
90 // This filter uses an inline low pass filter to allow mixing without volume hacking.
combine_audio(double weight,float * buffer_a,float * buffer_b,int channels_a,int channels_b,int channels_out,int samples)91 static void combine_audio( double weight, float *buffer_a, float *buffer_b,
92 	int channels_a, int channels_b, int channels_out, int samples )
93 {
94 	int i, j;
95 	double Fc = 0.5;
96 	double B = exp(-2.0 * M_PI * Fc);
97 	double A = 1.0 - B;
98 	double a, b, v;
99 	double v_prev[MAX_CHANNELS];
100 
101 	for ( j = 0; j < channels_out; j++ )
102 		v_prev[j] = (double) buffer_a[j];
103 
104 	for ( i = 0; i < samples; i++ )
105 	{
106 		for ( j = 0; j < channels_out; j++ )
107 		{
108 			a = (double) buffer_a[ i * channels_a + j ];
109 			b = (double) buffer_b[ i * channels_b + j ];
110 			v = weight * a + b;
111 			v_prev[j] = buffer_a[ i * channels_a + j ] = v * A + v_prev[j] * B;
112 		}
113 	}
114 }
115 
116 /** Get the audio.
117 */
118 
transition_get_audio(mlt_frame frame_a,void ** buffer,mlt_audio_format * format,int * frequency,int * channels,int * samples)119 static int transition_get_audio( mlt_frame frame_a, void **buffer, mlt_audio_format *format, int *frequency, int *channels, int *samples )
120 {
121 	int error = 0;
122 
123 	// Get the b frame from the stack
124 	mlt_frame frame_b = mlt_frame_pop_audio( frame_a );
125 
126 	// Get the effect
127 	mlt_transition transition = mlt_frame_pop_audio( frame_a );
128 
129 	// Get the properties of the b frame
130 	mlt_properties b_props = MLT_FRAME_PROPERTIES( frame_b );
131 
132 	transition_mix self = transition->child;
133 	float *buffer_b, *buffer_a;
134 	int frequency_b = *frequency, frequency_a = *frequency;
135 	int channels_b = *channels, channels_a = *channels;
136 	int samples_b = *samples, samples_a = *samples;
137 
138 	// We can only mix interleaved 32-bit float.
139 	*format = mlt_audio_f32le;
140 	// Get the audio from our producers
141 	mlt_frame_get_audio( frame_b, (void**) &buffer_b, format, &frequency_b, &channels_b, &samples_b );
142 	mlt_frame_get_audio( frame_a, (void**) &buffer_a, format, &frequency_a, &channels_a, &samples_a );
143 
144 	// Prevent dividing by zero.
145 	if ( !channels_a || !channels_b || !buffer_a || !buffer_b )
146 		return 1;
147 
148 	if ( buffer_b == buffer_a )
149 	{
150 		*samples = samples_b;
151 		*channels = channels_b;
152 		*buffer = buffer_b;
153 		*frequency = frequency_b;
154 		return error;
155 	}
156 
157 	// I do not recall what these silent_audio properties are about.
158 	int silent = mlt_properties_get_int( MLT_FRAME_PROPERTIES( frame_a ), "silent_audio" );
159 	mlt_properties_set_int( MLT_FRAME_PROPERTIES( frame_a ), "silent_audio", 0 );
160 	if ( silent )
161 		memset( buffer_a, 0, samples_a * channels_a * sizeof( float ) );
162 	silent = mlt_properties_get_int( b_props, "silent_audio" );
163 	mlt_properties_set_int( b_props, "silent_audio", 0 );
164 	if ( silent )
165 		memset( buffer_b, 0, samples_b * channels_b * sizeof( float ) );
166 
167 	// At this point we have two frames of audio with possibly differing sample
168 	// counts. How to reconcile this?
169 
170 #ifdef KEEP_IT_SIMPLE_AND_STUPID
171 	// The simple and stupid way to deal with different sample counts was to
172 	// use the lesser of the two. This sounds good. You can #define SIMPLE_AND_STUPID
173 	// and hear what it sounds like.
174 	*samples = MIN(samples_a, samples_b);
175 	*channels = MIN( MIN( channels_b, channels_a ), MAX_CHANNELS );
176 	*frequency = frequency_a;
177 	// Note this direct call to sum_audio() skips ramping and the alternative
178 	// mixing methods.
179 	sum_audio( 1, 1, buffer_a, buffer_b, channels_a, channels_b, *channels, *samples );
180 	*buffer = buffer_a;
181 
182 	return error;
183 #endif
184 
185 	// However, the simple and stupid approach drops samples. Over time, this
186 	// can accumulate and cause an A/V sync drift, which addressed in b2640656
187 	// by saving the unused samples in a buffer and then using them first on the
188 	// next iteration.
189 
190 	// determine number of samples to process
191 	*samples = MIN( self->src_buffer_count + samples_b, self->dest_buffer_count + samples_a );
192 	*channels = MIN( MIN( channels_b, channels_a ), MAX_CHANNELS );
193 	*frequency = frequency_a;
194 
195 	// Prevent src buffer overflow by discarding oldest samples.
196 	samples_b = MIN( samples_b, MAX_SAMPLES * MAX_CHANNELS / channels_b );
197 	size_t bytes = SAMPLE_BYTES( samples_b, channels_b );
198 	if ( SAMPLE_BYTES( self->src_buffer_count + samples_b, channels_b ) > MAX_BYTES ) {
199 		mlt_log_verbose( MLT_TRANSITION_SERVICE(transition), "buffer overflow: src_buffer_count %d\n",
200 					  self->src_buffer_count );
201 		self->src_buffer_count = MAX_SAMPLES * MAX_CHANNELS / channels_b - samples_b;
202 		memmove( self->src_buffer, &self->src_buffer[MAX_SAMPLES * MAX_CHANNELS - samples_b * channels_b],
203 				 SAMPLE_BYTES( samples_b, channels_b ) );
204 	}
205 
206 	// Silence src buffer if discontinuity
207 	if (self->src_buffer_count > 0 && mlt_frame_get_position(frame_b) != self->previous_frame_b + 1)
208 		memset(self->src_buffer, 0, SAMPLE_BYTES(self->src_buffer_count, channels_b));
209 	self->previous_frame_b = mlt_frame_get_position(frame_b);
210 
211 	// Append the new samples from frame B to the src buffer
212 	memcpy( &self->src_buffer[self->src_buffer_count * channels_b], buffer_b, bytes );
213 	self->src_buffer_count += samples_b;
214 	buffer_b = self->src_buffer;
215 
216 	// Prevent dest buffer overflow by discarding oldest samples.
217 	samples_a = MIN( samples_a, MAX_SAMPLES * MAX_CHANNELS / channels_a );
218 	bytes = SAMPLE_BYTES( samples_a, channels_a );
219 	if ( SAMPLE_BYTES( self->dest_buffer_count + samples_a, channels_a ) > MAX_BYTES ) {
220 		mlt_log_verbose( MLT_TRANSITION_SERVICE(transition), "buffer overflow: dest_buffer_count %d\n",
221 					  self->dest_buffer_count );
222 		self->dest_buffer_count = MAX_SAMPLES * MAX_CHANNELS / channels_a - samples_a;
223 		memmove( self->dest_buffer, &self->dest_buffer[MAX_SAMPLES * MAX_CHANNELS - samples_a * channels_a],
224 				 SAMPLE_BYTES( samples_a, channels_a ) );
225 	}
226 
227 	// Silence dest buffer if discontinuity
228 	if (self->dest_buffer_count > 0 && mlt_frame_get_position(frame_a) != self->previous_frame_a + 1)
229 		memset(self->dest_buffer, 0, SAMPLE_BYTES(self->dest_buffer_count, channels_a));
230 	self->previous_frame_a = mlt_frame_get_position(frame_a);
231 
232 	// Append the new samples from frame A to the dest buffer
233 	memcpy( &self->dest_buffer[self->dest_buffer_count * channels_a], buffer_a, bytes );
234 	self->dest_buffer_count += samples_a;
235 	buffer_a = self->dest_buffer;
236 
237 	// Do the mixing.
238 	if ( mlt_properties_get_int( MLT_TRANSITION_PROPERTIES(transition), "sum" ) )
239 	{
240 		double mix_start = 1.0, mix_end = 1.0;
241 		if ( mlt_properties_get( b_props, "audio.previous_mix" ) )
242 			mix_start = mlt_properties_get_double( b_props, "audio.previous_mix" );
243 		if ( mlt_properties_get( b_props, "audio.mix" ) )
244 			mix_end = mlt_properties_get_double( b_props, "audio.mix" );
245 		if ( mlt_properties_get_int( b_props, "audio.reverse" ) )
246 		{
247 			mix_start = 1.0 - mix_start;
248 			mix_end = 1.0 - mix_end;
249 		}
250 		sum_audio( mix_start, mix_end, buffer_a, buffer_b, channels_a, channels_b, *channels, *samples );
251 	}
252 	else if ( mlt_properties_get_int( MLT_TRANSITION_PROPERTIES(transition), "combine" ) )
253 	{
254 		double weight = 1.0;
255 		if ( mlt_properties_get_int( MLT_FRAME_PROPERTIES( frame_a ), "meta.mixdown" ) )
256 			weight = 1.0 - mlt_properties_get_double( MLT_FRAME_PROPERTIES( frame_a ), "meta.volume" );
257 		combine_audio( weight, buffer_a, buffer_b, channels_a, channels_b, *channels, *samples );
258 	}
259 	else
260 	{
261 		double mix_start = 0.5, mix_end = 0.5;
262 		if ( mlt_properties_get( b_props, "audio.previous_mix" ) )
263 			mix_start = mlt_properties_get_double( b_props, "audio.previous_mix" );
264 		if ( mlt_properties_get( b_props, "audio.mix" ) )
265 			mix_end = mlt_properties_get_double( b_props, "audio.mix" );
266 		if ( mlt_properties_get_int( b_props, "audio.reverse" ) )
267 		{
268 			mix_start = 1.0 - mix_start;
269 			mix_end = 1.0 - mix_end;
270 		}
271 		mix_audio( mix_start, mix_end, buffer_a, buffer_b, channels_a, channels_b, *channels, *samples );
272 	}
273 
274 	// Copy the audio from the dest buffer into the frame.
275 	bytes = SAMPLE_BYTES( *samples, *channels );
276 	*buffer = mlt_pool_alloc( bytes );
277 	memcpy( *buffer, buffer_a, bytes );
278 	mlt_frame_set_audio( frame_a, *buffer, *format, bytes, mlt_pool_release );
279 
280 	if ( mlt_properties_get_int( b_props, "_speed" ) == 0 )
281 	{
282 		// Flush the buffer when paused and scrubbing.
283 		samples_b = self->src_buffer_count;
284 		samples_a = self->dest_buffer_count;
285 	}
286 	else
287 	{
288 		// It is also not good for A/V sync to let many samples accumulate in
289 		// the buffer. This part provides a time-based buffer limit.
290 
291 		// Determine the maximum amount of latency permitted in the buffer.
292 		int max_latency = CLAMP( *frequency / 1000, 0, MAX_SAMPLES ); // samples in 1ms
293 		// samples_b becomes the new target src buffer count.
294 		samples_b = CLAMP( self->src_buffer_count - *samples, 0, max_latency );
295 		// samples_b becomes the number of samples to consume: difference between actual and the target.
296 		samples_b = self->src_buffer_count - samples_b;
297 		// samples_a becomes the new target dest buffer count.
298 		samples_a = CLAMP( self->dest_buffer_count - *samples, 0, max_latency );
299 		// samples_a becomes the number of samples to consume: difference between actual and the target.
300 		samples_a = self->dest_buffer_count - samples_a;
301 	}
302 
303 	// Consume the src buffer.
304 	self->src_buffer_count -= samples_b;
305 	if ( self->src_buffer_count ) {
306 		memmove( self->src_buffer, &self->src_buffer[samples_b * channels_b],
307 			SAMPLE_BYTES( self->src_buffer_count, channels_b ));
308 	}
309 	// Consume the dest buffer.
310 	self->dest_buffer_count -= samples_a;
311 	if ( self->dest_buffer_count > 0 ) {
312 		memmove( self->dest_buffer, &self->dest_buffer[samples_a * channels_a],
313 			SAMPLE_BYTES( self->dest_buffer_count, channels_a ));
314 	}
315 
316 	return error;
317 }
318 
319 
320 /** Mix transition processing.
321 */
322 
transition_process(mlt_transition transition,mlt_frame a_frame,mlt_frame b_frame)323 static mlt_frame transition_process( mlt_transition transition, mlt_frame a_frame, mlt_frame b_frame )
324 {
325 	mlt_properties properties = MLT_TRANSITION_PROPERTIES( transition );
326 	mlt_properties b_props = MLT_FRAME_PROPERTIES( b_frame );
327 
328 	// Only if mix is specified, otherwise a producer may set the mix
329 	if ( mlt_properties_get( properties, "start" ) )
330 	{
331 		// Determine the time position of this frame in the transition duration
332 		mlt_properties props = mlt_properties_get_data( MLT_FRAME_PROPERTIES( b_frame ), "_producer", NULL );
333 		mlt_position in = mlt_properties_get_int( props, "in" );
334 		mlt_position out = mlt_properties_get_int( props, "out" );
335 		int length = mlt_properties_get_int( properties, "length" );
336 		mlt_position time = mlt_properties_get_int( props, "_frame" );
337 		double mix = mlt_transition_get_progress( transition, b_frame );
338 		if ( mlt_properties_get_int(  properties, "always_active" ) )
339 			mix = ( double ) ( time - in ) / ( double ) ( out - in + 1 );
340 
341 		// TODO: Check the logic here - shouldn't we be computing current and next mixing levels in all cases?
342 		if ( length == 0 )
343 		{
344 			// If there is an end mix level adjust mix to the range
345 			if ( mlt_properties_get( properties, "end" ) )
346 			{
347 				double start = mlt_properties_get_double( properties, "start" );
348 				double end = mlt_properties_get_double( properties, "end" );
349 				mix = start + ( end - start ) * mix;
350 			}
351 			// A negative means total crossfade (uses position)
352 			else if ( mlt_properties_get_double( properties, "start" ) >= 0 )
353 			{
354 				// Otherwise, start/constructor is a constant mix level
355 		    	mix = mlt_properties_get_double( properties, "start" );
356 			}
357 
358 			// Finally, set the mix property on the frame
359 			mlt_properties_set_double( b_props, "audio.mix", mix );
360 
361 			// Initialise transition previous mix value to prevent an inadvertent jump from 0
362 			mlt_position last_position = mlt_properties_get_position( properties, "_last_position" );
363 			mlt_position current_position = mlt_frame_get_position( b_frame );
364 			mlt_properties_set_position( properties, "_last_position", current_position );
365 			if ( !mlt_properties_get( properties, "_previous_mix" )
366 			     || current_position != last_position + 1 )
367 				mlt_properties_set_double( properties, "_previous_mix", mix );
368 
369 			// Tell b frame what the previous mix level was
370 			mlt_properties_set_double( b_props, "audio.previous_mix", mlt_properties_get_double( properties, "_previous_mix" ) );
371 
372 			// Save the current mix level for the next iteration
373 			mlt_properties_set_double( properties, "_previous_mix", mlt_properties_get_double( b_props, "audio.mix" ) );
374 
375 			mlt_properties_set_double( b_props, "audio.reverse", mlt_properties_get_double( properties, "reverse" ) );
376 		}
377 		else
378 		{
379 			double level = mlt_properties_get_double( properties, "start" );
380 			double mix_start = level;
381 			double mix_end = mix_start;
382 			double mix_increment = 1.0 / length;
383 			if ( time - in < length )
384 			{
385 				mix_start = mix_start * ( ( double )( time - in ) / length );
386 				mix_end = mix_start + mix_increment;
387 			}
388 			else if ( time > out - length )
389 			{
390 				mix_end = mix_start * ( ( double )( out - time - in ) / length );
391 				mix_start = mix_end - mix_increment;
392 			}
393 
394 			mix_start = mix_start < 0 ? 0 : mix_start > level ? level : mix_start;
395 			mix_end = mix_end < 0 ? 0 : mix_end > level ? level : mix_end;
396 			mlt_properties_set_double( b_props, "audio.previous_mix", mix_start );
397 			mlt_properties_set_double( b_props, "audio.mix", mix_end );
398 		}
399 	}
400 
401 	// Override the get_audio method
402 	mlt_frame_push_audio( a_frame, transition );
403 	mlt_frame_push_audio( a_frame, b_frame );
404 	mlt_frame_push_audio( a_frame, transition_get_audio );
405 
406 	// Ensure transition_get_audio is called if test_audio=1.
407 	if ( mlt_properties_get_int( properties, "accepts_blanks" ) )
408 		mlt_properties_set_int( MLT_FRAME_PROPERTIES(a_frame), "test_audio", 0 );
409 
410 	return a_frame;
411 }
412 
/** Destructor for the transition.
 *
 * Frees the private mix state and then hands the transition itself to
 * mlt_transition_close().
 */
static void transition_close( mlt_transition transition )
{
	transition_mix state = transition->child;

	free( state );

	// Unhook this destructor before delegating, presumably so that
	// mlt_transition_close() does not call back into it.
	transition->close = NULL;
	mlt_transition_close( transition );
}
419 
420 /** Constructor for the transition.
421 */
422 
transition_mix_init(mlt_profile profile,mlt_service_type type,const char * id,char * arg)423 mlt_transition transition_mix_init( mlt_profile profile, mlt_service_type type, const char *id, char *arg )
424 {
425 	transition_mix mix = calloc( 1 , sizeof( struct transition_mix_s ) );
426 	mlt_transition transition = calloc( 1, sizeof( struct mlt_transition_s ) );
427 	if ( mix && transition && !mlt_transition_init( transition, mix ) )
428 	{
429 		mix->parent = transition;
430 		transition->close = transition_close;
431 		transition->process = transition_process;
432 		if ( arg )
433 		{
434 			mlt_properties_set_double( MLT_TRANSITION_PROPERTIES( transition ), "start", atof( arg ) );
435 			if ( atof( arg ) < 0 )
436 				mlt_properties_set_int( MLT_TRANSITION_PROPERTIES( transition ), "accepts_blanks", 1 );
437 		}
438 		// Inform apps and framework that this is an audio only transition
439 		mlt_properties_set_int( MLT_TRANSITION_PROPERTIES( transition ), "_transition_type", 2 );
440 	} else {
441 		if ( transition )
442 			mlt_transition_close( transition );
443 		if ( mix )
444 			free( mix );
445 	}
446 	return transition;
447 }
448 
449