/*
 * transition_matte.c -- replace alpha channel of track
 *
 * Copyright (C) 2003-2014 Meltytech, LLC
 * Author: Maksym Veremeyenko <verem@m1stereo.tv>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

#include <framework/mlt.h>

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <math.h>

#if defined(USE_SSE) && defined(ARCH_X86_64)
static void __attribute__((noinline)) copy_Y_to_A_scaled_luma_sse(uint8_t* alpha_a, uint8_t* image_b, int cnt)
{
	static const unsigned char const1[] =
	{
		43, 0, 43, 0, 43, 0, 43, 0, 43, 0, 43, 0, 43, 0, 43, 0
	};
	static const unsigned char const2[] =
	{
		16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0
	};
	static const unsigned char const3[] =
	{
		235, 0, 235, 0, 235, 0, 235, 0, 235, 0, 235, 0, 235, 0, 235, 0
	};
	static const unsigned char const4[] =
	{
		255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0
	};
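
	/*
	 * Each table packs eight 16-bit words, so one 16-byte load covers eight
	 * packed yuv422 pixels (Y sits in the low byte of every word):
	 *   const1: scale factor 43   -> (Y-16)*(256+43)/256, approx. (Y-16)*255/219
	 *   const2: lower clip bound 16
	 *   const3: upper clip bound 235
	 *   const4: word mask 0x00ff that keeps only the Y byte of each pixel
	 */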

	__asm__ volatile
	(
		"movdqu         (%[equ43]), %%xmm7      \n\t"   /* load multiplier 43 */
		"movdqu         (%[equ16]), %%xmm6      \n\t"   /* load bottom value 16 */
		"movdqu         (%[equ235]), %%xmm5     \n\t"   /* load top value 235 */
		"movdqu         (%[equ255]), %%xmm4     \n\t"   /* load Y mask 0x00ff */

		"loop_start:                            \n\t"

		/* load pixels block 1 */
		"movdqu         0(%[image_b]), %%xmm0   \n\t"
		"add            $0x10, %[image_b]       \n\t"

		/* load pixels block 2 */
		"movdqu         0(%[image_b]), %%xmm1   \n\t"
		"add            $0x10, %[image_b]       \n\t"

		/* leave only Y */
		"pand           %%xmm4, %%xmm0          \n\t"
		"pand           %%xmm4, %%xmm1          \n\t"

		/* upper range clip to 235 */
		"pminsw         %%xmm5, %%xmm0          \n\t"
		"pminsw         %%xmm5, %%xmm1          \n\t"

		/* lower range clip to 16 */
		"pmaxsw         %%xmm6, %%xmm0          \n\t"
		"pmaxsw         %%xmm6, %%xmm1          \n\t"

		/* subtract lower bound 16 */
		"psubw          %%xmm6, %%xmm0          \n\t"
		"psubw          %%xmm6, %%xmm1          \n\t"

		/* duplicate values */
		"movdqa         %%xmm0,%%xmm2           \n\t"
		"movdqa         %%xmm1,%%xmm3           \n\t"

		/* regA = regA << 8 */
		"psllw          $8, %%xmm0              \n\t"
		"psllw          $8, %%xmm1              \n\t"

		/* regB = regB * 43 */
		"pmullw         %%xmm7, %%xmm2          \n\t"
		"pmullw         %%xmm7, %%xmm3          \n\t"

		/* regA = regA + regB */
		"paddw          %%xmm2, %%xmm0          \n\t"
		"paddw          %%xmm3, %%xmm1          \n\t"

		/* regA = regA >> 8 */
		"psrlw          $8, %%xmm0              \n\t"
		"psrlw          $8, %%xmm1              \n\t"

		/* pack to 8 bit value */
		"packuswb       %%xmm1, %%xmm0          \n\t"

		/* store */
		"movdqu         %%xmm0, (%[alpha_a])    \n\t"
		"add            $0x10, %[alpha_a]       \n\t"

		/* loop until all pixel blocks are done */
		"dec            %[cnt]                  \n\t"
		"jnz            loop_start              \n\t"
		: [cnt]"+r" (cnt), [alpha_a]"+r"(alpha_a), [image_b]"+r"(image_b)
		: [equ43]"r"(const1), [equ16]"r"(const2), [equ235]"r"(const3), [equ255]"r"(const4)
		: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7"
	);
}
#endif

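/** Copy the luma (Y) channel of a packed yuv422 image into an 8-bit alpha
    plane, rescaling studio-swing luma (16-235) to full-range alpha (0-255).
*/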
static void copy_Y_to_A_scaled_luma(uint8_t* alpha_a, int stride_a, uint8_t* image_b, int stride_b, int width, int height)
{
	int i, j;

	for(j = 0; j < height; j++)
	{
		i = 0;
#if defined(USE_SSE) && defined(ARCH_X86_64)
		if(width >= 16)
		{
			copy_Y_to_A_scaled_luma_sse(alpha_a, image_b, width >> 4);
			i = (width >> 4) << 4;
		}
#endif
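		/* Scalar fallback: process any pixels the SSE path did not cover,
		   or the whole row when SSE is not available */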
		for(; i < width; i++)
		{
			unsigned int p = image_b[2*i];

			if(p < 16)
				p = 16;
			if(p > 235)
				p = 235;
			/* p = (p - 16) * 255 / 219; approximated in fixed point as
			   p * (256 + 43) / 256 = p * 299 / 256 */
			p -= 16;
			p = ((p << 8) + (p * 43)) >> 8;

			alpha_a[i] = p;
		}

		alpha_a += stride_a;
		image_b += stride_b;
	}
}

/** Get the image.
*/

static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
{
	// Get the b frame from the stack
	mlt_frame b_frame = mlt_frame_pop_frame( a_frame );

	mlt_frame_get_image( a_frame, image, format, width, height, 1 );

	// Get the properties of the a frame
	mlt_properties a_props = MLT_FRAME_PROPERTIES( a_frame );

	// Get the properties of the b frame
	mlt_properties b_props = MLT_FRAME_PROPERTIES( b_frame );

	int
		width_a = mlt_properties_get_int( a_props, "width" ),
		width_b = width_a,
		height_a = mlt_properties_get_int( a_props, "height" ),
		height_b = height_a;

	uint8_t *alpha_a, *image_b;

	// This transition is yuv422 only
	*format = mlt_image_yuv422;

	// Get the image from the b frame and the alpha channel of the a frame
	mlt_frame_get_image( b_frame, &image_b, format, &width_b, &height_b, 1 );
	alpha_a = mlt_frame_get_alpha_mask( a_frame );

	// Copy the scaled luma of the b frame into the alpha channel of the a frame
	copy_Y_to_A_scaled_luma
	(
		alpha_a, width_a, image_b, width_b * 2,
		(width_a > width_b)?width_b:width_a,
		(height_a > height_b)?height_b:height_a
	);

	// Extract the a_frame image info
	*width = mlt_properties_get_int( a_props, "width" );
	*height = mlt_properties_get_int( a_props, "height" );
	*image = mlt_properties_get_data( a_props, "image", NULL );

	return 0;
}


/** Matte transition processing.
*/

static mlt_frame transition_process( mlt_transition transition, mlt_frame a_frame, mlt_frame b_frame )
{
	// Push the b_frame on to the stack
	mlt_frame_push_frame( a_frame, b_frame );

	// Push the transition method
	mlt_frame_push_get_image( a_frame, transition_get_image );

	return a_frame;
}

/** Constructor for the transition.
*/

mlt_transition transition_matte_init( mlt_profile profile, mlt_service_type type, const char *id, char *lumafile )
{
	mlt_transition transition = mlt_transition_new( );
	if ( transition != NULL )
	{
		// Set the methods
		transition->process = transition_process;

		// Inform apps and framework that this is a video only transition
		mlt_properties_set_int( MLT_TRANSITION_PROPERTIES( transition ), "_transition_type", 1 );

		return transition;
	}
	return NULL;
}
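
/* Usage sketch (illustrative, not part of this file): assuming the module
 * registers this constructor under the service id "matte", an application
 * would obtain the transition through the factory and plant it between two
 * tracks like any other transition, e.g.:
 *
 *   mlt_transition matte = mlt_factory_transition( profile, "matte", NULL );
 *   mlt_field_plant_transition( field, matte, 0, 1 );
 */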