1 /*
2 * transition_matte.c -- replace alpha channel of track
3 *
4 * Copyright (C) 2003-2014 Meltytech, LLC
5 * Author: Maksym Veremeyenko <verem@m1stereo.tv>
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include <framework/mlt.h>
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <ctype.h>
27 #include <string.h>
28 #include <math.h>
29
30 #if defined(USE_SSE) && defined(ARCH_X86_64)
/**
 * SSE2 kernel: turn the luma of 2-byte-per-pixel image data into 8-bit alpha.
 *
 * Every loop iteration reads 32 bytes from image_b (16 pixels, the luma being
 * the low byte of each 16-bit word), clamps the luma to 16..235, subtracts 16,
 * multiplies by 299/256 (integer stand-in for 255/219) and stores the 16
 * resulting bytes to alpha_a. Unaligned loads/stores are used throughout, so
 * neither buffer needs any particular alignment.
 *
 * alpha_a: destination, 16 bytes are written per iteration
 * image_b: source, 32 bytes are read per iteration
 * cnt:     number of 16-pixel groups to convert; nothing is done for cnt <= 0
 *
 * NOTE(review): the noinline attribute is kept from the original code, where
 * it presumably protected the formerly global asm label from being duplicated.
 */
static void __attribute__((noinline)) copy_Y_to_A_scaled_luma_sse(uint8_t* alpha_a, uint8_t* image_b, int cnt)
{
	/* 16-bit little-endian lanes: eight copies of each constant */
	static const unsigned char const1[] =
	{
		43, 0, 43, 0, 43, 0, 43, 0, 43, 0, 43, 0, 43, 0, 43, 0
	};
	static const unsigned char const2[] =
	{
		16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0
	};
	static const unsigned char const3[] =
	{
		235, 0, 235, 0, 235, 0, 235, 0, 235, 0, 235, 0, 235, 0, 235, 0
	};
	static const unsigned char const4[] =
	{
		255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0
	};

	/* The loop below is a do/while: with cnt == 0 the dec/jnz pair would wrap
	   around and run far past both buffers. */
	if (cnt <= 0)
		return;

	__asm__ volatile
	(
		"movdqu         (%[equ43]), %%xmm7      \n\t"   /* load multiplier 43 */
		"movdqu         (%[equ16]), %%xmm6      \n\t"   /* load lower bound 16 */
		"movdqu         (%[equ235]), %%xmm5     \n\t"   /* load upper bound 235 */
		"movdqu         (%[equ255]), %%xmm4     \n\t"   /* load luma mask 0x00ff */

		/* local numeric label: safe even if this asm is emitted more than once */
		"1:                                     \n\t"

		/* load pixels block 1 */
		"movdqu         0(%[image_b]), %%xmm0   \n\t"
		"add            $0x10, %[image_b]       \n\t"

		/* load pixels block 2 */
		"movdqu         0(%[image_b]), %%xmm1   \n\t"
		"add            $0x10, %[image_b]       \n\t"

		/* leave only Y */
		"pand           %%xmm4, %%xmm0          \n\t"
		"pand           %%xmm4, %%xmm1          \n\t"

		/* upper range clip: Y = min(Y, 235) */
		"pminsw         %%xmm5, %%xmm0          \n\t"
		"pminsw         %%xmm5, %%xmm1          \n\t"

		/* lower range clip: Y = max(Y, 16) */
		"pmaxsw         %%xmm6, %%xmm0          \n\t"
		"pmaxsw         %%xmm6, %%xmm1          \n\t"

		/* remove the offset: Y = Y - 16 */
		"psubw          %%xmm6, %%xmm0          \n\t"
		"psubw          %%xmm6, %%xmm1          \n\t"

		/* duplicate values */
		"movdqa         %%xmm0,%%xmm2           \n\t"
		"movdqa         %%xmm1,%%xmm3           \n\t"

		/* regA = regA << 8 */
		"psllw          $8, %%xmm0              \n\t"
		"psllw          $8, %%xmm1              \n\t"

		/* regB = regB * 43 */
		"pmullw         %%xmm7, %%xmm2          \n\t"
		"pmullw         %%xmm7, %%xmm3          \n\t"

		/* regA = regA + regB */
		"paddw          %%xmm2, %%xmm0          \n\t"
		"paddw          %%xmm3, %%xmm1          \n\t"

		/* regA = regA >> 8 */
		"psrlw          $8, %%xmm0              \n\t"
		"psrlw          $8, %%xmm1              \n\t"

		/* pack to 8 bit value */
		"packuswb       %%xmm1, %%xmm0          \n\t"

		/* store */
		"movdqu         %%xmm0, (%[alpha_a])    \n\t"
		"add            $0x10, %[alpha_a]       \n\t"

		/* loop until all groups are done */
		"dec            %[cnt]                  \n\t"
		"jnz            1b                      \n\t"
		: [cnt]"+r" (cnt), [alpha_a]"+r"(alpha_a), [image_b]"+r"(image_b)
		: [equ43]"r"(const1), [equ16]"r"(const2), [equ235]"r"(const3), [equ255]"r"(const4)
		/* "memory": the asm reads image_b and writes alpha_a behind the
		   compiler's back; "cc": add/dec modify the flags */
		: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
	);
}
118 #endif
119
/**
 * Fill an 8-bit alpha plane from the luma of an image with two bytes per pixel.
 *
 * For every pixel the byte at offset 2*x of the source row is clamped to
 * 16..235, shifted down by 16 and multiplied by 299/256, which approximates
 * (Y - 16) * 255 / 219 using integers only; the result is written to byte x
 * of the destination row.
 *
 * alpha_a:  first destination row
 * stride_a: distance in bytes between destination rows
 * image_b:  first source row (the caller passes a yuv422 image)
 * stride_b: distance in bytes between source rows
 * width:    pixels to convert in each row
 * height:   number of rows to convert
 *
 * When built with USE_SSE on ARCH_X86_64, complete groups of 16 pixels are
 * handed to copy_Y_to_A_scaled_luma_sse and only the remainder of each row is
 * converted by the scalar loop.
 */
static void copy_Y_to_A_scaled_luma(uint8_t* alpha_a, int stride_a, uint8_t* image_b, int stride_b, int width, int height)
{
	int row;

	for (row = 0; row < height; row++, alpha_a += stride_a, image_b += stride_b)
	{
		int col = 0;

#if defined(USE_SSE) && defined(ARCH_X86_64)
		if (width >= 16)
		{
			int groups = width >> 4;

			copy_Y_to_A_scaled_luma_sse(alpha_a, image_b, groups);
			/* resume the scalar loop right after the last full group */
			col = groups << 4;
		}
#endif

		while (col < width)
		{
			unsigned int luma = image_b[2 * col];

			/* clamp to the 16..235 range, then drop the offset */
			if (luma > 235)
				luma = 235;
			else if (luma < 16)
				luma = 16;
			luma -= 16;

			/* luma * (256 + 43) / 256 */
			alpha_a[col] = ((luma << 8) + (luma * 43)) >> 8;
			col++;
		}
	}
}
153
154 /** Get the image.
155 */
156
transition_get_image(mlt_frame a_frame,uint8_t ** image,mlt_image_format * format,int * width,int * height,int writable)157 static int transition_get_image( mlt_frame a_frame, uint8_t **image, mlt_image_format *format, int *width, int *height, int writable )
158 {
159 // Get the b frame from the stack
160 mlt_frame b_frame = mlt_frame_pop_frame( a_frame );
161
162 mlt_frame_get_image( a_frame, image, format, width, height, 1 );
163
164 // Get the properties of the a frame
165 mlt_properties a_props = MLT_FRAME_PROPERTIES( a_frame );
166
167 // Get the properties of the b frame
168 mlt_properties b_props = MLT_FRAME_PROPERTIES( b_frame );
169
170 int
171 width_a = mlt_properties_get_int( a_props, "width" ),
172 width_b = width_a,
173 height_a = mlt_properties_get_int( a_props, "height" ),
174 height_b = height_a;
175
176 uint8_t *alpha_a, *image_b;
177
178 // This transition is yuv422 only
179 *format = mlt_image_yuv422;
180
181 // Get the image from the a frame
182 mlt_frame_get_image( b_frame, &image_b, format, &width_b, &height_b, 1 );
183 alpha_a = mlt_frame_get_alpha_mask( a_frame );
184
185 // copy data
186 copy_Y_to_A_scaled_luma
187 (
188 alpha_a, width_a, image_b, width_b * 2,
189 (width_a > width_b)?width_b:width_a,
190 (height_a > height_b)?height_b:height_a
191 );
192
193 // Extract the a_frame image info
194 *width = mlt_properties_get_int( a_props, "width" );
195 *height = mlt_properties_get_int( a_props, "height" );
196 *image = mlt_properties_get_data( a_props, "image", NULL );
197
198 return 0;
199 }
200
201
202 /** Matte transition processing.
203 */
204
transition_process(mlt_transition transition,mlt_frame a_frame,mlt_frame b_frame)205 static mlt_frame transition_process( mlt_transition transition, mlt_frame a_frame, mlt_frame b_frame )
206 {
207 // Push the b_frame on to the stack
208 mlt_frame_push_frame( a_frame, b_frame );
209
210 // Push the transition method
211 mlt_frame_push_get_image( a_frame, transition_get_image );
212
213 return a_frame;
214 }
215
216 /** Constructor for the filter.
217 */
218
transition_matte_init(mlt_profile profile,mlt_service_type type,const char * id,char * lumafile)219 mlt_transition transition_matte_init( mlt_profile profile, mlt_service_type type, const char *id, char *lumafile )
220 {
221 mlt_transition transition = mlt_transition_new( );
222 if ( transition != NULL )
223 {
224 // Set the methods
225 transition->process = transition_process;
226
227 // Inform apps and framework that this is a video only transition
228 mlt_properties_set_int( MLT_TRANSITION_PROPERTIES( transition ), "_transition_type", 1 );
229
230 return transition;
231 }
232 return NULL;
233 }
234