/*****************************************************************
 * gavl - a general purpose audio/video processing library
 *
 * Copyright (c) 2001 - 2011 Members of the Gmerlin project
 * gmerlin-general@lists.sourceforge.net
 * http://gmerlin.sourceforge.net
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * *****************************************************************/

22 #include <config.h>
23 #include <attributes.h>
24 
25 #include <stdio.h>
26 #include <gavl/gavl.h>
27 #include <video.h>
28 #include <scale.h>
29 
30 #include "mmx.h"
31 
32 static const mmx_t factor_mask = { 0x000000000000FFFFLL };
33 
/* Debugging aid (compiled out): DUMP_MM(name, reg) stores the register
   reg into mm_tmp and prints it as a 16 digit hex number on stderr,
   prefixed by name. It expands to two statements and already ends in a
   semicolon. MOVQ_R2M is defined further down, which is fine because it
   is only expanded where DUMP_MM is used. */
#if 0
static mmx_t mm_tmp;
#define DUMP_MM(name, reg) MOVQ_R2M(reg, mm_tmp);\
  fprintf(stderr, "%s: %016llx\n", name, mm_tmp.q);
#endif

/* MOVQ_R2M(reg, mem): 64 bit store of an MMX register to memory.
   Builds with MMXEXT defined use movntq (the non-temporal store of the
   MMX extensions), all other builds use a plain movq. OUTPUT_8 uses this
   macro to write the finished pixels. */
#ifdef MMXEXT
#define MOVQ_R2M(reg,mem) movntq_r2m(reg, mem)
#else
#define MOVQ_R2M(reg,mem) movq_r2m(reg, mem)
#endif

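/*
 *  Register usage of the macros below:
 *
 *  mm0: Input, lower 4 bytes (unpacked to 16 bit words)
 *  mm1: Input, upper 4 bytes (unpacked to 16 bit words)
 *  mm2: Factor (as loaded from the table, scratch)
 *  mm3: Accumulator / output for the lower 4 bytes
 *  mm4: Accumulator / output for the upper 4 bytes
 *  mm5: Factor, replicated into all four 16 bit words
 *  mm6: Zero (used for unpacking bytes to words)
 *  mm7: factor_mask
 */
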
/* Set up the two registers that stay constant while scaling:
   mm6 is cleared (ACCUM_8 unpacks bytes against it) and mm7 receives
   factor_mask (LOAD_FACTOR_8 masks the factor with it).
   Where this is expanded is decided by the included scale_y*.h
   templates, which are not part of this file. */
#define INIT_8_GLOBAL \
  pxor_r2r(mm6, mm6);\
  movq_m2r(factor_mask, mm7);

/* Clear both accumulators (mm3: lower 4 bytes, mm4: upper 4 bytes)
   before a group of 8 output bytes is summed up with ACCUM_8. */
#define INIT_8 \
  pxor_r2r(mm3, mm3);\
  pxor_r2r(mm4, mm4);

/* LOAD_FACTOR_8(num): fetch the integer factor number num of the current
   scanline (ctx->table_v.pixels[scanline].factor_i[num]), keep its lowest
   16 bits and replicate that word into all four words of mm5.
   mm2 is used as scratch, mm7 must contain factor_mask.

   With MMXEXT the replication is a single pshufw. The plain MMX variant
   builds it by shifting and ORing: first the word is copied to bits
   16..31, then the resulting 32 bit pattern is copied to bits 32..63.

   Expects ctx and scanline to be in scope at the expansion site. */
#ifdef MMXEXT
#define LOAD_FACTOR_8(num) \
  /* Load factor */ \
  movd_m2r(ctx->table_v.pixels[scanline].factor_i[num], mm2);\
  pand_r2r(mm7, mm2);\
  pshufw_r2r(mm2,mm5,0x00)

#else

#define LOAD_FACTOR_8(num) \
  /* Load factor */ \
  movd_m2r(ctx->table_v.pixels[scanline].factor_i[num], mm2);\
  pand_r2r(mm7, mm2);\
  movq_r2r(mm2, mm5);\
  psllq_i2r(16, mm5);\
  por_r2r(mm5, mm2);\
  movq_r2r(mm2, mm5);\
  psllq_i2r(32, mm5);\
  por_r2r(mm2, mm5)
#endif

/* ACCUM_8(num): add the contribution of one source line to 8 output bytes.

   Reads 8 bytes from *src, zero-extends them to 16 bit words (lower four
   in mm0, upper four in mm1, using the zeroed mm6) and shifts them left
   by 7. A shift of 7 keeps the largest value (255 << 7) inside the
   positive range of a signed 16 bit word. Each word is then multiplied
   with factor number num; pmulhw keeps the upper 16 bits of the signed
   product. The results are added with signed saturation to mm3 / mm4.

   Together with the shift by 5 in OUTPUT_8 the net scaling is
   << 7, >> 16, >> 5, i.e. >> 14, the same as in OUTPUT_C_8.

   Expects src, ctx and scanline to be in scope; advancing src is left to
   the code that expands the macro. */
#define ACCUM_8(num)  \
  /* Load input */ \
  movq_m2r(*src,mm0);\
  movq_r2r(mm0,mm1);\
  punpcklbw_r2r(mm6, mm0); \
  punpckhbw_r2r(mm6, mm1); \
  psllw_i2r(7, mm0);\
  psllw_i2r(7, mm1);\
  LOAD_FACTOR_8(num); \
  /* Accumulate mm0 */ \
  pmulhw_r2r(mm5, mm0);\
  paddsw_r2r(mm0, mm3);\
  /* Accumulate mm1 */ \
  pmulhw_r2r(mm5, mm1);\
  paddsw_r2r(mm1, mm4)

/* Finish 8 output bytes: shift both accumulators right (arithmetically)
   by the remaining 5 fractional bits, pack the eight words into bytes
   with unsigned saturation (mm3 supplies the lower, mm4 the upper four
   bytes) and store the result to *dst with MOVQ_R2M. */
#define OUTPUT_8 \
  psraw_i2r(5, mm3);\
  psraw_i2r(5, mm4);\
  packuswb_r2r(mm4, mm3);\
  MOVQ_R2M(mm3, *dst)

/* ACCUM_C_8(num): plain C version of the accumulation for a single byte.
   Adds factor number num of the current scanline, multiplied with the
   byte at *src, to tmp. Expects tmp, src, ctx and scanline to be in scope
   at the expansion site. */
#define ACCUM_C_8(num) \
   tmp += ctx->table_v.pixels[scanline].factor_i[num] * *src

/* Plain C version of the output step for a single byte.
   Drops the 14 fractional bits of tmp and stores the result to *dst,
   clamped to 0..255: if any bit above the lowest 8 is set, (-tmp) >> 63
   yields all ones (0xFF after the cast) for values that are too large
   and 0 for negative values.
   NOTE(review): this relies on tmp being a signed 64 bit integer and on
   right shifts of negative values being arithmetic; tmp is declared by
   the including template - confirm there.
   The macro expands to two statements and already ends in a semicolon. */
#define OUTPUT_C_8 \
   tmp >>= 14; \
   *dst = (uint8_t)((tmp & ~0xFF)?((-tmp) >> 63) : tmp);

115 /* scale_uint8_x_1_y_bicubic_mmx  */
116 
117 #define FUNC_NAME scale_uint8_x_1_y_bicubic_mmx
118 #define WIDTH_MUL 1
119 #define BITS 8
120 #define NUM_TAPS 4
121 
122 #include "scale_y.h"
123 
124 /* scale_uint8_x_2_y_bicubic_mmx  */
125 
126 #define FUNC_NAME scale_uint8_x_2_y_bicubic_mmx
127 #define WIDTH_MUL 2
128 #define BITS 8
129 #define NUM_TAPS 4
130 
131 #include "scale_y.h"
132 
133 /* scale_uint8_x_3_y_bicubic_mmx  */
134 
135 #define FUNC_NAME scale_uint8_x_3_y_bicubic_mmx
136 #define WIDTH_MUL 3
137 #define BITS 8
138 #define NUM_TAPS 4
139 
140 #include "scale_y.h"
141 
142 /* scale_uint8_x_4_y_bicubic_mmx  */
143 
144 #define FUNC_NAME scale_uint8_x_4_y_bicubic_mmx
145 #define WIDTH_MUL 4
146 #define BITS 8
147 #define NUM_TAPS 4
148 
149 #include "scale_y.h"
150 
151 
152 /* scale_uint8_x_1_y_quadratic_mmx  */
153 
154 #define FUNC_NAME scale_uint8_x_1_y_quadratic_mmx
155 #define WIDTH_MUL 1
156 #define BITS 8
157 #define NUM_TAPS 3
158 
159 #include "scale_y.h"
160 
161 /* scale_uint8_x_2_y_quadratic_mmx  */
162 
163 #define FUNC_NAME scale_uint8_x_2_y_quadratic_mmx
164 #define WIDTH_MUL 2
165 #define BITS 8
166 #define NUM_TAPS 3
167 
168 #include "scale_y.h"
169 
170 /* scale_uint8_x_3_y_quadratic_mmx  */
171 
172 #define FUNC_NAME scale_uint8_x_3_y_quadratic_mmx
173 #define WIDTH_MUL 3
174 #define BITS 8
175 #define NUM_TAPS 3
176 
177 #include "scale_y.h"
178 
179 /* scale_uint8_x_4_y_quadratic_mmx  */
180 
181 #define FUNC_NAME scale_uint8_x_4_y_quadratic_mmx
182 #define WIDTH_MUL 4
183 #define BITS 8
184 #define NUM_TAPS 3
185 
186 #include "scale_y.h"
187 
188 /* scale_uint8_x_1_y_generic_mmx  */
189 
190 #define FUNC_NAME scale_uint8_x_1_y_generic_mmx
191 #define WIDTH_MUL 1
192 #define BITS 8
193 #define NUM_TAPS -1
194 
195 #include "scale_y.h"
196 
197 /* scale_uint8_x_2_y_generic_mmx  */
198 
199 #define FUNC_NAME scale_uint8_x_2_y_generic_mmx
200 #define WIDTH_MUL 2
201 #define BITS 8
202 #define NUM_TAPS -1
203 
204 #include "scale_y.h"
205 
206 /* scale_uint8_x_4_y_generic_mmx  */
207 
208 #define FUNC_NAME scale_uint8_x_4_y_generic_mmx
209 #define WIDTH_MUL 4
210 #define BITS 8
211 #define NUM_TAPS -1
212 
213 #include "scale_y.h"
214 
215 /* scale_uint8_x_3_y_generic_mmx  */
216 
217 #define FUNC_NAME scale_uint8_x_3_y_generic_mmx
218 #define WIDTH_MUL 3
219 #define BITS 8
220 #define NUM_TAPS -1
221 
222 #include "scale_y.h"
223 
224 #ifdef MMXEXT
gavl_init_scale_funcs_quadratic_y_mmxext(gavl_scale_funcs_t * tab,int src_advance,int dst_advance)225 void gavl_init_scale_funcs_quadratic_y_mmxext(gavl_scale_funcs_t * tab,
226                                            int src_advance, int dst_advance)
227 #else
228 void gavl_init_scale_funcs_quadratic_y_mmx(gavl_scale_funcs_t * tab,
229                                            int src_advance, int dst_advance)
230 #endif
231   {
232   if((src_advance == 1) && (dst_advance == 1))
233     {
234     tab->funcs_y.scale_uint8_x_1_noadvance =  scale_uint8_x_1_y_quadratic_mmx;
235     tab->funcs_y.bits_uint8_noadvance = 14;
236     }
237   else if((src_advance == 3) && (dst_advance == 3))
238     {
239     tab->funcs_y.scale_uint8_x_3 =  scale_uint8_x_3_y_quadratic_mmx;
240     tab->funcs_y.bits_uint8_noadvance = 14;
241     }
242   else if((src_advance == 4) && (dst_advance == 4))
243     {
244     tab->funcs_y.scale_uint8_x_3 =  scale_uint8_x_4_y_quadratic_mmx;
245     tab->funcs_y.scale_uint8_x_4 =  scale_uint8_x_4_y_quadratic_mmx;
246     tab->funcs_y.bits_uint8_noadvance  = 14;
247     }
248   else if((src_advance == 2) && (dst_advance == 2))
249     {
250     tab->funcs_y.scale_uint8_x_2 =  scale_uint8_x_2_y_quadratic_mmx;
251     tab->funcs_y.bits_uint8_noadvance = 14;
252     }
253   }
254 
255 #ifdef MMXEXT
gavl_init_scale_funcs_bicubic_y_mmxext(gavl_scale_funcs_t * tab,int src_advance,int dst_advance)256 void gavl_init_scale_funcs_bicubic_y_mmxext(gavl_scale_funcs_t * tab,
257                                             int src_advance, int dst_advance)
258 #else
259 void gavl_init_scale_funcs_bicubic_y_mmx(gavl_scale_funcs_t * tab,
260                                          int src_advance, int dst_advance)
261 #endif
262   {
263   if((src_advance == 1) && (dst_advance == 1))
264     {
265     tab->funcs_y.scale_uint8_x_1_noadvance =  scale_uint8_x_1_y_bicubic_mmx;
266     tab->funcs_y.bits_uint8_noadvance = 14;
267     }
268   else if((src_advance == 3) && (dst_advance == 3))
269     {
270     tab->funcs_y.scale_uint8_x_3 =  scale_uint8_x_3_y_bicubic_mmx;
271     tab->funcs_y.bits_uint8_noadvance = 14;
272     }
273   else if((src_advance == 4) && (dst_advance == 4))
274     {
275     tab->funcs_y.scale_uint8_x_3 =  scale_uint8_x_4_y_bicubic_mmx;
276     tab->funcs_y.scale_uint8_x_4 =  scale_uint8_x_4_y_bicubic_mmx;
277     tab->funcs_y.bits_uint8_noadvance  = 14;
278     }
279   else if((src_advance == 2) && (dst_advance == 2))
280     {
281     tab->funcs_y.scale_uint8_x_2 =  scale_uint8_x_2_y_bicubic_mmx;
282     tab->funcs_y.bits_uint8_noadvance = 14;
283     }
284   }
285 
286 #ifdef MMXEXT
gavl_init_scale_funcs_generic_y_mmxext(gavl_scale_funcs_t * tab,int src_advance,int dst_advance)287 void gavl_init_scale_funcs_generic_y_mmxext(gavl_scale_funcs_t * tab,
288                                             int src_advance, int dst_advance)
289 #else
290 void gavl_init_scale_funcs_generic_y_mmx(gavl_scale_funcs_t * tab,
291                                          int src_advance, int dst_advance)
292 #endif
293   {
294   if((src_advance == 1) && (dst_advance == 1))
295     {
296     tab->funcs_y.scale_uint8_x_1_noadvance =  scale_uint8_x_1_y_generic_mmx;
297     tab->funcs_y.bits_uint8_noadvance = 14;
298     }
299   else if((src_advance == 3) && (dst_advance == 3))
300     {
301     tab->funcs_y.scale_uint8_x_3 =  scale_uint8_x_3_y_generic_mmx;
302     tab->funcs_y.bits_uint8_noadvance = 14;
303     }
304   else if((src_advance == 2) && (dst_advance == 2))
305     {
306     tab->funcs_y.scale_uint8_x_2 =  scale_uint8_x_2_y_generic_mmx;
307     tab->funcs_y.bits_uint8_noadvance = 14;
308     }
309   else if((src_advance == 4) && (dst_advance == 4))
310     {
311     tab->funcs_y.scale_uint8_x_3 =  scale_uint8_x_4_y_generic_mmx;
312     tab->funcs_y.scale_uint8_x_4 =  scale_uint8_x_4_y_generic_mmx;
313     tab->funcs_y.bits_uint8_noadvance  = 14;
314     }
315 
316   }
317 
318 /* scale_uint8_x_1_y_bilinear_mmx  */
319 
320 #define FUNC_NAME scale_uint8_x_1_y_bilinear_mmx
321 #define WIDTH_MUL 1
322 #define BITS 8
323 
324 #include "scale_y_linear.h"
325 
326 /* scale_uint8_x_2_y_bilinear_mmx  */
327 
328 #define FUNC_NAME scale_uint8_x_2_y_bilinear_mmx
329 #define WIDTH_MUL 2
330 #define BITS 8
331 
332 #include "scale_y_linear.h"
333 
334 /* scale_uint8_x_4_y_bilinear_mmx  */
335 
336 #define FUNC_NAME scale_uint8_x_4_y_bilinear_mmx
337 #define WIDTH_MUL 4
338 #define BITS 8
339 
340 #include "scale_y_linear.h"
341 
342 /* scale_uint8_x_3_y_bilinear_mmx  */
343 
344 #define FUNC_NAME scale_uint8_x_3_y_bilinear_mmx
345 #define WIDTH_MUL 3
346 #define BITS 8
347 
348 #include "scale_y_linear.h"
349 
350 #ifdef MMXEXT
gavl_init_scale_funcs_bilinear_y_mmxext(gavl_scale_funcs_t * tab,int src_advance,int dst_advance)351 void gavl_init_scale_funcs_bilinear_y_mmxext(gavl_scale_funcs_t * tab,
352                                              int src_advance, int dst_advance)
353 #else
354 void gavl_init_scale_funcs_bilinear_y_mmx(gavl_scale_funcs_t * tab,
355                                          int src_advance, int dst_advance)
356 #endif
357   {
358   if((src_advance == 1) && (dst_advance == 1))
359     {
360     tab->funcs_y.scale_uint8_x_1_noadvance =  scale_uint8_x_1_y_bilinear_mmx;
361     tab->funcs_y.bits_uint8_noadvance = 14;
362     }
363   else if((src_advance == 3) && (dst_advance == 3))
364     {
365     tab->funcs_y.scale_uint8_x_3 =  scale_uint8_x_3_y_bilinear_mmx;
366     tab->funcs_y.bits_uint8_noadvance = 14;
367     }
368   else if((src_advance == 2) && (dst_advance == 2))
369     {
370     tab->funcs_y.scale_uint8_x_2 =  scale_uint8_x_2_y_bilinear_mmx;
371     tab->funcs_y.bits_uint8_noadvance = 14;
372     }
373   else if((src_advance == 4) && (dst_advance == 4))
374     {
375     tab->funcs_y.scale_uint8_x_3 =  scale_uint8_x_4_y_bilinear_mmx;
376     tab->funcs_y.scale_uint8_x_4 =  scale_uint8_x_4_y_bilinear_mmx;
377     tab->funcs_y.bits_uint8_noadvance  = 14;
378     }
379 
380   }
381