1 /*****************************************************************
2 * gavl - a general purpose audio/video processing library
3 *
4 * Copyright (c) 2001 - 2011 Members of the Gmerlin project
5 * gmerlin-general@lists.sourceforge.net
6 * http://gmerlin.sourceforge.net
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 * *****************************************************************/
21
22 #include <config.h>
23 #include <attributes.h>
24
25 #include <stdio.h>
26 #include <gavl/gavl.h>
27 #include <video.h>
28 #include <scale.h>
29
30 #include "mmx.h"
31
/*
 * Mask that keeps only the lowest 16 bits of a 64-bit MMX register.
 * INIT_8_GLOBAL loads it into mm7 and LOAD_FACTOR_8 ANDs every freshly
 * loaded factor with it.
 */
static const mmx_t factor_mask = { 0x000000000000FFFFLL };

/*
 * Disabled debugging aid: stores an MMX register into mm_tmp and prints it
 * to stderr as a 16-digit hex value, prefixed with the given name.
 */
#if 0
static mmx_t mm_tmp;
#define DUMP_MM(name, reg) MOVQ_R2M(reg, mm_tmp);\
fprintf(stderr, "%s: %016llx\n", name, mm_tmp.q);
#endif

/*
 * Register-to-memory store used by OUTPUT_8 for the finished pixels:
 * movntq_r2m when MMXEXT is defined, movq_r2m otherwise.
 * NOTE(review): movntq_r2m presumably maps to the non-temporal MOVNTQ
 * store -- confirm in mmx.h.
 */
#ifdef MMXEXT
#define MOVQ_R2M(reg,mem) movntq_r2m(reg, mem)
#else
#define MOVQ_R2M(reg,mem) movq_r2m(reg, mem)
#endif
45
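/*
 *   Register usage of the 8-bit macros below:
 *
 *   mm0: Input, lower 4 bytes unpacked to 16-bit words
 *   mm1: Input, upper 4 bytes unpacked to 16-bit words
 *   mm2: Factor as loaded from the table (masked to 16 bits)
 *   mm3: Accumulator for mm0 (lower half of the output)
 *   mm4: Accumulator for mm1 (upper half of the output)
 *   mm5: Factor replicated into all four 16-bit words
 *   mm6: Zero (used for the byte -> word unpacking)
 *   mm7: factor_mask
 */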
56
/*
 * INIT_8_GLOBAL sets up the registers that stay constant across all
 * ACCUM_8 invocations: mm6 is cleared (it is the zero operand of the
 * byte -> word unpacking in ACCUM_8) and mm7 receives factor_mask.
 */
#define INIT_8_GLOBAL \
pxor_r2r(mm6, mm6);\
movq_m2r(factor_mask, mm7);

/*
 * INIT_8 clears the two accumulators mm3 and mm4 that ACCUM_8 adds into
 * and OUTPUT_8 writes out.
 * NOTE(review): presumably invoked once per group of 8 output bytes by
 * scale_y.h -- confirm there.
 */
#define INIT_8 \
pxor_r2r(mm3, mm3);\
pxor_r2r(mm4, mm4);
64
/*
 * LOAD_FACTOR_8(num): load tap number num of the current scanline's
 * vertical table entry into mm2, mask it to its lowest 16 bits (mm7 must
 * hold factor_mask) and replicate that word into all four 16-bit lanes
 * of mm5.
 *
 * With MMXEXT the replication is a single pshufw with selector 0x00.
 * Without it the same result is built by shift-and-OR: first the word is
 * duplicated into the lower 32 bits of mm2, then those 32 bits are
 * duplicated into the upper half of mm5.
 *
 * Expects ctx and scanline to be in scope at the point of use.
 */
#ifdef MMXEXT
#define LOAD_FACTOR_8(num) \
/* Load factor */ \
movd_m2r(ctx->table_v.pixels[scanline].factor_i[num], mm2);\
pand_r2r(mm7, mm2);\
pshufw_r2r(mm2,mm5,0x00)

#else

#define LOAD_FACTOR_8(num) \
/* Load factor */ \
movd_m2r(ctx->table_v.pixels[scanline].factor_i[num], mm2);\
pand_r2r(mm7, mm2);\
movq_r2r(mm2, mm5);\
psllq_i2r(16, mm5);\
por_r2r(mm5, mm2);\
movq_r2r(mm2, mm5);\
psllq_i2r(32, mm5);\
por_r2r(mm2, mm5)
#endif
85
/*
 * ACCUM_8(num): add the contribution of tap number num for 8 source bytes.
 *
 * The 8 bytes at *src are loaded and unpacked against the zero register
 * mm6 into two groups of four 16-bit words (mm0: lower bytes, mm1: upper
 * bytes). Each word is shifted left by 7, multiplied by the replicated
 * factor from LOAD_FACTOR_8 with pmulhw (which keeps the upper 16 bits of
 * the product, i.e. pixel * factor >> 9 overall) and added with signed
 * saturation to the accumulators mm3 and mm4.
 *
 * Expects mm6 == 0 and mm7 == factor_mask (see INIT_8_GLOBAL), and src,
 * ctx and scanline to be in scope at the point of use.
 */
#define ACCUM_8(num) \
/* Load input */ \
movq_m2r(*src,mm0);\
movq_r2r(mm0,mm1);\
punpcklbw_r2r(mm6, mm0); \
punpckhbw_r2r(mm6, mm1); \
psllw_i2r(7, mm0);\
psllw_i2r(7, mm1);\
LOAD_FACTOR_8(num); \
/* Accumulate mm0 */ \
pmulhw_r2r(mm5, mm0);\
paddsw_r2r(mm0, mm3);\
/* Accumulate mm1 */ \
pmulhw_r2r(mm5, mm1);\
paddsw_r2r(mm1, mm4)
101
/*
 * OUTPUT_8: finish 8 output bytes from the accumulators mm3 and mm4.
 *
 * Both accumulators are shifted right arithmetically by 5; together with
 * the >> 9 already applied per tap in ACCUM_8 this removes the 14
 * fractional bits of the factors. The eight words are then packed to
 * bytes with unsigned saturation (mm3 supplies the lower four bytes,
 * mm4 the upper four) and stored to *dst through MOVQ_R2M.
 */
#define OUTPUT_8 \
psraw_i2r(5, mm3);\
psraw_i2r(5, mm4);\
packuswb_r2r(mm4, mm3);\
MOVQ_R2M(mm3, *dst)
107
/*
 * ACCUM_C_8(num): plain C accumulation of one tap for a single byte.
 * Adds factor_i[num] of the current scanline's vertical table entry,
 * multiplied by the byte at *src, to the accumulator tmp.
 * NOTE(review): presumably used by scale_y.h for the bytes that are left
 * over after the 8-byte MMX groups -- confirm there.
 *
 * Expects tmp, src, ctx and scanline to be in scope at the point of use.
 */
#define ACCUM_C_8(num) \
  tmp += ctx->table_v.pixels[scanline].factor_i[num] * *src

/*
 * OUTPUT_C_8: plain C output of a single byte.
 * Drops the 14 fractional bits from tmp and stores the result to *dst,
 * clamped to the range 0..255.
 *
 * The clamp is written as explicit comparisons instead of the former
 * ((-tmp) >> 63) trick, which depended on tmp being exactly 64 bits wide
 * and on the right shift of a negative value being arithmetic.
 *
 * The expansion consists of two statements and already ends in a
 * semicolon; tmp must be a signed integer and dst a uint8_t pointer.
 */
#define OUTPUT_C_8 \
  tmp >>= 14; \
  *dst = (uint8_t)((tmp < 0) ? 0 : ((tmp > 255) ? 255 : tmp));
114
115 /* scale_uint8_x_1_y_bicubic_mmx */
116
117 #define FUNC_NAME scale_uint8_x_1_y_bicubic_mmx
118 #define WIDTH_MUL 1
119 #define BITS 8
120 #define NUM_TAPS 4
121
122 #include "scale_y.h"
123
124 /* scale_uint8_x_2_y_bicubic_mmx */
125
126 #define FUNC_NAME scale_uint8_x_2_y_bicubic_mmx
127 #define WIDTH_MUL 2
128 #define BITS 8
129 #define NUM_TAPS 4
130
131 #include "scale_y.h"
132
133 /* scale_uint8_x_3_y_bicubic_mmx */
134
135 #define FUNC_NAME scale_uint8_x_3_y_bicubic_mmx
136 #define WIDTH_MUL 3
137 #define BITS 8
138 #define NUM_TAPS 4
139
140 #include "scale_y.h"
141
142 /* scale_uint8_x_4_y_bicubic_mmx */
143
144 #define FUNC_NAME scale_uint8_x_4_y_bicubic_mmx
145 #define WIDTH_MUL 4
146 #define BITS 8
147 #define NUM_TAPS 4
148
149 #include "scale_y.h"
150
151
152 /* scale_uint8_x_1_y_quadratic_mmx */
153
154 #define FUNC_NAME scale_uint8_x_1_y_quadratic_mmx
155 #define WIDTH_MUL 1
156 #define BITS 8
157 #define NUM_TAPS 3
158
159 #include "scale_y.h"
160
161 /* scale_uint8_x_2_y_quadratic_mmx */
162
163 #define FUNC_NAME scale_uint8_x_2_y_quadratic_mmx
164 #define WIDTH_MUL 2
165 #define BITS 8
166 #define NUM_TAPS 3
167
168 #include "scale_y.h"
169
170 /* scale_uint8_x_3_y_quadratic_mmx */
171
172 #define FUNC_NAME scale_uint8_x_3_y_quadratic_mmx
173 #define WIDTH_MUL 3
174 #define BITS 8
175 #define NUM_TAPS 3
176
177 #include "scale_y.h"
178
179 /* scale_uint8_x_4_y_quadratic_mmx */
180
181 #define FUNC_NAME scale_uint8_x_4_y_quadratic_mmx
182 #define WIDTH_MUL 4
183 #define BITS 8
184 #define NUM_TAPS 3
185
186 #include "scale_y.h"
187
188 /* scale_uint8_x_1_y_generic_mmx */
189
190 #define FUNC_NAME scale_uint8_x_1_y_generic_mmx
191 #define WIDTH_MUL 1
192 #define BITS 8
193 #define NUM_TAPS -1
194
195 #include "scale_y.h"
196
197 /* scale_uint8_x_2_y_generic_mmx */
198
199 #define FUNC_NAME scale_uint8_x_2_y_generic_mmx
200 #define WIDTH_MUL 2
201 #define BITS 8
202 #define NUM_TAPS -1
203
204 #include "scale_y.h"
205
206 /* scale_uint8_x_4_y_generic_mmx */
207
208 #define FUNC_NAME scale_uint8_x_4_y_generic_mmx
209 #define WIDTH_MUL 4
210 #define BITS 8
211 #define NUM_TAPS -1
212
213 #include "scale_y.h"
214
215 /* scale_uint8_x_3_y_generic_mmx */
216
217 #define FUNC_NAME scale_uint8_x_3_y_generic_mmx
218 #define WIDTH_MUL 3
219 #define BITS 8
220 #define NUM_TAPS -1
221
222 #include "scale_y.h"
223
224 #ifdef MMXEXT
gavl_init_scale_funcs_quadratic_y_mmxext(gavl_scale_funcs_t * tab,int src_advance,int dst_advance)225 void gavl_init_scale_funcs_quadratic_y_mmxext(gavl_scale_funcs_t * tab,
226 int src_advance, int dst_advance)
227 #else
228 void gavl_init_scale_funcs_quadratic_y_mmx(gavl_scale_funcs_t * tab,
229 int src_advance, int dst_advance)
230 #endif
231 {
232 if((src_advance == 1) && (dst_advance == 1))
233 {
234 tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_quadratic_mmx;
235 tab->funcs_y.bits_uint8_noadvance = 14;
236 }
237 else if((src_advance == 3) && (dst_advance == 3))
238 {
239 tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_quadratic_mmx;
240 tab->funcs_y.bits_uint8_noadvance = 14;
241 }
242 else if((src_advance == 4) && (dst_advance == 4))
243 {
244 tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_quadratic_mmx;
245 tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_quadratic_mmx;
246 tab->funcs_y.bits_uint8_noadvance = 14;
247 }
248 else if((src_advance == 2) && (dst_advance == 2))
249 {
250 tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_quadratic_mmx;
251 tab->funcs_y.bits_uint8_noadvance = 14;
252 }
253 }
254
255 #ifdef MMXEXT
gavl_init_scale_funcs_bicubic_y_mmxext(gavl_scale_funcs_t * tab,int src_advance,int dst_advance)256 void gavl_init_scale_funcs_bicubic_y_mmxext(gavl_scale_funcs_t * tab,
257 int src_advance, int dst_advance)
258 #else
259 void gavl_init_scale_funcs_bicubic_y_mmx(gavl_scale_funcs_t * tab,
260 int src_advance, int dst_advance)
261 #endif
262 {
263 if((src_advance == 1) && (dst_advance == 1))
264 {
265 tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_bicubic_mmx;
266 tab->funcs_y.bits_uint8_noadvance = 14;
267 }
268 else if((src_advance == 3) && (dst_advance == 3))
269 {
270 tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_bicubic_mmx;
271 tab->funcs_y.bits_uint8_noadvance = 14;
272 }
273 else if((src_advance == 4) && (dst_advance == 4))
274 {
275 tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_bicubic_mmx;
276 tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_bicubic_mmx;
277 tab->funcs_y.bits_uint8_noadvance = 14;
278 }
279 else if((src_advance == 2) && (dst_advance == 2))
280 {
281 tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_bicubic_mmx;
282 tab->funcs_y.bits_uint8_noadvance = 14;
283 }
284 }
285
286 #ifdef MMXEXT
gavl_init_scale_funcs_generic_y_mmxext(gavl_scale_funcs_t * tab,int src_advance,int dst_advance)287 void gavl_init_scale_funcs_generic_y_mmxext(gavl_scale_funcs_t * tab,
288 int src_advance, int dst_advance)
289 #else
290 void gavl_init_scale_funcs_generic_y_mmx(gavl_scale_funcs_t * tab,
291 int src_advance, int dst_advance)
292 #endif
293 {
294 if((src_advance == 1) && (dst_advance == 1))
295 {
296 tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_generic_mmx;
297 tab->funcs_y.bits_uint8_noadvance = 14;
298 }
299 else if((src_advance == 3) && (dst_advance == 3))
300 {
301 tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_generic_mmx;
302 tab->funcs_y.bits_uint8_noadvance = 14;
303 }
304 else if((src_advance == 2) && (dst_advance == 2))
305 {
306 tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_generic_mmx;
307 tab->funcs_y.bits_uint8_noadvance = 14;
308 }
309 else if((src_advance == 4) && (dst_advance == 4))
310 {
311 tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_generic_mmx;
312 tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_generic_mmx;
313 tab->funcs_y.bits_uint8_noadvance = 14;
314 }
315
316 }
317
318 /* scale_uint8_x_1_y_bilinear_mmx */
319
320 #define FUNC_NAME scale_uint8_x_1_y_bilinear_mmx
321 #define WIDTH_MUL 1
322 #define BITS 8
323
324 #include "scale_y_linear.h"
325
326 /* scale_uint8_x_2_y_bilinear_mmx */
327
328 #define FUNC_NAME scale_uint8_x_2_y_bilinear_mmx
329 #define WIDTH_MUL 2
330 #define BITS 8
331
332 #include "scale_y_linear.h"
333
334 /* scale_uint8_x_4_y_bilinear_mmx */
335
336 #define FUNC_NAME scale_uint8_x_4_y_bilinear_mmx
337 #define WIDTH_MUL 4
338 #define BITS 8
339
340 #include "scale_y_linear.h"
341
342 /* scale_uint8_x_3_y_bilinear_mmx */
343
344 #define FUNC_NAME scale_uint8_x_3_y_bilinear_mmx
345 #define WIDTH_MUL 3
346 #define BITS 8
347
348 #include "scale_y_linear.h"
349
350 #ifdef MMXEXT
gavl_init_scale_funcs_bilinear_y_mmxext(gavl_scale_funcs_t * tab,int src_advance,int dst_advance)351 void gavl_init_scale_funcs_bilinear_y_mmxext(gavl_scale_funcs_t * tab,
352 int src_advance, int dst_advance)
353 #else
354 void gavl_init_scale_funcs_bilinear_y_mmx(gavl_scale_funcs_t * tab,
355 int src_advance, int dst_advance)
356 #endif
357 {
358 if((src_advance == 1) && (dst_advance == 1))
359 {
360 tab->funcs_y.scale_uint8_x_1_noadvance = scale_uint8_x_1_y_bilinear_mmx;
361 tab->funcs_y.bits_uint8_noadvance = 14;
362 }
363 else if((src_advance == 3) && (dst_advance == 3))
364 {
365 tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_3_y_bilinear_mmx;
366 tab->funcs_y.bits_uint8_noadvance = 14;
367 }
368 else if((src_advance == 2) && (dst_advance == 2))
369 {
370 tab->funcs_y.scale_uint8_x_2 = scale_uint8_x_2_y_bilinear_mmx;
371 tab->funcs_y.bits_uint8_noadvance = 14;
372 }
373 else if((src_advance == 4) && (dst_advance == 4))
374 {
375 tab->funcs_y.scale_uint8_x_3 = scale_uint8_x_4_y_bilinear_mmx;
376 tab->funcs_y.scale_uint8_x_4 = scale_uint8_x_4_y_bilinear_mmx;
377 tab->funcs_y.bits_uint8_noadvance = 14;
378 }
379
380 }
381