1 /*****************************************************************************
2 *
3 * XVID MPEG-4 VIDEO CODEC
4 * - Native API implementation -
5 *
6 * Copyright(C) 2001-2014 Peter Ross <pross@xvid.org>
7 * 2002-2014 Michael Militzer <isibaar@xvid.org>
8 *
9 * This program is free software ; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation ; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program ; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 * $Id: xvid.c 2190 2019-12-28 09:56:26Z Isibaar $
24 *
25 ****************************************************************************/
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <time.h>
31
32 #if !defined(_WIN32)
33 #include <unistd.h>
34 #endif
35
36 #if defined(__APPLE__) && defined(__MACH__) && !defined(_SC_NPROCESSORS_CONF)
37 #include <sys/types.h>
38 #include <sys/sysctl.h>
39 #ifdef MAX
40 #undef MAX
41 #endif
42 #ifdef MIN
43 #undef MIN
44 #endif
45 #endif
46
47 #if defined(__amigaos4__)
48 #include <exec/exec.h>
49 #include <proto/exec.h>
50 #endif
51
52 #include "xvid.h"
53 #include "decoder.h"
54 #include "encoder.h"
55 #include "bitstream/cbp.h"
56 #include "dct/idct.h"
57 #include "dct/fdct.h"
58 #include "image/colorspace.h"
59 #include "image/interpolate8x8.h"
60 #include "utils/mem_transfer.h"
61 #include "utils/mbfunctions.h"
62 #include "quant/quant.h"
63 #include "motion/motion.h"
64 #include "motion/gmc.h"
65 #include "motion/sad.h"
66 #include "utils/emms.h"
67 #include "utils/timer.h"
68 #include "bitstream/mbcoding.h"
69 #include "image/qpel.h"
70 #include "image/postprocessing.h"
71
72 #if defined(_DEBUG)
73 unsigned int xvid_debug = 0; /* xvid debug mask */
74 #endif
75
76 #if (defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)) && defined(_MSC_VER)
77 # include <windows.h>
78 #elif defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) || (defined(ARCH_IS_PPC) && !defined(__amigaos4__))
79 # include <signal.h>
80 # include <setjmp.h>
81
/* Execution context saved by setjmp() before a probe function is run. */
static jmp_buf mark;

/*
 * SIGILL handler: abandons the faulting probe by jumping back to the context
 * stored in `mark`, which makes the matching setjmp() return 1.
 */
static void
sigill_handler(int signal)
{
	(void) signal;	/* the signal number itself is not needed */

	longjmp(mark, 1);
}
89 #endif
90
91 /*
92 * Calls the funcptr, and returns whether SIGILL (illegal instruction) was
93 * signalled
94 *
95 * Return values:
96 * -1 : could not determine
97 * 0 : SIGILL was *not* signalled
98 * 1 : SIGILL was signalled
99 */
100 #if (defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)) && defined(_MSC_VER)
101 static int
sigill_check(void (* func)())102 sigill_check(void (*func)())
103 {
104 _try {
105 func();
106 } _except(EXCEPTION_EXECUTE_HANDLER) {
107
108 if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
109 return(1);
110 }
111 return(0);
112 }
113 #elif defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) || (defined(ARCH_IS_PPC) && !defined(__amigaos4__))
114 static int
sigill_check(void (* func)())115 sigill_check(void (*func)())
116 {
117 void *old_handler;
118 int jmpret;
119
120 /* Set our SIGILL handler */
121 old_handler = signal(SIGILL, sigill_handler);
122
123 /* Check for error */
124 if (old_handler == SIG_ERR) {
125 return(-1);
126 }
127
128 /* Save stack context, so if func triggers a SIGILL, we can still roll
129 * back to a valid CPU state */
130 jmpret = setjmp(mark);
131
132 /* If setjmp returned directly, then its returned value is 0, and we still
133 * have to test the passed func. Otherwise it means the stack context has
134 * been restored by a longjmp() call, which in our case happens only in the
135 * signal handler */
136 if (jmpret == 0) {
137 func();
138 }
139
140 /* Restore old signal handler */
141 signal(SIGILL, old_handler);
142
143 return(jmpret);
144 }
145 #endif
146
147
148 /* detect cpu flags */
149 static unsigned int
detect_cpu_flags(void)150 detect_cpu_flags(void)
151 {
152 /* enable native assembly optimizations by default */
153 unsigned int cpu_flags = XVID_CPU_ASM;
154
155 #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
156 cpu_flags |= check_cpu_features();
157 if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))
158 cpu_flags &= ~XVID_CPU_SSE;
159
160 if ((cpu_flags & (XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41)) && sigill_check(sse2_os_trigger))
161 cpu_flags &= ~(XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41);
162 #endif
163
164 #if defined(ARCH_IS_PPC)
165 #if defined(__amigaos4__)
166 {
167 uint32_t vector_unit = VECTORTYPE_NONE;
168 IExec->GetCPUInfoTags(GCIT_VectorUnit, &vector_unit, TAG_END);
169 if (vector_unit == VECTORTYPE_ALTIVEC) {
170 cpu_flags |= XVID_CPU_ALTIVEC;
171 }
172 }
173 #else
174 if (!sigill_check(altivec_trigger))
175 cpu_flags |= XVID_CPU_ALTIVEC;
176 #endif
177 #endif
178
179 return cpu_flags;
180 }
181
182
/*****************************************************************************
 * Xvid Init Entry point
 *
 * This function initializes all internal function pointers according to the
 * CPU features forced by the library client or autodetected (depending on
 * the XVID_CPU_FORCE flag). It also initializes the vlc coding tables and all
 * image colorspace transformation tables.
 *
 * Returned value : 0 on success, an XVID_ERR_* code otherwise
 *                  (XVID_ERR_VERSION when the requested major API version
 *                  is not 1). The init structure is only read.
 *
 ****************************************************************************/
196
197
198 static
xvid_gbl_init(xvid_gbl_init_t * init)199 int xvid_gbl_init(xvid_gbl_init_t * init)
200 {
201 unsigned int cpu_flags;
202
203 if (XVID_VERSION_MAJOR(init->version) != 1) /* v1.x.x */
204 return XVID_ERR_VERSION;
205
206 cpu_flags = (init->cpu_flags & XVID_CPU_FORCE) ? init->cpu_flags : detect_cpu_flags();
207
208 /* Initialize the function pointers */
209 init_vlc_tables();
210
211 /* Fixed Point Forward/Inverse DCT transformations */
212 fdct = fdct_int32;
213 idct = idct_int32;
214
215 /* Only needed on PPC Altivec archs */
216 sadInit = NULL;
217
218 /* Restore FPU context : emms_c is a nop functions */
219 emms = emms_c;
220
221 /* Qpel stuff */
222 xvid_QP_Funcs = &xvid_QP_Funcs_C;
223 xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_C;
224 xvid_Init_QP();
225
226 /* Quantization functions */
227 quant_h263_intra = quant_h263_intra_c;
228 quant_h263_inter = quant_h263_inter_c;
229 dequant_h263_intra = dequant_h263_intra_c;
230 dequant_h263_inter = dequant_h263_inter_c;
231
232 quant_mpeg_intra = quant_mpeg_intra_c;
233 quant_mpeg_inter = quant_mpeg_inter_c;
234 dequant_mpeg_intra = dequant_mpeg_intra_c;
235 dequant_mpeg_inter = dequant_mpeg_inter_c;
236
237 /* Block transfer related functions */
238 transfer_8to16copy = transfer_8to16copy_c;
239 transfer_16to8copy = transfer_16to8copy_c;
240 transfer_8to16sub = transfer_8to16sub_c;
241 transfer_8to16subro = transfer_8to16subro_c;
242 transfer_8to16sub2 = transfer_8to16sub2_c;
243 transfer_8to16sub2ro = transfer_8to16sub2ro_c;
244 transfer_16to8add = transfer_16to8add_c;
245 transfer8x8_copy = transfer8x8_copy_c;
246 transfer8x4_copy = transfer8x4_copy_c;
247
248 /* Interlacing functions */
249 MBFieldTest = MBFieldTest_c;
250
251 /* Image interpolation related functions */
252 interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_c;
253 interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_c;
254 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_c;
255
256 interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_c;
257 interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_c;
258 interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_c;
259
260 interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_c;
261 interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_c;
262 interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_c;
263 interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_c;
264
265 interpolate16x16_lowpass_h = interpolate16x16_lowpass_h_c;
266 interpolate16x16_lowpass_v = interpolate16x16_lowpass_v_c;
267 interpolate16x16_lowpass_hv = interpolate16x16_lowpass_hv_c;
268
269 interpolate8x8_lowpass_h = interpolate8x8_lowpass_h_c;
270 interpolate8x8_lowpass_v = interpolate8x8_lowpass_v_c;
271 interpolate8x8_lowpass_hv = interpolate8x8_lowpass_hv_c;
272
273 interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_c;
274 interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_c;
275
276 interpolate8x8_avg2 = interpolate8x8_avg2_c;
277 interpolate8x8_avg4 = interpolate8x8_avg4_c;
278
279 /* postprocessing */
280 image_brightness = image_brightness_c;
281
282 /* Initialize internal colorspace transformation tables */
283 colorspace_init();
284
285 /* All colorspace transformation functions User Format->YV12 */
286 yv12_to_yv12 = yv12_to_yv12_c;
287 rgb555_to_yv12 = rgb555_to_yv12_c;
288 rgb565_to_yv12 = rgb565_to_yv12_c;
289 rgb_to_yv12 = rgb_to_yv12_c;
290 bgr_to_yv12 = bgr_to_yv12_c;
291 bgra_to_yv12 = bgra_to_yv12_c;
292 abgr_to_yv12 = abgr_to_yv12_c;
293 rgba_to_yv12 = rgba_to_yv12_c;
294 argb_to_yv12 = argb_to_yv12_c;
295 yuyv_to_yv12 = yuyv_to_yv12_c;
296 uyvy_to_yv12 = uyvy_to_yv12_c;
297
298 rgb555i_to_yv12 = rgb555i_to_yv12_c;
299 rgb565i_to_yv12 = rgb565i_to_yv12_c;
300 bgri_to_yv12 = bgri_to_yv12_c;
301 bgrai_to_yv12 = bgrai_to_yv12_c;
302 abgri_to_yv12 = abgri_to_yv12_c;
303 rgbai_to_yv12 = rgbai_to_yv12_c;
304 argbi_to_yv12 = argbi_to_yv12_c;
305 yuyvi_to_yv12 = yuyvi_to_yv12_c;
306 uyvyi_to_yv12 = uyvyi_to_yv12_c;
307
308 /* All colorspace transformation functions YV12->User format */
309 yv12_to_rgb555 = yv12_to_rgb555_c;
310 yv12_to_rgb565 = yv12_to_rgb565_c;
311 yv12_to_rgb = yv12_to_rgb_c;
312 yv12_to_bgr = yv12_to_bgr_c;
313 yv12_to_bgra = yv12_to_bgra_c;
314 yv12_to_abgr = yv12_to_abgr_c;
315 yv12_to_rgba = yv12_to_rgba_c;
316 yv12_to_argb = yv12_to_argb_c;
317 yv12_to_yuyv = yv12_to_yuyv_c;
318 yv12_to_uyvy = yv12_to_uyvy_c;
319
320 yv12_to_rgb555i = yv12_to_rgb555i_c;
321 yv12_to_rgb565i = yv12_to_rgb565i_c;
322 yv12_to_bgri = yv12_to_bgri_c;
323 yv12_to_bgrai = yv12_to_bgrai_c;
324 yv12_to_abgri = yv12_to_abgri_c;
325 yv12_to_rgbai = yv12_to_rgbai_c;
326 yv12_to_argbi = yv12_to_argbi_c;
327 yv12_to_yuyvi = yv12_to_yuyvi_c;
328 yv12_to_uyvyi = yv12_to_uyvyi_c;
329
330 /* Functions used in motion estimation algorithms */
331 calc_cbp = calc_cbp_c;
332 sad16 = sad16_c;
333 sad8 = sad8_c;
334 sad16bi = sad16bi_c;
335 sad8bi = sad8bi_c;
336 dev16 = dev16_c;
337 sad16v = sad16v_c;
338 sse8_16bit = sse8_16bit_c;
339 sse8_8bit = sse8_8bit_c;
340
341 sseh8_16bit = sseh8_16bit_c;
342 coeff8_energy = coeff8_energy_c;
343 blocksum8 = blocksum8_c;
344
345 init_GMC(cpu_flags);
346
347 #if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
348
349 if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||
350 (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
351 (cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2) ||
352 (cpu_flags & XVID_CPU_SSE3) || (cpu_flags & XVID_CPU_SSE41))
353 {
354 /* Restore FPU context : emms_c is a nop functions */
355 emms = emms_mmx;
356 }
357
358 if ((cpu_flags & XVID_CPU_MMX)) {
359
360 /* Forward and Inverse Discrete Cosine Transformation functions */
361 fdct = fdct_mmx_skal;
362 idct = idct_mmx;
363
364 /* Qpel stuff */
365 xvid_QP_Funcs = &xvid_QP_Funcs_mmx;
366 xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_mmx;
367
368 /* Quantization related functions */
369 quant_h263_intra = quant_h263_intra_mmx;
370 quant_h263_inter = quant_h263_inter_mmx;
371 dequant_h263_intra = dequant_h263_intra_mmx;
372 dequant_h263_inter = dequant_h263_inter_mmx;
373 quant_mpeg_intra = quant_mpeg_intra_mmx;
374 quant_mpeg_inter = quant_mpeg_inter_mmx;
375 dequant_mpeg_intra = dequant_mpeg_intra_mmx;
376 dequant_mpeg_inter = dequant_mpeg_inter_mmx;
377
378
379 /* Block related functions */
380 transfer_8to16copy = transfer_8to16copy_mmx;
381 transfer_16to8copy = transfer_16to8copy_mmx;
382 transfer_8to16sub = transfer_8to16sub_mmx;
383 transfer_8to16subro = transfer_8to16subro_mmx;
384 transfer_8to16sub2 = transfer_8to16sub2_mmx;
385 transfer_16to8add = transfer_16to8add_mmx;
386 transfer8x8_copy = transfer8x8_copy_mmx;
387 transfer8x4_copy = transfer8x4_copy_mmx;
388
389 /* Interlacing Functions */
390 MBFieldTest = MBFieldTest_mmx;
391
392 /* Image Interpolation related functions */
393 interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_mmx;
394 interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_mmx;
395 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_mmx;
396
397 interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_mmx;
398 interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_mmx;
399 interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_mmx;
400
401 interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_mmx;
402 interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_mmx;
403 interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_mmx;
404 interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_mmx;
405
406 interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_mmx;
407 interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_mmx;
408
409 interpolate8x8_avg2 = interpolate8x8_avg2_mmx;
410 interpolate8x8_avg4 = interpolate8x8_avg4_mmx;
411
412 /* postprocessing */
413 image_brightness = image_brightness_mmx;
414
415 /* image input xxx_to_yv12 related functions */
416
417 yv12_to_yv12 = yv12_to_yv12_mmx;
418
419 bgr_to_yv12 = bgr_to_yv12_mmx;
420 rgb_to_yv12 = rgb_to_yv12_mmx;
421 bgra_to_yv12 = bgra_to_yv12_mmx;
422 rgba_to_yv12 = rgba_to_yv12_mmx;
423 yuyv_to_yv12 = yuyv_to_yv12_mmx;
424 uyvy_to_yv12 = uyvy_to_yv12_mmx;
425
426 /* image output yv12_to_xxx related functions */
427 yv12_to_bgr = yv12_to_bgr_mmx;
428 yv12_to_bgra = yv12_to_bgra_mmx;
429 yv12_to_yuyv = yv12_to_yuyv_mmx;
430 yv12_to_uyvy = yv12_to_uyvy_mmx;
431
432 yv12_to_yuyvi = yv12_to_yuyvi_mmx;
433 yv12_to_uyvyi = yv12_to_uyvyi_mmx;
434
435 /* Motion estimation related functions */
436 calc_cbp = calc_cbp_mmx;
437 sad16 = sad16_mmx;
438 sad8 = sad8_mmx;
439 sad16bi = sad16bi_mmx;
440 sad8bi = sad8bi_mmx;
441 dev16 = dev16_mmx;
442 sad16v = sad16v_mmx;
443 sse8_16bit = sse8_16bit_mmx;
444 sse8_8bit = sse8_8bit_mmx;
445 }
446
447 /* these 3dnow functions are faster than mmx, but slower than xmm. */
448 if ((cpu_flags & XVID_CPU_3DNOW)) {
449
450 emms = emms_3dn;
451
452 /* ME functions */
453 sad16bi = sad16bi_3dn;
454 sad8bi = sad8bi_3dn;
455
456 yuyv_to_yv12 = yuyv_to_yv12_3dn;
457 uyvy_to_yv12 = uyvy_to_yv12_3dn;
458
459 }
460
461
462 if ((cpu_flags & XVID_CPU_MMXEXT)) {
463
464 /* DCT */
465 fdct = fdct_xmm_skal;
466 idct = idct_xmm;
467
468 /* Interpolation */
469 interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_xmm;
470 interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_xmm;
471 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_xmm;
472
473 interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_xmm;
474 interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_xmm;
475 interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_xmm;
476
477 interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_xmm;
478 interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_xmm;
479 interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_xmm;
480 interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_xmm;
481
482 /* Quantization */
483 quant_mpeg_inter = quant_mpeg_inter_xmm;
484
485 dequant_h263_intra = dequant_h263_intra_xmm;
486 dequant_h263_inter = dequant_h263_inter_xmm;
487
488 /* Buffer transfer */
489 transfer_8to16sub2 = transfer_8to16sub2_xmm;
490 transfer_8to16sub2ro = transfer_8to16sub2ro_xmm;
491
492 /* Colorspace transformation */
493 /* yv12_to_yv12 = yv12_to_yv12_xmm; */ /* appears to be slow on many machines */
494 yuyv_to_yv12 = yuyv_to_yv12_xmm;
495 uyvy_to_yv12 = uyvy_to_yv12_xmm;
496
497 /* ME functions */
498 sad16 = sad16_xmm;
499 sad8 = sad8_xmm;
500 sad16bi = sad16bi_xmm;
501 sad8bi = sad8bi_xmm;
502 dev16 = dev16_xmm;
503 sad16v = sad16v_xmm;
504 }
505
506 if ((cpu_flags & XVID_CPU_3DNOW)) {
507
508 /* Interpolation */
509 interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn;
510 interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dn;
511 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn;
512
513 interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_3dn;
514 interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_3dn;
515 interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_3dn;
516 }
517
518 if ((cpu_flags & XVID_CPU_3DNOWEXT)) {
519
520 /* Buffer transfer */
521 transfer_8to16copy = transfer_8to16copy_3dne;
522 transfer_16to8copy = transfer_16to8copy_3dne;
523 transfer_8to16sub = transfer_8to16sub_3dne;
524 transfer_8to16subro = transfer_8to16subro_3dne;
525 transfer_16to8add = transfer_16to8add_3dne;
526 transfer8x8_copy = transfer8x8_copy_3dne;
527 transfer8x4_copy = transfer8x4_copy_3dne;
528
529 if ((cpu_flags & XVID_CPU_MMXEXT)) {
530 /* Inverse DCT */
531 idct = idct_3dne;
532
533 /* Buffer transfer */
534 transfer_8to16sub2 = transfer_8to16sub2_3dne;
535
536 /* Interpolation */
537 interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dne;
538 interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dne;
539 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne;
540
541 interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_3dne;
542 interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_3dne;
543 interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_3dne;
544
545 /* Quantization */
546 quant_h263_intra = quant_h263_intra_3dne; /* cmov only */
547 quant_h263_inter = quant_h263_inter_3dne;
548 dequant_mpeg_intra = dequant_mpeg_intra_3dne; /* cmov only */
549 dequant_mpeg_inter = dequant_mpeg_inter_3dne;
550 dequant_h263_intra = dequant_h263_intra_3dne;
551 dequant_h263_inter = dequant_h263_inter_3dne;
552
553 /* ME functions */
554 sad16 = sad16_3dne;
555 sad8 = sad8_3dne;
556 sad16bi = sad16bi_3dne;
557 sad8bi = sad8bi_3dne;
558 dev16 = dev16_3dne;
559 }
560 }
561
562 if ((cpu_flags & XVID_CPU_SSE2)) {
563
564 calc_cbp = calc_cbp_sse2;
565
566 /* Quantization */
567 quant_h263_intra = quant_h263_intra_sse2;
568 quant_h263_inter = quant_h263_inter_sse2;
569 dequant_h263_intra = dequant_h263_intra_sse2;
570 dequant_h263_inter = dequant_h263_inter_sse2;
571
572 /* SAD operators */
573 sad16 = sad16_sse2;
574 dev16 = dev16_sse2;
575
576 /* PSNR-HVS-M distortion metric */
577 sseh8_16bit = sseh8_16bit_sse2;
578 coeff8_energy = coeff8_energy_sse2;
579 blocksum8 = blocksum8_sse2;
580
581 /* DCT operators */
582 fdct = fdct_sse2_skal;
583 idct = idct_sse2_skal; /* Is now IEEE1180 and Walken compliant. */
584
585 /* postprocessing */
586 image_brightness = image_brightness_sse2;
587
588 }
589
590 if ((cpu_flags & XVID_CPU_SSE3)) {
591
592 /* SAD operators */
593 sad16 = sad16_sse3;
594 dev16 = dev16_sse3;
595 }
596
597 #endif /* ARCH_IS_IA32 */
598
599 #if defined(ARCH_IS_IA64)
600 if ((cpu_flags & XVID_CPU_ASM)) { /* use assembler routines? */
601 idct_ia64_init();
602 fdct = fdct_ia64;
603 idct = idct_ia64; /*not yet working, crashes */
604 interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_ia64;
605 interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_ia64;
606 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_ia64;
607 sad16 = sad16_ia64;
608 sad16bi = sad16bi_ia64;
609 sad8 = sad8_ia64;
610 dev16 = dev16_ia64;
611 /* Halfpel8_Refine = Halfpel8_Refine_ia64; */
612 quant_h263_intra = quant_h263_intra_ia64;
613 quant_h263_inter = quant_h263_inter_ia64;
614 dequant_h263_intra = dequant_h263_intra_ia64;
615 dequant_h263_inter = dequant_h263_inter_ia64;
616 transfer_8to16copy = transfer_8to16copy_ia64;
617 transfer_16to8copy = transfer_16to8copy_ia64;
618 transfer_8to16sub = transfer_8to16sub_ia64;
619 transfer_8to16sub2 = transfer_8to16sub2_ia64;
620 transfer_16to8add = transfer_16to8add_ia64;
621 transfer8x8_copy = transfer8x8_copy_ia64;
622 }
623 #endif
624
625 #if defined(ARCH_IS_PPC)
626 if ((cpu_flags & XVID_CPU_ALTIVEC)) {
627 /* sad operators */
628 sad16 = sad16_altivec_c;
629 sad16bi = sad16bi_altivec_c;
630 sad8 = sad8_altivec_c;
631 dev16 = dev16_altivec_c;
632
633 sse8_16bit = sse8_16bit_altivec_c;
634
635 /* mem transfer */
636 transfer_8to16copy = transfer_8to16copy_altivec_c;
637 transfer_16to8copy = transfer_16to8copy_altivec_c;
638 transfer_8to16sub = transfer_8to16sub_altivec_c;
639 transfer_8to16subro = transfer_8to16subro_altivec_c;
640 transfer_8to16sub2 = transfer_8to16sub2_altivec_c;
641 transfer_16to8add = transfer_16to8add_altivec_c;
642 transfer8x8_copy = transfer8x8_copy_altivec_c;
643
644 /* Inverse DCT */
645 idct = idct_altivec_c;
646
647 /* Interpolation */
648 interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_altivec_c;
649 interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_altivec_c;
650 interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_altivec_c;
651
652 interpolate8x8_avg2 = interpolate8x8_avg2_altivec_c;
653 interpolate8x8_avg4 = interpolate8x8_avg4_altivec_c;
654
655 interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_altivec_c;
656 interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_altivec_c;
657 interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_altivec_c;
658 interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_altivec_c;
659
660 /* Colorspace conversion */
661 bgra_to_yv12 = bgra_to_yv12_altivec_c;
662 abgr_to_yv12 = abgr_to_yv12_altivec_c;
663 rgba_to_yv12 = rgba_to_yv12_altivec_c;
664 argb_to_yv12 = argb_to_yv12_altivec_c;
665
666 yuyv_to_yv12 = yuyv_to_yv12_altivec_c;
667 uyvy_to_yv12 = uyvy_to_yv12_altivec_c;
668
669 yv12_to_yuyv = yv12_to_yuyv_altivec_c;
670 yv12_to_uyvy = yv12_to_uyvy_altivec_c;
671
672 /* Quantization */
673 quant_h263_intra = quant_h263_intra_altivec_c;
674 quant_h263_inter = quant_h263_inter_altivec_c;
675 dequant_h263_intra = dequant_h263_intra_altivec_c;
676 dequant_h263_inter = dequant_h263_inter_altivec_c;
677
678 dequant_mpeg_intra = dequant_mpeg_intra_altivec_c;
679 dequant_mpeg_inter = dequant_mpeg_inter_altivec_c;
680
681 /* Qpel stuff */
682 xvid_QP_Funcs = &xvid_QP_Funcs_Altivec_C;
683 xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_Altivec_C;
684 }
685 #endif
686
687 #if defined(_DEBUG)
688 xvid_debug = init->debug;
689 #endif
690
691 return(0);
692 }
693
694
695 static int
xvid_gbl_info(xvid_gbl_info_t * info)696 xvid_gbl_info(xvid_gbl_info_t * info)
697 {
698 if (XVID_VERSION_MAJOR(info->version) != 1) /* v1.x.x */
699 return XVID_ERR_VERSION;
700
701 info->actual_version = XVID_VERSION;
702 info->build = "xvid-1.3.7";
703 info->cpu_flags = detect_cpu_flags();
704 info->num_threads = 0; /* single-thread */
705
706 #if defined(_WIN32)
707
708 {
709 SYSTEM_INFO siSysInfo;
710 GetSystemInfo(&siSysInfo);
711 info->num_threads = siSysInfo.dwNumberOfProcessors; /* number of _logical_ cores */
712 }
713
714 #elif defined(_SC_NPROCESSORS_CONF) /* should be available on Apple too actually */
715
716 info->num_threads = sysconf(_SC_NPROCESSORS_CONF);
717
718 #elif defined(__APPLE__) && defined(__MACH__)
719
720 {
721 size_t len;
722 int mib[2], ncpu;
723
724 mib[0] = CTL_HW;
725 mib[1] = HW_NCPU;
726 len = sizeof(ncpu);
727 if (sysctl(mib, 2, &ncpu, &len, NULL, 0) == 0)
728 info -> num_threads = ncpu;
729 else
730 info -> num_threads = 1;
731 }
732
733 #elif defined(__amigaos4__)
734
735 {
736 uint32_t num_threads = 1;
737 IExec->GetCPUInfoTags(GCIT_NumberOfCPUs, &num_threads, TAG_END);
738 info->num_threads = num_threads;
739 }
740
741 #endif
742
743 return 0;
744 }
745
746
747 static int
xvid_gbl_convert(xvid_gbl_convert_t * convert)748 xvid_gbl_convert(xvid_gbl_convert_t* convert)
749 {
750 int width;
751 int height;
752 int width2;
753 int height2;
754 IMAGE img;
755
756 if (XVID_VERSION_MAJOR(convert->version) != 1) /* v1.x.x */
757 return XVID_ERR_VERSION;
758
759 #if 0
760 const int flip1 = (convert->input.colorspace & XVID_CSP_VFLIP) ^ (convert->output.colorspace & XVID_CSP_VFLIP);
761 #endif
762 width = convert->width;
763 height = convert->height;
764 width2 = convert->width/2;
765 height2 = convert->height/2;
766
767 switch (convert->input.csp & ~XVID_CSP_VFLIP)
768 {
769 case XVID_CSP_YV12 :
770 img.y = convert->input.plane[0];
771 img.v = (uint8_t*)convert->input.plane[0] + convert->input.stride[0]*height;
772 img.u = (uint8_t*)convert->input.plane[0] + convert->input.stride[0]*height + (convert->input.stride[0]/2)*height2;
773 image_output(&img, width, height, width,
774 (uint8_t**)convert->output.plane, convert->output.stride,
775 convert->output.csp, convert->interlacing);
776 break;
777
778 case XVID_CSP_INTERNAL :
779 img.y = (uint8_t*)convert->input.plane[0];
780 img.u = (uint8_t*)convert->input.plane[1];
781 img.v = (uint8_t*)convert->input.plane[2];
782 image_output(&img, width, height, convert->input.stride[0],
783 (uint8_t**)convert->output.plane, convert->output.stride,
784 convert->output.csp, convert->interlacing);
785 break;
786
787 default :
788 return XVID_ERR_FORMAT;
789 }
790
791
792 emms();
793 return 0;
794 }
795
/*****************************************************************************
 * Xvid Global Entry point
 *
 * This function is just a wrapper around the global operations: depending
 * on opt it calls the init (XVID_GBL_INIT), info (XVID_GBL_INFO) or
 * colorspace conversion (XVID_GBL_CONVERT) routine with param1.
 *
 * Returned values : XVID_ERR_FAIL when opt is invalid
 *                   else returns the wrapped function result
 *
 ****************************************************************************/
805
806
807 int
xvid_global(void * handle,int opt,void * param1,void * param2)808 xvid_global(void *handle,
809 int opt,
810 void *param1,
811 void *param2)
812 {
813 switch(opt)
814 {
815 case XVID_GBL_INIT :
816 return xvid_gbl_init((xvid_gbl_init_t*)param1);
817
818 case XVID_GBL_INFO :
819 return xvid_gbl_info((xvid_gbl_info_t*)param1);
820
821 case XVID_GBL_CONVERT :
822 return xvid_gbl_convert((xvid_gbl_convert_t*)param1);
823
824 default :
825 return XVID_ERR_FAIL;
826 }
827 }
828
829 /*****************************************************************************
830 * Xvid Native decoder entry point
831 *
832 * This function is just a wrapper to all the option cases.
833 *
834 * Returned values : XVID_ERR_FAIL when opt is invalid
835 * else returns the wrapped function result
836 *
837 ****************************************************************************/
838
839 int
xvid_decore(void * handle,int opt,void * param1,void * param2)840 xvid_decore(void *handle,
841 int opt,
842 void *param1,
843 void *param2)
844 {
845 switch (opt) {
846 case XVID_DEC_CREATE:
847 return decoder_create((xvid_dec_create_t *) param1);
848
849 case XVID_DEC_DESTROY:
850 return decoder_destroy((DECODER *) handle);
851
852 case XVID_DEC_DECODE:
853 return decoder_decode((DECODER *) handle, (xvid_dec_frame_t *) param1, (xvid_dec_stats_t*) param2);
854
855 default:
856 return XVID_ERR_FAIL;
857 }
858 }
859
860
861 /*****************************************************************************
862 * Xvid Native encoder entry point
863 *
864 * This function is just a wrapper to all the option cases.
865 *
866 * Returned values : XVID_ERR_FAIL when opt is invalid
867 * else returns the wrapped function result
868 *
869 ****************************************************************************/
870
871 int
xvid_encore(void * handle,int opt,void * param1,void * param2)872 xvid_encore(void *handle,
873 int opt,
874 void *param1,
875 void *param2)
876 {
877 switch (opt) {
878 case XVID_ENC_ENCODE:
879
880 return enc_encode((Encoder *) handle,
881 (xvid_enc_frame_t *) param1,
882 (xvid_enc_stats_t *) param2);
883
884 case XVID_ENC_CREATE:
885 return enc_create((xvid_enc_create_t *) param1);
886
887 case XVID_ENC_DESTROY:
888 return enc_destroy((Encoder *) handle);
889
890 default:
891 return XVID_ERR_FAIL;
892 }
893 }
894