1
2 /* autogenerated from videomixerorc.orc */
3
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
7 #include <glib.h>
8
#if !defined(_ORC_INTEGER_TYPEDEFS_)
#define _ORC_INTEGER_TYPEDEFS_

/*
 * Fixed-width integer aliases (orc_int8 ... orc_uint64) plus the
 * ORC_UINT64_C() literal helper, selected per compiler.
 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L

/* C99 or later: map directly onto <stdint.h>. */
#include <stdint.h>
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
#define ORC_UINT64_C(x) UINT64_C(x)

#elif defined(_MSC_VER)

/* Microsoft compiler: use the sized __intN keywords. */
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
#define ORC_UINT64_C(x) (x##Ui64)
#define inline __inline

#else

/* Anything else: fall back to the basic types. */
#include <limits.h>
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
#if INT_MAX == LONG_MAX
/* long is no wider than int here, so use long long for 64 bits. */
typedef unsigned long long orc_uint64;
typedef long long orc_int64;
#define ORC_UINT64_C(x) (x##ULL)
#else
typedef unsigned long orc_uint64;
typedef long orc_int64;
#define ORC_UINT64_C(x) (x##UL)
#endif

#endif

/* A 16-bit value, also addressable as two 8-bit lanes. */
typedef union
{
  orc_int16 i;
  orc_int8 x2[2];
} orc_union16;

/* A 32-bit value: integer, float, two 16-bit lanes or four 8-bit lanes. */
typedef union
{
  orc_int32 i;
  float f;
  orc_int16 x2[2];
  orc_int8 x4[4];
} orc_union32;

/* A 64-bit value: integer, double, 2x32-bit, 2xfloat or 4x16-bit lanes. */
typedef union
{
  orc_int64 i;
  double f;
  orc_int32 x2[2];
  float x2f[2];
  orc_int16 x4[4];
} orc_union64;

#endif /* _ORC_INTEGER_TYPEDEFS_ */
/*
 * ORC_RESTRICT: spelling of the "restrict" pointer qualifier for the
 * current compiler, or nothing when no such qualifier is available.
 */
#if !defined(ORC_RESTRICT)
#  if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#    define ORC_RESTRICT restrict
#  elif defined(__GNUC__) && __GNUC__ >= 4
#    define ORC_RESTRICT __restrict__
#  else
#    define ORC_RESTRICT
#  endif
#endif
81
/*
 * ORC_INTERNAL: marks a symbol as hidden (not exported) where the compiler
 * offers a way to say so; expands to nothing otherwise.
 */
#if !defined(ORC_INTERNAL)
#  if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
#    if (__SUNPRO_C >= 0x590)
#      define ORC_INTERNAL __attribute__((visibility("hidden")))
#    else
#      define ORC_INTERNAL __hidden
#    endif
#  elif defined (__GNUC__)
#    define ORC_INTERNAL __attribute__((visibility("hidden")))
#  else
#    define ORC_INTERNAL
#  endif
#endif
93
94
95 #ifndef DISABLE_ORC
96 #include <orc/orc.h>
97 #endif
98 void video_mixer_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
99 void video_mixer_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
100 const guint32 * ORC_RESTRICT s1, int n);
101 void video_mixer_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
102 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
103 void video_mixer_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
104 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
105 void video_mixer_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
106 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
107 void video_mixer_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
108 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
109 void video_mixer_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
110 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
111
112
/* begin Orc C target preamble */

/*
 * Generic arithmetic helpers.  These are plain macros, so an argument may
 * be evaluated more than once.
 */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))

/* Value ranges: S/U = signed/unsigned, B/W/L = 8/16/32 bits. */
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX (orc_uint8) 255
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX (orc_uint16)65535
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0

/* Saturate a value to one of the ranges above. */
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)

/* Byte-order reversal of 16-, 32- and 64-bit values. */
#define ORC_SWAP_W(x) ((((x)&0xffU)<<8) | (((x)&0xff00U)>>8))
#define ORC_SWAP_L(x) ((((x)&0xffU)<<24) | \
    (((x)&0xff00U)<<8) | \
    (((x)&0xff0000U)>>8) | \
    (((x)&0xff000000U)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | \
    (((x)&ORC_UINT64_C(0xff00))<<40) | \
    (((x)&ORC_UINT64_C(0xff0000))<<24) | \
    (((x)&ORC_UINT64_C(0xff000000))<<8) | \
    (((x)&ORC_UINT64_C(0xff00000000))>>8) | \
    (((x)&ORC_UINT64_C(0xff0000000000))>>24) | \
    (((x)&ORC_UINT64_C(0xff000000000000))>>40) | \
    (((x)&ORC_UINT64_C(0xff00000000000000))>>56))

/* Advance a pointer by a byte offset, yielding a void pointer. */
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))

/*
 * Helpers that work on the raw bit pattern of a float (32-bit) or double
 * (64-bit): *_DENORMAL keeps only the sign bit when the exponent field is
 * zero, *_ISNAN tests for an all-ones exponent with a non-zero mantissa.
 */
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? \
    ORC_UINT64_C(0xfff0000000000000) : \
    ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && \
    (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))

/* "restrict" qualifier for this compiler, unless already chosen. */
#if !defined(ORC_RESTRICT)
#  if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#    define ORC_RESTRICT restrict
#  elif defined(__GNUC__) && __GNUC__ >= 4
#    define ORC_RESTRICT __restrict__
#  else
#    define ORC_RESTRICT
#  endif
#endif

/* end Orc C target preamble */
154
155
156
157 /* video_mixer_orc_splat_u32 */
158 #ifdef DISABLE_ORC
159 void
video_mixer_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)160 video_mixer_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
161 {
162 int i;
163 orc_union32 *ORC_RESTRICT ptr0;
164 orc_union32 var32;
165 orc_union32 var33;
166
167 ptr0 = (orc_union32 *) d1;
168
169 /* 0: loadpl */
170 var32.i = p1;
171
172 for (i = 0; i < n; i++) {
173 /* 1: copyl */
174 var33.i = var32.i;
175 /* 2: storel */
176 ptr0[i] = var33;
177 }
178
179 }
180
181 #else
182 static void
_backup_video_mixer_orc_splat_u32(OrcExecutor * ORC_RESTRICT ex)183 _backup_video_mixer_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex)
184 {
185 int i;
186 int n = ex->n;
187 orc_union32 *ORC_RESTRICT ptr0;
188 orc_union32 var32;
189 orc_union32 var33;
190
191 ptr0 = (orc_union32 *) ex->arrays[0];
192
193 /* 0: loadpl */
194 var32.i = ex->params[24];
195
196 for (i = 0; i < n; i++) {
197 /* 1: copyl */
198 var33.i = var32.i;
199 /* 2: storel */
200 ptr0[i] = var33;
201 }
202
203 }
204
205 void
video_mixer_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)206 video_mixer_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
207 {
208 OrcExecutor _ex, *ex = &_ex;
209 static volatile int p_inited = 0;
210 static OrcCode *c = 0;
211 void (*func) (OrcExecutor *);
212
213 if (!p_inited) {
214 orc_once_mutex_lock ();
215 if (!p_inited) {
216 OrcProgram *p;
217
218 #if 1
219 static const orc_uint8 bc[] = {
220 1, 9, 25, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95, 111,
221 114, 99, 95, 115, 112, 108, 97, 116, 95, 117, 51, 50, 11, 4, 4, 16,
222 4, 112, 0, 24, 2, 0,
223 };
224 p = orc_program_new_from_static_bytecode (bc);
225 orc_program_set_backup_function (p, _backup_video_mixer_orc_splat_u32);
226 #else
227 p = orc_program_new ();
228 orc_program_set_name (p, "video_mixer_orc_splat_u32");
229 orc_program_set_backup_function (p, _backup_video_mixer_orc_splat_u32);
230 orc_program_add_destination (p, 4, "d1");
231 orc_program_add_parameter (p, 4, "p1");
232
233 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1,
234 ORC_VAR_D1);
235 #endif
236
237 orc_program_compile (p);
238 c = orc_program_take_code (p);
239 orc_program_free (p);
240 }
241 p_inited = TRUE;
242 orc_once_mutex_unlock ();
243 }
244 ex->arrays[ORC_VAR_A2] = c;
245 ex->program = 0;
246
247 ex->n = n;
248 ex->arrays[ORC_VAR_D1] = d1;
249 ex->params[ORC_VAR_P1] = p1;
250
251 func = c->exec;
252 func (ex);
253 }
254 #endif
255
256
257 /* video_mixer_orc_memcpy_u32 */
258 #ifdef DISABLE_ORC
259 void
video_mixer_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)260 video_mixer_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
261 const guint32 * ORC_RESTRICT s1, int n)
262 {
263 int i;
264 orc_union32 *ORC_RESTRICT ptr0;
265 const orc_union32 *ORC_RESTRICT ptr4;
266 orc_union32 var32;
267 orc_union32 var33;
268
269 ptr0 = (orc_union32 *) d1;
270 ptr4 = (orc_union32 *) s1;
271
272
273 for (i = 0; i < n; i++) {
274 /* 0: loadl */
275 var32 = ptr4[i];
276 /* 1: copyl */
277 var33.i = var32.i;
278 /* 2: storel */
279 ptr0[i] = var33;
280 }
281
282 }
283
284 #else
285 static void
_backup_video_mixer_orc_memcpy_u32(OrcExecutor * ORC_RESTRICT ex)286 _backup_video_mixer_orc_memcpy_u32 (OrcExecutor * ORC_RESTRICT ex)
287 {
288 int i;
289 int n = ex->n;
290 orc_union32 *ORC_RESTRICT ptr0;
291 const orc_union32 *ORC_RESTRICT ptr4;
292 orc_union32 var32;
293 orc_union32 var33;
294
295 ptr0 = (orc_union32 *) ex->arrays[0];
296 ptr4 = (orc_union32 *) ex->arrays[4];
297
298
299 for (i = 0; i < n; i++) {
300 /* 0: loadl */
301 var32 = ptr4[i];
302 /* 1: copyl */
303 var33.i = var32.i;
304 /* 2: storel */
305 ptr0[i] = var33;
306 }
307
308 }
309
310 void
video_mixer_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)311 video_mixer_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
312 const guint32 * ORC_RESTRICT s1, int n)
313 {
314 OrcExecutor _ex, *ex = &_ex;
315 static volatile int p_inited = 0;
316 static OrcCode *c = 0;
317 void (*func) (OrcExecutor *);
318
319 if (!p_inited) {
320 orc_once_mutex_lock ();
321 if (!p_inited) {
322 OrcProgram *p;
323
324 #if 1
325 static const orc_uint8 bc[] = {
326 1, 9, 26, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95, 111,
327 114, 99, 95, 109, 101, 109, 99, 112, 121, 95, 117, 51, 50, 11, 4, 4,
328 12, 4, 4, 112, 0, 4, 2, 0,
329 };
330 p = orc_program_new_from_static_bytecode (bc);
331 orc_program_set_backup_function (p, _backup_video_mixer_orc_memcpy_u32);
332 #else
333 p = orc_program_new ();
334 orc_program_set_name (p, "video_mixer_orc_memcpy_u32");
335 orc_program_set_backup_function (p, _backup_video_mixer_orc_memcpy_u32);
336 orc_program_add_destination (p, 4, "d1");
337 orc_program_add_source (p, 4, "s1");
338
339 orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1,
340 ORC_VAR_D1);
341 #endif
342
343 orc_program_compile (p);
344 c = orc_program_take_code (p);
345 orc_program_free (p);
346 }
347 p_inited = TRUE;
348 orc_once_mutex_unlock ();
349 }
350 ex->arrays[ORC_VAR_A2] = c;
351 ex->program = 0;
352
353 ex->n = n;
354 ex->arrays[ORC_VAR_D1] = d1;
355 ex->arrays[ORC_VAR_S1] = (void *) s1;
356
357 func = c->exec;
358 func (ex);
359 }
360 #endif
361
362
363 /* video_mixer_orc_blend_u8 */
364 #ifdef DISABLE_ORC
365 void
video_mixer_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)366 video_mixer_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
367 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
368 {
369 int i;
370 int j;
371 orc_int8 *ORC_RESTRICT ptr0;
372 const orc_int8 *ORC_RESTRICT ptr4;
373 orc_int8 var34;
374 orc_int8 var35;
375 orc_union16 var36;
376 orc_int8 var37;
377 orc_union16 var38;
378 orc_union16 var39;
379 orc_union16 var40;
380 orc_union16 var41;
381 orc_union16 var42;
382 orc_union16 var43;
383 orc_union16 var44;
384
385 for (j = 0; j < m; j++) {
386 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
387 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
388
389 /* 5: loadpw */
390 var36.i = p1;
391
392 for (i = 0; i < n; i++) {
393 /* 0: loadb */
394 var34 = ptr0[i];
395 /* 1: convubw */
396 var38.i = (orc_uint8) var34;
397 /* 2: loadb */
398 var35 = ptr4[i];
399 /* 3: convubw */
400 var39.i = (orc_uint8) var35;
401 /* 4: subw */
402 var40.i = var39.i - var38.i;
403 /* 6: mullw */
404 var41.i = (var40.i * var36.i) & 0xffff;
405 /* 7: shlw */
406 var42.i = ((orc_uint16) var38.i) << 8;
407 /* 8: addw */
408 var43.i = var42.i + var41.i;
409 /* 9: shruw */
410 var44.i = ((orc_uint16) var43.i) >> 8;
411 /* 10: convsuswb */
412 var37 = ORC_CLAMP_UB (var44.i);
413 /* 11: storeb */
414 ptr0[i] = var37;
415 }
416 }
417
418 }
419
420 #else
421 static void
_backup_video_mixer_orc_blend_u8(OrcExecutor * ORC_RESTRICT ex)422 _backup_video_mixer_orc_blend_u8 (OrcExecutor * ORC_RESTRICT ex)
423 {
424 int i;
425 int j;
426 int n = ex->n;
427 int m = ex->params[ORC_VAR_A1];
428 orc_int8 *ORC_RESTRICT ptr0;
429 const orc_int8 *ORC_RESTRICT ptr4;
430 orc_int8 var34;
431 orc_int8 var35;
432 orc_union16 var36;
433 orc_int8 var37;
434 orc_union16 var38;
435 orc_union16 var39;
436 orc_union16 var40;
437 orc_union16 var41;
438 orc_union16 var42;
439 orc_union16 var43;
440 orc_union16 var44;
441
442 for (j = 0; j < m; j++) {
443 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
444 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
445
446 /* 5: loadpw */
447 var36.i = ex->params[24];
448
449 for (i = 0; i < n; i++) {
450 /* 0: loadb */
451 var34 = ptr0[i];
452 /* 1: convubw */
453 var38.i = (orc_uint8) var34;
454 /* 2: loadb */
455 var35 = ptr4[i];
456 /* 3: convubw */
457 var39.i = (orc_uint8) var35;
458 /* 4: subw */
459 var40.i = var39.i - var38.i;
460 /* 6: mullw */
461 var41.i = (var40.i * var36.i) & 0xffff;
462 /* 7: shlw */
463 var42.i = ((orc_uint16) var38.i) << 8;
464 /* 8: addw */
465 var43.i = var42.i + var41.i;
466 /* 9: shruw */
467 var44.i = ((orc_uint16) var43.i) >> 8;
468 /* 10: convsuswb */
469 var37 = ORC_CLAMP_UB (var44.i);
470 /* 11: storeb */
471 ptr0[i] = var37;
472 }
473 }
474
475 }
476
477 void
video_mixer_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)478 video_mixer_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
479 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
480 {
481 OrcExecutor _ex, *ex = &_ex;
482 static volatile int p_inited = 0;
483 static OrcCode *c = 0;
484 void (*func) (OrcExecutor *);
485
486 if (!p_inited) {
487 orc_once_mutex_lock ();
488 if (!p_inited) {
489 OrcProgram *p;
490
491 #if 1
492 static const orc_uint8 bc[] = {
493 1, 7, 9, 24, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
494 111, 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 56, 11, 1, 1, 12,
495 1, 1, 14, 1, 8, 0, 0, 0, 16, 2, 20, 2, 20, 2, 150, 32,
496 0, 150, 33, 4, 98, 33, 33, 32, 89, 33, 33, 24, 93, 32, 32, 16,
497 70, 33, 32, 33, 95, 33, 33, 16, 160, 0, 33, 2, 0,
498 };
499 p = orc_program_new_from_static_bytecode (bc);
500 orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_u8);
501 #else
502 p = orc_program_new ();
503 orc_program_set_2d (p);
504 orc_program_set_name (p, "video_mixer_orc_blend_u8");
505 orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_u8);
506 orc_program_add_destination (p, 1, "d1");
507 orc_program_add_source (p, 1, "s1");
508 orc_program_add_constant (p, 1, 0x00000008, "c1");
509 orc_program_add_parameter (p, 2, "p1");
510 orc_program_add_temporary (p, 2, "t1");
511 orc_program_add_temporary (p, 2, "t2");
512
513 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
514 ORC_VAR_D1);
515 orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1,
516 ORC_VAR_D1);
517 orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
518 ORC_VAR_D1);
519 orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
520 ORC_VAR_D1);
521 orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
522 ORC_VAR_D1);
523 orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
524 ORC_VAR_D1);
525 orc_program_append_2 (p, "shruw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
526 ORC_VAR_D1);
527 orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2,
528 ORC_VAR_D1, ORC_VAR_D1);
529 #endif
530
531 orc_program_compile (p);
532 c = orc_program_take_code (p);
533 orc_program_free (p);
534 }
535 p_inited = TRUE;
536 orc_once_mutex_unlock ();
537 }
538 ex->arrays[ORC_VAR_A2] = c;
539 ex->program = 0;
540
541 ex->n = n;
542 ORC_EXECUTOR_M (ex) = m;
543 ex->arrays[ORC_VAR_D1] = d1;
544 ex->params[ORC_VAR_D1] = d1_stride;
545 ex->arrays[ORC_VAR_S1] = (void *) s1;
546 ex->params[ORC_VAR_S1] = s1_stride;
547 ex->params[ORC_VAR_P1] = p1;
548
549 func = c->exec;
550 func (ex);
551 }
552 #endif
553
554
555 /* video_mixer_orc_blend_argb */
556 #ifdef DISABLE_ORC
557 void
video_mixer_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)558 video_mixer_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
559 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
560 {
561 int i;
562 int j;
563 orc_union32 *ORC_RESTRICT ptr0;
564 const orc_union32 *ORC_RESTRICT ptr4;
565 orc_union64 var39;
566 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
567 volatile orc_union32 var40;
568 #else
569 orc_union32 var40;
570 #endif
571 orc_union32 var41;
572 orc_union16 var42;
573 orc_int8 var43;
574 orc_union32 var44;
575 orc_union64 var45;
576 orc_union64 var46;
577 orc_union64 var47;
578 orc_union64 var48;
579 orc_union32 var49;
580 orc_union64 var50;
581 orc_union64 var51;
582 orc_union64 var52;
583 orc_union64 var53;
584 orc_union64 var54;
585 orc_union32 var55;
586 orc_union32 var56;
587
588 for (j = 0; j < m; j++) {
589 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
590 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
591
592 /* 5: loadpw */
593 var39.x4[0] = p1;
594 var39.x4[1] = p1;
595 var39.x4[2] = p1;
596 var39.x4[3] = p1;
597 /* 16: loadpl */
598 var40.i = 0x000000ff; /* 255 or 1.25987e-321f */
599
600 for (i = 0; i < n; i++) {
601 /* 0: loadl */
602 var41 = ptr4[i];
603 /* 1: convlw */
604 var42.i = var41.i;
605 /* 2: convwb */
606 var43 = var42.i;
607 /* 3: splatbl */
608 var44.i =
609 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
610 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
611 & 0xff);
612 /* 4: convubw */
613 var45.x4[0] = (orc_uint8) var44.x4[0];
614 var45.x4[1] = (orc_uint8) var44.x4[1];
615 var45.x4[2] = (orc_uint8) var44.x4[2];
616 var45.x4[3] = (orc_uint8) var44.x4[3];
617 /* 6: mullw */
618 var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
619 var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
620 var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
621 var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
622 /* 7: shruw */
623 var47.x4[0] = ((orc_uint16) var46.x4[0]) >> 8;
624 var47.x4[1] = ((orc_uint16) var46.x4[1]) >> 8;
625 var47.x4[2] = ((orc_uint16) var46.x4[2]) >> 8;
626 var47.x4[3] = ((orc_uint16) var46.x4[3]) >> 8;
627 /* 8: convubw */
628 var48.x4[0] = (orc_uint8) var41.x4[0];
629 var48.x4[1] = (orc_uint8) var41.x4[1];
630 var48.x4[2] = (orc_uint8) var41.x4[2];
631 var48.x4[3] = (orc_uint8) var41.x4[3];
632 /* 9: loadl */
633 var49 = ptr0[i];
634 /* 10: convubw */
635 var50.x4[0] = (orc_uint8) var49.x4[0];
636 var50.x4[1] = (orc_uint8) var49.x4[1];
637 var50.x4[2] = (orc_uint8) var49.x4[2];
638 var50.x4[3] = (orc_uint8) var49.x4[3];
639 /* 11: subw */
640 var51.x4[0] = var48.x4[0] - var50.x4[0];
641 var51.x4[1] = var48.x4[1] - var50.x4[1];
642 var51.x4[2] = var48.x4[2] - var50.x4[2];
643 var51.x4[3] = var48.x4[3] - var50.x4[3];
644 /* 12: mullw */
645 var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
646 var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
647 var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
648 var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
649 /* 13: div255w */
650 var53.x4[0] =
651 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
652 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
653 var53.x4[1] =
654 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
655 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
656 var53.x4[2] =
657 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
658 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
659 var53.x4[3] =
660 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
661 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
662 /* 14: addw */
663 var54.x4[0] = var50.x4[0] + var53.x4[0];
664 var54.x4[1] = var50.x4[1] + var53.x4[1];
665 var54.x4[2] = var50.x4[2] + var53.x4[2];
666 var54.x4[3] = var50.x4[3] + var53.x4[3];
667 /* 15: convwb */
668 var55.x4[0] = var54.x4[0];
669 var55.x4[1] = var54.x4[1];
670 var55.x4[2] = var54.x4[2];
671 var55.x4[3] = var54.x4[3];
672 /* 17: orl */
673 var56.i = var55.i | var40.i;
674 /* 18: storel */
675 ptr0[i] = var56;
676 }
677 }
678
679 }
680
681 #else
682 static void
_backup_video_mixer_orc_blend_argb(OrcExecutor * ORC_RESTRICT ex)683 _backup_video_mixer_orc_blend_argb (OrcExecutor * ORC_RESTRICT ex)
684 {
685 int i;
686 int j;
687 int n = ex->n;
688 int m = ex->params[ORC_VAR_A1];
689 orc_union32 *ORC_RESTRICT ptr0;
690 const orc_union32 *ORC_RESTRICT ptr4;
691 orc_union64 var39;
692 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
693 volatile orc_union32 var40;
694 #else
695 orc_union32 var40;
696 #endif
697 orc_union32 var41;
698 orc_union16 var42;
699 orc_int8 var43;
700 orc_union32 var44;
701 orc_union64 var45;
702 orc_union64 var46;
703 orc_union64 var47;
704 orc_union64 var48;
705 orc_union32 var49;
706 orc_union64 var50;
707 orc_union64 var51;
708 orc_union64 var52;
709 orc_union64 var53;
710 orc_union64 var54;
711 orc_union32 var55;
712 orc_union32 var56;
713
714 for (j = 0; j < m; j++) {
715 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
716 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
717
718 /* 5: loadpw */
719 var39.x4[0] = ex->params[24];
720 var39.x4[1] = ex->params[24];
721 var39.x4[2] = ex->params[24];
722 var39.x4[3] = ex->params[24];
723 /* 16: loadpl */
724 var40.i = 0x000000ff; /* 255 or 1.25987e-321f */
725
726 for (i = 0; i < n; i++) {
727 /* 0: loadl */
728 var41 = ptr4[i];
729 /* 1: convlw */
730 var42.i = var41.i;
731 /* 2: convwb */
732 var43 = var42.i;
733 /* 3: splatbl */
734 var44.i =
735 ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
736 << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
737 & 0xff);
738 /* 4: convubw */
739 var45.x4[0] = (orc_uint8) var44.x4[0];
740 var45.x4[1] = (orc_uint8) var44.x4[1];
741 var45.x4[2] = (orc_uint8) var44.x4[2];
742 var45.x4[3] = (orc_uint8) var44.x4[3];
743 /* 6: mullw */
744 var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
745 var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
746 var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
747 var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
748 /* 7: shruw */
749 var47.x4[0] = ((orc_uint16) var46.x4[0]) >> 8;
750 var47.x4[1] = ((orc_uint16) var46.x4[1]) >> 8;
751 var47.x4[2] = ((orc_uint16) var46.x4[2]) >> 8;
752 var47.x4[3] = ((orc_uint16) var46.x4[3]) >> 8;
753 /* 8: convubw */
754 var48.x4[0] = (orc_uint8) var41.x4[0];
755 var48.x4[1] = (orc_uint8) var41.x4[1];
756 var48.x4[2] = (orc_uint8) var41.x4[2];
757 var48.x4[3] = (orc_uint8) var41.x4[3];
758 /* 9: loadl */
759 var49 = ptr0[i];
760 /* 10: convubw */
761 var50.x4[0] = (orc_uint8) var49.x4[0];
762 var50.x4[1] = (orc_uint8) var49.x4[1];
763 var50.x4[2] = (orc_uint8) var49.x4[2];
764 var50.x4[3] = (orc_uint8) var49.x4[3];
765 /* 11: subw */
766 var51.x4[0] = var48.x4[0] - var50.x4[0];
767 var51.x4[1] = var48.x4[1] - var50.x4[1];
768 var51.x4[2] = var48.x4[2] - var50.x4[2];
769 var51.x4[3] = var48.x4[3] - var50.x4[3];
770 /* 12: mullw */
771 var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
772 var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
773 var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
774 var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
775 /* 13: div255w */
776 var53.x4[0] =
777 ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
778 (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
779 var53.x4[1] =
780 ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
781 (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
782 var53.x4[2] =
783 ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
784 (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
785 var53.x4[3] =
786 ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
787 (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
788 /* 14: addw */
789 var54.x4[0] = var50.x4[0] + var53.x4[0];
790 var54.x4[1] = var50.x4[1] + var53.x4[1];
791 var54.x4[2] = var50.x4[2] + var53.x4[2];
792 var54.x4[3] = var50.x4[3] + var53.x4[3];
793 /* 15: convwb */
794 var55.x4[0] = var54.x4[0];
795 var55.x4[1] = var54.x4[1];
796 var55.x4[2] = var54.x4[2];
797 var55.x4[3] = var54.x4[3];
798 /* 17: orl */
799 var56.i = var55.i | var40.i;
800 /* 18: storel */
801 ptr0[i] = var56;
802 }
803 }
804
805 }
806
807 void
video_mixer_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)808 video_mixer_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
809 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
810 {
811 OrcExecutor _ex, *ex = &_ex;
812 static volatile int p_inited = 0;
813 static OrcCode *c = 0;
814 void (*func) (OrcExecutor *);
815
816 if (!p_inited) {
817 orc_once_mutex_lock ();
818 if (!p_inited) {
819 OrcProgram *p;
820
821 #if 1
822 static const orc_uint8 bc[] = {
823 1, 7, 9, 26, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
824 111, 114, 99, 95, 98, 108, 101, 110, 100, 95, 97, 114, 103, 98, 11, 4,
825 4, 12, 4, 4, 14, 4, 255, 0, 0, 0, 14, 2, 8, 0, 0, 0,
826 16, 2, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 20, 8, 20, 8,
827 113, 32, 4, 163, 33, 32, 157, 34, 33, 152, 35, 34, 21, 2, 150, 38,
828 35, 21, 2, 89, 38, 38, 24, 21, 2, 95, 38, 38, 17, 21, 2, 150,
829 37, 32, 113, 32, 0, 21, 2, 150, 36, 32, 21, 2, 98, 37, 37, 36,
830 21, 2, 89, 37, 37, 38, 21, 2, 80, 37, 37, 21, 2, 70, 36, 36,
831 37, 21, 2, 157, 32, 36, 123, 32, 32, 16, 128, 0, 32, 2, 0,
832 };
833 p = orc_program_new_from_static_bytecode (bc);
834 orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_argb);
835 #else
836 p = orc_program_new ();
837 orc_program_set_2d (p);
838 orc_program_set_name (p, "video_mixer_orc_blend_argb");
839 orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_argb);
840 orc_program_add_destination (p, 4, "d1");
841 orc_program_add_source (p, 4, "s1");
842 orc_program_add_constant (p, 4, 0x000000ff, "c1");
843 orc_program_add_constant (p, 2, 0x00000008, "c2");
844 orc_program_add_parameter (p, 2, "p1");
845 orc_program_add_temporary (p, 4, "t1");
846 orc_program_add_temporary (p, 2, "t2");
847 orc_program_add_temporary (p, 1, "t3");
848 orc_program_add_temporary (p, 4, "t4");
849 orc_program_add_temporary (p, 8, "t5");
850 orc_program_add_temporary (p, 8, "t6");
851 orc_program_add_temporary (p, 8, "t7");
852
853 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
854 ORC_VAR_D1);
855 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
856 ORC_VAR_D1);
857 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
858 ORC_VAR_D1);
859 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
860 ORC_VAR_D1);
861 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T4, ORC_VAR_D1,
862 ORC_VAR_D1);
863 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_P1,
864 ORC_VAR_D1);
865 orc_program_append_2 (p, "shruw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
866 ORC_VAR_D1);
867 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
868 ORC_VAR_D1);
869 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
870 ORC_VAR_D1);
871 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1,
872 ORC_VAR_D1);
873 orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
874 ORC_VAR_D1);
875 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
876 ORC_VAR_D1);
877 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
878 ORC_VAR_D1);
879 orc_program_append_2 (p, "addw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T6,
880 ORC_VAR_D1);
881 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1,
882 ORC_VAR_D1);
883 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
884 ORC_VAR_D1);
885 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
886 ORC_VAR_D1);
887 #endif
888
889 orc_program_compile (p);
890 c = orc_program_take_code (p);
891 orc_program_free (p);
892 }
893 p_inited = TRUE;
894 orc_once_mutex_unlock ();
895 }
896 ex->arrays[ORC_VAR_A2] = c;
897 ex->program = 0;
898
899 ex->n = n;
900 ORC_EXECUTOR_M (ex) = m;
901 ex->arrays[ORC_VAR_D1] = d1;
902 ex->params[ORC_VAR_D1] = d1_stride;
903 ex->arrays[ORC_VAR_S1] = (void *) s1;
904 ex->params[ORC_VAR_S1] = s1_stride;
905 ex->params[ORC_VAR_P1] = p1;
906
907 func = c->exec;
908 func (ex);
909 }
910 #endif
911
912
913 /* video_mixer_orc_blend_bgra */
914 #ifdef DISABLE_ORC
915 void
video_mixer_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)916 video_mixer_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
917 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
918 {
919 int i;
920 int j;
921 orc_union32 *ORC_RESTRICT ptr0;
922 const orc_union32 *ORC_RESTRICT ptr4;
923 orc_union64 var40;
924 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
925 volatile orc_union32 var41;
926 #else
927 orc_union32 var41;
928 #endif
929 orc_union32 var42;
930 orc_union32 var43;
931 orc_union16 var44;
932 orc_int8 var45;
933 orc_union32 var46;
934 orc_union64 var47;
935 orc_union64 var48;
936 orc_union64 var49;
937 orc_union64 var50;
938 orc_union32 var51;
939 orc_union64 var52;
940 orc_union64 var53;
941 orc_union64 var54;
942 orc_union64 var55;
943 orc_union64 var56;
944 orc_union32 var57;
945 orc_union32 var58;
946
947 for (j = 0; j < m; j++) {
948 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
949 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
950
951 /* 6: loadpw */
952 var40.x4[0] = p1;
953 var40.x4[1] = p1;
954 var40.x4[2] = p1;
955 var40.x4[3] = p1;
956 /* 17: loadpl */
957 var41.i = 0xff000000; /* -16777216 or 2.11371e-314f */
958
959 for (i = 0; i < n; i++) {
960 /* 0: loadl */
961 var42 = ptr4[i];
962 /* 1: shrul */
963 var43.i = ((orc_uint32) var42.i) >> 24;
964 /* 2: convlw */
965 var44.i = var43.i;
966 /* 3: convwb */
967 var45 = var44.i;
968 /* 4: splatbl */
969 var46.i =
970 ((((orc_uint32) var45) & 0xff) << 24) | ((((orc_uint32) var45) & 0xff)
971 << 16) | ((((orc_uint32) var45) & 0xff) << 8) | (((orc_uint32) var45)
972 & 0xff);
973 /* 5: convubw */
974 var47.x4[0] = (orc_uint8) var46.x4[0];
975 var47.x4[1] = (orc_uint8) var46.x4[1];
976 var47.x4[2] = (orc_uint8) var46.x4[2];
977 var47.x4[3] = (orc_uint8) var46.x4[3];
978 /* 7: mullw */
979 var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
980 var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
981 var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
982 var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
983 /* 8: shruw */
984 var49.x4[0] = ((orc_uint16) var48.x4[0]) >> 8;
985 var49.x4[1] = ((orc_uint16) var48.x4[1]) >> 8;
986 var49.x4[2] = ((orc_uint16) var48.x4[2]) >> 8;
987 var49.x4[3] = ((orc_uint16) var48.x4[3]) >> 8;
988 /* 9: convubw */
989 var50.x4[0] = (orc_uint8) var42.x4[0];
990 var50.x4[1] = (orc_uint8) var42.x4[1];
991 var50.x4[2] = (orc_uint8) var42.x4[2];
992 var50.x4[3] = (orc_uint8) var42.x4[3];
993 /* 10: loadl */
994 var51 = ptr0[i];
995 /* 11: convubw */
996 var52.x4[0] = (orc_uint8) var51.x4[0];
997 var52.x4[1] = (orc_uint8) var51.x4[1];
998 var52.x4[2] = (orc_uint8) var51.x4[2];
999 var52.x4[3] = (orc_uint8) var51.x4[3];
1000 /* 12: subw */
1001 var53.x4[0] = var50.x4[0] - var52.x4[0];
1002 var53.x4[1] = var50.x4[1] - var52.x4[1];
1003 var53.x4[2] = var50.x4[2] - var52.x4[2];
1004 var53.x4[3] = var50.x4[3] - var52.x4[3];
1005 /* 13: mullw */
1006 var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
1007 var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
1008 var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
1009 var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
1010 /* 14: div255w */
1011 var55.x4[0] =
1012 ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
1013 (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
1014 var55.x4[1] =
1015 ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
1016 (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
1017 var55.x4[2] =
1018 ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
1019 (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
1020 var55.x4[3] =
1021 ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
1022 (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
1023 /* 15: addw */
1024 var56.x4[0] = var52.x4[0] + var55.x4[0];
1025 var56.x4[1] = var52.x4[1] + var55.x4[1];
1026 var56.x4[2] = var52.x4[2] + var55.x4[2];
1027 var56.x4[3] = var52.x4[3] + var55.x4[3];
1028 /* 16: convwb */
1029 var57.x4[0] = var56.x4[0];
1030 var57.x4[1] = var56.x4[1];
1031 var57.x4[2] = var56.x4[2];
1032 var57.x4[3] = var56.x4[3];
1033 /* 18: orl */
1034 var58.i = var57.i | var41.i;
1035 /* 19: storel */
1036 ptr0[i] = var58;
1037 }
1038 }
1039
1040 }
1041
1042 #else
1043 static void
_backup_video_mixer_orc_blend_bgra(OrcExecutor * ORC_RESTRICT ex)1044 _backup_video_mixer_orc_blend_bgra (OrcExecutor * ORC_RESTRICT ex)
1045 {
1046 int i;
1047 int j;
1048 int n = ex->n;
1049 int m = ex->params[ORC_VAR_A1];
1050 orc_union32 *ORC_RESTRICT ptr0;
1051 const orc_union32 *ORC_RESTRICT ptr4;
1052 orc_union64 var40;
1053 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1054 volatile orc_union32 var41;
1055 #else
1056 orc_union32 var41;
1057 #endif
1058 orc_union32 var42;
1059 orc_union32 var43;
1060 orc_union16 var44;
1061 orc_int8 var45;
1062 orc_union32 var46;
1063 orc_union64 var47;
1064 orc_union64 var48;
1065 orc_union64 var49;
1066 orc_union64 var50;
1067 orc_union32 var51;
1068 orc_union64 var52;
1069 orc_union64 var53;
1070 orc_union64 var54;
1071 orc_union64 var55;
1072 orc_union64 var56;
1073 orc_union32 var57;
1074 orc_union32 var58;
1075
1076 for (j = 0; j < m; j++) {
1077 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1078 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1079
1080 /* 6: loadpw */
1081 var40.x4[0] = ex->params[24];
1082 var40.x4[1] = ex->params[24];
1083 var40.x4[2] = ex->params[24];
1084 var40.x4[3] = ex->params[24];
1085 /* 17: loadpl */
1086 var41.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1087
1088 for (i = 0; i < n; i++) {
1089 /* 0: loadl */
1090 var42 = ptr4[i];
1091 /* 1: shrul */
1092 var43.i = ((orc_uint32) var42.i) >> 24;
1093 /* 2: convlw */
1094 var44.i = var43.i;
1095 /* 3: convwb */
1096 var45 = var44.i;
1097 /* 4: splatbl */
1098 var46.i =
1099 ((((orc_uint32) var45) & 0xff) << 24) | ((((orc_uint32) var45) & 0xff)
1100 << 16) | ((((orc_uint32) var45) & 0xff) << 8) | (((orc_uint32) var45)
1101 & 0xff);
1102 /* 5: convubw */
1103 var47.x4[0] = (orc_uint8) var46.x4[0];
1104 var47.x4[1] = (orc_uint8) var46.x4[1];
1105 var47.x4[2] = (orc_uint8) var46.x4[2];
1106 var47.x4[3] = (orc_uint8) var46.x4[3];
1107 /* 7: mullw */
1108 var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
1109 var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
1110 var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
1111 var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
1112 /* 8: shruw */
1113 var49.x4[0] = ((orc_uint16) var48.x4[0]) >> 8;
1114 var49.x4[1] = ((orc_uint16) var48.x4[1]) >> 8;
1115 var49.x4[2] = ((orc_uint16) var48.x4[2]) >> 8;
1116 var49.x4[3] = ((orc_uint16) var48.x4[3]) >> 8;
1117 /* 9: convubw */
1118 var50.x4[0] = (orc_uint8) var42.x4[0];
1119 var50.x4[1] = (orc_uint8) var42.x4[1];
1120 var50.x4[2] = (orc_uint8) var42.x4[2];
1121 var50.x4[3] = (orc_uint8) var42.x4[3];
1122 /* 10: loadl */
1123 var51 = ptr0[i];
1124 /* 11: convubw */
1125 var52.x4[0] = (orc_uint8) var51.x4[0];
1126 var52.x4[1] = (orc_uint8) var51.x4[1];
1127 var52.x4[2] = (orc_uint8) var51.x4[2];
1128 var52.x4[3] = (orc_uint8) var51.x4[3];
1129 /* 12: subw */
1130 var53.x4[0] = var50.x4[0] - var52.x4[0];
1131 var53.x4[1] = var50.x4[1] - var52.x4[1];
1132 var53.x4[2] = var50.x4[2] - var52.x4[2];
1133 var53.x4[3] = var50.x4[3] - var52.x4[3];
1134 /* 13: mullw */
1135 var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
1136 var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
1137 var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
1138 var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
1139 /* 14: div255w */
1140 var55.x4[0] =
1141 ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
1142 (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
1143 var55.x4[1] =
1144 ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
1145 (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
1146 var55.x4[2] =
1147 ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
1148 (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
1149 var55.x4[3] =
1150 ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
1151 (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
1152 /* 15: addw */
1153 var56.x4[0] = var52.x4[0] + var55.x4[0];
1154 var56.x4[1] = var52.x4[1] + var55.x4[1];
1155 var56.x4[2] = var52.x4[2] + var55.x4[2];
1156 var56.x4[3] = var52.x4[3] + var55.x4[3];
1157 /* 16: convwb */
1158 var57.x4[0] = var56.x4[0];
1159 var57.x4[1] = var56.x4[1];
1160 var57.x4[2] = var56.x4[2];
1161 var57.x4[3] = var56.x4[3];
1162 /* 18: orl */
1163 var58.i = var57.i | var41.i;
1164 /* 19: storel */
1165 ptr0[i] = var58;
1166 }
1167 }
1168
1169 }
1170
1171 void
video_mixer_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1172 video_mixer_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1173 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1174 {
1175 OrcExecutor _ex, *ex = &_ex;
1176 static volatile int p_inited = 0;
1177 static OrcCode *c = 0;
1178 void (*func) (OrcExecutor *);
1179
1180 if (!p_inited) {
1181 orc_once_mutex_lock ();
1182 if (!p_inited) {
1183 OrcProgram *p;
1184
1185 #if 1
1186 static const orc_uint8 bc[] = {
1187 1, 7, 9, 26, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
1188 111, 114, 99, 95, 98, 108, 101, 110, 100, 95, 98, 103, 114, 97, 11, 4,
1189 4, 12, 4, 4, 14, 4, 0, 0, 0, 255, 14, 4, 24, 0, 0, 0,
1190 14, 2, 8, 0, 0, 0, 16, 2, 20, 4, 20, 4, 20, 2, 20, 1,
1191 20, 4, 20, 8, 20, 8, 20, 8, 113, 32, 4, 126, 33, 32, 17, 163,
1192 34, 33, 157, 35, 34, 152, 36, 35, 21, 2, 150, 39, 36, 21, 2, 89,
1193 39, 39, 24, 21, 2, 95, 39, 39, 18, 21, 2, 150, 38, 32, 113, 32,
1194 0, 21, 2, 150, 37, 32, 21, 2, 98, 38, 38, 37, 21, 2, 89, 38,
1195 38, 39, 21, 2, 80, 38, 38, 21, 2, 70, 37, 37, 38, 21, 2, 157,
1196 32, 37, 123, 32, 32, 16, 128, 0, 32, 2, 0,
1197 };
1198 p = orc_program_new_from_static_bytecode (bc);
1199 orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_bgra);
1200 #else
1201 p = orc_program_new ();
1202 orc_program_set_2d (p);
1203 orc_program_set_name (p, "video_mixer_orc_blend_bgra");
1204 orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_bgra);
1205 orc_program_add_destination (p, 4, "d1");
1206 orc_program_add_source (p, 4, "s1");
1207 orc_program_add_constant (p, 4, 0xff000000, "c1");
1208 orc_program_add_constant (p, 4, 0x00000018, "c2");
1209 orc_program_add_constant (p, 2, 0x00000008, "c3");
1210 orc_program_add_parameter (p, 2, "p1");
1211 orc_program_add_temporary (p, 4, "t1");
1212 orc_program_add_temporary (p, 4, "t2");
1213 orc_program_add_temporary (p, 2, "t3");
1214 orc_program_add_temporary (p, 1, "t4");
1215 orc_program_add_temporary (p, 4, "t5");
1216 orc_program_add_temporary (p, 8, "t6");
1217 orc_program_add_temporary (p, 8, "t7");
1218 orc_program_add_temporary (p, 8, "t8");
1219
1220 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1221 ORC_VAR_D1);
1222 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C2,
1223 ORC_VAR_D1);
1224 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1225 ORC_VAR_D1);
1226 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1227 ORC_VAR_D1);
1228 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1229 ORC_VAR_D1);
1230 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T5, ORC_VAR_D1,
1231 ORC_VAR_D1);
1232 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_P1,
1233 ORC_VAR_D1);
1234 orc_program_append_2 (p, "shruw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C3,
1235 ORC_VAR_D1);
1236 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1,
1237 ORC_VAR_D1);
1238 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
1239 ORC_VAR_D1);
1240 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
1241 ORC_VAR_D1);
1242 orc_program_append_2 (p, "subw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
1243 ORC_VAR_D1);
1244 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T8,
1245 ORC_VAR_D1);
1246 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
1247 ORC_VAR_D1);
1248 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
1249 ORC_VAR_D1);
1250 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T6, ORC_VAR_D1,
1251 ORC_VAR_D1);
1252 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
1253 ORC_VAR_D1);
1254 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1255 ORC_VAR_D1);
1256 #endif
1257
1258 orc_program_compile (p);
1259 c = orc_program_take_code (p);
1260 orc_program_free (p);
1261 }
1262 p_inited = TRUE;
1263 orc_once_mutex_unlock ();
1264 }
1265 ex->arrays[ORC_VAR_A2] = c;
1266 ex->program = 0;
1267
1268 ex->n = n;
1269 ORC_EXECUTOR_M (ex) = m;
1270 ex->arrays[ORC_VAR_D1] = d1;
1271 ex->params[ORC_VAR_D1] = d1_stride;
1272 ex->arrays[ORC_VAR_S1] = (void *) s1;
1273 ex->params[ORC_VAR_S1] = s1_stride;
1274 ex->params[ORC_VAR_P1] = p1;
1275
1276 func = c->exec;
1277 func (ex);
1278 }
1279 #endif
1280
1281
1282 /* video_mixer_orc_overlay_argb */
1283 #ifdef DISABLE_ORC
1284 void
video_mixer_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1285 video_mixer_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1286 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1287 {
1288 int i;
1289 int j;
1290 orc_union32 *ORC_RESTRICT ptr0;
1291 const orc_union32 *ORC_RESTRICT ptr4;
1292 orc_union64 var41;
1293 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1294 volatile orc_union32 var42;
1295 #else
1296 orc_union32 var42;
1297 #endif
1298 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1299 volatile orc_union32 var43;
1300 #else
1301 orc_union32 var43;
1302 #endif
1303 orc_union32 var44;
1304 orc_union16 var45;
1305 orc_int8 var46;
1306 orc_union32 var47;
1307 orc_union64 var48;
1308 orc_union64 var49;
1309 orc_union64 var50;
1310 orc_union64 var51;
1311 orc_union64 var52;
1312 orc_union32 var53;
1313 orc_union64 var54;
1314 orc_union64 var55;
1315 orc_union32 var56;
1316 orc_union16 var57;
1317 orc_int8 var58;
1318 orc_union32 var59;
1319 orc_union64 var60;
1320 orc_union64 var61;
1321 orc_union64 var62;
1322 orc_union64 var63;
1323 orc_union64 var64;
1324 orc_union64 var65;
1325 orc_union64 var66;
1326 orc_union64 var67;
1327 orc_union32 var68;
1328 orc_union32 var69;
1329 orc_union32 var70;
1330 orc_union32 var71;
1331 orc_union32 var72;
1332
1333 for (j = 0; j < m; j++) {
1334 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1335 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1336
1337 /* 5: loadpw */
1338 var41.x4[0] = p1;
1339 var41.x4[1] = p1;
1340 var41.x4[2] = p1;
1341 var41.x4[3] = p1;
1342 /* 10: loadpl */
1343 var53.i = 0xffffffff; /* -1 or 2.122e-314f */
1344 /* 26: loadpl */
1345 var42.i = 0xffffff00; /* -256 or 2.122e-314f */
1346 /* 29: loadpl */
1347 var43.i = 0x000000ff; /* 255 or 1.25987e-321f */
1348
1349 for (i = 0; i < n; i++) {
1350 /* 0: loadl */
1351 var44 = ptr4[i];
1352 /* 1: convlw */
1353 var45.i = var44.i;
1354 /* 2: convwb */
1355 var46 = var45.i;
1356 /* 3: splatbl */
1357 var47.i =
1358 ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
1359 << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
1360 & 0xff);
1361 /* 4: convubw */
1362 var48.x4[0] = (orc_uint8) var47.x4[0];
1363 var48.x4[1] = (orc_uint8) var47.x4[1];
1364 var48.x4[2] = (orc_uint8) var47.x4[2];
1365 var48.x4[3] = (orc_uint8) var47.x4[3];
1366 /* 6: mullw */
1367 var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
1368 var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
1369 var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
1370 var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
1371 /* 7: shruw */
1372 var50.x4[0] = ((orc_uint16) var49.x4[0]) >> 8;
1373 var50.x4[1] = ((orc_uint16) var49.x4[1]) >> 8;
1374 var50.x4[2] = ((orc_uint16) var49.x4[2]) >> 8;
1375 var50.x4[3] = ((orc_uint16) var49.x4[3]) >> 8;
1376 /* 8: convubw */
1377 var51.x4[0] = (orc_uint8) var44.x4[0];
1378 var51.x4[1] = (orc_uint8) var44.x4[1];
1379 var51.x4[2] = (orc_uint8) var44.x4[2];
1380 var51.x4[3] = (orc_uint8) var44.x4[3];
1381 /* 9: mullw */
1382 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
1383 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
1384 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
1385 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
1386 /* 11: convubw */
1387 var54.x4[0] = (orc_uint8) var53.x4[0];
1388 var54.x4[1] = (orc_uint8) var53.x4[1];
1389 var54.x4[2] = (orc_uint8) var53.x4[2];
1390 var54.x4[3] = (orc_uint8) var53.x4[3];
1391 /* 12: subw */
1392 var55.x4[0] = var54.x4[0] - var50.x4[0];
1393 var55.x4[1] = var54.x4[1] - var50.x4[1];
1394 var55.x4[2] = var54.x4[2] - var50.x4[2];
1395 var55.x4[3] = var54.x4[3] - var50.x4[3];
1396 /* 13: loadl */
1397 var56 = ptr0[i];
1398 /* 14: convlw */
1399 var57.i = var56.i;
1400 /* 15: convwb */
1401 var58 = var57.i;
1402 /* 16: splatbl */
1403 var59.i =
1404 ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
1405 << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
1406 & 0xff);
1407 /* 17: convubw */
1408 var60.x4[0] = (orc_uint8) var59.x4[0];
1409 var60.x4[1] = (orc_uint8) var59.x4[1];
1410 var60.x4[2] = (orc_uint8) var59.x4[2];
1411 var60.x4[3] = (orc_uint8) var59.x4[3];
1412 /* 18: mullw */
1413 var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
1414 var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
1415 var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
1416 var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
1417 /* 19: div255w */
1418 var62.x4[0] =
1419 ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
1420 (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
1421 var62.x4[1] =
1422 ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
1423 (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
1424 var62.x4[2] =
1425 ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
1426 (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
1427 var62.x4[3] =
1428 ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
1429 (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
1430 /* 20: convubw */
1431 var63.x4[0] = (orc_uint8) var56.x4[0];
1432 var63.x4[1] = (orc_uint8) var56.x4[1];
1433 var63.x4[2] = (orc_uint8) var56.x4[2];
1434 var63.x4[3] = (orc_uint8) var56.x4[3];
1435 /* 21: mullw */
1436 var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
1437 var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
1438 var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
1439 var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
1440 /* 22: addw */
1441 var65.x4[0] = var64.x4[0] + var52.x4[0];
1442 var65.x4[1] = var64.x4[1] + var52.x4[1];
1443 var65.x4[2] = var64.x4[2] + var52.x4[2];
1444 var65.x4[3] = var64.x4[3] + var52.x4[3];
1445 /* 23: addw */
1446 var66.x4[0] = var62.x4[0] + var50.x4[0];
1447 var66.x4[1] = var62.x4[1] + var50.x4[1];
1448 var66.x4[2] = var62.x4[2] + var50.x4[2];
1449 var66.x4[3] = var62.x4[3] + var50.x4[3];
1450 /* 24: divluw */
1451 var67.x4[0] =
1452 ((var66.x4[0] & 0xff) ==
1453 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
1454 ((orc_uint16) var66.x4[0] & 0xff));
1455 var67.x4[1] =
1456 ((var66.x4[1] & 0xff) ==
1457 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
1458 ((orc_uint16) var66.x4[1] & 0xff));
1459 var67.x4[2] =
1460 ((var66.x4[2] & 0xff) ==
1461 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
1462 ((orc_uint16) var66.x4[2] & 0xff));
1463 var67.x4[3] =
1464 ((var66.x4[3] & 0xff) ==
1465 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
1466 ((orc_uint16) var66.x4[3] & 0xff));
1467 /* 25: convwb */
1468 var68.x4[0] = var67.x4[0];
1469 var68.x4[1] = var67.x4[1];
1470 var68.x4[2] = var67.x4[2];
1471 var68.x4[3] = var67.x4[3];
1472 /* 27: andl */
1473 var69.i = var68.i & var42.i;
1474 /* 28: convwb */
1475 var70.x4[0] = var66.x4[0];
1476 var70.x4[1] = var66.x4[1];
1477 var70.x4[2] = var66.x4[2];
1478 var70.x4[3] = var66.x4[3];
1479 /* 30: andl */
1480 var71.i = var70.i & var43.i;
1481 /* 31: orl */
1482 var72.i = var69.i | var71.i;
1483 /* 32: storel */
1484 ptr0[i] = var72;
1485 }
1486 }
1487
1488 }
1489
1490 #else
1491 static void
_backup_video_mixer_orc_overlay_argb(OrcExecutor * ORC_RESTRICT ex)1492 _backup_video_mixer_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex)
1493 {
1494 int i;
1495 int j;
1496 int n = ex->n;
1497 int m = ex->params[ORC_VAR_A1];
1498 orc_union32 *ORC_RESTRICT ptr0;
1499 const orc_union32 *ORC_RESTRICT ptr4;
1500 orc_union64 var41;
1501 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1502 volatile orc_union32 var42;
1503 #else
1504 orc_union32 var42;
1505 #endif
1506 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1507 volatile orc_union32 var43;
1508 #else
1509 orc_union32 var43;
1510 #endif
1511 orc_union32 var44;
1512 orc_union16 var45;
1513 orc_int8 var46;
1514 orc_union32 var47;
1515 orc_union64 var48;
1516 orc_union64 var49;
1517 orc_union64 var50;
1518 orc_union64 var51;
1519 orc_union64 var52;
1520 orc_union32 var53;
1521 orc_union64 var54;
1522 orc_union64 var55;
1523 orc_union32 var56;
1524 orc_union16 var57;
1525 orc_int8 var58;
1526 orc_union32 var59;
1527 orc_union64 var60;
1528 orc_union64 var61;
1529 orc_union64 var62;
1530 orc_union64 var63;
1531 orc_union64 var64;
1532 orc_union64 var65;
1533 orc_union64 var66;
1534 orc_union64 var67;
1535 orc_union32 var68;
1536 orc_union32 var69;
1537 orc_union32 var70;
1538 orc_union32 var71;
1539 orc_union32 var72;
1540
1541 for (j = 0; j < m; j++) {
1542 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1543 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1544
1545 /* 5: loadpw */
1546 var41.x4[0] = ex->params[24];
1547 var41.x4[1] = ex->params[24];
1548 var41.x4[2] = ex->params[24];
1549 var41.x4[3] = ex->params[24];
1550 /* 10: loadpl */
1551 var53.i = 0xffffffff; /* -1 or 2.122e-314f */
1552 /* 26: loadpl */
1553 var42.i = 0xffffff00; /* -256 or 2.122e-314f */
1554 /* 29: loadpl */
1555 var43.i = 0x000000ff; /* 255 or 1.25987e-321f */
1556
1557 for (i = 0; i < n; i++) {
1558 /* 0: loadl */
1559 var44 = ptr4[i];
1560 /* 1: convlw */
1561 var45.i = var44.i;
1562 /* 2: convwb */
1563 var46 = var45.i;
1564 /* 3: splatbl */
1565 var47.i =
1566 ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
1567 << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
1568 & 0xff);
1569 /* 4: convubw */
1570 var48.x4[0] = (orc_uint8) var47.x4[0];
1571 var48.x4[1] = (orc_uint8) var47.x4[1];
1572 var48.x4[2] = (orc_uint8) var47.x4[2];
1573 var48.x4[3] = (orc_uint8) var47.x4[3];
1574 /* 6: mullw */
1575 var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
1576 var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
1577 var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
1578 var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
1579 /* 7: shruw */
1580 var50.x4[0] = ((orc_uint16) var49.x4[0]) >> 8;
1581 var50.x4[1] = ((orc_uint16) var49.x4[1]) >> 8;
1582 var50.x4[2] = ((orc_uint16) var49.x4[2]) >> 8;
1583 var50.x4[3] = ((orc_uint16) var49.x4[3]) >> 8;
1584 /* 8: convubw */
1585 var51.x4[0] = (orc_uint8) var44.x4[0];
1586 var51.x4[1] = (orc_uint8) var44.x4[1];
1587 var51.x4[2] = (orc_uint8) var44.x4[2];
1588 var51.x4[3] = (orc_uint8) var44.x4[3];
1589 /* 9: mullw */
1590 var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
1591 var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
1592 var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
1593 var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
1594 /* 11: convubw */
1595 var54.x4[0] = (orc_uint8) var53.x4[0];
1596 var54.x4[1] = (orc_uint8) var53.x4[1];
1597 var54.x4[2] = (orc_uint8) var53.x4[2];
1598 var54.x4[3] = (orc_uint8) var53.x4[3];
1599 /* 12: subw */
1600 var55.x4[0] = var54.x4[0] - var50.x4[0];
1601 var55.x4[1] = var54.x4[1] - var50.x4[1];
1602 var55.x4[2] = var54.x4[2] - var50.x4[2];
1603 var55.x4[3] = var54.x4[3] - var50.x4[3];
1604 /* 13: loadl */
1605 var56 = ptr0[i];
1606 /* 14: convlw */
1607 var57.i = var56.i;
1608 /* 15: convwb */
1609 var58 = var57.i;
1610 /* 16: splatbl */
1611 var59.i =
1612 ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
1613 << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
1614 & 0xff);
1615 /* 17: convubw */
1616 var60.x4[0] = (orc_uint8) var59.x4[0];
1617 var60.x4[1] = (orc_uint8) var59.x4[1];
1618 var60.x4[2] = (orc_uint8) var59.x4[2];
1619 var60.x4[3] = (orc_uint8) var59.x4[3];
1620 /* 18: mullw */
1621 var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
1622 var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
1623 var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
1624 var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
1625 /* 19: div255w */
1626 var62.x4[0] =
1627 ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
1628 (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
1629 var62.x4[1] =
1630 ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
1631 (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
1632 var62.x4[2] =
1633 ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
1634 (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
1635 var62.x4[3] =
1636 ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
1637 (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
1638 /* 20: convubw */
1639 var63.x4[0] = (orc_uint8) var56.x4[0];
1640 var63.x4[1] = (orc_uint8) var56.x4[1];
1641 var63.x4[2] = (orc_uint8) var56.x4[2];
1642 var63.x4[3] = (orc_uint8) var56.x4[3];
1643 /* 21: mullw */
1644 var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
1645 var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
1646 var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
1647 var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
1648 /* 22: addw */
1649 var65.x4[0] = var64.x4[0] + var52.x4[0];
1650 var65.x4[1] = var64.x4[1] + var52.x4[1];
1651 var65.x4[2] = var64.x4[2] + var52.x4[2];
1652 var65.x4[3] = var64.x4[3] + var52.x4[3];
1653 /* 23: addw */
1654 var66.x4[0] = var62.x4[0] + var50.x4[0];
1655 var66.x4[1] = var62.x4[1] + var50.x4[1];
1656 var66.x4[2] = var62.x4[2] + var50.x4[2];
1657 var66.x4[3] = var62.x4[3] + var50.x4[3];
1658 /* 24: divluw */
1659 var67.x4[0] =
1660 ((var66.x4[0] & 0xff) ==
1661 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
1662 ((orc_uint16) var66.x4[0] & 0xff));
1663 var67.x4[1] =
1664 ((var66.x4[1] & 0xff) ==
1665 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
1666 ((orc_uint16) var66.x4[1] & 0xff));
1667 var67.x4[2] =
1668 ((var66.x4[2] & 0xff) ==
1669 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
1670 ((orc_uint16) var66.x4[2] & 0xff));
1671 var67.x4[3] =
1672 ((var66.x4[3] & 0xff) ==
1673 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
1674 ((orc_uint16) var66.x4[3] & 0xff));
1675 /* 25: convwb */
1676 var68.x4[0] = var67.x4[0];
1677 var68.x4[1] = var67.x4[1];
1678 var68.x4[2] = var67.x4[2];
1679 var68.x4[3] = var67.x4[3];
1680 /* 27: andl */
1681 var69.i = var68.i & var42.i;
1682 /* 28: convwb */
1683 var70.x4[0] = var66.x4[0];
1684 var70.x4[1] = var66.x4[1];
1685 var70.x4[2] = var66.x4[2];
1686 var70.x4[3] = var66.x4[3];
1687 /* 30: andl */
1688 var71.i = var70.i & var43.i;
1689 /* 31: orl */
1690 var72.i = var69.i | var71.i;
1691 /* 32: storel */
1692 ptr0[i] = var72;
1693 }
1694 }
1695
1696 }
1697
1698 void
video_mixer_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1699 video_mixer_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1700 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1701 {
1702 OrcExecutor _ex, *ex = &_ex;
1703 static volatile int p_inited = 0;
1704 static OrcCode *c = 0;
1705 void (*func) (OrcExecutor *);
1706
1707 if (!p_inited) {
1708 orc_once_mutex_lock ();
1709 if (!p_inited) {
1710 OrcProgram *p;
1711
1712 #if 1
1713 static const orc_uint8 bc[] = {
1714 1, 7, 9, 28, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
1715 111, 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 97, 114, 103,
1716 98,
1717 11, 4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 255, 0,
1718 0, 0, 14, 4, 0, 255, 255, 255, 14, 2, 8, 0, 0, 0, 16, 2,
1719 20, 4, 20, 2, 20, 1, 20, 8, 20, 8, 20, 8, 20, 4, 20, 8,
1720 20, 8, 113, 32, 4, 163, 33, 32, 157, 34, 33, 152, 38, 34, 21, 2,
1721 150, 35, 38, 21, 2, 89, 35, 35, 24, 21, 2, 95, 35, 35, 19, 21,
1722 2, 150, 40, 32, 21, 2, 89, 40, 40, 35, 115, 38, 16, 21, 2, 150,
1723 36, 38, 21, 2, 98, 36, 36, 35, 113, 32, 0, 163, 33, 32, 157, 34,
1724 33, 152, 38, 34, 21, 2, 150, 37, 38, 21, 2, 89, 37, 37, 36, 21,
1725 2, 80, 37, 37, 21, 2, 150, 39, 32, 21, 2, 89, 39, 39, 37, 21,
1726 2, 70, 39, 39, 40, 21, 2, 70, 37, 37, 35, 21, 2, 81, 39, 39,
1727 37, 21, 2, 157, 32, 39, 106, 32, 32, 18, 21, 2, 157, 38, 37, 106,
1728 38, 38, 17, 123, 32, 32, 38, 128, 0, 32, 2, 0,
1729 };
1730 p = orc_program_new_from_static_bytecode (bc);
1731 orc_program_set_backup_function (p, _backup_video_mixer_orc_overlay_argb);
1732 #else
1733 p = orc_program_new ();
1734 orc_program_set_2d (p);
1735 orc_program_set_name (p, "video_mixer_orc_overlay_argb");
1736 orc_program_set_backup_function (p, _backup_video_mixer_orc_overlay_argb);
1737 orc_program_add_destination (p, 4, "d1");
1738 orc_program_add_source (p, 4, "s1");
1739 orc_program_add_constant (p, 4, 0xffffffff, "c1");
1740 orc_program_add_constant (p, 4, 0x000000ff, "c2");
1741 orc_program_add_constant (p, 4, 0xffffff00, "c3");
1742 orc_program_add_constant (p, 2, 0x00000008, "c4");
1743 orc_program_add_parameter (p, 2, "p1");
1744 orc_program_add_temporary (p, 4, "t1");
1745 orc_program_add_temporary (p, 2, "t2");
1746 orc_program_add_temporary (p, 1, "t3");
1747 orc_program_add_temporary (p, 8, "t4");
1748 orc_program_add_temporary (p, 8, "t5");
1749 orc_program_add_temporary (p, 8, "t6");
1750 orc_program_add_temporary (p, 4, "t7");
1751 orc_program_add_temporary (p, 8, "t8");
1752 orc_program_add_temporary (p, 8, "t9");
1753
1754 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1755 ORC_VAR_D1);
1756 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
1757 ORC_VAR_D1);
1758 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1759 ORC_VAR_D1);
1760 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
1761 ORC_VAR_D1);
1762 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1,
1763 ORC_VAR_D1);
1764 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1,
1765 ORC_VAR_D1);
1766 orc_program_append_2 (p, "shruw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_C4,
1767 ORC_VAR_D1);
1768 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
1769 ORC_VAR_D1);
1770 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T4,
1771 ORC_VAR_D1);
1772 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T7, ORC_VAR_C1, ORC_VAR_D1,
1773 ORC_VAR_D1);
1774 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1,
1775 ORC_VAR_D1);
1776 orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4,
1777 ORC_VAR_D1);
1778 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
1779 ORC_VAR_D1);
1780 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
1781 ORC_VAR_D1);
1782 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1783 ORC_VAR_D1);
1784 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
1785 ORC_VAR_D1);
1786 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T7, ORC_VAR_D1,
1787 ORC_VAR_D1);
1788 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
1789 ORC_VAR_D1);
1790 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1791 ORC_VAR_D1);
1792 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T1, ORC_VAR_D1,
1793 ORC_VAR_D1);
1794 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
1795 ORC_VAR_D1);
1796 orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T9,
1797 ORC_VAR_D1);
1798 orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4,
1799 ORC_VAR_D1);
1800 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
1801 ORC_VAR_D1);
1802 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T8, ORC_VAR_D1,
1803 ORC_VAR_D1);
1804 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
1805 ORC_VAR_D1);
1806 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1,
1807 ORC_VAR_D1);
1808 orc_program_append_2 (p, "andl", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
1809 ORC_VAR_D1);
1810 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T7,
1811 ORC_VAR_D1);
1812 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1813 ORC_VAR_D1);
1814 #endif
1815
1816 orc_program_compile (p);
1817 c = orc_program_take_code (p);
1818 orc_program_free (p);
1819 }
1820 p_inited = TRUE;
1821 orc_once_mutex_unlock ();
1822 }
1823 ex->arrays[ORC_VAR_A2] = c;
1824 ex->program = 0;
1825
1826 ex->n = n;
1827 ORC_EXECUTOR_M (ex) = m;
1828 ex->arrays[ORC_VAR_D1] = d1;
1829 ex->params[ORC_VAR_D1] = d1_stride;
1830 ex->arrays[ORC_VAR_S1] = (void *) s1;
1831 ex->params[ORC_VAR_S1] = s1_stride;
1832 ex->params[ORC_VAR_P1] = p1;
1833
1834 func = c->exec;
1835 func (ex);
1836 }
1837 #endif
1838
1839
1840 /* video_mixer_orc_overlay_bgra */
1841 #ifdef DISABLE_ORC
1842 void
video_mixer_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1843 video_mixer_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1844 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1845 {
1846 int i;
1847 int j;
1848 orc_union32 *ORC_RESTRICT ptr0;
1849 const orc_union32 *ORC_RESTRICT ptr4;
1850 orc_union64 var42;
1851 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1852 volatile orc_union32 var43;
1853 #else
1854 orc_union32 var43;
1855 #endif
1856 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1857 volatile orc_union32 var44;
1858 #else
1859 orc_union32 var44;
1860 #endif
1861 orc_union32 var45;
1862 orc_union32 var46;
1863 orc_union16 var47;
1864 orc_int8 var48;
1865 orc_union32 var49;
1866 orc_union64 var50;
1867 orc_union64 var51;
1868 orc_union64 var52;
1869 orc_union64 var53;
1870 orc_union64 var54;
1871 orc_union32 var55;
1872 orc_union64 var56;
1873 orc_union64 var57;
1874 orc_union32 var58;
1875 orc_union32 var59;
1876 orc_union16 var60;
1877 orc_int8 var61;
1878 orc_union32 var62;
1879 orc_union64 var63;
1880 orc_union64 var64;
1881 orc_union64 var65;
1882 orc_union64 var66;
1883 orc_union64 var67;
1884 orc_union64 var68;
1885 orc_union64 var69;
1886 orc_union64 var70;
1887 orc_union32 var71;
1888 orc_union32 var72;
1889 orc_union32 var73;
1890 orc_union32 var74;
1891 orc_union32 var75;
1892
1893 for (j = 0; j < m; j++) {
1894 ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1895 ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1896
1897 /* 6: loadpw */
1898 var42.x4[0] = p1;
1899 var42.x4[1] = p1;
1900 var42.x4[2] = p1;
1901 var42.x4[3] = p1;
1902 /* 11: loadpl */
1903 var55.i = 0xffffffff; /* -1 or 2.122e-314f */
1904 /* 28: loadpl */
1905 var43.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
1906 /* 31: loadpl */
1907 var44.i = 0xff000000; /* -16777216 or 2.11371e-314f */
1908
1909 for (i = 0; i < n; i++) {
1910 /* 0: loadl */
1911 var45 = ptr4[i];
1912 /* 1: shrul */
1913 var46.i = ((orc_uint32) var45.i) >> 24;
1914 /* 2: convlw */
1915 var47.i = var46.i;
1916 /* 3: convwb */
1917 var48 = var47.i;
1918 /* 4: splatbl */
1919 var49.i =
1920 ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
1921 << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
1922 & 0xff);
1923 /* 5: convubw */
1924 var50.x4[0] = (orc_uint8) var49.x4[0];
1925 var50.x4[1] = (orc_uint8) var49.x4[1];
1926 var50.x4[2] = (orc_uint8) var49.x4[2];
1927 var50.x4[3] = (orc_uint8) var49.x4[3];
1928 /* 7: mullw */
1929 var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
1930 var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
1931 var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
1932 var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
1933 /* 8: shruw */
1934 var52.x4[0] = ((orc_uint16) var51.x4[0]) >> 8;
1935 var52.x4[1] = ((orc_uint16) var51.x4[1]) >> 8;
1936 var52.x4[2] = ((orc_uint16) var51.x4[2]) >> 8;
1937 var52.x4[3] = ((orc_uint16) var51.x4[3]) >> 8;
1938 /* 9: convubw */
1939 var53.x4[0] = (orc_uint8) var45.x4[0];
1940 var53.x4[1] = (orc_uint8) var45.x4[1];
1941 var53.x4[2] = (orc_uint8) var45.x4[2];
1942 var53.x4[3] = (orc_uint8) var45.x4[3];
1943 /* 10: mullw */
1944 var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
1945 var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
1946 var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
1947 var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
1948 /* 12: convubw */
1949 var56.x4[0] = (orc_uint8) var55.x4[0];
1950 var56.x4[1] = (orc_uint8) var55.x4[1];
1951 var56.x4[2] = (orc_uint8) var55.x4[2];
1952 var56.x4[3] = (orc_uint8) var55.x4[3];
1953 /* 13: subw */
1954 var57.x4[0] = var56.x4[0] - var52.x4[0];
1955 var57.x4[1] = var56.x4[1] - var52.x4[1];
1956 var57.x4[2] = var56.x4[2] - var52.x4[2];
1957 var57.x4[3] = var56.x4[3] - var52.x4[3];
1958 /* 14: loadl */
1959 var58 = ptr0[i];
1960 /* 15: shrul */
1961 var59.i = ((orc_uint32) var58.i) >> 24;
1962 /* 16: convlw */
1963 var60.i = var59.i;
1964 /* 17: convwb */
1965 var61 = var60.i;
1966 /* 18: splatbl */
1967 var62.i =
1968 ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
1969 << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
1970 & 0xff);
1971 /* 19: convubw */
1972 var63.x4[0] = (orc_uint8) var62.x4[0];
1973 var63.x4[1] = (orc_uint8) var62.x4[1];
1974 var63.x4[2] = (orc_uint8) var62.x4[2];
1975 var63.x4[3] = (orc_uint8) var62.x4[3];
1976 /* 20: mullw */
1977 var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
1978 var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
1979 var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
1980 var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
1981 /* 21: div255w */
1982 var65.x4[0] =
1983 ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
1984 (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
1985 var65.x4[1] =
1986 ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
1987 (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
1988 var65.x4[2] =
1989 ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
1990 (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
1991 var65.x4[3] =
1992 ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
1993 (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
1994 /* 22: convubw */
1995 var66.x4[0] = (orc_uint8) var58.x4[0];
1996 var66.x4[1] = (orc_uint8) var58.x4[1];
1997 var66.x4[2] = (orc_uint8) var58.x4[2];
1998 var66.x4[3] = (orc_uint8) var58.x4[3];
1999 /* 23: mullw */
2000 var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
2001 var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
2002 var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
2003 var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
2004 /* 24: addw */
2005 var68.x4[0] = var67.x4[0] + var54.x4[0];
2006 var68.x4[1] = var67.x4[1] + var54.x4[1];
2007 var68.x4[2] = var67.x4[2] + var54.x4[2];
2008 var68.x4[3] = var67.x4[3] + var54.x4[3];
2009 /* 25: addw */
2010 var69.x4[0] = var65.x4[0] + var52.x4[0];
2011 var69.x4[1] = var65.x4[1] + var52.x4[1];
2012 var69.x4[2] = var65.x4[2] + var52.x4[2];
2013 var69.x4[3] = var65.x4[3] + var52.x4[3];
2014 /* 26: divluw */
2015 var70.x4[0] =
2016 ((var69.x4[0] & 0xff) ==
2017 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
2018 ((orc_uint16) var69.x4[0] & 0xff));
2019 var70.x4[1] =
2020 ((var69.x4[1] & 0xff) ==
2021 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
2022 ((orc_uint16) var69.x4[1] & 0xff));
2023 var70.x4[2] =
2024 ((var69.x4[2] & 0xff) ==
2025 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
2026 ((orc_uint16) var69.x4[2] & 0xff));
2027 var70.x4[3] =
2028 ((var69.x4[3] & 0xff) ==
2029 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
2030 ((orc_uint16) var69.x4[3] & 0xff));
2031 /* 27: convwb */
2032 var71.x4[0] = var70.x4[0];
2033 var71.x4[1] = var70.x4[1];
2034 var71.x4[2] = var70.x4[2];
2035 var71.x4[3] = var70.x4[3];
2036 /* 29: andl */
2037 var72.i = var71.i & var43.i;
2038 /* 30: convwb */
2039 var73.x4[0] = var69.x4[0];
2040 var73.x4[1] = var69.x4[1];
2041 var73.x4[2] = var69.x4[2];
2042 var73.x4[3] = var69.x4[3];
2043 /* 32: andl */
2044 var74.i = var73.i & var44.i;
2045 /* 33: orl */
2046 var75.i = var72.i | var74.i;
2047 /* 34: storel */
2048 ptr0[i] = var75;
2049 }
2050 }
2051
2052 }
2053
2054 #else
2055 static void
_backup_video_mixer_orc_overlay_bgra(OrcExecutor * ORC_RESTRICT ex)2056 _backup_video_mixer_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex)
2057 {
2058 int i;
2059 int j;
2060 int n = ex->n;
2061 int m = ex->params[ORC_VAR_A1];
2062 orc_union32 *ORC_RESTRICT ptr0;
2063 const orc_union32 *ORC_RESTRICT ptr4;
2064 orc_union64 var42;
2065 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2066 volatile orc_union32 var43;
2067 #else
2068 orc_union32 var43;
2069 #endif
2070 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2071 volatile orc_union32 var44;
2072 #else
2073 orc_union32 var44;
2074 #endif
2075 orc_union32 var45;
2076 orc_union32 var46;
2077 orc_union16 var47;
2078 orc_int8 var48;
2079 orc_union32 var49;
2080 orc_union64 var50;
2081 orc_union64 var51;
2082 orc_union64 var52;
2083 orc_union64 var53;
2084 orc_union64 var54;
2085 orc_union32 var55;
2086 orc_union64 var56;
2087 orc_union64 var57;
2088 orc_union32 var58;
2089 orc_union32 var59;
2090 orc_union16 var60;
2091 orc_int8 var61;
2092 orc_union32 var62;
2093 orc_union64 var63;
2094 orc_union64 var64;
2095 orc_union64 var65;
2096 orc_union64 var66;
2097 orc_union64 var67;
2098 orc_union64 var68;
2099 orc_union64 var69;
2100 orc_union64 var70;
2101 orc_union32 var71;
2102 orc_union32 var72;
2103 orc_union32 var73;
2104 orc_union32 var74;
2105 orc_union32 var75;
2106
2107 for (j = 0; j < m; j++) {
2108 ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
2109 ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
2110
2111 /* 6: loadpw */
2112 var42.x4[0] = ex->params[24];
2113 var42.x4[1] = ex->params[24];
2114 var42.x4[2] = ex->params[24];
2115 var42.x4[3] = ex->params[24];
2116 /* 11: loadpl */
2117 var55.i = 0xffffffff; /* -1 or 2.122e-314f */
2118 /* 28: loadpl */
2119 var43.i = 0x00ffffff; /* 16777215 or 8.28905e-317f */
2120 /* 31: loadpl */
2121 var44.i = 0xff000000; /* -16777216 or 2.11371e-314f */
2122
2123 for (i = 0; i < n; i++) {
2124 /* 0: loadl */
2125 var45 = ptr4[i];
2126 /* 1: shrul */
2127 var46.i = ((orc_uint32) var45.i) >> 24;
2128 /* 2: convlw */
2129 var47.i = var46.i;
2130 /* 3: convwb */
2131 var48 = var47.i;
2132 /* 4: splatbl */
2133 var49.i =
2134 ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
2135 << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
2136 & 0xff);
2137 /* 5: convubw */
2138 var50.x4[0] = (orc_uint8) var49.x4[0];
2139 var50.x4[1] = (orc_uint8) var49.x4[1];
2140 var50.x4[2] = (orc_uint8) var49.x4[2];
2141 var50.x4[3] = (orc_uint8) var49.x4[3];
2142 /* 7: mullw */
2143 var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
2144 var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
2145 var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
2146 var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
2147 /* 8: shruw */
2148 var52.x4[0] = ((orc_uint16) var51.x4[0]) >> 8;
2149 var52.x4[1] = ((orc_uint16) var51.x4[1]) >> 8;
2150 var52.x4[2] = ((orc_uint16) var51.x4[2]) >> 8;
2151 var52.x4[3] = ((orc_uint16) var51.x4[3]) >> 8;
2152 /* 9: convubw */
2153 var53.x4[0] = (orc_uint8) var45.x4[0];
2154 var53.x4[1] = (orc_uint8) var45.x4[1];
2155 var53.x4[2] = (orc_uint8) var45.x4[2];
2156 var53.x4[3] = (orc_uint8) var45.x4[3];
2157 /* 10: mullw */
2158 var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
2159 var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
2160 var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
2161 var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
2162 /* 12: convubw */
2163 var56.x4[0] = (orc_uint8) var55.x4[0];
2164 var56.x4[1] = (orc_uint8) var55.x4[1];
2165 var56.x4[2] = (orc_uint8) var55.x4[2];
2166 var56.x4[3] = (orc_uint8) var55.x4[3];
2167 /* 13: subw */
2168 var57.x4[0] = var56.x4[0] - var52.x4[0];
2169 var57.x4[1] = var56.x4[1] - var52.x4[1];
2170 var57.x4[2] = var56.x4[2] - var52.x4[2];
2171 var57.x4[3] = var56.x4[3] - var52.x4[3];
2172 /* 14: loadl */
2173 var58 = ptr0[i];
2174 /* 15: shrul */
2175 var59.i = ((orc_uint32) var58.i) >> 24;
2176 /* 16: convlw */
2177 var60.i = var59.i;
2178 /* 17: convwb */
2179 var61 = var60.i;
2180 /* 18: splatbl */
2181 var62.i =
2182 ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
2183 << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
2184 & 0xff);
2185 /* 19: convubw */
2186 var63.x4[0] = (orc_uint8) var62.x4[0];
2187 var63.x4[1] = (orc_uint8) var62.x4[1];
2188 var63.x4[2] = (orc_uint8) var62.x4[2];
2189 var63.x4[3] = (orc_uint8) var62.x4[3];
2190 /* 20: mullw */
2191 var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
2192 var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
2193 var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
2194 var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
2195 /* 21: div255w */
2196 var65.x4[0] =
2197 ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
2198 (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
2199 var65.x4[1] =
2200 ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
2201 (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
2202 var65.x4[2] =
2203 ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
2204 (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
2205 var65.x4[3] =
2206 ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
2207 (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
2208 /* 22: convubw */
2209 var66.x4[0] = (orc_uint8) var58.x4[0];
2210 var66.x4[1] = (orc_uint8) var58.x4[1];
2211 var66.x4[2] = (orc_uint8) var58.x4[2];
2212 var66.x4[3] = (orc_uint8) var58.x4[3];
2213 /* 23: mullw */
2214 var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
2215 var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
2216 var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
2217 var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
2218 /* 24: addw */
2219 var68.x4[0] = var67.x4[0] + var54.x4[0];
2220 var68.x4[1] = var67.x4[1] + var54.x4[1];
2221 var68.x4[2] = var67.x4[2] + var54.x4[2];
2222 var68.x4[3] = var67.x4[3] + var54.x4[3];
2223 /* 25: addw */
2224 var69.x4[0] = var65.x4[0] + var52.x4[0];
2225 var69.x4[1] = var65.x4[1] + var52.x4[1];
2226 var69.x4[2] = var65.x4[2] + var52.x4[2];
2227 var69.x4[3] = var65.x4[3] + var52.x4[3];
2228 /* 26: divluw */
2229 var70.x4[0] =
2230 ((var69.x4[0] & 0xff) ==
2231 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
2232 ((orc_uint16) var69.x4[0] & 0xff));
2233 var70.x4[1] =
2234 ((var69.x4[1] & 0xff) ==
2235 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
2236 ((orc_uint16) var69.x4[1] & 0xff));
2237 var70.x4[2] =
2238 ((var69.x4[2] & 0xff) ==
2239 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
2240 ((orc_uint16) var69.x4[2] & 0xff));
2241 var70.x4[3] =
2242 ((var69.x4[3] & 0xff) ==
2243 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
2244 ((orc_uint16) var69.x4[3] & 0xff));
2245 /* 27: convwb */
2246 var71.x4[0] = var70.x4[0];
2247 var71.x4[1] = var70.x4[1];
2248 var71.x4[2] = var70.x4[2];
2249 var71.x4[3] = var70.x4[3];
2250 /* 29: andl */
2251 var72.i = var71.i & var43.i;
2252 /* 30: convwb */
2253 var73.x4[0] = var69.x4[0];
2254 var73.x4[1] = var69.x4[1];
2255 var73.x4[2] = var69.x4[2];
2256 var73.x4[3] = var69.x4[3];
2257 /* 32: andl */
2258 var74.i = var73.i & var44.i;
2259 /* 33: orl */
2260 var75.i = var72.i | var74.i;
2261 /* 34: storel */
2262 ptr0[i] = var75;
2263 }
2264 }
2265
2266 }
2267
2268 void
video_mixer_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2269 video_mixer_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
2270 const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2271 {
2272 OrcExecutor _ex, *ex = &_ex;
2273 static volatile int p_inited = 0;
2274 static OrcCode *c = 0;
2275 void (*func) (OrcExecutor *);
2276
2277 if (!p_inited) {
2278 orc_once_mutex_lock ();
2279 if (!p_inited) {
2280 OrcProgram *p;
2281
2282 #if 1
2283 static const orc_uint8 bc[] = {
2284 1, 7, 9, 28, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
2285 111, 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 98, 103, 114,
2286 97,
2287 11, 4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 0, 0,
2288 0, 255, 14, 4, 255, 255, 255, 0, 14, 4, 24, 0, 0, 0, 14, 2,
2289 8, 0, 0, 0, 16, 2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 8,
2290 20, 8, 20, 8, 20, 4, 20, 8, 20, 8, 113, 32, 4, 126, 33, 32,
2291 19, 163, 34, 33, 157, 35, 34, 152, 39, 35, 21, 2, 150, 36, 39, 21,
2292 2, 89, 36, 36, 24, 21, 2, 95, 36, 36, 20, 21, 2, 150, 41, 32,
2293 21, 2, 89, 41, 41, 36, 115, 39, 16, 21, 2, 150, 37, 39, 21, 2,
2294 98, 37, 37, 36, 113, 32, 0, 126, 33, 32, 19, 163, 34, 33, 157, 35,
2295 34, 152, 39, 35, 21, 2, 150, 38, 39, 21, 2, 89, 38, 38, 37, 21,
2296 2, 80, 38, 38, 21, 2, 150, 40, 32, 21, 2, 89, 40, 40, 38, 21,
2297 2, 70, 40, 40, 41, 21, 2, 70, 38, 38, 36, 21, 2, 81, 40, 40,
2298 38, 21, 2, 157, 32, 40, 106, 32, 32, 18, 21, 2, 157, 39, 38, 106,
2299 39, 39, 17, 123, 32, 32, 39, 128, 0, 32, 2, 0,
2300 };
2301 p = orc_program_new_from_static_bytecode (bc);
2302 orc_program_set_backup_function (p, _backup_video_mixer_orc_overlay_bgra);
2303 #else
2304 p = orc_program_new ();
2305 orc_program_set_2d (p);
2306 orc_program_set_name (p, "video_mixer_orc_overlay_bgra");
2307 orc_program_set_backup_function (p, _backup_video_mixer_orc_overlay_bgra);
2308 orc_program_add_destination (p, 4, "d1");
2309 orc_program_add_source (p, 4, "s1");
2310 orc_program_add_constant (p, 4, 0xffffffff, "c1");
2311 orc_program_add_constant (p, 4, 0xff000000, "c2");
2312 orc_program_add_constant (p, 4, 0x00ffffff, "c3");
2313 orc_program_add_constant (p, 4, 0x00000018, "c4");
2314 orc_program_add_constant (p, 2, 0x00000008, "c5");
2315 orc_program_add_parameter (p, 2, "p1");
2316 orc_program_add_temporary (p, 4, "t1");
2317 orc_program_add_temporary (p, 4, "t2");
2318 orc_program_add_temporary (p, 2, "t3");
2319 orc_program_add_temporary (p, 1, "t4");
2320 orc_program_add_temporary (p, 8, "t5");
2321 orc_program_add_temporary (p, 8, "t6");
2322 orc_program_add_temporary (p, 8, "t7");
2323 orc_program_add_temporary (p, 4, "t8");
2324 orc_program_add_temporary (p, 8, "t9");
2325 orc_program_add_temporary (p, 8, "t10");
2326
2327 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
2328 ORC_VAR_D1);
2329 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
2330 ORC_VAR_D1);
2331 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2332 ORC_VAR_D1);
2333 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
2334 ORC_VAR_D1);
2335 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
2336 ORC_VAR_D1);
2337 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1,
2338 ORC_VAR_D1);
2339 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1,
2340 ORC_VAR_D1);
2341 orc_program_append_2 (p, "shruw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C5,
2342 ORC_VAR_D1);
2343 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
2344 ORC_VAR_D1, ORC_VAR_D1);
2345 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T5,
2346 ORC_VAR_D1);
2347 orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1,
2348 ORC_VAR_D1);
2349 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1,
2350 ORC_VAR_D1);
2351 orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
2352 ORC_VAR_D1);
2353 orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
2354 ORC_VAR_D1);
2355 orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
2356 ORC_VAR_D1);
2357 orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2358 ORC_VAR_D1);
2359 orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
2360 ORC_VAR_D1);
2361 orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
2362 ORC_VAR_D1);
2363 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1,
2364 ORC_VAR_D1);
2365 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
2366 ORC_VAR_D1);
2367 orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
2368 ORC_VAR_D1);
2369 orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
2370 ORC_VAR_D1);
2371 orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
2372 ORC_VAR_D1);
2373 orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10,
2374 ORC_VAR_D1);
2375 orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
2376 ORC_VAR_D1);
2377 orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
2378 ORC_VAR_D1);
2379 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1,
2380 ORC_VAR_D1);
2381 orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
2382 ORC_VAR_D1);
2383 orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1,
2384 ORC_VAR_D1);
2385 orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2,
2386 ORC_VAR_D1);
2387 orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8,
2388 ORC_VAR_D1);
2389 orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
2390 ORC_VAR_D1);
2391 #endif
2392
2393 orc_program_compile (p);
2394 c = orc_program_take_code (p);
2395 orc_program_free (p);
2396 }
2397 p_inited = TRUE;
2398 orc_once_mutex_unlock ();
2399 }
2400 ex->arrays[ORC_VAR_A2] = c;
2401 ex->program = 0;
2402
2403 ex->n = n;
2404 ORC_EXECUTOR_M (ex) = m;
2405 ex->arrays[ORC_VAR_D1] = d1;
2406 ex->params[ORC_VAR_D1] = d1_stride;
2407 ex->arrays[ORC_VAR_S1] = (void *) s1;
2408 ex->params[ORC_VAR_S1] = s1_stride;
2409 ex->params[ORC_VAR_P1] = p1;
2410
2411 func = c->exec;
2412 func (ex);
2413 }
2414 #endif
2415