1 
/* autogenerated from videomixerorc.orc */
3 
4 #ifdef HAVE_CONFIG_H
5 #include "config.h"
6 #endif
7 #include <glib.h>
8 
/*
 * Fixed-width integer typedefs (orc_int8 ... orc_uint64), the ORC_UINT64_C
 * literal helper and the lane-access unions used by the kernels in this file.
 * The whole group is skipped when _ORC_INTEGER_TYPEDEFS_ is already defined.
 */
#ifndef _ORC_INTEGER_TYPEDEFS_
#define _ORC_INTEGER_TYPEDEFS_
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 or later: take the exact-width types from <stdint.h>. */
#include <stdint.h>
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
#define ORC_UINT64_C(x) UINT64_C(x)
#elif defined(_MSC_VER)
/* MSVC: use the sized __intN keywords; also maps `inline` to `__inline`. */
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
#define ORC_UINT64_C(x) (x##Ui64)
#define inline __inline
#else
/* Fallback: built-in types; the 64-bit type is chosen from <limits.h>. */
#include <limits.h>
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
#if INT_MAX == LONG_MAX
/* long is no wider than int here, so use long long for 64 bits. */
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
#define ORC_UINT64_C(x) (x##ULL)
#else
typedef long orc_int64;
typedef unsigned long orc_uint64;
#define ORC_UINT64_C(x) (x##UL)
#endif
#endif
/* 16-bit value, also addressable as two 8-bit lanes. */
typedef union
{
  orc_int16 i;
  orc_int8 x2[2];
} orc_union16;
/* 32-bit value, also addressable as a float, two 16-bit or four 8-bit lanes. */
typedef union
{
  orc_int32 i;
  float f;
  orc_int16 x2[2];
  orc_int8 x4[4];
} orc_union32;
/*
 * 64-bit value, also addressable as a double, two 32-bit integer or float
 * lanes, or four 16-bit lanes.
 */
typedef union
{
  orc_int64 i;
  double f;
  orc_int32 x2[2];
  float x2f[2];
  orc_int16 x4[4];
} orc_union64;
#endif
/*
 * ORC_RESTRICT: spelling of the `restrict` pointer qualifier for the current
 * compiler. Expands to nothing when neither C99 nor GCC >= 4 is detected.
 * An earlier definition, if any, is left untouched.
 */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
81 
/*
 * ORC_INTERNAL: declaration prefix that requests hidden symbol visibility on
 * Sun Studio (two spellings, selected by compiler version) and on GCC.
 * Expands to nothing on any other compiler. An earlier definition, if any,
 * is left untouched.
 */
#ifndef ORC_INTERNAL
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
#define ORC_INTERNAL __hidden
#elif defined (__GNUC__)
#define ORC_INTERNAL __attribute__((visibility("hidden")))
#else
#define ORC_INTERNAL
#endif
#endif
93 
94 
95 #ifndef DISABLE_ORC
96 #include <orc/orc.h>
97 #endif
98 void video_mixer_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n);
99 void video_mixer_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
100     const guint32 * ORC_RESTRICT s1, int n);
101 void video_mixer_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
102     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
103 void video_mixer_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
104     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
105 void video_mixer_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
106     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
107 void video_mixer_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
108     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
109 void video_mixer_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
110     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m);
111 
112 
/* begin Orc C target preamble */
/*
 * Helper macros used by the C fallback kernels. These are function-like
 * macros that may evaluate their arguments more than once, so arguments
 * must be free of side effects.
 */

/* Generic clamp / abs / min / max. */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))

/*
 * Range limits: S/U = signed/unsigned, B/W/L = 8/16/32 bits.
 * The cast-based limits are fully parenthesized so they stay a single
 * operand in any expression context (e.g. `sizeof ORC_UB_MAX`).
 */
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX ((orc_uint8) 255)
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX ((orc_uint16) 65535)
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0

/* Clamp to each of the ranges above. */
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)

/* Byte-order reversal of 16-, 32- and 64-bit values. */
#define ORC_SWAP_W(x) ((((x)&0xffU)<<8) | (((x)&0xff00U)>>8))
#define ORC_SWAP_L(x) ((((x)&0xffU)<<24) | (((x)&0xff00U)<<8) | (((x)&0xff0000U)>>8) | (((x)&0xff000000U)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))

/* Advances ptr by offset bytes; the result is a plain `void *`. */
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))

/*
 * Bit-pattern helpers for 32-bit (0x7f800000 exponent mask) and 64-bit
 * (0x7ff0000000000000 exponent mask) floating-point values:
 * ORC_DENORMAL* keeps only the sign bit when the exponent field is zero;
 * ORC_ISNAN* is true when the exponent field is all ones and the mantissa
 * field is non-zero.
 */
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))

/* Spelling of the `restrict` qualifier, unless already defined. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* end Orc C target preamble */
154 
155 
156 
/* video_mixer_orc_splat_u32 */
158 #ifdef DISABLE_ORC
159 void
video_mixer_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)160 video_mixer_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
161 {
162   int i;
163   orc_union32 *ORC_RESTRICT ptr0;
164   orc_union32 var32;
165   orc_union32 var33;
166 
167   ptr0 = (orc_union32 *) d1;
168 
169   /* 0: loadpl */
170   var32.i = p1;
171 
172   for (i = 0; i < n; i++) {
173     /* 1: copyl */
174     var33.i = var32.i;
175     /* 2: storel */
176     ptr0[i] = var33;
177   }
178 
179 }
180 
181 #else
182 static void
_backup_video_mixer_orc_splat_u32(OrcExecutor * ORC_RESTRICT ex)183 _backup_video_mixer_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex)
184 {
185   int i;
186   int n = ex->n;
187   orc_union32 *ORC_RESTRICT ptr0;
188   orc_union32 var32;
189   orc_union32 var33;
190 
191   ptr0 = (orc_union32 *) ex->arrays[0];
192 
193   /* 0: loadpl */
194   var32.i = ex->params[24];
195 
196   for (i = 0; i < n; i++) {
197     /* 1: copyl */
198     var33.i = var32.i;
199     /* 2: storel */
200     ptr0[i] = var33;
201   }
202 
203 }
204 
205 void
video_mixer_orc_splat_u32(guint32 * ORC_RESTRICT d1,int p1,int n)206 video_mixer_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n)
207 {
208   OrcExecutor _ex, *ex = &_ex;
209   static volatile int p_inited = 0;
210   static OrcCode *c = 0;
211   void (*func) (OrcExecutor *);
212 
213   if (!p_inited) {
214     orc_once_mutex_lock ();
215     if (!p_inited) {
216       OrcProgram *p;
217 
218 #if 1
219       static const orc_uint8 bc[] = {
220         1, 9, 25, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95, 111,
221         114, 99, 95, 115, 112, 108, 97, 116, 95, 117, 51, 50, 11, 4, 4, 16,
222         4, 112, 0, 24, 2, 0,
223       };
224       p = orc_program_new_from_static_bytecode (bc);
225       orc_program_set_backup_function (p, _backup_video_mixer_orc_splat_u32);
226 #else
227       p = orc_program_new ();
228       orc_program_set_name (p, "video_mixer_orc_splat_u32");
229       orc_program_set_backup_function (p, _backup_video_mixer_orc_splat_u32);
230       orc_program_add_destination (p, 4, "d1");
231       orc_program_add_parameter (p, 4, "p1");
232 
233       orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1,
234           ORC_VAR_D1);
235 #endif
236 
237       orc_program_compile (p);
238       c = orc_program_take_code (p);
239       orc_program_free (p);
240     }
241     p_inited = TRUE;
242     orc_once_mutex_unlock ();
243   }
244   ex->arrays[ORC_VAR_A2] = c;
245   ex->program = 0;
246 
247   ex->n = n;
248   ex->arrays[ORC_VAR_D1] = d1;
249   ex->params[ORC_VAR_P1] = p1;
250 
251   func = c->exec;
252   func (ex);
253 }
254 #endif
255 
256 
/* video_mixer_orc_memcpy_u32 */
258 #ifdef DISABLE_ORC
259 void
video_mixer_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)260 video_mixer_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
261     const guint32 * ORC_RESTRICT s1, int n)
262 {
263   int i;
264   orc_union32 *ORC_RESTRICT ptr0;
265   const orc_union32 *ORC_RESTRICT ptr4;
266   orc_union32 var32;
267   orc_union32 var33;
268 
269   ptr0 = (orc_union32 *) d1;
270   ptr4 = (orc_union32 *) s1;
271 
272 
273   for (i = 0; i < n; i++) {
274     /* 0: loadl */
275     var32 = ptr4[i];
276     /* 1: copyl */
277     var33.i = var32.i;
278     /* 2: storel */
279     ptr0[i] = var33;
280   }
281 
282 }
283 
284 #else
285 static void
_backup_video_mixer_orc_memcpy_u32(OrcExecutor * ORC_RESTRICT ex)286 _backup_video_mixer_orc_memcpy_u32 (OrcExecutor * ORC_RESTRICT ex)
287 {
288   int i;
289   int n = ex->n;
290   orc_union32 *ORC_RESTRICT ptr0;
291   const orc_union32 *ORC_RESTRICT ptr4;
292   orc_union32 var32;
293   orc_union32 var33;
294 
295   ptr0 = (orc_union32 *) ex->arrays[0];
296   ptr4 = (orc_union32 *) ex->arrays[4];
297 
298 
299   for (i = 0; i < n; i++) {
300     /* 0: loadl */
301     var32 = ptr4[i];
302     /* 1: copyl */
303     var33.i = var32.i;
304     /* 2: storel */
305     ptr0[i] = var33;
306   }
307 
308 }
309 
310 void
video_mixer_orc_memcpy_u32(guint32 * ORC_RESTRICT d1,const guint32 * ORC_RESTRICT s1,int n)311 video_mixer_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1,
312     const guint32 * ORC_RESTRICT s1, int n)
313 {
314   OrcExecutor _ex, *ex = &_ex;
315   static volatile int p_inited = 0;
316   static OrcCode *c = 0;
317   void (*func) (OrcExecutor *);
318 
319   if (!p_inited) {
320     orc_once_mutex_lock ();
321     if (!p_inited) {
322       OrcProgram *p;
323 
324 #if 1
325       static const orc_uint8 bc[] = {
326         1, 9, 26, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95, 111,
327         114, 99, 95, 109, 101, 109, 99, 112, 121, 95, 117, 51, 50, 11, 4, 4,
328         12, 4, 4, 112, 0, 4, 2, 0,
329       };
330       p = orc_program_new_from_static_bytecode (bc);
331       orc_program_set_backup_function (p, _backup_video_mixer_orc_memcpy_u32);
332 #else
333       p = orc_program_new ();
334       orc_program_set_name (p, "video_mixer_orc_memcpy_u32");
335       orc_program_set_backup_function (p, _backup_video_mixer_orc_memcpy_u32);
336       orc_program_add_destination (p, 4, "d1");
337       orc_program_add_source (p, 4, "s1");
338 
339       orc_program_append_2 (p, "copyl", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1,
340           ORC_VAR_D1);
341 #endif
342 
343       orc_program_compile (p);
344       c = orc_program_take_code (p);
345       orc_program_free (p);
346     }
347     p_inited = TRUE;
348     orc_once_mutex_unlock ();
349   }
350   ex->arrays[ORC_VAR_A2] = c;
351   ex->program = 0;
352 
353   ex->n = n;
354   ex->arrays[ORC_VAR_D1] = d1;
355   ex->arrays[ORC_VAR_S1] = (void *) s1;
356 
357   func = c->exec;
358   func (ex);
359 }
360 #endif
361 
362 
/* video_mixer_orc_blend_u8 */
364 #ifdef DISABLE_ORC
365 void
video_mixer_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)366 video_mixer_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
367     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
368 {
369   int i;
370   int j;
371   orc_int8 *ORC_RESTRICT ptr0;
372   const orc_int8 *ORC_RESTRICT ptr4;
373   orc_int8 var34;
374   orc_int8 var35;
375   orc_union16 var36;
376   orc_int8 var37;
377   orc_union16 var38;
378   orc_union16 var39;
379   orc_union16 var40;
380   orc_union16 var41;
381   orc_union16 var42;
382   orc_union16 var43;
383   orc_union16 var44;
384 
385   for (j = 0; j < m; j++) {
386     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
387     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
388 
389     /* 5: loadpw */
390     var36.i = p1;
391 
392     for (i = 0; i < n; i++) {
393       /* 0: loadb */
394       var34 = ptr0[i];
395       /* 1: convubw */
396       var38.i = (orc_uint8) var34;
397       /* 2: loadb */
398       var35 = ptr4[i];
399       /* 3: convubw */
400       var39.i = (orc_uint8) var35;
401       /* 4: subw */
402       var40.i = var39.i - var38.i;
403       /* 6: mullw */
404       var41.i = (var40.i * var36.i) & 0xffff;
405       /* 7: shlw */
406       var42.i = ((orc_uint16) var38.i) << 8;
407       /* 8: addw */
408       var43.i = var42.i + var41.i;
409       /* 9: shruw */
410       var44.i = ((orc_uint16) var43.i) >> 8;
411       /* 10: convsuswb */
412       var37 = ORC_CLAMP_UB (var44.i);
413       /* 11: storeb */
414       ptr0[i] = var37;
415     }
416   }
417 
418 }
419 
420 #else
421 static void
_backup_video_mixer_orc_blend_u8(OrcExecutor * ORC_RESTRICT ex)422 _backup_video_mixer_orc_blend_u8 (OrcExecutor * ORC_RESTRICT ex)
423 {
424   int i;
425   int j;
426   int n = ex->n;
427   int m = ex->params[ORC_VAR_A1];
428   orc_int8 *ORC_RESTRICT ptr0;
429   const orc_int8 *ORC_RESTRICT ptr4;
430   orc_int8 var34;
431   orc_int8 var35;
432   orc_union16 var36;
433   orc_int8 var37;
434   orc_union16 var38;
435   orc_union16 var39;
436   orc_union16 var40;
437   orc_union16 var41;
438   orc_union16 var42;
439   orc_union16 var43;
440   orc_union16 var44;
441 
442   for (j = 0; j < m; j++) {
443     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
444     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
445 
446     /* 5: loadpw */
447     var36.i = ex->params[24];
448 
449     for (i = 0; i < n; i++) {
450       /* 0: loadb */
451       var34 = ptr0[i];
452       /* 1: convubw */
453       var38.i = (orc_uint8) var34;
454       /* 2: loadb */
455       var35 = ptr4[i];
456       /* 3: convubw */
457       var39.i = (orc_uint8) var35;
458       /* 4: subw */
459       var40.i = var39.i - var38.i;
460       /* 6: mullw */
461       var41.i = (var40.i * var36.i) & 0xffff;
462       /* 7: shlw */
463       var42.i = ((orc_uint16) var38.i) << 8;
464       /* 8: addw */
465       var43.i = var42.i + var41.i;
466       /* 9: shruw */
467       var44.i = ((orc_uint16) var43.i) >> 8;
468       /* 10: convsuswb */
469       var37 = ORC_CLAMP_UB (var44.i);
470       /* 11: storeb */
471       ptr0[i] = var37;
472     }
473   }
474 
475 }
476 
477 void
video_mixer_orc_blend_u8(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)478 video_mixer_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride,
479     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
480 {
481   OrcExecutor _ex, *ex = &_ex;
482   static volatile int p_inited = 0;
483   static OrcCode *c = 0;
484   void (*func) (OrcExecutor *);
485 
486   if (!p_inited) {
487     orc_once_mutex_lock ();
488     if (!p_inited) {
489       OrcProgram *p;
490 
491 #if 1
492       static const orc_uint8 bc[] = {
493         1, 7, 9, 24, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
494         111, 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 56, 11, 1, 1, 12,
495         1, 1, 14, 1, 8, 0, 0, 0, 16, 2, 20, 2, 20, 2, 150, 32,
496         0, 150, 33, 4, 98, 33, 33, 32, 89, 33, 33, 24, 93, 32, 32, 16,
497         70, 33, 32, 33, 95, 33, 33, 16, 160, 0, 33, 2, 0,
498       };
499       p = orc_program_new_from_static_bytecode (bc);
500       orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_u8);
501 #else
502       p = orc_program_new ();
503       orc_program_set_2d (p);
504       orc_program_set_name (p, "video_mixer_orc_blend_u8");
505       orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_u8);
506       orc_program_add_destination (p, 1, "d1");
507       orc_program_add_source (p, 1, "s1");
508       orc_program_add_constant (p, 1, 0x00000008, "c1");
509       orc_program_add_parameter (p, 2, "p1");
510       orc_program_add_temporary (p, 2, "t1");
511       orc_program_add_temporary (p, 2, "t2");
512 
513       orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
514           ORC_VAR_D1);
515       orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1,
516           ORC_VAR_D1);
517       orc_program_append_2 (p, "subw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1,
518           ORC_VAR_D1);
519       orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1,
520           ORC_VAR_D1);
521       orc_program_append_2 (p, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
522           ORC_VAR_D1);
523       orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2,
524           ORC_VAR_D1);
525       orc_program_append_2 (p, "shruw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
526           ORC_VAR_D1);
527       orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2,
528           ORC_VAR_D1, ORC_VAR_D1);
529 #endif
530 
531       orc_program_compile (p);
532       c = orc_program_take_code (p);
533       orc_program_free (p);
534     }
535     p_inited = TRUE;
536     orc_once_mutex_unlock ();
537   }
538   ex->arrays[ORC_VAR_A2] = c;
539   ex->program = 0;
540 
541   ex->n = n;
542   ORC_EXECUTOR_M (ex) = m;
543   ex->arrays[ORC_VAR_D1] = d1;
544   ex->params[ORC_VAR_D1] = d1_stride;
545   ex->arrays[ORC_VAR_S1] = (void *) s1;
546   ex->params[ORC_VAR_S1] = s1_stride;
547   ex->params[ORC_VAR_P1] = p1;
548 
549   func = c->exec;
550   func (ex);
551 }
552 #endif
553 
554 
/* video_mixer_orc_blend_argb */
556 #ifdef DISABLE_ORC
557 void
video_mixer_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)558 video_mixer_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
559     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
560 {
561   int i;
562   int j;
563   orc_union32 *ORC_RESTRICT ptr0;
564   const orc_union32 *ORC_RESTRICT ptr4;
565   orc_union64 var39;
566 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
567   volatile orc_union32 var40;
568 #else
569   orc_union32 var40;
570 #endif
571   orc_union32 var41;
572   orc_union16 var42;
573   orc_int8 var43;
574   orc_union32 var44;
575   orc_union64 var45;
576   orc_union64 var46;
577   orc_union64 var47;
578   orc_union64 var48;
579   orc_union32 var49;
580   orc_union64 var50;
581   orc_union64 var51;
582   orc_union64 var52;
583   orc_union64 var53;
584   orc_union64 var54;
585   orc_union32 var55;
586   orc_union32 var56;
587 
588   for (j = 0; j < m; j++) {
589     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
590     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
591 
592     /* 5: loadpw */
593     var39.x4[0] = p1;
594     var39.x4[1] = p1;
595     var39.x4[2] = p1;
596     var39.x4[3] = p1;
597     /* 16: loadpl */
598     var40.i = 0x000000ff;       /* 255 or 1.25987e-321f */
599 
600     for (i = 0; i < n; i++) {
601       /* 0: loadl */
602       var41 = ptr4[i];
603       /* 1: convlw */
604       var42.i = var41.i;
605       /* 2: convwb */
606       var43 = var42.i;
607       /* 3: splatbl */
608       var44.i =
609           ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
610           << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
611           & 0xff);
612       /* 4: convubw */
613       var45.x4[0] = (orc_uint8) var44.x4[0];
614       var45.x4[1] = (orc_uint8) var44.x4[1];
615       var45.x4[2] = (orc_uint8) var44.x4[2];
616       var45.x4[3] = (orc_uint8) var44.x4[3];
617       /* 6: mullw */
618       var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
619       var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
620       var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
621       var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
622       /* 7: shruw */
623       var47.x4[0] = ((orc_uint16) var46.x4[0]) >> 8;
624       var47.x4[1] = ((orc_uint16) var46.x4[1]) >> 8;
625       var47.x4[2] = ((orc_uint16) var46.x4[2]) >> 8;
626       var47.x4[3] = ((orc_uint16) var46.x4[3]) >> 8;
627       /* 8: convubw */
628       var48.x4[0] = (orc_uint8) var41.x4[0];
629       var48.x4[1] = (orc_uint8) var41.x4[1];
630       var48.x4[2] = (orc_uint8) var41.x4[2];
631       var48.x4[3] = (orc_uint8) var41.x4[3];
632       /* 9: loadl */
633       var49 = ptr0[i];
634       /* 10: convubw */
635       var50.x4[0] = (orc_uint8) var49.x4[0];
636       var50.x4[1] = (orc_uint8) var49.x4[1];
637       var50.x4[2] = (orc_uint8) var49.x4[2];
638       var50.x4[3] = (orc_uint8) var49.x4[3];
639       /* 11: subw */
640       var51.x4[0] = var48.x4[0] - var50.x4[0];
641       var51.x4[1] = var48.x4[1] - var50.x4[1];
642       var51.x4[2] = var48.x4[2] - var50.x4[2];
643       var51.x4[3] = var48.x4[3] - var50.x4[3];
644       /* 12: mullw */
645       var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
646       var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
647       var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
648       var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
649       /* 13: div255w */
650       var53.x4[0] =
651           ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
652               (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
653       var53.x4[1] =
654           ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
655               (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
656       var53.x4[2] =
657           ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
658               (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
659       var53.x4[3] =
660           ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
661               (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
662       /* 14: addw */
663       var54.x4[0] = var50.x4[0] + var53.x4[0];
664       var54.x4[1] = var50.x4[1] + var53.x4[1];
665       var54.x4[2] = var50.x4[2] + var53.x4[2];
666       var54.x4[3] = var50.x4[3] + var53.x4[3];
667       /* 15: convwb */
668       var55.x4[0] = var54.x4[0];
669       var55.x4[1] = var54.x4[1];
670       var55.x4[2] = var54.x4[2];
671       var55.x4[3] = var54.x4[3];
672       /* 17: orl */
673       var56.i = var55.i | var40.i;
674       /* 18: storel */
675       ptr0[i] = var56;
676     }
677   }
678 
679 }
680 
681 #else
682 static void
_backup_video_mixer_orc_blend_argb(OrcExecutor * ORC_RESTRICT ex)683 _backup_video_mixer_orc_blend_argb (OrcExecutor * ORC_RESTRICT ex)
684 {
685   int i;
686   int j;
687   int n = ex->n;
688   int m = ex->params[ORC_VAR_A1];
689   orc_union32 *ORC_RESTRICT ptr0;
690   const orc_union32 *ORC_RESTRICT ptr4;
691   orc_union64 var39;
692 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
693   volatile orc_union32 var40;
694 #else
695   orc_union32 var40;
696 #endif
697   orc_union32 var41;
698   orc_union16 var42;
699   orc_int8 var43;
700   orc_union32 var44;
701   orc_union64 var45;
702   orc_union64 var46;
703   orc_union64 var47;
704   orc_union64 var48;
705   orc_union32 var49;
706   orc_union64 var50;
707   orc_union64 var51;
708   orc_union64 var52;
709   orc_union64 var53;
710   orc_union64 var54;
711   orc_union32 var55;
712   orc_union32 var56;
713 
714   for (j = 0; j < m; j++) {
715     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
716     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
717 
718     /* 5: loadpw */
719     var39.x4[0] = ex->params[24];
720     var39.x4[1] = ex->params[24];
721     var39.x4[2] = ex->params[24];
722     var39.x4[3] = ex->params[24];
723     /* 16: loadpl */
724     var40.i = 0x000000ff;       /* 255 or 1.25987e-321f */
725 
726     for (i = 0; i < n; i++) {
727       /* 0: loadl */
728       var41 = ptr4[i];
729       /* 1: convlw */
730       var42.i = var41.i;
731       /* 2: convwb */
732       var43 = var42.i;
733       /* 3: splatbl */
734       var44.i =
735           ((((orc_uint32) var43) & 0xff) << 24) | ((((orc_uint32) var43) & 0xff)
736           << 16) | ((((orc_uint32) var43) & 0xff) << 8) | (((orc_uint32) var43)
737           & 0xff);
738       /* 4: convubw */
739       var45.x4[0] = (orc_uint8) var44.x4[0];
740       var45.x4[1] = (orc_uint8) var44.x4[1];
741       var45.x4[2] = (orc_uint8) var44.x4[2];
742       var45.x4[3] = (orc_uint8) var44.x4[3];
743       /* 6: mullw */
744       var46.x4[0] = (var45.x4[0] * var39.x4[0]) & 0xffff;
745       var46.x4[1] = (var45.x4[1] * var39.x4[1]) & 0xffff;
746       var46.x4[2] = (var45.x4[2] * var39.x4[2]) & 0xffff;
747       var46.x4[3] = (var45.x4[3] * var39.x4[3]) & 0xffff;
748       /* 7: shruw */
749       var47.x4[0] = ((orc_uint16) var46.x4[0]) >> 8;
750       var47.x4[1] = ((orc_uint16) var46.x4[1]) >> 8;
751       var47.x4[2] = ((orc_uint16) var46.x4[2]) >> 8;
752       var47.x4[3] = ((orc_uint16) var46.x4[3]) >> 8;
753       /* 8: convubw */
754       var48.x4[0] = (orc_uint8) var41.x4[0];
755       var48.x4[1] = (orc_uint8) var41.x4[1];
756       var48.x4[2] = (orc_uint8) var41.x4[2];
757       var48.x4[3] = (orc_uint8) var41.x4[3];
758       /* 9: loadl */
759       var49 = ptr0[i];
760       /* 10: convubw */
761       var50.x4[0] = (orc_uint8) var49.x4[0];
762       var50.x4[1] = (orc_uint8) var49.x4[1];
763       var50.x4[2] = (orc_uint8) var49.x4[2];
764       var50.x4[3] = (orc_uint8) var49.x4[3];
765       /* 11: subw */
766       var51.x4[0] = var48.x4[0] - var50.x4[0];
767       var51.x4[1] = var48.x4[1] - var50.x4[1];
768       var51.x4[2] = var48.x4[2] - var50.x4[2];
769       var51.x4[3] = var48.x4[3] - var50.x4[3];
770       /* 12: mullw */
771       var52.x4[0] = (var51.x4[0] * var47.x4[0]) & 0xffff;
772       var52.x4[1] = (var51.x4[1] * var47.x4[1]) & 0xffff;
773       var52.x4[2] = (var51.x4[2] * var47.x4[2]) & 0xffff;
774       var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff;
775       /* 13: div255w */
776       var53.x4[0] =
777           ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) +
778               (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8;
779       var53.x4[1] =
780           ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) +
781               (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8;
782       var53.x4[2] =
783           ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) +
784               (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8;
785       var53.x4[3] =
786           ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) +
787               (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8;
788       /* 14: addw */
789       var54.x4[0] = var50.x4[0] + var53.x4[0];
790       var54.x4[1] = var50.x4[1] + var53.x4[1];
791       var54.x4[2] = var50.x4[2] + var53.x4[2];
792       var54.x4[3] = var50.x4[3] + var53.x4[3];
793       /* 15: convwb */
794       var55.x4[0] = var54.x4[0];
795       var55.x4[1] = var54.x4[1];
796       var55.x4[2] = var54.x4[2];
797       var55.x4[3] = var54.x4[3];
798       /* 17: orl */
799       var56.i = var55.i | var40.i;
800       /* 18: storel */
801       ptr0[i] = var56;
802     }
803   }
804 
805 }
806 
807 void
video_mixer_orc_blend_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)808 video_mixer_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
809     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
810 {
811   OrcExecutor _ex, *ex = &_ex;
812   static volatile int p_inited = 0;
813   static OrcCode *c = 0;
814   void (*func) (OrcExecutor *);
815 
816   if (!p_inited) {
817     orc_once_mutex_lock ();
818     if (!p_inited) {
819       OrcProgram *p;
820 
821 #if 1
822       static const orc_uint8 bc[] = {
823         1, 7, 9, 26, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
824         111, 114, 99, 95, 98, 108, 101, 110, 100, 95, 97, 114, 103, 98, 11, 4,
825         4, 12, 4, 4, 14, 4, 255, 0, 0, 0, 14, 2, 8, 0, 0, 0,
826         16, 2, 20, 4, 20, 2, 20, 1, 20, 4, 20, 8, 20, 8, 20, 8,
827         113, 32, 4, 163, 33, 32, 157, 34, 33, 152, 35, 34, 21, 2, 150, 38,
828         35, 21, 2, 89, 38, 38, 24, 21, 2, 95, 38, 38, 17, 21, 2, 150,
829         37, 32, 113, 32, 0, 21, 2, 150, 36, 32, 21, 2, 98, 37, 37, 36,
830         21, 2, 89, 37, 37, 38, 21, 2, 80, 37, 37, 21, 2, 70, 36, 36,
831         37, 21, 2, 157, 32, 36, 123, 32, 32, 16, 128, 0, 32, 2, 0,
832       };
833       p = orc_program_new_from_static_bytecode (bc);
834       orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_argb);
835 #else
836       p = orc_program_new ();
837       orc_program_set_2d (p);
838       orc_program_set_name (p, "video_mixer_orc_blend_argb");
839       orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_argb);
840       orc_program_add_destination (p, 4, "d1");
841       orc_program_add_source (p, 4, "s1");
842       orc_program_add_constant (p, 4, 0x000000ff, "c1");
843       orc_program_add_constant (p, 2, 0x00000008, "c2");
844       orc_program_add_parameter (p, 2, "p1");
845       orc_program_add_temporary (p, 4, "t1");
846       orc_program_add_temporary (p, 2, "t2");
847       orc_program_add_temporary (p, 1, "t3");
848       orc_program_add_temporary (p, 4, "t4");
849       orc_program_add_temporary (p, 8, "t5");
850       orc_program_add_temporary (p, 8, "t6");
851       orc_program_add_temporary (p, 8, "t7");
852 
853       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
854           ORC_VAR_D1);
855       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
856           ORC_VAR_D1);
857       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
858           ORC_VAR_D1);
859       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
860           ORC_VAR_D1);
861       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T4, ORC_VAR_D1,
862           ORC_VAR_D1);
863       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_P1,
864           ORC_VAR_D1);
865       orc_program_append_2 (p, "shruw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
866           ORC_VAR_D1);
867       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
868           ORC_VAR_D1);
869       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
870           ORC_VAR_D1);
871       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_D1,
872           ORC_VAR_D1);
873       orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
874           ORC_VAR_D1);
875       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
876           ORC_VAR_D1);
877       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
878           ORC_VAR_D1);
879       orc_program_append_2 (p, "addw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T6,
880           ORC_VAR_D1);
881       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1,
882           ORC_VAR_D1);
883       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
884           ORC_VAR_D1);
885       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
886           ORC_VAR_D1);
887 #endif
888 
889       orc_program_compile (p);
890       c = orc_program_take_code (p);
891       orc_program_free (p);
892     }
893     p_inited = TRUE;
894     orc_once_mutex_unlock ();
895   }
896   ex->arrays[ORC_VAR_A2] = c;
897   ex->program = 0;
898 
899   ex->n = n;
900   ORC_EXECUTOR_M (ex) = m;
901   ex->arrays[ORC_VAR_D1] = d1;
902   ex->params[ORC_VAR_D1] = d1_stride;
903   ex->arrays[ORC_VAR_S1] = (void *) s1;
904   ex->params[ORC_VAR_S1] = s1_stride;
905   ex->params[ORC_VAR_P1] = p1;
906 
907   func = c->exec;
908   func (ex);
909 }
910 #endif
911 
912 
913 /* video_mixer_orc_blend_bgra */
914 #ifdef DISABLE_ORC
915 void
video_mixer_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)916 video_mixer_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
917     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
918 {
919   int i;
920   int j;
921   orc_union32 *ORC_RESTRICT ptr0;
922   const orc_union32 *ORC_RESTRICT ptr4;
923   orc_union64 var40;
924 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
925   volatile orc_union32 var41;
926 #else
927   orc_union32 var41;
928 #endif
929   orc_union32 var42;
930   orc_union32 var43;
931   orc_union16 var44;
932   orc_int8 var45;
933   orc_union32 var46;
934   orc_union64 var47;
935   orc_union64 var48;
936   orc_union64 var49;
937   orc_union64 var50;
938   orc_union32 var51;
939   orc_union64 var52;
940   orc_union64 var53;
941   orc_union64 var54;
942   orc_union64 var55;
943   orc_union64 var56;
944   orc_union32 var57;
945   orc_union32 var58;
946 
947   for (j = 0; j < m; j++) {
948     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
949     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
950 
951     /* 6: loadpw */
952     var40.x4[0] = p1;
953     var40.x4[1] = p1;
954     var40.x4[2] = p1;
955     var40.x4[3] = p1;
956     /* 17: loadpl */
957     var41.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
958 
959     for (i = 0; i < n; i++) {
960       /* 0: loadl */
961       var42 = ptr4[i];
962       /* 1: shrul */
963       var43.i = ((orc_uint32) var42.i) >> 24;
964       /* 2: convlw */
965       var44.i = var43.i;
966       /* 3: convwb */
967       var45 = var44.i;
968       /* 4: splatbl */
969       var46.i =
970           ((((orc_uint32) var45) & 0xff) << 24) | ((((orc_uint32) var45) & 0xff)
971           << 16) | ((((orc_uint32) var45) & 0xff) << 8) | (((orc_uint32) var45)
972           & 0xff);
973       /* 5: convubw */
974       var47.x4[0] = (orc_uint8) var46.x4[0];
975       var47.x4[1] = (orc_uint8) var46.x4[1];
976       var47.x4[2] = (orc_uint8) var46.x4[2];
977       var47.x4[3] = (orc_uint8) var46.x4[3];
978       /* 7: mullw */
979       var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
980       var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
981       var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
982       var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
983       /* 8: shruw */
984       var49.x4[0] = ((orc_uint16) var48.x4[0]) >> 8;
985       var49.x4[1] = ((orc_uint16) var48.x4[1]) >> 8;
986       var49.x4[2] = ((orc_uint16) var48.x4[2]) >> 8;
987       var49.x4[3] = ((orc_uint16) var48.x4[3]) >> 8;
988       /* 9: convubw */
989       var50.x4[0] = (orc_uint8) var42.x4[0];
990       var50.x4[1] = (orc_uint8) var42.x4[1];
991       var50.x4[2] = (orc_uint8) var42.x4[2];
992       var50.x4[3] = (orc_uint8) var42.x4[3];
993       /* 10: loadl */
994       var51 = ptr0[i];
995       /* 11: convubw */
996       var52.x4[0] = (orc_uint8) var51.x4[0];
997       var52.x4[1] = (orc_uint8) var51.x4[1];
998       var52.x4[2] = (orc_uint8) var51.x4[2];
999       var52.x4[3] = (orc_uint8) var51.x4[3];
1000       /* 12: subw */
1001       var53.x4[0] = var50.x4[0] - var52.x4[0];
1002       var53.x4[1] = var50.x4[1] - var52.x4[1];
1003       var53.x4[2] = var50.x4[2] - var52.x4[2];
1004       var53.x4[3] = var50.x4[3] - var52.x4[3];
1005       /* 13: mullw */
1006       var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
1007       var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
1008       var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
1009       var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
1010       /* 14: div255w */
1011       var55.x4[0] =
1012           ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
1013               (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
1014       var55.x4[1] =
1015           ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
1016               (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
1017       var55.x4[2] =
1018           ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
1019               (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
1020       var55.x4[3] =
1021           ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
1022               (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
1023       /* 15: addw */
1024       var56.x4[0] = var52.x4[0] + var55.x4[0];
1025       var56.x4[1] = var52.x4[1] + var55.x4[1];
1026       var56.x4[2] = var52.x4[2] + var55.x4[2];
1027       var56.x4[3] = var52.x4[3] + var55.x4[3];
1028       /* 16: convwb */
1029       var57.x4[0] = var56.x4[0];
1030       var57.x4[1] = var56.x4[1];
1031       var57.x4[2] = var56.x4[2];
1032       var57.x4[3] = var56.x4[3];
1033       /* 18: orl */
1034       var58.i = var57.i | var41.i;
1035       /* 19: storel */
1036       ptr0[i] = var58;
1037     }
1038   }
1039 
1040 }
1041 
1042 #else
1043 static void
_backup_video_mixer_orc_blend_bgra(OrcExecutor * ORC_RESTRICT ex)1044 _backup_video_mixer_orc_blend_bgra (OrcExecutor * ORC_RESTRICT ex)
1045 {
1046   int i;
1047   int j;
1048   int n = ex->n;
1049   int m = ex->params[ORC_VAR_A1];
1050   orc_union32 *ORC_RESTRICT ptr0;
1051   const orc_union32 *ORC_RESTRICT ptr4;
1052   orc_union64 var40;
1053 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1054   volatile orc_union32 var41;
1055 #else
1056   orc_union32 var41;
1057 #endif
1058   orc_union32 var42;
1059   orc_union32 var43;
1060   orc_union16 var44;
1061   orc_int8 var45;
1062   orc_union32 var46;
1063   orc_union64 var47;
1064   orc_union64 var48;
1065   orc_union64 var49;
1066   orc_union64 var50;
1067   orc_union32 var51;
1068   orc_union64 var52;
1069   orc_union64 var53;
1070   orc_union64 var54;
1071   orc_union64 var55;
1072   orc_union64 var56;
1073   orc_union32 var57;
1074   orc_union32 var58;
1075 
1076   for (j = 0; j < m; j++) {
1077     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1078     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1079 
1080     /* 6: loadpw */
1081     var40.x4[0] = ex->params[24];
1082     var40.x4[1] = ex->params[24];
1083     var40.x4[2] = ex->params[24];
1084     var40.x4[3] = ex->params[24];
1085     /* 17: loadpl */
1086     var41.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
1087 
1088     for (i = 0; i < n; i++) {
1089       /* 0: loadl */
1090       var42 = ptr4[i];
1091       /* 1: shrul */
1092       var43.i = ((orc_uint32) var42.i) >> 24;
1093       /* 2: convlw */
1094       var44.i = var43.i;
1095       /* 3: convwb */
1096       var45 = var44.i;
1097       /* 4: splatbl */
1098       var46.i =
1099           ((((orc_uint32) var45) & 0xff) << 24) | ((((orc_uint32) var45) & 0xff)
1100           << 16) | ((((orc_uint32) var45) & 0xff) << 8) | (((orc_uint32) var45)
1101           & 0xff);
1102       /* 5: convubw */
1103       var47.x4[0] = (orc_uint8) var46.x4[0];
1104       var47.x4[1] = (orc_uint8) var46.x4[1];
1105       var47.x4[2] = (orc_uint8) var46.x4[2];
1106       var47.x4[3] = (orc_uint8) var46.x4[3];
1107       /* 7: mullw */
1108       var48.x4[0] = (var47.x4[0] * var40.x4[0]) & 0xffff;
1109       var48.x4[1] = (var47.x4[1] * var40.x4[1]) & 0xffff;
1110       var48.x4[2] = (var47.x4[2] * var40.x4[2]) & 0xffff;
1111       var48.x4[3] = (var47.x4[3] * var40.x4[3]) & 0xffff;
1112       /* 8: shruw */
1113       var49.x4[0] = ((orc_uint16) var48.x4[0]) >> 8;
1114       var49.x4[1] = ((orc_uint16) var48.x4[1]) >> 8;
1115       var49.x4[2] = ((orc_uint16) var48.x4[2]) >> 8;
1116       var49.x4[3] = ((orc_uint16) var48.x4[3]) >> 8;
1117       /* 9: convubw */
1118       var50.x4[0] = (orc_uint8) var42.x4[0];
1119       var50.x4[1] = (orc_uint8) var42.x4[1];
1120       var50.x4[2] = (orc_uint8) var42.x4[2];
1121       var50.x4[3] = (orc_uint8) var42.x4[3];
1122       /* 10: loadl */
1123       var51 = ptr0[i];
1124       /* 11: convubw */
1125       var52.x4[0] = (orc_uint8) var51.x4[0];
1126       var52.x4[1] = (orc_uint8) var51.x4[1];
1127       var52.x4[2] = (orc_uint8) var51.x4[2];
1128       var52.x4[3] = (orc_uint8) var51.x4[3];
1129       /* 12: subw */
1130       var53.x4[0] = var50.x4[0] - var52.x4[0];
1131       var53.x4[1] = var50.x4[1] - var52.x4[1];
1132       var53.x4[2] = var50.x4[2] - var52.x4[2];
1133       var53.x4[3] = var50.x4[3] - var52.x4[3];
1134       /* 13: mullw */
1135       var54.x4[0] = (var53.x4[0] * var49.x4[0]) & 0xffff;
1136       var54.x4[1] = (var53.x4[1] * var49.x4[1]) & 0xffff;
1137       var54.x4[2] = (var53.x4[2] * var49.x4[2]) & 0xffff;
1138       var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff;
1139       /* 14: div255w */
1140       var55.x4[0] =
1141           ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) +
1142               (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8;
1143       var55.x4[1] =
1144           ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) +
1145               (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8;
1146       var55.x4[2] =
1147           ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) +
1148               (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8;
1149       var55.x4[3] =
1150           ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) +
1151               (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8;
1152       /* 15: addw */
1153       var56.x4[0] = var52.x4[0] + var55.x4[0];
1154       var56.x4[1] = var52.x4[1] + var55.x4[1];
1155       var56.x4[2] = var52.x4[2] + var55.x4[2];
1156       var56.x4[3] = var52.x4[3] + var55.x4[3];
1157       /* 16: convwb */
1158       var57.x4[0] = var56.x4[0];
1159       var57.x4[1] = var56.x4[1];
1160       var57.x4[2] = var56.x4[2];
1161       var57.x4[3] = var56.x4[3];
1162       /* 18: orl */
1163       var58.i = var57.i | var41.i;
1164       /* 19: storel */
1165       ptr0[i] = var58;
1166     }
1167   }
1168 
1169 }
1170 
1171 void
video_mixer_orc_blend_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1172 video_mixer_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1173     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1174 {
1175   OrcExecutor _ex, *ex = &_ex;
1176   static volatile int p_inited = 0;
1177   static OrcCode *c = 0;
1178   void (*func) (OrcExecutor *);
1179 
1180   if (!p_inited) {
1181     orc_once_mutex_lock ();
1182     if (!p_inited) {
1183       OrcProgram *p;
1184 
1185 #if 1
1186       static const orc_uint8 bc[] = {
1187         1, 7, 9, 26, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
1188         111, 114, 99, 95, 98, 108, 101, 110, 100, 95, 98, 103, 114, 97, 11, 4,
1189         4, 12, 4, 4, 14, 4, 0, 0, 0, 255, 14, 4, 24, 0, 0, 0,
1190         14, 2, 8, 0, 0, 0, 16, 2, 20, 4, 20, 4, 20, 2, 20, 1,
1191         20, 4, 20, 8, 20, 8, 20, 8, 113, 32, 4, 126, 33, 32, 17, 163,
1192         34, 33, 157, 35, 34, 152, 36, 35, 21, 2, 150, 39, 36, 21, 2, 89,
1193         39, 39, 24, 21, 2, 95, 39, 39, 18, 21, 2, 150, 38, 32, 113, 32,
1194         0, 21, 2, 150, 37, 32, 21, 2, 98, 38, 38, 37, 21, 2, 89, 38,
1195         38, 39, 21, 2, 80, 38, 38, 21, 2, 70, 37, 37, 38, 21, 2, 157,
1196         32, 37, 123, 32, 32, 16, 128, 0, 32, 2, 0,
1197       };
1198       p = orc_program_new_from_static_bytecode (bc);
1199       orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_bgra);
1200 #else
1201       p = orc_program_new ();
1202       orc_program_set_2d (p);
1203       orc_program_set_name (p, "video_mixer_orc_blend_bgra");
1204       orc_program_set_backup_function (p, _backup_video_mixer_orc_blend_bgra);
1205       orc_program_add_destination (p, 4, "d1");
1206       orc_program_add_source (p, 4, "s1");
1207       orc_program_add_constant (p, 4, 0xff000000, "c1");
1208       orc_program_add_constant (p, 4, 0x00000018, "c2");
1209       orc_program_add_constant (p, 2, 0x00000008, "c3");
1210       orc_program_add_parameter (p, 2, "p1");
1211       orc_program_add_temporary (p, 4, "t1");
1212       orc_program_add_temporary (p, 4, "t2");
1213       orc_program_add_temporary (p, 2, "t3");
1214       orc_program_add_temporary (p, 1, "t4");
1215       orc_program_add_temporary (p, 4, "t5");
1216       orc_program_add_temporary (p, 8, "t6");
1217       orc_program_add_temporary (p, 8, "t7");
1218       orc_program_add_temporary (p, 8, "t8");
1219 
1220       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1221           ORC_VAR_D1);
1222       orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C2,
1223           ORC_VAR_D1);
1224       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1225           ORC_VAR_D1);
1226       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
1227           ORC_VAR_D1);
1228       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_D1,
1229           ORC_VAR_D1);
1230       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T5, ORC_VAR_D1,
1231           ORC_VAR_D1);
1232       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_P1,
1233           ORC_VAR_D1);
1234       orc_program_append_2 (p, "shruw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C3,
1235           ORC_VAR_D1);
1236       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1,
1237           ORC_VAR_D1);
1238       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
1239           ORC_VAR_D1);
1240       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1,
1241           ORC_VAR_D1);
1242       orc_program_append_2 (p, "subw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
1243           ORC_VAR_D1);
1244       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T8,
1245           ORC_VAR_D1);
1246       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
1247           ORC_VAR_D1);
1248       orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7,
1249           ORC_VAR_D1);
1250       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T6, ORC_VAR_D1,
1251           ORC_VAR_D1);
1252       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1,
1253           ORC_VAR_D1);
1254       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1255           ORC_VAR_D1);
1256 #endif
1257 
1258       orc_program_compile (p);
1259       c = orc_program_take_code (p);
1260       orc_program_free (p);
1261     }
1262     p_inited = TRUE;
1263     orc_once_mutex_unlock ();
1264   }
1265   ex->arrays[ORC_VAR_A2] = c;
1266   ex->program = 0;
1267 
1268   ex->n = n;
1269   ORC_EXECUTOR_M (ex) = m;
1270   ex->arrays[ORC_VAR_D1] = d1;
1271   ex->params[ORC_VAR_D1] = d1_stride;
1272   ex->arrays[ORC_VAR_S1] = (void *) s1;
1273   ex->params[ORC_VAR_S1] = s1_stride;
1274   ex->params[ORC_VAR_P1] = p1;
1275 
1276   func = c->exec;
1277   func (ex);
1278 }
1279 #endif
1280 
1281 
1282 /* video_mixer_orc_overlay_argb */
1283 #ifdef DISABLE_ORC
1284 void
video_mixer_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1285 video_mixer_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1286     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1287 {
1288   int i;
1289   int j;
1290   orc_union32 *ORC_RESTRICT ptr0;
1291   const orc_union32 *ORC_RESTRICT ptr4;
1292   orc_union64 var41;
1293 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1294   volatile orc_union32 var42;
1295 #else
1296   orc_union32 var42;
1297 #endif
1298 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1299   volatile orc_union32 var43;
1300 #else
1301   orc_union32 var43;
1302 #endif
1303   orc_union32 var44;
1304   orc_union16 var45;
1305   orc_int8 var46;
1306   orc_union32 var47;
1307   orc_union64 var48;
1308   orc_union64 var49;
1309   orc_union64 var50;
1310   orc_union64 var51;
1311   orc_union64 var52;
1312   orc_union32 var53;
1313   orc_union64 var54;
1314   orc_union64 var55;
1315   orc_union32 var56;
1316   orc_union16 var57;
1317   orc_int8 var58;
1318   orc_union32 var59;
1319   orc_union64 var60;
1320   orc_union64 var61;
1321   orc_union64 var62;
1322   orc_union64 var63;
1323   orc_union64 var64;
1324   orc_union64 var65;
1325   orc_union64 var66;
1326   orc_union64 var67;
1327   orc_union32 var68;
1328   orc_union32 var69;
1329   orc_union32 var70;
1330   orc_union32 var71;
1331   orc_union32 var72;
1332 
1333   for (j = 0; j < m; j++) {
1334     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1335     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1336 
1337     /* 5: loadpw */
1338     var41.x4[0] = p1;
1339     var41.x4[1] = p1;
1340     var41.x4[2] = p1;
1341     var41.x4[3] = p1;
1342     /* 10: loadpl */
1343     var53.i = 0xffffffff;       /* -1 or 2.122e-314f */
1344     /* 26: loadpl */
1345     var42.i = 0xffffff00;       /* -256 or 2.122e-314f */
1346     /* 29: loadpl */
1347     var43.i = 0x000000ff;       /* 255 or 1.25987e-321f */
1348 
1349     for (i = 0; i < n; i++) {
1350       /* 0: loadl */
1351       var44 = ptr4[i];
1352       /* 1: convlw */
1353       var45.i = var44.i;
1354       /* 2: convwb */
1355       var46 = var45.i;
1356       /* 3: splatbl */
1357       var47.i =
1358           ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
1359           << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
1360           & 0xff);
1361       /* 4: convubw */
1362       var48.x4[0] = (orc_uint8) var47.x4[0];
1363       var48.x4[1] = (orc_uint8) var47.x4[1];
1364       var48.x4[2] = (orc_uint8) var47.x4[2];
1365       var48.x4[3] = (orc_uint8) var47.x4[3];
1366       /* 6: mullw */
1367       var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
1368       var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
1369       var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
1370       var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
1371       /* 7: shruw */
1372       var50.x4[0] = ((orc_uint16) var49.x4[0]) >> 8;
1373       var50.x4[1] = ((orc_uint16) var49.x4[1]) >> 8;
1374       var50.x4[2] = ((orc_uint16) var49.x4[2]) >> 8;
1375       var50.x4[3] = ((orc_uint16) var49.x4[3]) >> 8;
1376       /* 8: convubw */
1377       var51.x4[0] = (orc_uint8) var44.x4[0];
1378       var51.x4[1] = (orc_uint8) var44.x4[1];
1379       var51.x4[2] = (orc_uint8) var44.x4[2];
1380       var51.x4[3] = (orc_uint8) var44.x4[3];
1381       /* 9: mullw */
1382       var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
1383       var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
1384       var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
1385       var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
1386       /* 11: convubw */
1387       var54.x4[0] = (orc_uint8) var53.x4[0];
1388       var54.x4[1] = (orc_uint8) var53.x4[1];
1389       var54.x4[2] = (orc_uint8) var53.x4[2];
1390       var54.x4[3] = (orc_uint8) var53.x4[3];
1391       /* 12: subw */
1392       var55.x4[0] = var54.x4[0] - var50.x4[0];
1393       var55.x4[1] = var54.x4[1] - var50.x4[1];
1394       var55.x4[2] = var54.x4[2] - var50.x4[2];
1395       var55.x4[3] = var54.x4[3] - var50.x4[3];
1396       /* 13: loadl */
1397       var56 = ptr0[i];
1398       /* 14: convlw */
1399       var57.i = var56.i;
1400       /* 15: convwb */
1401       var58 = var57.i;
1402       /* 16: splatbl */
1403       var59.i =
1404           ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
1405           << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
1406           & 0xff);
1407       /* 17: convubw */
1408       var60.x4[0] = (orc_uint8) var59.x4[0];
1409       var60.x4[1] = (orc_uint8) var59.x4[1];
1410       var60.x4[2] = (orc_uint8) var59.x4[2];
1411       var60.x4[3] = (orc_uint8) var59.x4[3];
1412       /* 18: mullw */
1413       var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
1414       var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
1415       var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
1416       var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
1417       /* 19: div255w */
1418       var62.x4[0] =
1419           ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
1420               (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
1421       var62.x4[1] =
1422           ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
1423               (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
1424       var62.x4[2] =
1425           ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
1426               (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
1427       var62.x4[3] =
1428           ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
1429               (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
1430       /* 20: convubw */
1431       var63.x4[0] = (orc_uint8) var56.x4[0];
1432       var63.x4[1] = (orc_uint8) var56.x4[1];
1433       var63.x4[2] = (orc_uint8) var56.x4[2];
1434       var63.x4[3] = (orc_uint8) var56.x4[3];
1435       /* 21: mullw */
1436       var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
1437       var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
1438       var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
1439       var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
1440       /* 22: addw */
1441       var65.x4[0] = var64.x4[0] + var52.x4[0];
1442       var65.x4[1] = var64.x4[1] + var52.x4[1];
1443       var65.x4[2] = var64.x4[2] + var52.x4[2];
1444       var65.x4[3] = var64.x4[3] + var52.x4[3];
1445       /* 23: addw */
1446       var66.x4[0] = var62.x4[0] + var50.x4[0];
1447       var66.x4[1] = var62.x4[1] + var50.x4[1];
1448       var66.x4[2] = var62.x4[2] + var50.x4[2];
1449       var66.x4[3] = var62.x4[3] + var50.x4[3];
1450       /* 24: divluw */
1451       var67.x4[0] =
1452           ((var66.x4[0] & 0xff) ==
1453           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
1454           ((orc_uint16) var66.x4[0] & 0xff));
1455       var67.x4[1] =
1456           ((var66.x4[1] & 0xff) ==
1457           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
1458           ((orc_uint16) var66.x4[1] & 0xff));
1459       var67.x4[2] =
1460           ((var66.x4[2] & 0xff) ==
1461           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
1462           ((orc_uint16) var66.x4[2] & 0xff));
1463       var67.x4[3] =
1464           ((var66.x4[3] & 0xff) ==
1465           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
1466           ((orc_uint16) var66.x4[3] & 0xff));
1467       /* 25: convwb */
1468       var68.x4[0] = var67.x4[0];
1469       var68.x4[1] = var67.x4[1];
1470       var68.x4[2] = var67.x4[2];
1471       var68.x4[3] = var67.x4[3];
1472       /* 27: andl */
1473       var69.i = var68.i & var42.i;
1474       /* 28: convwb */
1475       var70.x4[0] = var66.x4[0];
1476       var70.x4[1] = var66.x4[1];
1477       var70.x4[2] = var66.x4[2];
1478       var70.x4[3] = var66.x4[3];
1479       /* 30: andl */
1480       var71.i = var70.i & var43.i;
1481       /* 31: orl */
1482       var72.i = var69.i | var71.i;
1483       /* 32: storel */
1484       ptr0[i] = var72;
1485     }
1486   }
1487 
1488 }
1489 
1490 #else
1491 static void
_backup_video_mixer_orc_overlay_argb(OrcExecutor * ORC_RESTRICT ex)1492 _backup_video_mixer_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex)
1493 {
1494   int i;
1495   int j;
1496   int n = ex->n;
1497   int m = ex->params[ORC_VAR_A1];
1498   orc_union32 *ORC_RESTRICT ptr0;
1499   const orc_union32 *ORC_RESTRICT ptr4;
1500   orc_union64 var41;
1501 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1502   volatile orc_union32 var42;
1503 #else
1504   orc_union32 var42;
1505 #endif
1506 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1507   volatile orc_union32 var43;
1508 #else
1509   orc_union32 var43;
1510 #endif
1511   orc_union32 var44;
1512   orc_union16 var45;
1513   orc_int8 var46;
1514   orc_union32 var47;
1515   orc_union64 var48;
1516   orc_union64 var49;
1517   orc_union64 var50;
1518   orc_union64 var51;
1519   orc_union64 var52;
1520   orc_union32 var53;
1521   orc_union64 var54;
1522   orc_union64 var55;
1523   orc_union32 var56;
1524   orc_union16 var57;
1525   orc_int8 var58;
1526   orc_union32 var59;
1527   orc_union64 var60;
1528   orc_union64 var61;
1529   orc_union64 var62;
1530   orc_union64 var63;
1531   orc_union64 var64;
1532   orc_union64 var65;
1533   orc_union64 var66;
1534   orc_union64 var67;
1535   orc_union32 var68;
1536   orc_union32 var69;
1537   orc_union32 var70;
1538   orc_union32 var71;
1539   orc_union32 var72;
1540 
1541   for (j = 0; j < m; j++) {
1542     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
1543     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
1544 
1545     /* 5: loadpw */
1546     var41.x4[0] = ex->params[24];
1547     var41.x4[1] = ex->params[24];
1548     var41.x4[2] = ex->params[24];
1549     var41.x4[3] = ex->params[24];
1550     /* 10: loadpl */
1551     var53.i = 0xffffffff;       /* -1 or 2.122e-314f */
1552     /* 26: loadpl */
1553     var42.i = 0xffffff00;       /* -256 or 2.122e-314f */
1554     /* 29: loadpl */
1555     var43.i = 0x000000ff;       /* 255 or 1.25987e-321f */
1556 
1557     for (i = 0; i < n; i++) {
1558       /* 0: loadl */
1559       var44 = ptr4[i];
1560       /* 1: convlw */
1561       var45.i = var44.i;
1562       /* 2: convwb */
1563       var46 = var45.i;
1564       /* 3: splatbl */
1565       var47.i =
1566           ((((orc_uint32) var46) & 0xff) << 24) | ((((orc_uint32) var46) & 0xff)
1567           << 16) | ((((orc_uint32) var46) & 0xff) << 8) | (((orc_uint32) var46)
1568           & 0xff);
1569       /* 4: convubw */
1570       var48.x4[0] = (orc_uint8) var47.x4[0];
1571       var48.x4[1] = (orc_uint8) var47.x4[1];
1572       var48.x4[2] = (orc_uint8) var47.x4[2];
1573       var48.x4[3] = (orc_uint8) var47.x4[3];
1574       /* 6: mullw */
1575       var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff;
1576       var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff;
1577       var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff;
1578       var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff;
1579       /* 7: shruw */
1580       var50.x4[0] = ((orc_uint16) var49.x4[0]) >> 8;
1581       var50.x4[1] = ((orc_uint16) var49.x4[1]) >> 8;
1582       var50.x4[2] = ((orc_uint16) var49.x4[2]) >> 8;
1583       var50.x4[3] = ((orc_uint16) var49.x4[3]) >> 8;
1584       /* 8: convubw */
1585       var51.x4[0] = (orc_uint8) var44.x4[0];
1586       var51.x4[1] = (orc_uint8) var44.x4[1];
1587       var51.x4[2] = (orc_uint8) var44.x4[2];
1588       var51.x4[3] = (orc_uint8) var44.x4[3];
1589       /* 9: mullw */
1590       var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff;
1591       var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff;
1592       var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff;
1593       var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff;
1594       /* 11: convubw */
1595       var54.x4[0] = (orc_uint8) var53.x4[0];
1596       var54.x4[1] = (orc_uint8) var53.x4[1];
1597       var54.x4[2] = (orc_uint8) var53.x4[2];
1598       var54.x4[3] = (orc_uint8) var53.x4[3];
1599       /* 12: subw */
1600       var55.x4[0] = var54.x4[0] - var50.x4[0];
1601       var55.x4[1] = var54.x4[1] - var50.x4[1];
1602       var55.x4[2] = var54.x4[2] - var50.x4[2];
1603       var55.x4[3] = var54.x4[3] - var50.x4[3];
1604       /* 13: loadl */
1605       var56 = ptr0[i];
1606       /* 14: convlw */
1607       var57.i = var56.i;
1608       /* 15: convwb */
1609       var58 = var57.i;
1610       /* 16: splatbl */
1611       var59.i =
1612           ((((orc_uint32) var58) & 0xff) << 24) | ((((orc_uint32) var58) & 0xff)
1613           << 16) | ((((orc_uint32) var58) & 0xff) << 8) | (((orc_uint32) var58)
1614           & 0xff);
1615       /* 17: convubw */
1616       var60.x4[0] = (orc_uint8) var59.x4[0];
1617       var60.x4[1] = (orc_uint8) var59.x4[1];
1618       var60.x4[2] = (orc_uint8) var59.x4[2];
1619       var60.x4[3] = (orc_uint8) var59.x4[3];
1620       /* 18: mullw */
1621       var61.x4[0] = (var60.x4[0] * var55.x4[0]) & 0xffff;
1622       var61.x4[1] = (var60.x4[1] * var55.x4[1]) & 0xffff;
1623       var61.x4[2] = (var60.x4[2] * var55.x4[2]) & 0xffff;
1624       var61.x4[3] = (var60.x4[3] * var55.x4[3]) & 0xffff;
1625       /* 19: div255w */
1626       var62.x4[0] =
1627           ((orc_uint16) (((orc_uint16) (var61.x4[0] + 128)) +
1628               (((orc_uint16) (var61.x4[0] + 128)) >> 8))) >> 8;
1629       var62.x4[1] =
1630           ((orc_uint16) (((orc_uint16) (var61.x4[1] + 128)) +
1631               (((orc_uint16) (var61.x4[1] + 128)) >> 8))) >> 8;
1632       var62.x4[2] =
1633           ((orc_uint16) (((orc_uint16) (var61.x4[2] + 128)) +
1634               (((orc_uint16) (var61.x4[2] + 128)) >> 8))) >> 8;
1635       var62.x4[3] =
1636           ((orc_uint16) (((orc_uint16) (var61.x4[3] + 128)) +
1637               (((orc_uint16) (var61.x4[3] + 128)) >> 8))) >> 8;
1638       /* 20: convubw */
1639       var63.x4[0] = (orc_uint8) var56.x4[0];
1640       var63.x4[1] = (orc_uint8) var56.x4[1];
1641       var63.x4[2] = (orc_uint8) var56.x4[2];
1642       var63.x4[3] = (orc_uint8) var56.x4[3];
1643       /* 21: mullw */
1644       var64.x4[0] = (var63.x4[0] * var62.x4[0]) & 0xffff;
1645       var64.x4[1] = (var63.x4[1] * var62.x4[1]) & 0xffff;
1646       var64.x4[2] = (var63.x4[2] * var62.x4[2]) & 0xffff;
1647       var64.x4[3] = (var63.x4[3] * var62.x4[3]) & 0xffff;
1648       /* 22: addw */
1649       var65.x4[0] = var64.x4[0] + var52.x4[0];
1650       var65.x4[1] = var64.x4[1] + var52.x4[1];
1651       var65.x4[2] = var64.x4[2] + var52.x4[2];
1652       var65.x4[3] = var64.x4[3] + var52.x4[3];
1653       /* 23: addw */
1654       var66.x4[0] = var62.x4[0] + var50.x4[0];
1655       var66.x4[1] = var62.x4[1] + var50.x4[1];
1656       var66.x4[2] = var62.x4[2] + var50.x4[2];
1657       var66.x4[3] = var62.x4[3] + var50.x4[3];
1658       /* 24: divluw */
1659       var67.x4[0] =
1660           ((var66.x4[0] & 0xff) ==
1661           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[0]) /
1662           ((orc_uint16) var66.x4[0] & 0xff));
1663       var67.x4[1] =
1664           ((var66.x4[1] & 0xff) ==
1665           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[1]) /
1666           ((orc_uint16) var66.x4[1] & 0xff));
1667       var67.x4[2] =
1668           ((var66.x4[2] & 0xff) ==
1669           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[2]) /
1670           ((orc_uint16) var66.x4[2] & 0xff));
1671       var67.x4[3] =
1672           ((var66.x4[3] & 0xff) ==
1673           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var65.x4[3]) /
1674           ((orc_uint16) var66.x4[3] & 0xff));
1675       /* 25: convwb */
1676       var68.x4[0] = var67.x4[0];
1677       var68.x4[1] = var67.x4[1];
1678       var68.x4[2] = var67.x4[2];
1679       var68.x4[3] = var67.x4[3];
1680       /* 27: andl */
1681       var69.i = var68.i & var42.i;
1682       /* 28: convwb */
1683       var70.x4[0] = var66.x4[0];
1684       var70.x4[1] = var66.x4[1];
1685       var70.x4[2] = var66.x4[2];
1686       var70.x4[3] = var66.x4[3];
1687       /* 30: andl */
1688       var71.i = var70.i & var43.i;
1689       /* 31: orl */
1690       var72.i = var69.i | var71.i;
1691       /* 32: storel */
1692       ptr0[i] = var72;
1693     }
1694   }
1695 
1696 }
1697 
1698 void
video_mixer_orc_overlay_argb(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1699 video_mixer_orc_overlay_argb (guint8 * ORC_RESTRICT d1, int d1_stride,
1700     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1701 {
1702   OrcExecutor _ex, *ex = &_ex;
1703   static volatile int p_inited = 0;
1704   static OrcCode *c = 0;
1705   void (*func) (OrcExecutor *);
1706 
1707   if (!p_inited) {
1708     orc_once_mutex_lock ();
1709     if (!p_inited) {
1710       OrcProgram *p;
1711 
1712 #if 1
1713       static const orc_uint8 bc[] = {
1714         1, 7, 9, 28, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
1715         111, 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 97, 114, 103,
1716             98,
1717         11, 4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 255, 0,
1718         0, 0, 14, 4, 0, 255, 255, 255, 14, 2, 8, 0, 0, 0, 16, 2,
1719         20, 4, 20, 2, 20, 1, 20, 8, 20, 8, 20, 8, 20, 4, 20, 8,
1720         20, 8, 113, 32, 4, 163, 33, 32, 157, 34, 33, 152, 38, 34, 21, 2,
1721         150, 35, 38, 21, 2, 89, 35, 35, 24, 21, 2, 95, 35, 35, 19, 21,
1722         2, 150, 40, 32, 21, 2, 89, 40, 40, 35, 115, 38, 16, 21, 2, 150,
1723         36, 38, 21, 2, 98, 36, 36, 35, 113, 32, 0, 163, 33, 32, 157, 34,
1724         33, 152, 38, 34, 21, 2, 150, 37, 38, 21, 2, 89, 37, 37, 36, 21,
1725         2, 80, 37, 37, 21, 2, 150, 39, 32, 21, 2, 89, 39, 39, 37, 21,
1726         2, 70, 39, 39, 40, 21, 2, 70, 37, 37, 35, 21, 2, 81, 39, 39,
1727         37, 21, 2, 157, 32, 39, 106, 32, 32, 18, 21, 2, 157, 38, 37, 106,
1728         38, 38, 17, 123, 32, 32, 38, 128, 0, 32, 2, 0,
1729       };
1730       p = orc_program_new_from_static_bytecode (bc);
1731       orc_program_set_backup_function (p, _backup_video_mixer_orc_overlay_argb);
1732 #else
1733       p = orc_program_new ();
1734       orc_program_set_2d (p);
1735       orc_program_set_name (p, "video_mixer_orc_overlay_argb");
1736       orc_program_set_backup_function (p, _backup_video_mixer_orc_overlay_argb);
1737       orc_program_add_destination (p, 4, "d1");
1738       orc_program_add_source (p, 4, "s1");
1739       orc_program_add_constant (p, 4, 0xffffffff, "c1");
1740       orc_program_add_constant (p, 4, 0x000000ff, "c2");
1741       orc_program_add_constant (p, 4, 0xffffff00, "c3");
1742       orc_program_add_constant (p, 2, 0x00000008, "c4");
1743       orc_program_add_parameter (p, 2, "p1");
1744       orc_program_add_temporary (p, 4, "t1");
1745       orc_program_add_temporary (p, 2, "t2");
1746       orc_program_add_temporary (p, 1, "t3");
1747       orc_program_add_temporary (p, 8, "t4");
1748       orc_program_add_temporary (p, 8, "t5");
1749       orc_program_add_temporary (p, 8, "t6");
1750       orc_program_add_temporary (p, 4, "t7");
1751       orc_program_add_temporary (p, 8, "t8");
1752       orc_program_add_temporary (p, 8, "t9");
1753 
1754       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
1755           ORC_VAR_D1);
1756       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
1757           ORC_VAR_D1);
1758       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1759           ORC_VAR_D1);
1760       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
1761           ORC_VAR_D1);
1762       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1,
1763           ORC_VAR_D1);
1764       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1,
1765           ORC_VAR_D1);
1766       orc_program_append_2 (p, "shruw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_C4,
1767           ORC_VAR_D1);
1768       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
1769           ORC_VAR_D1);
1770       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T4,
1771           ORC_VAR_D1);
1772       orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T7, ORC_VAR_C1, ORC_VAR_D1,
1773           ORC_VAR_D1);
1774       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1,
1775           ORC_VAR_D1);
1776       orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4,
1777           ORC_VAR_D1);
1778       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
1779           ORC_VAR_D1);
1780       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1,
1781           ORC_VAR_D1);
1782       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
1783           ORC_VAR_D1);
1784       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1,
1785           ORC_VAR_D1);
1786       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T7, ORC_VAR_D1,
1787           ORC_VAR_D1);
1788       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
1789           ORC_VAR_D1);
1790       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1,
1791           ORC_VAR_D1);
1792       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T1, ORC_VAR_D1,
1793           ORC_VAR_D1);
1794       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
1795           ORC_VAR_D1);
1796       orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T9,
1797           ORC_VAR_D1);
1798       orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4,
1799           ORC_VAR_D1);
1800       orc_program_append_2 (p, "divluw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6,
1801           ORC_VAR_D1);
1802       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T8, ORC_VAR_D1,
1803           ORC_VAR_D1);
1804       orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
1805           ORC_VAR_D1);
1806       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1,
1807           ORC_VAR_D1);
1808       orc_program_append_2 (p, "andl", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
1809           ORC_VAR_D1);
1810       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T7,
1811           ORC_VAR_D1);
1812       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
1813           ORC_VAR_D1);
1814 #endif
1815 
1816       orc_program_compile (p);
1817       c = orc_program_take_code (p);
1818       orc_program_free (p);
1819     }
1820     p_inited = TRUE;
1821     orc_once_mutex_unlock ();
1822   }
1823   ex->arrays[ORC_VAR_A2] = c;
1824   ex->program = 0;
1825 
1826   ex->n = n;
1827   ORC_EXECUTOR_M (ex) = m;
1828   ex->arrays[ORC_VAR_D1] = d1;
1829   ex->params[ORC_VAR_D1] = d1_stride;
1830   ex->arrays[ORC_VAR_S1] = (void *) s1;
1831   ex->params[ORC_VAR_S1] = s1_stride;
1832   ex->params[ORC_VAR_P1] = p1;
1833 
1834   func = c->exec;
1835   func (ex);
1836 }
1837 #endif
1838 
1839 
1840 /* video_mixer_orc_overlay_bgra */
1841 #ifdef DISABLE_ORC
1842 void
video_mixer_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)1843 video_mixer_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
1844     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
1845 {
1846   int i;
1847   int j;
1848   orc_union32 *ORC_RESTRICT ptr0;
1849   const orc_union32 *ORC_RESTRICT ptr4;
1850   orc_union64 var42;
1851 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1852   volatile orc_union32 var43;
1853 #else
1854   orc_union32 var43;
1855 #endif
1856 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
1857   volatile orc_union32 var44;
1858 #else
1859   orc_union32 var44;
1860 #endif
1861   orc_union32 var45;
1862   orc_union32 var46;
1863   orc_union16 var47;
1864   orc_int8 var48;
1865   orc_union32 var49;
1866   orc_union64 var50;
1867   orc_union64 var51;
1868   orc_union64 var52;
1869   orc_union64 var53;
1870   orc_union64 var54;
1871   orc_union32 var55;
1872   orc_union64 var56;
1873   orc_union64 var57;
1874   orc_union32 var58;
1875   orc_union32 var59;
1876   orc_union16 var60;
1877   orc_int8 var61;
1878   orc_union32 var62;
1879   orc_union64 var63;
1880   orc_union64 var64;
1881   orc_union64 var65;
1882   orc_union64 var66;
1883   orc_union64 var67;
1884   orc_union64 var68;
1885   orc_union64 var69;
1886   orc_union64 var70;
1887   orc_union32 var71;
1888   orc_union32 var72;
1889   orc_union32 var73;
1890   orc_union32 var74;
1891   orc_union32 var75;
1892 
1893   for (j = 0; j < m; j++) {
1894     ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j);
1895     ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j);
1896 
1897     /* 6: loadpw */
1898     var42.x4[0] = p1;
1899     var42.x4[1] = p1;
1900     var42.x4[2] = p1;
1901     var42.x4[3] = p1;
1902     /* 11: loadpl */
1903     var55.i = 0xffffffff;       /* -1 or 2.122e-314f */
1904     /* 28: loadpl */
1905     var43.i = 0x00ffffff;       /* 16777215 or 8.28905e-317f */
1906     /* 31: loadpl */
1907     var44.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
1908 
1909     for (i = 0; i < n; i++) {
1910       /* 0: loadl */
1911       var45 = ptr4[i];
1912       /* 1: shrul */
1913       var46.i = ((orc_uint32) var45.i) >> 24;
1914       /* 2: convlw */
1915       var47.i = var46.i;
1916       /* 3: convwb */
1917       var48 = var47.i;
1918       /* 4: splatbl */
1919       var49.i =
1920           ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
1921           << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
1922           & 0xff);
1923       /* 5: convubw */
1924       var50.x4[0] = (orc_uint8) var49.x4[0];
1925       var50.x4[1] = (orc_uint8) var49.x4[1];
1926       var50.x4[2] = (orc_uint8) var49.x4[2];
1927       var50.x4[3] = (orc_uint8) var49.x4[3];
1928       /* 7: mullw */
1929       var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
1930       var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
1931       var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
1932       var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
1933       /* 8: shruw */
1934       var52.x4[0] = ((orc_uint16) var51.x4[0]) >> 8;
1935       var52.x4[1] = ((orc_uint16) var51.x4[1]) >> 8;
1936       var52.x4[2] = ((orc_uint16) var51.x4[2]) >> 8;
1937       var52.x4[3] = ((orc_uint16) var51.x4[3]) >> 8;
1938       /* 9: convubw */
1939       var53.x4[0] = (orc_uint8) var45.x4[0];
1940       var53.x4[1] = (orc_uint8) var45.x4[1];
1941       var53.x4[2] = (orc_uint8) var45.x4[2];
1942       var53.x4[3] = (orc_uint8) var45.x4[3];
1943       /* 10: mullw */
1944       var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
1945       var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
1946       var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
1947       var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
1948       /* 12: convubw */
1949       var56.x4[0] = (orc_uint8) var55.x4[0];
1950       var56.x4[1] = (orc_uint8) var55.x4[1];
1951       var56.x4[2] = (orc_uint8) var55.x4[2];
1952       var56.x4[3] = (orc_uint8) var55.x4[3];
1953       /* 13: subw */
1954       var57.x4[0] = var56.x4[0] - var52.x4[0];
1955       var57.x4[1] = var56.x4[1] - var52.x4[1];
1956       var57.x4[2] = var56.x4[2] - var52.x4[2];
1957       var57.x4[3] = var56.x4[3] - var52.x4[3];
1958       /* 14: loadl */
1959       var58 = ptr0[i];
1960       /* 15: shrul */
1961       var59.i = ((orc_uint32) var58.i) >> 24;
1962       /* 16: convlw */
1963       var60.i = var59.i;
1964       /* 17: convwb */
1965       var61 = var60.i;
1966       /* 18: splatbl */
1967       var62.i =
1968           ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
1969           << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
1970           & 0xff);
1971       /* 19: convubw */
1972       var63.x4[0] = (orc_uint8) var62.x4[0];
1973       var63.x4[1] = (orc_uint8) var62.x4[1];
1974       var63.x4[2] = (orc_uint8) var62.x4[2];
1975       var63.x4[3] = (orc_uint8) var62.x4[3];
1976       /* 20: mullw */
1977       var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
1978       var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
1979       var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
1980       var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
1981       /* 21: div255w */
1982       var65.x4[0] =
1983           ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
1984               (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
1985       var65.x4[1] =
1986           ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
1987               (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
1988       var65.x4[2] =
1989           ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
1990               (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
1991       var65.x4[3] =
1992           ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
1993               (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
1994       /* 22: convubw */
1995       var66.x4[0] = (orc_uint8) var58.x4[0];
1996       var66.x4[1] = (orc_uint8) var58.x4[1];
1997       var66.x4[2] = (orc_uint8) var58.x4[2];
1998       var66.x4[3] = (orc_uint8) var58.x4[3];
1999       /* 23: mullw */
2000       var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
2001       var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
2002       var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
2003       var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
2004       /* 24: addw */
2005       var68.x4[0] = var67.x4[0] + var54.x4[0];
2006       var68.x4[1] = var67.x4[1] + var54.x4[1];
2007       var68.x4[2] = var67.x4[2] + var54.x4[2];
2008       var68.x4[3] = var67.x4[3] + var54.x4[3];
2009       /* 25: addw */
2010       var69.x4[0] = var65.x4[0] + var52.x4[0];
2011       var69.x4[1] = var65.x4[1] + var52.x4[1];
2012       var69.x4[2] = var65.x4[2] + var52.x4[2];
2013       var69.x4[3] = var65.x4[3] + var52.x4[3];
2014       /* 26: divluw */
2015       var70.x4[0] =
2016           ((var69.x4[0] & 0xff) ==
2017           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
2018           ((orc_uint16) var69.x4[0] & 0xff));
2019       var70.x4[1] =
2020           ((var69.x4[1] & 0xff) ==
2021           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
2022           ((orc_uint16) var69.x4[1] & 0xff));
2023       var70.x4[2] =
2024           ((var69.x4[2] & 0xff) ==
2025           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
2026           ((orc_uint16) var69.x4[2] & 0xff));
2027       var70.x4[3] =
2028           ((var69.x4[3] & 0xff) ==
2029           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
2030           ((orc_uint16) var69.x4[3] & 0xff));
2031       /* 27: convwb */
2032       var71.x4[0] = var70.x4[0];
2033       var71.x4[1] = var70.x4[1];
2034       var71.x4[2] = var70.x4[2];
2035       var71.x4[3] = var70.x4[3];
2036       /* 29: andl */
2037       var72.i = var71.i & var43.i;
2038       /* 30: convwb */
2039       var73.x4[0] = var69.x4[0];
2040       var73.x4[1] = var69.x4[1];
2041       var73.x4[2] = var69.x4[2];
2042       var73.x4[3] = var69.x4[3];
2043       /* 32: andl */
2044       var74.i = var73.i & var44.i;
2045       /* 33: orl */
2046       var75.i = var72.i | var74.i;
2047       /* 34: storel */
2048       ptr0[i] = var75;
2049     }
2050   }
2051 
2052 }
2053 
2054 #else
2055 static void
_backup_video_mixer_orc_overlay_bgra(OrcExecutor * ORC_RESTRICT ex)2056 _backup_video_mixer_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex)
2057 {
2058   int i;
2059   int j;
2060   int n = ex->n;
2061   int m = ex->params[ORC_VAR_A1];
2062   orc_union32 *ORC_RESTRICT ptr0;
2063   const orc_union32 *ORC_RESTRICT ptr4;
2064   orc_union64 var42;
2065 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2066   volatile orc_union32 var43;
2067 #else
2068   orc_union32 var43;
2069 #endif
2070 #if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
2071   volatile orc_union32 var44;
2072 #else
2073   orc_union32 var44;
2074 #endif
2075   orc_union32 var45;
2076   orc_union32 var46;
2077   orc_union16 var47;
2078   orc_int8 var48;
2079   orc_union32 var49;
2080   orc_union64 var50;
2081   orc_union64 var51;
2082   orc_union64 var52;
2083   orc_union64 var53;
2084   orc_union64 var54;
2085   orc_union32 var55;
2086   orc_union64 var56;
2087   orc_union64 var57;
2088   orc_union32 var58;
2089   orc_union32 var59;
2090   orc_union16 var60;
2091   orc_int8 var61;
2092   orc_union32 var62;
2093   orc_union64 var63;
2094   orc_union64 var64;
2095   orc_union64 var65;
2096   orc_union64 var66;
2097   orc_union64 var67;
2098   orc_union64 var68;
2099   orc_union64 var69;
2100   orc_union64 var70;
2101   orc_union32 var71;
2102   orc_union32 var72;
2103   orc_union32 var73;
2104   orc_union32 var74;
2105   orc_union32 var75;
2106 
2107   for (j = 0; j < m; j++) {
2108     ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j);
2109     ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j);
2110 
2111     /* 6: loadpw */
2112     var42.x4[0] = ex->params[24];
2113     var42.x4[1] = ex->params[24];
2114     var42.x4[2] = ex->params[24];
2115     var42.x4[3] = ex->params[24];
2116     /* 11: loadpl */
2117     var55.i = 0xffffffff;       /* -1 or 2.122e-314f */
2118     /* 28: loadpl */
2119     var43.i = 0x00ffffff;       /* 16777215 or 8.28905e-317f */
2120     /* 31: loadpl */
2121     var44.i = 0xff000000;       /* -16777216 or 2.11371e-314f */
2122 
2123     for (i = 0; i < n; i++) {
2124       /* 0: loadl */
2125       var45 = ptr4[i];
2126       /* 1: shrul */
2127       var46.i = ((orc_uint32) var45.i) >> 24;
2128       /* 2: convlw */
2129       var47.i = var46.i;
2130       /* 3: convwb */
2131       var48 = var47.i;
2132       /* 4: splatbl */
2133       var49.i =
2134           ((((orc_uint32) var48) & 0xff) << 24) | ((((orc_uint32) var48) & 0xff)
2135           << 16) | ((((orc_uint32) var48) & 0xff) << 8) | (((orc_uint32) var48)
2136           & 0xff);
2137       /* 5: convubw */
2138       var50.x4[0] = (orc_uint8) var49.x4[0];
2139       var50.x4[1] = (orc_uint8) var49.x4[1];
2140       var50.x4[2] = (orc_uint8) var49.x4[2];
2141       var50.x4[3] = (orc_uint8) var49.x4[3];
2142       /* 7: mullw */
2143       var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff;
2144       var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff;
2145       var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff;
2146       var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff;
2147       /* 8: shruw */
2148       var52.x4[0] = ((orc_uint16) var51.x4[0]) >> 8;
2149       var52.x4[1] = ((orc_uint16) var51.x4[1]) >> 8;
2150       var52.x4[2] = ((orc_uint16) var51.x4[2]) >> 8;
2151       var52.x4[3] = ((orc_uint16) var51.x4[3]) >> 8;
2152       /* 9: convubw */
2153       var53.x4[0] = (orc_uint8) var45.x4[0];
2154       var53.x4[1] = (orc_uint8) var45.x4[1];
2155       var53.x4[2] = (orc_uint8) var45.x4[2];
2156       var53.x4[3] = (orc_uint8) var45.x4[3];
2157       /* 10: mullw */
2158       var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff;
2159       var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff;
2160       var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff;
2161       var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff;
2162       /* 12: convubw */
2163       var56.x4[0] = (orc_uint8) var55.x4[0];
2164       var56.x4[1] = (orc_uint8) var55.x4[1];
2165       var56.x4[2] = (orc_uint8) var55.x4[2];
2166       var56.x4[3] = (orc_uint8) var55.x4[3];
2167       /* 13: subw */
2168       var57.x4[0] = var56.x4[0] - var52.x4[0];
2169       var57.x4[1] = var56.x4[1] - var52.x4[1];
2170       var57.x4[2] = var56.x4[2] - var52.x4[2];
2171       var57.x4[3] = var56.x4[3] - var52.x4[3];
2172       /* 14: loadl */
2173       var58 = ptr0[i];
2174       /* 15: shrul */
2175       var59.i = ((orc_uint32) var58.i) >> 24;
2176       /* 16: convlw */
2177       var60.i = var59.i;
2178       /* 17: convwb */
2179       var61 = var60.i;
2180       /* 18: splatbl */
2181       var62.i =
2182           ((((orc_uint32) var61) & 0xff) << 24) | ((((orc_uint32) var61) & 0xff)
2183           << 16) | ((((orc_uint32) var61) & 0xff) << 8) | (((orc_uint32) var61)
2184           & 0xff);
2185       /* 19: convubw */
2186       var63.x4[0] = (orc_uint8) var62.x4[0];
2187       var63.x4[1] = (orc_uint8) var62.x4[1];
2188       var63.x4[2] = (orc_uint8) var62.x4[2];
2189       var63.x4[3] = (orc_uint8) var62.x4[3];
2190       /* 20: mullw */
2191       var64.x4[0] = (var63.x4[0] * var57.x4[0]) & 0xffff;
2192       var64.x4[1] = (var63.x4[1] * var57.x4[1]) & 0xffff;
2193       var64.x4[2] = (var63.x4[2] * var57.x4[2]) & 0xffff;
2194       var64.x4[3] = (var63.x4[3] * var57.x4[3]) & 0xffff;
2195       /* 21: div255w */
2196       var65.x4[0] =
2197           ((orc_uint16) (((orc_uint16) (var64.x4[0] + 128)) +
2198               (((orc_uint16) (var64.x4[0] + 128)) >> 8))) >> 8;
2199       var65.x4[1] =
2200           ((orc_uint16) (((orc_uint16) (var64.x4[1] + 128)) +
2201               (((orc_uint16) (var64.x4[1] + 128)) >> 8))) >> 8;
2202       var65.x4[2] =
2203           ((orc_uint16) (((orc_uint16) (var64.x4[2] + 128)) +
2204               (((orc_uint16) (var64.x4[2] + 128)) >> 8))) >> 8;
2205       var65.x4[3] =
2206           ((orc_uint16) (((orc_uint16) (var64.x4[3] + 128)) +
2207               (((orc_uint16) (var64.x4[3] + 128)) >> 8))) >> 8;
2208       /* 22: convubw */
2209       var66.x4[0] = (orc_uint8) var58.x4[0];
2210       var66.x4[1] = (orc_uint8) var58.x4[1];
2211       var66.x4[2] = (orc_uint8) var58.x4[2];
2212       var66.x4[3] = (orc_uint8) var58.x4[3];
2213       /* 23: mullw */
2214       var67.x4[0] = (var66.x4[0] * var65.x4[0]) & 0xffff;
2215       var67.x4[1] = (var66.x4[1] * var65.x4[1]) & 0xffff;
2216       var67.x4[2] = (var66.x4[2] * var65.x4[2]) & 0xffff;
2217       var67.x4[3] = (var66.x4[3] * var65.x4[3]) & 0xffff;
2218       /* 24: addw */
2219       var68.x4[0] = var67.x4[0] + var54.x4[0];
2220       var68.x4[1] = var67.x4[1] + var54.x4[1];
2221       var68.x4[2] = var67.x4[2] + var54.x4[2];
2222       var68.x4[3] = var67.x4[3] + var54.x4[3];
2223       /* 25: addw */
2224       var69.x4[0] = var65.x4[0] + var52.x4[0];
2225       var69.x4[1] = var65.x4[1] + var52.x4[1];
2226       var69.x4[2] = var65.x4[2] + var52.x4[2];
2227       var69.x4[3] = var65.x4[3] + var52.x4[3];
2228       /* 26: divluw */
2229       var70.x4[0] =
2230           ((var69.x4[0] & 0xff) ==
2231           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[0]) /
2232           ((orc_uint16) var69.x4[0] & 0xff));
2233       var70.x4[1] =
2234           ((var69.x4[1] & 0xff) ==
2235           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[1]) /
2236           ((orc_uint16) var69.x4[1] & 0xff));
2237       var70.x4[2] =
2238           ((var69.x4[2] & 0xff) ==
2239           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[2]) /
2240           ((orc_uint16) var69.x4[2] & 0xff));
2241       var70.x4[3] =
2242           ((var69.x4[3] & 0xff) ==
2243           0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var68.x4[3]) /
2244           ((orc_uint16) var69.x4[3] & 0xff));
2245       /* 27: convwb */
2246       var71.x4[0] = var70.x4[0];
2247       var71.x4[1] = var70.x4[1];
2248       var71.x4[2] = var70.x4[2];
2249       var71.x4[3] = var70.x4[3];
2250       /* 29: andl */
2251       var72.i = var71.i & var43.i;
2252       /* 30: convwb */
2253       var73.x4[0] = var69.x4[0];
2254       var73.x4[1] = var69.x4[1];
2255       var73.x4[2] = var69.x4[2];
2256       var73.x4[3] = var69.x4[3];
2257       /* 32: andl */
2258       var74.i = var73.i & var44.i;
2259       /* 33: orl */
2260       var75.i = var72.i | var74.i;
2261       /* 34: storel */
2262       ptr0[i] = var75;
2263     }
2264   }
2265 
2266 }
2267 
2268 void
video_mixer_orc_overlay_bgra(guint8 * ORC_RESTRICT d1,int d1_stride,const guint8 * ORC_RESTRICT s1,int s1_stride,int p1,int n,int m)2269 video_mixer_orc_overlay_bgra (guint8 * ORC_RESTRICT d1, int d1_stride,
2270     const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m)
2271 {
2272   OrcExecutor _ex, *ex = &_ex;
2273   static volatile int p_inited = 0;
2274   static OrcCode *c = 0;
2275   void (*func) (OrcExecutor *);
2276 
2277   if (!p_inited) {
2278     orc_once_mutex_lock ();
2279     if (!p_inited) {
2280       OrcProgram *p;
2281 
2282 #if 1
2283       static const orc_uint8 bc[] = {
2284         1, 7, 9, 28, 118, 105, 100, 101, 111, 95, 109, 105, 120, 101, 114, 95,
2285         111, 114, 99, 95, 111, 118, 101, 114, 108, 97, 121, 95, 98, 103, 114,
2286             97,
2287         11, 4, 4, 12, 4, 4, 14, 4, 255, 255, 255, 255, 14, 4, 0, 0,
2288         0, 255, 14, 4, 255, 255, 255, 0, 14, 4, 24, 0, 0, 0, 14, 2,
2289         8, 0, 0, 0, 16, 2, 20, 4, 20, 4, 20, 2, 20, 1, 20, 8,
2290         20, 8, 20, 8, 20, 4, 20, 8, 20, 8, 113, 32, 4, 126, 33, 32,
2291         19, 163, 34, 33, 157, 35, 34, 152, 39, 35, 21, 2, 150, 36, 39, 21,
2292         2, 89, 36, 36, 24, 21, 2, 95, 36, 36, 20, 21, 2, 150, 41, 32,
2293         21, 2, 89, 41, 41, 36, 115, 39, 16, 21, 2, 150, 37, 39, 21, 2,
2294         98, 37, 37, 36, 113, 32, 0, 126, 33, 32, 19, 163, 34, 33, 157, 35,
2295         34, 152, 39, 35, 21, 2, 150, 38, 39, 21, 2, 89, 38, 38, 37, 21,
2296         2, 80, 38, 38, 21, 2, 150, 40, 32, 21, 2, 89, 40, 40, 38, 21,
2297         2, 70, 40, 40, 41, 21, 2, 70, 38, 38, 36, 21, 2, 81, 40, 40,
2298         38, 21, 2, 157, 32, 40, 106, 32, 32, 18, 21, 2, 157, 39, 38, 106,
2299         39, 39, 17, 123, 32, 32, 39, 128, 0, 32, 2, 0,
2300       };
2301       p = orc_program_new_from_static_bytecode (bc);
2302       orc_program_set_backup_function (p, _backup_video_mixer_orc_overlay_bgra);
2303 #else
2304       p = orc_program_new ();
2305       orc_program_set_2d (p);
2306       orc_program_set_name (p, "video_mixer_orc_overlay_bgra");
2307       orc_program_set_backup_function (p, _backup_video_mixer_orc_overlay_bgra);
2308       orc_program_add_destination (p, 4, "d1");
2309       orc_program_add_source (p, 4, "s1");
2310       orc_program_add_constant (p, 4, 0xffffffff, "c1");
2311       orc_program_add_constant (p, 4, 0xff000000, "c2");
2312       orc_program_add_constant (p, 4, 0x00ffffff, "c3");
2313       orc_program_add_constant (p, 4, 0x00000018, "c4");
2314       orc_program_add_constant (p, 2, 0x00000008, "c5");
2315       orc_program_add_parameter (p, 2, "p1");
2316       orc_program_add_temporary (p, 4, "t1");
2317       orc_program_add_temporary (p, 4, "t2");
2318       orc_program_add_temporary (p, 2, "t3");
2319       orc_program_add_temporary (p, 1, "t4");
2320       orc_program_add_temporary (p, 8, "t5");
2321       orc_program_add_temporary (p, 8, "t6");
2322       orc_program_add_temporary (p, 8, "t7");
2323       orc_program_add_temporary (p, 4, "t8");
2324       orc_program_add_temporary (p, 8, "t9");
2325       orc_program_add_temporary (p, 8, "t10");
2326 
2327       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
2328           ORC_VAR_D1);
2329       orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
2330           ORC_VAR_D1);
2331       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2332           ORC_VAR_D1);
2333       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
2334           ORC_VAR_D1);
2335       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
2336           ORC_VAR_D1);
2337       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1,
2338           ORC_VAR_D1);
2339       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1,
2340           ORC_VAR_D1);
2341       orc_program_append_2 (p, "shruw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C5,
2342           ORC_VAR_D1);
2343       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1,
2344           ORC_VAR_D1, ORC_VAR_D1);
2345       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T5,
2346           ORC_VAR_D1);
2347       orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1,
2348           ORC_VAR_D1);
2349       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1,
2350           ORC_VAR_D1);
2351       orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5,
2352           ORC_VAR_D1);
2353       orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1,
2354           ORC_VAR_D1);
2355       orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4,
2356           ORC_VAR_D1);
2357       orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1,
2358           ORC_VAR_D1);
2359       orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1,
2360           ORC_VAR_D1);
2361       orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1,
2362           ORC_VAR_D1);
2363       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1,
2364           ORC_VAR_D1);
2365       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
2366           ORC_VAR_D1);
2367       orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1,
2368           ORC_VAR_D1);
2369       orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1,
2370           ORC_VAR_D1);
2371       orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
2372           ORC_VAR_D1);
2373       orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10,
2374           ORC_VAR_D1);
2375       orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
2376           ORC_VAR_D1);
2377       orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7,
2378           ORC_VAR_D1);
2379       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1,
2380           ORC_VAR_D1);
2381       orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3,
2382           ORC_VAR_D1);
2383       orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1,
2384           ORC_VAR_D1);
2385       orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2,
2386           ORC_VAR_D1);
2387       orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8,
2388           ORC_VAR_D1);
2389       orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1,
2390           ORC_VAR_D1);
2391 #endif
2392 
2393       orc_program_compile (p);
2394       c = orc_program_take_code (p);
2395       orc_program_free (p);
2396     }
2397     p_inited = TRUE;
2398     orc_once_mutex_unlock ();
2399   }
2400   ex->arrays[ORC_VAR_A2] = c;
2401   ex->program = 0;
2402 
2403   ex->n = n;
2404   ORC_EXECUTOR_M (ex) = m;
2405   ex->arrays[ORC_VAR_D1] = d1;
2406   ex->params[ORC_VAR_D1] = d1_stride;
2407   ex->arrays[ORC_VAR_S1] = (void *) s1;
2408   ex->params[ORC_VAR_S1] = s1_stride;
2409   ex->params[ORC_VAR_P1] = p1;
2410 
2411   func = c->exec;
2412   func (ex);
2413 }
2414 #endif
2415