/* Copyright (C) 2006-2019 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
21
22 #ifndef _VMX2SPU_H_
23 #define _VMX2SPU_H_ 1
24
25 #ifdef __cplusplus
26
27 #ifdef __SPU__
28
29 #include <spu_intrinsics.h>
30 #include <vec_types.h>
31
32 /* This file maps generic VMX intrinsics and predicates to the SPU using
33 * overloaded C++ functions.
34 */
35
36 /************************************************************************
37 * INTRINSICS
38 ************************************************************************/
39
40 /* vec_abs (vector absolute value)
41 * =======
42 */
/* Per-element absolute value of 16 signed bytes.
   Note: the most negative value (-128) wraps back to -128, matching VMX.  */
static inline vec_char16 vec_abs(vec_char16 a)
{
  vec_char16 minus_a;

  /* Build -a bytewise as ~a + 1.  Bit 7 of each byte of ~a is masked off
     so the halfword add cannot carry across a byte boundary, and adding
     0x101 adds one to both bytes of every halfword.  (The mask corrupts
     lanes where a >= 0, but those lanes are discarded by the select.)  */
  minus_a = (vec_char16)(spu_add((vec_ushort8)(spu_and(spu_xor(a, 0xFF), 0x7F)), 0x101));
  /* Keep the original element wherever a > -1, i.e. a >= 0.  */
  return (spu_sel(minus_a, a, spu_cmpgt(a, -1)));
}

/* Per-element absolute value of 8 signed halfwords (0x8000 wraps).  */
static inline vec_short8 vec_abs(vec_short8 a)
{
  return (spu_sel(spu_sub(0, a), a, spu_cmpgt(a, -1)));
}

/* Per-element absolute value of 4 signed words (0x80000000 wraps).  */
static inline vec_int4 vec_abs(vec_int4 a)
{
  return (spu_sel(spu_sub(0, a), a, spu_cmpgt(a, -1)));
}

/* Absolute value of 4 floats: shift the sign bit out and back in,
   clearing it without touching exponent or mantissa.  */
static inline vec_float4 vec_abs(vec_float4 a)
{
  return ((vec_float4)(spu_rlmask(spu_sl((vec_uint4)(a), 1), -1)));
}
65
66 /* vec_abss (vector absolute value saturate)
67 * ========
68 */
/* Saturating absolute value of 16 signed bytes: |-128| saturates to 127.  */
static inline vec_char16 vec_abss(vec_char16 a)
{
  vec_char16 minus_a;

  /* minus_a = ~a + c, where c is 1 for bytes whose unsigned value
     exceeds 0x80 (i.e. -127..-1, giving the usual two's complement)
     and 0 for the 0x80 (-128) byte, which leaves ~(-128) = 127, the
     saturated result.  Positive lanes are discarded by the select.  */
  minus_a = (vec_char16)spu_add((vec_short8)(spu_xor(a, -1)),
                                (vec_short8)(spu_and(spu_cmpgt((vec_uchar16)(a), 0x80), 1)));
  return (spu_sel(minus_a, a, spu_cmpgt(a, -1)));
}

/* Saturating absolute value of 8 signed halfwords: |-32768| -> 32767.  */
static inline vec_short8 vec_abss(vec_short8 a)
{
  vec_short8 minus_a;

  /* 0 - a, then add the all-ones compare mask (-1) exactly where
     a == 0x8000 so the overflowed negation wraps back to 0x7FFF.  */
  minus_a = spu_add(spu_sub(0, a), (vec_short8)(spu_cmpeq(a, ((vec_short8){0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}))));
  return (spu_sel(minus_a, a, spu_cmpgt(a, -1)));
}

/* Saturating absolute value of 4 signed words: |INT_MIN| -> INT_MAX.  */
static inline vec_int4 vec_abss(vec_int4 a)
{
  vec_int4 minus_a;

  /* Same trick as the halfword variant, at word width.  */
  minus_a = spu_add(spu_sub(0, a), (vec_int4)(spu_cmpeq(a, ((vec_int4){0x80000000,0x80000000,0x80000000,0x80000000}))));
  return (spu_sel(minus_a, a, spu_cmpgt(a, -1)));
}
93
94
95 /* vec_add (vector add)
96 * =======
97 */
/* Modulo add of 16 unsigned bytes.  The SPU has no byte add, so two
   halfword adds are combined: the plain halfword add supplies each low
   byte, while an add of the high bytes alone (low bytes masked off)
   supplies each high byte free of the carry from the low byte.  */
static inline vec_uchar16 vec_add(vec_uchar16 a, vec_uchar16 b)
{
  return ((vec_uchar16)(spu_sel(spu_add((vec_ushort8)(a), (vec_ushort8)(b)),
                                spu_add(spu_and((vec_ushort8)(a), 0xFF00), spu_and((vec_ushort8)(b), 0xFF00)),
                                spu_splats((unsigned short)(0xFF00)))));
}

/* Signed byte add: the bit pattern is identical to the unsigned add.  */
static inline vec_char16 vec_add(vec_char16 a, vec_char16 b)
{
  return ((vec_char16)vec_add((vec_uchar16)(a), (vec_uchar16)(b)));
}

/* Boolean-operand variants reinterpret and reuse the unsigned byte add.  */
static inline vec_char16 vec_add(vec_bchar16 a, vec_char16 b)
{
  return ((vec_char16)vec_add((vec_uchar16)(a), (vec_uchar16)(b)));
}

static inline vec_char16 vec_add(vec_char16 a, vec_bchar16 b)
{
  return ((vec_char16)vec_add((vec_uchar16)(a), (vec_uchar16)(b)));
}

/* Halfword, word and float adds map directly onto spu_add.  */
static inline vec_ushort8 vec_add(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_add(a, b));
}

static inline vec_short8 vec_add(vec_short8 a, vec_short8 b)
{
  return (spu_add(a, b));
}

static inline vec_short8 vec_add(vec_bshort8 a, vec_short8 b)
{
  return (spu_add((vec_short8)(a), b));
}

static inline vec_short8 vec_add(vec_short8 a, vec_bshort8 b)
{
  return (spu_add(a, (vec_short8)(b)));
}

static inline vec_uint4 vec_add(vec_uint4 a, vec_uint4 b)
{
  return (spu_add(a, b));
}

static inline vec_int4 vec_add(vec_int4 a, vec_int4 b)
{
  return (spu_add(a, b));
}

static inline vec_int4 vec_add(vec_bint4 a, vec_int4 b)
{
  return (spu_add((vec_int4)(a), b));
}

static inline vec_int4 vec_add(vec_int4 a, vec_bint4 b)
{
  return (spu_add(a, (vec_int4)(b)));
}

static inline vec_float4 vec_add(vec_float4 a, vec_float4 b)
{
  return (spu_add(a, b));
}
164
165 /* vec_addc (vector add carryout unsigned word)
166 * ========
167 */
/* Carry-out of an unsigned word add maps directly to generate-carry.  */
#define vec_addc(_a, _b) spu_genc(_a, _b)

/* vec_adds (vector add saturated)
 * ========
 */
/* Saturating add of 16 unsigned bytes.  Partial halfword adds give
   9-bit per-byte sums: s1 holds the high-byte pairs, s2 the low-byte
   pairs.  The shuffles split those into carry bytes (s) and sum bytes
   (d); lanes whose carry is 1 overflowed and are forced to 0xFF.  */
static inline vec_uchar16 vec_adds(vec_uchar16 a, vec_uchar16 b)
{
  vec_uchar16 s1, s2, s, d;

  s1 = (vec_uchar16)(spu_add(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8)));
  s2 = (vec_uchar16)(spu_add(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF)));
  s = spu_shuffle(s1, s2, ((vec_uchar16){0, 16, 2, 18, 4, 20, 6, 22,
                                         8, 24, 10, 26, 12, 28, 14, 30}));
  d = spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23,
                                         9, 25, 11, 27, 13, 29, 15, 31}));
  return (spu_or(d, spu_cmpeq(s, 1)));
}

/* Saturating add of 16 signed bytes.  s is the modulo byte sum; it is
   clamped to 0x7F where both operands were non-negative but the sum is
   negative, and to 0x80 where both were negative but the sum is not
   (overflow detected via the sign bits of nor/nand of the operands).  */
static inline vec_char16 vec_adds(vec_char16 a, vec_char16 b)
{
  vec_uchar16 s1, s2, s, d;

  s1 = (vec_uchar16)(spu_add(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8)));
  s2 = (vec_uchar16)(spu_add(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF)));
  s = spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23,
                                         9, 25, 11, 27, 13, 29, 15, 31}));
  d = spu_sel(s, spu_splats((unsigned char)0x7F), spu_cmpgt(spu_and(s, (vec_uchar16)(spu_nor(a, b))), 0x7F));
  d = spu_sel(d, spu_splats((unsigned char)0x80), spu_cmpgt(spu_nor(s, (vec_uchar16)(spu_nand(a, b))), 0x7F));
  return ((vec_char16)(d));
}

/* Boolean-operand variants reinterpret and forward.  */
static inline vec_char16 vec_adds(vec_bchar16 a, vec_char16 b)
{
  return (vec_adds((vec_char16)(a), b));
}

static inline vec_char16 vec_adds(vec_char16 a, vec_bchar16 b)
{
  return (vec_adds(a, (vec_char16)(b)));
}

/* Saturating add of 8 unsigned halfwords.  The MSB of the selected
   value is the carry out of bit 15 (a AND b, or ~s where a and b
   differ); the arithmetic shift broadcasts it across the halfword and
   the OR saturates overflowed lanes to 0xFFFF.  */
static inline vec_ushort8 vec_adds(vec_ushort8 a, vec_ushort8 b)
{
  vec_ushort8 s, d;

  s = spu_add(a, b);
  d = spu_or(s, spu_rlmaska(spu_sel(spu_xor(s, -1), a, spu_eqv(a, b)), -15));
  return (d);
}

/* Saturating add of 8 signed halfwords: clamp to 0x7FFF on positive
   overflow and 0x8000 on negative overflow, using sign-bit masks
   broadcast with an arithmetic shift.  */
static inline vec_short8 vec_adds(vec_short8 a, vec_short8 b)
{
  vec_short8 s, d;

  s = spu_add(a, b);
  d = spu_sel(s, spu_splats((signed short)0x7FFF), (vec_ushort8)(spu_rlmaska(spu_and(s, spu_nor(a, b)), -15)));
  d = spu_sel(d, spu_splats((signed short)0x8000), (vec_ushort8)(spu_rlmaska(spu_nor(s, spu_nand(a, b)), -15)));
  return (d);
}

static inline vec_short8 vec_adds(vec_bshort8 a, vec_short8 b)
{
  return (vec_adds((vec_short8)(a), b));
}

static inline vec_short8 vec_adds(vec_short8 a, vec_bshort8 b)
{
  return (vec_adds(a, (vec_short8)(b)));
}

/* Saturating add of 4 unsigned words: spu_genc yields the carry bit;
   shifting it to bit 31 and arithmetic-shifting back broadcasts an
   all-ones mask over overflowed lanes.  */
static inline vec_uint4 vec_adds(vec_uint4 a, vec_uint4 b)
{
  return (spu_or(spu_add(a, b), spu_rlmaska(spu_sl(spu_genc(a, b), 31), -31)));
}

/* Saturating add of 4 signed words (same scheme as the halfword form).  */
static inline vec_int4 vec_adds(vec_int4 a, vec_int4 b)
{
  vec_int4 s, d;

  s = spu_add(a, b);
  d = spu_sel(s, spu_splats((signed int)0x7FFFFFFF), (vec_uint4)spu_rlmaska(spu_and(s, spu_nor(a, b)), -31));
  d = spu_sel(d, spu_splats((signed int)0x80000000), (vec_uint4)spu_rlmaska(spu_nor(s, spu_nand(a, b)), -31));
  return (d);
}

static inline vec_int4 vec_adds(vec_bint4 a, vec_int4 b)
{
  return (vec_adds((vec_int4)(a), b));
}

static inline vec_int4 vec_adds(vec_int4 a, vec_bint4 b)
{
  return (vec_adds(a, (vec_int4)(b)));
}
262
263 /* vec_and (vector logical and)
264 * =======
265 */
vec_and(vec_uchar16 a,vec_uchar16 b)266 static inline vec_uchar16 vec_and(vec_uchar16 a, vec_uchar16 b)
267 {
268 return (spu_and(a, b));
269 }
270
vec_and(vec_char16 a,vec_char16 b)271 static inline vec_char16 vec_and(vec_char16 a, vec_char16 b)
272 {
273 return (spu_and(a, b));
274 }
275
vec_and(vec_bchar16 a,vec_char16 b)276 static inline vec_char16 vec_and(vec_bchar16 a, vec_char16 b)
277 {
278 return (spu_and((vec_char16)(a), b));
279 }
280
vec_and(vec_char16 a,vec_bchar16 b)281 static inline vec_char16 vec_and(vec_char16 a, vec_bchar16 b)
282 {
283 return (spu_and(a, (vec_char16)(b)));
284 }
285
vec_and(vec_ushort8 a,vec_ushort8 b)286 static inline vec_ushort8 vec_and(vec_ushort8 a, vec_ushort8 b)
287 {
288 return (spu_and(a, b));
289 }
290
vec_and(vec_short8 a,vec_short8 b)291 static inline vec_short8 vec_and(vec_short8 a, vec_short8 b)
292 {
293 return (spu_and(a, b));
294 }
295
vec_and(vec_bshort8 a,vec_short8 b)296 static inline vec_short8 vec_and(vec_bshort8 a, vec_short8 b)
297 {
298 return (spu_and((vec_short8)(a), b));
299 }
300
vec_and(vec_short8 a,vec_bshort8 b)301 static inline vec_short8 vec_and(vec_short8 a, vec_bshort8 b)
302 {
303 return (spu_and(a, (vec_short8)(b)));
304 }
305
vec_and(vec_uint4 a,vec_uint4 b)306 static inline vec_uint4 vec_and(vec_uint4 a, vec_uint4 b)
307 {
308 return (spu_and(a, b));
309 }
310
vec_and(vec_int4 a,vec_int4 b)311 static inline vec_int4 vec_and(vec_int4 a, vec_int4 b)
312 {
313 return (spu_and(a, b));
314 }
315
vec_and(vec_bint4 a,vec_int4 b)316 static inline vec_int4 vec_and(vec_bint4 a, vec_int4 b)
317 {
318 return (spu_and((vec_int4)(a), b));
319 }
320
vec_and(vec_int4 a,vec_bint4 b)321 static inline vec_int4 vec_and(vec_int4 a, vec_bint4 b)
322 {
323 return (spu_and(a, (vec_int4)(b)));
324 }
325
vec_and(vec_float4 a,vec_float4 b)326 static inline vec_float4 vec_and(vec_float4 a, vec_float4 b)
327 {
328 return (spu_and(a, b));
329 }
330
vec_and(vec_bint4 a,vec_float4 b)331 static inline vec_float4 vec_and(vec_bint4 a, vec_float4 b)
332 {
333 return (spu_and((vec_float4)(a),b));
334 }
335
vec_and(vec_float4 a,vec_bint4 b)336 static inline vec_float4 vec_and(vec_float4 a, vec_bint4 b)
337 {
338 return (spu_and(a, (vec_float4)(b)));
339 }
340
341
342 /* vec_andc (vector logical and with complement)
343 * ========
344 */
vec_andc(vec_uchar16 a,vec_uchar16 b)345 static inline vec_uchar16 vec_andc(vec_uchar16 a, vec_uchar16 b)
346 {
347 return (spu_andc(a, b));
348 }
349
vec_andc(vec_char16 a,vec_char16 b)350 static inline vec_char16 vec_andc(vec_char16 a, vec_char16 b)
351 {
352 return (spu_andc(a, b));
353 }
354
vec_andc(vec_bchar16 a,vec_char16 b)355 static inline vec_char16 vec_andc(vec_bchar16 a, vec_char16 b)
356 {
357 return (spu_andc((vec_char16)(a), b));
358 }
359
vec_andc(vec_char16 a,vec_bchar16 b)360 static inline vec_char16 vec_andc(vec_char16 a, vec_bchar16 b)
361 {
362 return (spu_andc(a, (vec_char16)(b)));
363 }
364
vec_andc(vec_ushort8 a,vec_ushort8 b)365 static inline vec_ushort8 vec_andc(vec_ushort8 a, vec_ushort8 b)
366 {
367 return (spu_andc(a, b));
368 }
369
vec_andc(vec_short8 a,vec_short8 b)370 static inline vec_short8 vec_andc(vec_short8 a, vec_short8 b)
371 {
372 return (spu_andc(a, b));
373 }
374
vec_andc(vec_bshort8 a,vec_short8 b)375 static inline vec_short8 vec_andc(vec_bshort8 a, vec_short8 b)
376 {
377 return (spu_andc((vec_short8)(a), b));
378 }
379
vec_andc(vec_short8 a,vec_bshort8 b)380 static inline vec_short8 vec_andc(vec_short8 a, vec_bshort8 b)
381 {
382 return (spu_andc(a, (vec_short8)(b)));
383 }
384
vec_andc(vec_uint4 a,vec_uint4 b)385 static inline vec_uint4 vec_andc(vec_uint4 a, vec_uint4 b)
386 {
387 return (spu_andc(a, b));
388 }
389
vec_andc(vec_int4 a,vec_int4 b)390 static inline vec_int4 vec_andc(vec_int4 a, vec_int4 b)
391 {
392 return (spu_andc(a, b));
393 }
394
vec_andc(vec_bint4 a,vec_int4 b)395 static inline vec_int4 vec_andc(vec_bint4 a, vec_int4 b)
396 {
397 return (spu_andc((vec_int4)(a), b));
398 }
399
vec_andc(vec_int4 a,vec_bint4 b)400 static inline vec_int4 vec_andc(vec_int4 a, vec_bint4 b)
401 {
402 return (spu_andc(a, (vec_int4)(b)));
403 }
404
vec_andc(vec_float4 a,vec_float4 b)405 static inline vec_float4 vec_andc(vec_float4 a, vec_float4 b)
406 {
407 return (spu_andc(a,b));
408 }
409
vec_andc(vec_bint4 a,vec_float4 b)410 static inline vec_float4 vec_andc(vec_bint4 a, vec_float4 b)
411 {
412 return (spu_andc((vec_float4)(a),b));
413 }
414
vec_andc(vec_float4 a,vec_bint4 b)415 static inline vec_float4 vec_andc(vec_float4 a, vec_bint4 b)
416 {
417 return (spu_andc(a, (vec_float4)(b)));
418 }
419
420 /* vec_avg (vector average)
421 * =======
422 */
/* Rounded average of 16 unsigned bytes: native SPU instruction.  */
static inline vec_uchar16 vec_avg(vec_uchar16 a, vec_uchar16 b)
{
  return (spu_avg(a, b));
}

/* Rounded average of 16 signed bytes.  The unsigned average already
   gives the right bit pattern when both sign bits agree; when they
   differ the result's sign bit must be flipped, hence the xor with
   0x80 restricted to those lanes.  */
static inline vec_char16 vec_avg(vec_char16 a, vec_char16 b)
{
  return ((vec_char16)(spu_xor(spu_avg((vec_uchar16)(a), (vec_uchar16)(b)),
                               (vec_uchar16)(spu_and(spu_xor(a,b), 0x80)))));
}

/* Wider averages compute (a >> 1) + (b >> 1), which cannot overflow,
   and OR the low bits in as the round-up term of (a + b + 1) >> 1.  */
static inline vec_ushort8 vec_avg(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_add(spu_add(spu_rlmask(a, -1), spu_rlmask(b, -1)),
                  spu_and(spu_or(a, b), 1)));
}

/* Signed halfword variant: arithmetic shifts preserve the sign.  */
static inline vec_short8 vec_avg(vec_short8 a, vec_short8 b)
{
  return (spu_add(spu_add(spu_rlmaska(a, -1), spu_rlmaska(b, -1)),
                  spu_and(spu_or(a, b), 1)));
}

static inline vec_uint4 vec_avg(vec_uint4 a, vec_uint4 b)
{
  return (spu_add(spu_add(spu_rlmask(a, -1), spu_rlmask(b, -1)),
                  spu_and(spu_or(a, b), 1)));
}

static inline vec_int4 vec_avg(vec_int4 a, vec_int4 b)
{
  return (spu_add(spu_add(spu_rlmaska(a, -1), spu_rlmaska(b, -1)),
                  spu_and(spu_or(a, b), 1)));
}
457
458
459 /* vec_ceil (vector ceiling)
460 * ========
461 */
/* Round each float up to an integral value.  */
static inline vec_float4 vec_ceil(vec_float4 a)
{
  vec_int4 exp;
  vec_uint4 mask;

  /* Non-negative inputs get a bias of just under 1.0 (0x3F7FFFFF) so
     that the truncation toward zero below produces the ceiling;
     negative inputs truncate toward zero anyway and get no bias.  */
  a = spu_add(a, (vec_float4)(spu_and(spu_xor(spu_rlmaska((vec_int4)a, -31), -1), spu_splats((signed int)0x3F7FFFFF))));
  /* exp = 127 - biased_exponent: the count of mantissa bits that hold
     fractional value (positive when |a| < 1).  */
  exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)));
  /* Mask of the mantissa bits to clear to truncate the fraction.  */
  mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp);
  /* Exponents of 23 or more leave no fractional bits: force mask = 0.  */
  mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31));
  /* |a| < 1 truncates to zero: widen the mask to the entire word.  */
  mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1));

  return ((vec_float4)(spu_andc((vec_uint4)(a), mask)));
}
475
476
477 /* vec_cmpb (vector compare bounds floating-point)
478 * ========
479 */
/* VMX compare-bounds: per element, bit 0x80000000 of the result is set
   when a > b, and bit 0x40000000 when a < -b (b's sign is flipped by
   the xor with 0x80000000).  A zero element means -b <= a <= b.  */
static inline vec_int4 vec_cmpb(vec_float4 a, vec_float4 b)
{
  vec_int4 b0 = (vec_int4)spu_splats(0x80000000);
  vec_int4 b1 = (vec_int4)spu_splats(0x40000000);

  return (spu_or(spu_and((vec_int4)spu_cmpgt(a, b), b0),
                 spu_and((vec_int4)spu_cmpgt(spu_xor(b, (vec_float4)(b0)), a), b1)));
}
488
489 /* vec_cmpeq (vector compare equal)
490 * =========
491 */
#define vec_cmpeq(_a, _b) spu_cmpeq(_a, _b)


/* vec_cmpge (vector compare greater than or equal)
 * =========
 */
static inline vec_bint4 vec_cmpge(vec_float4 a, vec_float4 b)
{
  /* a >= b is the complement (xor with -1) of b > a.
     NOTE(review): with unordered (NaN) operands this yields all-ones,
     whereas VMX vec_cmpge yields zero -- confirm callers do not rely
     on NaN comparison behavior.  */
  return (spu_xor(spu_cmpgt(b, a), -1));
}


/* vec_cmpgt (vector compare greater than)
 * =========
 */
#define vec_cmpgt(_a, _b) spu_cmpgt(_a, _b)


/* vec_cmple (vector compare less than or equal)
 * =========
 */
static inline vec_bint4 vec_cmple(vec_float4 a, vec_float4 b)
{
  /* a <= b is the complement of a > b; same NaN caveat as vec_cmpge.  */
  return (spu_xor(spu_cmpgt(a, b), -1));
}


/* vec_cmplt (vector compare less than)
 * =========
 */
#define vec_cmplt(_a, _b) spu_cmpgt(_b, _a)


/* vec_ctf (vector convert from fixed-point word)
 * =======
 */
#define vec_ctf(_a, _b) spu_convtf(_a, _b)


/* vec_cts (vector convert to signed fixed-point word saturate)
 * =======
 */
#define vec_cts(_a, _b) spu_convts(_a, _b)


/* vec_ctu (vector convert to unsigned fixed-point word saturate)
 * =======
 */
#define vec_ctu(_a, _b) spu_convtu(_a, _b)


/* The VMX data-stream (software prefetch) operations below have no SPU
   counterpart; each macro intentionally expands to nothing.  */

/* vec_dss (vector data stream stop)
 * =======
 */
#define vec_dss(_a)


/* vec_dssall (vector data stream stop all)
 * ==========
 */
#define vec_dssall()


/* vec_dst (vector data stream touch)
 * =======
 */
#define vec_dst(_a, _b, _c)


/* vec_dstst (vector data stream touch for store)
 * =========
 */
#define vec_dstst(_a, _b, _c)


/* vec_dststt (vector data stream touch for store transient)
 * ==========
 */
#define vec_dststt(_a, _b, _c)


/* vec_dstt (vector data stream touch transient)
 * ========
 */
#define vec_dstt(_a, _b, _c)
577
578
579 /* vec_expte (vector is 2 raised tp the exponent estimate floating-point)
580 * =========
581 */
/* Estimate 2^a per element.  'ia' is a rounded up (a bias just below
   1.0 is added for non-negative a; negative a truncates upward on its
   own), so frac = ia - a lies in [0, 1).  'exp' is 2^ia built directly
   in the float exponent field, and the quadratic in frac approximates
   2^-frac (it evaluates to 1.0 at frac = 0 and 0.5 at frac = 1), so
   the product approximates 2^a.  */
static inline vec_float4 vec_expte(vec_float4 a)
{
  vec_float4 bias, frac, exp;
  vec_int4 ia;

  bias = (vec_float4)(spu_andc(spu_splats((signed int)0x3F7FFFFF), spu_rlmaska((vec_int4)(a), -31)));
  ia = spu_convts(spu_add(a, bias), 0);
  frac = spu_sub(spu_convtf(ia, 0), a);
  exp = (vec_float4)(spu_sl(spu_add(ia, 127), 23));

  return (spu_mul(spu_madd(spu_madd(spu_splats(0.17157287f), frac, spu_splats(-0.67157287f)),
                           frac, spu_splats(1.0f)), exp));
}
595
596
597 /* vec_floor (vector floor)
598 * =========
599 */
/* Round each float down to an integral value.  */
static inline vec_float4 vec_floor(vec_float4 a)
{
  vec_int4 exp;
  vec_uint4 mask;

  /* Negative inputs are biased down by just under 1.0 so the
     truncation toward zero below produces the floor; non-negative
     inputs truncate toward zero anyway and get no bias.  */
  a = spu_sub(a, (vec_float4)(spu_and(spu_rlmaska((vec_int4)a, -31), spu_splats((signed int)0x3F7FFFFF))));
  /* exp = 127 - biased_exponent: number of fractional mantissa bits.  */
  exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)));
  /* Mask of the mantissa bits holding fractional value.  */
  mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp);
  /* Exponents of 23 or more are already integral: force mask = 0.  */
  mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31));
  /* |a| < 1 truncates to zero: widen the mask to the entire word.  */
  mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1));

  return ((vec_float4)(spu_andc((vec_uint4)(a), mask)));
}
613
614
615 /* vec_ld (vector load indexed)
616 * ======
617 */
vec_ld(int a,unsigned char * b)618 static inline vec_uchar16 vec_ld(int a, unsigned char *b)
619 {
620 return (*((vec_uchar16 *)(b+a)));
621 }
622
vec_ld(int a,vec_uchar16 * b)623 static inline vec_uchar16 vec_ld(int a, vec_uchar16 *b)
624 {
625 return (*((vec_uchar16 *)((unsigned char *)(b)+a)));
626 }
627
vec_ld(int a,signed char * b)628 static inline vec_char16 vec_ld(int a, signed char *b)
629 {
630 return (*((vec_char16 *)(b+a)));
631 }
632
vec_ld(int a,vec_char16 * b)633 static inline vec_char16 vec_ld(int a, vec_char16 *b)
634 {
635 return (*((vec_char16 *)((signed char *)(b)+a)));
636 }
637
vec_ld(int a,unsigned short * b)638 static inline vec_ushort8 vec_ld(int a, unsigned short *b)
639 {
640 return (*((vec_ushort8 *)((unsigned char *)(b)+a)));
641 }
642
vec_ld(int a,vec_ushort8 * b)643 static inline vec_ushort8 vec_ld(int a, vec_ushort8 *b)
644 {
645 return (*((vec_ushort8 *)((unsigned char *)(b)+a)));
646 }
647
vec_ld(int a,signed short * b)648 static inline vec_short8 vec_ld(int a, signed short *b)
649 {
650 return (*((vec_short8 *)((unsigned char *)(b)+a)));
651 }
652
vec_ld(int a,vec_short8 * b)653 static inline vec_short8 vec_ld(int a, vec_short8 *b)
654 {
655 return (*((vec_short8 *)((signed char *)(b)+a)));
656 }
657
vec_ld(int a,unsigned int * b)658 static inline vec_uint4 vec_ld(int a, unsigned int *b)
659 {
660 return (*((vec_uint4 *)((unsigned char *)(b)+a)));
661 }
662
vec_ld(int a,vec_uint4 * b)663 static inline vec_uint4 vec_ld(int a, vec_uint4 *b)
664 {
665 return (*((vec_uint4 *)((unsigned char *)(b)+a)));
666 }
667
vec_ld(int a,signed int * b)668 static inline vec_int4 vec_ld(int a, signed int *b)
669 {
670 return (*((vec_int4 *)((unsigned char *)(b)+a)));
671 }
672
vec_ld(int a,vec_int4 * b)673 static inline vec_int4 vec_ld(int a, vec_int4 *b)
674 {
675 return (*((vec_int4 *)((signed char *)(b)+a)));
676 }
677
vec_ld(int a,float * b)678 static inline vec_float4 vec_ld(int a, float *b)
679 {
680 return (*((vec_float4 *)((unsigned char *)(b)+a)));
681 }
682
vec_ld(int a,vec_float4 * b)683 static inline vec_float4 vec_ld(int a, vec_float4 *b)
684 {
685 return (*((vec_float4 *)((unsigned char *)(b)+a)));
686 }
687
688 /* vec_lde (vector load element indexed)
689 * =======
690 */
vec_lde(int a,unsigned char * b)691 static inline vec_uchar16 vec_lde(int a, unsigned char *b)
692 {
693 return (*((vec_uchar16 *)(b+a)));
694 }
695
vec_lde(int a,signed char * b)696 static inline vec_char16 vec_lde(int a, signed char *b)
697 {
698 return (*((vec_char16 *)(b+a)));
699 }
700
vec_lde(int a,unsigned short * b)701 static inline vec_ushort8 vec_lde(int a, unsigned short *b)
702 {
703 return (*((vec_ushort8 *)((unsigned char *)(b)+a)));
704 }
705
vec_lde(int a,signed short * b)706 static inline vec_short8 vec_lde(int a, signed short *b)
707 {
708 return (*((vec_short8 *)((unsigned char *)(b)+a)));
709 }
710
711
vec_lde(int a,unsigned int * b)712 static inline vec_uint4 vec_lde(int a, unsigned int *b)
713 {
714 return (*((vec_uint4 *)((unsigned char *)(b)+a)));
715 }
716
vec_lde(int a,signed int * b)717 static inline vec_int4 vec_lde(int a, signed int *b)
718 {
719 return (*((vec_int4 *)((unsigned char *)(b)+a)));
720 }
721
722
vec_lde(int a,float * b)723 static inline vec_float4 vec_lde(int a, float *b)
724 {
725 return (*((vec_float4 *)((unsigned char *)(b)+a)));
726 }
727
728 /* vec_ldl (vector load indexed LRU)
729 * =======
730 */
731 #define vec_ldl(_a, _b) vec_ld(_a, _b)
732
733
734 /* vec_loge (vector log2 estimate floating-point)
735 * ========
736 */
/* Estimate log2(a) per element (valid for positive, normal a).  'exp'
   is the unbiased exponent; subtracting exp << 23 from the raw bits
   rescales 'frac' into [1, 2).  A quadratic in frac plus the exponent
   then approximates log2(a).  */
static inline vec_float4 vec_loge(vec_float4 a)
{
  vec_int4 exp;
  vec_float4 frac;

  exp = spu_add((vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)), -127);
  frac = (vec_float4)(spu_sub((vec_int4)(a), spu_sl(exp, 23)));

  return (spu_madd(spu_madd(spu_splats(-0.33985f), frac, spu_splats(2.01955f)),
                   frac, spu_sub(spu_convtf(exp, 0), spu_splats(1.6797f))));
}
748
749
750 /* vec_lvsl (vector load for shift left)
751 * ========
752 */
/* Build the VMX shift-left permute pattern {x, x+1, ..., x+15}, where
   x = (a + (int)b) & 15.  The splatted offset byte is added to the
   constant 0x00..0x0F ramp with halfword adds; per-byte sums never
   exceed 0x1E, so no carry crosses a byte boundary.  */
static inline vec_uchar16 vec_lvsl(int a, unsigned char *b)
{
  return ((vec_uchar16)spu_add((vec_ushort8)(spu_splats((unsigned char)((a + (int)(b)) & 0xF))),
                               ((vec_ushort8){0x0001, 0x0203, 0x0405, 0x0607,
                                              0x0809, 0x0A0B, 0x0C0D, 0x0E0F})));
}

/* The pattern depends only on the byte address, so the remaining
   overloads simply forward to the unsigned char variant.  */
static inline vec_uchar16 vec_lvsl(int a, signed char *b)
{
  return (vec_lvsl(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsl(int a, unsigned short *b)
{
  return (vec_lvsl(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsl(int a, short *b)
{
  return (vec_lvsl(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsl(int a, unsigned int *b)
{
  return (vec_lvsl(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsl(int a, int *b)
{
  return (vec_lvsl(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsl(int a, float *b)
{
  return (vec_lvsl(a, (unsigned char *)b));
}
789
790
791 /* vec_lvsr (vector load for shift right)
792 * ========
793 */
/* Build the VMX shift-right permute pattern {16-x, 17-x, ..., 31-x},
   where x = (a + (int)b) & 15.  The splatted offset byte is subtracted
   from the constant 0x10..0x1F ramp with halfword subtracts; each byte
   of the ramp is at least 0x10 >= x, so no borrow crosses a byte
   boundary.  */
static inline vec_uchar16 vec_lvsr(int a, unsigned char *b)
{
  return ((vec_uchar16)(spu_sub(((vec_ushort8){0x1011, 0x1213, 0x1415, 0x1617,
                                               0x1819, 0x1A1B, 0x1C1D, 0x1E1F}),
                                (vec_ushort8)(spu_splats((unsigned char)((a + (int)(b)) & 0xF))))));
}

/* The pattern depends only on the byte address, so the remaining
   overloads simply forward to the unsigned char variant.  */
static inline vec_uchar16 vec_lvsr(int a, signed char *b)
{
  return (vec_lvsr(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsr(int a, unsigned short *b)
{
  return (vec_lvsr(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsr(int a, short *b)
{
  return (vec_lvsr(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsr(int a, unsigned int *b)
{
  return (vec_lvsr(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsr(int a, int *b)
{
  return (vec_lvsr(a, (unsigned char *)b));
}

static inline vec_uchar16 vec_lvsr(int a, float *b)
{
  return (vec_lvsr(a, (unsigned char *)b));
}
830
831 /* vec_madd (vector multiply add)
832 * ========
833 */
834 #define vec_madd(_a, _b, _c) spu_madd(_a, _b, _c)
835
836
837
838 /* vec_madds (vector multiply add saturate)
839 * =========
840 */
/* VMX multiply-high-and-add-saturate: ((a * b) >> 15) + c, saturated.
   spu_mule multiplies the even halfword pairs (the << 1 places the
   needed bits in the high halfword of each product), spu_mulo the odd
   pairs (>> 15 aligns them in the low halfword); the select mask
   re-interleaves both into one vector before the saturating add.  */
static inline vec_short8 vec_madds(vec_short8 a, vec_short8 b, vec_short8 c)
{
  return (vec_adds(c, spu_sel((vec_short8)(spu_sl(spu_mule(a, b), 1)),
                              (vec_short8)(spu_rlmask(spu_mulo(a, b), -15)),
                              ((vec_ushort8){0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF}))));
}
847
848 /* vec_max (vector maximum)
849 * =======
850 */
vec_max(vec_uchar16 a,vec_uchar16 b)851 static inline vec_uchar16 vec_max(vec_uchar16 a, vec_uchar16 b)
852 {
853 return (spu_sel(b, a, spu_cmpgt(a, b)));
854 }
855
vec_max(vec_char16 a,vec_char16 b)856 static inline vec_char16 vec_max(vec_char16 a, vec_char16 b)
857 {
858 return (spu_sel(b, a, spu_cmpgt(a, b)));
859 }
860
vec_max(vec_bchar16 a,vec_char16 b)861 static inline vec_char16 vec_max(vec_bchar16 a, vec_char16 b)
862 {
863 return (spu_sel(b, (vec_char16)(a), spu_cmpgt((vec_char16)(a), b)));
864 }
865
vec_max(vec_char16 a,vec_bchar16 b)866 static inline vec_char16 vec_max(vec_char16 a, vec_bchar16 b)
867 {
868 return (spu_sel((vec_char16)(b), a, spu_cmpgt(a, (vec_char16)(b))));
869 }
870
vec_max(vec_ushort8 a,vec_ushort8 b)871 static inline vec_ushort8 vec_max(vec_ushort8 a, vec_ushort8 b)
872 {
873 return (spu_sel(b, a, spu_cmpgt(a, b)));
874 }
875
vec_max(vec_short8 a,vec_short8 b)876 static inline vec_short8 vec_max(vec_short8 a, vec_short8 b)
877 {
878 return (spu_sel(b, a, spu_cmpgt(a, b)));
879 }
880
vec_max(vec_bshort8 a,vec_short8 b)881 static inline vec_short8 vec_max(vec_bshort8 a, vec_short8 b)
882 {
883 return (spu_sel(b, (vec_short8)(a), spu_cmpgt((vec_short8)(a), b)));
884 }
885
vec_max(vec_short8 a,vec_bshort8 b)886 static inline vec_short8 vec_max(vec_short8 a, vec_bshort8 b)
887 {
888 return (spu_sel((vec_short8)(b), a, spu_cmpgt(a, (vec_short8)(b))));
889 }
890
vec_max(vec_uint4 a,vec_uint4 b)891 static inline vec_uint4 vec_max(vec_uint4 a, vec_uint4 b)
892 {
893 return (spu_sel(b, a, spu_cmpgt(a, b)));
894 }
895
vec_max(vec_int4 a,vec_int4 b)896 static inline vec_int4 vec_max(vec_int4 a, vec_int4 b)
897 {
898 return (spu_sel(b, a, spu_cmpgt(a, b)));
899 }
900
vec_max(vec_bint4 a,vec_int4 b)901 static inline vec_int4 vec_max(vec_bint4 a, vec_int4 b)
902 {
903 return (spu_sel(b, (vec_int4)(a), spu_cmpgt((vec_int4)(a), b)));
904 }
905
vec_max(vec_int4 a,vec_bint4 b)906 static inline vec_int4 vec_max(vec_int4 a, vec_bint4 b)
907 {
908 return (spu_sel((vec_int4)(b), a, spu_cmpgt(a, (vec_int4)(b))));
909 }
910
vec_max(vec_float4 a,vec_float4 b)911 static inline vec_float4 vec_max(vec_float4 a, vec_float4 b)
912 {
913 return (spu_sel(b, a, spu_cmpgt(a, b)));
914 }
915
916
917 /* vec_mergeh (vector merge high)
918 * ==========
919 */
vec_mergeh(vec_uchar16 a,vec_uchar16 b)920 static inline vec_uchar16 vec_mergeh(vec_uchar16 a, vec_uchar16 b)
921 {
922 return (spu_shuffle(a, b, ((vec_uchar16){0, 16, 1, 17, 2, 18, 3, 19,
923 4, 20, 5, 21, 6, 22, 7, 23})));
924 }
925
vec_mergeh(vec_char16 a,vec_char16 b)926 static inline vec_char16 vec_mergeh(vec_char16 a, vec_char16 b)
927 {
928 return (spu_shuffle(a, b, ((vec_uchar16){0, 16, 1, 17, 2, 18, 3, 19,
929 4, 20, 5, 21, 6, 22, 7, 23})));
930 }
931
vec_mergeh(vec_ushort8 a,vec_ushort8 b)932 static inline vec_ushort8 vec_mergeh(vec_ushort8 a, vec_ushort8 b)
933 {
934 return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 16, 17, 2, 3, 18, 19,
935 4, 5, 20, 21, 6, 7, 22, 23})));
936 }
937
vec_mergeh(vec_short8 a,vec_short8 b)938 static inline vec_short8 vec_mergeh(vec_short8 a, vec_short8 b)
939 {
940 return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 16, 17, 2, 3, 18, 19,
941 4, 5, 20, 21, 6, 7, 22, 23})));
942 }
943
vec_mergeh(vec_uint4 a,vec_uint4 b)944 static inline vec_uint4 vec_mergeh(vec_uint4 a, vec_uint4 b)
945 {
946 return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 2, 3, 16, 17, 18, 19,
947 4, 5, 6, 7, 20, 21, 22, 23})));
948 }
949
vec_mergeh(vec_int4 a,vec_int4 b)950 static inline vec_int4 vec_mergeh(vec_int4 a, vec_int4 b)
951 {
952 return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 2, 3, 16, 17, 18, 19,
953 4, 5, 6, 7, 20, 21, 22, 23})));
954 }
955
vec_mergeh(vec_float4 a,vec_float4 b)956 static inline vec_float4 vec_mergeh(vec_float4 a, vec_float4 b)
957 {
958 return (spu_shuffle(a, b, ((vec_uchar16){0, 1, 2, 3, 16, 17, 18, 19,
959 4, 5, 6, 7, 20, 21, 22, 23})));
960 }
961
/* vec_mergel (vector merge low)
 * ==========
 *
 * Interleave the elements of the low (right-most) halves of a and b:
 * result = {a[n/2], b[n/2], a[n/2+1], b[n/2+1], ...}.  Implemented as a
 * byte shuffle; bytes 8-15 come from a's low half, 24-31 from b's.
 */
static inline vec_uchar16 vec_mergel(vec_uchar16 a, vec_uchar16 b)
{
  return (spu_shuffle(a, b, ((vec_uchar16){ 8, 24,  9, 25, 10, 26, 11, 27,
					   12, 28, 13, 29, 14, 30, 15, 31})));
}

static inline vec_char16 vec_mergel(vec_char16 a, vec_char16 b)
{
  return (spu_shuffle(a, b, ((vec_uchar16){ 8, 24,  9, 25, 10, 26, 11, 27,
					   12, 28, 13, 29, 14, 30, 15, 31})));
}

static inline vec_ushort8 vec_mergel(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_shuffle(a, b, ((vec_uchar16){ 8,  9, 24, 25, 10, 11, 26, 27,
					   12, 13, 28, 29, 14, 15, 30, 31})));
}

static inline vec_short8 vec_mergel(vec_short8 a, vec_short8 b)
{
  return (spu_shuffle(a, b, ((vec_uchar16){ 8,  9, 24, 25, 10, 11, 26, 27,
					   12, 13, 28, 29, 14, 15, 30, 31})));
}

static inline vec_uint4 vec_mergel(vec_uint4 a, vec_uint4 b)
{
  return (spu_shuffle(a, b, ((vec_uchar16){ 8,  9, 10, 11, 24, 25, 26, 27,
					   12, 13, 14, 15, 28, 29, 30, 31})));
}

static inline vec_int4 vec_mergel(vec_int4 a, vec_int4 b)
{
  return (spu_shuffle(a, b, ((vec_uchar16){ 8,  9, 10, 11, 24, 25, 26, 27,
					   12, 13, 14, 15, 28, 29, 30, 31})));
}

static inline vec_float4 vec_mergel(vec_float4 a, vec_float4 b)
{
  return (spu_shuffle(a, b, ((vec_uchar16){ 8,  9, 10, 11, 24, 25, 26, 27,
					   12, 13, 14, 15, 28, 29, 30, 31})));
}
1006
/* vec_mfvscr (vector move from vector status and control register)
 * ==========
 *
 * The SPU has no VSCR equivalent, so this always returns zeros.
 */
static inline vec_ushort8 vec_mfvscr()
{
  return ((vec_ushort8)spu_splats(0));           /* not supported */
}
1014
1015
/* vec_min (vector minimum)
 * =======
 *
 * Element-wise minimum: select b where a > b, else a.  The boolean-
 * typed (vec_b*) overloads simply reinterpret the boolean operand as
 * the matching signed type.
 */
static inline vec_uchar16 vec_min(vec_uchar16 a, vec_uchar16 b)
{
  return (spu_sel(a, b, spu_cmpgt(a, b)));
}

static inline vec_char16 vec_min(vec_char16 a, vec_char16 b)
{
  return (spu_sel(a, b, spu_cmpgt(a, b)));
}

static inline vec_char16 vec_min(vec_bchar16 a, vec_char16 b)
{
  return (spu_sel((vec_char16)(a), b, spu_cmpgt((vec_char16)(a), b)));
}

static inline vec_char16 vec_min(vec_char16 a, vec_bchar16 b)
{
  return (spu_sel(a, (vec_char16)(b), spu_cmpgt(a, (vec_char16)(b))));
}

static inline vec_ushort8 vec_min(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_sel(a, b, spu_cmpgt(a, b)));
}

static inline vec_short8 vec_min(vec_short8 a, vec_short8 b)
{
  return (spu_sel(a, b, spu_cmpgt(a, b)));
}

static inline vec_short8 vec_min(vec_bshort8 a, vec_short8 b)
{
  return (spu_sel((vec_short8)(a), b, spu_cmpgt((vec_short8)(a), b)));
}

static inline vec_short8 vec_min(vec_short8 a, vec_bshort8 b)
{
  return (spu_sel(a, (vec_short8)(b), spu_cmpgt(a, (vec_short8)(b))));
}

static inline vec_uint4 vec_min(vec_uint4 a, vec_uint4 b)
{
  return (spu_sel(a, b, spu_cmpgt(a, b)));
}

static inline vec_int4 vec_min(vec_int4 a, vec_int4 b)
{
  return (spu_sel(a, b, spu_cmpgt(a, b)));
}

static inline vec_int4 vec_min(vec_bint4 a, vec_int4 b)
{
  return (spu_sel((vec_int4)(a), b, spu_cmpgt((vec_int4)(a), b)));
}

static inline vec_int4 vec_min(vec_int4 a, vec_bint4 b)
{
  return (spu_sel(a, (vec_int4)(b), spu_cmpgt(a, (vec_int4)(b))));
}

static inline vec_float4 vec_min(vec_float4 a, vec_float4 b)
{
  return (spu_sel(a, b, spu_cmpgt(a, b)));
}
1083
/* vec_mladd (vector multiply low and add unsigned half word)
 * =========
 *
 * Element-wise (a * b + c) keeping the low 16 bits of each result.
 */
static inline vec_short8 vec_mladd(vec_short8 a, vec_short8 b, vec_short8 c)
{
  /* spu_madd only operates on the odd halfword of each word, so the
     even halfwords are first rotated into the odd position (rotate by
     -16 within each word), both halves are multiplied/added, and the
     low 16 bits of each 32-bit result are shuffled back together.  */
  return ((vec_short8)(spu_shuffle(spu_madd((vec_short8)(spu_rl((vec_uint4)(a), -16)),
					    (vec_short8)(spu_rl((vec_uint4)(b), -16)),
					    (vec_int4)(spu_rl((vec_uint4)(c), -16))),
				   spu_madd(a, b, spu_extend(c)),
				   ((vec_uchar16){ 2,  3, 18, 19,  6,  7, 22, 23,
						  10, 11, 26, 27, 14, 15, 30, 31}))));
}


/* Low 16 bits of the product are the same for signed and unsigned
   operands, so the remaining overloads just reinterpret and delegate.  */
static inline vec_ushort8 vec_mladd(vec_ushort8 a, vec_ushort8 b, vec_ushort8 c)
{
  return ((vec_ushort8)(vec_mladd((vec_short8)(a), (vec_short8)(b), (vec_short8)(c))));
}

static inline vec_short8 vec_mladd(vec_ushort8 a, vec_short8 b, vec_short8 c)
{
  return (vec_mladd((vec_short8)(a), b, c));
}

static inline vec_short8 vec_mladd(vec_short8 a, vec_ushort8 b, vec_ushort8 c)
{
  return (vec_mladd(a, (vec_short8)(b), (vec_short8)(c)));
}
1112
1113
/* vec_mradds (vector multiply round and add saturate)
 * ==========
 *
 * Element-wise ((a * b + 0x4000) >> 15) added to c with signed
 * saturation.
 */
static inline vec_short8 vec_mradds(vec_short8 a, vec_short8 b, vec_short8 c)
{
  vec_int4 round = (vec_int4)spu_splats(0x4000);
  vec_short8 hi, lo;

  /* Even products: shift the rounded 32-bit product left 1 so the
     wanted bits land in the upper halfword; odd products: arithmetic
     shift right 15.  The selector then interleaves even/odd results.  */
  hi = (vec_short8)(spu_sl(spu_add(spu_mule(a, b), round), 1));
  lo = (vec_short8)(spu_rlmask(spu_add(spu_mulo(a, b), round), -15));

  return (vec_adds(spu_sel(hi, lo, ((vec_ushort8){0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF, 0, 0xFFFF})), c));
}
1127
1128
/* vec_msum (vector multiply sum)
 * ========
 *
 * For byte operands: sum the four products within each word plus c.
 * For halfword operands: sum the two products within each word plus c.
 */
static inline vec_uint4 vec_msum(vec_uchar16 a, vec_uchar16 b, vec_uint4 c)
{
  vec_ushort8 a1, a2, b1, b2;
  vec_uint4 p1, p2;

  /* Split each operand into its odd bytes (a1/b1, masked into the low
     halfword) and even bytes (a2/b2, shifted down).  */
  a1 = spu_and((vec_ushort8)(a), 0xFF);
  a2 = spu_rlmask((vec_ushort8)(a), -8);
  b1 = spu_and((vec_ushort8)(b), 0xFF);
  b2 = spu_rlmask((vec_ushort8)(b), -8);

  /* Each spu_mulo covers the odd halfword of each word; the byte
     rotate (-2) brings the even halfword's product into play.  */
  p1 = spu_add(spu_mulo(a1, b1), spu_mulo(spu_rlqwbyte(a1, -2), spu_rlqwbyte(b1, -2)));
  p2 = spu_add(spu_mulo(a2, b2), spu_mulo(spu_rlqwbyte(a2, -2), spu_rlqwbyte(b2, -2)));
  return (spu_add(p2, spu_add(p1, c)));
}

static inline vec_int4 vec_msum(vec_char16 a, vec_uchar16 b, vec_int4 c)
{
  vec_short8 a1, a2, b1, b2;
  vec_int4 p1, p2;

  /* a is sign-extended (signed bytes); b is zero-extended (unsigned).  */
  a1 = (vec_short8)(spu_extend(a));
  a2 = spu_rlmaska((vec_short8)(a), -8);
  b1 = (vec_short8)(spu_and((vec_ushort8)(b), 0xFF));
  b2 = (vec_short8)spu_rlmask((vec_ushort8)(b), -8);

  p1 = spu_add(spu_mulo(a1, b1), spu_mulo(spu_rlqwbyte(a1, -2), spu_rlqwbyte(b1, -2)));
  p2 = spu_add(spu_mulo(a2, b2), spu_mulo(spu_rlqwbyte(a2, -2), spu_rlqwbyte(b2, -2)));
  return (spu_add(p2, spu_add(p1, c)));
}

static inline vec_uint4 vec_msum(vec_ushort8 a, vec_ushort8 b, vec_uint4 c)
{
  return (spu_add(spu_add(spu_mulo(a, b), spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2))), c));
}

static inline vec_int4 vec_msum(vec_short8 a, vec_short8 b, vec_int4 c)
{
  return (spu_add(spu_add(spu_mulo(a, b), spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2))), c));
}
1171
1172
/* vec_msums (vector multiply sum saturate)
 * ========
 *
 * Like vec_msum for halfwords, but the accumulation saturates.
 */
static inline vec_uint4 vec_msums(vec_ushort8 a, vec_ushort8 b, vec_uint4 c)
{
  vec_uint4 p1, p2;

  p1 = spu_mulo(a, b);
  p2 = spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2));

  /* Both additions saturate so an intermediate overflow sticks.  */
  return (vec_adds(p2, vec_adds(p1, c)));
}

static inline vec_int4 vec_msums(vec_short8 a, vec_short8 b, vec_int4 c)
{
  /* NOTE(review): the product sum is formed with a non-saturating
     spu_add before the saturating add of c — presumably safe because
     two 16x16-bit products cannot overflow 32 bits; confirm against
     the AltiVec vmsumshs definition.  */
  return (vec_adds(spu_add(spu_mulo(a, b), spu_mulo(spu_rlqwbyte(a, -2), spu_rlqwbyte(b, -2))), c));
}
1190
/* vec_mtvscr (vector move to vector status and control register)
 * ==========
 *
 * The SPU has no VSCR equivalent; the operation is a no-op.
 */
#define vec_mtvscr(_a)		/* not supported */
1195
1196
/* vec_mule (vector multiply even)
 * ========
 *
 * Multiply the even-numbered elements of a and b, producing results of
 * twice the element width.
 */
static inline vec_ushort8 vec_mule(vec_uchar16 a, vec_uchar16 b)
{
  vec_ushort8 hi, lo;

  /* Even bytes are isolated by shifting them into the low halfword of
     each word (byte 0 via -24 per word, byte 2 via -8 per halfword),
     multiplied, then the halfword results are interleaved.  */
  hi = (vec_ushort8)spu_mulo((vec_ushort8)(spu_rlmask((vec_uint4)(a), -24)),
			     (vec_ushort8)(spu_rlmask((vec_uint4)(b), -24)));
  lo = (vec_ushort8)spu_mulo((vec_ushort8)(spu_rlmask((vec_short8)(a), -8)),
			     (vec_ushort8)(spu_rlmask((vec_short8)(b), -8)));

  return (spu_shuffle(hi, lo, ((vec_uchar16){ 2,  3, 18, 19,  6,  7, 22, 23,
					     10, 11, 26, 27, 14, 15, 30, 31})));
}

static inline vec_short8 vec_mule(vec_char16 a, vec_char16 b)
{
  vec_short8 hi, lo;

  /* Same structure as the unsigned variant, with arithmetic shifts to
     preserve the sign of the even bytes.  */
  hi = (vec_short8)spu_mulo((vec_short8)(spu_rlmaska((vec_uint4)(a), -24)),
			    (vec_short8)(spu_rlmaska((vec_uint4)(b), -24)));
  lo = (vec_short8)spu_mulo((vec_short8)(spu_rlmaska((vec_short8)(a), -8)),
			    (vec_short8)(spu_rlmaska((vec_short8)(b), -8)));

  return (spu_shuffle(hi, lo, ((vec_uchar16){ 2,  3, 18, 19,  6,  7, 22, 23,
					     10, 11, 26, 27, 14, 15, 30, 31})));
}

static inline vec_uint4 vec_mule(vec_ushort8 a, vec_ushort8 b)
{
  /* Shift the even halfword of each word down into the odd position
     that spu_mulo operates on.  */
  return (spu_mulo((vec_ushort8)spu_rlmask((vec_uint4)(a), -16),
		   (vec_ushort8)spu_rlmask((vec_uint4)(b), -16)));
}


static inline vec_int4 vec_mule(vec_short8 a, vec_short8 b)
{
  return (spu_mulo((vec_short8)spu_rlmaska((vec_int4)(a), -16),
		   (vec_short8)spu_rlmaska((vec_int4)(b), -16)));
}
1238
1239
/* vec_mulo (vector multiply odd)
 * ========
 *
 * Multiply the odd-numbered elements of a and b, producing results of
 * twice the element width.  For halfword operands this is exactly
 * spu_mulo.
 */
static inline vec_ushort8 vec_mulo(vec_uchar16 a, vec_uchar16 b)
{
  vec_ushort8 hi, lo;

  /* Odd byte of the upper halfword of each word (byte 1) is moved to
     the low halfword and masked; odd byte of the lower halfword
     (byte 3) is already in place and only needs masking.  */
  hi = (vec_ushort8)spu_mulo((vec_ushort8)(spu_and(spu_rlmask((vec_uint4)(a), -16), 0xFF)),
			     (vec_ushort8)(spu_and(spu_rlmask((vec_uint4)(b), -16), 0xFF)));
  lo = (vec_ushort8)spu_mulo(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF));

  return (spu_shuffle(hi, lo, ((vec_uchar16){ 2,  3, 18, 19,  6,  7, 22, 23,
					     10, 11, 26, 27, 14, 15, 30, 31})));
}

static inline vec_short8 vec_mulo(vec_char16 a, vec_char16 b)
{
  vec_short8 aa, bb, hi, lo;

  /* Sign-extend the odd bytes to halfwords first, then multiply the
     two halfword positions per word and interleave the results.  */
  aa = spu_extend(a);
  bb = spu_extend(b);

  hi = (vec_short8)spu_mulo((vec_short8)(spu_rlmaska((vec_uint4)(aa), -16)),
			    (vec_short8)(spu_rlmaska((vec_uint4)(bb), -16)));
  lo = (vec_short8)spu_mulo(aa, bb);
  return (spu_shuffle(hi, lo, ((vec_uchar16){ 2,  3, 18, 19,  6,  7, 22, 23,
					     10, 11, 26, 27, 14, 15, 30, 31})));
}

static inline vec_uint4 vec_mulo(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_mulo(a, b));
}


static inline vec_int4 vec_mulo(vec_short8 a, vec_short8 b)
{
  return (spu_mulo(a, b));
}
1279
1280
/* vec_nmsub (vector negative multiply subtract)
 * =========
 *
 * Direct one-to-one mapping: -(a*b - c).
 */
#define vec_nmsub(_a, _b, _c)	spu_nmsub(_a, _b, _c)


/* vec_nor (vector logical nor)
 * =======
 */
#define vec_nor(_a, _b)		spu_nor(_a, _b)
1291
1292
/* vec_or (vector logical or)
 * ======
 *
 * Bitwise OR; overloads exist for every element type and for mixed
 * boolean/data operand combinations (the boolean operand is simply
 * reinterpreted).
 */
static inline vec_uchar16 vec_or(vec_uchar16 a, vec_uchar16 b)
{
  return (spu_or(a, b));
}

static inline vec_char16 vec_or(vec_char16 a, vec_char16 b)
{
  return (spu_or(a, b));
}

static inline vec_char16 vec_or(vec_bchar16 a, vec_char16 b)
{
  return (spu_or((vec_char16)(a), b));
}

static inline vec_char16 vec_or(vec_char16 a, vec_bchar16 b)
{
  return (spu_or(a, (vec_char16)(b)));
}

static inline vec_ushort8 vec_or(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_or(a, b));
}

static inline vec_short8 vec_or(vec_short8 a, vec_short8 b)
{
  return (spu_or(a, b));
}

static inline vec_short8 vec_or(vec_bshort8 a, vec_short8 b)
{
  return (spu_or((vec_short8)(a), b));
}

static inline vec_short8 vec_or(vec_short8 a, vec_bshort8 b)
{
  return (spu_or(a, (vec_short8)(b)));
}

static inline vec_uint4 vec_or(vec_uint4 a, vec_uint4 b)
{
  return (spu_or(a, b));
}

static inline vec_int4 vec_or(vec_int4 a, vec_int4 b)
{
  return (spu_or(a, b));
}

static inline vec_int4 vec_or(vec_bint4 a, vec_int4 b)
{
  return (spu_or((vec_int4)(a), b));
}

static inline vec_int4 vec_or(vec_int4 a, vec_bint4 b)
{
  return (spu_or(a, (vec_int4)(b)));
}

static inline vec_float4 vec_or(vec_float4 a, vec_float4 b)
{
  return (spu_or(a, b));
}

static inline vec_float4 vec_or(vec_bint4 a, vec_float4 b)
{
  return (spu_or((vec_float4)(a),b));
}

static inline vec_float4 vec_or(vec_float4 a, vec_bint4 b)
{
  return (spu_or(a, (vec_float4)(b)));
}
1370
1371
/* vec_pack (vector pack)
 * ========
 *
 * Truncate each element of a and b to half its width (keeping the low
 * bits) and concatenate: result = {trunc(a), trunc(b)}.  Implemented
 * by shuffling out the low-order bytes of each element.
 */
static inline vec_uchar16 vec_pack(vec_ushort8 a, vec_ushort8 b)
{
  return ((vec_uchar16)spu_shuffle(a, b, ((vec_uchar16){ 1,  3,  5,  7,  9, 11, 13, 15,
							17, 19, 21, 23, 25, 27, 29, 31})));
}

static inline vec_char16 vec_pack(vec_short8 a, vec_short8 b)
{
  return ((vec_char16)spu_shuffle(a, b, ((vec_uchar16){ 1,  3,  5,  7,  9, 11, 13, 15,
						       17, 19, 21, 23, 25, 27, 29, 31})));
}

static inline vec_ushort8 vec_pack(vec_uint4 a, vec_uint4 b)
{
  return ((vec_ushort8)spu_shuffle(a, b, ((vec_uchar16){ 2,  3,  6,  7, 10, 11, 14, 15,
							18, 19, 22, 23, 26, 27, 30, 31})));
}

static inline vec_short8 vec_pack(vec_int4 a, vec_int4 b)
{
  return ((vec_short8)spu_shuffle(a, b, ((vec_uchar16){ 2,  3,  6,  7, 10, 11, 14, 15,
						       18, 19, 22, 23, 26, 27, 30, 31})));
}
1398
1399
/* vec_packpx (vector pack pixel)
 * ==========
 *
 * Pack eight 32-bit 8/8/8/8 pixels into 1/5/5/5 16-bit pixels: bit 7
 * of each source byte lane is shifted into position (left 7, 10 and 13
 * for the three 5-bit fields) and merged under the x03FF/x001F field
 * masks, then the high halfword of each word is extracted.
 */
static inline vec_pixel8 vec_packpx(vec_uint4 a, vec_uint4 b)
{
  vec_uint4 x03FF = (vec_uint4)(spu_splats((unsigned short)0x03FF));
  vec_uint4 x001F = (vec_uint4)(spu_splats((unsigned short)0x001F));

  return ((vec_pixel8)(spu_shuffle(spu_sel(spu_sel(spu_sl(a, 7), spu_sl(a, 10), x03FF),
					   spu_sl(a, 13), x001F),
				   spu_sel(spu_sel(spu_sl(b, 7), spu_sl(b, 10), x03FF),
					   spu_sl(b, 13), x001F),
				   ((vec_uchar16){ 0,  1,  4,  5,  8,  9, 12, 13,
						  16, 17, 20, 21, 24, 25, 28, 29}))));
}
1415
1416
/* vec_packs (vector pack saturate)
 * =========
 *
 * Pack each element of a and b to half its width, saturating to the
 * destination type's range instead of truncating.
 */
static inline vec_uchar16 vec_packs(vec_ushort8 a, vec_ushort8 b)
{
  vec_ushort8 max = spu_splats((unsigned short)0x00FF);

  /* Clamp to 255, then take the low byte of each halfword.  */
  return ((vec_uchar16)(spu_shuffle(spu_sel(a, max, spu_cmpgt(a, 255)),
				    spu_sel(b, max, spu_cmpgt(b, 255)),
				    ((vec_uchar16){ 1,  3,  5,  7,  9, 11, 13, 15,
						   17, 19, 21, 23, 25, 27, 29, 31}))));
}

static inline vec_char16 vec_packs(vec_short8 a, vec_short8 b)
{
  vec_short8 max = spu_splats((signed short)0x007F);
  vec_short8 min = spu_splats((signed short)0xFF80);

  /* Clamp into [-128, 127], then take the low byte of each halfword.  */
  return ((vec_char16)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, 127)), spu_cmpgt(a, -128)),
				   spu_sel(min, spu_sel(b, max, spu_cmpgt(b, 127)), spu_cmpgt(b, -128)),
				   ((vec_uchar16){ 1,  3,  5,  7,  9, 11, 13, 15,
						  17, 19, 21, 23, 25, 27, 29, 31}))));
}

static inline vec_ushort8 vec_packs(vec_uint4 a, vec_uint4 b)
{
  vec_uint4 max = spu_splats((unsigned int)0x0000FFFF);

  /* Clamp to 0xFFFF, then take the low halfword of each word.  */
  return ((vec_ushort8)(spu_shuffle(spu_sel(a, max, spu_cmpgt(a, max)),
				    spu_sel(b, max, spu_cmpgt(b, max)),
				    ((vec_uchar16){ 2,  3,  6,  7, 10, 11, 14, 15,
						   18, 19, 22, 23, 26, 27, 30, 31}))));
}

static inline vec_short8 vec_packs(vec_int4 a, vec_int4 b)
{
  vec_int4 max = spu_splats((signed int)0x00007FFF);
  vec_int4 min = spu_splats((signed int)0xFFFF8000);

  /* Clamp into [-32768, 32767], then take the low halfword of each word.  */
  return ((vec_short8)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, max)), spu_cmpgt(a, min)),
				   spu_sel(min, spu_sel(b, max, spu_cmpgt(b, max)), spu_cmpgt(b, min)),
				   ((vec_uchar16){ 2,  3,  6,  7, 10, 11, 14, 15,
						  18, 19, 22, 23, 26, 27, 30, 31}))));
}
1461
1462
1463 /* vec_packsu (vector pack saturate unsigned)
1464 * ==========
1465 */
vec_packsu(vec_ushort8 a,vec_ushort8 b)1466 static inline vec_uchar16 vec_packsu(vec_ushort8 a, vec_ushort8 b)
1467 {
1468 return ((vec_uchar16)spu_shuffle(spu_or(a, (vec_ushort8)(spu_cmpgt(a, 255))),
1469 spu_or(b, (vec_ushort8)(spu_cmpgt(b, 255))),
1470 ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15,
1471 17, 19, 21, 23, 25, 27, 29, 31})));
1472 }
1473
vec_packsu(vec_short8 a,vec_short8 b)1474 static inline vec_uchar16 vec_packsu(vec_short8 a, vec_short8 b)
1475 {
1476 vec_short8 max = spu_splats((signed short)0x00FF);
1477 vec_short8 min = spu_splats((signed short)0x0000);
1478
1479 return ((vec_uchar16)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, 255)), spu_cmpgt(a, 0)),
1480 spu_sel(min, spu_sel(b, max, spu_cmpgt(b, 255)), spu_cmpgt(b, 0)),
1481 ((vec_uchar16){ 1, 3, 5, 7, 9, 11, 13, 15,
1482 17, 19, 21, 23, 25, 27, 29, 31}))));
1483
1484 return (vec_packsu((vec_ushort8)(a), (vec_ushort8)(b)));
1485 }
1486
vec_packsu(vec_uint4 a,vec_uint4 b)1487 static inline vec_ushort8 vec_packsu(vec_uint4 a, vec_uint4 b)
1488 {
1489 vec_uint4 max = spu_splats((unsigned int)0xFFFF);
1490
1491 return ((vec_ushort8)spu_shuffle(spu_or(a, (vec_uint4)(spu_cmpgt(a, max))),
1492 spu_or(b, (vec_uint4)(spu_cmpgt(b, max))),
1493 ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15,
1494 18, 19, 22, 23, 26, 27, 30, 31})));
1495 }
1496
vec_packsu(vec_int4 a,vec_int4 b)1497 static inline vec_ushort8 vec_packsu(vec_int4 a, vec_int4 b)
1498 {
1499 vec_int4 max = spu_splats((signed int)0x0000FFFF);
1500 vec_int4 min = spu_splats((signed int)0x00000000);
1501
1502 return ((vec_ushort8)(spu_shuffle(spu_sel(min, spu_sel(a, max, spu_cmpgt(a, max)), spu_cmpgt(a, min)),
1503 spu_sel(min, spu_sel(b, max, spu_cmpgt(b, max)), spu_cmpgt(b, min)),
1504 ((vec_uchar16){ 2, 3, 6, 7, 10, 11, 14, 15,
1505 18, 19, 22, 23, 26, 27, 30, 31}))));
1506 }
1507
1508
/* vec_perm (vector permute)
 * ========
 *
 * Byte permute of the 32-byte concatenation {a, b} under control
 * vector c.  Only the low 5 bits of each control byte are honored
 * (masked with 0x1F), matching AltiVec's vperm index semantics rather
 * than the SPU shuffle's special codes.
 */
static inline vec_uchar16 vec_perm(vec_uchar16 a, vec_uchar16 b, vec_uchar16 c)
{
  return (spu_shuffle(a, b, spu_and(c, 0x1F)));
}

/* The remaining overloads reinterpret their operands as byte vectors
   and delegate to the vec_uchar16 variant.  */
static inline vec_char16 vec_perm(vec_char16 a, vec_char16 b, vec_uchar16 c)
{
  return ((vec_char16)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
}

static inline vec_ushort8 vec_perm(vec_ushort8 a, vec_ushort8 b, vec_uchar16 c)
{
  return ((vec_ushort8)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
}

static inline vec_short8 vec_perm(vec_short8 a, vec_short8 b, vec_uchar16 c)
{
  return ((vec_short8)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
}

static inline vec_uint4 vec_perm(vec_uint4 a, vec_uint4 b, vec_uchar16 c)
{
  return ((vec_uint4)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
}

static inline vec_int4 vec_perm(vec_int4 a, vec_int4 b, vec_uchar16 c)
{
  return ((vec_int4)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
}

static inline vec_float4 vec_perm(vec_float4 a, vec_float4 b, vec_uchar16 c)
{
  return ((vec_float4)(vec_perm((vec_uchar16)(a), (vec_uchar16)(b), c)));
}
1546
1547
/* vec_re (vector reciprocal estimate)
 * ======
 *
 * Direct one-to-one mapping onto the SPU reciprocal estimate.
 */
#define vec_re(_a)	spu_re(_a)
1552
1553
/* vec_rl (vector rotate left)
 * ======
 *
 * Element-wise rotate left of a by the low-order bit count in b.
 */
static inline vec_uchar16 vec_rl(vec_uchar16 a, vec_uchar16 b)
{
  vec_ushort8 r1, r2;

  /* The SPU rotates halfwords, not bytes: rotate the odd bytes (r1,
     masked low) and even bytes (r2, masked high) separately by their
     own counts (mod 8), then recombine the in-range byte of each.  */
  r1 = spu_rl(spu_and((vec_ushort8)(a), 0xFF), (vec_short8)spu_and((vec_ushort8)(b), 7));
  r2 = spu_rl(spu_and((vec_ushort8)(a), -256), (vec_short8)spu_and(spu_rlmask((vec_ushort8)(b), -8), 7));
  return ((vec_uchar16)(spu_sel(spu_or(r2, spu_sl(r2, 8)), spu_or(r1, spu_rlmask(r1, -8)), spu_splats((unsigned short)0xFF))));
}

static inline vec_char16 vec_rl(vec_char16 a, vec_uchar16 b)
{
  /* Rotation is sign-agnostic; reuse the unsigned byte variant.  */
  return ((vec_char16)(vec_rl((vec_uchar16)(a), b)));
}

static inline vec_ushort8 vec_rl(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_rl(a, (vec_short8)(b)));
}

static inline vec_short8 vec_rl(vec_short8 a, vec_ushort8 b)
{
  return (spu_rl(a, (vec_short8)(b)));
}

static inline vec_uint4 vec_rl(vec_uint4 a, vec_uint4 b)
{
  return (spu_rl(a, (vec_int4)(b)));
}

static inline vec_int4 vec_rl(vec_int4 a, vec_uint4 b)
{
  return (spu_rl(a, (vec_int4)(b)));
}
1590
1591
/* vec_round (vector round)
 * =========
 *
 * Round each float to the nearest integral value.  The s_one
 * adjustment handles the exact half-way case; from the odd/even test
 * below this appears to implement round-half-to-even — NOTE(review):
 * confirm against the AltiVec vrfin definition.
 */
static inline vec_float4 vec_round(vec_float4 a)
{
  vec_float4 s_half, s_one, d;
  vec_uint4 odd;
  vec_uint4 msb = spu_splats((unsigned int)0x80000000);
  vec_float4 half = spu_splats(0.5f);
  vec_int4 exp;
  vec_uint4 mask;

  /* Add 0.5 with the sign of a (round magnitude up at the midpoint).  */
  s_half = (vec_float4)(spu_sel((vec_uint4)(half), (vec_uint4)(a), msb));
  a = spu_add(a, s_half);
  s_one = spu_add(s_half, s_half);
  /* Build a mask covering the fraction bits below the integer part,
     derived from the biased exponent.  */
  exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)));
  mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp);
  mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31));
  mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1));

  /* If the pre-truncation value is exactly integral (fraction bits all
     zero) and the truncated integer is odd, back off by one (with the
     sign of a) to land on the even neighbor.  */
  odd = spu_and((vec_uint4)(spu_convts(a, 0)), 1);
  s_one = spu_andc(s_one, (vec_float4)spu_cmpeq(mask, 0));
  s_one = spu_and(s_one, spu_and((vec_float4)spu_cmpeq(spu_and((vec_uint4)(a), mask), 0),
				 (vec_float4)spu_cmpeq(odd, 1)));
  /* Truncate by clearing the fraction bits, then apply the correction.  */
  d = spu_andc(a, (vec_float4)(mask));
  d = spu_sub(d, s_one);
  return (d);
}
1620
/* vec_rsqrte (vector reciprocal square root estimate)
 * ==========
 *
 * Direct one-to-one mapping.
 */
#define vec_rsqrte(_a)	spu_rsqrte(_a)


/* vec_sel (vector select)
 * =======
 */
#define vec_sel(_a, _b, _c)	spu_sel(_a, _b, _c)
1631
1632
/* vec_sl (vector shift left)
 * ======
 *
 * Element-wise shift left of a by b modulo the element width (AltiVec
 * only honors the low log2(width) bits of each shift count).
 */
static inline vec_uchar16 vec_sl(vec_uchar16 a, vec_uchar16 b)
{
  vec_ushort8 hi, lo;

  /* The SPU shifts halfwords: shift the odd bytes (lo, re-masked to a
     byte) and even bytes (hi, pre-masked so nothing spills into the
     odd byte) separately, each by its own count mod 8.  */
  lo = spu_and(spu_sl((vec_ushort8)(a), spu_and((vec_ushort8)(b), 7)), 0xFF);
  hi = spu_sl(spu_and((vec_ushort8)(a), -256), spu_and(spu_rlmask((vec_ushort8)(b), -8), 7));

  return ((vec_uchar16)(spu_or(hi, lo)));
}

static inline vec_char16 vec_sl(vec_char16 a, vec_uchar16 b)
{
  /* Logical shift left is sign-agnostic; reuse the unsigned variant.  */
  return ((vec_char16)(vec_sl((vec_uchar16)(a), b)));
}

static inline vec_ushort8 vec_sl(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_sl(a, spu_and(b, 15)));
}

static inline vec_short8 vec_sl(vec_short8 a, vec_ushort8 b)
{
  return (spu_sl(a, spu_and((vec_ushort8)(b), 15)));
}

static inline vec_uint4 vec_sl(vec_uint4 a, vec_uint4 b)
{
  return (spu_sl(a, spu_and(b, 31)));
}

static inline vec_int4 vec_sl(vec_int4 a, vec_uint4 b)
{
  return (spu_sl(a, spu_and(b, 31)));
}
1670
1671
/* vec_sld (vector shift left double)
 * =======
 *
 * Concatenate a:b and extract 16 bytes starting at byte offset _c
 * (0-15); _c must be a compile-time constant since it is folded into
 * the shuffle pattern.
 */
#define vec_sld(_a, _b, _c)	spu_shuffle(_a, _b, ((vec_uchar16){ 0+(_c),  1+(_c),  2+(_c),  3+(_c),  \
								    4+(_c),  5+(_c),  6+(_c),  7+(_c),  \
								    8+(_c),  9+(_c), 10+(_c), 11+(_c),  \
								   12+(_c), 13+(_c), 14+(_c), 15+(_c)}))


/* vec_sll (vector shift left long)
 * =======
 *
 * Shift the entire quadword left by the bit count in element 0 of _b.
 */
#define vec_sll(_a, _b)	spu_slqw(_a, spu_extract((vec_uint4)(_b), 0))


/* vec_slo (vector shift left by octet)
 * =======
 *
 * Shift the entire quadword left by whole bytes; the count comes from
 * the last word of _b (bit-count form, hence the bytecount variant).
 */
#define vec_slo(_a, _b)	spu_slqwbytebc(_a, spu_extract((vec_uint4)(_b), 3) & 0x7F)


/* vec_splat (vector splat)
 * =========
 *
 * Replicate element _b of _a across all elements.
 */
#define vec_splat(_a, _b)	spu_splats(spu_extract(_a, _b))
1697
1698
/* vec_splat_s8 (vector splat signed byte)
 * ============
 *
 * The vec_splat_* family replicates a (5-bit immediate in AltiVec)
 * constant across all elements of the given type.
 */
#define vec_splat_s8(_a)	spu_splats((signed char)(_a))


/* vec_splat_s16 (vector splat signed half-word)
 * =============
 */
#define vec_splat_s16(_a)	spu_splats((signed short)(_a))


/* vec_splat_s32 (vector splat signed word)
 * =============
 */
#define vec_splat_s32(_a)	spu_splats((signed int)(_a))


/* vec_splat_u8 (vector splat unsigned byte)
 * ============
 */
#define vec_splat_u8(_a)	spu_splats((unsigned char)(_a))


/* vec_splat_u16 (vector splat unsigned half-word)
 * =============
 */
#define vec_splat_u16(_a)	spu_splats((unsigned short)(_a))


/* vec_splat_u32 (vector splat unsigned word)
 * =============
 */
#define vec_splat_u32(_a)	spu_splats((unsigned int)(_a))
1733
1734
/* vec_sr (vector shift right)
 * ======
 *
 * Element-wise logical shift right of a by b modulo the element width.
 * The SPU has no right-shift, so counts are negated and spu_rlmask
 * (rotate-left-and-mask) is used instead.
 */
static inline vec_uchar16 vec_sr(vec_uchar16 a, vec_uchar16 b)
{
  vec_ushort8 hi, lo;

  /* Odd bytes (lo, pre-masked) and even bytes (hi, re-masked after the
     shift) are handled separately, each by its own count mod 8.  */
  lo = spu_rlmask(spu_and((vec_ushort8)(a), 0xFF), spu_sub(0, (vec_short8)(spu_and((vec_ushort8)(b), 7))));
  hi = spu_and(spu_rlmask((vec_ushort8)(a), spu_sub(0, (vec_short8)(spu_and(spu_rlmask((vec_ushort8)(b), -8), 7)))), -256);

  return ((vec_uchar16)(spu_or(hi, lo)));
}

static inline vec_char16 vec_sr(vec_char16 a, vec_uchar16 b)
{
  /* Logical (zero-fill) shift; reuse the unsigned variant.  */
  return ((vec_char16)(vec_sr((vec_uchar16)(a), b)));
}

static inline vec_ushort8 vec_sr(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_rlmask(a, spu_sub(0, (vec_short8)(spu_and(b, 15)))));
}

static inline vec_short8 vec_sr(vec_short8 a, vec_ushort8 b)
{
  return ((vec_short8)(vec_sr((vec_ushort8)(a), b)));
}

static inline vec_uint4 vec_sr(vec_uint4 a, vec_uint4 b)
{
  return (spu_rlmask(a, spu_sub(0, (vec_int4)(spu_and(b, 31)))));
}

static inline vec_int4 vec_sr(vec_int4 a, vec_uint4 b)
{
  return ((vec_int4)(vec_sr((vec_uint4)(a), b)));
}
1772
1773
/* vec_sra (vector shift right algebraic)
 * =======
 *
 * Element-wise arithmetic (sign-extending) shift right of a by b
 * modulo the element width, using spu_rlmaska with negated counts.
 */
static inline vec_char16 vec_sra(vec_char16 a, vec_uchar16 b)
{
  vec_short8 hi, lo;

  /* Odd bytes are sign-extended to halfwords first so the arithmetic
     shift fills with the byte's own sign; even bytes already carry the
     halfword sign bit and are re-masked after shifting.  */
  lo = spu_and(spu_rlmaska(spu_extend(a), spu_sub(0, (vec_short8)(spu_and((vec_ushort8)(b), 7)))), 0xFF);
  hi = spu_and(spu_rlmaska((vec_short8)(a), spu_sub(0, (vec_short8)(spu_and(spu_rlmask((vec_ushort8)(b), -8), 7)))), -256);

  return ((vec_char16)(spu_or(hi, lo)));
}

static inline vec_uchar16 vec_sra(vec_uchar16 a, vec_uchar16 b)
{
  /* AltiVec defines vec_sra on unsigned types as the same bit pattern
     as the signed shift; reuse the signed variant.  */
  return ((vec_uchar16)(vec_sra((vec_char16)(a), b)));
}

static inline vec_short8 vec_sra(vec_short8 a, vec_ushort8 b)
{
  return (spu_rlmaska(a, spu_sub(0, (vec_short8)(spu_and(b, 15)))));
}

static inline vec_ushort8 vec_sra(vec_ushort8 a, vec_ushort8 b)
{
  return ((vec_ushort8)(vec_sra((vec_short8)(a), b)));
}

static inline vec_int4 vec_sra(vec_int4 a, vec_uint4 b)
{
  return (spu_rlmaska(a, spu_sub(0, (vec_int4)(spu_and(b, 31)))));
}

static inline vec_uint4 vec_sra(vec_uint4 a, vec_uint4 b)
{
  return ((vec_uint4)(vec_sra((vec_int4)(a), b)));
}
1811
1812
/* vec_srl (vector shift right long)
 * =======
 *
 * Shift the entire quadword right by the bit count in the last word
 * of _b (negated for the SPU's rotate-left-and-mask form).
 */
#define vec_srl(_a, _b)	spu_rlmaskqw(_a, 0-spu_extract((vec_int4)(_b), 3))


/* vec_sro (vector shift right by octet)
 * =======
 *
 * Shift the entire quadword right by whole bytes; the byte count is
 * bits 121:124 of _b (hence the >> 3 and 4-bit mask).
 */
#define vec_sro(_a, _b)	spu_rlmaskqwbyte(_a, 0 - ((spu_extract((vec_int4)(_b), 3) >> 3) & 0xF))
1823
/* vec_st (vector store indexed)
 * ======
 *
 * Store vector a at byte offset b from pointer c.  NOTE(review): the
 * effective address is presumably forced to a 16-byte boundary by the
 * SPU quadword store itself; no explicit alignment masking is done
 * here — confirm against the SPU store semantics.
 */
static inline void vec_st(vec_uchar16 a, int b, unsigned char *c)
{
  *((vec_uchar16 *)(c+b)) = a;
}

static inline void vec_st(vec_uchar16 a, int b, vec_uchar16 *c)
{
  *((vec_uchar16 *)((unsigned char *)(c)+b)) = a;
}

static inline void vec_st(vec_char16 a, int b, signed char *c)
{
  *((vec_char16 *)(c+b)) = a;
}

static inline void vec_st(vec_char16 a, int b, vec_char16 *c)
{
  *((vec_char16 *)((signed char *)(c)+b)) = a;
}

static inline void vec_st(vec_bchar16 a, int b, signed char *c)
{
  *((vec_bchar16 *)((signed char *)(c)+b)) = a;
}

static inline void vec_st(vec_ushort8 a, int b, unsigned short *c)
{
  *((vec_ushort8 *)((unsigned char *)(c)+b)) = a;
}

static inline void vec_st(vec_ushort8 a, int b, vec_ushort8 *c)
{
  *((vec_ushort8 *)((unsigned char *)(c)+b)) = a;
}

static inline void vec_st(vec_short8 a, int b, signed short *c)
{
  *((vec_short8 *)((unsigned char *)(c)+b)) = a;
}

static inline void vec_st(vec_short8 a, int b, vec_short8 *c)
{
  *((vec_short8 *)((signed char *)(c)+b)) = a;
}

static inline void vec_st(vec_bshort8 a, int b, signed short *c)
{
  *((vec_bshort8 *)((signed char *)(c)+b)) = a;
}

static inline void vec_st(vec_uint4 a, int b, unsigned int *c)
{
  *((vec_uint4 *)((unsigned char *)(c)+b)) = a;
}

static inline void vec_st(vec_uint4 a, int b, vec_uint4 *c)
{
  *((vec_uint4 *)((unsigned char *)(c)+b)) = a;
}

static inline void vec_st(vec_int4 a, int b, signed int *c)
{
  *((vec_int4 *)((unsigned char *)(c)+b)) = a;
}

static inline void vec_st(vec_int4 a, int b, vec_int4 *c)
{
  *((vec_int4 *)((signed char *)(c)+b)) = a;
}

static inline void vec_st(vec_bint4 a, int b, signed int *c)
{
  *((vec_bint4 *)((signed char *)(c)+b)) = a;
}

static inline void vec_st(vec_float4 a, int b, float *c)
{
  *((vec_float4 *)((unsigned char *)(c)+b)) = a;
}

static inline void vec_st(vec_float4 a, int b, vec_float4 *c)
{
  *((vec_float4 *)((unsigned char *)(c)+b)) = a;
}
1911
1912
/* vec_ste (vector store element indexed)
 * =======
 *
 * Store the single element of a whose lane corresponds to the
 * (element-aligned) effective address c + b.  The lane index is
 * recovered from the low address bits, matching AltiVec semantics.
 */
static inline void vec_ste(vec_uchar16 a, int b, unsigned char *c)
{
  unsigned char *ptr;

  ptr = c + b;
  *ptr = spu_extract(a, (int)(ptr) & 15);
}

static inline void vec_ste(vec_char16 a, int b, signed char *c)
{
  vec_ste((vec_uchar16)(a), b, (unsigned char *)(c));
}

static inline void vec_ste(vec_bchar16 a, int b, signed char *c)
{
  vec_ste((vec_uchar16)(a), b, (unsigned char *)(c));
}

static inline void vec_ste(vec_ushort8 a, int b, unsigned short *c)
{
  unsigned short *ptr;

  /* Force halfword alignment of the effective address.  */
  ptr = (unsigned short *)(((unsigned int)(c) + b) & ~1);
  *ptr = spu_extract(a, ((int)(ptr) >> 1) & 7);
}

static inline void vec_ste(vec_short8 a, int b, signed short *c)
{
  vec_ste((vec_ushort8)(a), b, (unsigned short *)(c));
}

static inline void vec_ste(vec_bshort8 a, int b, signed short *c)
{
  vec_ste((vec_ushort8)(a), b, (unsigned short *)(c));
}

static inline void vec_ste(vec_uint4 a, int b, unsigned int *c)
{
  unsigned int *ptr;

  /* Force word alignment of the effective address.  */
  ptr = (unsigned int *)(((unsigned int)(c) + b) & ~3);
  *ptr = spu_extract(a, ((int)(ptr) >> 2) & 3);
}

static inline void vec_ste(vec_int4 a, int b, signed int *c)
{
  vec_ste((vec_uint4)(a), b, (unsigned int *)(c));
}

static inline void vec_ste(vec_bint4 a, int b, signed int *c)
{
  vec_ste((vec_uint4)(a), b, (unsigned int *)(c));
}

static inline void vec_ste(vec_float4 a, int b, float *c)
{
  vec_ste((vec_uint4)(a), b, (unsigned int *)(c));
}
1974
1975
/* vec_stl (vector store indexed LRU)
 * =======
 * The SPU local store has no cache, so the VMX "least recently used"
 * cache hint has no meaning here; vec_stl degenerates to a plain
 * vec_st.
 */
#define vec_stl(_a, _b, _c) vec_st(_a, _b, _c)
1980
1981
/* vec_sub (vector subtract)
 * =======
 */
/* The SPU has no byte subtracter, so the 16-way byte subtract is
 * synthesized from the halfword subtracter: the low byte of each
 * halfword difference is already correct, while the high byte is taken
 * from a second subtract with the low bytes masked off (-256 ==
 * 0xFF00) so that no borrow propagates across the byte boundary.
 * spu_sel merges the two using the 0xFF00 mask.
 */
static inline vec_uchar16 vec_sub(vec_uchar16 a, vec_uchar16 b)
{
  return ((vec_uchar16)(spu_sel(spu_sub((vec_ushort8)(a), (vec_ushort8)(b)),
				spu_sub(spu_and((vec_ushort8)(a), -256), spu_and((vec_ushort8)(b), -256)),
				spu_splats((unsigned short)0xFF00))));
}

/* Modulo subtraction is identical for signed and unsigned bytes.  */
static inline vec_char16 vec_sub(vec_char16 a, vec_char16 b)
{
  return ((vec_char16)(vec_sub((vec_uchar16)(a), (vec_uchar16)(b))));
}

static inline vec_char16 vec_sub(vec_bchar16 a, vec_char16 b)
{
  return ((vec_char16)(vec_sub((vec_uchar16)(a), (vec_uchar16)(b))));
}

static inline vec_char16 vec_sub(vec_char16 a, vec_bchar16 b)
{
  return ((vec_char16)(vec_sub((vec_uchar16)(a), (vec_uchar16)(b))));
}

/* Halfword, word and float subtracts map directly onto spu_sub.  */
static inline vec_ushort8 vec_sub(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_sub(a, b));
}

static inline vec_short8 vec_sub(vec_short8 a, vec_short8 b)
{
  return (spu_sub(a, b));
}

static inline vec_short8 vec_sub(vec_bshort8 a, vec_short8 b)
{
  return (spu_sub((vec_short8)(a), b));
}

static inline vec_short8 vec_sub(vec_short8 a, vec_bshort8 b)
{
  return (spu_sub(a, (vec_short8)(b)));
}

static inline vec_uint4 vec_sub(vec_uint4 a, vec_uint4 b)
{
  return (spu_sub(a, b));
}

static inline vec_int4 vec_sub(vec_int4 a, vec_int4 b)
{
  return (spu_sub(a, b));
}

static inline vec_int4 vec_sub(vec_bint4 a, vec_int4 b)
{
  return (spu_sub((vec_int4)(a), b));
}

static inline vec_int4 vec_sub(vec_int4 a, vec_bint4 b)
{
  return (spu_sub(a, (vec_int4)(b)));
}

static inline vec_float4 vec_sub(vec_float4 a, vec_float4 b)
{
  return (spu_sub(a, b));
}
2051
2052
/* vec_subc (vector subtract carryout)
 * ========
 * Maps directly onto the SPU generate-borrow instruction.
 */
#define vec_subc(_a, _b) spu_genb(_a, _b)
2057
2058
/* vec_subs (vector subtract saturate)
 * ========
 */
/* Unsigned byte saturating subtract.  The subtraction is done in
 * halfword halves (high bytes in s1, low bytes in s2).  The shuffles
 * re-interleave the halfword results: `s` collects the high byte of
 * each 16-bit difference (0xFF exactly when a borrow occurred), `d`
 * collects the low byte (the modulo result).  spu_andc then clamps
 * borrowed lanes to 0.
 */
static inline vec_uchar16 vec_subs(vec_uchar16 a, vec_uchar16 b)
{
  vec_ushort8 s1, s2;
  vec_uchar16 s, d;

  s1 = spu_sub(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8));
  s2 = spu_sub(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF));
  s = (vec_uchar16)(spu_shuffle(s1, s2, ((vec_uchar16){0, 16, 2, 18, 4, 20, 6, 22,
						       8, 24, 10, 26, 12, 28, 14, 30})));
  d = (vec_uchar16)(spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23,
						       9, 25, 11, 27, 13, 29, 15, 31})));
  return (spu_andc(d, s));
}

/* Signed byte saturating subtract.  `s` holds the modulo byte
 * differences.  Overflow is detected from the sign bits (bit 7, hence
 * the cmpgt against 0x7F on unsigned bytes):
 *   a >= 0, b < 0, s < 0  -> positive overflow, clamp to  0x7F
 *   a < 0,  b >= 0, s >= 0 -> negative overflow, clamp to 0x80
 */
static inline vec_char16 vec_subs(vec_char16 a, vec_char16 b)
{
  vec_ushort8 s1, s2;
  vec_uchar16 s, d;

  s1 = spu_sub(spu_rlmask((vec_ushort8)(a), -8), spu_rlmask((vec_ushort8)(b), -8));
  s2 = spu_sub(spu_and((vec_ushort8)(a), 0xFF), spu_and((vec_ushort8)(b), 0xFF));
  s = (vec_uchar16)(spu_shuffle(s1, s2, ((vec_uchar16){1, 17, 3, 19, 5, 21, 7, 23,
						       9, 25, 11, 27, 13, 29, 15, 31})));
  d = spu_sel(s, spu_splats((unsigned char)0x7F), spu_cmpgt(spu_nor((vec_uchar16)(a), spu_nand(s, (vec_uchar16)(b))), 0x7F));
  d = spu_sel(d, spu_splats((unsigned char)0x80), spu_cmpgt(spu_and((vec_uchar16)(a), spu_nor(s, (vec_uchar16)(b))), 0x7F));

  return ((vec_char16)(d));
}

static inline vec_char16 vec_subs(vec_bchar16 a, vec_char16 b)
{
  return (vec_subs((vec_char16)(a), b));
}

static inline vec_char16 vec_subs(vec_char16 a, vec_bchar16 b)
{
  return (vec_subs(a, (vec_char16)(b)));
}

/* Unsigned halfword: clamp to 0 wherever b > a (borrow).  */
static inline vec_ushort8 vec_subs(vec_ushort8 a, vec_ushort8 b)
{
  return (spu_andc(spu_sub(a, b), spu_cmpgt(b, a)));
}

/* Signed halfword: same sign-bit overflow test as the byte case, with
 * the sign replicated across each lane by an arithmetic shift of -15.
 */
static inline vec_short8 vec_subs(vec_short8 a, vec_short8 b)
{
  vec_short8 s;
  vec_short8 d;

  s = spu_sub(a, b);
  d = spu_sel(s, spu_splats((signed short)0x7FFF), (vec_ushort8)(spu_rlmaska(spu_nor(a, spu_nand(s, b)), -15)));
  d = spu_sel(d, spu_splats((signed short)0x8000), (vec_ushort8)(spu_rlmaska(spu_and(a, spu_nor(s, b)), -15)));

  return (d);
}

static inline vec_short8 vec_subs(vec_bshort8 a, vec_short8 b)
{
  return ((vec_short8)(vec_subs((vec_short8)(a), b)));
}

static inline vec_short8 vec_subs(vec_short8 a, vec_bshort8 b)
{
  return ((vec_short8)(vec_subs(a, (vec_short8)(b))));
}

/* Unsigned word: clamp to 0 wherever b > a (borrow).  */
static inline vec_uint4 vec_subs(vec_uint4 a, vec_uint4 b)
{
  return (spu_andc(spu_sub(a, b), spu_cmpgt(b, a)));
}

/* Signed word: sign-bit overflow test, sign replicated by shift -31.  */
static inline vec_int4 vec_subs(vec_int4 a, vec_int4 b)
{
  vec_int4 s;
  vec_int4 d;

  s = spu_sub(a, b);
  d = spu_sel(s, spu_splats((signed int)0x7FFFFFFF), (vec_uint4)(spu_rlmaska(spu_nor(a, spu_nand(s, b)), -31)));
  d = spu_sel(d, spu_splats((signed int)0x80000000), (vec_uint4)(spu_rlmaska(spu_and(a, spu_nor(s, b)), -31)));

  return (d);
}

static inline vec_int4 vec_subs(vec_bint4 a, vec_int4 b)
{
  return ((vec_int4)(vec_subs((vec_int4)(a), b)));
}

static inline vec_int4 vec_subs(vec_int4 a, vec_bint4 b)
{
  return ((vec_int4)(vec_subs(a, (vec_int4)(b))));
}
2154
2155
/* vec_sum4s (vector sum across partial (1/4) saturated)
 * =========
 * Sums the four bytes (or two halfwords) within each word of `a` and
 * saturating-adds the per-word sums to `b` via vec_adds.
 */
static inline vec_uint4 vec_sum4s(vec_uchar16 a, vec_uint4 b)
{
  vec_uint4 a01_23, a0123;

  /* Pair-sum bytes within each halfword, then pair-sum the two
     halfword results within each word.  0x1FF keeps the 9-bit low
     pair sum (two 8-bit values sum to at most 0x1FE).  */
  a01_23 = (vec_uint4)(spu_add(spu_rlmask((vec_ushort8)(a), -8),
			       spu_and((vec_ushort8)(a), 0xFF)));
  a0123 = spu_add(spu_rlmask(a01_23, -16), spu_and(a01_23, 0x1FF));
  return (vec_adds(a0123, b));
}

static inline vec_int4 vec_sum4s(vec_char16 a, vec_int4 b)
{
  vec_int4 a01_23, a0123;

  /* Same pairwise reduction, but with arithmetic shifts / spu_extend
     so the partial sums stay sign-extended.  */
  a01_23 = (vec_int4)(spu_add(spu_rlmaska((vec_short8)(a), -8),
			      spu_extend(a)));
  a0123 = spu_add(spu_rlmaska(a01_23, -16), spu_extend((vec_short8)(a01_23)));
  return (vec_adds(a0123, b));
}

static inline vec_int4 vec_sum4s(vec_short8 a, vec_int4 b)
{
  vec_int4 a0123;

  /* Sum the two sign-extended halfwords within each word.  */
  a0123 = spu_add(spu_rlmaska((vec_int4)(a), -16), spu_extend(a));
  return (vec_adds(a0123, b));
}
2186
2187
/* vec_sum2s (vector sum across partial (1/2) saturated)
 * =========
 * For each word pair, computes a[2i] + a[2i+1] + b[2i+1], saturated to
 * 32 bits, placing results in the odd words (even words zeroed).  The
 * sum is carried out in 64-bit precision: sum_l holds the low 32 bits,
 * sum_h accumulates sign extensions plus carries via spu_genc/spu_addx.
 */
static inline vec_int4 vec_sum2s(vec_int4 a, vec_int4 b)
{
  vec_int4 c, d;
  vec_int4 sign1, sign2, sign3;
  vec_int4 carry, sum_l, sum_h, sat, sat_val;

  sign1 = spu_rlmaska(a, -31);
  sign2 = spu_rlmaska(b, -31);

  /* c aligns a[2i] with lane 2i+1 so the pair lands in the odd word.  */
  c = spu_rlqwbyte(a, -4);
  sign3 = spu_rlqwbyte(sign1, -4);

  carry = spu_genc(a, b);
  sum_l = spu_add(a, b);
  sum_h = spu_addx(sign1, sign2, carry);

  carry = spu_genc(sum_l, c);
  sum_l = spu_add(sum_l, c);
  sum_h = spu_addx(sum_h, sign3, carry);

  sign1 = spu_rlmaska(sum_l, -31);
  sign2 = spu_rlmaska(sum_h, -31);

  /* Saturation value: 0x7FFFFFFF for positive overflow, 0x80000000 for
     negative, derived from the high word's sign.  */
  sat_val = spu_xor(sign2, spu_splats((signed int)0x7FFFFFFF));

  /* Overflowed iff the 64-bit sum does not fit in 32 bits: either the
     high word is not a pure sign extension of the low word.  */
  sat = spu_orc(spu_xor(sign1, sign2), (vec_int4)spu_cmpeq(sum_h, sign2));

  /* Keep only odd words, as VMX vec_sum2s specifies.  */
  d = spu_and(spu_sel(sum_l, sat_val, (vec_uint4)(sat)), (vec_int4){0, -1, 0, -1});

  return (d);
}
2222
2223
/* vec_sums (vector sum saturated)
 * ========
 * Computes a[0]+a[1]+a[2]+a[3]+b[3], saturated to 32 bits, placed in
 * word 3 (other words zeroed).  As in vec_sum2s, the accumulation is
 * done in 64-bit precision: sum_l is the low 32 bits, sum_h collects
 * sign extensions and carries (spu_genc / spu_addx).
 */
static inline vec_int4 vec_sums(vec_int4 a, vec_int4 b)
{
  vec_int4 a0, a1, a2, c0, c1, c2, d;
  vec_int4 sign_a, sign_b, sign_l, sign_h;
  vec_int4 sum_l, sum_h, sat, sat_val;

  sign_a = spu_rlmaska(a, -31);
  sign_b = spu_rlmaska(b, -31);

  /* Rotate each word of a into lane 3 so all four land on word 3.  */
  a0 = spu_rlqwbyte(a, -12);
  a1 = spu_rlqwbyte(a, -8);
  a2 = spu_rlqwbyte(a, -4);

  sum_l = spu_add(a, b);
  sum_h = spu_addx(sign_a, sign_b, spu_genc(a, b));

  c2 = spu_genc(sum_l, a2);
  sum_l = spu_add(sum_l, a2);
  sum_h = spu_addx(sum_h, spu_rlqwbyte(sign_a, -4), c2);

  c1 = spu_genc(sum_l, a1);
  sum_l = spu_add(sum_l, a1);
  sum_h = spu_addx(sum_h, spu_rlqwbyte(sign_a, -8), c1);

  c0 = spu_genc(sum_l, a0);
  sum_l = spu_add(sum_l, a0);
  sum_h = spu_addx(sum_h, spu_rlqwbyte(sign_a, -12), c0);

  sign_l = spu_rlmaska(sum_l, -31);
  sign_h = spu_rlmaska(sum_h, -31);

  /* 0x7FFFFFFF on positive overflow, 0x80000000 on negative.  */
  sat_val = spu_xor(sign_h, spu_splats((signed int)0x7FFFFFFF));

  /* Saturate when the 64-bit result does not fit in 32 bits.  */
  sat = spu_orc(spu_xor(sign_l, sign_h), (vec_int4)spu_cmpeq(sum_h, sign_h));

  /* Only word 3 carries the result, per VMX vec_sums.  */
  d = spu_and(spu_sel(sum_l, sat_val, (vec_uint4)(sat)), ((vec_int4){0, 0, 0, -1}));

  return (d);
}
2266
2267
/* vec_trunc (vector truncate)
 * =========
 * Rounds each float toward zero by clearing the fraction bits that lie
 * below the binary point, operating directly on the IEEE-754 bit
 * pattern.
 */
static inline vec_float4 vec_trunc(vec_float4 a)
{
  vec_int4 exp;
  vec_uint4 mask;

  /* exp = 127 - biased_exponent: positive when |a| < 1, increasingly
     negative as the magnitude grows.  */
  exp = spu_sub(127, (vec_int4)(spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF)));
  /* Shift the 23-bit mantissa mask right by the (negated) exponent so
     only sub-integer fraction bits remain set.  */
  mask = spu_rlmask(spu_splats((unsigned int)0x7FFFFF), exp);
  /* exp <= -31 means every mantissa bit is integral: mask becomes 0.  */
  mask = spu_sel(spu_splats((unsigned int)0), mask, spu_cmpgt(exp, -31));
  /* exp >= 1 means |a| < 1: mask everything (including sign-adjacent
     bits) so the result truncates to +/-0.  */
  mask = spu_or(mask, spu_xor((vec_uint4)(spu_rlmaska(spu_add(exp, -1), -31)), -1));
  return (spu_andc(a, (vec_float4)(mask)));
}
2282
/* vec_unpackh (vector unpack high element)
 * ===========
 * Sign-extends the high half of the input vector into wider elements.
 */
static inline vec_short8 vec_unpackh(vec_char16 a)
{
  /* Duplicate bytes 0..7 pairwise, then spu_extend sign-extends the
     odd bytes of each halfword into full halfwords.  */
  return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){0, 0, 1, 1, 2, 2, 3, 3,
						      4, 4, 5, 5, 6, 6, 7, 7}))));
}

static inline vec_bshort8 vec_unpackh(vec_bchar16 a)
{
  return ((vec_bshort8)(vec_unpackh((vec_char16)(a))));
}

static inline vec_int4 vec_unpackh(vec_short8 a)
{
  /* Place halfwords 0..3 into the odd halfword of each word; spu_extend
     then sign-extends them to 32 bits.  */
  return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){0, 0, 0, 1, 0, 0, 2, 3,
						      0, 0, 4, 5, 0, 0, 6, 7}))));
}

#ifdef SUPPORT_UNPACK_PIXEL
/* Due to type conflicts, unpacking of pixel types and boolean shorts
 * cannot simultaneously be supported. By default, the boolean short is
 * supported.
 */
static inline vec_uint4 vec_unpackh(vec_pixel8 a)
{
  vec_ushort8 p1, p2;

  /* Splits each 1/5/5/5 pixel (a.p) into four byte fields of a word;
     p1 carries the replicated top bit and low 5-bit field, p2 the two
     middle 5-bit fields.  Shuffle index 128 produces zero bytes.  */
  p1 = spu_shuffle((vec_ushort8)(spu_rlmaska((vec_short8)(a.p), -7)),
		   spu_and((vec_ushort8)(a.p), 0x1F),
		   ((vec_uchar16){ 0, 128, 128, 17, 2, 128, 128, 19,
				   4, 128, 128, 21, 6, 128, 128, 23}));
  p2 = spu_shuffle(spu_and(spu_rlmask((vec_ushort8)(a.p), -5), 0x1F),
		   spu_and(spu_rlmask((vec_ushort8)(a.p), -10), 0x1F),
		   ((vec_uchar16){ 128, 17, 1, 128, 128, 19, 3, 128,
				   128, 21, 5, 128, 128, 23, 7, 128}));
  return ((vec_uint4)(spu_or(p1, p2)));
}

#else

static inline vec_bint4 vec_unpackh(vec_bshort8 a)
{
  return ((vec_bint4)(vec_unpackh((vec_short8)(a))));
}
#endif
2330
2331
2332
2333
2334
/* vec_unpackl (vector unpack low element)
 * ===========
 * Sign-extends the low half of the input vector into wider elements
 * (mirror of vec_unpackh using bytes/halfwords 8..15 / 4..7).
 */
static inline vec_short8 vec_unpackl(vec_char16 a)
{
  /* Duplicate bytes 8..15 pairwise; spu_extend sign-extends the odd
     byte of each halfword.  */
  return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){8, 8, 9, 9, 10, 10, 11, 11,
						      12, 12, 13, 13, 14, 14, 15, 15}))));
}

static inline vec_bshort8 vec_unpackl(vec_bchar16 a)
{
  return ((vec_bshort8)(vec_unpackl((vec_char16)(a))));
}


static inline vec_int4 vec_unpackl(vec_short8 a)
{
  /* Place halfwords 4..7 into the odd halfword of each word for
     spu_extend to sign-extend to 32 bits.  */
  return (spu_extend(spu_shuffle(a, a, ((vec_uchar16){0, 0, 8, 9, 0, 0, 10, 11,
						      0, 0,12,13, 0, 0, 14, 15}))));
}
2355
2356
2357 #ifdef SUPPORT_UNPACK_PIXEL
2358 /* Due to type conflicts, unpacking of pixel types and boolean shorts
2359 * cannot simultaneously be supported. By default, the boolean short is
2360 * supported.
2361 */
vec_unpackl(vec_pixel8 a)2362 static inline vec_uint4 vec_unpackl(vec_pixel8 a)
2363 {
2364 vec_ushort8 p1, p2;
2365
2366 p1 = spu_shuffle((vec_ushort8)(spu_rlmaska((vec_short8)(a), -7)),
2367 spu_and((vec_ushort8)(a), 0x1F),
2368 ((vec_uchar16){ 8, 128, 128, 25, 10, 128, 128, 27,
2369 12, 128, 128, 29, 14, 128, 128, 31}));
2370 p2 = spu_shuffle(spu_and(spu_rlmask((vec_ushort8)(a), -5), 0x1F),
2371 spu_and(spu_rlmask((vec_ushort8)(a), -10), 0x1F),
2372 ((vec_uchar16){ 128, 25, 9, 128, 128, 27, 11, 128,
2373 128, 29, 13, 128, 128, 31, 15, 128}));
2374 return ((vec_uint4)(spu_or(p1, p2)));
2375 }
2376
2377 #else
2378
vec_unpackl(vec_bshort8 a)2379 static inline vec_bint4 vec_unpackl(vec_bshort8 a)
2380 {
2381 return ((vec_bint4)(vec_unpackl((vec_short8)(a))));
2382
2383 }
2384 #endif
2385
2386
2387
2388 /* vec_xor (vector logical xor)
2389 * ======
2390 */
vec_xor(vec_uchar16 a,vec_uchar16 b)2391 static inline vec_uchar16 vec_xor(vec_uchar16 a, vec_uchar16 b)
2392 {
2393 return (spu_xor(a, b));
2394 }
2395
vec_xor(vec_char16 a,vec_char16 b)2396 static inline vec_char16 vec_xor(vec_char16 a, vec_char16 b)
2397 {
2398 return (spu_xor(a, b));
2399 }
2400
vec_xor(vec_bchar16 a,vec_char16 b)2401 static inline vec_char16 vec_xor(vec_bchar16 a, vec_char16 b)
2402 {
2403 return (spu_xor((vec_char16)(a), b));
2404 }
2405
vec_xor(vec_char16 a,vec_bchar16 b)2406 static inline vec_char16 vec_xor(vec_char16 a, vec_bchar16 b)
2407 {
2408 return (spu_xor(a, (vec_char16)(b)));
2409 }
2410
vec_xor(vec_ushort8 a,vec_ushort8 b)2411 static inline vec_ushort8 vec_xor(vec_ushort8 a, vec_ushort8 b)
2412 {
2413 return (spu_xor(a, b));
2414 }
2415
vec_xor(vec_short8 a,vec_short8 b)2416 static inline vec_short8 vec_xor(vec_short8 a, vec_short8 b)
2417 {
2418 return (spu_xor(a, b));
2419 }
2420
vec_xor(vec_bshort8 a,vec_short8 b)2421 static inline vec_short8 vec_xor(vec_bshort8 a, vec_short8 b)
2422 {
2423 return (spu_xor((vec_short8)(a), b));
2424 }
2425
vec_xor(vec_short8 a,vec_bshort8 b)2426 static inline vec_short8 vec_xor(vec_short8 a, vec_bshort8 b)
2427 {
2428 return (spu_xor(a, (vec_short8)(b)));
2429 }
2430
vec_xor(vec_uint4 a,vec_uint4 b)2431 static inline vec_uint4 vec_xor(vec_uint4 a, vec_uint4 b)
2432 {
2433 return (spu_xor(a, b));
2434 }
2435
vec_xor(vec_int4 a,vec_int4 b)2436 static inline vec_int4 vec_xor(vec_int4 a, vec_int4 b)
2437 {
2438 return (spu_xor(a, b));
2439 }
2440
vec_xor(vec_bint4 a,vec_int4 b)2441 static inline vec_int4 vec_xor(vec_bint4 a, vec_int4 b)
2442 {
2443 return (spu_xor((vec_int4)(a), b));
2444 }
2445
vec_xor(vec_int4 a,vec_bint4 b)2446 static inline vec_int4 vec_xor(vec_int4 a, vec_bint4 b)
2447 {
2448 return (spu_xor(a, (vec_int4)(b)));
2449 }
2450
vec_xor(vec_float4 a,vec_float4 b)2451 static inline vec_float4 vec_xor(vec_float4 a, vec_float4 b)
2452 {
2453 return (spu_xor(a, b));
2454 }
2455
vec_xor(vec_bint4 a,vec_float4 b)2456 static inline vec_float4 vec_xor(vec_bint4 a, vec_float4 b)
2457 {
2458 return (spu_xor((vec_float4)(a),b));
2459 }
2460
vec_xor(vec_float4 a,vec_bint4 b)2461 static inline vec_float4 vec_xor(vec_float4 a, vec_bint4 b)
2462 {
2463 return (spu_xor(a, (vec_float4)(b)));
2464 }
2465
2466 /************************************************************************
2467 * PREDICATES
2468 ************************************************************************/
2469
2470 /* vec_all_eq (all elements equal)
2471 * ==========
2472 */
vec_all_eq(vec_uchar16 a,vec_uchar16 b)2473 static inline int vec_all_eq(vec_uchar16 a, vec_uchar16 b)
2474 {
2475 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFFFF));
2476 }
2477
vec_all_eq(vec_char16 a,vec_char16 b)2478 static inline int vec_all_eq(vec_char16 a, vec_char16 b)
2479 {
2480 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFFFF));
2481 }
2482
vec_all_eq(vec_bchar16 a,vec_char16 b)2483 static inline int vec_all_eq(vec_bchar16 a, vec_char16 b)
2484 {
2485 return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) == 0xFFFF));
2486 }
2487
vec_all_eq(vec_char16 a,vec_bchar16 b)2488 static inline int vec_all_eq(vec_char16 a, vec_bchar16 b)
2489 {
2490 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) == 0xFFFF));
2491 }
2492
vec_all_eq(vec_ushort8 a,vec_ushort8 b)2493 static inline int vec_all_eq(vec_ushort8 a, vec_ushort8 b)
2494 {
2495 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFF));
2496 }
2497
vec_all_eq(vec_short8 a,vec_short8 b)2498 static inline int vec_all_eq(vec_short8 a, vec_short8 b)
2499 {
2500 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xFF));
2501 }
2502
vec_all_eq(vec_bshort8 a,vec_short8 b)2503 static inline int vec_all_eq(vec_bshort8 a, vec_short8 b)
2504 {
2505 return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) == 0xFF));
2506 }
2507
vec_all_eq(vec_short8 a,vec_bshort8 b)2508 static inline int vec_all_eq(vec_short8 a, vec_bshort8 b)
2509 {
2510 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) == 0xFF));
2511 }
2512
vec_all_eq(vec_uint4 a,vec_uint4 b)2513 static inline int vec_all_eq(vec_uint4 a, vec_uint4 b)
2514 {
2515 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF));
2516 }
2517
vec_all_eq(vec_int4 a,vec_int4 b)2518 static inline int vec_all_eq(vec_int4 a, vec_int4 b)
2519 {
2520 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF));
2521 }
2522
vec_all_eq(vec_bint4 a,vec_int4 b)2523 static inline int vec_all_eq(vec_bint4 a, vec_int4 b)
2524 {
2525 return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_int4)(a), b)), 0) == 0xF));
2526 }
2527
vec_all_eq(vec_int4 a,vec_bint4 b)2528 static inline int vec_all_eq(vec_int4 a, vec_bint4 b)
2529 {
2530 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_int4)(b))), 0) == 0xF));
2531 }
2532
vec_all_eq(vec_float4 a,vec_float4 b)2533 static inline int vec_all_eq(vec_float4 a, vec_float4 b)
2534 {
2535 return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0xF));
2536 }
2537
2538
2539 /* vec_all_ge (all elements greater than or equal)
2540 * ==========
2541 */
vec_all_ge(vec_uchar16 a,vec_uchar16 b)2542 static inline int vec_all_ge(vec_uchar16 a, vec_uchar16 b)
2543 {
2544 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
2545 }
2546
vec_all_ge(vec_char16 a,vec_char16 b)2547 static inline int vec_all_ge(vec_char16 a, vec_char16 b)
2548 {
2549 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
2550 }
2551
vec_all_ge(vec_bchar16 a,vec_char16 b)2552 static inline int vec_all_ge(vec_bchar16 a, vec_char16 b)
2553 {
2554 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) == 0));
2555 }
2556
vec_all_ge(vec_char16 a,vec_bchar16 b)2557 static inline int vec_all_ge(vec_char16 a, vec_bchar16 b)
2558 {
2559 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) == 0));
2560 }
2561
vec_all_ge(vec_ushort8 a,vec_ushort8 b)2562 static inline int vec_all_ge(vec_ushort8 a, vec_ushort8 b)
2563 {
2564 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
2565 }
2566
vec_all_ge(vec_short8 a,vec_short8 b)2567 static inline int vec_all_ge(vec_short8 a, vec_short8 b)
2568 {
2569 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
2570 }
2571
vec_all_ge(vec_bshort8 a,vec_short8 b)2572 static inline int vec_all_ge(vec_bshort8 a, vec_short8 b)
2573 {
2574 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) == 0));
2575 }
2576
vec_all_ge(vec_short8 a,vec_bshort8 b)2577 static inline int vec_all_ge(vec_short8 a, vec_bshort8 b)
2578 {
2579 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) == 0));
2580 }
2581
vec_all_ge(vec_uint4 a,vec_uint4 b)2582 static inline int vec_all_ge(vec_uint4 a, vec_uint4 b)
2583 {
2584 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
2585 }
2586
vec_all_ge(vec_int4 a,vec_int4 b)2587 static inline int vec_all_ge(vec_int4 a, vec_int4 b)
2588 {
2589 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
2590 }
2591
vec_all_ge(vec_bint4 a,vec_int4 b)2592 static inline int vec_all_ge(vec_bint4 a, vec_int4 b)
2593 {
2594 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_int4)(a))), 0) == 0));
2595 }
2596
vec_all_ge(vec_int4 a,vec_bint4 b)2597 static inline int vec_all_ge(vec_int4 a, vec_bint4 b)
2598 {
2599 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(b), a)), 0) == 0));
2600 }
2601
vec_all_ge(vec_float4 a,vec_float4 b)2602 static inline int vec_all_ge(vec_float4 a, vec_float4 b)
2603 {
2604 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
2605 }
2606
2607
2608 /* vec_all_gt (all elements greater than)
2609 * ==========
2610 */
vec_all_gt(vec_uchar16 a,vec_uchar16 b)2611 static inline int vec_all_gt(vec_uchar16 a, vec_uchar16 b)
2612 {
2613 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFFFF));
2614 }
2615
vec_all_gt(vec_char16 a,vec_char16 b)2616 static inline int vec_all_gt(vec_char16 a, vec_char16 b)
2617 {
2618 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFFFF));
2619 }
2620
vec_all_gt(vec_bchar16 a,vec_char16 b)2621 static inline int vec_all_gt(vec_bchar16 a, vec_char16 b)
2622 {
2623 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) == 0xFFFF));
2624 }
2625
vec_all_gt(vec_char16 a,vec_bchar16 b)2626 static inline int vec_all_gt(vec_char16 a, vec_bchar16 b)
2627 {
2628 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) == 0xFFFF));
2629 }
2630
vec_all_gt(vec_ushort8 a,vec_ushort8 b)2631 static inline int vec_all_gt(vec_ushort8 a, vec_ushort8 b)
2632 {
2633 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFF));
2634 }
2635
vec_all_gt(vec_short8 a,vec_short8 b)2636 static inline int vec_all_gt(vec_short8 a, vec_short8 b)
2637 {
2638 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xFF));
2639 }
2640
vec_all_gt(vec_bshort8 a,vec_short8 b)2641 static inline int vec_all_gt(vec_bshort8 a, vec_short8 b)
2642 {
2643 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) == 0xFF));
2644 }
2645
vec_all_gt(vec_short8 a,vec_bshort8 b)2646 static inline int vec_all_gt(vec_short8 a, vec_bshort8 b)
2647 {
2648 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) == 0xFF));
2649 }
2650
vec_all_gt(vec_uint4 a,vec_uint4 b)2651 static inline int vec_all_gt(vec_uint4 a, vec_uint4 b)
2652 {
2653 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF));
2654 }
2655
vec_all_gt(vec_int4 a,vec_int4 b)2656 static inline int vec_all_gt(vec_int4 a, vec_int4 b)
2657 {
2658 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF));
2659 }
2660
vec_all_gt(vec_bint4 a,vec_int4 b)2661 static inline int vec_all_gt(vec_bint4 a, vec_int4 b)
2662 {
2663 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(a), b)), 0) == 0xF));
2664 }
2665
vec_all_gt(vec_int4 a,vec_bint4 b)2666 static inline int vec_all_gt(vec_int4 a, vec_bint4 b)
2667 {
2668 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_int4)(b))), 0) == 0xF));
2669 }
2670
vec_all_gt(vec_float4 a,vec_float4 b)2671 static inline int vec_all_gt(vec_float4 a, vec_float4 b)
2672 {
2673 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF));
2674 }
2675
2676
/* vec_all_in (all elements in bounds)
 * ==========
 * True iff |a[i]| <= b[i] for every element.  Each lane passes when
 * neither |a| > b (spu_cmpabsgt) nor b is negative (sign bit of b,
 * replicated by the arithmetic shift of -31); spu_nor combines the
 * two failure conditions and spu_gather requires all four lanes to
 * pass (mask 0xF).
 */
static inline int vec_all_in(vec_float4 a, vec_float4 b)
{
  return (spu_extract(spu_gather(spu_nor(spu_cmpabsgt(a, b), (vec_uint4)(spu_rlmaska((vec_int4)(b), -31)))), 0) == 0xF);
}
2684
2685
2686 /* vec_all_le (all elements less than or equal)
2687 * ==========
2688 */
vec_all_le(vec_uchar16 a,vec_uchar16 b)2689 static inline int vec_all_le(vec_uchar16 a, vec_uchar16 b)
2690 {
2691 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
2692 }
2693
vec_all_le(vec_char16 a,vec_char16 b)2694 static inline int vec_all_le(vec_char16 a, vec_char16 b)
2695 {
2696 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
2697 }
2698
vec_all_le(vec_bchar16 a,vec_char16 b)2699 static inline int vec_all_le(vec_bchar16 a, vec_char16 b)
2700 {
2701 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) == 0));
2702 }
2703
vec_all_le(vec_char16 a,vec_bchar16 b)2704 static inline int vec_all_le(vec_char16 a, vec_bchar16 b)
2705 {
2706 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) == 0));
2707 }
2708
vec_all_le(vec_ushort8 a,vec_ushort8 b)2709 static inline int vec_all_le(vec_ushort8 a, vec_ushort8 b)
2710 {
2711 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
2712 }
2713
vec_all_le(vec_short8 a,vec_short8 b)2714 static inline int vec_all_le(vec_short8 a, vec_short8 b)
2715 {
2716 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
2717 }
2718
vec_all_le(vec_bshort8 a,vec_short8 b)2719 static inline int vec_all_le(vec_bshort8 a, vec_short8 b)
2720 {
2721 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) == 0));
2722 }
2723
vec_all_le(vec_short8 a,vec_bshort8 b)2724 static inline int vec_all_le(vec_short8 a, vec_bshort8 b)
2725 {
2726 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) == 0));
2727 }
2728
vec_all_le(vec_uint4 a,vec_uint4 b)2729 static inline int vec_all_le(vec_uint4 a, vec_uint4 b)
2730 {
2731 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
2732 }
2733
vec_all_le(vec_int4 a,vec_int4 b)2734 static inline int vec_all_le(vec_int4 a, vec_int4 b)
2735 {
2736 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
2737 }
2738
vec_all_le(vec_bint4 a,vec_int4 b)2739 static inline int vec_all_le(vec_bint4 a, vec_int4 b)
2740 {
2741 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(a), b)), 0) == 0));
2742 }
2743
vec_all_le(vec_int4 a,vec_bint4 b)2744 static inline int vec_all_le(vec_int4 a, vec_bint4 b)
2745 {
2746 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_int4)(b))), 0) == 0));
2747 }
2748
vec_all_le(vec_float4 a,vec_float4 b)2749 static inline int vec_all_le(vec_float4 a, vec_float4 b)
2750 {
2751 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
2752 }
2753
2754
2755 /* vec_all_lt (all elements less than)
2756 * ==========
2757 */
vec_all_lt(vec_uchar16 a,vec_uchar16 b)2758 static inline int vec_all_lt(vec_uchar16 a, vec_uchar16 b)
2759 {
2760 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFFFF));
2761 }
2762
vec_all_lt(vec_char16 a,vec_char16 b)2763 static inline int vec_all_lt(vec_char16 a, vec_char16 b)
2764 {
2765 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFFFF));
2766 }
2767
vec_all_lt(vec_bchar16 a,vec_char16 b)2768 static inline int vec_all_lt(vec_bchar16 a, vec_char16 b)
2769 {
2770 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) == 0xFFFF));
2771 }
2772
vec_all_lt(vec_char16 a,vec_bchar16 b)2773 static inline int vec_all_lt(vec_char16 a, vec_bchar16 b)
2774 {
2775 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) == 0xFFFF));
2776 }
2777
vec_all_lt(vec_ushort8 a,vec_ushort8 b)2778 static inline int vec_all_lt(vec_ushort8 a, vec_ushort8 b)
2779 {
2780 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFF));
2781 }
2782
vec_all_lt(vec_short8 a,vec_short8 b)2783 static inline int vec_all_lt(vec_short8 a, vec_short8 b)
2784 {
2785 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xFF));
2786 }
2787
vec_all_lt(vec_bshort8 a,vec_short8 b)2788 static inline int vec_all_lt(vec_bshort8 a, vec_short8 b)
2789 {
2790 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) == 0xFF));
2791 }
2792
vec_all_lt(vec_short8 a,vec_bshort8 b)2793 static inline int vec_all_lt(vec_short8 a, vec_bshort8 b)
2794 {
2795 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) == 0xFF));
2796 }
2797
vec_all_lt(vec_uint4 a,vec_uint4 b)2798 static inline int vec_all_lt(vec_uint4 a, vec_uint4 b)
2799 {
2800 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF));
2801 }
2802
vec_all_lt(vec_int4 a,vec_int4 b)2803 static inline int vec_all_lt(vec_int4 a, vec_int4 b)
2804 {
2805 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF));
2806 }
2807
vec_all_lt(vec_bint4 a,vec_int4 b)2808 static inline int vec_all_lt(vec_bint4 a, vec_int4 b)
2809 {
2810 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_int4)(a))), 0) == 0xF));
2811 }
2812
vec_all_lt(vec_int4 a,vec_bint4 b)2813 static inline int vec_all_lt(vec_int4 a, vec_bint4 b)
2814 {
2815 return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(b), a)), 0) == 0xF));
2816 }
2817
vec_all_lt(vec_float4 a,vec_float4 b)2818 static inline int vec_all_lt(vec_float4 a, vec_float4 b)
2819 {
2820 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF));
2821 }
2822
2823
/* vec_all_nan (all elements not a number)
 * ===========
 * The inline function performs the generic IEEE test: an element is a
 * NaN when its exponent field is all ones and its mantissa is nonzero.
 */
static inline int vec_all_nan(vec_float4 a)
{
  vec_uint4 exp, man;
  vec_uint4 exp_mask = spu_splats((unsigned int)0x7F800000);

  exp = spu_and((vec_uint4)(a), exp_mask);
  man = spu_and((vec_uint4)(a), spu_splats((unsigned int)0x007FFFFF));
  return ((int)(spu_extract(spu_gather(spu_andc(spu_cmpeq(exp, exp_mask),
						spu_cmpeq(man, 0))), 0) == 0xF));
}

/* NOTE(review): this macro deliberately overrides the function above so
 * the predicate folds to the constant 0 — presumably because SPU
 * single-precision arithmetic never produces NaNs; confirm against the
 * SPU ISA before changing.  */
#define vec_all_nan(_a) (0)
2839
2840
2841 /* vec_all_ne (all elements not equal)
2842 * ==========
2843 */
/* True only when no element pair compares equal: spu_gather packs the
   per-element equality bits, so an all-zero mask means "all not equal".
   This works for every element count, hence the shared "== 0" test.  */
static inline int vec_all_ne(vec_uchar16 a, vec_uchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
}

static inline int vec_all_ne(vec_char16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
}

/* Boolean-vector operands are bit-compatible; cast to the signed type.  */
static inline int vec_all_ne(vec_bchar16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) == 0));
}

static inline int vec_all_ne(vec_char16 a, vec_bchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) == 0));
}

static inline int vec_all_ne(vec_ushort8 a, vec_ushort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
}

static inline int vec_all_ne(vec_short8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
}

static inline int vec_all_ne(vec_bshort8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) == 0));
}

static inline int vec_all_ne(vec_short8 a, vec_bshort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) == 0));
}

static inline int vec_all_ne(vec_uint4 a, vec_uint4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
}

static inline int vec_all_ne(vec_int4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
}

static inline int vec_all_ne(vec_bint4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_int4)(a), b)), 0) == 0));
}

static inline int vec_all_ne(vec_int4 a, vec_bint4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_int4)(b))), 0) == 0));
}

static inline int vec_all_ne(vec_float4 a, vec_float4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) == 0));
}
2908
2909
2910 /* vec_all_nge (all elements not greater than or equal)
2911 * ===========
2912 */
vec_all_nge(vec_float4 a,vec_float4 b)2913 static inline int vec_all_nge(vec_float4 a, vec_float4 b)
2914 {
2915 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0xF));
2916 }
2917
2918
2919 /* vec_all_ngt (all elements not greater than)
2920 * ===========
2921 */
vec_all_ngt(vec_float4 a,vec_float4 b)2922 static inline int vec_all_ngt(vec_float4 a, vec_float4 b)
2923 {
2924 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0));
2925 }
2926
2927
2928 /* vec_all_nle (all elements not less than or equal)
2929 * ===========
2930 */
vec_all_nle(vec_float4 a,vec_float4 b)2931 static inline int vec_all_nle(vec_float4 a, vec_float4 b)
2932 {
2933 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) == 0xF));
2934 }
2935
2936
2937 /* vec_all_nlt (all elements not less than)
2938 * ===========
2939 */
vec_all_nlt(vec_float4 a,vec_float4 b)2940 static inline int vec_all_nlt(vec_float4 a, vec_float4 b)
2941 {
2942 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) == 0));
2943 }
2944
2945
2946 /* vec_all_numeric (all elements numeric)
2947 * ===========
2948 */
vec_all_numeric(vec_float4 a)2949 static inline int vec_all_numeric(vec_float4 a)
2950 {
2951 vec_uint4 exp;
2952
2953 exp = spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF);
2954 return ((int)(spu_extract(spu_gather(spu_cmpeq(exp, 255)), 0) == 0));
2955 }
2956
2957
2958
2959 /* vec_any_eq (any elements equal)
2960 * ==========
2961 */
/* Byte and halfword variants use the gather-mask idiom: any non-zero bit in
   the packed compare mask means at least one element pair was equal.  */
static inline int vec_any_eq(vec_uchar16 a, vec_uchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0));
}

static inline int vec_any_eq(vec_char16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0));
}

/* Boolean-vector operands are bit-compatible; cast to the signed type.  */
static inline int vec_any_eq(vec_bchar16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) != 0));
}

static inline int vec_any_eq(vec_char16 a, vec_bchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) != 0));
}

static inline int vec_any_eq(vec_ushort8 a, vec_ushort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0));
}

static inline int vec_any_eq(vec_short8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0));
}

static inline int vec_any_eq(vec_bshort8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) != 0));
}

static inline int vec_any_eq(vec_short8 a, vec_bshort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) != 0));
}

/* Word variants use a different idiom: shift each 32-bit compare result down
   to its LSB (rlmask by -31), OR across all elements (spu_orx), and return
   that bit directly -- non-zero iff any element compared equal.  */
static inline int vec_any_eq(vec_uint4 a, vec_uint4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, b), -31)), 0)));
}

static inline int vec_any_eq(vec_int4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, b), -31)), 0)));
}

static inline int vec_any_eq(vec_bint4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq((vec_int4)(a), b), -31)), 0)));
}

static inline int vec_any_eq(vec_int4 a, vec_bint4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, (vec_int4)(b)), -31)), 0)));
}

static inline int vec_any_eq(vec_float4 a, vec_float4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpeq(a, b), -31)), 0)));
}
3026
3027 /* vec_any_ge (any elements greater than or equal)
3028 * ==========
3029 */
/* Any a >= b is the negation of all a < b: compute b > a per element and
   test that the gathered mask is NOT all ones (0xFFFF for 16 elements,
   0xFF for 8, 0xF for 4).  */
static inline int vec_any_ge(vec_uchar16 a, vec_uchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFFFF));
}

static inline int vec_any_ge(vec_char16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFFFF));
}

/* Boolean-vector operands are bit-compatible; cast to the signed type.  */
static inline int vec_any_ge(vec_bchar16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) != 0xFFFF));
}

static inline int vec_any_ge(vec_char16 a, vec_bchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) != 0xFFFF));
}

static inline int vec_any_ge(vec_ushort8 a, vec_ushort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFF));
}

static inline int vec_any_ge(vec_short8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xFF));
}

static inline int vec_any_ge(vec_bshort8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) != 0xFF));
}

static inline int vec_any_ge(vec_short8 a, vec_bshort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) != 0xFF));
}

static inline int vec_any_ge(vec_uint4 a, vec_uint4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF));
}

static inline int vec_any_ge(vec_int4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF));
}

static inline int vec_any_ge(vec_bint4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_int4)(a))), 0) != 0xF));
}

static inline int vec_any_ge(vec_int4 a, vec_bint4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(b), a)), 0) != 0xF));
}

static inline int vec_any_ge(vec_float4 a, vec_float4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF));
}
3094
3095
3096 /* vec_any_gt (any elements greater than)
3097 * ==========
3098 */
/* Any a > b: byte/halfword variants test the gathered compare mask for any
   set bit; word variants OR the compare LSBs across elements (spu_orx).  */
static inline int vec_any_gt(vec_uchar16 a, vec_uchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
}

static inline int vec_any_gt(vec_char16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
}

/* Boolean-vector operands are bit-compatible; cast to the signed type.  */
static inline int vec_any_gt(vec_bchar16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) != 0));
}

static inline int vec_any_gt(vec_char16 a, vec_bchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) != 0));
}

static inline int vec_any_gt(vec_ushort8 a, vec_ushort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
}

static inline int vec_any_gt(vec_short8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
}

static inline int vec_any_gt(vec_bshort8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) != 0));
}

static inline int vec_any_gt(vec_short8 a, vec_bshort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) != 0));
}


/* Word variants: shift each 32-bit compare result down to its LSB, OR
   across all elements; result is 1 iff some element satisfies a > b.  */
static inline int vec_any_gt(vec_uint4 a, vec_uint4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, b), -31)), 0)));
}

static inline int vec_any_gt(vec_int4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, b), -31)), 0)));
}

static inline int vec_any_gt(vec_bint4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt((vec_int4)(a), b), -31)), 0)));
}

static inline int vec_any_gt(vec_int4 a, vec_bint4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, (vec_int4)(b)), -31)), 0)));
}

static inline int vec_any_gt(vec_float4 a, vec_float4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(a, b), -31)), 0)));
}
3164
3165 /* vec_any_le (any elements less than or equal)
3166 * ==========
3167 */
/* Any a <= b is the negation of all a > b: test that the gathered a > b
   mask is NOT all ones (0xFFFF / 0xFF / 0xF per element count).  */
static inline int vec_any_le(vec_uchar16 a, vec_uchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFFFF));
}

static inline int vec_any_le(vec_char16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFFFF));
}

/* Boolean-vector operands are bit-compatible; cast to the signed type.  */
static inline int vec_any_le(vec_bchar16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(a), b)), 0) != 0xFFFF));
}

static inline int vec_any_le(vec_char16 a, vec_bchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_char16)(b))), 0) != 0xFFFF));
}

static inline int vec_any_le(vec_ushort8 a, vec_ushort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFF));
}

static inline int vec_any_le(vec_short8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xFF));
}

static inline int vec_any_le(vec_bshort8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(a), b)), 0) != 0xFF));
}

static inline int vec_any_le(vec_short8 a, vec_bshort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_short8)(b))), 0) != 0xFF));
}

static inline int vec_any_le(vec_uint4 a, vec_uint4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF));
}

static inline int vec_any_le(vec_int4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF));
}

static inline int vec_any_le(vec_bint4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_int4)(a), b)), 0) != 0xF));
}

static inline int vec_any_le(vec_int4 a, vec_bint4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, (vec_int4)(b))), 0) != 0xF));
}

static inline int vec_any_le(vec_float4 a, vec_float4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF));
}
3232
3233
3234 /* vec_any_lt (any elements less than)
3235 * ==========
3236 */
/* Any a < b is computed as any b > a.  Byte/halfword variants test the
   gathered mask for a non-zero bit; word variants OR the compare LSBs
   across elements with spu_orx.  */
static inline int vec_any_lt(vec_uchar16 a, vec_uchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0));
}

static inline int vec_any_lt(vec_char16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0));
}

/* Boolean-vector operands are bit-compatible; cast to the signed type.  */
static inline int vec_any_lt(vec_bchar16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_char16)(a))), 0) != 0));
}

static inline int vec_any_lt(vec_char16 a, vec_bchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_char16)(b), a)), 0) != 0));
}

static inline int vec_any_lt(vec_ushort8 a, vec_ushort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0));
}

static inline int vec_any_lt(vec_short8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0));
}

static inline int vec_any_lt(vec_bshort8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt(b, (vec_short8)(a))), 0) != 0));
}

static inline int vec_any_lt(vec_short8 a, vec_bshort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpgt((vec_short8)(b), a)), 0) != 0));
}

static inline int vec_any_lt(vec_uint4 a, vec_uint4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0)));
}

static inline int vec_any_lt(vec_int4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0)));
}

static inline int vec_any_lt(vec_bint4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, (vec_int4)(a)), -31)), 0)));
}

static inline int vec_any_lt(vec_int4 a, vec_bint4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt((vec_int4)(b), a), -31)), 0)));
}

static inline int vec_any_lt(vec_float4 a, vec_float4 b)
{
  return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0)));
}
3301
3302 /* vec_any_nan (any elements not a number)
3303 * ===========
3304 */
vec_any_nan(vec_float4 a)3305 static inline int vec_any_nan(vec_float4 a)
3306 {
3307 vec_uint4 exp, man;
3308 vec_uint4 exp_mask = spu_splats((unsigned int)0x7F800000);
3309
3310 exp = spu_and((vec_uint4)(a), exp_mask);
3311 man = spu_and((vec_uint4)(a), spu_splats((unsigned int)0x007FFFFF));
3312 return ((int)(spu_extract(spu_gather(spu_andc(spu_cmpeq(exp, exp_mask),
3313 spu_cmpeq(man, 0))), 0) != 0));
3314 }
3315
3316
3317 /* vec_any_ne (any elements not equal)
3318 * ==========
3319 */
/* Any a != b is the negation of all a == b: test that the gathered equality
   mask is NOT all ones (0xFFFF / 0xFF / 0xF per element count).  */
static inline int vec_any_ne(vec_uchar16 a, vec_uchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFFFF));
}

static inline int vec_any_ne(vec_char16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFFFF));
}

/* Boolean-vector operands are bit-compatible; cast to the signed type.  */
static inline int vec_any_ne(vec_bchar16 a, vec_char16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_char16)(a), b)), 0) != 0xFFFF));
}

static inline int vec_any_ne(vec_char16 a, vec_bchar16 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_char16)(b))), 0) != 0xFFFF));
}

static inline int vec_any_ne(vec_ushort8 a, vec_ushort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFF));
}

static inline int vec_any_ne(vec_short8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xFF));
}

static inline int vec_any_ne(vec_bshort8 a, vec_short8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_short8)(a), b)), 0) != 0xFF));
}

static inline int vec_any_ne(vec_short8 a, vec_bshort8 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_short8)(b))), 0) != 0xFF));
}

static inline int vec_any_ne(vec_uint4 a, vec_uint4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xF));
}

static inline int vec_any_ne(vec_int4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xF));
}

static inline int vec_any_ne(vec_bint4 a, vec_int4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq((vec_int4)(a), b)), 0) != 0xF));
}

static inline int vec_any_ne(vec_int4 a, vec_bint4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, (vec_int4)(b))), 0) != 0xF));
}

static inline int vec_any_ne(vec_float4 a, vec_float4 b)
{
  return ((int)(spu_extract(spu_gather(spu_cmpeq(a, b)), 0) != 0xF));
}
3384
3385
3386 /* vec_any_nge (any elements not greater than or equal)
3387 * ===========
3388 */
vec_any_nge(vec_float4 a,vec_float4 b)3389 static inline int vec_any_nge(vec_float4 a, vec_float4 b)
3390 {
3391 return ((int)(spu_extract(spu_orx(spu_rlmask(spu_cmpgt(b, a), -31)), 0)));
3392 }
3393
3394 /* vec_any_ngt (any elements not greater than)
3395 * ===========
3396 */
vec_any_ngt(vec_float4 a,vec_float4 b)3397 static inline int vec_any_ngt(vec_float4 a, vec_float4 b)
3398 {
3399 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0xF));
3400 }
3401
3402
3403 /* vec_any_nle (any elements not less than or equal)
3404 * ===========
3405 */
vec_any_nle(vec_float4 a,vec_float4 b)3406 static inline int vec_any_nle(vec_float4 a, vec_float4 b)
3407 {
3408 return ((int)(spu_extract(spu_gather(spu_cmpgt(a, b)), 0) != 0));
3409 }
3410
3411
3412 /* vec_any_nlt (any elements not less than)
3413 * ===========
3414 */
vec_any_nlt(vec_float4 a,vec_float4 b)3415 static inline int vec_any_nlt(vec_float4 a, vec_float4 b)
3416 {
3417 return ((int)(spu_extract(spu_gather(spu_cmpgt(b, a)), 0) != 0xF));
3418 }
3419
3420
3421 /* vec_any_numeric (any elements numeric)
3422 * ===============
3423 */
vec_any_numeric(vec_float4 a)3424 static inline int vec_any_numeric(vec_float4 a)
3425 {
3426 vec_uint4 exp;
3427
3428 exp = spu_and(spu_rlmask((vec_uint4)(a), -23), 0xFF);
3429 return ((int)(spu_extract(spu_gather(spu_cmpeq(exp, 255)), 0) != 0xF));
3430 }
3431
3432
3433 /* vec_any_out (any elements out of bounds)
3434 * ===========
3435 */
/* An element is "in bounds" when |a| <= b with b non-negative:
   spu_cmpabsgt(a, b) flags |a| > b, and spu_rlmaska(b, -31) replicates b's
   sign bit (all ones when b is negative).  The NOR of the two is therefore
   true only for in-bounds elements; any bit missing from the full 0xF
   gather mask means some element is out of bounds.  */
static inline int vec_any_out(vec_float4 a, vec_float4 b)
{
  return (spu_extract(spu_gather(spu_nor(spu_cmpabsgt(a, b), (vec_uint4)(spu_rlmaska((vec_int4)(b), -31)))), 0) != 0xF);
}
3440
3441
3442 /* CBE Language Extension Intrinsics
3443 */
3444
3445 /* vec_extract (extract element from vector)
3446 * ===========
3447 */
#define vec_extract(_a, _element) spu_extract(_a, _element)   /* 1:1 mapping onto the SPU intrinsic */


/* vec_insert (insert scalar into specified vector element)
 * ==========
 */
#define vec_insert(_a, _b, _element) spu_insert(_a, _b, _element)   /* 1:1 mapping onto the SPU intrinsic */
3455
3456 /* vec_lvlx (load vector left indexed)
3457 * ========
3458 */
/* Load the 16-byte quadword containing address (b + a), then shift it left
   by the address misalignment ((unsigned int)p & 0xF) so that the addressed
   byte lands in element 0; the vacated bytes on the right are zero-filled
   (VMX lvlx semantics).  NOTE(review): the pointer-to-int casts assume
   32-bit SPU local-store addresses -- confirm for any other target.  All
   overloads below are the same code specialized per element type.  */
static inline vec_uchar16 vec_lvlx(int a, unsigned char *b)
{
  vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_uchar16 vec_lvlx(int a, vec_uchar16 *b)
{
  vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_char16 vec_lvlx(int a, signed char *b)
{
  vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_char16 vec_lvlx(int a, vec_char16 *b)
{
  vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_ushort8 vec_lvlx(int a, unsigned short *b)
{
  vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_ushort8 vec_lvlx(int a, vec_ushort8 *b)
{
  vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_short8 vec_lvlx(int a, signed short *b)
{
  vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_short8 vec_lvlx(int a, vec_short8 *b)
{
  vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_uint4 vec_lvlx(int a, unsigned int *b)
{
  vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_uint4 vec_lvlx(int a, vec_uint4 *b)
{
  vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_int4 vec_lvlx(int a, signed int *b)
{
  vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_int4 vec_lvlx(int a, vec_int4 *b)
{
  vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_float4 vec_lvlx(int a, float *b)
{
  vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}

static inline vec_float4 vec_lvlx(int a, vec_float4 *b)
{
  vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a);
  return(spu_slqwbyte(*p, (unsigned int)p & 0xF));
}
3542
3543
3544 /* vec_lvlxl (load vector left indexed last)
3545 * =========
3546 */
/* The "last" (LRU-hint) variant maps to the plain load; the hint is
   presumably meaningless on the SPU's cacheless local store -- confirm.  */
#define vec_lvlxl(_a, _b) vec_lvlx(_a, _b)
3548
3549
3550 /* vec_lvrx (load vector right indexed)
3551 * ========
3552 */
/* Load the 16-byte quadword containing address (b + a), then shift it right
   so that the bytes BELOW the addressed byte occupy the high end of the
   vector, zero-filling on the left (VMX lvrx semantics).  The shift count
   ((int)p & 0xF) - 16 is the misalignment minus 16; spu_rlmaskqwbyte shifts
   right by its magnitude.  NOTE(review): pointer-to-int casts assume 32-bit
   SPU local-store addresses.  All overloads below are type-specialized
   copies of the same code.  */
static inline vec_uchar16 vec_lvrx(int a, unsigned char *b)
{
  vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_uchar16 vec_lvrx(int a, vec_uchar16 *b)
{
  vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_char16 vec_lvrx(int a, signed char *b)
{
  vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_char16 vec_lvrx(int a, vec_char16 *b)
{
  vec_char16 *p = (vec_char16 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_ushort8 vec_lvrx(int a, unsigned short *b)
{
  vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_ushort8 vec_lvrx(int a, vec_ushort8 *b)
{
  vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_short8 vec_lvrx(int a, signed short *b)
{
  vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_short8 vec_lvrx(int a, vec_short8 *b)
{
  vec_short8 *p = (vec_short8 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_uint4 vec_lvrx(int a, unsigned int *b)
{
  vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_uint4 vec_lvrx(int a, vec_uint4 *b)
{
  vec_uint4 *p = (vec_uint4 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_int4 vec_lvrx(int a, signed int *b)
{
  vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_int4 vec_lvrx(int a, vec_int4 *b)
{
  vec_int4 *p = (vec_int4 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_float4 vec_lvrx(int a, float *b)
{
  vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}

static inline vec_float4 vec_lvrx(int a, vec_float4 *b)
{
  vec_float4 *p = (vec_float4 *)((unsigned char *)(b) + a);
  return(spu_rlmaskqwbyte(*p, ((int)p & 0xF)-16));
}
3636
3637
3638
3639 /* vec_lvrxl (load vector right indexed last)
3640 * =========
3641 */
/* "Last" variant of the right-indexed load; identical to vec_lvrx here
   (the cache hint presumably has no effect on SPU local store).  */
#define vec_lvrxl(_a, _b) vec_lvrx(_a, _b)


/* vec_promote (promote scalar to a vector)
 * ===========
 */
#define vec_promote(_a, _element) spu_promote(_a, _element)   /* 1:1 mapping onto the SPU intrinsic */


/* vec_splats (splat scalar to a vector)
 * ==========
 */
#define vec_splats(_a) spu_splats(_a)   /* 1:1 mapping onto the SPU intrinsic */
3655
3656
3657 /* vec_stvlx (store vector left indexed)
3658 * =========
3659 */
/* Store the "left" part of a into the quadword containing (c + b) via a
   read-modify-write: rotate a right by the address misalignment, build a
   byte-select mask the same way from an all-ones vector, and merge with the
   quadword already in memory using spu_sel.  Only the bytes from the
   addressed byte to the end of the quadword are overwritten (VMX stvlx
   semantics).  NOTE(review): the load/modify/store sequence is not atomic.
   All overloads below are the same code specialized per element type.  */
static inline void vec_stvlx(vec_uchar16 a, int b, unsigned char *c)
{
  int shift;
  vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b);

  /* Negative shift: spu_rlmaskqwbyte rotates right by |shift| bytes.  */
  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift));
}

static inline void vec_stvlx(vec_uchar16 a, int b, vec_uchar16 *c)
{
  int shift;
  vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift));
}

static inline void vec_stvlx(vec_char16 a, int b, signed char *c)
{
  int shift;
  vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift));
}

static inline void vec_stvlx(vec_char16 a, int b, vec_char16 *c)
{
  int shift;
  vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned char)0xFF), shift));
}

static inline void vec_stvlx(vec_ushort8 a, int b, unsigned short *c)
{
  int shift;
  vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift));
}

static inline void vec_stvlx(vec_ushort8 a, int b, vec_ushort8 *c)
{
  int shift;
  vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift));
}

static inline void vec_stvlx(vec_short8 a, int b, signed short *c)
{
  int shift;
  vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift));
}

static inline void vec_stvlx(vec_short8 a, int b, vec_short8 *c)
{
  int shift;
  vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned short)0xFFFF), shift));
}

static inline void vec_stvlx(vec_uint4 a, int b, unsigned int *c)
{
  int shift;
  vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
}

static inline void vec_stvlx(vec_uint4 a, int b, vec_uint4 *c)
{
  int shift;
  vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
}

static inline void vec_stvlx(vec_int4 a, int b, signed int *c)
{
  int shift;
  vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b);

  shift = -((int)p & 0xF);
  *p = spu_sel(*p,
               spu_rlmaskqwbyte(a, shift),
               spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
}
3780
vec_stvlx(vec_int4 a,int b,vec_int4 * c)3781 static inline void vec_stvlx(vec_int4 a, int b, vec_int4 *c)
3782 {
3783 int shift;
3784 vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b);
3785
3786 shift = -((int)p & 0xF);
3787 *p = spu_sel(*p,
3788 spu_rlmaskqwbyte(a, shift),
3789 spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
3790 }
3791
vec_stvlx(vec_float4 a,int b,float * c)3792 static inline void vec_stvlx(vec_float4 a, int b, float *c)
3793 {
3794 int shift;
3795 vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b);
3796
3797 shift = -((int)p & 0xF);
3798 *p = spu_sel(*p,
3799 spu_rlmaskqwbyte(a, shift),
3800 spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
3801 }
3802
vec_stvlx(vec_float4 a,int b,vec_float4 * c)3803 static inline void vec_stvlx(vec_float4 a, int b, vec_float4 *c)
3804 {
3805 int shift;
3806 vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b);
3807
3808 shift = -((int)p & 0xF);
3809 *p = spu_sel(*p,
3810 spu_rlmaskqwbyte(a, shift),
3811 spu_rlmaskqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
3812 }
3813
/* vec_stvlxl (store vector left indexed last)
 * ==========
 *
 * NOTE(review): the VMX "last" form only adds an LRU cache hint,
 * which presumably has no SPU equivalent, so it is mapped to the
 * plain vec_stvlx — confirm against the SPU mapping rationale.
 */
#define vec_stvlxl(_a, _b, _c) vec_stvlx(_a, _b, _c)
3818
3819
3820 /* vec_stvrx (store vector right indexed)
3821 * =========
3822 */
vec_stvrx(vec_uchar16 a,int b,unsigned char * c)3823 static inline void vec_stvrx(vec_uchar16 a, int b, unsigned char *c)
3824 {
3825 int shift;
3826 vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b);
3827
3828 shift = 16-((int)p & 0xF);
3829 *p = spu_sel(*p,
3830 spu_slqwbyte(a, shift),
3831 spu_slqwbyte(spu_splats((unsigned char)0xFF), shift));
3832 }
3833
vec_stvrx(vec_uchar16 a,int b,vec_uchar16 * c)3834 static inline void vec_stvrx(vec_uchar16 a, int b, vec_uchar16 *c)
3835 {
3836 int shift;
3837 vec_uchar16 *p = (vec_uchar16 *)((unsigned char *)(c) + b);
3838
3839 shift = 16-((int)p & 0xF);
3840 *p = spu_sel(*p,
3841 spu_slqwbyte(a, shift),
3842 spu_slqwbyte(spu_splats((unsigned char)0xFF), shift));
3843 }
3844
vec_stvrx(vec_char16 a,int b,signed char * c)3845 static inline void vec_stvrx(vec_char16 a, int b, signed char *c)
3846 {
3847 int shift;
3848 vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b);
3849
3850 shift = 16-((int)p & 0xF);
3851 *p = spu_sel(*p,
3852 spu_slqwbyte(a, shift),
3853 spu_slqwbyte(spu_splats((unsigned char)0xFF), shift));
3854 }
3855
vec_stvrx(vec_char16 a,int b,vec_char16 * c)3856 static inline void vec_stvrx(vec_char16 a, int b, vec_char16 *c)
3857 {
3858 int shift;
3859 vec_char16 *p = (vec_char16 *)((unsigned char *)(c) + b);
3860
3861 shift = 16-((int)p & 0xF);
3862 *p = spu_sel(*p,
3863 spu_slqwbyte(a, shift),
3864 spu_slqwbyte(spu_splats((unsigned char)0xFF), shift));
3865 }
3866
vec_stvrx(vec_ushort8 a,int b,unsigned short * c)3867 static inline void vec_stvrx(vec_ushort8 a, int b, unsigned short *c)
3868 {
3869 int shift;
3870 vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b);
3871
3872 shift = 16-((int)p & 0xF);
3873 *p = spu_sel(*p,
3874 spu_slqwbyte(a, shift),
3875 spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift));
3876 }
3877
vec_stvrx(vec_ushort8 a,int b,vec_ushort8 * c)3878 static inline void vec_stvrx(vec_ushort8 a, int b, vec_ushort8 *c)
3879 {
3880 int shift;
3881 vec_ushort8 *p = (vec_ushort8 *)((unsigned char *)(c) + b);
3882
3883 shift = 16-((int)p & 0xF);
3884 *p = spu_sel(*p,
3885 spu_slqwbyte(a, shift),
3886 spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift));
3887 }
3888
vec_stvrx(vec_short8 a,int b,signed short * c)3889 static inline void vec_stvrx(vec_short8 a, int b, signed short *c)
3890 {
3891 int shift;
3892 vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b);
3893
3894 shift = 16-((int)p & 0xF);
3895 *p = spu_sel(*p,
3896 spu_slqwbyte(a, shift),
3897 spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift));
3898 }
3899
vec_stvrx(vec_short8 a,int b,vec_short8 * c)3900 static inline void vec_stvrx(vec_short8 a, int b, vec_short8 *c)
3901 {
3902 int shift;
3903 vec_short8 *p = (vec_short8 *)((unsigned char *)(c) + b);
3904
3905 shift = 16-((int)p & 0xF);
3906 *p = spu_sel(*p,
3907 spu_slqwbyte(a, shift),
3908 spu_slqwbyte(spu_splats((unsigned short)0xFFFF), shift));
3909 }
3910
vec_stvrx(vec_uint4 a,int b,unsigned int * c)3911 static inline void vec_stvrx(vec_uint4 a, int b, unsigned int *c)
3912 {
3913 int shift;
3914 vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b);
3915
3916 shift = 16-((int)p & 0xF);
3917 *p = spu_sel(*p,
3918 spu_slqwbyte(a, shift),
3919 spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
3920 }
3921
vec_stvrx(vec_uint4 a,int b,vec_uint4 * c)3922 static inline void vec_stvrx(vec_uint4 a, int b, vec_uint4 *c)
3923 {
3924 int shift;
3925 vec_uint4 *p = (vec_uint4 *)((unsigned char *)(c) + b);
3926
3927 shift = 16-((int)p & 0xF);
3928 *p = spu_sel(*p,
3929 spu_slqwbyte(a, shift),
3930 spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
3931 }
3932
vec_stvrx(vec_int4 a,int b,signed int * c)3933 static inline void vec_stvrx(vec_int4 a, int b, signed int *c)
3934 {
3935 int shift;
3936 vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b);
3937
3938 shift = 16-((int)p & 0xF);
3939 *p = spu_sel(*p,
3940 spu_slqwbyte(a, shift),
3941 spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
3942 }
3943
vec_stvrx(vec_int4 a,int b,vec_int4 * c)3944 static inline void vec_stvrx(vec_int4 a, int b, vec_int4 *c)
3945 {
3946 int shift;
3947 vec_int4 *p = (vec_int4 *)((unsigned char *)(c) + b);
3948
3949 shift = 16-((int)p & 0xF);
3950 *p = spu_sel(*p,
3951 spu_slqwbyte(a, shift),
3952 spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
3953 }
3954
vec_stvrx(vec_float4 a,int b,float * c)3955 static inline void vec_stvrx(vec_float4 a, int b, float *c)
3956 {
3957 int shift;
3958 vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b);
3959
3960 shift = 16-((int)p & 0xF);
3961 *p = spu_sel(*p,
3962 spu_slqwbyte(a, shift),
3963 spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
3964 }
3965
vec_stvrx(vec_float4 a,int b,vec_float4 * c)3966 static inline void vec_stvrx(vec_float4 a, int b, vec_float4 *c)
3967 {
3968 int shift;
3969 vec_float4 *p = (vec_float4 *)((unsigned char *)(c) + b);
3970
3971 shift = 16-((int)p & 0xF);
3972 *p = spu_sel(*p,
3973 spu_slqwbyte(a, shift),
3974 spu_slqwbyte(spu_splats((unsigned int)0xFFFFFFFF), shift));
3975 }
3976
/* vec_stvrxl (store vector right indexed last)
 * ==========
 *
 * NOTE(review): the VMX "last" form only adds an LRU cache hint,
 * which presumably has no SPU equivalent, so it is mapped to the
 * plain vec_stvrx — confirm against the SPU mapping rationale.
 */
#define vec_stvrxl(_a, _b, _c) vec_stvrx(_a, _b, _c)
3981
3982
3983 #endif /* __SPU__ */
3984 #endif /* __cplusplus */
3985 #endif /* !_VMX2SPU_H_ */
3986