/* Copyright (C) 2018-2021 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.

   NOTE: This is NOT a complete implementation of the SSE4 intrinsics!  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.
   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.
   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#endif

#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_

#include <altivec.h>
#include <tmmintrin.h>

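/* Insert integer __D into element __N of __A.  __N is masked to the
   valid lane range, so only the low bits of the selector are used.  */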
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi8 (__m128i const __A, int const __D, int const __N)
{
  __v16qi __result = (__v16qi) __A;

  __result[__N & 0xf] = __D;

  return (__m128i) __result;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi32 (__m128i const __A, int const __D, int const __N)
{
  __v4si __result = (__v4si) __A;

  __result[__N & 3] = __D;

  return (__m128i) __result;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi64 (__m128i const __A, long long const __D, int const __N)
{
  __v2di __result = (__v2di) __A;

  __result[__N & 1] = __D;

  return (__m128i) __result;
}

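/* Return element __N of the integer vector __X, with __N masked to the
   valid lane range.  _mm_extract_epi8 zero-extends the selected byte.  */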
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi8 (__m128i __X, const int __N)
{
  return (unsigned char) ((__v16qi) __X)[__N & 15];
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi32 (__m128i __X, const int __N)
{
  return ((__v4si) __X)[__N & 3];
}

extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi64 (__m128i __X, const int __N)
{
  return ((__v2di) __X)[__N & 1];
}

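/* Return the bit pattern of float element __N of __X as an int; the
   Intel intrinsic returns the raw IEEE 754 representation, not the
   converted value.  */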
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_ps (__m128 __X, const int __N)
{
  return ((__v4si) __X)[__N & 3];
}

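/* Blend 16-bit fields of __A and __B: bit i of __imm8 selects halfword i
   from __B when set, from __A when clear.  The immediate byte is splatted
   and expanded by vec_gb/vec_unpackh into a halfword-wide select mask.  */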
#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
{
  __v16qi __charmask = vec_splats ((signed char) __imm8);
  __charmask = vec_gb (__charmask);
  __v8hu __shortmask = (__v8hu) vec_unpackh (__charmask);
#ifdef __BIG_ENDIAN__
  __shortmask = vec_reve (__shortmask);
#endif
  return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __shortmask);
}
#endif

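/* Variable blend: for each byte, select from __B when the sign bit of
   the corresponding __mask byte is set, else from __A.  The algebraic
   shift by 7 replicates each sign bit across its byte.  */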
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
{
  const __v16qu __seven = vec_splats ((unsigned char) 0x07);
  __v16qu __lmask = vec_sra ((__v16qu) __mask, __seven);
  return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __lmask);
}

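/* Blend 32-bit fields of __A and __B under the 4-bit immediate __imm8,
   implemented as a vec_perm whose control vector is looked up from a
   16-entry table indexed by __imm8.  */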
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_ps (__m128 __A, __m128 __B, const int __imm8)
{
  __v16qu __pcv[] =
    {
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 12, 13, 14, 15 },
      { 16, 17, 18, 19,  4,  5,  6,  7, 24, 25, 26, 27, 12, 13, 14, 15 },
      {  0,  1,  2,  3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 28, 29, 30, 31 },
      { 16, 17, 18, 19,  4,  5,  6,  7,  8,  9, 10, 11, 28, 29, 30, 31 },
      {  0,  1,  2,  3, 20, 21, 22, 23,  8,  9, 10, 11, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 28, 29, 30, 31 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      {  0,  1,  2,  3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 },
    };
  __v16qu __r = vec_perm ((__v16qu) __A, (__v16qu) __B, __pcv[__imm8]);
  return (__m128) __r;
}

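/* Variable blend of 32-bit fields: the sign bit of each __mask word
   selects the corresponding word from __B (set) or __A (clear).  */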
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_ps (__m128 __A, __m128 __B, __m128 __mask)
{
  const __v4si __zero = {0};
  const __vector __bool int __boolmask = vec_cmplt ((__v4si) __mask, __zero);
  return (__m128) vec_sel ((__v4su) __A, (__v4su) __B, (__v4su) __boolmask);
}

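/* Blend 64-bit fields of __A and __B under the 2-bit immediate __imm8,
   using a 4-entry permute-control table.  */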
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_pd (__m128d __A, __m128d __B, const int __imm8)
{
  __v16qu __pcv[] =
    {
      {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
      { 16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15 },
      {  0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31 },
      { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }
    };
  __v16qu __r = vec_perm ((__v16qu) __A, (__v16qu) __B, __pcv[__imm8]);
  return (__m128d) __r;
}

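/* Variable blend of 64-bit fields keyed on the sign bit of each __mask
   doubleword; requires the POWER8 doubleword compare.  */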
#ifdef _ARCH_PWR8
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_pd (__m128d __A, __m128d __B, __m128d __mask)
{
  const __v2di __zero = {0};
  const __vector __bool long long __boolmask = vec_cmplt ((__v2di) __mask, __zero);
  return (__m128d) vec_sel ((__v2du) __A, (__v2du) __B, (__v2du) __boolmask);
}
#endif

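/* Vector test intrinsics.  _mm_testz_si128 returns nonzero when
   (__A & __B) is all zeros (the x86 ZF result); _mm_testc_si128 returns
   nonzero when (~__A & __B) is all zeros (the x86 CF result);
   _mm_testnzc_si128 returns 1 when both are nonzero.  */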
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_si128 (__m128i __A, __m128i __B)
{
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  return vec_all_eq (vec_and ((__v16qu) __A, (__v16qu) __B), __zero);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_si128 (__m128i __A, __m128i __B)
{
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  const __v16qu __notA = vec_nor ((__v16qu) __A, (__v16qu) __A);
  return vec_all_eq (vec_and (__notA, (__v16qu) __B), __zero);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_si128 (__m128i __A, __m128i __B)
{
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  return _mm_testz_si128 (__A, __B) == 0 && _mm_testc_si128 (__A, __B) == 0;
}

#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))

#define _mm_test_all_ones(V) \
  _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))

#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))

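/* Round toward positive infinity (ceil) or negative infinity (floor).
   The _sd and _ss forms round only the low element of __B and take the
   remaining elements from __A, matching the Intel scalar variants.  */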
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ceil_pd (__m128d __A)
{
  return (__m128d) vec_ceil ((__v2df) __A);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ceil_sd (__m128d __A, __m128d __B)
{
  __v2df __r = vec_ceil ((__v2df) __B);
  __r[1] = ((__v2df) __A)[1];
  return (__m128d) __r;
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_floor_pd (__m128d __A)
{
  return (__m128d) vec_floor ((__v2df) __A);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_floor_sd (__m128d __A, __m128d __B)
{
  __v2df __r = vec_floor ((__v2df) __B);
  __r[1] = ((__v2df) __A)[1];
  return (__m128d) __r;
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ceil_ps (__m128 __A)
{
  return (__m128) vec_ceil ((__v4sf) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ceil_ss (__m128 __A, __m128 __B)
{
  __v4sf __r = (__v4sf) __A;
  __r[0] = __builtin_ceil (((__v4sf) __B)[0]);
  return (__m128) __r;
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_floor_ps (__m128 __A)
{
  return (__m128) vec_floor ((__v4sf) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_floor_ss (__m128 __A, __m128 __B)
{
  __v4sf __r = (__v4sf) __A;
  __r[0] = __builtin_floor (((__v4sf) __B)[0]);
  return (__m128) __r;
}

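/* Compare 64-bit elements for equality; vec_cmpeq on doublewords is
   available starting with POWER8.  */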
#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_cmpeq ((__v2di) __X, (__v2di) __Y);
}
#endif

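/* Element-wise minimum and maximum for the signed and unsigned element
   widths that SSE4.1 adds beyond SSE2.  */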
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v16qi) __X, (__v16qi) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu16 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v8hu) __X, (__v8hu) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v4si) __X, (__v4si) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_min ((__v4su) __X, (__v4su) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v16qi) __X, (__v16qi) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu16 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v8hu) __X, (__v8hu) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v4si) __X, (__v4si) __Y);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_max ((__v4su) __X, (__v4su) __Y);
}

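/* _mm_mullo_epi32 keeps the low 32 bits of each 32x32 product;
   _mm_mul_epi32 widens the even-numbered signed 32-bit elements into
   two full 64-bit products via vec_mule.  */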
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_mul ((__v4su) __X, (__v4su) __Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_mule ((__v4si) __X, (__v4si) __Y);
}
#endif

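/* Sign-extend the low-order elements of __A to the next wider element
   type; repeated vec_unpackh steps widen one level at a time.  */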
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi16 (__m128i __A)
{
  return (__m128i) vec_unpackh ((__v16qi) __A);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi32 (__m128i __A)
{
  __A = (__m128i) vec_unpackh ((__v16qi) __A);
  return (__m128i) vec_unpackh ((__v8hi) __A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi8_epi64 (__m128i __A)
{
  __A = (__m128i) vec_unpackh ((__v16qi) __A);
  __A = (__m128i) vec_unpackh ((__v8hi) __A);
  return (__m128i) vec_unpackh ((__v4si) __A);
}
#endif

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi32 (__m128i __A)
{
  return (__m128i) vec_unpackh ((__v8hi) __A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi16_epi64 (__m128i __A)
{
  __A = (__m128i) vec_unpackh ((__v8hi) __A);
  return (__m128i) vec_unpackh ((__v4si) __A);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_epi64 (__m128i __A)
{
  return (__m128i) vec_unpackh ((__v4si) __A);
}
#endif

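/* Zero-extend the low-order elements of __A by merging with a zero
   vector; the operand order of vec_mergeh depends on endianness.  */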
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi16 (__m128i __A)
{
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi32 (__m128i __A)
{
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
  __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
  __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu8_epi64 (__m128i __A)
{
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v16qu) __A, __zero);
  __A = (__m128i) vec_mergeh ((__v8hu) __A, (__v8hu) __zero);
  __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v16qu) __A);
  __A = (__m128i) vec_mergeh ((__v8hu) __zero, (__v8hu) __A);
  __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi32 (__m128i __A)
{
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu16_epi64 (__m128i __A)
{
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v8hu) __A, __zero);
  __A = (__m128i) vec_mergeh ((__v4su) __A, (__v4su) __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v8hu) __A);
  __A = (__m128i) vec_mergeh ((__v4su) __zero, (__v4su) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_epi64 (__m128i __A)
{
  const __v4su __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i) vec_mergeh ((__v4su) __A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i) vec_mergeh (__zero, (__v4su) __A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

/* Return horizontal packed word minimum and its index in bits [15:0]
   and bits [18:16] respectively.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minpos_epu16 (__m128i __A)
{
  union __u
    {
      __m128i __m;
      __v8hu __uh;
    };
  union __u __u = { .__m = __A }, __r = { .__m = {0} };
  unsigned short __ridx = 0;
  unsigned short __rmin = __u.__uh[__ridx];
  for (unsigned long __i = 1; __i < 8; __i++)
    {
      if (__u.__uh[__i] < __rmin)
	{
	  __rmin = __u.__uh[__i];
	  __ridx = __i;
	}
    }
  __r.__uh[0] = __rmin;
  __r.__uh[1] = __ridx;
  return __r.__m;
}

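/* Pack 32-bit signed integers into 16-bit unsigned integers with
   saturation.  */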
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_packsu ((__v4si) __X, (__v4si) __Y);
}

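/* Signed greater-than compare of 64-bit elements (POWER8 and later).  */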
#ifdef _ARCH_PWR8
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) vec_cmpgt ((__v2di) __X, (__v2di) __Y);
}
#endif

#endif /* SMMINTRIN_H_ */