/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0. */
12
13 #ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets. */
23 #endif
24
25 #ifndef TMMINTRIN_H_
26 #define TMMINTRIN_H_
27
28 #if defined(__linux__) && defined(__ppc64__)
29
30 #include <altivec.h>
31
32 /* We need definitions from the SSE header files. */
33 #include <pmmintrin.h>
34
35 extern __inline __m128i
36 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16(__m128i __A)37 _mm_abs_epi16 (__m128i __A)
38 {
39 return (__m128i) vec_abs ((__v8hi) __A);
40 }
41
42 extern __inline __m128i
43 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32(__m128i __A)44 _mm_abs_epi32 (__m128i __A)
45 {
46 return (__m128i) vec_abs ((__v4si) __A);
47 }
48
49 extern __inline __m128i
50 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8(__m128i __A)51 _mm_abs_epi8 (__m128i __A)
52 {
53 return (__m128i) vec_abs ((__v16qi) __A);
54 }
55
56 extern __inline __m64
57 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16(__m64 __A)58 _mm_abs_pi16 (__m64 __A)
59 {
60 __v8hi __B = (__v8hi) (__v2du) { __A, __A };
61 return (__m64) ((__v2du) vec_abs (__B))[0];
62 }
63
64 extern __inline __m64
65 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32(__m64 __A)66 _mm_abs_pi32 (__m64 __A)
67 {
68 __v4si __B = (__v4si) (__v2du) { __A, __A };
69 return (__m64) ((__v2du) vec_abs (__B))[0];
70 }
71
72 extern __inline __m64
73 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8(__m64 __A)74 _mm_abs_pi8 (__m64 __A)
75 {
76 __v16qi __B = (__v16qi) (__v2du) { __A, __A };
77 return (__m64) ((__v2du) vec_abs (__B))[0];
78 }
79
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count)
{
  /* PALIGNR: concatenate __A (high) and __B (low) into a 32-byte value
     and return the 16 bytes starting __count bytes above the least
     significant byte.  */
  if (__builtin_constant_p (__count) && __count < 16)
    {
      /* Fast path for a compile-time in-range count: a single vec_sld
	 performs the byte extraction, but it works in big-endian byte
	 order, so byte-reverse the operands (and the result) when
	 targeting little-endian.  */
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
      __B = (__m128i) vec_reve ((__v16qu) __B);
#endif
      __A = (__m128i) vec_sld ((__v16qu) __B, (__v16qu) __A, __count);
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
#endif
      return __A;
    }

  /* Generic path: non-constant or out-of-range count.  */
  if (__count == 0)
    return __B;

  if (__count >= 16)
    {
      if (__count >= 32)
	{
	  /* Shifting by 32 bytes or more drains both operands.  */
	  const __v16qu zero = { 0 };
	  return (__m128i) zero;
	}
      else
	{
	  /* 16 <= __count < 32: only bytes of __A survive; shift it
	     down by __count - 16 bytes.  vec_sro/vec_slo take the
	     amount in bits, hence the * 8.  */
	  const __v16qu __shift =
	    vec_splats ((unsigned char) ((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
	  return (__m128i) vec_sro ((__v16qu) __A, __shift);
#else
	  return (__m128i) vec_slo ((__v16qu) __A, __shift);
#endif
	}
    }
  else
    {
      /* 0 < __count < 16: shift __A up and __B down so that the low
	 bytes of __A meet the high bytes of __B, then merge.  */
      const __v16qu __shiftA =
	vec_splats ((unsigned char) ((16 - __count) * 8));
      const __v16qu __shiftB = vec_splats ((unsigned char) (__count * 8));
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_slo ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_sro ((__v16qu) __B, __shiftB);
#else
      __A = (__m128i) vec_sro ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_slo ((__v16qu) __B, __shiftB);
#endif
      return (__m128i) vec_or ((__v16qu) __A, (__v16qu) __B);
    }
}
133
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8 (__m64 __A, __m64 __B, unsigned int __count)
{
  /* 64-bit PALIGNR: concatenate __A (high) and __B (low) and return
     the 8 bytes starting __count bytes up; counts of 16 or more
     yield zero.  */
  if (__count < 16)
    {
      __v2du __C = { __B, __A };
#ifdef __LITTLE_ENDIAN__
      /* Shift the 16-byte concatenation toward the low end by __count
	 bytes; the shift amount is expressed in bits (<< 3).  */
      const __v4su __shift = { __count << 3, 0, 0, 0 };
      __C = (__v2du) vec_sro ((__v16qu) __C, (__v16qu) __shift);
#else
      const __v4su __shift = { 0, 0, 0, __count << 3 };
      __C = (__v2du) vec_slo ((__v16qu) __C, (__v16qu) __shift);
#endif
      return (__m64) __C[0];
    }
  else
    {
      const __m64 __zero = { 0 };
      return __zero;
    }
}
156
157 extern __inline __m128i
158 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16(__m128i __A,__m128i __B)159 _mm_hadd_epi16 (__m128i __A, __m128i __B)
160 {
161 const __v16qu __P =
162 { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
163 const __v16qu __Q =
164 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
165 __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
166 __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
167 return (__m128i) vec_add (__C, __D);
168 }
169
170 extern __inline __m128i
171 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32(__m128i __A,__m128i __B)172 _mm_hadd_epi32 (__m128i __A, __m128i __B)
173 {
174 const __v16qu __P =
175 { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
176 const __v16qu __Q =
177 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
178 __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
179 __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
180 return (__m128i) vec_add (__C, __D);
181 }
182
183 extern __inline __m64
184 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16(__m64 __A,__m64 __B)185 _mm_hadd_pi16 (__m64 __A, __m64 __B)
186 {
187 __v8hi __C = (__v8hi) (__v2du) { __A, __B };
188 const __v16qu __P =
189 { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
190 const __v16qu __Q =
191 { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
192 __v8hi __D = vec_perm (__C, __C, __Q);
193 __C = vec_perm (__C, __C, __P);
194 __C = vec_add (__C, __D);
195 return (__m64) ((__v2du) __C)[1];
196 }
197
198 extern __inline __m64
199 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32(__m64 __A,__m64 __B)200 _mm_hadd_pi32 (__m64 __A, __m64 __B)
201 {
202 __v4si __C = (__v4si) (__v2du) { __A, __B };
203 const __v16qu __P =
204 { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
205 const __v16qu __Q =
206 { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
207 __v4si __D = vec_perm (__C, __C, __Q);
208 __C = vec_perm (__C, __C, __P);
209 __C = vec_add (__C, __D);
210 return (__m64) ((__v2du) __C)[1];
211 }
212
213 extern __inline __m128i
214 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16(__m128i __A,__m128i __B)215 _mm_hadds_epi16 (__m128i __A, __m128i __B)
216 {
217 __v4si __C = { 0 }, __D = { 0 };
218 __C = vec_sum4s ((__v8hi) __A, __C);
219 __D = vec_sum4s ((__v8hi) __B, __D);
220 __C = (__v4si) vec_packs (__C, __D);
221 return (__m128i) __C;
222 }
223
224 extern __inline __m64
225 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16(__m64 __A,__m64 __B)226 _mm_hadds_pi16 (__m64 __A, __m64 __B)
227 {
228 const __v4si __zero = { 0 };
229 __v8hi __C = (__v8hi) (__v2du) { __A, __B };
230 __v4si __D = vec_sum4s (__C, __zero);
231 __C = vec_packs (__D, __D);
232 return (__m64) ((__v2du) __C)[1];
233 }
234
235 extern __inline __m128i
236 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16(__m128i __A,__m128i __B)237 _mm_hsub_epi16 (__m128i __A, __m128i __B)
238 {
239 const __v16qu __P =
240 { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
241 const __v16qu __Q =
242 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
243 __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
244 __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
245 return (__m128i) vec_sub (__C, __D);
246 }
247
248 extern __inline __m128i
249 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32(__m128i __A,__m128i __B)250 _mm_hsub_epi32 (__m128i __A, __m128i __B)
251 {
252 const __v16qu __P =
253 { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
254 const __v16qu __Q =
255 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
256 __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
257 __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
258 return (__m128i) vec_sub (__C, __D);
259 }
260
261 extern __inline __m64
262 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16(__m64 __A,__m64 __B)263 _mm_hsub_pi16 (__m64 __A, __m64 __B)
264 {
265 const __v16qu __P =
266 { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
267 const __v16qu __Q =
268 { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
269 __v8hi __C = (__v8hi) (__v2du) { __A, __B };
270 __v8hi __D = vec_perm (__C, __C, __Q);
271 __C = vec_perm (__C, __C, __P);
272 __C = vec_sub (__C, __D);
273 return (__m64) ((__v2du) __C)[1];
274 }
275
276 extern __inline __m64
277 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32(__m64 __A,__m64 __B)278 _mm_hsub_pi32 (__m64 __A, __m64 __B)
279 {
280 const __v16qu __P =
281 { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
282 const __v16qu __Q =
283 { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
284 __v4si __C = (__v4si) (__v2du) { __A, __B };
285 __v4si __D = vec_perm (__C, __C, __Q);
286 __C = vec_perm (__C, __C, __P);
287 __C = vec_sub (__C, __D);
288 return (__m64) ((__v2du) __C)[1];
289 }
290
291 extern __inline __m128i
292 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16(__m128i __A,__m128i __B)293 _mm_hsubs_epi16 (__m128i __A, __m128i __B)
294 {
295 const __v16qu __P =
296 { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
297 const __v16qu __Q =
298 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
299 __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
300 __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
301 return (__m128i) vec_subs (__C, __D);
302 }
303
304 extern __inline __m64
305 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16(__m64 __A,__m64 __B)306 _mm_hsubs_pi16 (__m64 __A, __m64 __B)
307 {
308 const __v16qu __P =
309 { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
310 const __v16qu __Q =
311 { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
312 __v8hi __C = (__v8hi) (__v2du) { __A, __B };
313 __v8hi __D = vec_perm (__C, __C, __P);
314 __v8hi __E = vec_perm (__C, __C, __Q);
315 __C = vec_subs (__D, __E);
316 return (__m64) ((__v2du) __C)[1];
317 }
318
319 extern __inline __m128i
320 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8(__m128i __A,__m128i __B)321 _mm_shuffle_epi8 (__m128i __A, __m128i __B)
322 {
323 const __v16qi __zero = { 0 };
324 __vector __bool char __select = vec_cmplt ((__v16qi) __B, __zero);
325 __v16qi __C = vec_perm ((__v16qi) __A, (__v16qi) __A, (__v16qu) __B);
326 return (__m128i) vec_sel (__C, __zero, __select);
327 }
328
329 extern __inline __m64
330 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8(__m64 __A,__m64 __B)331 _mm_shuffle_pi8 (__m64 __A, __m64 __B)
332 {
333 const __v16qi __zero = { 0 };
334 __v16qi __C = (__v16qi) (__v2du) { __A, __A };
335 __v16qi __D = (__v16qi) (__v2du) { __B, __B };
336 __vector __bool char __select = vec_cmplt ((__v16qi) __D, __zero);
337 __C = vec_perm ((__v16qi) __C, (__v16qi) __C, (__v16qu) __D);
338 __C = vec_sel (__C, __zero, __select);
339 return (__m64) ((__v2du) (__C))[0];
340 }
341
342 extern __inline __m128i
343 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8(__m128i __A,__m128i __B)344 _mm_sign_epi8 (__m128i __A, __m128i __B)
345 {
346 const __v16qi __zero = { 0 };
347 __v16qi __selectneg = (__v16qi) vec_cmplt ((__v16qi) __B, __zero);
348 __v16qi __selectpos =
349 (__v16qi) vec_neg ((__v16qi) vec_cmpgt ((__v16qi) __B, __zero));
350 __v16qi __conv = vec_add (__selectneg, __selectpos);
351 return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv);
352 }
353
354 extern __inline __m128i
355 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16(__m128i __A,__m128i __B)356 _mm_sign_epi16 (__m128i __A, __m128i __B)
357 {
358 const __v8hi __zero = { 0 };
359 __v8hi __selectneg = (__v8hi) vec_cmplt ((__v8hi) __B, __zero);
360 __v8hi __selectpos =
361 (__v8hi) vec_neg ((__v8hi) vec_cmpgt ((__v8hi) __B, __zero));
362 __v8hi __conv = vec_add (__selectneg, __selectpos);
363 return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv);
364 }
365
366 extern __inline __m128i
367 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32(__m128i __A,__m128i __B)368 _mm_sign_epi32 (__m128i __A, __m128i __B)
369 {
370 const __v4si __zero = { 0 };
371 __v4si __selectneg = (__v4si) vec_cmplt ((__v4si) __B, __zero);
372 __v4si __selectpos =
373 (__v4si) vec_neg ((__v4si) vec_cmpgt ((__v4si) __B, __zero));
374 __v4si __conv = vec_add (__selectneg, __selectpos);
375 return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv);
376 }
377
378 extern __inline __m64
379 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8(__m64 __A,__m64 __B)380 _mm_sign_pi8 (__m64 __A, __m64 __B)
381 {
382 const __v16qi __zero = { 0 };
383 __v16qi __C = (__v16qi) (__v2du) { __A, __A };
384 __v16qi __D = (__v16qi) (__v2du) { __B, __B };
385 __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D);
386 return (__m64) ((__v2du) (__C))[0];
387 }
388
389 extern __inline __m64
390 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16(__m64 __A,__m64 __B)391 _mm_sign_pi16 (__m64 __A, __m64 __B)
392 {
393 const __v8hi __zero = { 0 };
394 __v8hi __C = (__v8hi) (__v2du) { __A, __A };
395 __v8hi __D = (__v8hi) (__v2du) { __B, __B };
396 __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D);
397 return (__m64) ((__v2du) (__C))[0];
398 }
399
400 extern __inline __m64
401 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32(__m64 __A,__m64 __B)402 _mm_sign_pi32 (__m64 __A, __m64 __B)
403 {
404 const __v4si __zero = { 0 };
405 __v4si __C = (__v4si) (__v2du) { __A, __A };
406 __v4si __D = (__v4si) (__v2du) { __B, __B };
407 __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D);
408 return (__m64) ((__v2du) (__C))[0];
409 }
410
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __A, __m128i __B)
{
  /* PMADDUBSW: multiply unsigned bytes of __A by signed bytes of __B,
     then add adjacent 16-bit products with signed saturation.  */
  /* Widen __A's bytes to 16 bits and mask to 0x00ff so the (sign
     extending) unpack result is treated as unsigned.  */
  __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __v8hi __C = vec_and (vec_unpackh ((__v16qi) __A), __unsigned);
  __v8hi __D = vec_and (vec_unpackl ((__v16qi) __A), __unsigned);
  /* __B's bytes are signed: plain unpack sign-extends them.  */
  __v8hi __E = vec_unpackh ((__v16qi) __B);
  __v8hi __F = vec_unpackl ((__v16qi) __B);
  __C = vec_mul (__C, __E);
  __D = vec_mul (__D, __F);
  /* Gather the products of each adjacent byte pair and add them with
     signed saturation.  */
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __E = vec_perm (__C, __D, __odds);
  __F = vec_perm (__C, __D, __evens);
  return (__m128i) vec_adds (__E, __F);
}
430
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16 (__m64 __A, __m64 __B)
{
  /* 64-bit PMADDUBSW: unsigned bytes of __A times signed bytes of __B,
     adjacent products added with signed saturation.  */
  /* Both halves of the splat hold __A, so vec_unpackl picks up __A's
     eight bytes regardless of endianness; widen them to 16 bits.  */
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  __C = vec_unpackl ((__v16qi) __C);
  /* Mask to 0x00ff: __A's bytes are treated as unsigned.  */
  const __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __C = vec_and (__C, __unsigned);
  /* __B's bytes are signed; the unpack sign-extends them.  */
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __D = vec_unpackl ((__v16qi) __D);
  __D = vec_mul (__C, __D);
  /* Pair up the products and add them with signed saturation.  */
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __C = vec_perm (__D, __D, __odds);
  __D = vec_perm (__D, __D, __evens);
  __C = vec_adds (__C, __D);
  return (__m64) ((__v2du) (__C))[0];
}
451
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16 (__m128i __A, __m128i __B)
{
  /* PMULHRSW: widen each 16-bit lane to 32 bits, multiply, and keep
     the rounded high half: (((a * b) >> 14) + 1) >> 1.  */
  __v4si __C = vec_unpackh ((__v8hi) __A);
  __v4si __D = vec_unpackh ((__v8hi) __B);
  __C = vec_mul (__C, __D);
  __D = vec_unpackl ((__v8hi) __A);
  __v4si __E = vec_unpackl ((__v8hi) __B);
  __D = vec_mul (__D, __E);
  /* Logical (not arithmetic) shifts suffice: the final vec_pack keeps
     only the low 16 bits of each word, which come from product bits
     that exist before any sign extension.  */
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  __D = vec_sr (__D, __shift);
  /* Round to nearest: add one and drop the rounding bit.  */
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __D = vec_add (__D, __ones);
  __D = vec_sr (__D, (__v4su) __ones);
  return (__m128i) vec_pack (__C, __D);
}
472
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16 (__m64 __A, __m64 __B)
{
  /* 64-bit PMULHRSW: both halves of each splat hold the operand, so
     the unpack yields its four halfwords; widen, multiply, and keep
     the rounded high half: (((a * b) >> 14) + 1) >> 1.  */
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __C = vec_unpackh ((__v8hi) __C);
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __D = vec_unpackh ((__v8hi) __D);
  __C = vec_mul (__C, __D);
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  /* NOTE(review): __D is never multiplied or rounded — it apparently
     just fills the half of the packed result that the final [0]
     extraction discards; confirm this holds for big-endian too.  */
  __v8hi __E = vec_pack (__C, __D);
  return (__m64) ((__v2du) (__E))[0];
}
490
491 #else
492 #include_next <tmmintrin.h>
493 #endif /* defined(__linux__) && defined(__ppc64__) */
494
495 #endif /* TMMINTRIN_H_ */
496