1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _IMMINTRIN_H_INCLUDED 25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef _AVX512FINTRIN_H_INCLUDED 29 #define _AVX512FINTRIN_H_INCLUDED 30 31 #ifndef __AVX512F__ 32 #pragma GCC push_options 33 #pragma GCC target("avx512f") 34 #define __DISABLE_AVX512F__ 35 #endif /* __AVX512F__ */ 36 37 /* Internal data types for implementing the intrinsics. */ 38 typedef double __v8df __attribute__ ((__vector_size__ (64))); 39 typedef float __v16sf __attribute__ ((__vector_size__ (64))); 40 typedef long long __v8di __attribute__ ((__vector_size__ (64))); 41 typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64))); 42 typedef int __v16si __attribute__ ((__vector_size__ (64))); 43 typedef unsigned int __v16su __attribute__ ((__vector_size__ (64))); 44 typedef short __v32hi __attribute__ ((__vector_size__ (64))); 45 typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64))); 46 typedef char __v64qi __attribute__ ((__vector_size__ (64))); 47 typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64))); 48 49 /* The Intel API is flexible enough that we must allow aliasing with other 50 vector types, and their scalar components. */ 51 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); 52 typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); 53 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); 54 55 /* Unaligned version of the same type. */ 56 typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); 57 typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); 58 typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1))); 59 60 typedef unsigned char __mmask8; 61 typedef unsigned short __mmask16; 62 63 extern __inline __mmask16 64 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 65 _mm512_int2mask (int __M) 66 { 67 return (__mmask16) __M; 68 } 69 70 extern __inline int 71 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 72 _mm512_mask2int (__mmask16 __M) 73 { 74 return (int) __M; 75 } 76 77 extern __inline __m512i 78 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 79 _mm512_set_epi64 (long long __A, long long __B, long long __C, 80 long long __D, long long __E, long long __F, 81 long long __G, long long __H) 82 { 83 return __extension__ (__m512i) (__v8di) 84 { __H, __G, __F, __E, __D, __C, __B, __A }; 85 } 86 87 /* Create the vector [A B C D E F G H I J K L M N O P]. */ 88 extern __inline __m512i 89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 90 _mm512_set_epi32 (int __A, int __B, int __C, int __D, 91 int __E, int __F, int __G, int __H, 92 int __I, int __J, int __K, int __L, 93 int __M, int __N, int __O, int __P) 94 { 95 return __extension__ (__m512i)(__v16si) 96 { __P, __O, __N, __M, __L, __K, __J, __I, 97 __H, __G, __F, __E, __D, __C, __B, __A }; 98 } 99 100 extern __inline __m512d 101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 102 _mm512_set_pd (double __A, double __B, double __C, double __D, 103 double __E, double __F, double __G, double __H) 104 { 105 return __extension__ (__m512d) 106 { __H, __G, __F, __E, __D, __C, __B, __A }; 107 } 108 109 extern __inline __m512 110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 111 _mm512_set_ps (float __A, float __B, float __C, float __D, 112 float __E, float __F, float __G, float __H, 113 float __I, float __J, float __K, float __L, 114 float __M, float __N, float __O, float __P) 115 { 116 return __extension__ (__m512) 117 { __P, __O, __N, __M, __L, __K, __J, __I, 118 __H, __G, __F, __E, __D, __C, __B, __A }; 119 } 120 121 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 122 _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0) 123 124 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 125 e8,e9,e10,e11,e12,e13,e14,e15) \ 126 _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 127 128 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 129 _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0) 130 131 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 132 _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 133 134 extern __inline __m512 135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 136 _mm512_undefined_ps (void) 137 { 138 __m512 __Y = __Y; 139 return __Y; 140 } 141 142 #define _mm512_undefined _mm512_undefined_ps 143 144 extern __inline __m512d 145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 146 _mm512_undefined_pd (void) 147 { 148 __m512d __Y = __Y; 149 return __Y; 150 } 151 152 extern __inline __m512i 153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 154 _mm512_undefined_epi32 (void) 155 { 156 __m512i __Y = __Y; 157 return __Y; 158 } 159 160 #define _mm512_undefined_si512 _mm512_undefined_epi32 161 162 extern __inline __m512i 163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 164 _mm512_set1_epi8 (char __A) 165 { 166 return __extension__ (__m512i)(__v64qi) 167 { __A, __A, __A, __A, __A, __A, __A, __A, 168 __A, __A, __A, __A, __A, __A, __A, __A, 169 __A, __A, __A, __A, __A, __A, __A, __A, 170 __A, __A, __A, __A, __A, __A, __A, __A, 171 __A, __A, __A, __A, __A, __A, __A, __A, 172 __A, __A, __A, __A, __A, __A, __A, __A, 173 __A, __A, __A, __A, __A, __A, __A, __A, 174 __A, __A, __A, __A, __A, __A, __A, __A }; 175 } 176 177 extern __inline __m512i 178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 179 _mm512_set1_epi16 (short __A) 180 { 181 return __extension__ (__m512i)(__v32hi) 182 { __A, __A, __A, __A, __A, __A, __A, __A, 183 __A, __A, __A, __A, __A, __A, __A, __A, 184 __A, __A, __A, __A, __A, __A, __A, __A, 185 __A, __A, __A, __A, __A, __A, __A, __A }; 186 } 187 188 extern __inline __m512d 189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 190 _mm512_set1_pd (double __A) 191 { 192 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__ 193 (__v2df) { __A, }, 194 (__v8df) 195 _mm512_undefined_pd (), 196 (__mmask8) -1); 197 } 198 199 extern __inline __m512 200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 201 _mm512_set1_ps (float __A) 202 { 203 return (__m512) __builtin_ia32_broadcastss512 (__extension__ 204 (__v4sf) { __A, }, 205 (__v16sf) 206 _mm512_undefined_ps (), 207 (__mmask16) -1); 208 } 209 210 /* Create the vector [A B C D A B C D A B C D A B C D]. */ 211 extern __inline __m512i 212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 213 _mm512_set4_epi32 (int __A, int __B, int __C, int __D) 214 { 215 return __extension__ (__m512i)(__v16si) 216 { __D, __C, __B, __A, __D, __C, __B, __A, 217 __D, __C, __B, __A, __D, __C, __B, __A }; 218 } 219 220 extern __inline __m512i 221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 222 _mm512_set4_epi64 (long long __A, long long __B, long long __C, 223 long long __D) 224 { 225 return __extension__ (__m512i) (__v8di) 226 { __D, __C, __B, __A, __D, __C, __B, __A }; 227 } 228 229 extern __inline __m512d 230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 231 _mm512_set4_pd (double __A, double __B, double __C, double __D) 232 { 233 return __extension__ (__m512d) 234 { __D, __C, __B, __A, __D, __C, __B, __A }; 235 } 236 237 extern __inline __m512 238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 239 _mm512_set4_ps (float __A, float __B, float __C, float __D) 240 { 241 return __extension__ (__m512) 242 { __D, __C, __B, __A, __D, __C, __B, __A, 243 __D, __C, __B, __A, __D, __C, __B, __A }; 244 } 245 246 #define _mm512_setr4_epi64(e0,e1,e2,e3) \ 247 _mm512_set4_epi64(e3,e2,e1,e0) 248 249 #define _mm512_setr4_epi32(e0,e1,e2,e3) \ 250 _mm512_set4_epi32(e3,e2,e1,e0) 251 252 #define _mm512_setr4_pd(e0,e1,e2,e3) \ 253 _mm512_set4_pd(e3,e2,e1,e0) 254 255 #define _mm512_setr4_ps(e0,e1,e2,e3) \ 256 _mm512_set4_ps(e3,e2,e1,e0) 257 258 extern __inline __m512 259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 260 _mm512_setzero_ps (void) 261 { 262 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 263 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 264 } 265 266 extern __inline __m512d 267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 268 _mm512_setzero_pd (void) 269 { 270 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 271 } 272 273 extern __inline __m512i 274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 275 _mm512_setzero_epi32 (void) 276 { 277 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 278 } 279 280 extern __inline __m512i 281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 282 _mm512_setzero_si512 (void) 283 { 284 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 285 } 286 287 extern __inline __m512d 288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 289 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 290 { 291 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 292 (__v8df) __W, 293 (__mmask8) __U); 294 } 295 296 extern __inline __m512d 297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 298 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 299 { 300 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 301 (__v8df) 302 _mm512_setzero_pd (), 303 (__mmask8) __U); 304 } 305 306 extern __inline __m512 307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 308 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 309 { 310 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 311 (__v16sf) __W, 312 (__mmask16) __U); 313 } 314 315 extern __inline __m512 316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 317 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) 318 { 319 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 320 (__v16sf) 321 _mm512_setzero_ps (), 322 (__mmask16) __U); 323 } 324 325 extern __inline __m512d 326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 327 _mm512_load_pd (void const *__P) 328 { 329 return *(__m512d *) __P; 330 } 331 332 extern __inline __m512d 333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 334 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) 335 { 336 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 337 (__v8df) __W, 338 (__mmask8) __U); 339 } 340 341 extern __inline __m512d 342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 343 _mm512_maskz_load_pd (__mmask8 __U, void const *__P) 344 { 345 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 346 (__v8df) 347 _mm512_setzero_pd (), 348 (__mmask8) __U); 349 } 350 351 extern __inline void 352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 353 _mm512_store_pd (void *__P, __m512d __A) 354 { 355 *(__m512d *) __P = __A; 356 } 357 358 extern __inline void 359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 360 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A) 361 { 362 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A, 363 (__mmask8) __U); 364 } 365 366 extern __inline __m512 367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 368 _mm512_load_ps (void const *__P) 369 { 370 return *(__m512 *) __P; 371 } 372 373 extern __inline __m512 374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 375 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) 376 { 377 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 378 (__v16sf) __W, 379 (__mmask16) __U); 380 } 381 382 extern __inline __m512 383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 384 _mm512_maskz_load_ps (__mmask16 __U, void const *__P) 385 { 386 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 387 (__v16sf) 388 _mm512_setzero_ps (), 389 (__mmask16) __U); 390 } 391 392 extern __inline void 393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 394 _mm512_store_ps (void *__P, __m512 __A) 395 { 396 *(__m512 *) __P = __A; 397 } 398 399 extern __inline void 400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 401 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A) 402 { 403 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A, 404 (__mmask16) __U); 405 } 406 407 extern __inline __m512i 408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 409 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 410 { 411 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 412 (__v8di) __W, 413 (__mmask8) __U); 414 } 415 416 extern __inline __m512i 417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 418 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) 419 { 420 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 421 (__v8di) 422 _mm512_setzero_si512 (), 423 (__mmask8) __U); 424 } 425 426 extern __inline __m512i 427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 428 _mm512_load_epi64 (void const *__P) 429 { 430 return *(__m512i *) __P; 431 } 432 433 extern __inline __m512i 434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 435 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) 436 { 437 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 438 (__v8di) __W, 439 (__mmask8) __U); 440 } 441 442 extern __inline __m512i 443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 444 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) 445 { 446 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 447 (__v8di) 448 _mm512_setzero_si512 (), 449 (__mmask8) __U); 450 } 451 452 extern __inline void 453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 454 _mm512_store_epi64 (void *__P, __m512i __A) 455 { 456 *(__m512i *) __P = __A; 457 } 458 459 extern __inline void 460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 461 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) 462 { 463 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, 464 (__mmask8) __U); 465 } 466 467 extern __inline __m512i 468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 469 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 470 { 471 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, 472 (__v16si) __W, 473 (__mmask16) __U); 474 } 475 476 extern __inline __m512i 477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 478 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) 479 { 480 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, 481 (__v16si) 482 _mm512_setzero_si512 (), 483 (__mmask16) __U); 484 } 485 486 extern __inline __m512i 487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 488 _mm512_load_si512 (void const *__P) 489 { 490 return *(__m512i *) __P; 491 } 492 493 extern __inline __m512i 494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 495 _mm512_load_epi32 (void const *__P) 496 { 497 return *(__m512i *) __P; 498 } 499 500 extern __inline __m512i 501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 502 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) 503 { 504 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 505 (__v16si) __W, 506 (__mmask16) __U); 507 } 508 509 extern __inline __m512i 510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 511 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) 512 { 513 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 514 (__v16si) 515 _mm512_setzero_si512 (), 516 (__mmask16) __U); 517 } 518 519 extern __inline void 520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 521 _mm512_store_si512 (void *__P, __m512i __A) 522 { 523 *(__m512i *) __P = __A; 524 } 525 526 extern __inline void 527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 528 _mm512_store_epi32 (void *__P, __m512i __A) 529 { 530 *(__m512i *) __P = __A; 531 } 532 533 extern __inline void 534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 535 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) 536 { 537 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, 538 (__mmask16) __U); 539 } 540 541 extern __inline __m512i 542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 543 _mm512_mullo_epi32 (__m512i __A, __m512i __B) 544 { 545 return (__m512i) ((__v16su) __A * (__v16su) __B); 546 } 547 548 extern __inline __m512i 549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 550 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 551 { 552 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 553 (__v16si) __B, 554 (__v16si) 555 _mm512_setzero_si512 (), 556 __M); 557 } 558 559 extern __inline __m512i 560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 561 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 562 { 563 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 564 (__v16si) __B, 565 (__v16si) __W, __M); 566 } 567 568 extern __inline __m512i 569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 570 _mm512_sllv_epi32 (__m512i __X, __m512i __Y) 571 { 572 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 573 (__v16si) __Y, 574 (__v16si) 575 _mm512_undefined_epi32 (), 576 (__mmask16) -1); 577 } 578 579 extern __inline __m512i 580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 581 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 582 { 583 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 584 (__v16si) __Y, 585 (__v16si) __W, 586 (__mmask16) __U); 587 } 588 589 extern __inline __m512i 590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 591 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 592 { 593 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 594 (__v16si) __Y, 595 (__v16si) 596 _mm512_setzero_si512 (), 597 (__mmask16) __U); 598 } 599 600 extern __inline __m512i 601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 602 _mm512_srav_epi32 (__m512i __X, __m512i __Y) 603 { 604 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 605 (__v16si) __Y, 606 (__v16si) 607 _mm512_undefined_epi32 (), 608 (__mmask16) -1); 609 } 610 611 extern __inline __m512i 612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 613 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 614 { 615 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 616 (__v16si) __Y, 617 (__v16si) __W, 618 (__mmask16) __U); 619 } 620 621 extern __inline __m512i 622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 623 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 624 { 625 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 626 (__v16si) __Y, 627 (__v16si) 628 _mm512_setzero_si512 (), 629 (__mmask16) __U); 630 } 631 632 extern __inline __m512i 633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 634 _mm512_srlv_epi32 (__m512i __X, __m512i __Y) 635 { 636 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 637 (__v16si) __Y, 638 (__v16si) 639 _mm512_undefined_epi32 (), 640 (__mmask16) -1); 641 } 642 643 extern __inline __m512i 644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 645 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 646 { 647 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 648 (__v16si) __Y, 649 (__v16si) __W, 650 (__mmask16) __U); 651 } 652 653 extern __inline __m512i 654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 655 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 656 { 657 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 658 (__v16si) __Y, 659 (__v16si) 660 _mm512_setzero_si512 (), 661 (__mmask16) __U); 662 } 663 664 extern __inline __m512i 665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 666 _mm512_add_epi64 (__m512i __A, __m512i __B) 667 { 668 return (__m512i) ((__v8du) __A + (__v8du) __B); 669 } 670 671 extern __inline __m512i 672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 673 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 674 { 675 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 676 (__v8di) __B, 677 (__v8di) __W, 678 (__mmask8) __U); 679 } 680 681 extern __inline __m512i 682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 683 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 684 { 685 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 686 (__v8di) __B, 687 (__v8di) 688 _mm512_setzero_si512 (), 689 (__mmask8) __U); 690 } 691 692 extern __inline __m512i 693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 694 _mm512_sub_epi64 (__m512i __A, __m512i __B) 695 { 696 return (__m512i) ((__v8du) __A - (__v8du) __B); 697 } 698 699 extern __inline __m512i 700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 701 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 702 { 703 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 704 (__v8di) __B, 705 (__v8di) __W, 706 (__mmask8) __U); 707 } 708 709 extern __inline __m512i 710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 711 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 712 { 713 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 714 (__v8di) __B, 715 (__v8di) 716 _mm512_setzero_si512 (), 717 (__mmask8) __U); 718 } 719 720 extern __inline __m512i 721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 722 _mm512_sllv_epi64 (__m512i __X, __m512i __Y) 723 { 724 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 725 (__v8di) __Y, 726 (__v8di) 727 _mm512_undefined_pd (), 728 (__mmask8) -1); 729 } 730 731 extern __inline __m512i 732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 733 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 734 { 735 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 736 (__v8di) __Y, 737 (__v8di) __W, 738 (__mmask8) __U); 739 } 740 741 extern __inline __m512i 742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 743 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 744 { 745 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 746 (__v8di) __Y, 747 (__v8di) 748 _mm512_setzero_si512 (), 749 (__mmask8) __U); 750 } 751 752 extern __inline __m512i 753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 754 _mm512_srav_epi64 (__m512i __X, __m512i __Y) 755 { 756 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 757 (__v8di) __Y, 758 (__v8di) 759 _mm512_undefined_epi32 (), 760 (__mmask8) -1); 761 } 762 763 extern __inline __m512i 764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 765 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 766 { 767 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 768 (__v8di) __Y, 769 (__v8di) __W, 770 (__mmask8) __U); 771 } 772 773 extern __inline __m512i 774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 775 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 776 { 777 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 778 (__v8di) __Y, 779 (__v8di) 780 _mm512_setzero_si512 (), 781 (__mmask8) __U); 782 } 783 784 extern __inline __m512i 785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 786 _mm512_srlv_epi64 (__m512i __X, __m512i __Y) 787 { 788 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 789 (__v8di) __Y, 790 (__v8di) 791 _mm512_undefined_epi32 (), 792 (__mmask8) -1); 793 } 794 795 extern __inline __m512i 796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 797 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 798 { 799 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 800 (__v8di) __Y, 801 (__v8di) __W, 802 (__mmask8) __U); 803 } 804 805 extern __inline __m512i 806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 807 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 808 { 809 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 810 (__v8di) __Y, 811 (__v8di) 812 _mm512_setzero_si512 (), 813 (__mmask8) __U); 814 } 815 816 extern __inline __m512i 817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 818 _mm512_add_epi32 (__m512i __A, __m512i __B) 819 { 820 return (__m512i) ((__v16su) __A + (__v16su) __B); 821 } 822 823 extern __inline __m512i 824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 825 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 826 { 827 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 828 (__v16si) __B, 829 (__v16si) __W, 830 (__mmask16) __U); 831 } 832 833 extern __inline __m512i 834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 835 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 836 { 837 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 838 (__v16si) __B, 839 (__v16si) 840 _mm512_setzero_si512 (), 841 (__mmask16) __U); 842 } 843 844 extern __inline __m512i 845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 846 _mm512_mul_epi32 (__m512i __X, __m512i __Y) 847 { 848 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 849 (__v16si) __Y, 850 (__v8di) 851 _mm512_undefined_epi32 (), 852 (__mmask8) -1); 853 } 854 855 extern __inline __m512i 856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 857 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 858 { 859 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 860 (__v16si) __Y, 861 (__v8di) __W, __M); 862 } 863 864 extern __inline __m512i 865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 866 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) 867 { 868 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 869 (__v16si) __Y, 870 (__v8di) 871 _mm512_setzero_si512 (), 872 __M); 873 } 874 875 extern __inline __m512i 876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 877 _mm512_sub_epi32 (__m512i __A, __m512i __B) 878 { 879 return (__m512i) ((__v16su) __A - (__v16su) __B); 880 } 881 882 extern __inline __m512i 883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 884 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 885 { 886 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 887 (__v16si) __B, 888 (__v16si) __W, 889 (__mmask16) __U); 890 } 891 892 extern __inline __m512i 893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 894 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 895 { 896 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 897 (__v16si) __B, 898 (__v16si) 899 _mm512_setzero_si512 (), 900 (__mmask16) __U); 901 } 902 903 extern __inline __m512i 904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 905 _mm512_mul_epu32 (__m512i __X, __m512i __Y) 906 { 907 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 908 (__v16si) __Y, 909 (__v8di) 910 _mm512_undefined_epi32 (), 911 (__mmask8) -1); 912 } 913 914 extern __inline __m512i 915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 916 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 917 { 918 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 919 (__v16si) __Y, 920 (__v8di) __W, __M); 921 } 922 923 extern __inline __m512i 924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 925 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) 926 { 927 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 928 (__v16si) __Y, 929 (__v8di) 930 _mm512_setzero_si512 (), 931 __M); 932 } 933 934 #ifdef __OPTIMIZE__ 935 extern __inline __m512i 936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 937 _mm512_slli_epi64 (__m512i __A, unsigned int __B) 938 { 939 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 940 (__v8di) 941 _mm512_undefined_epi32 (), 942 (__mmask8) -1); 943 } 944 945 extern __inline __m512i 946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 947 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 948 unsigned int __B) 949 { 950 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 951 (__v8di) __W, 952 (__mmask8) __U); 953 } 954 955 extern __inline __m512i 956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 957 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 958 { 959 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 960 (__v8di) 961 _mm512_setzero_si512 (), 962 (__mmask8) __U); 963 } 964 #else 965 #define _mm512_slli_epi64(X, C) \ 966 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 967 (__v8di)(__m512i)_mm512_undefined_epi32 (),\ 968 (__mmask8)-1)) 969 970 #define _mm512_mask_slli_epi64(W, U, X, C) \ 971 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 972 (__v8di)(__m512i)(W),\ 973 (__mmask8)(U))) 974 975 #define _mm512_maskz_slli_epi64(U, X, C) \ 976 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 977 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 978 (__mmask8)(U))) 979 #endif 980 981 extern __inline __m512i 982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 983 _mm512_sll_epi64 (__m512i __A, __m128i __B) 984 { 985 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 986 (__v2di) __B, 987 (__v8di) 988 _mm512_undefined_epi32 (), 989 (__mmask8) -1); 990 } 991 992 extern __inline __m512i 993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 994 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 995 { 996 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 997 (__v2di) __B, 998 (__v8di) __W, 999 (__mmask8) __U); 1000 } 1001 1002 extern __inline __m512i 1003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1004 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 1005 { 1006 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 1007 (__v2di) __B, 1008 (__v8di) 1009 _mm512_setzero_si512 (), 1010 (__mmask8) __U); 1011 } 1012 1013 #ifdef __OPTIMIZE__ 1014 extern __inline __m512i 1015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1016 _mm512_srli_epi64 (__m512i __A, unsigned int __B) 1017 { 1018 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, 1019 (__v8di) 1020 _mm512_undefined_epi32 (), 1021 (__mmask8) -1); 1022 } 1023 1024 extern __inline __m512i 1025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1026 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U, 1027 __m512i __A, unsigned int __B) 1028 { 1029 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, 1030 (__v8di) __W, 1031 (__mmask8) __U); 1032 } 1033 1034 extern __inline __m512i 1035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1036 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 1037 { 1038 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, 1039 (__v8di) 1040 _mm512_setzero_si512 (), 1041 (__mmask8) __U); 1042 } 1043 #else 1044 #define _mm512_srli_epi64(X, C) \ 1045 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1046 (__v8di)(__m512i)_mm512_undefined_epi32 (),\ 1047 (__mmask8)-1)) 1048 1049 #define _mm512_mask_srli_epi64(W, U, X, C) \ 1050 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1051 (__v8di)(__m512i)(W),\ 1052 (__mmask8)(U))) 1053 1054 #define _mm512_maskz_srli_epi64(U, X, C) \ 1055 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1056 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 1057 (__mmask8)(U))) 1058 #endif 1059 1060 extern __inline __m512i 1061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1062 _mm512_srl_epi64 (__m512i __A, __m128i __B) 1063 { 1064 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 1065 (__v2di) __B, 1066 (__v8di) 1067 _mm512_undefined_epi32 (), 1068 (__mmask8) -1); 1069 } 1070 1071 extern __inline __m512i 1072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1073 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 1074 { 1075 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 1076 (__v2di) __B, 1077 (__v8di) __W, 1078 (__mmask8) __U); 1079 } 1080 1081 extern __inline __m512i 1082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1083 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 1084 { 1085 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 1086 (__v2di) __B, 1087 (__v8di) 1088 _mm512_setzero_si512 (), 1089 (__mmask8) __U); 1090 } 1091 1092 #ifdef __OPTIMIZE__ 1093 extern __inline __m512i 1094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1095 _mm512_srai_epi64 (__m512i __A, unsigned int __B) 1096 { 1097 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, 1098 (__v8di) 1099 _mm512_undefined_epi32 (), 1100 (__mmask8) -1); 1101 } 1102 1103 extern __inline __m512i 1104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1105 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 1106 unsigned int __B) 1107 { 1108 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, 1109 (__v8di) __W, 1110 (__mmask8) __U); 1111 } 1112 1113 extern __inline __m512i 1114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1115 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 1116 { 1117 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, 1118 (__v8di) 1119 _mm512_setzero_si512 (), 1120 (__mmask8) __U); 1121 } 1122 #else 1123 #define _mm512_srai_epi64(X, C) \ 1124 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1125 (__v8di)(__m512i)_mm512_undefined_epi32 (),\ 1126 (__mmask8)-1)) 1127 1128 #define _mm512_mask_srai_epi64(W, U, X, C) \ 1129 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1130 (__v8di)(__m512i)(W),\ 1131 (__mmask8)(U))) 1132 1133 #define _mm512_maskz_srai_epi64(U, X, C) \ 1134 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1135 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 1136 (__mmask8)(U))) 1137 #endif 1138 1139 extern __inline __m512i 1140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1141 _mm512_sra_epi64 (__m512i __A, __m128i __B) 1142 { 1143 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 1144 (__v2di) __B, 1145 (__v8di) 1146 _mm512_undefined_epi32 (), 1147 (__mmask8) -1); 1148 } 1149 1150 extern __inline __m512i 1151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1152 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 1153 { 1154 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 1155 (__v2di) __B, 1156 (__v8di) __W, 1157 (__mmask8) __U); 1158 } 1159 1160 extern __inline __m512i 1161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1162 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 1163 { 1164 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 1165 (__v2di) __B, 1166 (__v8di) 1167 _mm512_setzero_si512 (), 1168 (__mmask8) __U); 1169 } 1170 1171 #ifdef __OPTIMIZE__ 1172 extern __inline __m512i 1173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1174 _mm512_slli_epi32 (__m512i __A, unsigned int __B) 1175 { 1176 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, 1177 (__v16si) 1178 _mm512_undefined_epi32 (), 1179 (__mmask16) -1); 1180 } 1181 1182 extern __inline __m512i 1183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1184 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 1185 unsigned int __B) 1186 { 1187 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, 1188 (__v16si) __W, 1189 (__mmask16) __U); 1190 } 1191 1192 extern __inline __m512i 1193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1194 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) 1195 { 1196 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, 1197 (__v16si) 1198 _mm512_setzero_si512 (), 1199 (__mmask16) __U); 1200 } 1201 #else 1202 #define _mm512_slli_epi32(X, C) \ 1203 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1204 (__v16si)(__m512i)_mm512_undefined_epi32 (),\ 1205 (__mmask16)-1)) 1206 1207 #define _mm512_mask_slli_epi32(W, U, X, C) \ 1208 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1209 (__v16si)(__m512i)(W),\ 1210 (__mmask16)(U))) 1211 1212 #define _mm512_maskz_slli_epi32(U, X, C) \ 1213 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1214 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 1215 (__mmask16)(U))) 1216 #endif 1217 1218 extern __inline __m512i 1219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1220 _mm512_sll_epi32 (__m512i __A, __m128i __B) 1221 { 1222 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 1223 (__v4si) __B, 1224 (__v16si) 1225 _mm512_undefined_epi32 (), 1226 (__mmask16) -1); 1227 } 1228 1229 extern __inline __m512i 1230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1231 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 1232 { 1233 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 1234 (__v4si) __B, 1235 (__v16si) __W, 1236 (__mmask16) __U); 1237 } 1238 1239 extern __inline __m512i 1240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1241 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 1242 { 1243 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 1244 (__v4si) __B, 1245 (__v16si) 1246 _mm512_setzero_si512 (), 1247 (__mmask16) __U); 1248 } 1249 1250 #ifdef __OPTIMIZE__ 1251 extern __inline __m512i 1252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1253 _mm512_srli_epi32 (__m512i __A, unsigned int __B) 1254 { 1255 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, 1256 (__v16si) 1257 _mm512_undefined_epi32 (), 1258 (__mmask16) -1); 1259 } 1260 1261 extern __inline __m512i 1262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1263 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U, 1264 __m512i __A, unsigned int __B) 1265 { 1266 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, 1267 (__v16si) __W, 1268 (__mmask16) __U); 1269 } 1270 1271 extern __inline __m512i 1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1273 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) 1274 { 1275 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, 1276 (__v16si) 1277 _mm512_setzero_si512 (), 1278 (__mmask16) __U); 1279 } 1280 #else 1281 #define _mm512_srli_epi32(X, C) \ 1282 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1283 (__v16si)(__m512i)_mm512_undefined_epi32 (),\ 1284 (__mmask16)-1)) 1285 1286 #define _mm512_mask_srli_epi32(W, U, X, C) \ 1287 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1288 (__v16si)(__m512i)(W),\ 1289 (__mmask16)(U))) 1290 1291 #define _mm512_maskz_srli_epi32(U, X, C) \ 1292 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1293 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 1294 (__mmask16)(U))) 1295 #endif 1296 1297 extern __inline __m512i 1298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1299 _mm512_srl_epi32 (__m512i __A, __m128i __B) 1300 { 1301 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 1302 (__v4si) __B, 1303 (__v16si) 1304 _mm512_undefined_epi32 (), 1305 (__mmask16) -1); 1306 } 1307 1308 extern __inline __m512i 1309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1310 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 1311 { 1312 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 1313 (__v4si) __B, 1314 (__v16si) __W, 1315 (__mmask16) __U); 1316 } 1317 1318 extern __inline __m512i 1319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1320 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 1321 { 1322 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 1323 (__v4si) __B, 1324 (__v16si) 1325 _mm512_setzero_si512 (), 1326 (__mmask16) __U); 1327 } 1328 1329 #ifdef __OPTIMIZE__ 1330 extern __inline __m512i 1331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1332 _mm512_srai_epi32 (__m512i __A, unsigned int __B) 1333 { 1334 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, 1335 (__v16si) 1336 _mm512_undefined_epi32 (), 1337 (__mmask16) -1); 1338 } 1339 1340 extern __inline __m512i 1341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1342 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 1343 unsigned int __B) 1344 { 1345 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, 1346 (__v16si) __W, 1347 (__mmask16) __U); 1348 } 1349 1350 extern __inline __m512i 1351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1352 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) 1353 { 1354 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, 1355 (__v16si) 1356 _mm512_setzero_si512 (), 1357 (__mmask16) __U); 1358 } 1359 #else 1360 #define _mm512_srai_epi32(X, C) \ 1361 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1362 (__v16si)(__m512i)_mm512_undefined_epi32 (),\ 1363 (__mmask16)-1)) 1364 1365 #define _mm512_mask_srai_epi32(W, U, X, C) \ 1366 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1367 (__v16si)(__m512i)(W),\ 1368 (__mmask16)(U))) 1369 1370 #define _mm512_maskz_srai_epi32(U, X, C) \ 1371 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1372 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 1373 (__mmask16)(U))) 1374 #endif 1375 1376 extern __inline __m512i 1377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1378 _mm512_sra_epi32 (__m512i __A, __m128i __B) 1379 { 1380 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 1381 (__v4si) __B, 1382 (__v16si) 1383 _mm512_undefined_epi32 (), 1384 (__mmask16) -1); 1385 } 1386 1387 extern __inline __m512i 1388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1389 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 1390 { 1391 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 1392 (__v4si) __B, 1393 (__v16si) __W, 1394 (__mmask16) __U); 1395 } 1396 1397 extern __inline __m512i 1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1399 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 1400 { 1401 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 1402 (__v4si) __B, 1403 (__v16si) 1404 _mm512_setzero_si512 (), 1405 (__mmask16) __U); 1406 } 1407 1408 #ifdef __OPTIMIZE__ 1409 extern __inline __m128d 1410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1411 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R) 1412 { 1413 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, 1414 (__v2df) __B, 1415 __R); 1416 } 1417 1418 extern __inline __m128d 1419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1420 _mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 1421 __m128d __B, const int __R) 1422 { 1423 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, 1424 (__v2df) __B, 1425 (__v2df) __W, 1426 (__mmask8) __U, __R); 1427 } 1428 1429 extern __inline __m128d 1430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1431 _mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 1432 const int __R) 1433 { 1434 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, 1435 (__v2df) __B, 1436 (__v2df) 1437 _mm_setzero_pd (), 1438 (__mmask8) __U, __R); 1439 } 1440 1441 extern __inline __m128 1442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1443 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R) 1444 { 1445 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, 1446 (__v4sf) __B, 1447 __R); 1448 } 1449 1450 extern __inline __m128 1451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1452 _mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 1453 __m128 __B, const int __R) 1454 { 1455 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, 1456 (__v4sf) __B, 1457 (__v4sf) __W, 1458 (__mmask8) __U, __R); 1459 } 1460 1461 extern __inline __m128 1462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1463 _mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 1464 const int __R) 1465 { 1466 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, 1467 (__v4sf) __B, 1468 (__v4sf) 1469 _mm_setzero_ps (), 1470 (__mmask8) __U, __R); 1471 } 1472 1473 extern __inline __m128d 1474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1475 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R) 1476 { 1477 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, 1478 (__v2df) __B, 1479 __R); 1480 } 1481 1482 extern __inline __m128d 1483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1484 _mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 1485 __m128d __B, const int __R) 1486 { 1487 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, 1488 (__v2df) __B, 1489 (__v2df) __W, 1490 (__mmask8) __U, __R); 1491 } 1492 1493 extern __inline __m128d 1494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1495 _mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 1496 const int __R) 1497 { 1498 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, 1499 (__v2df) __B, 1500 (__v2df) 1501 _mm_setzero_pd (), 1502 (__mmask8) __U, __R); 1503 } 1504 1505 extern __inline __m128 1506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1507 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R) 1508 { 1509 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, 1510 (__v4sf) __B, 1511 __R); 1512 } 1513 1514 extern __inline __m128 1515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1516 _mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 1517 __m128 __B, const int __R) 1518 { 1519 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, 1520 (__v4sf) __B, 1521 (__v4sf) __W, 1522 (__mmask8) __U, __R); 1523 } 1524 1525 extern __inline __m128 1526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1527 _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 1528 const int __R) 1529 { 1530 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, 1531 (__v4sf) __B, 1532 (__v4sf) 1533 _mm_setzero_ps (), 1534 (__mmask8) __U, __R); 1535 } 1536 1537 #else 1538 #define _mm_add_round_sd(A, B, C) \ 1539 (__m128d)__builtin_ia32_addsd_round(A, B, C) 1540 1541 #define _mm_mask_add_round_sd(W, U, A, B, C) \ 1542 (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C) 1543 1544 #define _mm_maskz_add_round_sd(U, A, B, C) \ 1545 (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 1546 1547 #define _mm_add_round_ss(A, B, C) \ 1548 (__m128)__builtin_ia32_addss_round(A, B, C) 1549 1550 #define _mm_mask_add_round_ss(W, U, A, B, C) \ 1551 (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C) 1552 1553 #define _mm_maskz_add_round_ss(U, A, B, C) \ 1554 (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 1555 1556 #define _mm_sub_round_sd(A, B, C) \ 1557 (__m128d)__builtin_ia32_subsd_round(A, B, C) 1558 1559 #define _mm_mask_sub_round_sd(W, U, A, B, C) \ 1560 (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C) 1561 1562 #define _mm_maskz_sub_round_sd(U, A, B, C) \ 1563 (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 1564 1565 #define _mm_sub_round_ss(A, B, C) \ 1566 (__m128)__builtin_ia32_subss_round(A, B, C) 1567 1568 #define _mm_mask_sub_round_ss(W, U, A, B, C) \ 1569 (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C) 1570 1571 #define _mm_maskz_sub_round_ss(U, A, B, C) \ 1572 (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 1573 1574 #endif 1575 1576 #ifdef __OPTIMIZE__ 1577 extern __inline __m512i 1578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1579 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, 1580 const int __imm) 1581 { 1582 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, 1583 (__v8di) __B, 1584 (__v8di) __C, __imm, 1585 (__mmask8) -1); 1586 } 1587 1588 extern __inline __m512i 1589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1590 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B, 1591 __m512i __C, const int __imm) 1592 { 1593 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, 1594 (__v8di) __B, 1595 (__v8di) __C, __imm, 1596 (__mmask8) __U); 1597 } 1598 1599 extern __inline __m512i 1600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1601 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B, 1602 __m512i __C, const int __imm) 1603 { 1604 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A, 1605 (__v8di) __B, 1606 (__v8di) __C, 1607 __imm, (__mmask8) __U); 1608 } 1609 1610 extern __inline __m512i 1611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1612 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, 1613 const int __imm) 1614 { 1615 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, 1616 (__v16si) __B, 1617 (__v16si) __C, 1618 __imm, (__mmask16) -1); 1619 } 1620 1621 extern __inline __m512i 1622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1623 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B, 1624 __m512i __C, const int __imm) 1625 { 1626 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, 1627 (__v16si) __B, 1628 (__v16si) __C, 1629 __imm, (__mmask16) __U); 1630 } 1631 1632 extern __inline __m512i 1633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1634 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B, 1635 __m512i __C, const int __imm) 1636 { 1637 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A, 1638 (__v16si) __B, 1639 (__v16si) __C, 1640 __imm, (__mmask16) __U); 1641 } 1642 #else 1643 #define _mm512_ternarylogic_epi64(A, B, C, I) \ 1644 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ 1645 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1)) 1646 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \ 1647 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ 1648 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) 1649 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \ 1650 ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \ 1651 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) 1652 #define _mm512_ternarylogic_epi32(A, B, C, I) \ 1653 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ 1654 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ 1655 (__mmask16)-1)) 1656 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \ 1657 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ 1658 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ 1659 (__mmask16)(U))) 1660 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \ 1661 ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \ 1662 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ 1663 (__mmask16)(U))) 1664 #endif 1665 1666 extern __inline __m512d 1667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1668 _mm512_rcp14_pd (__m512d __A) 1669 { 1670 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1671 (__v8df) 1672 _mm512_undefined_pd (), 1673 (__mmask8) -1); 1674 } 1675 1676 extern __inline __m512d 1677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1678 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1679 { 1680 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1681 (__v8df) __W, 1682 (__mmask8) __U); 1683 } 1684 1685 extern __inline __m512d 1686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1687 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) 1688 { 1689 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1690 (__v8df) 1691 _mm512_setzero_pd (), 1692 (__mmask8) __U); 1693 } 1694 1695 extern __inline __m512 1696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1697 _mm512_rcp14_ps (__m512 __A) 1698 { 1699 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1700 (__v16sf) 1701 _mm512_undefined_ps (), 1702 (__mmask16) -1); 1703 } 1704 1705 extern __inline __m512 1706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1707 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1708 { 1709 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1710 (__v16sf) __W, 1711 (__mmask16) __U); 1712 } 1713 1714 extern __inline __m512 1715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1716 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) 1717 { 1718 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1719 (__v16sf) 1720 _mm512_setzero_ps (), 1721 (__mmask16) __U); 1722 } 1723 1724 extern __inline __m128d 1725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1726 _mm_rcp14_sd (__m128d __A, __m128d __B) 1727 { 1728 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B, 1729 (__v2df) __A); 1730 } 1731 1732 extern __inline __m128d 1733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1734 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1735 { 1736 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B, 1737 (__v2df) __A, 1738 (__v2df) __W, 1739 (__mmask8) __U); 1740 } 1741 1742 extern __inline __m128d 1743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1744 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1745 { 1746 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B, 1747 (__v2df) __A, 1748 (__v2df) _mm_setzero_ps (), 1749 (__mmask8) __U); 1750 } 1751 1752 extern __inline __m128 1753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1754 _mm_rcp14_ss (__m128 __A, __m128 __B) 1755 { 1756 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B, 1757 (__v4sf) __A); 1758 } 1759 1760 extern __inline __m128 1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1762 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1763 { 1764 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B, 1765 (__v4sf) __A, 1766 (__v4sf) __W, 1767 (__mmask8) __U); 1768 } 1769 1770 extern __inline __m128 1771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1772 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1773 { 1774 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B, 1775 (__v4sf) __A, 1776 (__v4sf) _mm_setzero_ps (), 1777 (__mmask8) __U); 1778 } 1779 1780 extern __inline __m512d 1781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1782 _mm512_rsqrt14_pd (__m512d __A) 1783 { 1784 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1785 (__v8df) 1786 _mm512_undefined_pd (), 1787 (__mmask8) -1); 1788 } 1789 1790 extern __inline __m512d 1791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1792 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1793 { 1794 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1795 (__v8df) __W, 1796 (__mmask8) __U); 1797 } 1798 1799 extern __inline __m512d 1800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1801 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) 1802 { 1803 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1804 (__v8df) 1805 _mm512_setzero_pd (), 1806 (__mmask8) __U); 1807 } 1808 1809 extern __inline __m512 1810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1811 _mm512_rsqrt14_ps (__m512 __A) 1812 { 1813 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1814 (__v16sf) 1815 _mm512_undefined_ps (), 1816 (__mmask16) -1); 1817 } 1818 1819 extern __inline __m512 1820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1821 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1822 { 1823 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1824 (__v16sf) __W, 1825 (__mmask16) __U); 1826 } 1827 1828 extern __inline __m512 1829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1830 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) 1831 { 1832 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1833 (__v16sf) 1834 _mm512_setzero_ps (), 1835 (__mmask16) __U); 1836 } 1837 1838 extern __inline __m128d 1839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1840 _mm_rsqrt14_sd (__m128d __A, __m128d __B) 1841 { 1842 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B, 1843 (__v2df) __A); 1844 } 1845 1846 extern __inline __m128d 1847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1848 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1849 { 1850 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, 1851 (__v2df) __A, 1852 (__v2df) __W, 1853 (__mmask8) __U); 1854 } 1855 1856 extern __inline __m128d 1857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1858 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1859 { 1860 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, 1861 (__v2df) __A, 1862 (__v2df) _mm_setzero_pd (), 1863 (__mmask8) __U); 1864 } 1865 1866 extern __inline __m128 1867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1868 _mm_rsqrt14_ss (__m128 __A, __m128 __B) 1869 { 1870 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B, 1871 (__v4sf) __A); 1872 } 1873 1874 extern __inline __m128 1875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1876 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1877 { 1878 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, 1879 (__v4sf) __A, 1880 (__v4sf) __W, 1881 (__mmask8) __U); 1882 } 1883 1884 extern __inline __m128 1885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1886 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1887 { 1888 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, 1889 (__v4sf) __A, 1890 (__v4sf) _mm_setzero_ps (), 1891 (__mmask8) __U); 1892 } 1893 1894 #ifdef __OPTIMIZE__ 1895 extern __inline __m512d 1896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1897 _mm512_sqrt_round_pd (__m512d __A, const int __R) 1898 { 1899 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1900 (__v8df) 1901 _mm512_undefined_pd (), 1902 (__mmask8) -1, __R); 1903 } 1904 1905 extern __inline __m512d 1906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1907 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 1908 const int __R) 1909 { 1910 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1911 (__v8df) __W, 1912 (__mmask8) __U, __R); 1913 } 1914 1915 extern __inline __m512d 1916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1917 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R) 1918 { 1919 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1920 (__v8df) 1921 _mm512_setzero_pd (), 1922 (__mmask8) __U, __R); 1923 } 1924 1925 extern __inline __m512 1926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1927 _mm512_sqrt_round_ps (__m512 __A, const int __R) 1928 { 1929 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 1930 (__v16sf) 1931 _mm512_undefined_ps (), 1932 (__mmask16) -1, __R); 1933 } 1934 1935 extern __inline __m512 1936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1937 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R) 1938 { 1939 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 1940 (__v16sf) __W, 1941 (__mmask16) __U, __R); 1942 } 1943 1944 extern __inline __m512 1945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1946 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R) 1947 { 1948 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 1949 (__v16sf) 1950 _mm512_setzero_ps (), 1951 (__mmask16) __U, __R); 1952 } 1953 1954 extern __inline __m128d 1955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1956 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R) 1957 { 1958 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, 1959 (__v2df) __A, 1960 (__v2df) 1961 _mm_setzero_pd (), 1962 (__mmask8) -1, __R); 1963 } 1964 1965 extern __inline __m128d 1966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1967 _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 1968 const int __R) 1969 { 1970 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, 1971 (__v2df) __A, 1972 (__v2df) __W, 1973 (__mmask8) __U, __R); 1974 } 1975 1976 extern __inline __m128d 1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1978 _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R) 1979 { 1980 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, 1981 (__v2df) __A, 1982 (__v2df) 1983 _mm_setzero_pd (), 1984 (__mmask8) __U, __R); 1985 } 1986 1987 extern __inline __m128 1988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1989 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R) 1990 { 1991 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, 1992 (__v4sf) __A, 1993 (__v4sf) 1994 _mm_setzero_ps (), 1995 (__mmask8) -1, __R); 1996 } 1997 1998 extern __inline __m128 1999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2000 _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 2001 const int __R) 2002 { 2003 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, 2004 (__v4sf) __A, 2005 (__v4sf) __W, 2006 (__mmask8) __U, __R); 2007 } 2008 2009 extern __inline __m128 2010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2011 _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) 2012 { 2013 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, 2014 (__v4sf) __A, 2015 (__v4sf) 2016 _mm_setzero_ps (), 2017 (__mmask8) __U, __R); 2018 } 2019 #else 2020 #define _mm512_sqrt_round_pd(A, C) \ 2021 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C) 2022 2023 #define _mm512_mask_sqrt_round_pd(W, U, A, C) \ 2024 (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C) 2025 2026 #define _mm512_maskz_sqrt_round_pd(U, A, C) \ 2027 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C) 2028 2029 #define _mm512_sqrt_round_ps(A, C) \ 2030 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C) 2031 2032 #define _mm512_mask_sqrt_round_ps(W, U, A, C) \ 2033 (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C) 2034 2035 #define _mm512_maskz_sqrt_round_ps(U, A, C) \ 2036 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) 2037 2038 #define _mm_sqrt_round_sd(A, B, C) \ 2039 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \ 2040 (__v2df) _mm_setzero_pd (), -1, C) 2041 2042 #define _mm_mask_sqrt_round_sd(W, U, A, B, C) \ 2043 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C) 2044 2045 #define _mm_maskz_sqrt_round_sd(U, A, B, C) \ 2046 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \ 2047 (__v2df) _mm_setzero_pd (), U, C) 2048 2049 #define _mm_sqrt_round_ss(A, B, C) \ 2050 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \ 2051 (__v4sf) _mm_setzero_ps (), -1, C) 2052 2053 #define _mm_mask_sqrt_round_ss(W, U, A, B, C) \ 2054 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C) 2055 2056 #define _mm_maskz_sqrt_round_ss(U, A, B, C) \ 2057 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \ 2058 (__v4sf) _mm_setzero_ps (), U, C) 2059 #endif 2060 2061 extern __inline __m512i 2062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2063 _mm512_cvtepi8_epi32 (__m128i __A) 2064 { 2065 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 2066 (__v16si) 2067 _mm512_undefined_epi32 (), 2068 (__mmask16) -1); 2069 } 2070 2071 extern __inline __m512i 2072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2073 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) 2074 { 2075 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 2076 (__v16si) __W, 2077 (__mmask16) __U); 2078 } 2079 2080 extern __inline __m512i 2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2082 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A) 2083 { 2084 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 2085 (__v16si) 2086 _mm512_setzero_si512 (), 2087 (__mmask16) __U); 2088 } 2089 2090 extern __inline __m512i 2091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2092 _mm512_cvtepi8_epi64 (__m128i __A) 2093 { 2094 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 2095 (__v8di) 2096 _mm512_undefined_epi32 (), 2097 (__mmask8) -1); 2098 } 2099 2100 extern __inline __m512i 2101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2102 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 2103 { 2104 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 2105 (__v8di) __W, 2106 (__mmask8) __U); 2107 } 2108 2109 extern __inline __m512i 2110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2111 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) 2112 { 2113 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 2114 (__v8di) 2115 _mm512_setzero_si512 (), 2116 (__mmask8) __U); 2117 } 2118 2119 extern __inline __m512i 2120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2121 _mm512_cvtepi16_epi32 (__m256i __A) 2122 { 2123 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 2124 (__v16si) 2125 _mm512_undefined_epi32 (), 2126 (__mmask16) -1); 2127 } 2128 2129 extern __inline __m512i 2130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2131 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) 2132 { 2133 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 2134 (__v16si) __W, 2135 (__mmask16) __U); 2136 } 2137 2138 extern __inline __m512i 2139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2140 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A) 2141 { 2142 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 2143 (__v16si) 2144 _mm512_setzero_si512 (), 2145 (__mmask16) __U); 2146 } 2147 2148 extern __inline __m512i 2149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2150 _mm512_cvtepi16_epi64 (__m128i __A) 2151 { 2152 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 2153 (__v8di) 2154 _mm512_undefined_epi32 (), 2155 (__mmask8) -1); 2156 } 2157 2158 extern __inline __m512i 2159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2160 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 2161 { 2162 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 2163 (__v8di) __W, 2164 (__mmask8) __U); 2165 } 2166 2167 extern __inline __m512i 2168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2169 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) 2170 { 2171 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 2172 (__v8di) 2173 _mm512_setzero_si512 (), 2174 (__mmask8) __U); 2175 } 2176 2177 extern __inline __m512i 2178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2179 _mm512_cvtepi32_epi64 (__m256i __X) 2180 { 2181 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 2182 (__v8di) 2183 _mm512_undefined_epi32 (), 2184 (__mmask8) -1); 2185 } 2186 2187 extern __inline __m512i 2188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2189 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) 2190 { 2191 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 2192 (__v8di) __W, 2193 (__mmask8) __U); 2194 } 2195 2196 extern __inline __m512i 2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2198 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X) 2199 { 2200 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 2201 (__v8di) 2202 _mm512_setzero_si512 (), 2203 (__mmask8) __U); 2204 } 2205 2206 extern __inline __m512i 2207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2208 _mm512_cvtepu8_epi32 (__m128i __A) 2209 { 2210 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 2211 (__v16si) 2212 _mm512_undefined_epi32 (), 2213 (__mmask16) -1); 2214 } 2215 2216 extern __inline __m512i 2217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2218 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) 2219 { 2220 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 2221 (__v16si) __W, 2222 (__mmask16) __U); 2223 } 2224 2225 extern __inline __m512i 2226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2227 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A) 2228 { 2229 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 2230 (__v16si) 2231 _mm512_setzero_si512 (), 2232 (__mmask16) __U); 2233 } 2234 2235 extern __inline __m512i 2236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2237 _mm512_cvtepu8_epi64 (__m128i __A) 2238 { 2239 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 2240 (__v8di) 2241 _mm512_undefined_epi32 (), 2242 (__mmask8) -1); 2243 } 2244 2245 extern __inline __m512i 2246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2247 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 2248 { 2249 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 2250 (__v8di) __W, 2251 (__mmask8) __U); 2252 } 2253 2254 extern __inline __m512i 2255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2256 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 2257 { 2258 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 2259 (__v8di) 2260 _mm512_setzero_si512 (), 2261 (__mmask8) __U); 2262 } 2263 2264 extern __inline __m512i 2265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2266 _mm512_cvtepu16_epi32 (__m256i __A) 2267 { 2268 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 2269 (__v16si) 2270 _mm512_undefined_epi32 (), 2271 (__mmask16) -1); 2272 } 2273 2274 extern __inline __m512i 2275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2276 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) 2277 { 2278 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 2279 (__v16si) __W, 2280 (__mmask16) __U); 2281 } 2282 2283 extern __inline __m512i 2284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2285 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A) 2286 { 2287 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 2288 (__v16si) 2289 _mm512_setzero_si512 (), 2290 (__mmask16) __U); 2291 } 2292 2293 extern __inline __m512i 2294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2295 _mm512_cvtepu16_epi64 (__m128i __A) 2296 { 2297 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 2298 (__v8di) 2299 _mm512_undefined_epi32 (), 2300 (__mmask8) -1); 2301 } 2302 2303 extern __inline __m512i 2304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2305 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 2306 { 2307 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 2308 (__v8di) __W, 2309 (__mmask8) __U); 2310 } 2311 2312 extern __inline __m512i 2313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2314 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) 2315 { 2316 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 2317 (__v8di) 2318 _mm512_setzero_si512 (), 2319 (__mmask8) __U); 2320 } 2321 2322 extern __inline __m512i 2323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2324 _mm512_cvtepu32_epi64 (__m256i __X) 2325 { 2326 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 2327 (__v8di) 2328 _mm512_undefined_epi32 (), 2329 (__mmask8) -1); 2330 } 2331 2332 extern __inline __m512i 2333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2334 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) 2335 { 2336 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 2337 (__v8di) __W, 2338 (__mmask8) __U); 2339 } 2340 2341 extern __inline __m512i 2342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2343 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X) 2344 { 2345 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 2346 (__v8di) 2347 _mm512_setzero_si512 (), 2348 (__mmask8) __U); 2349 } 2350 2351 #ifdef __OPTIMIZE__ 2352 extern __inline __m512d 2353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2354 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R) 2355 { 2356 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2357 (__v8df) __B, 2358 (__v8df) 2359 _mm512_undefined_pd (), 2360 (__mmask8) -1, __R); 2361 } 2362 2363 extern __inline __m512d 2364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2365 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2366 __m512d __B, const int __R) 2367 { 2368 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2369 (__v8df) __B, 2370 (__v8df) __W, 2371 (__mmask8) __U, __R); 2372 } 2373 2374 extern __inline __m512d 2375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2376 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2377 const int __R) 2378 { 2379 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2380 (__v8df) __B, 2381 (__v8df) 2382 _mm512_setzero_pd (), 2383 (__mmask8) __U, __R); 2384 } 2385 2386 extern __inline __m512 2387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2388 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R) 2389 { 2390 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2391 (__v16sf) __B, 2392 (__v16sf) 2393 _mm512_undefined_ps (), 2394 (__mmask16) -1, __R); 2395 } 2396 2397 extern __inline __m512 2398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2399 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2400 __m512 __B, const int __R) 2401 { 2402 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2403 (__v16sf) __B, 2404 (__v16sf) __W, 2405 (__mmask16) __U, __R); 2406 } 2407 2408 extern __inline __m512 2409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2410 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2411 { 2412 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2413 (__v16sf) __B, 2414 (__v16sf) 2415 _mm512_setzero_ps (), 2416 (__mmask16) __U, __R); 2417 } 2418 2419 extern __inline __m512d 2420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2421 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R) 2422 { 2423 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2424 (__v8df) __B, 2425 (__v8df) 2426 _mm512_undefined_pd (), 2427 (__mmask8) -1, __R); 2428 } 2429 2430 extern __inline __m512d 2431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2432 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2433 __m512d __B, const int __R) 2434 { 2435 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2436 (__v8df) __B, 2437 (__v8df) __W, 2438 (__mmask8) __U, __R); 2439 } 2440 2441 extern __inline __m512d 2442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2443 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2444 const int __R) 2445 { 2446 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2447 (__v8df) __B, 2448 (__v8df) 2449 _mm512_setzero_pd (), 2450 (__mmask8) __U, __R); 2451 } 2452 2453 extern __inline __m512 2454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2455 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R) 2456 { 2457 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2458 (__v16sf) __B, 2459 (__v16sf) 2460 _mm512_undefined_ps (), 2461 (__mmask16) -1, __R); 2462 } 2463 2464 extern __inline __m512 2465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2466 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2467 __m512 __B, const int __R) 2468 { 2469 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2470 (__v16sf) __B, 2471 (__v16sf) __W, 2472 (__mmask16) __U, __R); 2473 } 2474 2475 extern __inline __m512 2476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2477 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2478 { 2479 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2480 (__v16sf) __B, 2481 (__v16sf) 2482 _mm512_setzero_ps (), 2483 (__mmask16) __U, __R); 2484 } 2485 #else 2486 #define _mm512_add_round_pd(A, B, C) \ 2487 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2488 2489 #define _mm512_mask_add_round_pd(W, U, A, B, C) \ 2490 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C) 2491 2492 #define _mm512_maskz_add_round_pd(U, A, B, C) \ 2493 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2494 2495 #define _mm512_add_round_ps(A, B, C) \ 2496 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2497 2498 #define _mm512_mask_add_round_ps(W, U, A, B, C) \ 2499 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C) 2500 2501 #define _mm512_maskz_add_round_ps(U, A, B, C) \ 2502 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2503 2504 #define _mm512_sub_round_pd(A, B, C) \ 2505 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2506 2507 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \ 2508 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C) 2509 2510 #define _mm512_maskz_sub_round_pd(U, A, B, C) \ 2511 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2512 2513 #define _mm512_sub_round_ps(A, B, C) \ 2514 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2515 2516 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \ 2517 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C) 2518 2519 #define _mm512_maskz_sub_round_ps(U, A, B, C) \ 2520 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2521 #endif 2522 2523 #ifdef __OPTIMIZE__ 2524 extern __inline __m512d 2525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2526 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R) 2527 { 2528 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2529 (__v8df) __B, 2530 (__v8df) 2531 _mm512_undefined_pd (), 2532 (__mmask8) -1, __R); 2533 } 2534 2535 extern __inline __m512d 2536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2537 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2538 __m512d __B, const int __R) 2539 { 2540 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2541 (__v8df) __B, 2542 (__v8df) __W, 2543 (__mmask8) __U, __R); 2544 } 2545 2546 extern __inline __m512d 2547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2548 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2549 const int __R) 2550 { 2551 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2552 (__v8df) __B, 2553 (__v8df) 2554 _mm512_setzero_pd (), 2555 (__mmask8) __U, __R); 2556 } 2557 2558 extern __inline __m512 2559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2560 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R) 2561 { 2562 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2563 (__v16sf) __B, 2564 (__v16sf) 2565 _mm512_undefined_ps (), 2566 (__mmask16) -1, __R); 2567 } 2568 2569 extern __inline __m512 2570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2571 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2572 __m512 __B, const int __R) 2573 { 2574 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2575 (__v16sf) __B, 2576 (__v16sf) __W, 2577 (__mmask16) __U, __R); 2578 } 2579 2580 extern __inline __m512 2581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2582 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2583 { 2584 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2585 (__v16sf) __B, 2586 (__v16sf) 2587 _mm512_setzero_ps (), 2588 (__mmask16) __U, __R); 2589 } 2590 2591 extern __inline __m512d 2592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2593 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R) 2594 { 2595 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2596 (__v8df) __V, 2597 (__v8df) 2598 _mm512_undefined_pd (), 2599 (__mmask8) -1, __R); 2600 } 2601 2602 extern __inline __m512d 2603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2604 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M, 2605 __m512d __V, const int __R) 2606 { 2607 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2608 (__v8df) __V, 2609 (__v8df) __W, 2610 (__mmask8) __U, __R); 2611 } 2612 2613 extern __inline __m512d 2614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2615 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V, 2616 const int __R) 2617 { 2618 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2619 (__v8df) __V, 2620 (__v8df) 2621 _mm512_setzero_pd (), 2622 (__mmask8) __U, __R); 2623 } 2624 2625 extern __inline __m512 2626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2627 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R) 2628 { 2629 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2630 (__v16sf) __B, 2631 (__v16sf) 2632 _mm512_undefined_ps (), 2633 (__mmask16) -1, __R); 2634 } 2635 2636 extern __inline __m512 2637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2638 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2639 __m512 __B, const int __R) 2640 { 2641 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2642 (__v16sf) __B, 2643 (__v16sf) __W, 2644 (__mmask16) __U, __R); 2645 } 2646 2647 extern __inline __m512 2648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2649 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2650 { 2651 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2652 (__v16sf) __B, 2653 (__v16sf) 2654 _mm512_setzero_ps (), 2655 (__mmask16) __U, __R); 2656 } 2657 2658 extern __inline __m128d 2659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2660 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R) 2661 { 2662 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, 2663 (__v2df) __B, 2664 __R); 2665 } 2666 2667 extern __inline __m128d 2668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2669 _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 2670 __m128d __B, const int __R) 2671 { 2672 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, 2673 (__v2df) __B, 2674 (__v2df) __W, 2675 (__mmask8) __U, __R); 2676 } 2677 2678 extern __inline __m128d 2679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2680 _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 2681 const int __R) 2682 { 2683 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, 2684 (__v2df) __B, 2685 (__v2df) 2686 _mm_setzero_pd (), 2687 (__mmask8) __U, __R); 2688 } 2689 2690 extern __inline __m128 2691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2692 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R) 2693 { 2694 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, 2695 (__v4sf) __B, 2696 __R); 2697 } 2698 2699 extern __inline __m128 2700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2701 _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 2702 __m128 __B, const int __R) 2703 { 2704 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, 2705 (__v4sf) __B, 2706 (__v4sf) __W, 2707 (__mmask8) __U, __R); 2708 } 2709 2710 extern __inline __m128 2711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2712 _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 2713 const int __R) 2714 { 2715 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, 2716 (__v4sf) __B, 2717 (__v4sf) 2718 _mm_setzero_ps (), 2719 (__mmask8) __U, __R); 2720 } 2721 2722 extern __inline __m128d 2723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2724 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R) 2725 { 2726 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, 2727 (__v2df) __B, 2728 __R); 2729 } 2730 2731 extern __inline __m128d 2732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2733 _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 2734 __m128d __B, const int __R) 2735 { 2736 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, 2737 (__v2df) __B, 2738 (__v2df) __W, 2739 (__mmask8) __U, __R); 2740 } 2741 2742 extern __inline __m128d 2743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2744 _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 2745 const int __R) 2746 { 2747 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, 2748 (__v2df) __B, 2749 (__v2df) 2750 _mm_setzero_pd (), 2751 (__mmask8) __U, __R); 2752 } 2753 2754 extern __inline __m128 2755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2756 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R) 2757 { 2758 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, 2759 (__v4sf) __B, 2760 __R); 2761 } 2762 2763 extern __inline __m128 2764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2765 _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 2766 __m128 __B, const int __R) 2767 { 2768 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, 2769 (__v4sf) __B, 2770 (__v4sf) __W, 2771 (__mmask8) __U, __R); 2772 } 2773 2774 extern __inline __m128 2775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2776 _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 2777 const int __R) 2778 { 2779 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, 2780 (__v4sf) __B, 2781 (__v4sf) 2782 _mm_setzero_ps (), 2783 (__mmask8) __U, __R); 2784 } 2785 2786 #else 2787 #define _mm512_mul_round_pd(A, B, C) \ 2788 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2789 2790 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \ 2791 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C) 2792 2793 #define _mm512_maskz_mul_round_pd(U, A, B, C) \ 2794 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2795 2796 #define _mm512_mul_round_ps(A, B, C) \ 2797 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2798 2799 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \ 2800 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C) 2801 2802 #define _mm512_maskz_mul_round_ps(U, A, B, C) \ 2803 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2804 2805 #define _mm512_div_round_pd(A, B, C) \ 2806 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2807 2808 #define _mm512_mask_div_round_pd(W, U, A, B, C) \ 2809 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C) 2810 2811 #define _mm512_maskz_div_round_pd(U, A, B, C) \ 2812 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2813 2814 #define _mm512_div_round_ps(A, B, C) \ 2815 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2816 2817 #define _mm512_mask_div_round_ps(W, U, A, B, C) \ 2818 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C) 2819 2820 #define _mm512_maskz_div_round_ps(U, A, B, C) \ 2821 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2822 2823 #define _mm_mul_round_sd(A, B, C) \ 2824 (__m128d)__builtin_ia32_mulsd_round(A, B, C) 2825 2826 #define _mm_mask_mul_round_sd(W, U, A, B, C) \ 2827 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C) 2828 2829 #define _mm_maskz_mul_round_sd(U, A, B, C) \ 2830 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 2831 2832 #define _mm_mul_round_ss(A, B, C) \ 2833 (__m128)__builtin_ia32_mulss_round(A, B, C) 2834 2835 #define _mm_mask_mul_round_ss(W, U, A, B, C) \ 2836 (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C) 2837 2838 #define _mm_maskz_mul_round_ss(U, A, B, C) \ 2839 (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 2840 2841 #define _mm_div_round_sd(A, B, C) \ 2842 (__m128d)__builtin_ia32_divsd_round(A, B, C) 2843 2844 #define _mm_mask_div_round_sd(W, U, A, B, C) \ 2845 (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C) 2846 2847 #define _mm_maskz_div_round_sd(U, A, B, C) \ 2848 (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 2849 2850 #define _mm_div_round_ss(A, B, C) \ 2851 (__m128)__builtin_ia32_divss_round(A, B, C) 2852 2853 #define _mm_mask_div_round_ss(W, U, A, B, C) \ 2854 (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C) 2855 2856 #define _mm_maskz_div_round_ss(U, A, B, C) \ 2857 (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 2858 2859 #endif 2860 2861 #ifdef __OPTIMIZE__ 2862 extern __inline __m512d 2863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2864 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R) 2865 { 2866 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2867 (__v8df) __B, 2868 (__v8df) 2869 _mm512_undefined_pd (), 2870 (__mmask8) -1, __R); 2871 } 2872 2873 extern __inline __m512d 2874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2875 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2876 __m512d __B, const int __R) 2877 { 2878 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2879 (__v8df) __B, 2880 (__v8df) __W, 2881 (__mmask8) __U, __R); 2882 } 2883 2884 extern __inline __m512d 2885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2886 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2887 const int __R) 2888 { 2889 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2890 (__v8df) __B, 2891 (__v8df) 2892 _mm512_setzero_pd (), 2893 (__mmask8) __U, __R); 2894 } 2895 2896 extern __inline __m512 2897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2898 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R) 2899 { 2900 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2901 (__v16sf) __B, 2902 (__v16sf) 2903 _mm512_undefined_ps (), 2904 (__mmask16) -1, __R); 2905 } 2906 2907 extern __inline __m512 2908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2909 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2910 __m512 __B, const int __R) 2911 { 2912 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2913 (__v16sf) __B, 2914 (__v16sf) __W, 2915 (__mmask16) __U, __R); 2916 } 2917 2918 extern __inline __m512 2919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2920 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2921 { 2922 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2923 (__v16sf) __B, 2924 (__v16sf) 2925 _mm512_setzero_ps (), 2926 (__mmask16) __U, __R); 2927 } 2928 2929 extern __inline __m512d 2930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2931 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R) 2932 { 2933 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 2934 (__v8df) __B, 2935 (__v8df) 2936 _mm512_undefined_pd (), 2937 (__mmask8) -1, __R); 2938 } 2939 2940 extern __inline __m512d 2941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2942 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2943 __m512d __B, const int __R) 2944 { 2945 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 2946 (__v8df) __B, 2947 (__v8df) __W, 2948 (__mmask8) __U, __R); 2949 } 2950 2951 extern __inline __m512d 2952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2953 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2954 const int __R) 2955 { 2956 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 2957 (__v8df) __B, 2958 (__v8df) 2959 _mm512_setzero_pd (), 2960 (__mmask8) __U, __R); 2961 } 2962 2963 extern __inline __m512 2964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2965 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R) 2966 { 2967 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 2968 (__v16sf) __B, 2969 (__v16sf) 2970 _mm512_undefined_ps (), 2971 (__mmask16) -1, __R); 2972 } 2973 2974 extern __inline __m512 2975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2976 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2977 __m512 __B, const int __R) 2978 { 2979 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 2980 (__v16sf) __B, 2981 (__v16sf) __W, 2982 (__mmask16) __U, __R); 2983 } 2984 2985 extern __inline __m512 2986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2987 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2988 { 2989 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 2990 (__v16sf) __B, 2991 (__v16sf) 2992 _mm512_setzero_ps (), 2993 (__mmask16) __U, __R); 2994 } 2995 #else 2996 #define _mm512_max_round_pd(A, B, R) \ 2997 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R) 2998 2999 #define _mm512_mask_max_round_pd(W, U, A, B, R) \ 3000 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R) 3001 3002 #define _mm512_maskz_max_round_pd(U, A, B, R) \ 3003 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R) 3004 3005 #define _mm512_max_round_ps(A, B, R) \ 3006 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_pd(), -1, R) 3007 3008 #define _mm512_mask_max_round_ps(W, U, A, B, R) \ 3009 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R) 3010 3011 #define _mm512_maskz_max_round_ps(U, A, B, R) \ 3012 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) 3013 3014 #define _mm512_min_round_pd(A, B, R) \ 3015 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R) 3016 3017 #define _mm512_mask_min_round_pd(W, U, A, B, R) \ 3018 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R) 3019 3020 #define _mm512_maskz_min_round_pd(U, A, B, R) \ 3021 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R) 3022 3023 #define _mm512_min_round_ps(A, B, R) \ 3024 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R) 3025 3026 #define _mm512_mask_min_round_ps(W, U, A, B, R) \ 3027 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R) 3028 3029 #define _mm512_maskz_min_round_ps(U, A, B, R) \ 3030 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) 3031 #endif 3032 3033 #ifdef __OPTIMIZE__ 3034 extern __inline __m512d 3035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3036 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R) 3037 { 3038 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 3039 (__v8df) __B, 3040 (__v8df) 3041 _mm512_undefined_pd (), 3042 (__mmask8) -1, __R); 3043 } 3044 3045 extern __inline __m512d 3046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3047 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 3048 __m512d __B, const int __R) 3049 { 3050 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 3051 (__v8df) __B, 3052 (__v8df) __W, 3053 (__mmask8) __U, __R); 3054 } 3055 3056 extern __inline __m512d 3057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3058 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3059 const int __R) 3060 { 3061 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 3062 (__v8df) __B, 3063 (__v8df) 3064 _mm512_setzero_pd (), 3065 (__mmask8) __U, __R); 3066 } 3067 3068 extern __inline __m512 3069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3070 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R) 3071 { 3072 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 3073 (__v16sf) __B, 3074 (__v16sf) 3075 _mm512_undefined_ps (), 3076 (__mmask16) -1, __R); 3077 } 3078 3079 extern __inline __m512 3080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3081 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 3082 __m512 __B, const int __R) 3083 { 3084 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 3085 (__v16sf) __B, 3086 (__v16sf) __W, 3087 (__mmask16) __U, __R); 3088 } 3089 3090 extern __inline __m512 3091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3092 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3093 const int __R) 3094 { 3095 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 3096 (__v16sf) __B, 3097 (__v16sf) 3098 _mm512_setzero_ps (), 3099 (__mmask16) __U, __R); 3100 } 3101 3102 extern __inline __m128d 3103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3104 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R) 3105 { 3106 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, 3107 (__v2df) __B, 3108 (__v2df) 3109 _mm_setzero_pd (), 3110 (__mmask8) -1, __R); 3111 } 3112 3113 extern __inline __m128d 3114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3115 _mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 3116 const int __R) 3117 { 3118 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, 3119 (__v2df) __B, 3120 (__v2df) __W, 3121 (__mmask8) __U, __R); 3122 } 3123 3124 extern __inline __m128d 3125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3126 _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 3127 const int __R) 3128 { 3129 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, 3130 (__v2df) __B, 3131 (__v2df) 3132 _mm_setzero_pd (), 3133 (__mmask8) __U, __R); 3134 } 3135 3136 extern __inline __m128 3137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3138 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R) 3139 { 3140 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, 3141 (__v4sf) __B, 3142 (__v4sf) 3143 _mm_setzero_ps (), 3144 (__mmask8) -1, __R); 3145 } 3146 3147 extern __inline __m128 3148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3149 _mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 3150 const int __R) 3151 { 3152 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, 3153 (__v4sf) __B, 3154 (__v4sf) __W, 3155 (__mmask8) __U, __R); 3156 } 3157 3158 extern __inline __m128 3159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3160 _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) 3161 { 3162 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, 3163 (__v4sf) __B, 3164 (__v4sf) 3165 _mm_setzero_ps (), 3166 (__mmask8) __U, __R); 3167 } 3168 #else 3169 #define _mm512_scalef_round_pd(A, B, C) \ 3170 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 3171 3172 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ 3173 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) 3174 3175 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \ 3176 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 3177 3178 #define _mm512_scalef_round_ps(A, B, C) \ 3179 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 3180 3181 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ 3182 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C) 3183 3184 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \ 3185 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 3186 3187 #define _mm_scalef_round_sd(A, B, C) \ 3188 (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \ 3189 (__v2df)_mm_setzero_pd (), -1, C) 3190 3191 #define _mm_scalef_round_ss(A, B, C) \ 3192 (__m128)__builtin_ia32_scalefss_mask_round (A, B, \ 3193 (__v4sf)_mm_setzero_ps (), -1, C) 3194 #endif 3195 3196 #ifdef __OPTIMIZE__ 3197 extern __inline __m512d 3198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3199 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3200 { 3201 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 3202 (__v8df) __B, 3203 (__v8df) __C, 3204 (__mmask8) -1, __R); 3205 } 3206 3207 extern __inline __m512d 3208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3209 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3210 __m512d __C, const int __R) 3211 { 3212 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 3213 (__v8df) __B, 3214 (__v8df) __C, 3215 (__mmask8) __U, __R); 3216 } 3217 3218 extern __inline __m512d 3219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3220 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3221 __mmask8 __U, const int __R) 3222 { 3223 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 3224 (__v8df) __B, 3225 (__v8df) __C, 3226 (__mmask8) __U, __R); 3227 } 3228 3229 extern __inline __m512d 3230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3231 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3232 __m512d __C, const int __R) 3233 { 3234 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 3235 (__v8df) __B, 3236 (__v8df) __C, 3237 (__mmask8) __U, __R); 3238 } 3239 3240 extern __inline __m512 3241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3242 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3243 { 3244 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 3245 (__v16sf) __B, 3246 (__v16sf) __C, 3247 (__mmask16) -1, __R); 3248 } 3249 3250 extern __inline __m512 3251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3252 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3253 __m512 __C, const int __R) 3254 { 3255 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 3256 (__v16sf) __B, 3257 (__v16sf) __C, 3258 (__mmask16) __U, __R); 3259 } 3260 3261 extern __inline __m512 3262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3263 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3264 __mmask16 __U, const int __R) 3265 { 3266 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 3267 (__v16sf) __B, 3268 (__v16sf) __C, 3269 (__mmask16) __U, __R); 3270 } 3271 3272 extern __inline __m512 3273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3274 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3275 __m512 __C, const int __R) 3276 { 3277 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 3278 (__v16sf) __B, 3279 (__v16sf) __C, 3280 (__mmask16) __U, __R); 3281 } 3282 3283 extern __inline __m512d 3284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3285 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3286 { 3287 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 3288 (__v8df) __B, 3289 -(__v8df) __C, 3290 (__mmask8) -1, __R); 3291 } 3292 3293 extern __inline __m512d 3294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3295 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3296 __m512d __C, const int __R) 3297 { 3298 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 3299 (__v8df) __B, 3300 -(__v8df) __C, 3301 (__mmask8) __U, __R); 3302 } 3303 3304 extern __inline __m512d 3305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3306 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 3307 __mmask8 __U, const int __R) 3308 { 3309 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 3310 (__v8df) __B, 3311 (__v8df) __C, 3312 (__mmask8) __U, __R); 3313 } 3314 3315 extern __inline __m512d 3316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3317 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3318 __m512d __C, const int __R) 3319 { 3320 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 3321 (__v8df) __B, 3322 -(__v8df) __C, 3323 (__mmask8) __U, __R); 3324 } 3325 3326 extern __inline __m512 3327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3328 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3329 { 3330 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 3331 (__v16sf) __B, 3332 -(__v16sf) __C, 3333 (__mmask16) -1, __R); 3334 } 3335 3336 extern __inline __m512 3337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3338 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3339 __m512 __C, const int __R) 3340 { 3341 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 3342 (__v16sf) __B, 3343 -(__v16sf) __C, 3344 (__mmask16) __U, __R); 3345 } 3346 3347 extern __inline __m512 3348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3349 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 3350 __mmask16 __U, const int __R) 3351 { 3352 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 3353 (__v16sf) __B, 3354 (__v16sf) __C, 3355 (__mmask16) __U, __R); 3356 } 3357 3358 extern __inline __m512 3359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3360 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3361 __m512 __C, const int __R) 3362 { 3363 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 3364 (__v16sf) __B, 3365 -(__v16sf) __C, 3366 (__mmask16) __U, __R); 3367 } 3368 3369 extern __inline __m512d 3370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3371 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3372 { 3373 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3374 (__v8df) __B, 3375 (__v8df) __C, 3376 (__mmask8) -1, __R); 3377 } 3378 3379 extern __inline __m512d 3380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3381 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3382 __m512d __C, const int __R) 3383 { 3384 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3385 (__v8df) __B, 3386 (__v8df) __C, 3387 (__mmask8) __U, __R); 3388 } 3389 3390 extern __inline __m512d 3391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3392 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 3393 __mmask8 __U, const int __R) 3394 { 3395 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 3396 (__v8df) __B, 3397 (__v8df) __C, 3398 (__mmask8) __U, __R); 3399 } 3400 3401 extern __inline __m512d 3402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3403 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3404 __m512d __C, const int __R) 3405 { 3406 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3407 (__v8df) __B, 3408 (__v8df) __C, 3409 (__mmask8) __U, __R); 3410 } 3411 3412 extern __inline __m512 3413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3414 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3415 { 3416 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3417 (__v16sf) __B, 3418 (__v16sf) __C, 3419 (__mmask16) -1, __R); 3420 } 3421 3422 extern __inline __m512 3423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3424 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3425 __m512 __C, const int __R) 3426 { 3427 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3428 (__v16sf) __B, 3429 (__v16sf) __C, 3430 (__mmask16) __U, __R); 3431 } 3432 3433 extern __inline __m512 3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3435 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 3436 __mmask16 __U, const int __R) 3437 { 3438 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 3439 (__v16sf) __B, 3440 (__v16sf) __C, 3441 (__mmask16) __U, __R); 3442 } 3443 3444 extern __inline __m512 3445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3446 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3447 __m512 __C, const int __R) 3448 { 3449 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3450 (__v16sf) __B, 3451 (__v16sf) __C, 3452 (__mmask16) __U, __R); 3453 } 3454 3455 extern __inline __m512d 3456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3457 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3458 { 3459 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3460 (__v8df) __B, 3461 -(__v8df) __C, 3462 (__mmask8) -1, __R); 3463 } 3464 3465 extern __inline __m512d 3466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3467 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3468 __m512d __C, const int __R) 3469 { 3470 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3471 (__v8df) __B, 3472 -(__v8df) __C, 3473 (__mmask8) __U, __R); 3474 } 3475 3476 extern __inline __m512d 3477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3478 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3479 __mmask8 __U, const int __R) 3480 { 3481 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 3482 (__v8df) __B, 3483 (__v8df) __C, 3484 (__mmask8) __U, __R); 3485 } 3486 3487 extern __inline __m512d 3488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3489 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3490 __m512d __C, const int __R) 3491 { 3492 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3493 (__v8df) __B, 3494 -(__v8df) __C, 3495 (__mmask8) __U, __R); 3496 } 3497 3498 extern __inline __m512 3499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3500 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3501 { 3502 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3503 (__v16sf) __B, 3504 -(__v16sf) __C, 3505 (__mmask16) -1, __R); 3506 } 3507 3508 extern __inline __m512 3509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3510 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3511 __m512 __C, const int __R) 3512 { 3513 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3514 (__v16sf) __B, 3515 -(__v16sf) __C, 3516 (__mmask16) __U, __R); 3517 } 3518 3519 extern __inline __m512 3520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3521 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3522 __mmask16 __U, const int __R) 3523 { 3524 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 3525 (__v16sf) __B, 3526 (__v16sf) __C, 3527 (__mmask16) __U, __R); 3528 } 3529 3530 extern __inline __m512 3531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3532 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3533 __m512 __C, const int __R) 3534 { 3535 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3536 (__v16sf) __B, 3537 -(__v16sf) __C, 3538 (__mmask16) __U, __R); 3539 } 3540 3541 extern __inline __m512d 3542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3543 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3544 { 3545 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 3546 (__v8df) __B, 3547 (__v8df) __C, 3548 (__mmask8) -1, __R); 3549 } 3550 3551 extern __inline __m512d 3552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3553 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3554 __m512d __C, const int __R) 3555 { 3556 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 3557 (__v8df) __B, 3558 (__v8df) __C, 3559 (__mmask8) __U, __R); 3560 } 3561 3562 extern __inline __m512d 3563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3564 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3565 __mmask8 __U, const int __R) 3566 { 3567 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 3568 (__v8df) __B, 3569 (__v8df) __C, 3570 (__mmask8) __U, __R); 3571 } 3572 3573 extern __inline __m512d 3574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3575 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3576 __m512d __C, const int __R) 3577 { 3578 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 3579 (__v8df) __B, 3580 (__v8df) __C, 3581 (__mmask8) __U, __R); 3582 } 3583 3584 extern __inline __m512 3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3586 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3587 { 3588 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 3589 (__v16sf) __B, 3590 (__v16sf) __C, 3591 (__mmask16) -1, __R); 3592 } 3593 3594 extern __inline __m512 3595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3596 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3597 __m512 __C, const int __R) 3598 { 3599 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 3600 (__v16sf) __B, 3601 (__v16sf) __C, 3602 (__mmask16) __U, __R); 3603 } 3604 3605 extern __inline __m512 3606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3607 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3608 __mmask16 __U, const int __R) 3609 { 3610 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 3611 (__v16sf) __B, 3612 (__v16sf) __C, 3613 (__mmask16) __U, __R); 3614 } 3615 3616 extern __inline __m512 3617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3618 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3619 __m512 __C, const int __R) 3620 { 3621 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 3622 (__v16sf) __B, 3623 (__v16sf) __C, 3624 (__mmask16) __U, __R); 3625 } 3626 3627 extern __inline __m512d 3628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3629 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3630 { 3631 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 3632 (__v8df) __B, 3633 -(__v8df) __C, 3634 (__mmask8) -1, __R); 3635 } 3636 3637 extern __inline __m512d 3638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3639 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3640 __m512d __C, const int __R) 3641 { 3642 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 3643 (__v8df) __B, 3644 (__v8df) __C, 3645 (__mmask8) __U, __R); 3646 } 3647 3648 extern __inline __m512d 3649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3650 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 3651 __mmask8 __U, const int __R) 3652 { 3653 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 3654 (__v8df) __B, 3655 (__v8df) __C, 3656 (__mmask8) __U, __R); 3657 } 3658 3659 extern __inline __m512d 3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3661 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3662 __m512d __C, const int __R) 3663 { 3664 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 3665 (__v8df) __B, 3666 -(__v8df) __C, 3667 (__mmask8) __U, __R); 3668 } 3669 3670 extern __inline __m512 3671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3672 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3673 { 3674 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 3675 (__v16sf) __B, 3676 -(__v16sf) __C, 3677 (__mmask16) -1, __R); 3678 } 3679 3680 extern __inline __m512 3681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3682 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3683 __m512 __C, const int __R) 3684 { 3685 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 3686 (__v16sf) __B, 3687 (__v16sf) __C, 3688 (__mmask16) __U, __R); 3689 } 3690 3691 extern __inline __m512 3692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3693 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 3694 __mmask16 __U, const int __R) 3695 { 3696 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 3697 (__v16sf) __B, 3698 (__v16sf) __C, 3699 (__mmask16) __U, __R); 3700 } 3701 3702 extern __inline __m512 3703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3704 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3705 __m512 __C, const int __R) 3706 { 3707 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 3708 (__v16sf) __B, 3709 -(__v16sf) __C, 3710 (__mmask16) __U, __R); 3711 } 3712 #else 3713 #define _mm512_fmadd_round_pd(A, B, C, R) \ 3714 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R) 3715 3716 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ 3717 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R) 3718 3719 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ 3720 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R) 3721 3722 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ 3723 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R) 3724 3725 #define _mm512_fmadd_round_ps(A, B, C, R) \ 3726 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R) 3727 3728 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ 3729 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R) 3730 3731 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \ 3732 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R) 3733 3734 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \ 3735 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R) 3736 3737 #define _mm512_fmsub_round_pd(A, B, C, R) \ 3738 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R) 3739 3740 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ 3741 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R) 3742 3743 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ 3744 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R) 3745 3746 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ 3747 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R) 3748 3749 #define _mm512_fmsub_round_ps(A, B, C, R) \ 3750 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R) 3751 3752 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \ 3753 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R) 3754 3755 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ 3756 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R) 3757 3758 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \ 3759 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R) 3760 3761 #define _mm512_fmaddsub_round_pd(A, B, C, R) \ 3762 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R) 3763 3764 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ 3765 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R) 3766 3767 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ 3768 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R) 3769 3770 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ 3771 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R) 3772 3773 #define _mm512_fmaddsub_round_ps(A, B, C, R) \ 3774 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R) 3775 3776 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ 3777 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R) 3778 3779 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ 3780 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R) 3781 3782 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ 3783 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R) 3784 3785 #define _mm512_fmsubadd_round_pd(A, B, C, R) \ 3786 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R) 3787 3788 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ 3789 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R) 3790 3791 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ 3792 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R) 3793 3794 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ 3795 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R) 3796 3797 #define _mm512_fmsubadd_round_ps(A, B, C, R) \ 3798 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R) 3799 3800 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ 3801 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R) 3802 3803 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ 3804 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R) 3805 3806 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ 3807 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R) 3808 3809 #define _mm512_fnmadd_round_pd(A, B, C, R) \ 3810 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R) 3811 3812 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ 3813 (__m512d)__builtin_ia32_vfnmaddpd512_mask(-(A), B, C, U, R) 3814 3815 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ 3816 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R) 3817 3818 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ 3819 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R) 3820 3821 #define _mm512_fnmadd_round_ps(A, B, C, R) \ 3822 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R) 3823 3824 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ 3825 (__m512)__builtin_ia32_vfnmaddps512_mask(-(A), B, C, U, R) 3826 3827 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ 3828 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R) 3829 3830 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ 3831 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R) 3832 3833 #define _mm512_fnmsub_round_pd(A, B, C, R) \ 3834 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R) 3835 3836 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ 3837 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R) 3838 3839 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ 3840 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R) 3841 3842 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ 3843 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R) 3844 3845 #define _mm512_fnmsub_round_ps(A, B, C, R) \ 3846 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R) 3847 3848 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ 3849 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R) 3850 3851 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ 3852 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R) 3853 3854 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ 3855 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R) 3856 #endif 3857 3858 extern __inline __m512i 3859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3860 _mm512_abs_epi64 (__m512i __A) 3861 { 3862 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3863 (__v8di) 3864 _mm512_undefined_epi32 (), 3865 (__mmask8) -1); 3866 } 3867 3868 extern __inline __m512i 3869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3870 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 3871 { 3872 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3873 (__v8di) __W, 3874 (__mmask8) __U); 3875 } 3876 3877 extern __inline __m512i 3878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3879 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) 3880 { 3881 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3882 (__v8di) 3883 _mm512_setzero_si512 (), 3884 (__mmask8) __U); 3885 } 3886 3887 extern __inline __m512i 3888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3889 _mm512_abs_epi32 (__m512i __A) 3890 { 3891 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3892 (__v16si) 3893 _mm512_undefined_epi32 (), 3894 (__mmask16) -1); 3895 } 3896 3897 extern __inline __m512i 3898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3899 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 3900 { 3901 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3902 (__v16si) __W, 3903 (__mmask16) __U); 3904 } 3905 3906 extern __inline __m512i 3907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3908 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) 3909 { 3910 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3911 (__v16si) 3912 _mm512_setzero_si512 (), 3913 (__mmask16) __U); 3914 } 3915 3916 extern __inline __m512 3917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3918 _mm512_broadcastss_ps (__m128 __A) 3919 { 3920 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 3921 (__v16sf) 3922 _mm512_undefined_ps (), 3923 (__mmask16) -1); 3924 } 3925 3926 extern __inline __m512 3927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3928 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) 3929 { 3930 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 3931 (__v16sf) __O, __M); 3932 } 3933 3934 extern __inline __m512 3935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3936 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) 3937 { 3938 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 3939 (__v16sf) 3940 _mm512_setzero_ps (), 3941 __M); 3942 } 3943 3944 extern __inline __m512d 3945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3946 _mm512_broadcastsd_pd (__m128d __A) 3947 { 3948 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 3949 (__v8df) 3950 _mm512_undefined_pd (), 3951 (__mmask8) -1); 3952 } 3953 3954 extern __inline __m512d 3955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3956 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) 3957 { 3958 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 3959 (__v8df) __O, __M); 3960 } 3961 3962 extern __inline __m512d 3963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3964 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 3965 { 3966 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 3967 (__v8df) 3968 _mm512_setzero_pd (), 3969 __M); 3970 } 3971 3972 extern __inline __m512i 3973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3974 _mm512_broadcastd_epi32 (__m128i __A) 3975 { 3976 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 3977 (__v16si) 3978 _mm512_undefined_epi32 (), 3979 (__mmask16) -1); 3980 } 3981 3982 extern __inline __m512i 3983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3984 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) 3985 { 3986 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 3987 (__v16si) __O, __M); 3988 } 3989 3990 extern __inline __m512i 3991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3992 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) 3993 { 3994 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 3995 (__v16si) 3996 _mm512_setzero_si512 (), 3997 __M); 3998 } 3999 4000 extern __inline __m512i 4001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4002 _mm512_set1_epi32 (int __A) 4003 { 4004 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, 4005 (__v16si) 4006 _mm512_undefined_epi32 (), 4007 (__mmask16)(-1)); 4008 } 4009 4010 extern __inline __m512i 4011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4012 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) 4013 { 4014 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O, 4015 __M); 4016 } 4017 4018 extern __inline __m512i 4019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4020 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A) 4021 { 4022 return (__m512i) 4023 __builtin_ia32_pbroadcastd512_gpr_mask (__A, 4024 (__v16si) _mm512_setzero_si512 (), 4025 __M); 4026 } 4027 4028 extern __inline __m512i 4029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4030 _mm512_broadcastq_epi64 (__m128i __A) 4031 { 4032 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 4033 (__v8di) 4034 _mm512_undefined_epi32 (), 4035 (__mmask8) -1); 4036 } 4037 4038 extern __inline __m512i 4039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4040 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) 4041 { 4042 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 4043 (__v8di) __O, __M); 4044 } 4045 4046 extern __inline __m512i 4047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4048 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 4049 { 4050 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 4051 (__v8di) 4052 _mm512_setzero_si512 (), 4053 __M); 4054 } 4055 4056 extern __inline __m512i 4057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4058 _mm512_set1_epi64 (long long __A) 4059 { 4060 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, 4061 (__v8di) 4062 _mm512_undefined_epi32 (), 4063 (__mmask8)(-1)); 4064 } 4065 4066 extern __inline __m512i 4067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4068 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) 4069 { 4070 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, 4071 __M); 4072 } 4073 4074 extern __inline __m512i 4075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4076 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) 4077 { 4078 return (__m512i) 4079 __builtin_ia32_pbroadcastq512_gpr_mask (__A, 4080 (__v8di) _mm512_setzero_si512 (), 4081 __M); 4082 } 4083 4084 extern __inline __m512 4085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4086 _mm512_broadcast_f32x4 (__m128 __A) 4087 { 4088 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 4089 (__v16sf) 4090 _mm512_undefined_ps (), 4091 (__mmask16) -1); 4092 } 4093 4094 extern __inline __m512 4095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4096 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A) 4097 { 4098 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 4099 (__v16sf) __O, 4100 __M); 4101 } 4102 4103 extern __inline __m512 4104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4105 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A) 4106 { 4107 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 4108 (__v16sf) 4109 _mm512_setzero_ps (), 4110 __M); 4111 } 4112 4113 extern __inline __m512i 4114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4115 _mm512_broadcast_i32x4 (__m128i __A) 4116 { 4117 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 4118 (__v16si) 4119 _mm512_undefined_epi32 (), 4120 (__mmask16) -1); 4121 } 4122 4123 extern __inline __m512i 4124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4125 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A) 4126 { 4127 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 4128 (__v16si) __O, 4129 __M); 4130 } 4131 4132 extern __inline __m512i 4133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4134 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A) 4135 { 4136 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 4137 (__v16si) 4138 _mm512_setzero_si512 (), 4139 __M); 4140 } 4141 4142 extern __inline __m512d 4143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4144 _mm512_broadcast_f64x4 (__m256d __A) 4145 { 4146 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 4147 (__v8df) 4148 _mm512_undefined_pd (), 4149 (__mmask8) -1); 4150 } 4151 4152 extern __inline __m512d 4153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4154 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A) 4155 { 4156 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 4157 (__v8df) __O, 4158 __M); 4159 } 4160 4161 extern __inline __m512d 4162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4163 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A) 4164 { 4165 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 4166 (__v8df) 4167 _mm512_setzero_pd (), 4168 __M); 4169 } 4170 4171 extern __inline __m512i 4172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4173 _mm512_broadcast_i64x4 (__m256i __A) 4174 { 4175 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 4176 (__v8di) 4177 _mm512_undefined_epi32 (), 4178 (__mmask8) -1); 4179 } 4180 4181 extern __inline __m512i 4182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4183 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A) 4184 { 4185 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 4186 (__v8di) __O, 4187 __M); 4188 } 4189 4190 extern __inline __m512i 4191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4192 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A) 4193 { 4194 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 4195 (__v8di) 4196 _mm512_setzero_si512 (), 4197 __M); 4198 } 4199 4200 typedef enum 4201 { 4202 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, 4203 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, 4204 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, 4205 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, 4206 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, 4207 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, 4208 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, 4209 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, 4210 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, 4211 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, 4212 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, 4213 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, 4214 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, 4215 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, 4216 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, 4217 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, 4218 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, 4219 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, 4220 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, 4221 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, 4222 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, 4223 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, 4224 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, 4225 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, 4226 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, 4227 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, 4228 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, 4229 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, 4230 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, 4231 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, 4232 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C, 4233 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, 4234 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, 4235 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, 4236 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68, 4237 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, 4238 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, 4239 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, 4240 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, 4241 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, 4242 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, 4243 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, 4244 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, 4245 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, 4246 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, 4247 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, 4248 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, 4249 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, 4250 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, 4251 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, 4252 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, 4253 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, 4254 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, 4255 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, 4256 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, 4257 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, 4258 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, 4259 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, 4260 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, 4261 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, 4262 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, 4263 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, 4264 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, 4265 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, 4266 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, 4267 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, 4268 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, 4269 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, 4270 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, 4271 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, 4272 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, 4273 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, 4274 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, 4275 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, 4276 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, 4277 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, 4278 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, 4279 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, 4280 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, 4281 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, 4282 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, 4283 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, 4284 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, 4285 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, 4286 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, 4287 _MM_PERM_DDDD = 0xFF 4288 } _MM_PERM_ENUM; 4289 4290 #ifdef __OPTIMIZE__ 4291 extern __inline __m512i 4292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4293 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask) 4294 { 4295 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, 4296 __mask, 4297 (__v16si) 4298 _mm512_undefined_epi32 (), 4299 (__mmask16) -1); 4300 } 4301 4302 extern __inline __m512i 4303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4304 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 4305 _MM_PERM_ENUM __mask) 4306 { 4307 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, 4308 __mask, 4309 (__v16si) __W, 4310 (__mmask16) __U); 4311 } 4312 4313 extern __inline __m512i 4314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4315 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask) 4316 { 4317 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, 4318 __mask, 4319 (__v16si) 4320 _mm512_setzero_si512 (), 4321 (__mmask16) __U); 4322 } 4323 4324 extern __inline __m512i 4325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4326 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm) 4327 { 4328 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, 4329 (__v8di) __B, __imm, 4330 (__v8di) 4331 _mm512_undefined_epi32 (), 4332 (__mmask8) -1); 4333 } 4334 4335 extern __inline __m512i 4336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4337 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A, 4338 __m512i __B, const int __imm) 4339 { 4340 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, 4341 (__v8di) __B, __imm, 4342 (__v8di) __W, 4343 (__mmask8) __U); 4344 } 4345 4346 extern __inline __m512i 4347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4348 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B, 4349 const int __imm) 4350 { 4351 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, 4352 (__v8di) __B, __imm, 4353 (__v8di) 4354 _mm512_setzero_si512 (), 4355 (__mmask8) __U); 4356 } 4357 4358 extern __inline __m512i 4359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4360 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm) 4361 { 4362 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, 4363 (__v16si) __B, 4364 __imm, 4365 (__v16si) 4366 _mm512_undefined_epi32 (), 4367 (__mmask16) -1); 4368 } 4369 4370 extern __inline __m512i 4371 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4372 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A, 4373 __m512i __B, const int __imm) 4374 { 4375 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, 4376 (__v16si) __B, 4377 __imm, 4378 (__v16si) __W, 4379 (__mmask16) __U); 4380 } 4381 4382 extern __inline __m512i 4383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4384 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B, 4385 const int __imm) 4386 { 4387 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, 4388 (__v16si) __B, 4389 __imm, 4390 (__v16si) 4391 _mm512_setzero_si512 (), 4392 (__mmask16) __U); 4393 } 4394 4395 extern __inline __m512d 4396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4397 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm) 4398 { 4399 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, 4400 (__v8df) __B, __imm, 4401 (__v8df) 4402 _mm512_undefined_pd (), 4403 (__mmask8) -1); 4404 } 4405 4406 extern __inline __m512d 4407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4408 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A, 4409 __m512d __B, const int __imm) 4410 { 4411 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, 4412 (__v8df) __B, __imm, 4413 (__v8df) __W, 4414 (__mmask8) __U); 4415 } 4416 4417 extern __inline __m512d 4418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4419 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B, 4420 const int __imm) 4421 { 4422 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, 4423 (__v8df) __B, __imm, 4424 (__v8df) 4425 _mm512_setzero_pd (), 4426 (__mmask8) __U); 4427 } 4428 4429 extern __inline __m512 4430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4431 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm) 4432 { 4433 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, 4434 (__v16sf) __B, __imm, 4435 (__v16sf) 4436 _mm512_undefined_ps (), 4437 (__mmask16) -1); 4438 } 4439 4440 extern __inline __m512 4441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4442 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A, 4443 __m512 __B, const int __imm) 4444 { 4445 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, 4446 (__v16sf) __B, __imm, 4447 (__v16sf) __W, 4448 (__mmask16) __U); 4449 } 4450 4451 extern __inline __m512 4452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4453 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B, 4454 const int __imm) 4455 { 4456 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, 4457 (__v16sf) __B, __imm, 4458 (__v16sf) 4459 _mm512_setzero_ps (), 4460 (__mmask16) __U); 4461 } 4462 4463 #else 4464 #define _mm512_shuffle_epi32(X, C) \ 4465 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ 4466 (__v16si)(__m512i)_mm512_undefined_epi32 (),\ 4467 (__mmask16)-1)) 4468 4469 #define _mm512_mask_shuffle_epi32(W, U, X, C) \ 4470 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ 4471 (__v16si)(__m512i)(W),\ 4472 (__mmask16)(U))) 4473 4474 #define _mm512_maskz_shuffle_epi32(U, X, C) \ 4475 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ 4476 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 4477 (__mmask16)(U))) 4478 4479 #define _mm512_shuffle_i64x2(X, Y, C) \ 4480 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ 4481 (__v8di)(__m512i)(Y), (int)(C),\ 4482 (__v8di)(__m512i)_mm512_undefined_epi32 (),\ 4483 (__mmask8)-1)) 4484 4485 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \ 4486 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ 4487 (__v8di)(__m512i)(Y), (int)(C),\ 4488 (__v8di)(__m512i)(W),\ 4489 (__mmask8)(U))) 4490 4491 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \ 4492 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ 4493 (__v8di)(__m512i)(Y), (int)(C),\ 4494 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 4495 (__mmask8)(U))) 4496 4497 #define _mm512_shuffle_i32x4(X, Y, C) \ 4498 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ 4499 (__v16si)(__m512i)(Y), (int)(C),\ 4500 (__v16si)(__m512i)_mm512_undefined_epi32 (),\ 4501 (__mmask16)-1)) 4502 4503 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \ 4504 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ 4505 (__v16si)(__m512i)(Y), (int)(C),\ 4506 (__v16si)(__m512i)(W),\ 4507 (__mmask16)(U))) 4508 4509 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \ 4510 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ 4511 (__v16si)(__m512i)(Y), (int)(C),\ 4512 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 4513 (__mmask16)(U))) 4514 4515 #define _mm512_shuffle_f64x2(X, Y, C) \ 4516 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \ 4517 (__v8df)(__m512d)(Y), (int)(C),\ 4518 (__v8df)(__m512d)_mm512_undefined_pd(),\ 4519 (__mmask8)-1)) 4520 4521 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \ 4522 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \ 4523 (__v8df)(__m512d)(Y), (int)(C),\ 4524 (__v8df)(__m512d)(W),\ 4525 (__mmask8)(U))) 4526 4527 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \ 4528 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \ 4529 (__v8df)(__m512d)(Y), (int)(C),\ 4530 (__v8df)(__m512d)_mm512_setzero_pd(),\ 4531 (__mmask8)(U))) 4532 4533 #define _mm512_shuffle_f32x4(X, Y, C) \ 4534 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \ 4535 (__v16sf)(__m512)(Y), (int)(C),\ 4536 (__v16sf)(__m512)_mm512_undefined_ps(),\ 4537 (__mmask16)-1)) 4538 4539 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \ 4540 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \ 4541 (__v16sf)(__m512)(Y), (int)(C),\ 4542 (__v16sf)(__m512)(W),\ 4543 (__mmask16)(U))) 4544 4545 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \ 4546 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \ 4547 (__v16sf)(__m512)(Y), (int)(C),\ 4548 (__v16sf)(__m512)_mm512_setzero_ps(),\ 4549 (__mmask16)(U))) 4550 #endif 4551 4552 extern __inline __m512i 4553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4554 _mm512_rolv_epi32 (__m512i __A, __m512i __B) 4555 { 4556 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 4557 (__v16si) __B, 4558 (__v16si) 4559 _mm512_undefined_epi32 (), 4560 (__mmask16) -1); 4561 } 4562 4563 extern __inline __m512i 4564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4565 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4566 { 4567 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 4568 (__v16si) __B, 4569 (__v16si) __W, 4570 (__mmask16) __U); 4571 } 4572 4573 extern __inline __m512i 4574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4575 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 4576 { 4577 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 4578 (__v16si) __B, 4579 (__v16si) 4580 _mm512_setzero_si512 (), 4581 (__mmask16) __U); 4582 } 4583 4584 extern __inline __m512i 4585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4586 _mm512_rorv_epi32 (__m512i __A, __m512i __B) 4587 { 4588 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 4589 (__v16si) __B, 4590 (__v16si) 4591 _mm512_undefined_epi32 (), 4592 (__mmask16) -1); 4593 } 4594 4595 extern __inline __m512i 4596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4597 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4598 { 4599 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 4600 (__v16si) __B, 4601 (__v16si) __W, 4602 (__mmask16) __U); 4603 } 4604 4605 extern __inline __m512i 4606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4607 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 4608 { 4609 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 4610 (__v16si) __B, 4611 (__v16si) 4612 _mm512_setzero_si512 (), 4613 (__mmask16) __U); 4614 } 4615 4616 extern __inline __m512i 4617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4618 _mm512_rolv_epi64 (__m512i __A, __m512i __B) 4619 { 4620 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 4621 (__v8di) __B, 4622 (__v8di) 4623 _mm512_undefined_epi32 (), 4624 (__mmask8) -1); 4625 } 4626 4627 extern __inline __m512i 4628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4629 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4630 { 4631 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 4632 (__v8di) __B, 4633 (__v8di) __W, 4634 (__mmask8) __U); 4635 } 4636 4637 extern __inline __m512i 4638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4639 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 4640 { 4641 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 4642 (__v8di) __B, 4643 (__v8di) 4644 _mm512_setzero_si512 (), 4645 (__mmask8) __U); 4646 } 4647 4648 extern __inline __m512i 4649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4650 _mm512_rorv_epi64 (__m512i __A, __m512i __B) 4651 { 4652 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 4653 (__v8di) __B, 4654 (__v8di) 4655 _mm512_undefined_epi32 (), 4656 (__mmask8) -1); 4657 } 4658 4659 extern __inline __m512i 4660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4661 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4662 { 4663 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 4664 (__v8di) __B, 4665 (__v8di) __W, 4666 (__mmask8) __U); 4667 } 4668 4669 extern __inline __m512i 4670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4671 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 4672 { 4673 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 4674 (__v8di) __B, 4675 (__v8di) 4676 _mm512_setzero_si512 (), 4677 (__mmask8) __U); 4678 } 4679 4680 #ifdef __OPTIMIZE__ 4681 extern __inline __m256i 4682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4683 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R) 4684 { 4685 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4686 (__v8si) 4687 _mm256_undefined_si256 (), 4688 (__mmask8) -1, __R); 4689 } 4690 4691 extern __inline __m256i 4692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4693 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, 4694 const int __R) 4695 { 4696 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4697 (__v8si) __W, 4698 (__mmask8) __U, __R); 4699 } 4700 4701 extern __inline __m256i 4702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4703 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) 4704 { 4705 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4706 (__v8si) 4707 _mm256_setzero_si256 (), 4708 (__mmask8) __U, __R); 4709 } 4710 4711 extern __inline __m256i 4712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4713 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R) 4714 { 4715 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 4716 (__v8si) 4717 _mm256_undefined_si256 (), 4718 (__mmask8) -1, __R); 4719 } 4720 4721 extern __inline __m256i 4722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4723 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, 4724 const int __R) 4725 { 4726 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 4727 (__v8si) __W, 4728 (__mmask8) __U, __R); 4729 } 4730 4731 extern __inline __m256i 4732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4733 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) 4734 { 4735 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 4736 (__v8si) 4737 _mm256_setzero_si256 (), 4738 (__mmask8) __U, __R); 4739 } 4740 #else 4741 #define _mm512_cvtt_roundpd_epi32(A, B) \ 4742 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B)) 4743 4744 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \ 4745 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B)) 4746 4747 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \ 4748 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) 4749 4750 #define _mm512_cvtt_roundpd_epu32(A, B) \ 4751 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B)) 4752 4753 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \ 4754 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B)) 4755 4756 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \ 4757 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) 4758 #endif 4759 4760 #ifdef __OPTIMIZE__ 4761 extern __inline __m256i 4762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4763 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R) 4764 { 4765 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4766 (__v8si) 4767 _mm256_undefined_si256 (), 4768 (__mmask8) -1, __R); 4769 } 4770 4771 extern __inline __m256i 4772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4773 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, 4774 const int __R) 4775 { 4776 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4777 (__v8si) __W, 4778 (__mmask8) __U, __R); 4779 } 4780 4781 extern __inline __m256i 4782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4783 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) 4784 { 4785 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4786 (__v8si) 4787 _mm256_setzero_si256 (), 4788 (__mmask8) __U, __R); 4789 } 4790 4791 extern __inline __m256i 4792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4793 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R) 4794 { 4795 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4796 (__v8si) 4797 _mm256_undefined_si256 (), 4798 (__mmask8) -1, __R); 4799 } 4800 4801 extern __inline __m256i 4802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4803 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, 4804 const int __R) 4805 { 4806 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4807 (__v8si) __W, 4808 (__mmask8) __U, __R); 4809 } 4810 4811 extern __inline __m256i 4812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4813 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) 4814 { 4815 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4816 (__v8si) 4817 _mm256_setzero_si256 (), 4818 (__mmask8) __U, __R); 4819 } 4820 #else 4821 #define _mm512_cvt_roundpd_epi32(A, B) \ 4822 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B)) 4823 4824 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \ 4825 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B)) 4826 4827 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \ 4828 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) 4829 4830 #define _mm512_cvt_roundpd_epu32(A, B) \ 4831 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B)) 4832 4833 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \ 4834 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B)) 4835 4836 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \ 4837 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) 4838 #endif 4839 4840 #ifdef __OPTIMIZE__ 4841 extern __inline __m512i 4842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4843 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R) 4844 { 4845 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4846 (__v16si) 4847 _mm512_undefined_epi32 (), 4848 (__mmask16) -1, __R); 4849 } 4850 4851 extern __inline __m512i 4852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4853 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, 4854 const int __R) 4855 { 4856 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4857 (__v16si) __W, 4858 (__mmask16) __U, __R); 4859 } 4860 4861 extern __inline __m512i 4862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4863 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) 4864 { 4865 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4866 (__v16si) 4867 _mm512_setzero_si512 (), 4868 (__mmask16) __U, __R); 4869 } 4870 4871 extern __inline __m512i 4872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4873 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R) 4874 { 4875 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 4876 (__v16si) 4877 _mm512_undefined_epi32 (), 4878 (__mmask16) -1, __R); 4879 } 4880 4881 extern __inline __m512i 4882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4883 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, 4884 const int __R) 4885 { 4886 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 4887 (__v16si) __W, 4888 (__mmask16) __U, __R); 4889 } 4890 4891 extern __inline __m512i 4892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4893 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) 4894 { 4895 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 4896 (__v16si) 4897 _mm512_setzero_si512 (), 4898 (__mmask16) __U, __R); 4899 } 4900 #else 4901 #define _mm512_cvtt_roundps_epi32(A, B) \ 4902 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B)) 4903 4904 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \ 4905 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B)) 4906 4907 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \ 4908 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) 4909 4910 #define _mm512_cvtt_roundps_epu32(A, B) \ 4911 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B)) 4912 4913 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \ 4914 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B)) 4915 4916 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \ 4917 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) 4918 #endif 4919 4920 #ifdef __OPTIMIZE__ 4921 extern __inline __m512i 4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4923 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R) 4924 { 4925 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4926 (__v16si) 4927 _mm512_undefined_epi32 (), 4928 (__mmask16) -1, __R); 4929 } 4930 4931 extern __inline __m512i 4932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4933 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, 4934 const int __R) 4935 { 4936 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4937 (__v16si) __W, 4938 (__mmask16) __U, __R); 4939 } 4940 4941 extern __inline __m512i 4942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4943 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) 4944 { 4945 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4946 (__v16si) 4947 _mm512_setzero_si512 (), 4948 (__mmask16) __U, __R); 4949 } 4950 4951 extern __inline __m512i 4952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4953 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R) 4954 { 4955 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4956 (__v16si) 4957 _mm512_undefined_epi32 (), 4958 (__mmask16) -1, __R); 4959 } 4960 4961 extern __inline __m512i 4962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4963 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, 4964 const int __R) 4965 { 4966 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4967 (__v16si) __W, 4968 (__mmask16) __U, __R); 4969 } 4970 4971 extern __inline __m512i 4972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4973 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) 4974 { 4975 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4976 (__v16si) 4977 _mm512_setzero_si512 (), 4978 (__mmask16) __U, __R); 4979 } 4980 #else 4981 #define _mm512_cvt_roundps_epi32(A, B) \ 4982 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B)) 4983 4984 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \ 4985 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B)) 4986 4987 #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \ 4988 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) 4989 4990 #define _mm512_cvt_roundps_epu32(A, B) \ 4991 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_epi32 (), -1, B)) 4992 4993 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \ 4994 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B)) 4995 4996 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \ 4997 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) 4998 #endif 4999 5000 extern __inline __m128d 5001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5002 _mm_cvtu32_sd (__m128d __A, unsigned __B) 5003 { 5004 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); 5005 } 5006 5007 #ifdef __x86_64__ 5008 #ifdef __OPTIMIZE__ 5009 extern __inline __m128d 5010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5011 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R) 5012 { 5013 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R); 5014 } 5015 5016 extern __inline __m128d 5017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5018 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R) 5019 { 5020 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); 5021 } 5022 5023 extern __inline __m128d 5024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5025 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R) 5026 { 5027 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); 5028 } 5029 #else 5030 #define _mm_cvt_roundu64_sd(A, B, C) \ 5031 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C) 5032 5033 #define _mm_cvt_roundi64_sd(A, B, C) \ 5034 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C) 5035 5036 #define _mm_cvt_roundsi64_sd(A, B, C) \ 5037 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C) 5038 #endif 5039 5040 #endif 5041 5042 #ifdef __OPTIMIZE__ 5043 extern __inline __m128 5044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5045 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R) 5046 { 5047 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R); 5048 } 5049 5050 extern __inline __m128 5051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5052 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R) 5053 { 5054 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); 5055 } 5056 5057 extern __inline __m128 5058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5059 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R) 5060 { 5061 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); 5062 } 5063 #else 5064 #define _mm_cvt_roundu32_ss(A, B, C) \ 5065 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C) 5066 5067 #define _mm_cvt_roundi32_ss(A, B, C) \ 5068 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C) 5069 5070 #define _mm_cvt_roundsi32_ss(A, B, C) \ 5071 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C) 5072 #endif 5073 5074 #ifdef __x86_64__ 5075 #ifdef __OPTIMIZE__ 5076 extern __inline __m128 5077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5078 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R) 5079 { 5080 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R); 5081 } 5082 5083 extern __inline __m128 5084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5085 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R) 5086 { 5087 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); 5088 } 5089 5090 extern __inline __m128 5091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5092 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R) 5093 { 5094 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); 5095 } 5096 #else 5097 #define _mm_cvt_roundu64_ss(A, B, C) \ 5098 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C) 5099 5100 #define _mm_cvt_roundi64_ss(A, B, C) \ 5101 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C) 5102 5103 #define _mm_cvt_roundsi64_ss(A, B, C) \ 5104 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C) 5105 #endif 5106 5107 #endif 5108 5109 extern __inline __m128i 5110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5111 _mm512_cvtepi32_epi8 (__m512i __A) 5112 { 5113 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 5114 (__v16qi) 5115 _mm_undefined_si128 (), 5116 (__mmask16) -1); 5117 } 5118 5119 extern __inline void 5120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5121 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 5122 { 5123 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 5124 } 5125 5126 extern __inline __m128i 5127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5128 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 5129 { 5130 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 5131 (__v16qi) __O, __M); 5132 } 5133 5134 extern __inline __m128i 5135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5136 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 5137 { 5138 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 5139 (__v16qi) 5140 _mm_setzero_si128 (), 5141 __M); 5142 } 5143 5144 extern __inline __m128i 5145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5146 _mm512_cvtsepi32_epi8 (__m512i __A) 5147 { 5148 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 5149 (__v16qi) 5150 _mm_undefined_si128 (), 5151 (__mmask16) -1); 5152 } 5153 5154 extern __inline void 5155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5156 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 5157 { 5158 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 5159 } 5160 5161 extern __inline __m128i 5162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5163 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 5164 { 5165 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 5166 (__v16qi) __O, __M); 5167 } 5168 5169 extern __inline __m128i 5170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5171 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) 5172 { 5173 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 5174 (__v16qi) 5175 _mm_setzero_si128 (), 5176 __M); 5177 } 5178 5179 extern __inline __m128i 5180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5181 _mm512_cvtusepi32_epi8 (__m512i __A) 5182 { 5183 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 5184 (__v16qi) 5185 _mm_undefined_si128 (), 5186 (__mmask16) -1); 5187 } 5188 5189 extern __inline void 5190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5191 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 5192 { 5193 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 5194 } 5195 5196 extern __inline __m128i 5197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5198 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 5199 { 5200 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 5201 (__v16qi) __O, 5202 __M); 5203 } 5204 5205 extern __inline __m128i 5206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5207 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 5208 { 5209 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 5210 (__v16qi) 5211 _mm_setzero_si128 (), 5212 __M); 5213 } 5214 5215 extern __inline __m256i 5216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5217 _mm512_cvtepi32_epi16 (__m512i __A) 5218 { 5219 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 5220 (__v16hi) 5221 _mm256_undefined_si256 (), 5222 (__mmask16) -1); 5223 } 5224 5225 extern __inline void 5226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5227 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 5228 { 5229 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 5230 } 5231 5232 extern __inline __m256i 5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5234 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 5235 { 5236 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 5237 (__v16hi) __O, __M); 5238 } 5239 5240 extern __inline __m256i 5241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5242 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 5243 { 5244 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 5245 (__v16hi) 5246 _mm256_setzero_si256 (), 5247 __M); 5248 } 5249 5250 extern __inline __m256i 5251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5252 _mm512_cvtsepi32_epi16 (__m512i __A) 5253 { 5254 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 5255 (__v16hi) 5256 _mm256_undefined_si256 (), 5257 (__mmask16) -1); 5258 } 5259 5260 extern __inline void 5261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5262 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 5263 { 5264 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 5265 } 5266 5267 extern __inline __m256i 5268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5269 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 5270 { 5271 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 5272 (__v16hi) __O, __M); 5273 } 5274 5275 extern __inline __m256i 5276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5277 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 5278 { 5279 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 5280 (__v16hi) 5281 _mm256_setzero_si256 (), 5282 __M); 5283 } 5284 5285 extern __inline __m256i 5286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5287 _mm512_cvtusepi32_epi16 (__m512i __A) 5288 { 5289 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 5290 (__v16hi) 5291 _mm256_undefined_si256 (), 5292 (__mmask16) -1); 5293 } 5294 5295 extern __inline void 5296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5297 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 5298 { 5299 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 5300 } 5301 5302 extern __inline __m256i 5303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5304 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 5305 { 5306 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 5307 (__v16hi) __O, 5308 __M); 5309 } 5310 5311 extern __inline __m256i 5312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5313 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 5314 { 5315 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 5316 (__v16hi) 5317 _mm256_setzero_si256 (), 5318 __M); 5319 } 5320 5321 extern __inline __m256i 5322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5323 _mm512_cvtepi64_epi32 (__m512i __A) 5324 { 5325 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 5326 (__v8si) 5327 _mm256_undefined_si256 (), 5328 (__mmask8) -1); 5329 } 5330 5331 extern __inline void 5332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5333 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 5334 { 5335 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 5336 } 5337 5338 extern __inline __m256i 5339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5340 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 5341 { 5342 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 5343 (__v8si) __O, __M); 5344 } 5345 5346 extern __inline __m256i 5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5348 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) 5349 { 5350 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 5351 (__v8si) 5352 _mm256_setzero_si256 (), 5353 __M); 5354 } 5355 5356 extern __inline __m256i 5357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5358 _mm512_cvtsepi64_epi32 (__m512i __A) 5359 { 5360 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 5361 (__v8si) 5362 _mm256_undefined_si256 (), 5363 (__mmask8) -1); 5364 } 5365 5366 extern __inline void 5367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5368 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) 5369 { 5370 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 5371 } 5372 5373 extern __inline __m256i 5374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5375 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 5376 { 5377 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 5378 (__v8si) __O, __M); 5379 } 5380 5381 extern __inline __m256i 5382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5383 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) 5384 { 5385 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 5386 (__v8si) 5387 _mm256_setzero_si256 (), 5388 __M); 5389 } 5390 5391 extern __inline __m256i 5392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5393 _mm512_cvtusepi64_epi32 (__m512i __A) 5394 { 5395 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 5396 (__v8si) 5397 _mm256_undefined_si256 (), 5398 (__mmask8) -1); 5399 } 5400 5401 extern __inline void 5402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5403 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 5404 { 5405 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); 5406 } 5407 5408 extern __inline __m256i 5409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5410 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 5411 { 5412 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 5413 (__v8si) __O, __M); 5414 } 5415 5416 extern __inline __m256i 5417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5418 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) 5419 { 5420 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 5421 (__v8si) 5422 _mm256_setzero_si256 (), 5423 __M); 5424 } 5425 5426 extern __inline __m128i 5427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5428 _mm512_cvtepi64_epi16 (__m512i __A) 5429 { 5430 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 5431 (__v8hi) 5432 _mm_undefined_si128 (), 5433 (__mmask8) -1); 5434 } 5435 5436 extern __inline void 5437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5438 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 5439 { 5440 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 5441 } 5442 5443 extern __inline __m128i 5444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5445 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 5446 { 5447 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 5448 (__v8hi) __O, __M); 5449 } 5450 5451 extern __inline __m128i 5452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5453 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) 5454 { 5455 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 5456 (__v8hi) 5457 _mm_setzero_si128 (), 5458 __M); 5459 } 5460 5461 extern __inline __m128i 5462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5463 _mm512_cvtsepi64_epi16 (__m512i __A) 5464 { 5465 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 5466 (__v8hi) 5467 _mm_undefined_si128 (), 5468 (__mmask8) -1); 5469 } 5470 5471 extern __inline void 5472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5473 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) 5474 { 5475 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 5476 } 5477 5478 extern __inline __m128i 5479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5480 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 5481 { 5482 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 5483 (__v8hi) __O, __M); 5484 } 5485 5486 extern __inline __m128i 5487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5488 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) 5489 { 5490 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 5491 (__v8hi) 5492 _mm_setzero_si128 (), 5493 __M); 5494 } 5495 5496 extern __inline __m128i 5497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5498 _mm512_cvtusepi64_epi16 (__m512i __A) 5499 { 5500 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 5501 (__v8hi) 5502 _mm_undefined_si128 (), 5503 (__mmask8) -1); 5504 } 5505 5506 extern __inline void 5507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5508 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 5509 { 5510 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); 5511 } 5512 5513 extern __inline __m128i 5514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5515 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 5516 { 5517 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 5518 (__v8hi) __O, __M); 5519 } 5520 5521 extern __inline __m128i 5522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5523 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) 5524 { 5525 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 5526 (__v8hi) 5527 _mm_setzero_si128 (), 5528 __M); 5529 } 5530 5531 extern __inline __m128i 5532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5533 _mm512_cvtepi64_epi8 (__m512i __A) 5534 { 5535 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 5536 (__v16qi) 5537 _mm_undefined_si128 (), 5538 (__mmask8) -1); 5539 } 5540 5541 extern __inline void 5542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5543 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 5544 { 5545 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 5546 } 5547 5548 extern __inline __m128i 5549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5550 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 5551 { 5552 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 5553 (__v16qi) __O, __M); 5554 } 5555 5556 extern __inline __m128i 5557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5558 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) 5559 { 5560 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 5561 (__v16qi) 5562 _mm_setzero_si128 (), 5563 __M); 5564 } 5565 5566 extern __inline __m128i 5567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5568 _mm512_cvtsepi64_epi8 (__m512i __A) 5569 { 5570 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 5571 (__v16qi) 5572 _mm_undefined_si128 (), 5573 (__mmask8) -1); 5574 } 5575 5576 extern __inline void 5577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5578 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 5579 { 5580 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 5581 } 5582 5583 extern __inline __m128i 5584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5585 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 5586 { 5587 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 5588 (__v16qi) __O, __M); 5589 } 5590 5591 extern __inline __m128i 5592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5593 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) 5594 { 5595 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 5596 (__v16qi) 5597 _mm_setzero_si128 (), 5598 __M); 5599 } 5600 5601 extern __inline __m128i 5602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5603 _mm512_cvtusepi64_epi8 (__m512i __A) 5604 { 5605 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 5606 (__v16qi) 5607 _mm_undefined_si128 (), 5608 (__mmask8) -1); 5609 } 5610 5611 extern __inline void 5612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5613 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 5614 { 5615 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 5616 } 5617 5618 extern __inline __m128i 5619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5620 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 5621 { 5622 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 5623 (__v16qi) __O, 5624 __M); 5625 } 5626 5627 extern __inline __m128i 5628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5629 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) 5630 { 5631 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 5632 (__v16qi) 5633 _mm_setzero_si128 (), 5634 __M); 5635 } 5636 5637 extern __inline __m512d 5638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5639 _mm512_cvtepi32_pd (__m256i __A) 5640 { 5641 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 5642 (__v8df) 5643 _mm512_undefined_pd (), 5644 (__mmask8) -1); 5645 } 5646 5647 extern __inline __m512d 5648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5649 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) 5650 { 5651 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 5652 (__v8df) __W, 5653 (__mmask8) __U); 5654 } 5655 5656 extern __inline __m512d 5657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5658 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) 5659 { 5660 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 5661 (__v8df) 5662 _mm512_setzero_pd (), 5663 (__mmask8) __U); 5664 } 5665 5666 extern __inline __m512d 5667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5668 _mm512_cvtepu32_pd (__m256i __A) 5669 { 5670 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 5671 (__v8df) 5672 _mm512_undefined_pd (), 5673 (__mmask8) -1); 5674 } 5675 5676 extern __inline __m512d 5677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5678 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) 5679 { 5680 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 5681 (__v8df) __W, 5682 (__mmask8) __U); 5683 } 5684 5685 extern __inline __m512d 5686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5687 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) 5688 { 5689 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 5690 (__v8df) 5691 _mm512_setzero_pd (), 5692 (__mmask8) __U); 5693 } 5694 5695 #ifdef __OPTIMIZE__ 5696 extern __inline __m512 5697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5698 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R) 5699 { 5700 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 5701 (__v16sf) 5702 _mm512_undefined_ps (), 5703 (__mmask16) -1, __R); 5704 } 5705 5706 extern __inline __m512 5707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5708 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A, 5709 const int __R) 5710 { 5711 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 5712 (__v16sf) __W, 5713 (__mmask16) __U, __R); 5714 } 5715 5716 extern __inline __m512 5717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5718 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R) 5719 { 5720 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 5721 (__v16sf) 5722 _mm512_setzero_ps (), 5723 (__mmask16) __U, __R); 5724 } 5725 5726 extern __inline __m512 5727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5728 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R) 5729 { 5730 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 5731 (__v16sf) 5732 _mm512_undefined_ps (), 5733 (__mmask16) -1, __R); 5734 } 5735 5736 extern __inline __m512 5737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5738 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A, 5739 const int __R) 5740 { 5741 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 5742 (__v16sf) __W, 5743 (__mmask16) __U, __R); 5744 } 5745 5746 extern __inline __m512 5747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5748 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R) 5749 { 5750 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 5751 (__v16sf) 5752 _mm512_setzero_ps (), 5753 (__mmask16) __U, __R); 5754 } 5755 5756 #else 5757 #define _mm512_cvt_roundepi32_ps(A, B) \ 5758 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B) 5759 5760 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \ 5761 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B) 5762 5763 #define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \ 5764 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B) 5765 5766 #define _mm512_cvt_roundepu32_ps(A, B) \ 5767 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B) 5768 5769 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \ 5770 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B) 5771 5772 #define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \ 5773 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B) 5774 #endif 5775 5776 #ifdef __OPTIMIZE__ 5777 extern __inline __m256d 5778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5779 _mm512_extractf64x4_pd (__m512d __A, const int __imm) 5780 { 5781 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, 5782 __imm, 5783 (__v4df) 5784 _mm256_undefined_pd (), 5785 (__mmask8) -1); 5786 } 5787 5788 extern __inline __m256d 5789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5790 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A, 5791 const int __imm) 5792 { 5793 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, 5794 __imm, 5795 (__v4df) __W, 5796 (__mmask8) __U); 5797 } 5798 5799 extern __inline __m256d 5800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5801 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm) 5802 { 5803 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, 5804 __imm, 5805 (__v4df) 5806 _mm256_setzero_pd (), 5807 (__mmask8) __U); 5808 } 5809 5810 extern __inline __m128 5811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5812 _mm512_extractf32x4_ps (__m512 __A, const int __imm) 5813 { 5814 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, 5815 __imm, 5816 (__v4sf) 5817 _mm_undefined_ps (), 5818 (__mmask8) -1); 5819 } 5820 5821 extern __inline __m128 5822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5823 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A, 5824 const int __imm) 5825 { 5826 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, 5827 __imm, 5828 (__v4sf) __W, 5829 (__mmask8) __U); 5830 } 5831 5832 extern __inline __m128 5833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5834 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm) 5835 { 5836 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, 5837 __imm, 5838 (__v4sf) 5839 _mm_setzero_ps (), 5840 (__mmask8) __U); 5841 } 5842 5843 extern __inline __m256i 5844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5845 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm) 5846 { 5847 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, 5848 __imm, 5849 (__v4di) 5850 _mm256_undefined_si256 (), 5851 (__mmask8) -1); 5852 } 5853 5854 extern __inline __m256i 5855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5856 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A, 5857 const int __imm) 5858 { 5859 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, 5860 __imm, 5861 (__v4di) __W, 5862 (__mmask8) __U); 5863 } 5864 5865 extern __inline __m256i 5866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5867 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm) 5868 { 5869 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, 5870 __imm, 5871 (__v4di) 5872 _mm256_setzero_si256 (), 5873 (__mmask8) __U); 5874 } 5875 5876 extern __inline __m128i 5877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5878 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm) 5879 { 5880 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, 5881 __imm, 5882 (__v4si) 5883 _mm_undefined_si128 (), 5884 (__mmask8) -1); 5885 } 5886 5887 extern __inline __m128i 5888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5889 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A, 5890 const int __imm) 5891 { 5892 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, 5893 __imm, 5894 (__v4si) __W, 5895 (__mmask8) __U); 5896 } 5897 5898 extern __inline __m128i 5899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5900 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm) 5901 { 5902 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, 5903 __imm, 5904 (__v4si) 5905 _mm_setzero_si128 (), 5906 (__mmask8) __U); 5907 } 5908 #else 5909 5910 #define _mm512_extractf64x4_pd(X, C) \ 5911 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \ 5912 (int) (C),\ 5913 (__v4df)(__m256d)_mm256_undefined_pd(),\ 5914 (__mmask8)-1)) 5915 5916 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \ 5917 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \ 5918 (int) (C),\ 5919 (__v4df)(__m256d)(W),\ 5920 (__mmask8)(U))) 5921 5922 #define _mm512_maskz_extractf64x4_pd(U, X, C) \ 5923 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \ 5924 (int) (C),\ 5925 (__v4df)(__m256d)_mm256_setzero_pd(),\ 5926 (__mmask8)(U))) 5927 5928 #define _mm512_extractf32x4_ps(X, C) \ 5929 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \ 5930 (int) (C),\ 5931 (__v4sf)(__m128)_mm_undefined_ps(),\ 5932 (__mmask8)-1)) 5933 5934 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \ 5935 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \ 5936 (int) (C),\ 5937 (__v4sf)(__m128)(W),\ 5938 (__mmask8)(U))) 5939 5940 #define _mm512_maskz_extractf32x4_ps(U, X, C) \ 5941 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \ 5942 (int) (C),\ 5943 (__v4sf)(__m128)_mm_setzero_ps(),\ 5944 (__mmask8)(U))) 5945 5946 #define _mm512_extracti64x4_epi64(X, C) \ 5947 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \ 5948 (int) (C),\ 5949 (__v4di)(__m256i)_mm256_undefined_si256 (),\ 5950 (__mmask8)-1)) 5951 5952 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \ 5953 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \ 5954 (int) (C),\ 5955 (__v4di)(__m256i)(W),\ 5956 (__mmask8)(U))) 5957 5958 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \ 5959 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \ 5960 (int) (C),\ 5961 (__v4di)(__m256i)_mm256_setzero_si256 (),\ 5962 (__mmask8)(U))) 5963 5964 #define _mm512_extracti32x4_epi32(X, C) \ 5965 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \ 5966 (int) (C),\ 5967 (__v4si)(__m128i)_mm_undefined_si128 (),\ 5968 (__mmask8)-1)) 5969 5970 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \ 5971 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \ 5972 (int) (C),\ 5973 (__v4si)(__m128i)(W),\ 5974 (__mmask8)(U))) 5975 5976 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \ 5977 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \ 5978 (int) (C),\ 5979 (__v4si)(__m128i)_mm_setzero_si128 (),\ 5980 (__mmask8)(U))) 5981 #endif 5982 5983 #ifdef __OPTIMIZE__ 5984 extern __inline __m512i 5985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5986 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm) 5987 { 5988 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A, 5989 (__v4si) __B, 5990 __imm, 5991 (__v16si) __A, -1); 5992 } 5993 5994 extern __inline __m512 5995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5996 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm) 5997 { 5998 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A, 5999 (__v4sf) __B, 6000 __imm, 6001 (__v16sf) __A, -1); 6002 } 6003 6004 extern __inline __m512i 6005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6006 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm) 6007 { 6008 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, 6009 (__v4di) __B, 6010 __imm, 6011 (__v8di) 6012 _mm512_undefined_epi32 (), 6013 (__mmask8) -1); 6014 } 6015 6016 extern __inline __m512i 6017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6018 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A, 6019 __m256i __B, const int __imm) 6020 { 6021 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, 6022 (__v4di) __B, 6023 __imm, 6024 (__v8di) __W, 6025 (__mmask8) __U); 6026 } 6027 6028 extern __inline __m512i 6029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6030 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B, 6031 const int __imm) 6032 { 6033 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, 6034 (__v4di) __B, 6035 __imm, 6036 (__v8di) 6037 _mm512_setzero_si512 (), 6038 (__mmask8) __U); 6039 } 6040 6041 extern __inline __m512d 6042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6043 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm) 6044 { 6045 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, 6046 (__v4df) __B, 6047 __imm, 6048 (__v8df) 6049 _mm512_undefined_pd (), 6050 (__mmask8) -1); 6051 } 6052 6053 extern __inline __m512d 6054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6055 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A, 6056 __m256d __B, const int __imm) 6057 { 6058 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, 6059 (__v4df) __B, 6060 __imm, 6061 (__v8df) __W, 6062 (__mmask8) __U); 6063 } 6064 6065 extern __inline __m512d 6066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6067 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B, 6068 const int __imm) 6069 { 6070 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, 6071 (__v4df) __B, 6072 __imm, 6073 (__v8df) 6074 _mm512_setzero_pd (), 6075 (__mmask8) __U); 6076 } 6077 #else 6078 #define _mm512_insertf32x4(X, Y, C) \ 6079 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 6080 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1))) 6081 6082 #define _mm512_inserti32x4(X, Y, C) \ 6083 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 6084 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1))) 6085 6086 #define _mm512_insertf64x4(X, Y, C) \ 6087 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ 6088 (__v4df)(__m256d) (Y), (int) (C), \ 6089 (__v8df)(__m512d)_mm512_undefined_pd(), \ 6090 (__mmask8)-1)) 6091 6092 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \ 6093 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ 6094 (__v4df)(__m256d) (Y), (int) (C), \ 6095 (__v8df)(__m512d)(W), \ 6096 (__mmask8)(U))) 6097 6098 #define _mm512_maskz_insertf64x4(U, X, Y, C) \ 6099 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ 6100 (__v4df)(__m256d) (Y), (int) (C), \ 6101 (__v8df)(__m512d)_mm512_setzero_pd(), \ 6102 (__mmask8)(U))) 6103 6104 #define _mm512_inserti64x4(X, Y, C) \ 6105 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ 6106 (__v4di)(__m256i) (Y), (int) (C), \ 6107 (__v8di)(__m512i)_mm512_undefined_epi32 (), \ 6108 (__mmask8)-1)) 6109 6110 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \ 6111 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ 6112 (__v4di)(__m256i) (Y), (int) (C),\ 6113 (__v8di)(__m512i)(W),\ 6114 (__mmask8)(U))) 6115 6116 #define _mm512_maskz_inserti64x4(U, X, Y, C) \ 6117 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ 6118 (__v4di)(__m256i) (Y), (int) (C), \ 6119 (__v8di)(__m512i)_mm512_setzero_si512 (), \ 6120 (__mmask8)(U))) 6121 #endif 6122 6123 extern __inline __m512d 6124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6125 _mm512_loadu_pd (void const *__P) 6126 { 6127 return *(__m512d_u *)__P; 6128 } 6129 6130 extern __inline __m512d 6131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6132 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) 6133 { 6134 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, 6135 (__v8df) __W, 6136 (__mmask8) __U); 6137 } 6138 6139 extern __inline __m512d 6140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6141 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P) 6142 { 6143 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, 6144 (__v8df) 6145 _mm512_setzero_pd (), 6146 (__mmask8) __U); 6147 } 6148 6149 extern __inline void 6150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6151 _mm512_storeu_pd (void *__P, __m512d __A) 6152 { 6153 *(__m512d_u *)__P = __A; 6154 } 6155 6156 extern __inline void 6157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6158 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A) 6159 { 6160 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A, 6161 (__mmask8) __U); 6162 } 6163 6164 extern __inline __m512 6165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6166 _mm512_loadu_ps (void const *__P) 6167 { 6168 return *(__m512_u *)__P; 6169 } 6170 6171 extern __inline __m512 6172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6173 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) 6174 { 6175 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, 6176 (__v16sf) __W, 6177 (__mmask16) __U); 6178 } 6179 6180 extern __inline __m512 6181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6182 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P) 6183 { 6184 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, 6185 (__v16sf) 6186 _mm512_setzero_ps (), 6187 (__mmask16) __U); 6188 } 6189 6190 extern __inline void 6191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6192 _mm512_storeu_ps (void *__P, __m512 __A) 6193 { 6194 *(__m512_u *)__P = __A; 6195 } 6196 6197 extern __inline void 6198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6199 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A) 6200 { 6201 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A, 6202 (__mmask16) __U); 6203 } 6204 6205 extern __inline __m512i 6206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6207 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 6208 { 6209 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, 6210 (__v8di) __W, 6211 (__mmask8) __U); 6212 } 6213 6214 extern __inline __m512i 6215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6216 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 6217 { 6218 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, 6219 (__v8di) 6220 _mm512_setzero_si512 (), 6221 (__mmask8) __U); 6222 } 6223 6224 extern __inline void 6225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6226 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A) 6227 { 6228 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A, 6229 (__mmask8) __U); 6230 } 6231 6232 extern __inline __m512i 6233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6234 _mm512_loadu_si512 (void const *__P) 6235 { 6236 return *(__m512i_u *)__P; 6237 } 6238 6239 extern __inline __m512i 6240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6241 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 6242 { 6243 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 6244 (__v16si) __W, 6245 (__mmask16) __U); 6246 } 6247 6248 extern __inline __m512i 6249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6250 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P) 6251 { 6252 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 6253 (__v16si) 6254 _mm512_setzero_si512 (), 6255 (__mmask16) __U); 6256 } 6257 6258 extern __inline void 6259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6260 _mm512_storeu_si512 (void *__P, __m512i __A) 6261 { 6262 *(__m512i_u *)__P = __A; 6263 } 6264 6265 extern __inline void 6266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6267 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A) 6268 { 6269 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, 6270 (__mmask16) __U); 6271 } 6272 6273 extern __inline __m512d 6274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6275 _mm512_permutevar_pd (__m512d __A, __m512i __C) 6276 { 6277 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, 6278 (__v8di) __C, 6279 (__v8df) 6280 _mm512_undefined_pd (), 6281 (__mmask8) -1); 6282 } 6283 6284 extern __inline __m512d 6285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6286 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) 6287 { 6288 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, 6289 (__v8di) __C, 6290 (__v8df) __W, 6291 (__mmask8) __U); 6292 } 6293 6294 extern __inline __m512d 6295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6296 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C) 6297 { 6298 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, 6299 (__v8di) __C, 6300 (__v8df) 6301 _mm512_setzero_pd (), 6302 (__mmask8) __U); 6303 } 6304 6305 extern __inline __m512 6306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6307 _mm512_permutevar_ps (__m512 __A, __m512i __C) 6308 { 6309 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, 6310 (__v16si) __C, 6311 (__v16sf) 6312 _mm512_undefined_ps (), 6313 (__mmask16) -1); 6314 } 6315 6316 extern __inline __m512 6317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6318 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) 6319 { 6320 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, 6321 (__v16si) __C, 6322 (__v16sf) __W, 6323 (__mmask16) __U); 6324 } 6325 6326 extern __inline __m512 6327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6328 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C) 6329 { 6330 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, 6331 (__v16si) __C, 6332 (__v16sf) 6333 _mm512_setzero_ps (), 6334 (__mmask16) __U); 6335 } 6336 6337 extern __inline __m512i 6338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6339 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B) 6340 { 6341 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 6342 /* idx */ , 6343 (__v8di) __A, 6344 (__v8di) __B, 6345 (__mmask8) -1); 6346 } 6347 6348 extern __inline __m512i 6349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6350 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, 6351 __m512i __B) 6352 { 6353 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 6354 /* idx */ , 6355 (__v8di) __A, 6356 (__v8di) __B, 6357 (__mmask8) __U); 6358 } 6359 6360 extern __inline __m512i 6361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6362 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, 6363 __mmask8 __U, __m512i __B) 6364 { 6365 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A, 6366 (__v8di) __I 6367 /* idx */ , 6368 (__v8di) __B, 6369 (__mmask8) __U); 6370 } 6371 6372 extern __inline __m512i 6373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6374 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, 6375 __m512i __I, __m512i __B) 6376 { 6377 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I 6378 /* idx */ , 6379 (__v8di) __A, 6380 (__v8di) __B, 6381 (__mmask8) __U); 6382 } 6383 6384 extern __inline __m512i 6385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6386 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B) 6387 { 6388 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 6389 /* idx */ , 6390 (__v16si) __A, 6391 (__v16si) __B, 6392 (__mmask16) -1); 6393 } 6394 6395 extern __inline __m512i 6396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6397 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, 6398 __m512i __I, __m512i __B) 6399 { 6400 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 6401 /* idx */ , 6402 (__v16si) __A, 6403 (__v16si) __B, 6404 (__mmask16) __U); 6405 } 6406 6407 extern __inline __m512i 6408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6409 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, 6410 __mmask16 __U, __m512i __B) 6411 { 6412 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A, 6413 (__v16si) __I 6414 /* idx */ , 6415 (__v16si) __B, 6416 (__mmask16) __U); 6417 } 6418 6419 extern __inline __m512i 6420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6421 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A, 6422 __m512i __I, __m512i __B) 6423 { 6424 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I 6425 /* idx */ , 6426 (__v16si) __A, 6427 (__v16si) __B, 6428 (__mmask16) __U); 6429 } 6430 6431 extern __inline __m512d 6432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6433 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B) 6434 { 6435 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 6436 /* idx */ , 6437 (__v8df) __A, 6438 (__v8df) __B, 6439 (__mmask8) -1); 6440 } 6441 6442 extern __inline __m512d 6443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6444 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, 6445 __m512d __B) 6446 { 6447 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 6448 /* idx */ , 6449 (__v8df) __A, 6450 (__v8df) __B, 6451 (__mmask8) __U); 6452 } 6453 6454 extern __inline __m512d 6455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6456 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U, 6457 __m512d __B) 6458 { 6459 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A, 6460 (__v8di) __I 6461 /* idx */ , 6462 (__v8df) __B, 6463 (__mmask8) __U); 6464 } 6465 6466 extern __inline __m512d 6467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6468 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, 6469 __m512d __B) 6470 { 6471 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I 6472 /* idx */ , 6473 (__v8df) __A, 6474 (__v8df) __B, 6475 (__mmask8) __U); 6476 } 6477 6478 extern __inline __m512 6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6480 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B) 6481 { 6482 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6483 /* idx */ , 6484 (__v16sf) __A, 6485 (__v16sf) __B, 6486 (__mmask16) -1); 6487 } 6488 6489 extern __inline __m512 6490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6491 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) 6492 { 6493 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6494 /* idx */ , 6495 (__v16sf) __A, 6496 (__v16sf) __B, 6497 (__mmask16) __U); 6498 } 6499 6500 extern __inline __m512 6501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6502 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U, 6503 __m512 __B) 6504 { 6505 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A, 6506 (__v16si) __I 6507 /* idx */ , 6508 (__v16sf) __B, 6509 (__mmask16) __U); 6510 } 6511 6512 extern __inline __m512 6513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6514 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I, 6515 __m512 __B) 6516 { 6517 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I 6518 /* idx */ , 6519 (__v16sf) __A, 6520 (__v16sf) __B, 6521 (__mmask16) __U); 6522 } 6523 6524 #ifdef __OPTIMIZE__ 6525 extern __inline __m512d 6526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6527 _mm512_permute_pd (__m512d __X, const int __C) 6528 { 6529 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, 6530 (__v8df) 6531 _mm512_undefined_pd (), 6532 (__mmask8) -1); 6533 } 6534 6535 extern __inline __m512d 6536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6537 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C) 6538 { 6539 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, 6540 (__v8df) __W, 6541 (__mmask8) __U); 6542 } 6543 6544 extern __inline __m512d 6545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6546 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C) 6547 { 6548 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, 6549 (__v8df) 6550 _mm512_setzero_pd (), 6551 (__mmask8) __U); 6552 } 6553 6554 extern __inline __m512 6555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6556 _mm512_permute_ps (__m512 __X, const int __C) 6557 { 6558 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, 6559 (__v16sf) 6560 _mm512_undefined_ps (), 6561 (__mmask16) -1); 6562 } 6563 6564 extern __inline __m512 6565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6566 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C) 6567 { 6568 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, 6569 (__v16sf) __W, 6570 (__mmask16) __U); 6571 } 6572 6573 extern __inline __m512 6574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6575 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C) 6576 { 6577 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, 6578 (__v16sf) 6579 _mm512_setzero_ps (), 6580 (__mmask16) __U); 6581 } 6582 #else 6583 #define _mm512_permute_pd(X, C) \ 6584 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \ 6585 (__v8df)(__m512d)_mm512_undefined_pd(),\ 6586 (__mmask8)(-1))) 6587 6588 #define _mm512_mask_permute_pd(W, U, X, C) \ 6589 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \ 6590 (__v8df)(__m512d)(W), \ 6591 (__mmask8)(U))) 6592 6593 #define _mm512_maskz_permute_pd(U, X, C) \ 6594 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \ 6595 (__v8df)(__m512d)_mm512_setzero_pd(), \ 6596 (__mmask8)(U))) 6597 6598 #define _mm512_permute_ps(X, C) \ 6599 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \ 6600 (__v16sf)(__m512)_mm512_undefined_ps(),\ 6601 (__mmask16)(-1))) 6602 6603 #define _mm512_mask_permute_ps(W, U, X, C) \ 6604 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \ 6605 (__v16sf)(__m512)(W), \ 6606 (__mmask16)(U))) 6607 6608 #define _mm512_maskz_permute_ps(U, X, C) \ 6609 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \ 6610 (__v16sf)(__m512)_mm512_setzero_ps(), \ 6611 (__mmask16)(U))) 6612 #endif 6613 6614 #ifdef __OPTIMIZE__ 6615 extern __inline __m512i 6616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6617 _mm512_permutex_epi64 (__m512i __X, const int __I) 6618 { 6619 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, 6620 (__v8di) 6621 _mm512_undefined_epi32 (), 6622 (__mmask8) (-1)); 6623 } 6624 6625 extern __inline __m512i 6626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6627 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M, 6628 __m512i __X, const int __I) 6629 { 6630 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, 6631 (__v8di) __W, 6632 (__mmask8) __M); 6633 } 6634 6635 extern __inline __m512i 6636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6637 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I) 6638 { 6639 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, 6640 (__v8di) 6641 _mm512_setzero_si512 (), 6642 (__mmask8) __M); 6643 } 6644 6645 extern __inline __m512d 6646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6647 _mm512_permutex_pd (__m512d __X, const int __M) 6648 { 6649 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, 6650 (__v8df) 6651 _mm512_undefined_pd (), 6652 (__mmask8) -1); 6653 } 6654 6655 extern __inline __m512d 6656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6657 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M) 6658 { 6659 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, 6660 (__v8df) __W, 6661 (__mmask8) __U); 6662 } 6663 6664 extern __inline __m512d 6665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6666 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M) 6667 { 6668 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, 6669 (__v8df) 6670 _mm512_setzero_pd (), 6671 (__mmask8) __U); 6672 } 6673 #else 6674 #define _mm512_permutex_pd(X, M) \ 6675 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \ 6676 (__v8df)(__m512d)_mm512_undefined_pd(),\ 6677 (__mmask8)-1)) 6678 6679 #define _mm512_mask_permutex_pd(W, U, X, M) \ 6680 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \ 6681 (__v8df)(__m512d)(W), (__mmask8)(U))) 6682 6683 #define _mm512_maskz_permutex_pd(U, X, M) \ 6684 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \ 6685 (__v8df)(__m512d)_mm512_setzero_pd(),\ 6686 (__mmask8)(U))) 6687 6688 #define _mm512_permutex_epi64(X, I) \ 6689 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ 6690 (int)(I), \ 6691 (__v8di)(__m512i) \ 6692 (_mm512_undefined_epi32 ()),\ 6693 (__mmask8)(-1))) 6694 6695 #define _mm512_maskz_permutex_epi64(M, X, I) \ 6696 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ 6697 (int)(I), \ 6698 (__v8di)(__m512i) \ 6699 (_mm512_setzero_si512 ()),\ 6700 (__mmask8)(M))) 6701 6702 #define _mm512_mask_permutex_epi64(W, M, X, I) \ 6703 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ 6704 (int)(I), \ 6705 (__v8di)(__m512i)(W), \ 6706 (__mmask8)(M))) 6707 #endif 6708 6709 extern __inline __m512i 6710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6711 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) 6712 { 6713 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 6714 (__v8di) __X, 6715 (__v8di) 6716 _mm512_setzero_si512 (), 6717 __M); 6718 } 6719 6720 extern __inline __m512i 6721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6722 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) 6723 { 6724 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 6725 (__v8di) __X, 6726 (__v8di) 6727 _mm512_undefined_epi32 (), 6728 (__mmask8) -1); 6729 } 6730 6731 extern __inline __m512i 6732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6733 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, 6734 __m512i __Y) 6735 { 6736 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 6737 (__v8di) __X, 6738 (__v8di) __W, 6739 __M); 6740 } 6741 6742 extern __inline __m512i 6743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6744 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) 6745 { 6746 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 6747 (__v16si) __X, 6748 (__v16si) 6749 _mm512_setzero_si512 (), 6750 __M); 6751 } 6752 6753 extern __inline __m512i 6754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6755 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) 6756 { 6757 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 6758 (__v16si) __X, 6759 (__v16si) 6760 _mm512_undefined_epi32 (), 6761 (__mmask16) -1); 6762 } 6763 6764 extern __inline __m512i 6765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6766 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, 6767 __m512i __Y) 6768 { 6769 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 6770 (__v16si) __X, 6771 (__v16si) __W, 6772 __M); 6773 } 6774 6775 extern __inline __m512d 6776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6777 _mm512_permutexvar_pd (__m512i __X, __m512d __Y) 6778 { 6779 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 6780 (__v8di) __X, 6781 (__v8df) 6782 _mm512_undefined_pd (), 6783 (__mmask8) -1); 6784 } 6785 6786 extern __inline __m512d 6787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6788 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) 6789 { 6790 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 6791 (__v8di) __X, 6792 (__v8df) __W, 6793 (__mmask8) __U); 6794 } 6795 6796 extern __inline __m512d 6797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6798 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) 6799 { 6800 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 6801 (__v8di) __X, 6802 (__v8df) 6803 _mm512_setzero_pd (), 6804 (__mmask8) __U); 6805 } 6806 6807 extern __inline __m512 6808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6809 _mm512_permutexvar_ps (__m512i __X, __m512 __Y) 6810 { 6811 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 6812 (__v16si) __X, 6813 (__v16sf) 6814 _mm512_undefined_ps (), 6815 (__mmask16) -1); 6816 } 6817 6818 extern __inline __m512 6819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6820 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) 6821 { 6822 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 6823 (__v16si) __X, 6824 (__v16sf) __W, 6825 (__mmask16) __U); 6826 } 6827 6828 extern __inline __m512 6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6830 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) 6831 { 6832 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 6833 (__v16si) __X, 6834 (__v16sf) 6835 _mm512_setzero_ps (), 6836 (__mmask16) __U); 6837 } 6838 6839 #ifdef __OPTIMIZE__ 6840 extern __inline __m512 6841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6842 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm) 6843 { 6844 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, 6845 (__v16sf) __V, __imm, 6846 (__v16sf) 6847 _mm512_undefined_ps (), 6848 (__mmask16) -1); 6849 } 6850 6851 extern __inline __m512 6852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6853 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M, 6854 __m512 __V, const int __imm) 6855 { 6856 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, 6857 (__v16sf) __V, __imm, 6858 (__v16sf) __W, 6859 (__mmask16) __U); 6860 } 6861 6862 extern __inline __m512 6863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6864 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm) 6865 { 6866 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, 6867 (__v16sf) __V, __imm, 6868 (__v16sf) 6869 _mm512_setzero_ps (), 6870 (__mmask16) __U); 6871 } 6872 6873 extern __inline __m512d 6874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6875 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm) 6876 { 6877 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, 6878 (__v8df) __V, __imm, 6879 (__v8df) 6880 _mm512_undefined_pd (), 6881 (__mmask8) -1); 6882 } 6883 6884 extern __inline __m512d 6885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6886 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M, 6887 __m512d __V, const int __imm) 6888 { 6889 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, 6890 (__v8df) __V, __imm, 6891 (__v8df) __W, 6892 (__mmask8) __U); 6893 } 6894 6895 extern __inline __m512d 6896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6897 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V, 6898 const int __imm) 6899 { 6900 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, 6901 (__v8df) __V, __imm, 6902 (__v8df) 6903 _mm512_setzero_pd (), 6904 (__mmask8) __U); 6905 } 6906 6907 extern __inline __m512d 6908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6909 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C, 6910 const int __imm, const int __R) 6911 { 6912 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 6913 (__v8df) __B, 6914 (__v8di) __C, 6915 __imm, 6916 (__mmask8) -1, __R); 6917 } 6918 6919 extern __inline __m512d 6920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6921 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 6922 __m512i __C, const int __imm, const int __R) 6923 { 6924 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 6925 (__v8df) __B, 6926 (__v8di) __C, 6927 __imm, 6928 (__mmask8) __U, __R); 6929 } 6930 6931 extern __inline __m512d 6932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6933 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 6934 __m512i __C, const int __imm, const int __R) 6935 { 6936 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A, 6937 (__v8df) __B, 6938 (__v8di) __C, 6939 __imm, 6940 (__mmask8) __U, __R); 6941 } 6942 6943 extern __inline __m512 6944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6945 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C, 6946 const int __imm, const int __R) 6947 { 6948 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 6949 (__v16sf) __B, 6950 (__v16si) __C, 6951 __imm, 6952 (__mmask16) -1, __R); 6953 } 6954 6955 extern __inline __m512 6956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6957 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 6958 __m512i __C, const int __imm, const int __R) 6959 { 6960 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 6961 (__v16sf) __B, 6962 (__v16si) __C, 6963 __imm, 6964 (__mmask16) __U, __R); 6965 } 6966 6967 extern __inline __m512 6968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6969 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 6970 __m512i __C, const int __imm, const int __R) 6971 { 6972 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, 6973 (__v16sf) __B, 6974 (__v16si) __C, 6975 __imm, 6976 (__mmask16) __U, __R); 6977 } 6978 6979 extern __inline __m128d 6980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6981 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C, 6982 const int __imm, const int __R) 6983 { 6984 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 6985 (__v2df) __B, 6986 (__v2di) __C, __imm, 6987 (__mmask8) -1, __R); 6988 } 6989 6990 extern __inline __m128d 6991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6992 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B, 6993 __m128i __C, const int __imm, const int __R) 6994 { 6995 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 6996 (__v2df) __B, 6997 (__v2di) __C, __imm, 6998 (__mmask8) __U, __R); 6999 } 7000 7001 extern __inline __m128d 7002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7003 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 7004 __m128i __C, const int __imm, const int __R) 7005 { 7006 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, 7007 (__v2df) __B, 7008 (__v2di) __C, 7009 __imm, 7010 (__mmask8) __U, __R); 7011 } 7012 7013 extern __inline __m128 7014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7015 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C, 7016 const int __imm, const int __R) 7017 { 7018 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 7019 (__v4sf) __B, 7020 (__v4si) __C, __imm, 7021 (__mmask8) -1, __R); 7022 } 7023 7024 extern __inline __m128 7025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7026 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B, 7027 __m128i __C, const int __imm, const int __R) 7028 { 7029 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 7030 (__v4sf) __B, 7031 (__v4si) __C, __imm, 7032 (__mmask8) __U, __R); 7033 } 7034 7035 extern __inline __m128 7036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7037 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 7038 __m128i __C, const int __imm, const int __R) 7039 { 7040 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, 7041 (__v4sf) __B, 7042 (__v4si) __C, __imm, 7043 (__mmask8) __U, __R); 7044 } 7045 7046 #else 7047 #define _mm512_shuffle_pd(X, Y, C) \ 7048 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ 7049 (__v8df)(__m512d)(Y), (int)(C),\ 7050 (__v8df)(__m512d)_mm512_undefined_pd(),\ 7051 (__mmask8)-1)) 7052 7053 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \ 7054 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ 7055 (__v8df)(__m512d)(Y), (int)(C),\ 7056 (__v8df)(__m512d)(W),\ 7057 (__mmask8)(U))) 7058 7059 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \ 7060 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ 7061 (__v8df)(__m512d)(Y), (int)(C),\ 7062 (__v8df)(__m512d)_mm512_setzero_pd(),\ 7063 (__mmask8)(U))) 7064 7065 #define _mm512_shuffle_ps(X, Y, C) \ 7066 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ 7067 (__v16sf)(__m512)(Y), (int)(C),\ 7068 (__v16sf)(__m512)_mm512_undefined_ps(),\ 7069 (__mmask16)-1)) 7070 7071 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \ 7072 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ 7073 (__v16sf)(__m512)(Y), (int)(C),\ 7074 (__v16sf)(__m512)(W),\ 7075 (__mmask16)(U))) 7076 7077 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \ 7078 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ 7079 (__v16sf)(__m512)(Y), (int)(C),\ 7080 (__v16sf)(__m512)_mm512_setzero_ps(),\ 7081 (__mmask16)(U))) 7082 7083 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \ 7084 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 7085 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 7086 (__mmask8)(-1), (R))) 7087 7088 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \ 7089 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 7090 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 7091 (__mmask8)(U), (R))) 7092 7093 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \ 7094 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \ 7095 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 7096 (__mmask8)(U), (R))) 7097 7098 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \ 7099 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 7100 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 7101 (__mmask16)(-1), (R))) 7102 7103 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \ 7104 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 7105 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 7106 (__mmask16)(U), (R))) 7107 7108 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \ 7109 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ 7110 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 7111 (__mmask16)(U), (R))) 7112 7113 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \ 7114 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 7115 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 7116 (__mmask8)(-1), (R))) 7117 7118 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \ 7119 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 7120 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 7121 (__mmask8)(U), (R))) 7122 7123 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \ 7124 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ 7125 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 7126 (__mmask8)(U), (R))) 7127 7128 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \ 7129 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 7130 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 7131 (__mmask8)(-1), (R))) 7132 7133 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \ 7134 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 7135 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 7136 (__mmask8)(U), (R))) 7137 7138 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \ 7139 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ 7140 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 7141 (__mmask8)(U), (R))) 7142 #endif 7143 7144 extern __inline __m512 7145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7146 _mm512_movehdup_ps (__m512 __A) 7147 { 7148 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 7149 (__v16sf) 7150 _mm512_undefined_ps (), 7151 (__mmask16) -1); 7152 } 7153 7154 extern __inline __m512 7155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7156 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 7157 { 7158 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 7159 (__v16sf) __W, 7160 (__mmask16) __U); 7161 } 7162 7163 extern __inline __m512 7164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7165 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) 7166 { 7167 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 7168 (__v16sf) 7169 _mm512_setzero_ps (), 7170 (__mmask16) __U); 7171 } 7172 7173 extern __inline __m512 7174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7175 _mm512_moveldup_ps (__m512 __A) 7176 { 7177 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 7178 (__v16sf) 7179 _mm512_undefined_ps (), 7180 (__mmask16) -1); 7181 } 7182 7183 extern __inline __m512 7184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7185 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 7186 { 7187 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 7188 (__v16sf) __W, 7189 (__mmask16) __U); 7190 } 7191 7192 extern __inline __m512 7193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7194 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 7195 { 7196 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 7197 (__v16sf) 7198 _mm512_setzero_ps (), 7199 (__mmask16) __U); 7200 } 7201 7202 extern __inline __m512i 7203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7204 _mm512_or_si512 (__m512i __A, __m512i __B) 7205 { 7206 return (__m512i) ((__v16su) __A | (__v16su) __B); 7207 } 7208 7209 extern __inline __m512i 7210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7211 _mm512_or_epi32 (__m512i __A, __m512i __B) 7212 { 7213 return (__m512i) ((__v16su) __A | (__v16su) __B); 7214 } 7215 7216 extern __inline __m512i 7217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7218 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7219 { 7220 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 7221 (__v16si) __B, 7222 (__v16si) __W, 7223 (__mmask16) __U); 7224 } 7225 7226 extern __inline __m512i 7227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7228 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7229 { 7230 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 7231 (__v16si) __B, 7232 (__v16si) 7233 _mm512_setzero_si512 (), 7234 (__mmask16) __U); 7235 } 7236 7237 extern __inline __m512i 7238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7239 _mm512_or_epi64 (__m512i __A, __m512i __B) 7240 { 7241 return (__m512i) ((__v8du) __A | (__v8du) __B); 7242 } 7243 7244 extern __inline __m512i 7245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7246 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7247 { 7248 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, 7249 (__v8di) __B, 7250 (__v8di) __W, 7251 (__mmask8) __U); 7252 } 7253 7254 extern __inline __m512i 7255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7256 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7257 { 7258 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, 7259 (__v8di) __B, 7260 (__v8di) 7261 _mm512_setzero_si512 (), 7262 (__mmask8) __U); 7263 } 7264 7265 extern __inline __m512i 7266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7267 _mm512_xor_si512 (__m512i __A, __m512i __B) 7268 { 7269 return (__m512i) ((__v16su) __A ^ (__v16su) __B); 7270 } 7271 7272 extern __inline __m512i 7273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7274 _mm512_xor_epi32 (__m512i __A, __m512i __B) 7275 { 7276 return (__m512i) ((__v16su) __A ^ (__v16su) __B); 7277 } 7278 7279 extern __inline __m512i 7280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7281 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7282 { 7283 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, 7284 (__v16si) __B, 7285 (__v16si) __W, 7286 (__mmask16) __U); 7287 } 7288 7289 extern __inline __m512i 7290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7291 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7292 { 7293 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, 7294 (__v16si) __B, 7295 (__v16si) 7296 _mm512_setzero_si512 (), 7297 (__mmask16) __U); 7298 } 7299 7300 extern __inline __m512i 7301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7302 _mm512_xor_epi64 (__m512i __A, __m512i __B) 7303 { 7304 return (__m512i) ((__v8du) __A ^ (__v8du) __B); 7305 } 7306 7307 extern __inline __m512i 7308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7309 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7310 { 7311 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, 7312 (__v8di) __B, 7313 (__v8di) __W, 7314 (__mmask8) __U); 7315 } 7316 7317 extern __inline __m512i 7318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7319 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7320 { 7321 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, 7322 (__v8di) __B, 7323 (__v8di) 7324 _mm512_setzero_si512 (), 7325 (__mmask8) __U); 7326 } 7327 7328 #ifdef __OPTIMIZE__ 7329 extern __inline __m512i 7330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7331 _mm512_rol_epi32 (__m512i __A, const int __B) 7332 { 7333 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, 7334 (__v16si) 7335 _mm512_undefined_epi32 (), 7336 (__mmask16) -1); 7337 } 7338 7339 extern __inline __m512i 7340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7341 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B) 7342 { 7343 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, 7344 (__v16si) __W, 7345 (__mmask16) __U); 7346 } 7347 7348 extern __inline __m512i 7349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7350 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B) 7351 { 7352 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, 7353 (__v16si) 7354 _mm512_setzero_si512 (), 7355 (__mmask16) __U); 7356 } 7357 7358 extern __inline __m512i 7359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7360 _mm512_ror_epi32 (__m512i __A, int __B) 7361 { 7362 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, 7363 (__v16si) 7364 _mm512_undefined_epi32 (), 7365 (__mmask16) -1); 7366 } 7367 7368 extern __inline __m512i 7369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7370 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B) 7371 { 7372 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, 7373 (__v16si) __W, 7374 (__mmask16) __U); 7375 } 7376 7377 extern __inline __m512i 7378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7379 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B) 7380 { 7381 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, 7382 (__v16si) 7383 _mm512_setzero_si512 (), 7384 (__mmask16) __U); 7385 } 7386 7387 extern __inline __m512i 7388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7389 _mm512_rol_epi64 (__m512i __A, const int __B) 7390 { 7391 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, 7392 (__v8di) 7393 _mm512_undefined_epi32 (), 7394 (__mmask8) -1); 7395 } 7396 7397 extern __inline __m512i 7398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7399 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B) 7400 { 7401 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, 7402 (__v8di) __W, 7403 (__mmask8) __U); 7404 } 7405 7406 extern __inline __m512i 7407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7408 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B) 7409 { 7410 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, 7411 (__v8di) 7412 _mm512_setzero_si512 (), 7413 (__mmask8) __U); 7414 } 7415 7416 extern __inline __m512i 7417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7418 _mm512_ror_epi64 (__m512i __A, int __B) 7419 { 7420 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, 7421 (__v8di) 7422 _mm512_undefined_epi32 (), 7423 (__mmask8) -1); 7424 } 7425 7426 extern __inline __m512i 7427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7428 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B) 7429 { 7430 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, 7431 (__v8di) __W, 7432 (__mmask8) __U); 7433 } 7434 7435 extern __inline __m512i 7436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7437 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B) 7438 { 7439 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, 7440 (__v8di) 7441 _mm512_setzero_si512 (), 7442 (__mmask8) __U); 7443 } 7444 7445 #else 7446 #define _mm512_rol_epi32(A, B) \ 7447 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ 7448 (int)(B), \ 7449 (__v16si)_mm512_undefined_epi32 (), \ 7450 (__mmask16)(-1))) 7451 #define _mm512_mask_rol_epi32(W, U, A, B) \ 7452 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ 7453 (int)(B), \ 7454 (__v16si)(__m512i)(W), \ 7455 (__mmask16)(U))) 7456 #define _mm512_maskz_rol_epi32(U, A, B) \ 7457 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ 7458 (int)(B), \ 7459 (__v16si)_mm512_setzero_si512 (), \ 7460 (__mmask16)(U))) 7461 #define _mm512_ror_epi32(A, B) \ 7462 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ 7463 (int)(B), \ 7464 (__v16si)_mm512_undefined_epi32 (), \ 7465 (__mmask16)(-1))) 7466 #define _mm512_mask_ror_epi32(W, U, A, B) \ 7467 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ 7468 (int)(B), \ 7469 (__v16si)(__m512i)(W), \ 7470 (__mmask16)(U))) 7471 #define _mm512_maskz_ror_epi32(U, A, B) \ 7472 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ 7473 (int)(B), \ 7474 (__v16si)_mm512_setzero_si512 (), \ 7475 (__mmask16)(U))) 7476 #define _mm512_rol_epi64(A, B) \ 7477 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ 7478 (int)(B), \ 7479 (__v8di)_mm512_undefined_epi32 (), \ 7480 (__mmask8)(-1))) 7481 #define _mm512_mask_rol_epi64(W, U, A, B) \ 7482 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ 7483 (int)(B), \ 7484 (__v8di)(__m512i)(W), \ 7485 (__mmask8)(U))) 7486 #define _mm512_maskz_rol_epi64(U, A, B) \ 7487 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ 7488 (int)(B), \ 7489 (__v8di)_mm512_setzero_si512 (), \ 7490 (__mmask8)(U))) 7491 7492 #define _mm512_ror_epi64(A, B) \ 7493 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ 7494 (int)(B), \ 7495 (__v8di)_mm512_undefined_epi32 (), \ 7496 (__mmask8)(-1))) 7497 #define _mm512_mask_ror_epi64(W, U, A, B) \ 7498 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ 7499 (int)(B), \ 7500 (__v8di)(__m512i)(W), \ 7501 (__mmask8)(U))) 7502 #define _mm512_maskz_ror_epi64(U, A, B) \ 7503 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ 7504 (int)(B), \ 7505 (__v8di)_mm512_setzero_si512 (), \ 7506 (__mmask8)(U))) 7507 #endif 7508 7509 extern __inline __m512i 7510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7511 _mm512_and_si512 (__m512i __A, __m512i __B) 7512 { 7513 return (__m512i) ((__v16su) __A & (__v16su) __B); 7514 } 7515 7516 extern __inline __m512i 7517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7518 _mm512_and_epi32 (__m512i __A, __m512i __B) 7519 { 7520 return (__m512i) ((__v16su) __A & (__v16su) __B); 7521 } 7522 7523 extern __inline __m512i 7524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7525 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7526 { 7527 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, 7528 (__v16si) __B, 7529 (__v16si) __W, 7530 (__mmask16) __U); 7531 } 7532 7533 extern __inline __m512i 7534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7535 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7536 { 7537 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, 7538 (__v16si) __B, 7539 (__v16si) 7540 _mm512_setzero_si512 (), 7541 (__mmask16) __U); 7542 } 7543 7544 extern __inline __m512i 7545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7546 _mm512_and_epi64 (__m512i __A, __m512i __B) 7547 { 7548 return (__m512i) ((__v8du) __A & (__v8du) __B); 7549 } 7550 7551 extern __inline __m512i 7552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7553 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7554 { 7555 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, 7556 (__v8di) __B, 7557 (__v8di) __W, __U); 7558 } 7559 7560 extern __inline __m512i 7561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7562 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7563 { 7564 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, 7565 (__v8di) __B, 7566 (__v8di) 7567 _mm512_setzero_pd (), 7568 __U); 7569 } 7570 7571 extern __inline __m512i 7572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7573 _mm512_andnot_si512 (__m512i __A, __m512i __B) 7574 { 7575 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7576 (__v16si) __B, 7577 (__v16si) 7578 _mm512_undefined_epi32 (), 7579 (__mmask16) -1); 7580 } 7581 7582 extern __inline __m512i 7583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7584 _mm512_andnot_epi32 (__m512i __A, __m512i __B) 7585 { 7586 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7587 (__v16si) __B, 7588 (__v16si) 7589 _mm512_undefined_epi32 (), 7590 (__mmask16) -1); 7591 } 7592 7593 extern __inline __m512i 7594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7595 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7596 { 7597 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7598 (__v16si) __B, 7599 (__v16si) __W, 7600 (__mmask16) __U); 7601 } 7602 7603 extern __inline __m512i 7604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7605 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7606 { 7607 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7608 (__v16si) __B, 7609 (__v16si) 7610 _mm512_setzero_si512 (), 7611 (__mmask16) __U); 7612 } 7613 7614 extern __inline __m512i 7615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7616 _mm512_andnot_epi64 (__m512i __A, __m512i __B) 7617 { 7618 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7619 (__v8di) __B, 7620 (__v8di) 7621 _mm512_undefined_epi32 (), 7622 (__mmask8) -1); 7623 } 7624 7625 extern __inline __m512i 7626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7627 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7628 { 7629 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7630 (__v8di) __B, 7631 (__v8di) __W, __U); 7632 } 7633 7634 extern __inline __m512i 7635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7636 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7637 { 7638 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7639 (__v8di) __B, 7640 (__v8di) 7641 _mm512_setzero_pd (), 7642 __U); 7643 } 7644 7645 extern __inline __mmask16 7646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7647 _mm512_test_epi32_mask (__m512i __A, __m512i __B) 7648 { 7649 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 7650 (__v16si) __B, 7651 (__mmask16) -1); 7652 } 7653 7654 extern __inline __mmask16 7655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7656 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 7657 { 7658 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 7659 (__v16si) __B, __U); 7660 } 7661 7662 extern __inline __mmask8 7663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7664 _mm512_test_epi64_mask (__m512i __A, __m512i __B) 7665 { 7666 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, 7667 (__v8di) __B, 7668 (__mmask8) -1); 7669 } 7670 7671 extern __inline __mmask8 7672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7673 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 7674 { 7675 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U); 7676 } 7677 7678 extern __inline __mmask16 7679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7680 _mm512_testn_epi32_mask (__m512i __A, __m512i __B) 7681 { 7682 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 7683 (__v16si) __B, 7684 (__mmask16) -1); 7685 } 7686 7687 extern __inline __mmask16 7688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7689 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 7690 { 7691 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 7692 (__v16si) __B, __U); 7693 } 7694 7695 extern __inline __mmask8 7696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7697 _mm512_testn_epi64_mask (__m512i __A, __m512i __B) 7698 { 7699 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 7700 (__v8di) __B, 7701 (__mmask8) -1); 7702 } 7703 7704 extern __inline __mmask8 7705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7706 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 7707 { 7708 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 7709 (__v8di) __B, __U); 7710 } 7711 7712 extern __inline __m512 7713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7714 _mm512_abs_ps (__m512 __A) 7715 { 7716 return (__m512) _mm512_and_epi32 ((__m512i) __A, 7717 _mm512_set1_epi32 (0x7fffffff)); 7718 } 7719 7720 extern __inline __m512 7721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7722 _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A) 7723 { 7724 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A, 7725 _mm512_set1_epi32 (0x7fffffff)); 7726 } 7727 7728 extern __inline __m512d 7729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7730 _mm512_abs_pd (__m512d __A) 7731 { 7732 return (__m512d) _mm512_and_epi64 ((__m512i) __A, 7733 _mm512_set1_epi64 (0x7fffffffffffffffLL)); 7734 } 7735 7736 extern __inline __m512d 7737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7738 _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A) 7739 { 7740 return (__m512d) 7741 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A, 7742 _mm512_set1_epi64 (0x7fffffffffffffffLL)); 7743 } 7744 7745 extern __inline __m512i 7746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7747 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B) 7748 { 7749 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, 7750 (__v16si) __B, 7751 (__v16si) 7752 _mm512_undefined_epi32 (), 7753 (__mmask16) -1); 7754 } 7755 7756 extern __inline __m512i 7757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7758 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 7759 __m512i __B) 7760 { 7761 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, 7762 (__v16si) __B, 7763 (__v16si) __W, 7764 (__mmask16) __U); 7765 } 7766 7767 extern __inline __m512i 7768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7769 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7770 { 7771 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, 7772 (__v16si) __B, 7773 (__v16si) 7774 _mm512_setzero_si512 (), 7775 (__mmask16) __U); 7776 } 7777 7778 extern __inline __m512i 7779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7780 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B) 7781 { 7782 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 7783 (__v8di) __B, 7784 (__v8di) 7785 _mm512_undefined_epi32 (), 7786 (__mmask8) -1); 7787 } 7788 7789 extern __inline __m512i 7790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7791 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7792 { 7793 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 7794 (__v8di) __B, 7795 (__v8di) __W, 7796 (__mmask8) __U); 7797 } 7798 7799 extern __inline __m512i 7800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7801 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7802 { 7803 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 7804 (__v8di) __B, 7805 (__v8di) 7806 _mm512_setzero_si512 (), 7807 (__mmask8) __U); 7808 } 7809 7810 extern __inline __m512i 7811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7812 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B) 7813 { 7814 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 7815 (__v16si) __B, 7816 (__v16si) 7817 _mm512_undefined_epi32 (), 7818 (__mmask16) -1); 7819 } 7820 7821 extern __inline __m512i 7822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7823 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 7824 __m512i __B) 7825 { 7826 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 7827 (__v16si) __B, 7828 (__v16si) __W, 7829 (__mmask16) __U); 7830 } 7831 7832 extern __inline __m512i 7833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7834 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7835 { 7836 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 7837 (__v16si) __B, 7838 (__v16si) 7839 _mm512_setzero_si512 (), 7840 (__mmask16) __U); 7841 } 7842 7843 extern __inline __m512i 7844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7845 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B) 7846 { 7847 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 7848 (__v8di) __B, 7849 (__v8di) 7850 _mm512_undefined_epi32 (), 7851 (__mmask8) -1); 7852 } 7853 7854 extern __inline __m512i 7855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7856 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7857 { 7858 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 7859 (__v8di) __B, 7860 (__v8di) __W, 7861 (__mmask8) __U); 7862 } 7863 7864 extern __inline __m512i 7865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7866 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7867 { 7868 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 7869 (__v8di) __B, 7870 (__v8di) 7871 _mm512_setzero_si512 (), 7872 (__mmask8) __U); 7873 } 7874 7875 #ifdef __x86_64__ 7876 #ifdef __OPTIMIZE__ 7877 extern __inline unsigned long long 7878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7879 _mm_cvt_roundss_u64 (__m128 __A, const int __R) 7880 { 7881 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R); 7882 } 7883 7884 extern __inline long long 7885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7886 _mm_cvt_roundss_si64 (__m128 __A, const int __R) 7887 { 7888 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R); 7889 } 7890 7891 extern __inline long long 7892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7893 _mm_cvt_roundss_i64 (__m128 __A, const int __R) 7894 { 7895 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R); 7896 } 7897 7898 extern __inline unsigned long long 7899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7900 _mm_cvtt_roundss_u64 (__m128 __A, const int __R) 7901 { 7902 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R); 7903 } 7904 7905 extern __inline long long 7906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7907 _mm_cvtt_roundss_i64 (__m128 __A, const int __R) 7908 { 7909 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R); 7910 } 7911 7912 extern __inline long long 7913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7914 _mm_cvtt_roundss_si64 (__m128 __A, const int __R) 7915 { 7916 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R); 7917 } 7918 #else 7919 #define _mm_cvt_roundss_u64(A, B) \ 7920 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B)) 7921 7922 #define _mm_cvt_roundss_si64(A, B) \ 7923 ((long long)__builtin_ia32_vcvtss2si64(A, B)) 7924 7925 #define _mm_cvt_roundss_i64(A, B) \ 7926 ((long long)__builtin_ia32_vcvtss2si64(A, B)) 7927 7928 #define _mm_cvtt_roundss_u64(A, B) \ 7929 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B)) 7930 7931 #define _mm_cvtt_roundss_i64(A, B) \ 7932 ((long long)__builtin_ia32_vcvttss2si64(A, B)) 7933 7934 #define _mm_cvtt_roundss_si64(A, B) \ 7935 ((long long)__builtin_ia32_vcvttss2si64(A, B)) 7936 #endif 7937 #endif 7938 7939 #ifdef __OPTIMIZE__ 7940 extern __inline unsigned 7941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7942 _mm_cvt_roundss_u32 (__m128 __A, const int __R) 7943 { 7944 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R); 7945 } 7946 7947 extern __inline int 7948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7949 _mm_cvt_roundss_si32 (__m128 __A, const int __R) 7950 { 7951 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R); 7952 } 7953 7954 extern __inline int 7955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7956 _mm_cvt_roundss_i32 (__m128 __A, const int __R) 7957 { 7958 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R); 7959 } 7960 7961 extern __inline unsigned 7962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7963 _mm_cvtt_roundss_u32 (__m128 __A, const int __R) 7964 { 7965 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R); 7966 } 7967 7968 extern __inline int 7969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7970 _mm_cvtt_roundss_i32 (__m128 __A, const int __R) 7971 { 7972 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); 7973 } 7974 7975 extern __inline int 7976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7977 _mm_cvtt_roundss_si32 (__m128 __A, const int __R) 7978 { 7979 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); 7980 } 7981 #else 7982 #define _mm_cvt_roundss_u32(A, B) \ 7983 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B)) 7984 7985 #define _mm_cvt_roundss_si32(A, B) \ 7986 ((int)__builtin_ia32_vcvtss2si32(A, B)) 7987 7988 #define _mm_cvt_roundss_i32(A, B) \ 7989 ((int)__builtin_ia32_vcvtss2si32(A, B)) 7990 7991 #define _mm_cvtt_roundss_u32(A, B) \ 7992 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B)) 7993 7994 #define _mm_cvtt_roundss_si32(A, B) \ 7995 ((int)__builtin_ia32_vcvttss2si32(A, B)) 7996 7997 #define _mm_cvtt_roundss_i32(A, B) \ 7998 ((int)__builtin_ia32_vcvttss2si32(A, B)) 7999 #endif 8000 8001 #ifdef __x86_64__ 8002 #ifdef __OPTIMIZE__ 8003 extern __inline unsigned long long 8004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8005 _mm_cvt_roundsd_u64 (__m128d __A, const int __R) 8006 { 8007 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R); 8008 } 8009 8010 extern __inline long long 8011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8012 _mm_cvt_roundsd_si64 (__m128d __A, const int __R) 8013 { 8014 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R); 8015 } 8016 8017 extern __inline long long 8018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8019 _mm_cvt_roundsd_i64 (__m128d __A, const int __R) 8020 { 8021 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R); 8022 } 8023 8024 extern __inline unsigned long long 8025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8026 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R) 8027 { 8028 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R); 8029 } 8030 8031 extern __inline long long 8032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8033 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R) 8034 { 8035 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); 8036 } 8037 8038 extern __inline long long 8039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8040 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R) 8041 { 8042 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); 8043 } 8044 #else 8045 #define _mm_cvt_roundsd_u64(A, B) \ 8046 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B)) 8047 8048 #define _mm_cvt_roundsd_si64(A, B) \ 8049 ((long long)__builtin_ia32_vcvtsd2si64(A, B)) 8050 8051 #define _mm_cvt_roundsd_i64(A, B) \ 8052 ((long long)__builtin_ia32_vcvtsd2si64(A, B)) 8053 8054 #define _mm_cvtt_roundsd_u64(A, B) \ 8055 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B)) 8056 8057 #define _mm_cvtt_roundsd_si64(A, B) \ 8058 ((long long)__builtin_ia32_vcvttsd2si64(A, B)) 8059 8060 #define _mm_cvtt_roundsd_i64(A, B) \ 8061 ((long long)__builtin_ia32_vcvttsd2si64(A, B)) 8062 #endif 8063 #endif 8064 8065 #ifdef __OPTIMIZE__ 8066 extern __inline unsigned 8067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8068 _mm_cvt_roundsd_u32 (__m128d __A, const int __R) 8069 { 8070 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R); 8071 } 8072 8073 extern __inline int 8074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8075 _mm_cvt_roundsd_si32 (__m128d __A, const int __R) 8076 { 8077 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); 8078 } 8079 8080 extern __inline int 8081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8082 _mm_cvt_roundsd_i32 (__m128d __A, const int __R) 8083 { 8084 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); 8085 } 8086 8087 extern __inline unsigned 8088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8089 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R) 8090 { 8091 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R); 8092 } 8093 8094 extern __inline int 8095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8096 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R) 8097 { 8098 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); 8099 } 8100 8101 extern __inline int 8102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8103 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R) 8104 { 8105 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); 8106 } 8107 #else 8108 #define _mm_cvt_roundsd_u32(A, B) \ 8109 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B)) 8110 8111 #define _mm_cvt_roundsd_si32(A, B) \ 8112 ((int)__builtin_ia32_vcvtsd2si32(A, B)) 8113 8114 #define _mm_cvt_roundsd_i32(A, B) \ 8115 ((int)__builtin_ia32_vcvtsd2si32(A, B)) 8116 8117 #define _mm_cvtt_roundsd_u32(A, B) \ 8118 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B)) 8119 8120 #define _mm_cvtt_roundsd_si32(A, B) \ 8121 ((int)__builtin_ia32_vcvttsd2si32(A, B)) 8122 8123 #define _mm_cvtt_roundsd_i32(A, B) \ 8124 ((int)__builtin_ia32_vcvttsd2si32(A, B)) 8125 #endif 8126 8127 extern __inline __m512d 8128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8129 _mm512_movedup_pd (__m512d __A) 8130 { 8131 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 8132 (__v8df) 8133 _mm512_undefined_pd (), 8134 (__mmask8) -1); 8135 } 8136 8137 extern __inline __m512d 8138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8139 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) 8140 { 8141 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 8142 (__v8df) __W, 8143 (__mmask8) __U); 8144 } 8145 8146 extern __inline __m512d 8147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8148 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) 8149 { 8150 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 8151 (__v8df) 8152 _mm512_setzero_pd (), 8153 (__mmask8) __U); 8154 } 8155 8156 extern __inline __m512d 8157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8158 _mm512_unpacklo_pd (__m512d __A, __m512d __B) 8159 { 8160 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 8161 (__v8df) __B, 8162 (__v8df) 8163 _mm512_undefined_pd (), 8164 (__mmask8) -1); 8165 } 8166 8167 extern __inline __m512d 8168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8169 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 8170 { 8171 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 8172 (__v8df) __B, 8173 (__v8df) __W, 8174 (__mmask8) __U); 8175 } 8176 8177 extern __inline __m512d 8178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8179 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) 8180 { 8181 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 8182 (__v8df) __B, 8183 (__v8df) 8184 _mm512_setzero_pd (), 8185 (__mmask8) __U); 8186 } 8187 8188 extern __inline __m512d 8189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8190 _mm512_unpackhi_pd (__m512d __A, __m512d __B) 8191 { 8192 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 8193 (__v8df) __B, 8194 (__v8df) 8195 _mm512_undefined_pd (), 8196 (__mmask8) -1); 8197 } 8198 8199 extern __inline __m512d 8200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8201 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 8202 { 8203 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 8204 (__v8df) __B, 8205 (__v8df) __W, 8206 (__mmask8) __U); 8207 } 8208 8209 extern __inline __m512d 8210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8211 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B) 8212 { 8213 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 8214 (__v8df) __B, 8215 (__v8df) 8216 _mm512_setzero_pd (), 8217 (__mmask8) __U); 8218 } 8219 8220 extern __inline __m512 8221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8222 _mm512_unpackhi_ps (__m512 __A, __m512 __B) 8223 { 8224 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 8225 (__v16sf) __B, 8226 (__v16sf) 8227 _mm512_undefined_ps (), 8228 (__mmask16) -1); 8229 } 8230 8231 extern __inline __m512 8232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8233 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 8234 { 8235 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 8236 (__v16sf) __B, 8237 (__v16sf) __W, 8238 (__mmask16) __U); 8239 } 8240 8241 extern __inline __m512 8242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8243 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) 8244 { 8245 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 8246 (__v16sf) __B, 8247 (__v16sf) 8248 _mm512_setzero_ps (), 8249 (__mmask16) __U); 8250 } 8251 8252 #ifdef __OPTIMIZE__ 8253 extern __inline __m512d 8254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8255 _mm512_cvt_roundps_pd (__m256 __A, const int __R) 8256 { 8257 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 8258 (__v8df) 8259 _mm512_undefined_pd (), 8260 (__mmask8) -1, __R); 8261 } 8262 8263 extern __inline __m512d 8264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8265 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A, 8266 const int __R) 8267 { 8268 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 8269 (__v8df) __W, 8270 (__mmask8) __U, __R); 8271 } 8272 8273 extern __inline __m512d 8274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8275 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R) 8276 { 8277 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 8278 (__v8df) 8279 _mm512_setzero_pd (), 8280 (__mmask8) __U, __R); 8281 } 8282 8283 extern __inline __m512 8284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8285 _mm512_cvt_roundph_ps (__m256i __A, const int __R) 8286 { 8287 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 8288 (__v16sf) 8289 _mm512_undefined_ps (), 8290 (__mmask16) -1, __R); 8291 } 8292 8293 extern __inline __m512 8294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8295 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A, 8296 const int __R) 8297 { 8298 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 8299 (__v16sf) __W, 8300 (__mmask16) __U, __R); 8301 } 8302 8303 extern __inline __m512 8304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8305 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R) 8306 { 8307 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 8308 (__v16sf) 8309 _mm512_setzero_ps (), 8310 (__mmask16) __U, __R); 8311 } 8312 8313 extern __inline __m256i 8314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8315 _mm512_cvt_roundps_ph (__m512 __A, const int __I) 8316 { 8317 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8318 __I, 8319 (__v16hi) 8320 _mm256_undefined_si256 (), 8321 -1); 8322 } 8323 8324 extern __inline __m256i 8325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8326 _mm512_cvtps_ph (__m512 __A, const int __I) 8327 { 8328 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8329 __I, 8330 (__v16hi) 8331 _mm256_undefined_si256 (), 8332 -1); 8333 } 8334 8335 extern __inline __m256i 8336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8337 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A, 8338 const int __I) 8339 { 8340 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8341 __I, 8342 (__v16hi) __U, 8343 (__mmask16) __W); 8344 } 8345 8346 extern __inline __m256i 8347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8348 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I) 8349 { 8350 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8351 __I, 8352 (__v16hi) __U, 8353 (__mmask16) __W); 8354 } 8355 8356 extern __inline __m256i 8357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8358 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I) 8359 { 8360 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8361 __I, 8362 (__v16hi) 8363 _mm256_setzero_si256 (), 8364 (__mmask16) __W); 8365 } 8366 8367 extern __inline __m256i 8368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8369 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I) 8370 { 8371 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 8372 __I, 8373 (__v16hi) 8374 _mm256_setzero_si256 (), 8375 (__mmask16) __W); 8376 } 8377 #else 8378 #define _mm512_cvt_roundps_pd(A, B) \ 8379 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B) 8380 8381 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \ 8382 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B) 8383 8384 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \ 8385 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B) 8386 8387 #define _mm512_cvt_roundph_ps(A, B) \ 8388 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B) 8389 8390 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \ 8391 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B) 8392 8393 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \ 8394 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B) 8395 8396 #define _mm512_cvt_roundps_ph(A, I) \ 8397 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 8398 (__v16hi)_mm256_undefined_si256 (), -1)) 8399 #define _mm512_cvtps_ph(A, I) \ 8400 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 8401 (__v16hi)_mm256_undefined_si256 (), -1)) 8402 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ 8403 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 8404 (__v16hi)(__m256i)(U), (__mmask16) (W))) 8405 #define _mm512_mask_cvtps_ph(U, W, A, I) \ 8406 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 8407 (__v16hi)(__m256i)(U), (__mmask16) (W))) 8408 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \ 8409 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 8410 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W))) 8411 #define _mm512_maskz_cvtps_ph(W, A, I) \ 8412 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 8413 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W))) 8414 #endif 8415 8416 #ifdef __OPTIMIZE__ 8417 extern __inline __m256 8418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8419 _mm512_cvt_roundpd_ps (__m512d __A, const int __R) 8420 { 8421 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 8422 (__v8sf) 8423 _mm256_undefined_ps (), 8424 (__mmask8) -1, __R); 8425 } 8426 8427 extern __inline __m256 8428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8429 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A, 8430 const int __R) 8431 { 8432 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 8433 (__v8sf) __W, 8434 (__mmask8) __U, __R); 8435 } 8436 8437 extern __inline __m256 8438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8439 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R) 8440 { 8441 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 8442 (__v8sf) 8443 _mm256_setzero_ps (), 8444 (__mmask8) __U, __R); 8445 } 8446 8447 extern __inline __m128 8448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8449 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R) 8450 { 8451 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A, 8452 (__v2df) __B, 8453 __R); 8454 } 8455 8456 extern __inline __m128d 8457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8458 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R) 8459 { 8460 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A, 8461 (__v4sf) __B, 8462 __R); 8463 } 8464 #else 8465 #define _mm512_cvt_roundpd_ps(A, B) \ 8466 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B) 8467 8468 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \ 8469 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B) 8470 8471 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \ 8472 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B) 8473 8474 #define _mm_cvt_roundsd_ss(A, B, C) \ 8475 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C) 8476 8477 #define _mm_cvt_roundss_sd(A, B, C) \ 8478 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C) 8479 #endif 8480 8481 extern __inline void 8482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8483 _mm512_stream_si512 (__m512i * __P, __m512i __A) 8484 { 8485 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A); 8486 } 8487 8488 extern __inline void 8489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8490 _mm512_stream_ps (float *__P, __m512 __A) 8491 { 8492 __builtin_ia32_movntps512 (__P, (__v16sf) __A); 8493 } 8494 8495 extern __inline void 8496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8497 _mm512_stream_pd (double *__P, __m512d __A) 8498 { 8499 __builtin_ia32_movntpd512 (__P, (__v8df) __A); 8500 } 8501 8502 extern __inline __m512i 8503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8504 _mm512_stream_load_si512 (void *__P) 8505 { 8506 return __builtin_ia32_movntdqa512 ((__v8di *)__P); 8507 } 8508 8509 /* Constants for mantissa extraction */ 8510 typedef enum 8511 { 8512 _MM_MANT_NORM_1_2, /* interval [1, 2) */ 8513 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ 8514 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ 8515 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ 8516 } _MM_MANTISSA_NORM_ENUM; 8517 8518 typedef enum 8519 { 8520 _MM_MANT_SIGN_src, /* sign = sign(SRC) */ 8521 _MM_MANT_SIGN_zero, /* sign = 0 */ 8522 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ 8523 } _MM_MANTISSA_SIGN_ENUM; 8524 8525 #ifdef __OPTIMIZE__ 8526 extern __inline __m128 8527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8528 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R) 8529 { 8530 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A, 8531 (__v4sf) __B, 8532 __R); 8533 } 8534 8535 extern __inline __m128 8536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8537 _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 8538 __m128 __B, const int __R) 8539 { 8540 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A, 8541 (__v4sf) __B, 8542 (__v4sf) __W, 8543 (__mmask8) __U, __R); 8544 } 8545 8546 extern __inline __m128 8547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8548 _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 8549 const int __R) 8550 { 8551 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A, 8552 (__v4sf) __B, 8553 (__v4sf) 8554 _mm_setzero_ps (), 8555 (__mmask8) __U, __R); 8556 } 8557 8558 extern __inline __m128d 8559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8560 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R) 8561 { 8562 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A, 8563 (__v2df) __B, 8564 __R); 8565 } 8566 8567 extern __inline __m128d 8568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8569 _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 8570 __m128d __B, const int __R) 8571 { 8572 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A, 8573 (__v2df) __B, 8574 (__v2df) __W, 8575 (__mmask8) __U, __R); 8576 } 8577 8578 extern __inline __m128d 8579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8580 _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 8581 const int __R) 8582 { 8583 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A, 8584 (__v2df) __B, 8585 (__v2df) 8586 _mm_setzero_pd (), 8587 (__mmask8) __U, __R); 8588 } 8589 8590 extern __inline __m512 8591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8592 _mm512_getexp_round_ps (__m512 __A, const int __R) 8593 { 8594 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8595 (__v16sf) 8596 _mm512_undefined_ps (), 8597 (__mmask16) -1, __R); 8598 } 8599 8600 extern __inline __m512 8601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8602 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 8603 const int __R) 8604 { 8605 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8606 (__v16sf) __W, 8607 (__mmask16) __U, __R); 8608 } 8609 8610 extern __inline __m512 8611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8612 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R) 8613 { 8614 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8615 (__v16sf) 8616 _mm512_setzero_ps (), 8617 (__mmask16) __U, __R); 8618 } 8619 8620 extern __inline __m512d 8621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8622 _mm512_getexp_round_pd (__m512d __A, const int __R) 8623 { 8624 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8625 (__v8df) 8626 _mm512_undefined_pd (), 8627 (__mmask8) -1, __R); 8628 } 8629 8630 extern __inline __m512d 8631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8632 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 8633 const int __R) 8634 { 8635 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8636 (__v8df) __W, 8637 (__mmask8) __U, __R); 8638 } 8639 8640 extern __inline __m512d 8641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8642 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R) 8643 { 8644 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8645 (__v8df) 8646 _mm512_setzero_pd (), 8647 (__mmask8) __U, __R); 8648 } 8649 8650 extern __inline __m512d 8651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8652 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, 8653 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8654 { 8655 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8656 (__C << 2) | __B, 8657 _mm512_undefined_pd (), 8658 (__mmask8) -1, __R); 8659 } 8660 8661 extern __inline __m512d 8662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8663 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 8664 _MM_MANTISSA_NORM_ENUM __B, 8665 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8666 { 8667 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8668 (__C << 2) | __B, 8669 (__v8df) __W, __U, 8670 __R); 8671 } 8672 8673 extern __inline __m512d 8674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8675 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A, 8676 _MM_MANTISSA_NORM_ENUM __B, 8677 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8678 { 8679 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8680 (__C << 2) | __B, 8681 (__v8df) 8682 _mm512_setzero_pd (), 8683 __U, __R); 8684 } 8685 8686 extern __inline __m512 8687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8688 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, 8689 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8690 { 8691 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8692 (__C << 2) | __B, 8693 _mm512_undefined_ps (), 8694 (__mmask16) -1, __R); 8695 } 8696 8697 extern __inline __m512 8698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8699 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 8700 _MM_MANTISSA_NORM_ENUM __B, 8701 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8702 { 8703 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8704 (__C << 2) | __B, 8705 (__v16sf) __W, __U, 8706 __R); 8707 } 8708 8709 extern __inline __m512 8710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8711 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A, 8712 _MM_MANTISSA_NORM_ENUM __B, 8713 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8714 { 8715 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8716 (__C << 2) | __B, 8717 (__v16sf) 8718 _mm512_setzero_ps (), 8719 __U, __R); 8720 } 8721 8722 extern __inline __m128d 8723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8724 _mm_getmant_round_sd (__m128d __A, __m128d __B, 8725 _MM_MANTISSA_NORM_ENUM __C, 8726 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8727 { 8728 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A, 8729 (__v2df) __B, 8730 (__D << 2) | __C, 8731 __R); 8732 } 8733 8734 extern __inline __m128d 8735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8736 _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 8737 __m128d __B, _MM_MANTISSA_NORM_ENUM __C, 8738 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8739 { 8740 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A, 8741 (__v2df) __B, 8742 (__D << 2) | __C, 8743 (__v2df) __W, 8744 __U, __R); 8745 } 8746 8747 extern __inline __m128d 8748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8749 _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 8750 _MM_MANTISSA_NORM_ENUM __C, 8751 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8752 { 8753 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A, 8754 (__v2df) __B, 8755 (__D << 2) | __C, 8756 (__v2df) 8757 _mm_setzero_pd(), 8758 __U, __R); 8759 } 8760 8761 extern __inline __m128 8762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8763 _mm_getmant_round_ss (__m128 __A, __m128 __B, 8764 _MM_MANTISSA_NORM_ENUM __C, 8765 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8766 { 8767 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A, 8768 (__v4sf) __B, 8769 (__D << 2) | __C, 8770 __R); 8771 } 8772 8773 extern __inline __m128 8774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8775 _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 8776 __m128 __B, _MM_MANTISSA_NORM_ENUM __C, 8777 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8778 { 8779 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A, 8780 (__v4sf) __B, 8781 (__D << 2) | __C, 8782 (__v4sf) __W, 8783 __U, __R); 8784 } 8785 8786 extern __inline __m128 8787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8788 _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 8789 _MM_MANTISSA_NORM_ENUM __C, 8790 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8791 { 8792 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A, 8793 (__v4sf) __B, 8794 (__D << 2) | __C, 8795 (__v4sf) 8796 _mm_setzero_ps(), 8797 __U, __R); 8798 } 8799 8800 #else 8801 #define _mm512_getmant_round_pd(X, B, C, R) \ 8802 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 8803 (int)(((C)<<2) | (B)), \ 8804 (__v8df)(__m512d)_mm512_undefined_pd(), \ 8805 (__mmask8)-1,\ 8806 (R))) 8807 8808 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \ 8809 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 8810 (int)(((C)<<2) | (B)), \ 8811 (__v8df)(__m512d)(W), \ 8812 (__mmask8)(U),\ 8813 (R))) 8814 8815 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \ 8816 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 8817 (int)(((C)<<2) | (B)), \ 8818 (__v8df)(__m512d)_mm512_setzero_pd(), \ 8819 (__mmask8)(U),\ 8820 (R))) 8821 #define _mm512_getmant_round_ps(X, B, C, R) \ 8822 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 8823 (int)(((C)<<2) | (B)), \ 8824 (__v16sf)(__m512)_mm512_undefined_ps(), \ 8825 (__mmask16)-1,\ 8826 (R))) 8827 8828 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \ 8829 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 8830 (int)(((C)<<2) | (B)), \ 8831 (__v16sf)(__m512)(W), \ 8832 (__mmask16)(U),\ 8833 (R))) 8834 8835 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \ 8836 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 8837 (int)(((C)<<2) | (B)), \ 8838 (__v16sf)(__m512)_mm512_setzero_ps(), \ 8839 (__mmask16)(U),\ 8840 (R))) 8841 #define _mm_getmant_round_sd(X, Y, C, D, R) \ 8842 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \ 8843 (__v2df)(__m128d)(Y), \ 8844 (int)(((D)<<2) | (C)), \ 8845 (R))) 8846 8847 #define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \ 8848 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \ 8849 (__v2df)(__m128d)(Y), \ 8850 (int)(((D)<<2) | (C)), \ 8851 (__v2df)(__m128d)(W), \ 8852 (__mmask8)(U),\ 8853 (R))) 8854 8855 #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \ 8856 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \ 8857 (__v2df)(__m128d)(Y), \ 8858 (int)(((D)<<2) | (C)), \ 8859 (__v2df)(__m128d)_mm_setzero_pd(), \ 8860 (__mmask8)(U),\ 8861 (R))) 8862 8863 #define _mm_getmant_round_ss(X, Y, C, D, R) \ 8864 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \ 8865 (__v4sf)(__m128)(Y), \ 8866 (int)(((D)<<2) | (C)), \ 8867 (R))) 8868 8869 #define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \ 8870 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \ 8871 (__v4sf)(__m128)(Y), \ 8872 (int)(((D)<<2) | (C)), \ 8873 (__v4sf)(__m128)(W), \ 8874 (__mmask8)(U),\ 8875 (R))) 8876 8877 #define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \ 8878 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \ 8879 (__v4sf)(__m128)(Y), \ 8880 (int)(((D)<<2) | (C)), \ 8881 (__v4sf)(__m128)_mm_setzero_ps(), \ 8882 (__mmask8)(U),\ 8883 (R))) 8884 8885 #define _mm_getexp_round_ss(A, B, R) \ 8886 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R)) 8887 8888 #define _mm_mask_getexp_round_ss(W, U, A, B, C) \ 8889 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C) 8890 8891 #define _mm_maskz_getexp_round_ss(U, A, B, C) \ 8892 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 8893 8894 #define _mm_getexp_round_sd(A, B, R) \ 8895 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R)) 8896 8897 #define _mm_mask_getexp_round_sd(W, U, A, B, C) \ 8898 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C) 8899 8900 #define _mm_maskz_getexp_round_sd(U, A, B, C) \ 8901 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 8902 8903 8904 #define _mm512_getexp_round_ps(A, R) \ 8905 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8906 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R)) 8907 8908 #define _mm512_mask_getexp_round_ps(W, U, A, R) \ 8909 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8910 (__v16sf)(__m512)(W), (__mmask16)(U), R)) 8911 8912 #define _mm512_maskz_getexp_round_ps(U, A, R) \ 8913 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8914 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R)) 8915 8916 #define _mm512_getexp_round_pd(A, R) \ 8917 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8918 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R)) 8919 8920 #define _mm512_mask_getexp_round_pd(W, U, A, R) \ 8921 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8922 (__v8df)(__m512d)(W), (__mmask8)(U), R)) 8923 8924 #define _mm512_maskz_getexp_round_pd(U, A, R) \ 8925 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8926 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R)) 8927 #endif 8928 8929 #ifdef __OPTIMIZE__ 8930 extern __inline __m512 8931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8932 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R) 8933 { 8934 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, 8935 (__v16sf) 8936 _mm512_undefined_ps (), 8937 -1, __R); 8938 } 8939 8940 extern __inline __m512 8941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8942 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C, 8943 const int __imm, const int __R) 8944 { 8945 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, 8946 (__v16sf) __A, 8947 (__mmask16) __B, __R); 8948 } 8949 8950 extern __inline __m512 8951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8952 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B, 8953 const int __imm, const int __R) 8954 { 8955 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, 8956 __imm, 8957 (__v16sf) 8958 _mm512_setzero_ps (), 8959 (__mmask16) __A, __R); 8960 } 8961 8962 extern __inline __m512d 8963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8964 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R) 8965 { 8966 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, 8967 (__v8df) 8968 _mm512_undefined_pd (), 8969 -1, __R); 8970 } 8971 8972 extern __inline __m512d 8973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8974 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B, 8975 __m512d __C, const int __imm, const int __R) 8976 { 8977 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, 8978 (__v8df) __A, 8979 (__mmask8) __B, __R); 8980 } 8981 8982 extern __inline __m512d 8983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8984 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B, 8985 const int __imm, const int __R) 8986 { 8987 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, 8988 __imm, 8989 (__v8df) 8990 _mm512_setzero_pd (), 8991 (__mmask8) __A, __R); 8992 } 8993 8994 extern __inline __m128 8995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8996 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R) 8997 { 8998 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A, 8999 (__v4sf) __B, __imm, __R); 9000 } 9001 9002 extern __inline __m128d 9003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9004 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm, 9005 const int __R) 9006 { 9007 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A, 9008 (__v2df) __B, __imm, __R); 9009 } 9010 9011 #else 9012 #define _mm512_roundscale_round_ps(A, B, R) \ 9013 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\ 9014 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R)) 9015 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \ 9016 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ 9017 (int)(D), \ 9018 (__v16sf)(__m512)(A), \ 9019 (__mmask16)(B), R)) 9020 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \ 9021 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ 9022 (int)(C), \ 9023 (__v16sf)_mm512_setzero_ps(),\ 9024 (__mmask16)(A), R)) 9025 #define _mm512_roundscale_round_pd(A, B, R) \ 9026 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\ 9027 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R)) 9028 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \ 9029 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ 9030 (int)(D), \ 9031 (__v8df)(__m512d)(A), \ 9032 (__mmask8)(B), R)) 9033 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \ 9034 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ 9035 (int)(C), \ 9036 (__v8df)_mm512_setzero_pd(),\ 9037 (__mmask8)(A), R)) 9038 #define _mm_roundscale_round_ss(A, B, C, R) \ 9039 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \ 9040 (__v4sf)(__m128)(B), (int)(C), R)) 9041 #define _mm_roundscale_round_sd(A, B, C, R) \ 9042 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \ 9043 (__v2df)(__m128d)(B), (int)(C), R)) 9044 #endif 9045 9046 extern __inline __m512 9047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9048 _mm512_floor_ps (__m512 __A) 9049 { 9050 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 9051 _MM_FROUND_FLOOR, 9052 (__v16sf) __A, -1, 9053 _MM_FROUND_CUR_DIRECTION); 9054 } 9055 9056 extern __inline __m512d 9057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9058 _mm512_floor_pd (__m512d __A) 9059 { 9060 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 9061 _MM_FROUND_FLOOR, 9062 (__v8df) __A, -1, 9063 _MM_FROUND_CUR_DIRECTION); 9064 } 9065 9066 extern __inline __m512 9067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9068 _mm512_ceil_ps (__m512 __A) 9069 { 9070 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 9071 _MM_FROUND_CEIL, 9072 (__v16sf) __A, -1, 9073 _MM_FROUND_CUR_DIRECTION); 9074 } 9075 9076 extern __inline __m512d 9077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9078 _mm512_ceil_pd (__m512d __A) 9079 { 9080 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 9081 _MM_FROUND_CEIL, 9082 (__v8df) __A, -1, 9083 _MM_FROUND_CUR_DIRECTION); 9084 } 9085 9086 extern __inline __m512 9087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9088 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) 9089 { 9090 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 9091 _MM_FROUND_FLOOR, 9092 (__v16sf) __W, __U, 9093 _MM_FROUND_CUR_DIRECTION); 9094 } 9095 9096 extern __inline __m512d 9097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9098 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) 9099 { 9100 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 9101 _MM_FROUND_FLOOR, 9102 (__v8df) __W, __U, 9103 _MM_FROUND_CUR_DIRECTION); 9104 } 9105 9106 extern __inline __m512 9107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9108 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) 9109 { 9110 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 9111 _MM_FROUND_CEIL, 9112 (__v16sf) __W, __U, 9113 _MM_FROUND_CUR_DIRECTION); 9114 } 9115 9116 extern __inline __m512d 9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9118 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) 9119 { 9120 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 9121 _MM_FROUND_CEIL, 9122 (__v8df) __W, __U, 9123 _MM_FROUND_CUR_DIRECTION); 9124 } 9125 9126 #ifdef __OPTIMIZE__ 9127 extern __inline __m512i 9128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9129 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm) 9130 { 9131 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, 9132 (__v16si) __B, __imm, 9133 (__v16si) 9134 _mm512_undefined_epi32 (), 9135 (__mmask16) -1); 9136 } 9137 9138 extern __inline __m512i 9139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9140 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 9141 __m512i __B, const int __imm) 9142 { 9143 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, 9144 (__v16si) __B, __imm, 9145 (__v16si) __W, 9146 (__mmask16) __U); 9147 } 9148 9149 extern __inline __m512i 9150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9151 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B, 9152 const int __imm) 9153 { 9154 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, 9155 (__v16si) __B, __imm, 9156 (__v16si) 9157 _mm512_setzero_si512 (), 9158 (__mmask16) __U); 9159 } 9160 9161 extern __inline __m512i 9162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9163 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm) 9164 { 9165 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, 9166 (__v8di) __B, __imm, 9167 (__v8di) 9168 _mm512_undefined_epi32 (), 9169 (__mmask8) -1); 9170 } 9171 9172 extern __inline __m512i 9173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9174 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 9175 __m512i __B, const int __imm) 9176 { 9177 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, 9178 (__v8di) __B, __imm, 9179 (__v8di) __W, 9180 (__mmask8) __U); 9181 } 9182 9183 extern __inline __m512i 9184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9185 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B, 9186 const int __imm) 9187 { 9188 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, 9189 (__v8di) __B, __imm, 9190 (__v8di) 9191 _mm512_setzero_si512 (), 9192 (__mmask8) __U); 9193 } 9194 #else 9195 #define _mm512_alignr_epi32(X, Y, C) \ 9196 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ 9197 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\ 9198 (__mmask16)-1)) 9199 9200 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \ 9201 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ 9202 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \ 9203 (__mmask16)(U))) 9204 9205 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \ 9206 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ 9207 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\ 9208 (__mmask16)(U))) 9209 9210 #define _mm512_alignr_epi64(X, Y, C) \ 9211 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ 9212 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (), \ 9213 (__mmask8)-1)) 9214 9215 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \ 9216 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ 9217 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U))) 9218 9219 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \ 9220 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ 9221 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\ 9222 (__mmask8)(U))) 9223 #endif 9224 9225 extern __inline __mmask16 9226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9227 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B) 9228 { 9229 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, 9230 (__v16si) __B, 9231 (__mmask16) -1); 9232 } 9233 9234 extern __inline __mmask16 9235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9236 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 9237 { 9238 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, 9239 (__v16si) __B, __U); 9240 } 9241 9242 extern __inline __mmask8 9243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9244 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 9245 { 9246 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, 9247 (__v8di) __B, __U); 9248 } 9249 9250 extern __inline __mmask8 9251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9252 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B) 9253 { 9254 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, 9255 (__v8di) __B, 9256 (__mmask8) -1); 9257 } 9258 9259 extern __inline __mmask16 9260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9261 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B) 9262 { 9263 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A, 9264 (__v16si) __B, 9265 (__mmask16) -1); 9266 } 9267 9268 extern __inline __mmask16 9269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9270 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 9271 { 9272 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A, 9273 (__v16si) __B, __U); 9274 } 9275 9276 extern __inline __mmask8 9277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9278 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 9279 { 9280 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, 9281 (__v8di) __B, __U); 9282 } 9283 9284 extern __inline __mmask8 9285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9286 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B) 9287 { 9288 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, 9289 (__v8di) __B, 9290 (__mmask8) -1); 9291 } 9292 9293 extern __inline __mmask16 9294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9295 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y) 9296 { 9297 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9298 (__v16si) __Y, 5, 9299 (__mmask16) -1); 9300 } 9301 9302 extern __inline __mmask16 9303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9304 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 9305 { 9306 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9307 (__v16si) __Y, 5, 9308 (__mmask16) __M); 9309 } 9310 9311 extern __inline __mmask16 9312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9313 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 9314 { 9315 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9316 (__v16si) __Y, 5, 9317 (__mmask16) __M); 9318 } 9319 9320 extern __inline __mmask16 9321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9322 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y) 9323 { 9324 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9325 (__v16si) __Y, 5, 9326 (__mmask16) -1); 9327 } 9328 9329 extern __inline __mmask8 9330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9331 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 9332 { 9333 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9334 (__v8di) __Y, 5, 9335 (__mmask8) __M); 9336 } 9337 9338 extern __inline __mmask8 9339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9340 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y) 9341 { 9342 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9343 (__v8di) __Y, 5, 9344 (__mmask8) -1); 9345 } 9346 9347 extern __inline __mmask8 9348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9349 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 9350 { 9351 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9352 (__v8di) __Y, 5, 9353 (__mmask8) __M); 9354 } 9355 9356 extern __inline __mmask8 9357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9358 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y) 9359 { 9360 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9361 (__v8di) __Y, 5, 9362 (__mmask8) -1); 9363 } 9364 9365 extern __inline __mmask16 9366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9367 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 9368 { 9369 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9370 (__v16si) __Y, 2, 9371 (__mmask16) __M); 9372 } 9373 9374 extern __inline __mmask16 9375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9376 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y) 9377 { 9378 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9379 (__v16si) __Y, 2, 9380 (__mmask16) -1); 9381 } 9382 9383 extern __inline __mmask16 9384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9385 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 9386 { 9387 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9388 (__v16si) __Y, 2, 9389 (__mmask16) __M); 9390 } 9391 9392 extern __inline __mmask16 9393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9394 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y) 9395 { 9396 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9397 (__v16si) __Y, 2, 9398 (__mmask16) -1); 9399 } 9400 9401 extern __inline __mmask8 9402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9403 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 9404 { 9405 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9406 (__v8di) __Y, 2, 9407 (__mmask8) __M); 9408 } 9409 9410 extern __inline __mmask8 9411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9412 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y) 9413 { 9414 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9415 (__v8di) __Y, 2, 9416 (__mmask8) -1); 9417 } 9418 9419 extern __inline __mmask8 9420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9421 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 9422 { 9423 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9424 (__v8di) __Y, 2, 9425 (__mmask8) __M); 9426 } 9427 9428 extern __inline __mmask8 9429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9430 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y) 9431 { 9432 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9433 (__v8di) __Y, 2, 9434 (__mmask8) -1); 9435 } 9436 9437 extern __inline __mmask16 9438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9439 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 9440 { 9441 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9442 (__v16si) __Y, 1, 9443 (__mmask16) __M); 9444 } 9445 9446 extern __inline __mmask16 9447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9448 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y) 9449 { 9450 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9451 (__v16si) __Y, 1, 9452 (__mmask16) -1); 9453 } 9454 9455 extern __inline __mmask16 9456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9457 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 9458 { 9459 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9460 (__v16si) __Y, 1, 9461 (__mmask16) __M); 9462 } 9463 9464 extern __inline __mmask16 9465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9466 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y) 9467 { 9468 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9469 (__v16si) __Y, 1, 9470 (__mmask16) -1); 9471 } 9472 9473 extern __inline __mmask8 9474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9475 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 9476 { 9477 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9478 (__v8di) __Y, 1, 9479 (__mmask8) __M); 9480 } 9481 9482 extern __inline __mmask8 9483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9484 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y) 9485 { 9486 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9487 (__v8di) __Y, 1, 9488 (__mmask8) -1); 9489 } 9490 9491 extern __inline __mmask8 9492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9493 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 9494 { 9495 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9496 (__v8di) __Y, 1, 9497 (__mmask8) __M); 9498 } 9499 9500 extern __inline __mmask8 9501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9502 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y) 9503 { 9504 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9505 (__v8di) __Y, 1, 9506 (__mmask8) -1); 9507 } 9508 9509 extern __inline __mmask16 9510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9511 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y) 9512 { 9513 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9514 (__v16si) __Y, 4, 9515 (__mmask16) -1); 9516 } 9517 9518 extern __inline __mmask16 9519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9520 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 9521 { 9522 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9523 (__v16si) __Y, 4, 9524 (__mmask16) __M); 9525 } 9526 9527 extern __inline __mmask16 9528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9529 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 9530 { 9531 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9532 (__v16si) __Y, 4, 9533 (__mmask16) __M); 9534 } 9535 9536 extern __inline __mmask16 9537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9538 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y) 9539 { 9540 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9541 (__v16si) __Y, 4, 9542 (__mmask16) -1); 9543 } 9544 9545 extern __inline __mmask8 9546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9547 _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 9548 { 9549 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9550 (__v8di) __Y, 4, 9551 (__mmask8) __M); 9552 } 9553 9554 extern __inline __mmask8 9555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9556 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y) 9557 { 9558 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9559 (__v8di) __Y, 4, 9560 (__mmask8) -1); 9561 } 9562 9563 extern __inline __mmask8 9564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9565 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 9566 { 9567 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9568 (__v8di) __Y, 4, 9569 (__mmask8) __M); 9570 } 9571 9572 extern __inline __mmask8 9573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9574 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y) 9575 { 9576 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9577 (__v8di) __Y, 4, 9578 (__mmask8) -1); 9579 } 9580 9581 #define _MM_CMPINT_EQ 0x0 9582 #define _MM_CMPINT_LT 0x1 9583 #define _MM_CMPINT_LE 0x2 9584 #define _MM_CMPINT_UNUSED 0x3 9585 #define _MM_CMPINT_NE 0x4 9586 #define _MM_CMPINT_NLT 0x5 9587 #define _MM_CMPINT_GE 0x5 9588 #define _MM_CMPINT_NLE 0x6 9589 #define _MM_CMPINT_GT 0x6 9590 9591 #ifdef __OPTIMIZE__ 9592 extern __inline __mmask16 9593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9594 _kshiftli_mask16 (__mmask16 __A, unsigned int __B) 9595 { 9596 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A, 9597 (__mmask8) __B); 9598 } 9599 9600 extern __inline __mmask16 9601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9602 _kshiftri_mask16 (__mmask16 __A, unsigned int __B) 9603 { 9604 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A, 9605 (__mmask8) __B); 9606 } 9607 9608 extern __inline __mmask8 9609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9610 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P) 9611 { 9612 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9613 (__v8di) __Y, __P, 9614 (__mmask8) -1); 9615 } 9616 9617 extern __inline __mmask16 9618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9619 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P) 9620 { 9621 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9622 (__v16si) __Y, __P, 9623 (__mmask16) -1); 9624 } 9625 9626 extern __inline __mmask8 9627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9628 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P) 9629 { 9630 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9631 (__v8di) __Y, __P, 9632 (__mmask8) -1); 9633 } 9634 9635 extern __inline __mmask16 9636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9637 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P) 9638 { 9639 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9640 (__v16si) __Y, __P, 9641 (__mmask16) -1); 9642 } 9643 9644 extern __inline __mmask8 9645 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9646 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P, 9647 const int __R) 9648 { 9649 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 9650 (__v8df) __Y, __P, 9651 (__mmask8) -1, __R); 9652 } 9653 9654 extern __inline __mmask16 9655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9656 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R) 9657 { 9658 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 9659 (__v16sf) __Y, __P, 9660 (__mmask16) -1, __R); 9661 } 9662 9663 extern __inline __mmask8 9664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9665 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y, 9666 const int __P) 9667 { 9668 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9669 (__v8di) __Y, __P, 9670 (__mmask8) __U); 9671 } 9672 9673 extern __inline __mmask16 9674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9675 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y, 9676 const int __P) 9677 { 9678 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9679 (__v16si) __Y, __P, 9680 (__mmask16) __U); 9681 } 9682 9683 extern __inline __mmask8 9684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9685 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y, 9686 const int __P) 9687 { 9688 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9689 (__v8di) __Y, __P, 9690 (__mmask8) __U); 9691 } 9692 9693 extern __inline __mmask16 9694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9695 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y, 9696 const int __P) 9697 { 9698 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9699 (__v16si) __Y, __P, 9700 (__mmask16) __U); 9701 } 9702 9703 extern __inline __mmask8 9704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9705 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, 9706 const int __P, const int __R) 9707 { 9708 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 9709 (__v8df) __Y, __P, 9710 (__mmask8) __U, __R); 9711 } 9712 9713 extern __inline __mmask16 9714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9715 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, 9716 const int __P, const int __R) 9717 { 9718 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 9719 (__v16sf) __Y, __P, 9720 (__mmask16) __U, __R); 9721 } 9722 9723 extern __inline __mmask8 9724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9725 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R) 9726 { 9727 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 9728 (__v2df) __Y, __P, 9729 (__mmask8) -1, __R); 9730 } 9731 9732 extern __inline __mmask8 9733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9734 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, 9735 const int __P, const int __R) 9736 { 9737 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 9738 (__v2df) __Y, __P, 9739 (__mmask8) __M, __R); 9740 } 9741 9742 extern __inline __mmask8 9743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9744 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R) 9745 { 9746 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 9747 (__v4sf) __Y, __P, 9748 (__mmask8) -1, __R); 9749 } 9750 9751 extern __inline __mmask8 9752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9753 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, 9754 const int __P, const int __R) 9755 { 9756 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 9757 (__v4sf) __Y, __P, 9758 (__mmask8) __M, __R); 9759 } 9760 9761 #else 9762 #define _kshiftli_mask16(X, Y) \ 9763 ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y))) 9764 9765 #define _kshiftri_mask16(X, Y) \ 9766 ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y))) 9767 9768 #define _mm512_cmp_epi64_mask(X, Y, P) \ 9769 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ 9770 (__v8di)(__m512i)(Y), (int)(P),\ 9771 (__mmask8)-1)) 9772 9773 #define _mm512_cmp_epi32_mask(X, Y, P) \ 9774 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ 9775 (__v16si)(__m512i)(Y), (int)(P), \ 9776 (__mmask16)-1)) 9777 9778 #define _mm512_cmp_epu64_mask(X, Y, P) \ 9779 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ 9780 (__v8di)(__m512i)(Y), (int)(P),\ 9781 (__mmask8)-1)) 9782 9783 #define _mm512_cmp_epu32_mask(X, Y, P) \ 9784 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ 9785 (__v16si)(__m512i)(Y), (int)(P), \ 9786 (__mmask16)-1)) 9787 9788 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \ 9789 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 9790 (__v8df)(__m512d)(Y), (int)(P),\ 9791 (__mmask8)-1, R)) 9792 9793 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \ 9794 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 9795 (__v16sf)(__m512)(Y), (int)(P),\ 9796 (__mmask16)-1, R)) 9797 9798 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \ 9799 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ 9800 (__v8di)(__m512i)(Y), (int)(P),\ 9801 (__mmask8)M)) 9802 9803 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \ 9804 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ 9805 (__v16si)(__m512i)(Y), (int)(P), \ 9806 (__mmask16)M)) 9807 9808 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \ 9809 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ 9810 (__v8di)(__m512i)(Y), (int)(P),\ 9811 (__mmask8)M)) 9812 9813 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \ 9814 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ 9815 (__v16si)(__m512i)(Y), (int)(P), \ 9816 (__mmask16)M)) 9817 9818 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \ 9819 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 9820 (__v8df)(__m512d)(Y), (int)(P),\ 9821 (__mmask8)M, R)) 9822 9823 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \ 9824 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 9825 (__v16sf)(__m512)(Y), (int)(P),\ 9826 (__mmask16)M, R)) 9827 9828 #define _mm_cmp_round_sd_mask(X, Y, P, R) \ 9829 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 9830 (__v2df)(__m128d)(Y), (int)(P),\ 9831 (__mmask8)-1, R)) 9832 9833 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ 9834 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 9835 (__v2df)(__m128d)(Y), (int)(P),\ 9836 (M), R)) 9837 9838 #define _mm_cmp_round_ss_mask(X, Y, P, R) \ 9839 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 9840 (__v4sf)(__m128)(Y), (int)(P), \ 9841 (__mmask8)-1, R)) 9842 9843 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ 9844 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 9845 (__v4sf)(__m128)(Y), (int)(P), \ 9846 (M), R)) 9847 #endif 9848 9849 #ifdef __OPTIMIZE__ 9850 extern __inline __m512 9851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9852 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale) 9853 { 9854 __m512 __v1_old = _mm512_undefined_ps (); 9855 __mmask16 __mask = 0xFFFF; 9856 9857 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old, 9858 __addr, 9859 (__v16si) __index, 9860 __mask, __scale); 9861 } 9862 9863 extern __inline __m512 9864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9865 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask, 9866 __m512i __index, void const *__addr, int __scale) 9867 { 9868 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old, 9869 __addr, 9870 (__v16si) __index, 9871 __mask, __scale); 9872 } 9873 9874 extern __inline __m512d 9875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9876 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale) 9877 { 9878 __m512d __v1_old = _mm512_undefined_pd (); 9879 __mmask8 __mask = 0xFF; 9880 9881 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old, 9882 __addr, 9883 (__v8si) __index, __mask, 9884 __scale); 9885 } 9886 9887 extern __inline __m512d 9888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9889 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask, 9890 __m256i __index, void const *__addr, int __scale) 9891 { 9892 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old, 9893 __addr, 9894 (__v8si) __index, 9895 __mask, __scale); 9896 } 9897 9898 extern __inline __m256 9899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9900 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale) 9901 { 9902 __m256 __v1_old = _mm256_undefined_ps (); 9903 __mmask8 __mask = 0xFF; 9904 9905 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old, 9906 __addr, 9907 (__v8di) __index, __mask, 9908 __scale); 9909 } 9910 9911 extern __inline __m256 9912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9913 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask, 9914 __m512i __index, void const *__addr, int __scale) 9915 { 9916 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old, 9917 __addr, 9918 (__v8di) __index, 9919 __mask, __scale); 9920 } 9921 9922 extern __inline __m512d 9923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9924 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale) 9925 { 9926 __m512d __v1_old = _mm512_undefined_pd (); 9927 __mmask8 __mask = 0xFF; 9928 9929 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old, 9930 __addr, 9931 (__v8di) __index, __mask, 9932 __scale); 9933 } 9934 9935 extern __inline __m512d 9936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9937 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask, 9938 __m512i __index, void const *__addr, int __scale) 9939 { 9940 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old, 9941 __addr, 9942 (__v8di) __index, 9943 __mask, __scale); 9944 } 9945 9946 extern __inline __m512i 9947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9948 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale) 9949 { 9950 __m512i __v1_old = _mm512_undefined_epi32 (); 9951 __mmask16 __mask = 0xFFFF; 9952 9953 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old, 9954 __addr, 9955 (__v16si) __index, 9956 __mask, __scale); 9957 } 9958 9959 extern __inline __m512i 9960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9961 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask, 9962 __m512i __index, void const *__addr, int __scale) 9963 { 9964 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old, 9965 __addr, 9966 (__v16si) __index, 9967 __mask, __scale); 9968 } 9969 9970 extern __inline __m512i 9971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9972 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale) 9973 { 9974 __m512i __v1_old = _mm512_undefined_epi32 (); 9975 __mmask8 __mask = 0xFF; 9976 9977 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old, 9978 __addr, 9979 (__v8si) __index, __mask, 9980 __scale); 9981 } 9982 9983 extern __inline __m512i 9984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9985 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask, 9986 __m256i __index, void const *__addr, 9987 int __scale) 9988 { 9989 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old, 9990 __addr, 9991 (__v8si) __index, 9992 __mask, __scale); 9993 } 9994 9995 extern __inline __m256i 9996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9997 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale) 9998 { 9999 __m256i __v1_old = _mm256_undefined_si256 (); 10000 __mmask8 __mask = 0xFF; 10001 10002 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old, 10003 __addr, 10004 (__v8di) __index, 10005 __mask, __scale); 10006 } 10007 10008 extern __inline __m256i 10009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10010 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask, 10011 __m512i __index, void const *__addr, int __scale) 10012 { 10013 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old, 10014 __addr, 10015 (__v8di) __index, 10016 __mask, __scale); 10017 } 10018 10019 extern __inline __m512i 10020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10021 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale) 10022 { 10023 __m512i __v1_old = _mm512_undefined_epi32 (); 10024 __mmask8 __mask = 0xFF; 10025 10026 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old, 10027 __addr, 10028 (__v8di) __index, __mask, 10029 __scale); 10030 } 10031 10032 extern __inline __m512i 10033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10034 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask, 10035 __m512i __index, void const *__addr, 10036 int __scale) 10037 { 10038 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old, 10039 __addr, 10040 (__v8di) __index, 10041 __mask, __scale); 10042 } 10043 10044 extern __inline void 10045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10046 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale) 10047 { 10048 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF, 10049 (__v16si) __index, (__v16sf) __v1, __scale); 10050 } 10051 10052 extern __inline void 10053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10054 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask, 10055 __m512i __index, __m512 __v1, int __scale) 10056 { 10057 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index, 10058 (__v16sf) __v1, __scale); 10059 } 10060 10061 extern __inline void 10062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10063 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1, 10064 int __scale) 10065 { 10066 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, 10067 (__v8si) __index, (__v8df) __v1, __scale); 10068 } 10069 10070 extern __inline void 10071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10072 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask, 10073 __m256i __index, __m512d __v1, int __scale) 10074 { 10075 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index, 10076 (__v8df) __v1, __scale); 10077 } 10078 10079 extern __inline void 10080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10081 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale) 10082 { 10083 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF, 10084 (__v8di) __index, (__v8sf) __v1, __scale); 10085 } 10086 10087 extern __inline void 10088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10089 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask, 10090 __m512i __index, __m256 __v1, int __scale) 10091 { 10092 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index, 10093 (__v8sf) __v1, __scale); 10094 } 10095 10096 extern __inline void 10097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10098 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1, 10099 int __scale) 10100 { 10101 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF, 10102 (__v8di) __index, (__v8df) __v1, __scale); 10103 } 10104 10105 extern __inline void 10106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10107 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask, 10108 __m512i __index, __m512d __v1, int __scale) 10109 { 10110 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index, 10111 (__v8df) __v1, __scale); 10112 } 10113 10114 extern __inline void 10115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10116 _mm512_i32scatter_epi32 (void *__addr, __m512i __index, 10117 __m512i __v1, int __scale) 10118 { 10119 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF, 10120 (__v16si) __index, (__v16si) __v1, __scale); 10121 } 10122 10123 extern __inline void 10124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10125 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask, 10126 __m512i __index, __m512i __v1, int __scale) 10127 { 10128 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index, 10129 (__v16si) __v1, __scale); 10130 } 10131 10132 extern __inline void 10133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10134 _mm512_i32scatter_epi64 (void *__addr, __m256i __index, 10135 __m512i __v1, int __scale) 10136 { 10137 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF, 10138 (__v8si) __index, (__v8di) __v1, __scale); 10139 } 10140 10141 extern __inline void 10142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10143 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask, 10144 __m256i __index, __m512i __v1, int __scale) 10145 { 10146 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index, 10147 (__v8di) __v1, __scale); 10148 } 10149 10150 extern __inline void 10151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10152 _mm512_i64scatter_epi32 (void *__addr, __m512i __index, 10153 __m256i __v1, int __scale) 10154 { 10155 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF, 10156 (__v8di) __index, (__v8si) __v1, __scale); 10157 } 10158 10159 extern __inline void 10160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10161 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask, 10162 __m512i __index, __m256i __v1, int __scale) 10163 { 10164 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index, 10165 (__v8si) __v1, __scale); 10166 } 10167 10168 extern __inline void 10169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10170 _mm512_i64scatter_epi64 (void *__addr, __m512i __index, 10171 __m512i __v1, int __scale) 10172 { 10173 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF, 10174 (__v8di) __index, (__v8di) __v1, __scale); 10175 } 10176 10177 extern __inline void 10178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10179 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask, 10180 __m512i __index, __m512i __v1, int __scale) 10181 { 10182 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index, 10183 (__v8di) __v1, __scale); 10184 } 10185 #else 10186 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \ 10187 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\ 10188 (void const *)ADDR, \ 10189 (__v16si)(__m512i)INDEX, \ 10190 (__mmask16)0xFFFF, (int)SCALE) 10191 10192 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10193 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \ 10194 (void const *)ADDR, \ 10195 (__v16si)(__m512i)INDEX, \ 10196 (__mmask16)MASK, (int)SCALE) 10197 10198 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \ 10199 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \ 10200 (void const *)ADDR, \ 10201 (__v8si)(__m256i)INDEX, \ 10202 (__mmask8)0xFF, (int)SCALE) 10203 10204 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10205 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \ 10206 (void const *)ADDR, \ 10207 (__v8si)(__m256i)INDEX, \ 10208 (__mmask8)MASK, (int)SCALE) 10209 10210 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \ 10211 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \ 10212 (void const *)ADDR, \ 10213 (__v8di)(__m512i)INDEX, \ 10214 (__mmask8)0xFF, (int)SCALE) 10215 10216 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10217 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \ 10218 (void const *)ADDR, \ 10219 (__v8di)(__m512i)INDEX, \ 10220 (__mmask8)MASK, (int)SCALE) 10221 10222 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \ 10223 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \ 10224 (void const *)ADDR, \ 10225 (__v8di)(__m512i)INDEX, \ 10226 (__mmask8)0xFF, (int)SCALE) 10227 10228 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10229 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \ 10230 (void const *)ADDR, \ 10231 (__v8di)(__m512i)INDEX, \ 10232 (__mmask8)MASK, (int)SCALE) 10233 10234 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \ 10235 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (), \ 10236 (void const *)ADDR, \ 10237 (__v16si)(__m512i)INDEX, \ 10238 (__mmask16)0xFFFF, (int)SCALE) 10239 10240 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10241 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \ 10242 (void const *)ADDR, \ 10243 (__v16si)(__m512i)INDEX, \ 10244 (__mmask16)MASK, (int)SCALE) 10245 10246 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \ 10247 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (), \ 10248 (void const *)ADDR, \ 10249 (__v8si)(__m256i)INDEX, \ 10250 (__mmask8)0xFF, (int)SCALE) 10251 10252 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10253 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \ 10254 (void const *)ADDR, \ 10255 (__v8si)(__m256i)INDEX, \ 10256 (__mmask8)MASK, (int)SCALE) 10257 10258 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \ 10259 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \ 10260 (void const *)ADDR, \ 10261 (__v8di)(__m512i)INDEX, \ 10262 (__mmask8)0xFF, (int)SCALE) 10263 10264 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10265 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \ 10266 (void const *)ADDR, \ 10267 (__v8di)(__m512i)INDEX, \ 10268 (__mmask8)MASK, (int)SCALE) 10269 10270 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \ 10271 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (), \ 10272 (void const *)ADDR, \ 10273 (__v8di)(__m512i)INDEX, \ 10274 (__mmask8)0xFF, (int)SCALE) 10275 10276 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ 10277 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \ 10278 (void const *)ADDR, \ 10279 (__v8di)(__m512i)INDEX, \ 10280 (__mmask8)MASK, (int)SCALE) 10281 10282 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \ 10283 __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF, \ 10284 (__v16si)(__m512i)INDEX, \ 10285 (__v16sf)(__m512)V1, (int)SCALE) 10286 10287 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ 10288 __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK, \ 10289 (__v16si)(__m512i)INDEX, \ 10290 (__v16sf)(__m512)V1, (int)SCALE) 10291 10292 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ 10293 __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF, \ 10294 (__v8si)(__m256i)INDEX, \ 10295 (__v8df)(__m512d)V1, (int)SCALE) 10296 10297 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 10298 __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK, \ 10299 (__v8si)(__m256i)INDEX, \ 10300 (__v8df)(__m512d)V1, (int)SCALE) 10301 10302 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ 10303 __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF, \ 10304 (__v8di)(__m512i)INDEX, \ 10305 (__v8sf)(__m256)V1, (int)SCALE) 10306 10307 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ 10308 __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask16)MASK, \ 10309 (__v8di)(__m512i)INDEX, \ 10310 (__v8sf)(__m256)V1, (int)SCALE) 10311 10312 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \ 10313 __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF, \ 10314 (__v8di)(__m512i)INDEX, \ 10315 (__v8df)(__m512d)V1, (int)SCALE) 10316 10317 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 10318 __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK, \ 10319 (__v8di)(__m512i)INDEX, \ 10320 (__v8df)(__m512d)V1, (int)SCALE) 10321 10322 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ 10323 __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF, \ 10324 (__v16si)(__m512i)INDEX, \ 10325 (__v16si)(__m512i)V1, (int)SCALE) 10326 10327 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 10328 __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK, \ 10329 (__v16si)(__m512i)INDEX, \ 10330 (__v16si)(__m512i)V1, (int)SCALE) 10331 10332 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ 10333 __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF, \ 10334 (__v8si)(__m256i)INDEX, \ 10335 (__v8di)(__m512i)V1, (int)SCALE) 10336 10337 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 10338 __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK, \ 10339 (__v8si)(__m256i)INDEX, \ 10340 (__v8di)(__m512i)V1, (int)SCALE) 10341 10342 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ 10343 __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF, \ 10344 (__v8di)(__m512i)INDEX, \ 10345 (__v8si)(__m256i)V1, (int)SCALE) 10346 10347 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 10348 __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK, \ 10349 (__v8di)(__m512i)INDEX, \ 10350 (__v8si)(__m256i)V1, (int)SCALE) 10351 10352 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ 10353 __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF, \ 10354 (__v8di)(__m512i)INDEX, \ 10355 (__v8di)(__m512i)V1, (int)SCALE) 10356 10357 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 10358 __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK, \ 10359 (__v8di)(__m512i)INDEX, \ 10360 (__v8di)(__m512i)V1, (int)SCALE) 10361 #endif 10362 10363 extern __inline __m512d 10364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10365 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) 10366 { 10367 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 10368 (__v8df) __W, 10369 (__mmask8) __U); 10370 } 10371 10372 extern __inline __m512d 10373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10374 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 10375 { 10376 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 10377 (__v8df) 10378 _mm512_setzero_pd (), 10379 (__mmask8) __U); 10380 } 10381 10382 extern __inline void 10383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10384 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) 10385 { 10386 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, 10387 (__mmask8) __U); 10388 } 10389 10390 extern __inline __m512 10391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10392 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 10393 { 10394 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 10395 (__v16sf) __W, 10396 (__mmask16) __U); 10397 } 10398 10399 extern __inline __m512 10400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10401 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 10402 { 10403 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 10404 (__v16sf) 10405 _mm512_setzero_ps (), 10406 (__mmask16) __U); 10407 } 10408 10409 extern __inline void 10410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10411 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) 10412 { 10413 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, 10414 (__mmask16) __U); 10415 } 10416 10417 extern __inline __m512i 10418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10419 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 10420 { 10421 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 10422 (__v8di) __W, 10423 (__mmask8) __U); 10424 } 10425 10426 extern __inline __m512i 10427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10428 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 10429 { 10430 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 10431 (__v8di) 10432 _mm512_setzero_si512 (), 10433 (__mmask8) __U); 10434 } 10435 10436 extern __inline void 10437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10438 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) 10439 { 10440 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, 10441 (__mmask8) __U); 10442 } 10443 10444 extern __inline __m512i 10445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10446 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 10447 { 10448 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 10449 (__v16si) __W, 10450 (__mmask16) __U); 10451 } 10452 10453 extern __inline __m512i 10454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10455 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 10456 { 10457 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 10458 (__v16si) 10459 _mm512_setzero_si512 (), 10460 (__mmask16) __U); 10461 } 10462 10463 extern __inline void 10464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10465 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) 10466 { 10467 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, 10468 (__mmask16) __U); 10469 } 10470 10471 extern __inline __m512d 10472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10473 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 10474 { 10475 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 10476 (__v8df) __W, 10477 (__mmask8) __U); 10478 } 10479 10480 extern __inline __m512d 10481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10482 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 10483 { 10484 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A, 10485 (__v8df) 10486 _mm512_setzero_pd (), 10487 (__mmask8) __U); 10488 } 10489 10490 extern __inline __m512d 10491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10492 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P) 10493 { 10494 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P, 10495 (__v8df) __W, 10496 (__mmask8) __U); 10497 } 10498 10499 extern __inline __m512d 10500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10501 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P) 10502 { 10503 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P, 10504 (__v8df) 10505 _mm512_setzero_pd (), 10506 (__mmask8) __U); 10507 } 10508 10509 extern __inline __m512 10510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10511 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 10512 { 10513 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 10514 (__v16sf) __W, 10515 (__mmask16) __U); 10516 } 10517 10518 extern __inline __m512 10519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10520 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 10521 { 10522 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A, 10523 (__v16sf) 10524 _mm512_setzero_ps (), 10525 (__mmask16) __U); 10526 } 10527 10528 extern __inline __m512 10529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10530 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P) 10531 { 10532 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P, 10533 (__v16sf) __W, 10534 (__mmask16) __U); 10535 } 10536 10537 extern __inline __m512 10538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10539 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P) 10540 { 10541 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P, 10542 (__v16sf) 10543 _mm512_setzero_ps (), 10544 (__mmask16) __U); 10545 } 10546 10547 extern __inline __m512i 10548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10549 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 10550 { 10551 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 10552 (__v8di) __W, 10553 (__mmask8) __U); 10554 } 10555 10556 extern __inline __m512i 10557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10558 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A) 10559 { 10560 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A, 10561 (__v8di) 10562 _mm512_setzero_si512 (), 10563 (__mmask8) __U); 10564 } 10565 10566 extern __inline __m512i 10567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10568 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 10569 { 10570 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P, 10571 (__v8di) __W, 10572 (__mmask8) __U); 10573 } 10574 10575 extern __inline __m512i 10576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10577 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) 10578 { 10579 return (__m512i) 10580 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P, 10581 (__v8di) 10582 _mm512_setzero_si512 (), 10583 (__mmask8) __U); 10584 } 10585 10586 extern __inline __m512i 10587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10588 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 10589 { 10590 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 10591 (__v16si) __W, 10592 (__mmask16) __U); 10593 } 10594 10595 extern __inline __m512i 10596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10597 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 10598 { 10599 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A, 10600 (__v16si) 10601 _mm512_setzero_si512 (), 10602 (__mmask16) __U); 10603 } 10604 10605 extern __inline __m512i 10606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10607 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 10608 { 10609 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P, 10610 (__v16si) __W, 10611 (__mmask16) __U); 10612 } 10613 10614 extern __inline __m512i 10615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10616 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P) 10617 { 10618 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P, 10619 (__v16si) 10620 _mm512_setzero_si512 10621 (), (__mmask16) __U); 10622 } 10623 10624 /* Mask arithmetic operations */ 10625 #define _kand_mask16 _mm512_kand 10626 #define _kandn_mask16 _mm512_kandn 10627 #define _knot_mask16 _mm512_knot 10628 #define _kor_mask16 _mm512_kor 10629 #define _kxnor_mask16 _mm512_kxnor 10630 #define _kxor_mask16 _mm512_kxor 10631 10632 extern __inline unsigned char 10633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10634 _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF) 10635 { 10636 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B); 10637 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B); 10638 } 10639 10640 extern __inline unsigned char 10641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10642 _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B) 10643 { 10644 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A, 10645 (__mmask16) __B); 10646 } 10647 10648 extern __inline unsigned char 10649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10650 _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B) 10651 { 10652 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A, 10653 (__mmask16) __B); 10654 } 10655 10656 extern __inline unsigned int 10657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10658 _cvtmask16_u32 (__mmask16 __A) 10659 { 10660 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A); 10661 } 10662 10663 extern __inline __mmask16 10664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10665 _cvtu32_mask16 (unsigned int __A) 10666 { 10667 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A); 10668 } 10669 10670 extern __inline __mmask16 10671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10672 _load_mask16 (__mmask16 *__A) 10673 { 10674 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A); 10675 } 10676 10677 extern __inline void 10678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10679 _store_mask16 (__mmask16 *__A, __mmask16 __B) 10680 { 10681 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B); 10682 } 10683 10684 extern __inline __mmask16 10685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10686 _mm512_kand (__mmask16 __A, __mmask16 __B) 10687 { 10688 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); 10689 } 10690 10691 extern __inline __mmask16 10692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10693 _mm512_kandn (__mmask16 __A, __mmask16 __B) 10694 { 10695 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, 10696 (__mmask16) __B); 10697 } 10698 10699 extern __inline __mmask16 10700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10701 _mm512_kor (__mmask16 __A, __mmask16 __B) 10702 { 10703 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); 10704 } 10705 10706 extern __inline int 10707 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10708 _mm512_kortestz (__mmask16 __A, __mmask16 __B) 10709 { 10710 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A, 10711 (__mmask16) __B); 10712 } 10713 10714 extern __inline int 10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10716 _mm512_kortestc (__mmask16 __A, __mmask16 __B) 10717 { 10718 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A, 10719 (__mmask16) __B); 10720 } 10721 10722 extern __inline __mmask16 10723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10724 _mm512_kxnor (__mmask16 __A, __mmask16 __B) 10725 { 10726 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); 10727 } 10728 10729 extern __inline __mmask16 10730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10731 _mm512_kxor (__mmask16 __A, __mmask16 __B) 10732 { 10733 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); 10734 } 10735 10736 extern __inline __mmask16 10737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10738 _mm512_knot (__mmask16 __A) 10739 { 10740 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A); 10741 } 10742 10743 extern __inline __mmask16 10744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10745 _mm512_kunpackb (__mmask16 __A, __mmask16 __B) 10746 { 10747 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); 10748 } 10749 10750 extern __inline __mmask16 10751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10752 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B) 10753 { 10754 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); 10755 } 10756 10757 #ifdef __OPTIMIZE__ 10758 extern __inline __m512i 10759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10760 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D, 10761 const int __imm) 10762 { 10763 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, 10764 (__v4si) __D, 10765 __imm, 10766 (__v16si) 10767 _mm512_setzero_si512 (), 10768 __B); 10769 } 10770 10771 extern __inline __m512 10772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10773 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D, 10774 const int __imm) 10775 { 10776 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, 10777 (__v4sf) __D, 10778 __imm, 10779 (__v16sf) 10780 _mm512_setzero_ps (), __B); 10781 } 10782 10783 extern __inline __m512i 10784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10785 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C, 10786 __m128i __D, const int __imm) 10787 { 10788 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, 10789 (__v4si) __D, 10790 __imm, 10791 (__v16si) __A, 10792 __B); 10793 } 10794 10795 extern __inline __m512 10796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10797 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C, 10798 __m128 __D, const int __imm) 10799 { 10800 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, 10801 (__v4sf) __D, 10802 __imm, 10803 (__v16sf) __A, __B); 10804 } 10805 #else 10806 #define _mm512_maskz_insertf32x4(A, X, Y, C) \ 10807 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 10808 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \ 10809 (__mmask16)(A))) 10810 10811 #define _mm512_maskz_inserti32x4(A, X, Y, C) \ 10812 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 10813 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \ 10814 (__mmask16)(A))) 10815 10816 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \ 10817 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 10818 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \ 10819 (__mmask16)(B))) 10820 10821 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \ 10822 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 10823 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \ 10824 (__mmask16)(B))) 10825 #endif 10826 10827 extern __inline __m512i 10828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10829 _mm512_max_epi64 (__m512i __A, __m512i __B) 10830 { 10831 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 10832 (__v8di) __B, 10833 (__v8di) 10834 _mm512_undefined_epi32 (), 10835 (__mmask8) -1); 10836 } 10837 10838 extern __inline __m512i 10839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10840 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 10841 { 10842 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 10843 (__v8di) __B, 10844 (__v8di) 10845 _mm512_setzero_si512 (), 10846 __M); 10847 } 10848 10849 extern __inline __m512i 10850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10851 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 10852 { 10853 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 10854 (__v8di) __B, 10855 (__v8di) __W, __M); 10856 } 10857 10858 extern __inline __m512i 10859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10860 _mm512_min_epi64 (__m512i __A, __m512i __B) 10861 { 10862 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 10863 (__v8di) __B, 10864 (__v8di) 10865 _mm512_undefined_epi32 (), 10866 (__mmask8) -1); 10867 } 10868 10869 extern __inline __m512i 10870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10871 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 10872 { 10873 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 10874 (__v8di) __B, 10875 (__v8di) __W, __M); 10876 } 10877 10878 extern __inline __m512i 10879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10880 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 10881 { 10882 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 10883 (__v8di) __B, 10884 (__v8di) 10885 _mm512_setzero_si512 (), 10886 __M); 10887 } 10888 10889 extern __inline __m512i 10890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10891 _mm512_max_epu64 (__m512i __A, __m512i __B) 10892 { 10893 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 10894 (__v8di) __B, 10895 (__v8di) 10896 _mm512_undefined_epi32 (), 10897 (__mmask8) -1); 10898 } 10899 10900 extern __inline __m512i 10901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10902 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 10903 { 10904 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 10905 (__v8di) __B, 10906 (__v8di) 10907 _mm512_setzero_si512 (), 10908 __M); 10909 } 10910 10911 extern __inline __m512i 10912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10913 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 10914 { 10915 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 10916 (__v8di) __B, 10917 (__v8di) __W, __M); 10918 } 10919 10920 extern __inline __m512i 10921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10922 _mm512_min_epu64 (__m512i __A, __m512i __B) 10923 { 10924 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 10925 (__v8di) __B, 10926 (__v8di) 10927 _mm512_undefined_epi32 (), 10928 (__mmask8) -1); 10929 } 10930 10931 extern __inline __m512i 10932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10933 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 10934 { 10935 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 10936 (__v8di) __B, 10937 (__v8di) __W, __M); 10938 } 10939 10940 extern __inline __m512i 10941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10942 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 10943 { 10944 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 10945 (__v8di) __B, 10946 (__v8di) 10947 _mm512_setzero_si512 (), 10948 __M); 10949 } 10950 10951 extern __inline __m512i 10952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10953 _mm512_max_epi32 (__m512i __A, __m512i __B) 10954 { 10955 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 10956 (__v16si) __B, 10957 (__v16si) 10958 _mm512_undefined_epi32 (), 10959 (__mmask16) -1); 10960 } 10961 10962 extern __inline __m512i 10963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10964 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 10965 { 10966 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 10967 (__v16si) __B, 10968 (__v16si) 10969 _mm512_setzero_si512 (), 10970 __M); 10971 } 10972 10973 extern __inline __m512i 10974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10975 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 10976 { 10977 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 10978 (__v16si) __B, 10979 (__v16si) __W, __M); 10980 } 10981 10982 extern __inline __m512i 10983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10984 _mm512_min_epi32 (__m512i __A, __m512i __B) 10985 { 10986 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 10987 (__v16si) __B, 10988 (__v16si) 10989 _mm512_undefined_epi32 (), 10990 (__mmask16) -1); 10991 } 10992 10993 extern __inline __m512i 10994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10995 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 10996 { 10997 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 10998 (__v16si) __B, 10999 (__v16si) 11000 _mm512_setzero_si512 (), 11001 __M); 11002 } 11003 11004 extern __inline __m512i 11005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11006 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 11007 { 11008 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 11009 (__v16si) __B, 11010 (__v16si) __W, __M); 11011 } 11012 11013 extern __inline __m512i 11014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11015 _mm512_max_epu32 (__m512i __A, __m512i __B) 11016 { 11017 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 11018 (__v16si) __B, 11019 (__v16si) 11020 _mm512_undefined_epi32 (), 11021 (__mmask16) -1); 11022 } 11023 11024 extern __inline __m512i 11025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11026 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 11027 { 11028 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 11029 (__v16si) __B, 11030 (__v16si) 11031 _mm512_setzero_si512 (), 11032 __M); 11033 } 11034 11035 extern __inline __m512i 11036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11037 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 11038 { 11039 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 11040 (__v16si) __B, 11041 (__v16si) __W, __M); 11042 } 11043 11044 extern __inline __m512i 11045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11046 _mm512_min_epu32 (__m512i __A, __m512i __B) 11047 { 11048 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 11049 (__v16si) __B, 11050 (__v16si) 11051 _mm512_undefined_epi32 (), 11052 (__mmask16) -1); 11053 } 11054 11055 extern __inline __m512i 11056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11057 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 11058 { 11059 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 11060 (__v16si) __B, 11061 (__v16si) 11062 _mm512_setzero_si512 (), 11063 __M); 11064 } 11065 11066 extern __inline __m512i 11067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11068 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 11069 { 11070 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 11071 (__v16si) __B, 11072 (__v16si) __W, __M); 11073 } 11074 11075 extern __inline __m512 11076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11077 _mm512_unpacklo_ps (__m512 __A, __m512 __B) 11078 { 11079 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, 11080 (__v16sf) __B, 11081 (__v16sf) 11082 _mm512_undefined_ps (), 11083 (__mmask16) -1); 11084 } 11085 11086 extern __inline __m512 11087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11088 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 11089 { 11090 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, 11091 (__v16sf) __B, 11092 (__v16sf) __W, 11093 (__mmask16) __U); 11094 } 11095 11096 extern __inline __m512 11097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11098 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) 11099 { 11100 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, 11101 (__v16sf) __B, 11102 (__v16sf) 11103 _mm512_setzero_ps (), 11104 (__mmask16) __U); 11105 } 11106 11107 #ifdef __OPTIMIZE__ 11108 extern __inline __m128d 11109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11110 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R) 11111 { 11112 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, 11113 (__v2df) __B, 11114 __R); 11115 } 11116 11117 extern __inline __m128d 11118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11119 _mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 11120 __m128d __B, const int __R) 11121 { 11122 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, 11123 (__v2df) __B, 11124 (__v2df) __W, 11125 (__mmask8) __U, __R); 11126 } 11127 11128 extern __inline __m128d 11129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11130 _mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 11131 const int __R) 11132 { 11133 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, 11134 (__v2df) __B, 11135 (__v2df) 11136 _mm_setzero_pd (), 11137 (__mmask8) __U, __R); 11138 } 11139 11140 extern __inline __m128 11141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11142 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R) 11143 { 11144 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, 11145 (__v4sf) __B, 11146 __R); 11147 } 11148 11149 extern __inline __m128 11150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11151 _mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 11152 __m128 __B, const int __R) 11153 { 11154 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, 11155 (__v4sf) __B, 11156 (__v4sf) __W, 11157 (__mmask8) __U, __R); 11158 } 11159 11160 extern __inline __m128 11161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11162 _mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 11163 const int __R) 11164 { 11165 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, 11166 (__v4sf) __B, 11167 (__v4sf) 11168 _mm_setzero_ps (), 11169 (__mmask8) __U, __R); 11170 } 11171 11172 extern __inline __m128d 11173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11174 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R) 11175 { 11176 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, 11177 (__v2df) __B, 11178 __R); 11179 } 11180 11181 extern __inline __m128d 11182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11183 _mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A, 11184 __m128d __B, const int __R) 11185 { 11186 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, 11187 (__v2df) __B, 11188 (__v2df) __W, 11189 (__mmask8) __U, __R); 11190 } 11191 11192 extern __inline __m128d 11193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11194 _mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 11195 const int __R) 11196 { 11197 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, 11198 (__v2df) __B, 11199 (__v2df) 11200 _mm_setzero_pd (), 11201 (__mmask8) __U, __R); 11202 } 11203 11204 extern __inline __m128 11205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11206 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R) 11207 { 11208 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A, 11209 (__v4sf) __B, 11210 __R); 11211 } 11212 11213 extern __inline __m128 11214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11215 _mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A, 11216 __m128 __B, const int __R) 11217 { 11218 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, 11219 (__v4sf) __B, 11220 (__v4sf) __W, 11221 (__mmask8) __U, __R); 11222 } 11223 11224 extern __inline __m128 11225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11226 _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 11227 const int __R) 11228 { 11229 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, 11230 (__v4sf) __B, 11231 (__v4sf) 11232 _mm_setzero_ps (), 11233 (__mmask8) __U, __R); 11234 } 11235 11236 #else 11237 #define _mm_max_round_sd(A, B, C) \ 11238 (__m128d)__builtin_ia32_maxsd_round(A, B, C) 11239 11240 #define _mm_mask_max_round_sd(W, U, A, B, C) \ 11241 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C) 11242 11243 #define _mm_maskz_max_round_sd(U, A, B, C) \ 11244 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 11245 11246 #define _mm_max_round_ss(A, B, C) \ 11247 (__m128)__builtin_ia32_maxss_round(A, B, C) 11248 11249 #define _mm_mask_max_round_ss(W, U, A, B, C) \ 11250 (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C) 11251 11252 #define _mm_maskz_max_round_ss(U, A, B, C) \ 11253 (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 11254 11255 #define _mm_min_round_sd(A, B, C) \ 11256 (__m128d)__builtin_ia32_minsd_round(A, B, C) 11257 11258 #define _mm_mask_min_round_sd(W, U, A, B, C) \ 11259 (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C) 11260 11261 #define _mm_maskz_min_round_sd(U, A, B, C) \ 11262 (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) 11263 11264 #define _mm_min_round_ss(A, B, C) \ 11265 (__m128)__builtin_ia32_minss_round(A, B, C) 11266 11267 #define _mm_mask_min_round_ss(W, U, A, B, C) \ 11268 (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C) 11269 11270 #define _mm_maskz_min_round_ss(U, A, B, C) \ 11271 (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) 11272 11273 #endif 11274 11275 extern __inline __m512d 11276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11277 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W) 11278 { 11279 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, 11280 (__v8df) __W, 11281 (__mmask8) __U); 11282 } 11283 11284 extern __inline __m512 11285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11286 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W) 11287 { 11288 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, 11289 (__v16sf) __W, 11290 (__mmask16) __U); 11291 } 11292 11293 extern __inline __m512i 11294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11295 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W) 11296 { 11297 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, 11298 (__v8di) __W, 11299 (__mmask8) __U); 11300 } 11301 11302 extern __inline __m512i 11303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11304 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W) 11305 { 11306 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, 11307 (__v16si) __W, 11308 (__mmask16) __U); 11309 } 11310 11311 #ifdef __OPTIMIZE__ 11312 extern __inline __m128d 11313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11314 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 11315 { 11316 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 11317 (__v2df) __A, 11318 (__v2df) __B, 11319 __R); 11320 } 11321 11322 extern __inline __m128 11323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11324 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 11325 { 11326 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 11327 (__v4sf) __A, 11328 (__v4sf) __B, 11329 __R); 11330 } 11331 11332 extern __inline __m128d 11333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11334 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 11335 { 11336 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 11337 (__v2df) __A, 11338 -(__v2df) __B, 11339 __R); 11340 } 11341 11342 extern __inline __m128 11343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11344 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 11345 { 11346 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 11347 (__v4sf) __A, 11348 -(__v4sf) __B, 11349 __R); 11350 } 11351 11352 extern __inline __m128d 11353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11354 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 11355 { 11356 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 11357 -(__v2df) __A, 11358 (__v2df) __B, 11359 __R); 11360 } 11361 11362 extern __inline __m128 11363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11364 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 11365 { 11366 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 11367 -(__v4sf) __A, 11368 (__v4sf) __B, 11369 __R); 11370 } 11371 11372 extern __inline __m128d 11373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11374 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 11375 { 11376 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 11377 -(__v2df) __A, 11378 -(__v2df) __B, 11379 __R); 11380 } 11381 11382 extern __inline __m128 11383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11384 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 11385 { 11386 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 11387 -(__v4sf) __A, 11388 -(__v4sf) __B, 11389 __R); 11390 } 11391 #else 11392 #define _mm_fmadd_round_sd(A, B, C, R) \ 11393 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R) 11394 11395 #define _mm_fmadd_round_ss(A, B, C, R) \ 11396 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R) 11397 11398 #define _mm_fmsub_round_sd(A, B, C, R) \ 11399 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R) 11400 11401 #define _mm_fmsub_round_ss(A, B, C, R) \ 11402 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R) 11403 11404 #define _mm_fnmadd_round_sd(A, B, C, R) \ 11405 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R) 11406 11407 #define _mm_fnmadd_round_ss(A, B, C, R) \ 11408 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R) 11409 11410 #define _mm_fnmsub_round_sd(A, B, C, R) \ 11411 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R) 11412 11413 #define _mm_fnmsub_round_ss(A, B, C, R) \ 11414 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R) 11415 #endif 11416 11417 #ifdef __OPTIMIZE__ 11418 extern __inline int 11419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11420 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R) 11421 { 11422 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R); 11423 } 11424 11425 extern __inline int 11426 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11427 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R) 11428 { 11429 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R); 11430 } 11431 #else 11432 #define _mm_comi_round_ss(A, B, C, D)\ 11433 __builtin_ia32_vcomiss(A, B, C, D) 11434 #define _mm_comi_round_sd(A, B, C, D)\ 11435 __builtin_ia32_vcomisd(A, B, C, D) 11436 #endif 11437 11438 extern __inline __m512d 11439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11440 _mm512_sqrt_pd (__m512d __A) 11441 { 11442 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 11443 (__v8df) 11444 _mm512_undefined_pd (), 11445 (__mmask8) -1, 11446 _MM_FROUND_CUR_DIRECTION); 11447 } 11448 11449 extern __inline __m512d 11450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11451 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) 11452 { 11453 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 11454 (__v8df) __W, 11455 (__mmask8) __U, 11456 _MM_FROUND_CUR_DIRECTION); 11457 } 11458 11459 extern __inline __m512d 11460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11461 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) 11462 { 11463 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 11464 (__v8df) 11465 _mm512_setzero_pd (), 11466 (__mmask8) __U, 11467 _MM_FROUND_CUR_DIRECTION); 11468 } 11469 11470 extern __inline __m512 11471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11472 _mm512_sqrt_ps (__m512 __A) 11473 { 11474 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 11475 (__v16sf) 11476 _mm512_undefined_ps (), 11477 (__mmask16) -1, 11478 _MM_FROUND_CUR_DIRECTION); 11479 } 11480 11481 extern __inline __m512 11482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11483 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A) 11484 { 11485 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 11486 (__v16sf) __W, 11487 (__mmask16) __U, 11488 _MM_FROUND_CUR_DIRECTION); 11489 } 11490 11491 extern __inline __m512 11492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11493 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A) 11494 { 11495 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 11496 (__v16sf) 11497 _mm512_setzero_ps (), 11498 (__mmask16) __U, 11499 _MM_FROUND_CUR_DIRECTION); 11500 } 11501 11502 extern __inline __m512d 11503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11504 _mm512_add_pd (__m512d __A, __m512d __B) 11505 { 11506 return (__m512d) ((__v8df)__A + (__v8df)__B); 11507 } 11508 11509 extern __inline __m512d 11510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11511 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 11512 { 11513 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 11514 (__v8df) __B, 11515 (__v8df) __W, 11516 (__mmask8) __U, 11517 _MM_FROUND_CUR_DIRECTION); 11518 } 11519 11520 extern __inline __m512d 11521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11522 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B) 11523 { 11524 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 11525 (__v8df) __B, 11526 (__v8df) 11527 _mm512_setzero_pd (), 11528 (__mmask8) __U, 11529 _MM_FROUND_CUR_DIRECTION); 11530 } 11531 11532 extern __inline __m512 11533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11534 _mm512_add_ps (__m512 __A, __m512 __B) 11535 { 11536 return (__m512) ((__v16sf)__A + (__v16sf)__B); 11537 } 11538 11539 extern __inline __m512 11540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11541 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 11542 { 11543 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 11544 (__v16sf) __B, 11545 (__v16sf) __W, 11546 (__mmask16) __U, 11547 _MM_FROUND_CUR_DIRECTION); 11548 } 11549 11550 extern __inline __m512 11551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11552 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B) 11553 { 11554 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 11555 (__v16sf) __B, 11556 (__v16sf) 11557 _mm512_setzero_ps (), 11558 (__mmask16) __U, 11559 _MM_FROUND_CUR_DIRECTION); 11560 } 11561 11562 extern __inline __m128d 11563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11564 _mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 11565 { 11566 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, 11567 (__v2df) __B, 11568 (__v2df) __W, 11569 (__mmask8) __U, 11570 _MM_FROUND_CUR_DIRECTION); 11571 } 11572 11573 extern __inline __m128d 11574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11575 _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B) 11576 { 11577 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, 11578 (__v2df) __B, 11579 (__v2df) 11580 _mm_setzero_pd (), 11581 (__mmask8) __U, 11582 _MM_FROUND_CUR_DIRECTION); 11583 } 11584 11585 extern __inline __m128 11586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11587 _mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 11588 { 11589 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, 11590 (__v4sf) __B, 11591 (__v4sf) __W, 11592 (__mmask8) __U, 11593 _MM_FROUND_CUR_DIRECTION); 11594 } 11595 11596 extern __inline __m128 11597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11598 _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B) 11599 { 11600 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, 11601 (__v4sf) __B, 11602 (__v4sf) 11603 _mm_setzero_ps (), 11604 (__mmask8) __U, 11605 _MM_FROUND_CUR_DIRECTION); 11606 } 11607 11608 extern __inline __m512d 11609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11610 _mm512_sub_pd (__m512d __A, __m512d __B) 11611 { 11612 return (__m512d) ((__v8df)__A - (__v8df)__B); 11613 } 11614 11615 extern __inline __m512d 11616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11617 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 11618 { 11619 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 11620 (__v8df) __B, 11621 (__v8df) __W, 11622 (__mmask8) __U, 11623 _MM_FROUND_CUR_DIRECTION); 11624 } 11625 11626 extern __inline __m512d 11627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11628 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B) 11629 { 11630 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 11631 (__v8df) __B, 11632 (__v8df) 11633 _mm512_setzero_pd (), 11634 (__mmask8) __U, 11635 _MM_FROUND_CUR_DIRECTION); 11636 } 11637 11638 extern __inline __m512 11639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11640 _mm512_sub_ps (__m512 __A, __m512 __B) 11641 { 11642 return (__m512) ((__v16sf)__A - (__v16sf)__B); 11643 } 11644 11645 extern __inline __m512 11646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11647 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 11648 { 11649 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 11650 (__v16sf) __B, 11651 (__v16sf) __W, 11652 (__mmask16) __U, 11653 _MM_FROUND_CUR_DIRECTION); 11654 } 11655 11656 extern __inline __m512 11657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11658 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B) 11659 { 11660 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 11661 (__v16sf) __B, 11662 (__v16sf) 11663 _mm512_setzero_ps (), 11664 (__mmask16) __U, 11665 _MM_FROUND_CUR_DIRECTION); 11666 } 11667 11668 extern __inline __m128d 11669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11670 _mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 11671 { 11672 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, 11673 (__v2df) __B, 11674 (__v2df) __W, 11675 (__mmask8) __U, 11676 _MM_FROUND_CUR_DIRECTION); 11677 } 11678 11679 extern __inline __m128d 11680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11681 _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B) 11682 { 11683 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, 11684 (__v2df) __B, 11685 (__v2df) 11686 _mm_setzero_pd (), 11687 (__mmask8) __U, 11688 _MM_FROUND_CUR_DIRECTION); 11689 } 11690 11691 extern __inline __m128 11692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11693 _mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 11694 { 11695 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, 11696 (__v4sf) __B, 11697 (__v4sf) __W, 11698 (__mmask8) __U, 11699 _MM_FROUND_CUR_DIRECTION); 11700 } 11701 11702 extern __inline __m128 11703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11704 _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B) 11705 { 11706 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, 11707 (__v4sf) __B, 11708 (__v4sf) 11709 _mm_setzero_ps (), 11710 (__mmask8) __U, 11711 _MM_FROUND_CUR_DIRECTION); 11712 } 11713 11714 extern __inline __m512d 11715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11716 _mm512_mul_pd (__m512d __A, __m512d __B) 11717 { 11718 return (__m512d) ((__v8df)__A * (__v8df)__B); 11719 } 11720 11721 extern __inline __m512d 11722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11723 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 11724 { 11725 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 11726 (__v8df) __B, 11727 (__v8df) __W, 11728 (__mmask8) __U, 11729 _MM_FROUND_CUR_DIRECTION); 11730 } 11731 11732 extern __inline __m512d 11733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11734 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B) 11735 { 11736 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 11737 (__v8df) __B, 11738 (__v8df) 11739 _mm512_setzero_pd (), 11740 (__mmask8) __U, 11741 _MM_FROUND_CUR_DIRECTION); 11742 } 11743 11744 extern __inline __m512 11745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11746 _mm512_mul_ps (__m512 __A, __m512 __B) 11747 { 11748 return (__m512) ((__v16sf)__A * (__v16sf)__B); 11749 } 11750 11751 extern __inline __m512 11752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11753 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 11754 { 11755 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 11756 (__v16sf) __B, 11757 (__v16sf) __W, 11758 (__mmask16) __U, 11759 _MM_FROUND_CUR_DIRECTION); 11760 } 11761 11762 extern __inline __m512 11763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11764 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B) 11765 { 11766 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 11767 (__v16sf) __B, 11768 (__v16sf) 11769 _mm512_setzero_ps (), 11770 (__mmask16) __U, 11771 _MM_FROUND_CUR_DIRECTION); 11772 } 11773 11774 extern __inline __m128d 11775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11776 _mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A, 11777 __m128d __B) 11778 { 11779 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, 11780 (__v2df) __B, 11781 (__v2df) __W, 11782 (__mmask8) __U, 11783 _MM_FROUND_CUR_DIRECTION); 11784 } 11785 11786 extern __inline __m128d 11787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11788 _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B) 11789 { 11790 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, 11791 (__v2df) __B, 11792 (__v2df) 11793 _mm_setzero_pd (), 11794 (__mmask8) __U, 11795 _MM_FROUND_CUR_DIRECTION); 11796 } 11797 11798 extern __inline __m128 11799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11800 _mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A, 11801 __m128 __B) 11802 { 11803 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, 11804 (__v4sf) __B, 11805 (__v4sf) __W, 11806 (__mmask8) __U, 11807 _MM_FROUND_CUR_DIRECTION); 11808 } 11809 11810 extern __inline __m128 11811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11812 _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B) 11813 { 11814 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, 11815 (__v4sf) __B, 11816 (__v4sf) 11817 _mm_setzero_ps (), 11818 (__mmask8) __U, 11819 _MM_FROUND_CUR_DIRECTION); 11820 } 11821 11822 extern __inline __m512d 11823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11824 _mm512_div_pd (__m512d __M, __m512d __V) 11825 { 11826 return (__m512d) ((__v8df)__M / (__v8df)__V); 11827 } 11828 11829 extern __inline __m512d 11830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11831 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V) 11832 { 11833 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 11834 (__v8df) __V, 11835 (__v8df) __W, 11836 (__mmask8) __U, 11837 _MM_FROUND_CUR_DIRECTION); 11838 } 11839 11840 extern __inline __m512d 11841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11842 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V) 11843 { 11844 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 11845 (__v8df) __V, 11846 (__v8df) 11847 _mm512_setzero_pd (), 11848 (__mmask8) __U, 11849 _MM_FROUND_CUR_DIRECTION); 11850 } 11851 11852 extern __inline __m512 11853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11854 _mm512_div_ps (__m512 __A, __m512 __B) 11855 { 11856 return (__m512) ((__v16sf)__A / (__v16sf)__B); 11857 } 11858 11859 extern __inline __m512 11860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11861 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 11862 { 11863 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 11864 (__v16sf) __B, 11865 (__v16sf) __W, 11866 (__mmask16) __U, 11867 _MM_FROUND_CUR_DIRECTION); 11868 } 11869 11870 extern __inline __m512 11871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11872 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B) 11873 { 11874 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 11875 (__v16sf) __B, 11876 (__v16sf) 11877 _mm512_setzero_ps (), 11878 (__mmask16) __U, 11879 _MM_FROUND_CUR_DIRECTION); 11880 } 11881 11882 extern __inline __m128d 11883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11884 _mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A, 11885 __m128d __B) 11886 { 11887 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, 11888 (__v2df) __B, 11889 (__v2df) __W, 11890 (__mmask8) __U, 11891 _MM_FROUND_CUR_DIRECTION); 11892 } 11893 11894 extern __inline __m128d 11895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11896 _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B) 11897 { 11898 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, 11899 (__v2df) __B, 11900 (__v2df) 11901 _mm_setzero_pd (), 11902 (__mmask8) __U, 11903 _MM_FROUND_CUR_DIRECTION); 11904 } 11905 11906 extern __inline __m128 11907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11908 _mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A, 11909 __m128 __B) 11910 { 11911 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, 11912 (__v4sf) __B, 11913 (__v4sf) __W, 11914 (__mmask8) __U, 11915 _MM_FROUND_CUR_DIRECTION); 11916 } 11917 11918 extern __inline __m128 11919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11920 _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B) 11921 { 11922 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, 11923 (__v4sf) __B, 11924 (__v4sf) 11925 _mm_setzero_ps (), 11926 (__mmask8) __U, 11927 _MM_FROUND_CUR_DIRECTION); 11928 } 11929 11930 extern __inline __m512d 11931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11932 _mm512_max_pd (__m512d __A, __m512d __B) 11933 { 11934 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 11935 (__v8df) __B, 11936 (__v8df) 11937 _mm512_undefined_pd (), 11938 (__mmask8) -1, 11939 _MM_FROUND_CUR_DIRECTION); 11940 } 11941 11942 extern __inline __m512d 11943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11944 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 11945 { 11946 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 11947 (__v8df) __B, 11948 (__v8df) __W, 11949 (__mmask8) __U, 11950 _MM_FROUND_CUR_DIRECTION); 11951 } 11952 11953 extern __inline __m512d 11954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11955 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) 11956 { 11957 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 11958 (__v8df) __B, 11959 (__v8df) 11960 _mm512_setzero_pd (), 11961 (__mmask8) __U, 11962 _MM_FROUND_CUR_DIRECTION); 11963 } 11964 11965 extern __inline __m512 11966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11967 _mm512_max_ps (__m512 __A, __m512 __B) 11968 { 11969 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 11970 (__v16sf) __B, 11971 (__v16sf) 11972 _mm512_undefined_ps (), 11973 (__mmask16) -1, 11974 _MM_FROUND_CUR_DIRECTION); 11975 } 11976 11977 extern __inline __m512 11978 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11979 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 11980 { 11981 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 11982 (__v16sf) __B, 11983 (__v16sf) __W, 11984 (__mmask16) __U, 11985 _MM_FROUND_CUR_DIRECTION); 11986 } 11987 11988 extern __inline __m512 11989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11990 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) 11991 { 11992 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 11993 (__v16sf) __B, 11994 (__v16sf) 11995 _mm512_setzero_ps (), 11996 (__mmask16) __U, 11997 _MM_FROUND_CUR_DIRECTION); 11998 } 11999 12000 extern __inline __m128d 12001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12002 _mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 12003 { 12004 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, 12005 (__v2df) __B, 12006 (__v2df) __W, 12007 (__mmask8) __U, 12008 _MM_FROUND_CUR_DIRECTION); 12009 } 12010 12011 extern __inline __m128d 12012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12013 _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B) 12014 { 12015 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A, 12016 (__v2df) __B, 12017 (__v2df) 12018 _mm_setzero_pd (), 12019 (__mmask8) __U, 12020 _MM_FROUND_CUR_DIRECTION); 12021 } 12022 12023 extern __inline __m128 12024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12025 _mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 12026 { 12027 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, 12028 (__v4sf) __B, 12029 (__v4sf) __W, 12030 (__mmask8) __U, 12031 _MM_FROUND_CUR_DIRECTION); 12032 } 12033 12034 extern __inline __m128 12035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12036 _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B) 12037 { 12038 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A, 12039 (__v4sf) __B, 12040 (__v4sf) 12041 _mm_setzero_ps (), 12042 (__mmask8) __U, 12043 _MM_FROUND_CUR_DIRECTION); 12044 } 12045 12046 extern __inline __m512d 12047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12048 _mm512_min_pd (__m512d __A, __m512d __B) 12049 { 12050 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 12051 (__v8df) __B, 12052 (__v8df) 12053 _mm512_undefined_pd (), 12054 (__mmask8) -1, 12055 _MM_FROUND_CUR_DIRECTION); 12056 } 12057 12058 extern __inline __m512d 12059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12060 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 12061 { 12062 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 12063 (__v8df) __B, 12064 (__v8df) __W, 12065 (__mmask8) __U, 12066 _MM_FROUND_CUR_DIRECTION); 12067 } 12068 12069 extern __inline __m512d 12070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12071 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) 12072 { 12073 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 12074 (__v8df) __B, 12075 (__v8df) 12076 _mm512_setzero_pd (), 12077 (__mmask8) __U, 12078 _MM_FROUND_CUR_DIRECTION); 12079 } 12080 12081 extern __inline __m512 12082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12083 _mm512_min_ps (__m512 __A, __m512 __B) 12084 { 12085 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 12086 (__v16sf) __B, 12087 (__v16sf) 12088 _mm512_undefined_ps (), 12089 (__mmask16) -1, 12090 _MM_FROUND_CUR_DIRECTION); 12091 } 12092 12093 extern __inline __m512 12094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12095 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 12096 { 12097 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 12098 (__v16sf) __B, 12099 (__v16sf) __W, 12100 (__mmask16) __U, 12101 _MM_FROUND_CUR_DIRECTION); 12102 } 12103 12104 extern __inline __m512 12105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12106 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) 12107 { 12108 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 12109 (__v16sf) __B, 12110 (__v16sf) 12111 _mm512_setzero_ps (), 12112 (__mmask16) __U, 12113 _MM_FROUND_CUR_DIRECTION); 12114 } 12115 12116 extern __inline __m128d 12117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12118 _mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 12119 { 12120 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, 12121 (__v2df) __B, 12122 (__v2df) __W, 12123 (__mmask8) __U, 12124 _MM_FROUND_CUR_DIRECTION); 12125 } 12126 12127 extern __inline __m128d 12128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12129 _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B) 12130 { 12131 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A, 12132 (__v2df) __B, 12133 (__v2df) 12134 _mm_setzero_pd (), 12135 (__mmask8) __U, 12136 _MM_FROUND_CUR_DIRECTION); 12137 } 12138 12139 extern __inline __m128 12140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12141 _mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 12142 { 12143 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, 12144 (__v4sf) __B, 12145 (__v4sf) __W, 12146 (__mmask8) __U, 12147 _MM_FROUND_CUR_DIRECTION); 12148 } 12149 12150 extern __inline __m128 12151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12152 _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B) 12153 { 12154 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A, 12155 (__v4sf) __B, 12156 (__v4sf) 12157 _mm_setzero_ps (), 12158 (__mmask8) __U, 12159 _MM_FROUND_CUR_DIRECTION); 12160 } 12161 12162 extern __inline __m512d 12163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12164 _mm512_scalef_pd (__m512d __A, __m512d __B) 12165 { 12166 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 12167 (__v8df) __B, 12168 (__v8df) 12169 _mm512_undefined_pd (), 12170 (__mmask8) -1, 12171 _MM_FROUND_CUR_DIRECTION); 12172 } 12173 12174 extern __inline __m512d 12175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12176 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 12177 { 12178 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 12179 (__v8df) __B, 12180 (__v8df) __W, 12181 (__mmask8) __U, 12182 _MM_FROUND_CUR_DIRECTION); 12183 } 12184 12185 extern __inline __m512d 12186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12187 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) 12188 { 12189 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 12190 (__v8df) __B, 12191 (__v8df) 12192 _mm512_setzero_pd (), 12193 (__mmask8) __U, 12194 _MM_FROUND_CUR_DIRECTION); 12195 } 12196 12197 extern __inline __m512 12198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12199 _mm512_scalef_ps (__m512 __A, __m512 __B) 12200 { 12201 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 12202 (__v16sf) __B, 12203 (__v16sf) 12204 _mm512_undefined_ps (), 12205 (__mmask16) -1, 12206 _MM_FROUND_CUR_DIRECTION); 12207 } 12208 12209 extern __inline __m512 12210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12211 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 12212 { 12213 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 12214 (__v16sf) __B, 12215 (__v16sf) __W, 12216 (__mmask16) __U, 12217 _MM_FROUND_CUR_DIRECTION); 12218 } 12219 12220 extern __inline __m512 12221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12222 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) 12223 { 12224 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 12225 (__v16sf) __B, 12226 (__v16sf) 12227 _mm512_setzero_ps (), 12228 (__mmask16) __U, 12229 _MM_FROUND_CUR_DIRECTION); 12230 } 12231 12232 extern __inline __m128d 12233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12234 _mm_scalef_sd (__m128d __A, __m128d __B) 12235 { 12236 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, 12237 (__v2df) __B, 12238 (__v2df) 12239 _mm_setzero_pd (), 12240 (__mmask8) -1, 12241 _MM_FROUND_CUR_DIRECTION); 12242 } 12243 12244 extern __inline __m128 12245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12246 _mm_scalef_ss (__m128 __A, __m128 __B) 12247 { 12248 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, 12249 (__v4sf) __B, 12250 (__v4sf) 12251 _mm_setzero_ps (), 12252 (__mmask8) -1, 12253 _MM_FROUND_CUR_DIRECTION); 12254 } 12255 12256 extern __inline __m512d 12257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12258 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C) 12259 { 12260 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 12261 (__v8df) __B, 12262 (__v8df) __C, 12263 (__mmask8) -1, 12264 _MM_FROUND_CUR_DIRECTION); 12265 } 12266 12267 extern __inline __m512d 12268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12269 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 12270 { 12271 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 12272 (__v8df) __B, 12273 (__v8df) __C, 12274 (__mmask8) __U, 12275 _MM_FROUND_CUR_DIRECTION); 12276 } 12277 12278 extern __inline __m512d 12279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12280 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 12281 { 12282 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 12283 (__v8df) __B, 12284 (__v8df) __C, 12285 (__mmask8) __U, 12286 _MM_FROUND_CUR_DIRECTION); 12287 } 12288 12289 extern __inline __m512d 12290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12291 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 12292 { 12293 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 12294 (__v8df) __B, 12295 (__v8df) __C, 12296 (__mmask8) __U, 12297 _MM_FROUND_CUR_DIRECTION); 12298 } 12299 12300 extern __inline __m512 12301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12302 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C) 12303 { 12304 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 12305 (__v16sf) __B, 12306 (__v16sf) __C, 12307 (__mmask16) -1, 12308 _MM_FROUND_CUR_DIRECTION); 12309 } 12310 12311 extern __inline __m512 12312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12313 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 12314 { 12315 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 12316 (__v16sf) __B, 12317 (__v16sf) __C, 12318 (__mmask16) __U, 12319 _MM_FROUND_CUR_DIRECTION); 12320 } 12321 12322 extern __inline __m512 12323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12324 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 12325 { 12326 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 12327 (__v16sf) __B, 12328 (__v16sf) __C, 12329 (__mmask16) __U, 12330 _MM_FROUND_CUR_DIRECTION); 12331 } 12332 12333 extern __inline __m512 12334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12335 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 12336 { 12337 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 12338 (__v16sf) __B, 12339 (__v16sf) __C, 12340 (__mmask16) __U, 12341 _MM_FROUND_CUR_DIRECTION); 12342 } 12343 12344 extern __inline __m512d 12345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12346 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C) 12347 { 12348 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 12349 (__v8df) __B, 12350 -(__v8df) __C, 12351 (__mmask8) -1, 12352 _MM_FROUND_CUR_DIRECTION); 12353 } 12354 12355 extern __inline __m512d 12356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12357 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 12358 { 12359 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 12360 (__v8df) __B, 12361 -(__v8df) __C, 12362 (__mmask8) __U, 12363 _MM_FROUND_CUR_DIRECTION); 12364 } 12365 12366 extern __inline __m512d 12367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12368 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 12369 { 12370 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 12371 (__v8df) __B, 12372 (__v8df) __C, 12373 (__mmask8) __U, 12374 _MM_FROUND_CUR_DIRECTION); 12375 } 12376 12377 extern __inline __m512d 12378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12379 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 12380 { 12381 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 12382 (__v8df) __B, 12383 -(__v8df) __C, 12384 (__mmask8) __U, 12385 _MM_FROUND_CUR_DIRECTION); 12386 } 12387 12388 extern __inline __m512 12389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12390 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C) 12391 { 12392 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 12393 (__v16sf) __B, 12394 -(__v16sf) __C, 12395 (__mmask16) -1, 12396 _MM_FROUND_CUR_DIRECTION); 12397 } 12398 12399 extern __inline __m512 12400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12401 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 12402 { 12403 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 12404 (__v16sf) __B, 12405 -(__v16sf) __C, 12406 (__mmask16) __U, 12407 _MM_FROUND_CUR_DIRECTION); 12408 } 12409 12410 extern __inline __m512 12411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12412 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 12413 { 12414 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 12415 (__v16sf) __B, 12416 (__v16sf) __C, 12417 (__mmask16) __U, 12418 _MM_FROUND_CUR_DIRECTION); 12419 } 12420 12421 extern __inline __m512 12422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12423 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 12424 { 12425 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 12426 (__v16sf) __B, 12427 -(__v16sf) __C, 12428 (__mmask16) __U, 12429 _MM_FROUND_CUR_DIRECTION); 12430 } 12431 12432 extern __inline __m512d 12433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12434 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C) 12435 { 12436 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 12437 (__v8df) __B, 12438 (__v8df) __C, 12439 (__mmask8) -1, 12440 _MM_FROUND_CUR_DIRECTION); 12441 } 12442 12443 extern __inline __m512d 12444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12445 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 12446 { 12447 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 12448 (__v8df) __B, 12449 (__v8df) __C, 12450 (__mmask8) __U, 12451 _MM_FROUND_CUR_DIRECTION); 12452 } 12453 12454 extern __inline __m512d 12455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12456 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 12457 { 12458 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 12459 (__v8df) __B, 12460 (__v8df) __C, 12461 (__mmask8) __U, 12462 _MM_FROUND_CUR_DIRECTION); 12463 } 12464 12465 extern __inline __m512d 12466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12467 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 12468 { 12469 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 12470 (__v8df) __B, 12471 (__v8df) __C, 12472 (__mmask8) __U, 12473 _MM_FROUND_CUR_DIRECTION); 12474 } 12475 12476 extern __inline __m512 12477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12478 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C) 12479 { 12480 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 12481 (__v16sf) __B, 12482 (__v16sf) __C, 12483 (__mmask16) -1, 12484 _MM_FROUND_CUR_DIRECTION); 12485 } 12486 12487 extern __inline __m512 12488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12489 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 12490 { 12491 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 12492 (__v16sf) __B, 12493 (__v16sf) __C, 12494 (__mmask16) __U, 12495 _MM_FROUND_CUR_DIRECTION); 12496 } 12497 12498 extern __inline __m512 12499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12500 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 12501 { 12502 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 12503 (__v16sf) __B, 12504 (__v16sf) __C, 12505 (__mmask16) __U, 12506 _MM_FROUND_CUR_DIRECTION); 12507 } 12508 12509 extern __inline __m512 12510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12511 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 12512 { 12513 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 12514 (__v16sf) __B, 12515 (__v16sf) __C, 12516 (__mmask16) __U, 12517 _MM_FROUND_CUR_DIRECTION); 12518 } 12519 12520 extern __inline __m512d 12521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12522 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C) 12523 { 12524 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 12525 (__v8df) __B, 12526 -(__v8df) __C, 12527 (__mmask8) -1, 12528 _MM_FROUND_CUR_DIRECTION); 12529 } 12530 12531 extern __inline __m512d 12532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12533 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 12534 { 12535 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 12536 (__v8df) __B, 12537 -(__v8df) __C, 12538 (__mmask8) __U, 12539 _MM_FROUND_CUR_DIRECTION); 12540 } 12541 12542 extern __inline __m512d 12543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12544 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 12545 { 12546 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 12547 (__v8df) __B, 12548 (__v8df) __C, 12549 (__mmask8) __U, 12550 _MM_FROUND_CUR_DIRECTION); 12551 } 12552 12553 extern __inline __m512d 12554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12555 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 12556 { 12557 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 12558 (__v8df) __B, 12559 -(__v8df) __C, 12560 (__mmask8) __U, 12561 _MM_FROUND_CUR_DIRECTION); 12562 } 12563 12564 extern __inline __m512 12565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12566 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C) 12567 { 12568 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 12569 (__v16sf) __B, 12570 -(__v16sf) __C, 12571 (__mmask16) -1, 12572 _MM_FROUND_CUR_DIRECTION); 12573 } 12574 12575 extern __inline __m512 12576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12577 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 12578 { 12579 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 12580 (__v16sf) __B, 12581 -(__v16sf) __C, 12582 (__mmask16) __U, 12583 _MM_FROUND_CUR_DIRECTION); 12584 } 12585 12586 extern __inline __m512 12587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12588 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 12589 { 12590 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 12591 (__v16sf) __B, 12592 (__v16sf) __C, 12593 (__mmask16) __U, 12594 _MM_FROUND_CUR_DIRECTION); 12595 } 12596 12597 extern __inline __m512 12598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12599 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 12600 { 12601 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 12602 (__v16sf) __B, 12603 -(__v16sf) __C, 12604 (__mmask16) __U, 12605 _MM_FROUND_CUR_DIRECTION); 12606 } 12607 12608 extern __inline __m512d 12609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12610 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C) 12611 { 12612 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 12613 (__v8df) __B, 12614 (__v8df) __C, 12615 (__mmask8) -1, 12616 _MM_FROUND_CUR_DIRECTION); 12617 } 12618 12619 extern __inline __m512d 12620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12621 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 12622 { 12623 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 12624 (__v8df) __B, 12625 (__v8df) __C, 12626 (__mmask8) __U, 12627 _MM_FROUND_CUR_DIRECTION); 12628 } 12629 12630 extern __inline __m512d 12631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12632 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 12633 { 12634 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 12635 (__v8df) __B, 12636 (__v8df) __C, 12637 (__mmask8) __U, 12638 _MM_FROUND_CUR_DIRECTION); 12639 } 12640 12641 extern __inline __m512d 12642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12643 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 12644 { 12645 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 12646 (__v8df) __B, 12647 (__v8df) __C, 12648 (__mmask8) __U, 12649 _MM_FROUND_CUR_DIRECTION); 12650 } 12651 12652 extern __inline __m512 12653 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12654 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C) 12655 { 12656 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 12657 (__v16sf) __B, 12658 (__v16sf) __C, 12659 (__mmask16) -1, 12660 _MM_FROUND_CUR_DIRECTION); 12661 } 12662 12663 extern __inline __m512 12664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12665 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 12666 { 12667 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 12668 (__v16sf) __B, 12669 (__v16sf) __C, 12670 (__mmask16) __U, 12671 _MM_FROUND_CUR_DIRECTION); 12672 } 12673 12674 extern __inline __m512 12675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12676 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 12677 { 12678 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 12679 (__v16sf) __B, 12680 (__v16sf) __C, 12681 (__mmask16) __U, 12682 _MM_FROUND_CUR_DIRECTION); 12683 } 12684 12685 extern __inline __m512 12686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12687 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 12688 { 12689 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 12690 (__v16sf) __B, 12691 (__v16sf) __C, 12692 (__mmask16) __U, 12693 _MM_FROUND_CUR_DIRECTION); 12694 } 12695 12696 extern __inline __m512d 12697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12698 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C) 12699 { 12700 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 12701 (__v8df) __B, 12702 -(__v8df) __C, 12703 (__mmask8) -1, 12704 _MM_FROUND_CUR_DIRECTION); 12705 } 12706 12707 extern __inline __m512d 12708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12709 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 12710 { 12711 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 12712 (__v8df) __B, 12713 (__v8df) __C, 12714 (__mmask8) __U, 12715 _MM_FROUND_CUR_DIRECTION); 12716 } 12717 12718 extern __inline __m512d 12719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12720 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 12721 { 12722 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 12723 (__v8df) __B, 12724 (__v8df) __C, 12725 (__mmask8) __U, 12726 _MM_FROUND_CUR_DIRECTION); 12727 } 12728 12729 extern __inline __m512d 12730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12731 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 12732 { 12733 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 12734 (__v8df) __B, 12735 -(__v8df) __C, 12736 (__mmask8) __U, 12737 _MM_FROUND_CUR_DIRECTION); 12738 } 12739 12740 extern __inline __m512 12741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12742 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C) 12743 { 12744 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 12745 (__v16sf) __B, 12746 -(__v16sf) __C, 12747 (__mmask16) -1, 12748 _MM_FROUND_CUR_DIRECTION); 12749 } 12750 12751 extern __inline __m512 12752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12753 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 12754 { 12755 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 12756 (__v16sf) __B, 12757 (__v16sf) __C, 12758 (__mmask16) __U, 12759 _MM_FROUND_CUR_DIRECTION); 12760 } 12761 12762 extern __inline __m512 12763 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12764 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 12765 { 12766 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 12767 (__v16sf) __B, 12768 (__v16sf) __C, 12769 (__mmask16) __U, 12770 _MM_FROUND_CUR_DIRECTION); 12771 } 12772 12773 extern __inline __m512 12774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12775 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 12776 { 12777 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 12778 (__v16sf) __B, 12779 -(__v16sf) __C, 12780 (__mmask16) __U, 12781 _MM_FROUND_CUR_DIRECTION); 12782 } 12783 12784 extern __inline __m256i 12785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12786 _mm512_cvttpd_epi32 (__m512d __A) 12787 { 12788 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 12789 (__v8si) 12790 _mm256_undefined_si256 (), 12791 (__mmask8) -1, 12792 _MM_FROUND_CUR_DIRECTION); 12793 } 12794 12795 extern __inline __m256i 12796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12797 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 12798 { 12799 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 12800 (__v8si) __W, 12801 (__mmask8) __U, 12802 _MM_FROUND_CUR_DIRECTION); 12803 } 12804 12805 extern __inline __m256i 12806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12807 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) 12808 { 12809 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 12810 (__v8si) 12811 _mm256_setzero_si256 (), 12812 (__mmask8) __U, 12813 _MM_FROUND_CUR_DIRECTION); 12814 } 12815 12816 extern __inline __m256i 12817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12818 _mm512_cvttpd_epu32 (__m512d __A) 12819 { 12820 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 12821 (__v8si) 12822 _mm256_undefined_si256 (), 12823 (__mmask8) -1, 12824 _MM_FROUND_CUR_DIRECTION); 12825 } 12826 12827 extern __inline __m256i 12828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12829 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 12830 { 12831 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 12832 (__v8si) __W, 12833 (__mmask8) __U, 12834 _MM_FROUND_CUR_DIRECTION); 12835 } 12836 12837 extern __inline __m256i 12838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12839 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) 12840 { 12841 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 12842 (__v8si) 12843 _mm256_setzero_si256 (), 12844 (__mmask8) __U, 12845 _MM_FROUND_CUR_DIRECTION); 12846 } 12847 12848 extern __inline __m256i 12849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12850 _mm512_cvtpd_epi32 (__m512d __A) 12851 { 12852 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 12853 (__v8si) 12854 _mm256_undefined_si256 (), 12855 (__mmask8) -1, 12856 _MM_FROUND_CUR_DIRECTION); 12857 } 12858 12859 extern __inline __m256i 12860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12861 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 12862 { 12863 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 12864 (__v8si) __W, 12865 (__mmask8) __U, 12866 _MM_FROUND_CUR_DIRECTION); 12867 } 12868 12869 extern __inline __m256i 12870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12871 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) 12872 { 12873 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 12874 (__v8si) 12875 _mm256_setzero_si256 (), 12876 (__mmask8) __U, 12877 _MM_FROUND_CUR_DIRECTION); 12878 } 12879 12880 extern __inline __m256i 12881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12882 _mm512_cvtpd_epu32 (__m512d __A) 12883 { 12884 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 12885 (__v8si) 12886 _mm256_undefined_si256 (), 12887 (__mmask8) -1, 12888 _MM_FROUND_CUR_DIRECTION); 12889 } 12890 12891 extern __inline __m256i 12892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12893 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 12894 { 12895 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 12896 (__v8si) __W, 12897 (__mmask8) __U, 12898 _MM_FROUND_CUR_DIRECTION); 12899 } 12900 12901 extern __inline __m256i 12902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12903 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) 12904 { 12905 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 12906 (__v8si) 12907 _mm256_setzero_si256 (), 12908 (__mmask8) __U, 12909 _MM_FROUND_CUR_DIRECTION); 12910 } 12911 12912 extern __inline __m512i 12913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12914 _mm512_cvttps_epi32 (__m512 __A) 12915 { 12916 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 12917 (__v16si) 12918 _mm512_undefined_epi32 (), 12919 (__mmask16) -1, 12920 _MM_FROUND_CUR_DIRECTION); 12921 } 12922 12923 extern __inline __m512i 12924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12925 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 12926 { 12927 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 12928 (__v16si) __W, 12929 (__mmask16) __U, 12930 _MM_FROUND_CUR_DIRECTION); 12931 } 12932 12933 extern __inline __m512i 12934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12935 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) 12936 { 12937 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 12938 (__v16si) 12939 _mm512_setzero_si512 (), 12940 (__mmask16) __U, 12941 _MM_FROUND_CUR_DIRECTION); 12942 } 12943 12944 extern __inline __m512i 12945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12946 _mm512_cvttps_epu32 (__m512 __A) 12947 { 12948 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 12949 (__v16si) 12950 _mm512_undefined_epi32 (), 12951 (__mmask16) -1, 12952 _MM_FROUND_CUR_DIRECTION); 12953 } 12954 12955 extern __inline __m512i 12956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12957 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 12958 { 12959 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 12960 (__v16si) __W, 12961 (__mmask16) __U, 12962 _MM_FROUND_CUR_DIRECTION); 12963 } 12964 12965 extern __inline __m512i 12966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12967 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) 12968 { 12969 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 12970 (__v16si) 12971 _mm512_setzero_si512 (), 12972 (__mmask16) __U, 12973 _MM_FROUND_CUR_DIRECTION); 12974 } 12975 12976 extern __inline __m512i 12977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12978 _mm512_cvtps_epi32 (__m512 __A) 12979 { 12980 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 12981 (__v16si) 12982 _mm512_undefined_epi32 (), 12983 (__mmask16) -1, 12984 _MM_FROUND_CUR_DIRECTION); 12985 } 12986 12987 extern __inline __m512i 12988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12989 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 12990 { 12991 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 12992 (__v16si) __W, 12993 (__mmask16) __U, 12994 _MM_FROUND_CUR_DIRECTION); 12995 } 12996 12997 extern __inline __m512i 12998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12999 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) 13000 { 13001 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 13002 (__v16si) 13003 _mm512_setzero_si512 (), 13004 (__mmask16) __U, 13005 _MM_FROUND_CUR_DIRECTION); 13006 } 13007 13008 extern __inline __m512i 13009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13010 _mm512_cvtps_epu32 (__m512 __A) 13011 { 13012 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 13013 (__v16si) 13014 _mm512_undefined_epi32 (), 13015 (__mmask16) -1, 13016 _MM_FROUND_CUR_DIRECTION); 13017 } 13018 13019 extern __inline __m512i 13020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13021 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 13022 { 13023 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 13024 (__v16si) __W, 13025 (__mmask16) __U, 13026 _MM_FROUND_CUR_DIRECTION); 13027 } 13028 13029 extern __inline __m512i 13030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13031 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) 13032 { 13033 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 13034 (__v16si) 13035 _mm512_setzero_si512 (), 13036 (__mmask16) __U, 13037 _MM_FROUND_CUR_DIRECTION); 13038 } 13039 13040 extern __inline double 13041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13042 _mm512_cvtsd_f64 (__m512d __A) 13043 { 13044 return __A[0]; 13045 } 13046 13047 extern __inline float 13048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13049 _mm512_cvtss_f32 (__m512 __A) 13050 { 13051 return __A[0]; 13052 } 13053 13054 #ifdef __x86_64__ 13055 extern __inline __m128 13056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13057 _mm_cvtu64_ss (__m128 __A, unsigned long long __B) 13058 { 13059 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, 13060 _MM_FROUND_CUR_DIRECTION); 13061 } 13062 13063 extern __inline __m128d 13064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13065 _mm_cvtu64_sd (__m128d __A, unsigned long long __B) 13066 { 13067 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, 13068 _MM_FROUND_CUR_DIRECTION); 13069 } 13070 #endif 13071 13072 extern __inline __m128 13073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13074 _mm_cvtu32_ss (__m128 __A, unsigned __B) 13075 { 13076 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, 13077 _MM_FROUND_CUR_DIRECTION); 13078 } 13079 13080 extern __inline __m512 13081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13082 _mm512_cvtepi32_ps (__m512i __A) 13083 { 13084 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 13085 (__v16sf) 13086 _mm512_undefined_ps (), 13087 (__mmask16) -1, 13088 _MM_FROUND_CUR_DIRECTION); 13089 } 13090 13091 extern __inline __m512 13092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13093 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) 13094 { 13095 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 13096 (__v16sf) __W, 13097 (__mmask16) __U, 13098 _MM_FROUND_CUR_DIRECTION); 13099 } 13100 13101 extern __inline __m512 13102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13103 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) 13104 { 13105 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 13106 (__v16sf) 13107 _mm512_setzero_ps (), 13108 (__mmask16) __U, 13109 _MM_FROUND_CUR_DIRECTION); 13110 } 13111 13112 extern __inline __m512 13113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13114 _mm512_cvtepu32_ps (__m512i __A) 13115 { 13116 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 13117 (__v16sf) 13118 _mm512_undefined_ps (), 13119 (__mmask16) -1, 13120 _MM_FROUND_CUR_DIRECTION); 13121 } 13122 13123 extern __inline __m512 13124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13125 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) 13126 { 13127 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 13128 (__v16sf) __W, 13129 (__mmask16) __U, 13130 _MM_FROUND_CUR_DIRECTION); 13131 } 13132 13133 extern __inline __m512 13134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13135 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) 13136 { 13137 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 13138 (__v16sf) 13139 _mm512_setzero_ps (), 13140 (__mmask16) __U, 13141 _MM_FROUND_CUR_DIRECTION); 13142 } 13143 13144 #ifdef __OPTIMIZE__ 13145 extern __inline __m512d 13146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13147 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm) 13148 { 13149 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 13150 (__v8df) __B, 13151 (__v8di) __C, 13152 __imm, 13153 (__mmask8) -1, 13154 _MM_FROUND_CUR_DIRECTION); 13155 } 13156 13157 extern __inline __m512d 13158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13159 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B, 13160 __m512i __C, const int __imm) 13161 { 13162 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 13163 (__v8df) __B, 13164 (__v8di) __C, 13165 __imm, 13166 (__mmask8) __U, 13167 _MM_FROUND_CUR_DIRECTION); 13168 } 13169 13170 extern __inline __m512d 13171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13172 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B, 13173 __m512i __C, const int __imm) 13174 { 13175 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A, 13176 (__v8df) __B, 13177 (__v8di) __C, 13178 __imm, 13179 (__mmask8) __U, 13180 _MM_FROUND_CUR_DIRECTION); 13181 } 13182 13183 extern __inline __m512 13184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13185 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm) 13186 { 13187 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 13188 (__v16sf) __B, 13189 (__v16si) __C, 13190 __imm, 13191 (__mmask16) -1, 13192 _MM_FROUND_CUR_DIRECTION); 13193 } 13194 13195 extern __inline __m512 13196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13197 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B, 13198 __m512i __C, const int __imm) 13199 { 13200 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 13201 (__v16sf) __B, 13202 (__v16si) __C, 13203 __imm, 13204 (__mmask16) __U, 13205 _MM_FROUND_CUR_DIRECTION); 13206 } 13207 13208 extern __inline __m512 13209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13210 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B, 13211 __m512i __C, const int __imm) 13212 { 13213 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, 13214 (__v16sf) __B, 13215 (__v16si) __C, 13216 __imm, 13217 (__mmask16) __U, 13218 _MM_FROUND_CUR_DIRECTION); 13219 } 13220 13221 extern __inline __m128d 13222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13223 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm) 13224 { 13225 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 13226 (__v2df) __B, 13227 (__v2di) __C, __imm, 13228 (__mmask8) -1, 13229 _MM_FROUND_CUR_DIRECTION); 13230 } 13231 13232 extern __inline __m128d 13233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13234 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B, 13235 __m128i __C, const int __imm) 13236 { 13237 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 13238 (__v2df) __B, 13239 (__v2di) __C, __imm, 13240 (__mmask8) __U, 13241 _MM_FROUND_CUR_DIRECTION); 13242 } 13243 13244 extern __inline __m128d 13245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13246 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B, 13247 __m128i __C, const int __imm) 13248 { 13249 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, 13250 (__v2df) __B, 13251 (__v2di) __C, 13252 __imm, 13253 (__mmask8) __U, 13254 _MM_FROUND_CUR_DIRECTION); 13255 } 13256 13257 extern __inline __m128 13258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13259 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm) 13260 { 13261 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 13262 (__v4sf) __B, 13263 (__v4si) __C, __imm, 13264 (__mmask8) -1, 13265 _MM_FROUND_CUR_DIRECTION); 13266 } 13267 13268 extern __inline __m128 13269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13270 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B, 13271 __m128i __C, const int __imm) 13272 { 13273 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 13274 (__v4sf) __B, 13275 (__v4si) __C, __imm, 13276 (__mmask8) __U, 13277 _MM_FROUND_CUR_DIRECTION); 13278 } 13279 13280 extern __inline __m128 13281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13282 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B, 13283 __m128i __C, const int __imm) 13284 { 13285 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, 13286 (__v4sf) __B, 13287 (__v4si) __C, __imm, 13288 (__mmask8) __U, 13289 _MM_FROUND_CUR_DIRECTION); 13290 } 13291 #else 13292 #define _mm512_fixupimm_pd(X, Y, Z, C) \ 13293 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 13294 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 13295 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 13296 13297 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \ 13298 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 13299 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 13300 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 13301 13302 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \ 13303 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \ 13304 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 13305 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 13306 13307 #define _mm512_fixupimm_ps(X, Y, Z, C) \ 13308 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 13309 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 13310 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION)) 13311 13312 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \ 13313 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 13314 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 13315 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 13316 13317 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \ 13318 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ 13319 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 13320 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 13321 13322 #define _mm_fixupimm_sd(X, Y, Z, C) \ 13323 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 13324 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 13325 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 13326 13327 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \ 13328 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 13329 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 13330 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 13331 13332 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \ 13333 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ 13334 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 13335 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 13336 13337 #define _mm_fixupimm_ss(X, Y, Z, C) \ 13338 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 13339 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 13340 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 13341 13342 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \ 13343 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 13344 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 13345 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 13346 13347 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \ 13348 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ 13349 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 13350 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 13351 #endif 13352 13353 #ifdef __x86_64__ 13354 extern __inline unsigned long long 13355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13356 _mm_cvtss_u64 (__m128 __A) 13357 { 13358 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) 13359 __A, 13360 _MM_FROUND_CUR_DIRECTION); 13361 } 13362 13363 extern __inline unsigned long long 13364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13365 _mm_cvttss_u64 (__m128 __A) 13366 { 13367 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) 13368 __A, 13369 _MM_FROUND_CUR_DIRECTION); 13370 } 13371 13372 extern __inline long long 13373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13374 _mm_cvttss_i64 (__m128 __A) 13375 { 13376 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, 13377 _MM_FROUND_CUR_DIRECTION); 13378 } 13379 #endif /* __x86_64__ */ 13380 13381 extern __inline unsigned 13382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13383 _mm_cvtss_u32 (__m128 __A) 13384 { 13385 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, 13386 _MM_FROUND_CUR_DIRECTION); 13387 } 13388 13389 extern __inline unsigned 13390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13391 _mm_cvttss_u32 (__m128 __A) 13392 { 13393 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, 13394 _MM_FROUND_CUR_DIRECTION); 13395 } 13396 13397 extern __inline int 13398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13399 _mm_cvttss_i32 (__m128 __A) 13400 { 13401 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, 13402 _MM_FROUND_CUR_DIRECTION); 13403 } 13404 13405 #ifdef __x86_64__ 13406 extern __inline unsigned long long 13407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13408 _mm_cvtsd_u64 (__m128d __A) 13409 { 13410 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) 13411 __A, 13412 _MM_FROUND_CUR_DIRECTION); 13413 } 13414 13415 extern __inline unsigned long long 13416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13417 _mm_cvttsd_u64 (__m128d __A) 13418 { 13419 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) 13420 __A, 13421 _MM_FROUND_CUR_DIRECTION); 13422 } 13423 13424 extern __inline long long 13425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13426 _mm_cvttsd_i64 (__m128d __A) 13427 { 13428 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, 13429 _MM_FROUND_CUR_DIRECTION); 13430 } 13431 #endif /* __x86_64__ */ 13432 13433 extern __inline unsigned 13434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13435 _mm_cvtsd_u32 (__m128d __A) 13436 { 13437 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, 13438 _MM_FROUND_CUR_DIRECTION); 13439 } 13440 13441 extern __inline unsigned 13442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13443 _mm_cvttsd_u32 (__m128d __A) 13444 { 13445 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, 13446 _MM_FROUND_CUR_DIRECTION); 13447 } 13448 13449 extern __inline int 13450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13451 _mm_cvttsd_i32 (__m128d __A) 13452 { 13453 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, 13454 _MM_FROUND_CUR_DIRECTION); 13455 } 13456 13457 extern __inline __m512d 13458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13459 _mm512_cvtps_pd (__m256 __A) 13460 { 13461 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 13462 (__v8df) 13463 _mm512_undefined_pd (), 13464 (__mmask8) -1, 13465 _MM_FROUND_CUR_DIRECTION); 13466 } 13467 13468 extern __inline __m512d 13469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13470 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 13471 { 13472 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 13473 (__v8df) __W, 13474 (__mmask8) __U, 13475 _MM_FROUND_CUR_DIRECTION); 13476 } 13477 13478 extern __inline __m512d 13479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13480 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 13481 { 13482 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 13483 (__v8df) 13484 _mm512_setzero_pd (), 13485 (__mmask8) __U, 13486 _MM_FROUND_CUR_DIRECTION); 13487 } 13488 13489 extern __inline __m512 13490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13491 _mm512_cvtph_ps (__m256i __A) 13492 { 13493 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 13494 (__v16sf) 13495 _mm512_undefined_ps (), 13496 (__mmask16) -1, 13497 _MM_FROUND_CUR_DIRECTION); 13498 } 13499 13500 extern __inline __m512 13501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13502 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) 13503 { 13504 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 13505 (__v16sf) __W, 13506 (__mmask16) __U, 13507 _MM_FROUND_CUR_DIRECTION); 13508 } 13509 13510 extern __inline __m512 13511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13512 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) 13513 { 13514 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 13515 (__v16sf) 13516 _mm512_setzero_ps (), 13517 (__mmask16) __U, 13518 _MM_FROUND_CUR_DIRECTION); 13519 } 13520 13521 extern __inline __m256 13522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13523 _mm512_cvtpd_ps (__m512d __A) 13524 { 13525 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 13526 (__v8sf) 13527 _mm256_undefined_ps (), 13528 (__mmask8) -1, 13529 _MM_FROUND_CUR_DIRECTION); 13530 } 13531 13532 extern __inline __m256 13533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13534 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) 13535 { 13536 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 13537 (__v8sf) __W, 13538 (__mmask8) __U, 13539 _MM_FROUND_CUR_DIRECTION); 13540 } 13541 13542 extern __inline __m256 13543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13544 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) 13545 { 13546 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 13547 (__v8sf) 13548 _mm256_setzero_ps (), 13549 (__mmask8) __U, 13550 _MM_FROUND_CUR_DIRECTION); 13551 } 13552 13553 #ifdef __OPTIMIZE__ 13554 extern __inline __m512 13555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13556 _mm512_getexp_ps (__m512 __A) 13557 { 13558 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 13559 (__v16sf) 13560 _mm512_undefined_ps (), 13561 (__mmask16) -1, 13562 _MM_FROUND_CUR_DIRECTION); 13563 } 13564 13565 extern __inline __m512 13566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13567 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) 13568 { 13569 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 13570 (__v16sf) __W, 13571 (__mmask16) __U, 13572 _MM_FROUND_CUR_DIRECTION); 13573 } 13574 13575 extern __inline __m512 13576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13577 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) 13578 { 13579 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 13580 (__v16sf) 13581 _mm512_setzero_ps (), 13582 (__mmask16) __U, 13583 _MM_FROUND_CUR_DIRECTION); 13584 } 13585 13586 extern __inline __m512d 13587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13588 _mm512_getexp_pd (__m512d __A) 13589 { 13590 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 13591 (__v8df) 13592 _mm512_undefined_pd (), 13593 (__mmask8) -1, 13594 _MM_FROUND_CUR_DIRECTION); 13595 } 13596 13597 extern __inline __m512d 13598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13599 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) 13600 { 13601 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 13602 (__v8df) __W, 13603 (__mmask8) __U, 13604 _MM_FROUND_CUR_DIRECTION); 13605 } 13606 13607 extern __inline __m512d 13608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13609 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) 13610 { 13611 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 13612 (__v8df) 13613 _mm512_setzero_pd (), 13614 (__mmask8) __U, 13615 _MM_FROUND_CUR_DIRECTION); 13616 } 13617 13618 extern __inline __m128 13619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13620 _mm_getexp_ss (__m128 __A, __m128 __B) 13621 { 13622 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A, 13623 (__v4sf) __B, 13624 _MM_FROUND_CUR_DIRECTION); 13625 } 13626 13627 extern __inline __m128 13628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13629 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 13630 { 13631 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A, 13632 (__v4sf) __B, 13633 (__v4sf) __W, 13634 (__mmask8) __U, 13635 _MM_FROUND_CUR_DIRECTION); 13636 } 13637 13638 extern __inline __m128 13639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13640 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) 13641 { 13642 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A, 13643 (__v4sf) __B, 13644 (__v4sf) 13645 _mm_setzero_ps (), 13646 (__mmask8) __U, 13647 _MM_FROUND_CUR_DIRECTION); 13648 } 13649 13650 extern __inline __m128d 13651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13652 _mm_getexp_sd (__m128d __A, __m128d __B) 13653 { 13654 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A, 13655 (__v2df) __B, 13656 _MM_FROUND_CUR_DIRECTION); 13657 } 13658 13659 extern __inline __m128d 13660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13661 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 13662 { 13663 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A, 13664 (__v2df) __B, 13665 (__v2df) __W, 13666 (__mmask8) __U, 13667 _MM_FROUND_CUR_DIRECTION); 13668 } 13669 13670 extern __inline __m128d 13671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13672 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) 13673 { 13674 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A, 13675 (__v2df) __B, 13676 (__v2df) 13677 _mm_setzero_pd (), 13678 (__mmask8) __U, 13679 _MM_FROUND_CUR_DIRECTION); 13680 } 13681 13682 extern __inline __m512d 13683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13684 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, 13685 _MM_MANTISSA_SIGN_ENUM __C) 13686 { 13687 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 13688 (__C << 2) | __B, 13689 _mm512_undefined_pd (), 13690 (__mmask8) -1, 13691 _MM_FROUND_CUR_DIRECTION); 13692 } 13693 13694 extern __inline __m512d 13695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13696 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A, 13697 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 13698 { 13699 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 13700 (__C << 2) | __B, 13701 (__v8df) __W, __U, 13702 _MM_FROUND_CUR_DIRECTION); 13703 } 13704 13705 extern __inline __m512d 13706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13707 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A, 13708 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 13709 { 13710 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 13711 (__C << 2) | __B, 13712 (__v8df) 13713 _mm512_setzero_pd (), 13714 __U, 13715 _MM_FROUND_CUR_DIRECTION); 13716 } 13717 13718 extern __inline __m512 13719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13720 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, 13721 _MM_MANTISSA_SIGN_ENUM __C) 13722 { 13723 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 13724 (__C << 2) | __B, 13725 _mm512_undefined_ps (), 13726 (__mmask16) -1, 13727 _MM_FROUND_CUR_DIRECTION); 13728 } 13729 13730 extern __inline __m512 13731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13732 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A, 13733 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 13734 { 13735 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 13736 (__C << 2) | __B, 13737 (__v16sf) __W, __U, 13738 _MM_FROUND_CUR_DIRECTION); 13739 } 13740 13741 extern __inline __m512 13742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13743 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A, 13744 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 13745 { 13746 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 13747 (__C << 2) | __B, 13748 (__v16sf) 13749 _mm512_setzero_ps (), 13750 __U, 13751 _MM_FROUND_CUR_DIRECTION); 13752 } 13753 13754 extern __inline __m128d 13755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13756 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C, 13757 _MM_MANTISSA_SIGN_ENUM __D) 13758 { 13759 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A, 13760 (__v2df) __B, 13761 (__D << 2) | __C, 13762 _MM_FROUND_CUR_DIRECTION); 13763 } 13764 13765 extern __inline __m128d 13766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13767 _mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 13768 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D) 13769 { 13770 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A, 13771 (__v2df) __B, 13772 (__D << 2) | __C, 13773 (__v2df) __W, 13774 __U, 13775 _MM_FROUND_CUR_DIRECTION); 13776 } 13777 13778 extern __inline __m128d 13779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13780 _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B, 13781 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D) 13782 { 13783 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A, 13784 (__v2df) __B, 13785 (__D << 2) | __C, 13786 (__v2df) 13787 _mm_setzero_pd(), 13788 __U, 13789 _MM_FROUND_CUR_DIRECTION); 13790 } 13791 13792 extern __inline __m128 13793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13794 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C, 13795 _MM_MANTISSA_SIGN_ENUM __D) 13796 { 13797 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A, 13798 (__v4sf) __B, 13799 (__D << 2) | __C, 13800 _MM_FROUND_CUR_DIRECTION); 13801 } 13802 13803 extern __inline __m128 13804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13805 _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 13806 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D) 13807 { 13808 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A, 13809 (__v4sf) __B, 13810 (__D << 2) | __C, 13811 (__v4sf) __W, 13812 __U, 13813 _MM_FROUND_CUR_DIRECTION); 13814 } 13815 13816 extern __inline __m128 13817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13818 _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B, 13819 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D) 13820 { 13821 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A, 13822 (__v4sf) __B, 13823 (__D << 2) | __C, 13824 (__v4sf) 13825 _mm_setzero_ps(), 13826 __U, 13827 _MM_FROUND_CUR_DIRECTION); 13828 } 13829 13830 #else 13831 #define _mm512_getmant_pd(X, B, C) \ 13832 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 13833 (int)(((C)<<2) | (B)), \ 13834 (__v8df)_mm512_undefined_pd(), \ 13835 (__mmask8)-1,\ 13836 _MM_FROUND_CUR_DIRECTION)) 13837 13838 #define _mm512_mask_getmant_pd(W, U, X, B, C) \ 13839 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 13840 (int)(((C)<<2) | (B)), \ 13841 (__v8df)(__m512d)(W), \ 13842 (__mmask8)(U),\ 13843 _MM_FROUND_CUR_DIRECTION)) 13844 13845 #define _mm512_maskz_getmant_pd(U, X, B, C) \ 13846 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 13847 (int)(((C)<<2) | (B)), \ 13848 (__v8df)_mm512_setzero_pd(), \ 13849 (__mmask8)(U),\ 13850 _MM_FROUND_CUR_DIRECTION)) 13851 #define _mm512_getmant_ps(X, B, C) \ 13852 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 13853 (int)(((C)<<2) | (B)), \ 13854 (__v16sf)_mm512_undefined_ps(), \ 13855 (__mmask16)-1,\ 13856 _MM_FROUND_CUR_DIRECTION)) 13857 13858 #define _mm512_mask_getmant_ps(W, U, X, B, C) \ 13859 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 13860 (int)(((C)<<2) | (B)), \ 13861 (__v16sf)(__m512)(W), \ 13862 (__mmask16)(U),\ 13863 _MM_FROUND_CUR_DIRECTION)) 13864 13865 #define _mm512_maskz_getmant_ps(U, X, B, C) \ 13866 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 13867 (int)(((C)<<2) | (B)), \ 13868 (__v16sf)_mm512_setzero_ps(), \ 13869 (__mmask16)(U),\ 13870 _MM_FROUND_CUR_DIRECTION)) 13871 #define _mm_getmant_sd(X, Y, C, D) \ 13872 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \ 13873 (__v2df)(__m128d)(Y), \ 13874 (int)(((D)<<2) | (C)), \ 13875 _MM_FROUND_CUR_DIRECTION)) 13876 13877 #define _mm_mask_getmant_sd(W, U, X, Y, C, D) \ 13878 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \ 13879 (__v2df)(__m128d)(Y), \ 13880 (int)(((D)<<2) | (C)), \ 13881 (__v2df)(__m128d)(W), \ 13882 (__mmask8)(U),\ 13883 _MM_FROUND_CUR_DIRECTION)) 13884 13885 #define _mm_maskz_getmant_sd(U, X, Y, C, D) \ 13886 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \ 13887 (__v2df)(__m128d)(Y), \ 13888 (int)(((D)<<2) | (C)), \ 13889 (__v2df)_mm_setzero_pd(), \ 13890 (__mmask8)(U),\ 13891 _MM_FROUND_CUR_DIRECTION)) 13892 13893 #define _mm_getmant_ss(X, Y, C, D) \ 13894 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \ 13895 (__v4sf)(__m128)(Y), \ 13896 (int)(((D)<<2) | (C)), \ 13897 _MM_FROUND_CUR_DIRECTION)) 13898 13899 #define _mm_mask_getmant_ss(W, U, X, Y, C, D) \ 13900 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \ 13901 (__v4sf)(__m128)(Y), \ 13902 (int)(((D)<<2) | (C)), \ 13903 (__v4sf)(__m128)(W), \ 13904 (__mmask8)(U),\ 13905 _MM_FROUND_CUR_DIRECTION)) 13906 13907 #define _mm_maskz_getmant_ss(U, X, Y, C, D) \ 13908 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \ 13909 (__v4sf)(__m128)(Y), \ 13910 (int)(((D)<<2) | (C)), \ 13911 (__v4sf)_mm_setzero_ps(), \ 13912 (__mmask8)(U),\ 13913 _MM_FROUND_CUR_DIRECTION)) 13914 13915 #define _mm_getexp_ss(A, B) \ 13916 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 13917 _MM_FROUND_CUR_DIRECTION)) 13918 13919 #define _mm_mask_getexp_ss(W, U, A, B) \ 13920 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\ 13921 _MM_FROUND_CUR_DIRECTION) 13922 13923 #define _mm_maskz_getexp_ss(U, A, B) \ 13924 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U,\ 13925 _MM_FROUND_CUR_DIRECTION) 13926 13927 #define _mm_getexp_sd(A, B) \ 13928 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\ 13929 _MM_FROUND_CUR_DIRECTION)) 13930 13931 #define _mm_mask_getexp_sd(W, U, A, B) \ 13932 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\ 13933 _MM_FROUND_CUR_DIRECTION) 13934 13935 #define _mm_maskz_getexp_sd(U, A, B) \ 13936 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U,\ 13937 _MM_FROUND_CUR_DIRECTION) 13938 13939 #define _mm512_getexp_ps(A) \ 13940 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 13941 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)) 13942 13943 #define _mm512_mask_getexp_ps(W, U, A) \ 13944 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 13945 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 13946 13947 #define _mm512_maskz_getexp_ps(U, A) \ 13948 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 13949 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 13950 13951 #define _mm512_getexp_pd(A) \ 13952 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 13953 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) 13954 13955 #define _mm512_mask_getexp_pd(W, U, A) \ 13956 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 13957 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 13958 13959 #define _mm512_maskz_getexp_pd(U, A) \ 13960 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 13961 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 13962 #endif 13963 13964 #ifdef __OPTIMIZE__ 13965 extern __inline __m512 13966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13967 _mm512_roundscale_ps (__m512 __A, const int __imm) 13968 { 13969 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, 13970 (__v16sf) 13971 _mm512_undefined_ps (), 13972 -1, 13973 _MM_FROUND_CUR_DIRECTION); 13974 } 13975 13976 extern __inline __m512 13977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13978 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C, 13979 const int __imm) 13980 { 13981 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, 13982 (__v16sf) __A, 13983 (__mmask16) __B, 13984 _MM_FROUND_CUR_DIRECTION); 13985 } 13986 13987 extern __inline __m512 13988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13989 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm) 13990 { 13991 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, 13992 __imm, 13993 (__v16sf) 13994 _mm512_setzero_ps (), 13995 (__mmask16) __A, 13996 _MM_FROUND_CUR_DIRECTION); 13997 } 13998 13999 extern __inline __m512d 14000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14001 _mm512_roundscale_pd (__m512d __A, const int __imm) 14002 { 14003 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, 14004 (__v8df) 14005 _mm512_undefined_pd (), 14006 -1, 14007 _MM_FROUND_CUR_DIRECTION); 14008 } 14009 14010 extern __inline __m512d 14011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14012 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C, 14013 const int __imm) 14014 { 14015 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, 14016 (__v8df) __A, 14017 (__mmask8) __B, 14018 _MM_FROUND_CUR_DIRECTION); 14019 } 14020 14021 extern __inline __m512d 14022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14023 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm) 14024 { 14025 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, 14026 __imm, 14027 (__v8df) 14028 _mm512_setzero_pd (), 14029 (__mmask8) __A, 14030 _MM_FROUND_CUR_DIRECTION); 14031 } 14032 14033 extern __inline __m128 14034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14035 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm) 14036 { 14037 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A, 14038 (__v4sf) __B, __imm, 14039 _MM_FROUND_CUR_DIRECTION); 14040 } 14041 14042 extern __inline __m128d 14043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14044 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm) 14045 { 14046 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A, 14047 (__v2df) __B, __imm, 14048 _MM_FROUND_CUR_DIRECTION); 14049 } 14050 14051 #else 14052 #define _mm512_roundscale_ps(A, B) \ 14053 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\ 14054 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION)) 14055 #define _mm512_mask_roundscale_ps(A, B, C, D) \ 14056 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ 14057 (int)(D), \ 14058 (__v16sf)(__m512)(A), \ 14059 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION)) 14060 #define _mm512_maskz_roundscale_ps(A, B, C) \ 14061 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ 14062 (int)(C), \ 14063 (__v16sf)_mm512_setzero_ps(),\ 14064 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION)) 14065 #define _mm512_roundscale_pd(A, B) \ 14066 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\ 14067 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 14068 #define _mm512_mask_roundscale_pd(A, B, C, D) \ 14069 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ 14070 (int)(D), \ 14071 (__v8df)(__m512d)(A), \ 14072 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION)) 14073 #define _mm512_maskz_roundscale_pd(A, B, C) \ 14074 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ 14075 (int)(C), \ 14076 (__v8df)_mm512_setzero_pd(),\ 14077 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION)) 14078 #define _mm_roundscale_ss(A, B, C) \ 14079 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \ 14080 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION)) 14081 #define _mm_roundscale_sd(A, B, C) \ 14082 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \ 14083 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION)) 14084 #endif 14085 14086 #ifdef __OPTIMIZE__ 14087 extern __inline __mmask8 14088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14089 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P) 14090 { 14091 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14092 (__v8df) __Y, __P, 14093 (__mmask8) -1, 14094 _MM_FROUND_CUR_DIRECTION); 14095 } 14096 14097 extern __inline __mmask16 14098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14099 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P) 14100 { 14101 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14102 (__v16sf) __Y, __P, 14103 (__mmask16) -1, 14104 _MM_FROUND_CUR_DIRECTION); 14105 } 14106 14107 extern __inline __mmask16 14108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14109 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P) 14110 { 14111 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14112 (__v16sf) __Y, __P, 14113 (__mmask16) __U, 14114 _MM_FROUND_CUR_DIRECTION); 14115 } 14116 14117 extern __inline __mmask8 14118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14119 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P) 14120 { 14121 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14122 (__v8df) __Y, __P, 14123 (__mmask8) __U, 14124 _MM_FROUND_CUR_DIRECTION); 14125 } 14126 14127 extern __inline __mmask8 14128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14129 _mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y) 14130 { 14131 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14132 (__v8df) __Y, _CMP_EQ_OQ, 14133 (__mmask8) -1, 14134 _MM_FROUND_CUR_DIRECTION); 14135 } 14136 14137 extern __inline __mmask8 14138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14139 _mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 14140 { 14141 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14142 (__v8df) __Y, _CMP_EQ_OQ, 14143 (__mmask8) __U, 14144 _MM_FROUND_CUR_DIRECTION); 14145 } 14146 14147 extern __inline __mmask8 14148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14149 _mm512_cmplt_pd_mask (__m512d __X, __m512d __Y) 14150 { 14151 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14152 (__v8df) __Y, _CMP_LT_OS, 14153 (__mmask8) -1, 14154 _MM_FROUND_CUR_DIRECTION); 14155 } 14156 14157 extern __inline __mmask8 14158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14159 _mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 14160 { 14161 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14162 (__v8df) __Y, _CMP_LT_OS, 14163 (__mmask8) __U, 14164 _MM_FROUND_CUR_DIRECTION); 14165 } 14166 14167 extern __inline __mmask8 14168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14169 _mm512_cmple_pd_mask (__m512d __X, __m512d __Y) 14170 { 14171 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14172 (__v8df) __Y, _CMP_LE_OS, 14173 (__mmask8) -1, 14174 _MM_FROUND_CUR_DIRECTION); 14175 } 14176 14177 extern __inline __mmask8 14178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14179 _mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 14180 { 14181 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14182 (__v8df) __Y, _CMP_LE_OS, 14183 (__mmask8) __U, 14184 _MM_FROUND_CUR_DIRECTION); 14185 } 14186 14187 extern __inline __mmask8 14188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14189 _mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y) 14190 { 14191 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14192 (__v8df) __Y, _CMP_UNORD_Q, 14193 (__mmask8) -1, 14194 _MM_FROUND_CUR_DIRECTION); 14195 } 14196 14197 extern __inline __mmask8 14198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14199 _mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 14200 { 14201 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14202 (__v8df) __Y, _CMP_UNORD_Q, 14203 (__mmask8) __U, 14204 _MM_FROUND_CUR_DIRECTION); 14205 } 14206 14207 extern __inline __mmask8 14208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14209 _mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y) 14210 { 14211 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14212 (__v8df) __Y, _CMP_NEQ_UQ, 14213 (__mmask8) -1, 14214 _MM_FROUND_CUR_DIRECTION); 14215 } 14216 14217 extern __inline __mmask8 14218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14219 _mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 14220 { 14221 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14222 (__v8df) __Y, _CMP_NEQ_UQ, 14223 (__mmask8) __U, 14224 _MM_FROUND_CUR_DIRECTION); 14225 } 14226 14227 extern __inline __mmask8 14228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14229 _mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y) 14230 { 14231 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14232 (__v8df) __Y, _CMP_NLT_US, 14233 (__mmask8) -1, 14234 _MM_FROUND_CUR_DIRECTION); 14235 } 14236 14237 extern __inline __mmask8 14238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14239 _mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 14240 { 14241 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14242 (__v8df) __Y, _CMP_NLT_US, 14243 (__mmask8) __U, 14244 _MM_FROUND_CUR_DIRECTION); 14245 } 14246 14247 extern __inline __mmask8 14248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14249 _mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y) 14250 { 14251 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14252 (__v8df) __Y, _CMP_NLE_US, 14253 (__mmask8) -1, 14254 _MM_FROUND_CUR_DIRECTION); 14255 } 14256 14257 extern __inline __mmask8 14258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14259 _mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 14260 { 14261 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14262 (__v8df) __Y, _CMP_NLE_US, 14263 (__mmask8) __U, 14264 _MM_FROUND_CUR_DIRECTION); 14265 } 14266 14267 extern __inline __mmask8 14268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14269 _mm512_cmpord_pd_mask (__m512d __X, __m512d __Y) 14270 { 14271 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14272 (__v8df) __Y, _CMP_ORD_Q, 14273 (__mmask8) -1, 14274 _MM_FROUND_CUR_DIRECTION); 14275 } 14276 14277 extern __inline __mmask8 14278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14279 _mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y) 14280 { 14281 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 14282 (__v8df) __Y, _CMP_ORD_Q, 14283 (__mmask8) __U, 14284 _MM_FROUND_CUR_DIRECTION); 14285 } 14286 14287 extern __inline __mmask16 14288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14289 _mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y) 14290 { 14291 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14292 (__v16sf) __Y, _CMP_EQ_OQ, 14293 (__mmask16) -1, 14294 _MM_FROUND_CUR_DIRECTION); 14295 } 14296 14297 extern __inline __mmask16 14298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14299 _mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 14300 { 14301 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14302 (__v16sf) __Y, _CMP_EQ_OQ, 14303 (__mmask16) __U, 14304 _MM_FROUND_CUR_DIRECTION); 14305 } 14306 14307 extern __inline __mmask16 14308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14309 _mm512_cmplt_ps_mask (__m512 __X, __m512 __Y) 14310 { 14311 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14312 (__v16sf) __Y, _CMP_LT_OS, 14313 (__mmask16) -1, 14314 _MM_FROUND_CUR_DIRECTION); 14315 } 14316 14317 extern __inline __mmask16 14318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14319 _mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 14320 { 14321 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14322 (__v16sf) __Y, _CMP_LT_OS, 14323 (__mmask16) __U, 14324 _MM_FROUND_CUR_DIRECTION); 14325 } 14326 14327 extern __inline __mmask16 14328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14329 _mm512_cmple_ps_mask (__m512 __X, __m512 __Y) 14330 { 14331 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14332 (__v16sf) __Y, _CMP_LE_OS, 14333 (__mmask16) -1, 14334 _MM_FROUND_CUR_DIRECTION); 14335 } 14336 14337 extern __inline __mmask16 14338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14339 _mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 14340 { 14341 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14342 (__v16sf) __Y, _CMP_LE_OS, 14343 (__mmask16) __U, 14344 _MM_FROUND_CUR_DIRECTION); 14345 } 14346 14347 extern __inline __mmask16 14348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14349 _mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y) 14350 { 14351 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14352 (__v16sf) __Y, _CMP_UNORD_Q, 14353 (__mmask16) -1, 14354 _MM_FROUND_CUR_DIRECTION); 14355 } 14356 14357 extern __inline __mmask16 14358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14359 _mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 14360 { 14361 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14362 (__v16sf) __Y, _CMP_UNORD_Q, 14363 (__mmask16) __U, 14364 _MM_FROUND_CUR_DIRECTION); 14365 } 14366 14367 extern __inline __mmask16 14368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14369 _mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y) 14370 { 14371 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14372 (__v16sf) __Y, _CMP_NEQ_UQ, 14373 (__mmask16) -1, 14374 _MM_FROUND_CUR_DIRECTION); 14375 } 14376 14377 extern __inline __mmask16 14378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14379 _mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 14380 { 14381 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14382 (__v16sf) __Y, _CMP_NEQ_UQ, 14383 (__mmask16) __U, 14384 _MM_FROUND_CUR_DIRECTION); 14385 } 14386 14387 extern __inline __mmask16 14388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14389 _mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y) 14390 { 14391 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14392 (__v16sf) __Y, _CMP_NLT_US, 14393 (__mmask16) -1, 14394 _MM_FROUND_CUR_DIRECTION); 14395 } 14396 14397 extern __inline __mmask16 14398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14399 _mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 14400 { 14401 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14402 (__v16sf) __Y, _CMP_NLT_US, 14403 (__mmask16) __U, 14404 _MM_FROUND_CUR_DIRECTION); 14405 } 14406 14407 extern __inline __mmask16 14408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14409 _mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y) 14410 { 14411 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14412 (__v16sf) __Y, _CMP_NLE_US, 14413 (__mmask16) -1, 14414 _MM_FROUND_CUR_DIRECTION); 14415 } 14416 14417 extern __inline __mmask16 14418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14419 _mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 14420 { 14421 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14422 (__v16sf) __Y, _CMP_NLE_US, 14423 (__mmask16) __U, 14424 _MM_FROUND_CUR_DIRECTION); 14425 } 14426 14427 extern __inline __mmask16 14428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14429 _mm512_cmpord_ps_mask (__m512 __X, __m512 __Y) 14430 { 14431 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14432 (__v16sf) __Y, _CMP_ORD_Q, 14433 (__mmask16) -1, 14434 _MM_FROUND_CUR_DIRECTION); 14435 } 14436 14437 extern __inline __mmask16 14438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14439 _mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) 14440 { 14441 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 14442 (__v16sf) __Y, _CMP_ORD_Q, 14443 (__mmask16) __U, 14444 _MM_FROUND_CUR_DIRECTION); 14445 } 14446 14447 extern __inline __mmask8 14448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14449 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) 14450 { 14451 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 14452 (__v2df) __Y, __P, 14453 (__mmask8) -1, 14454 _MM_FROUND_CUR_DIRECTION); 14455 } 14456 14457 extern __inline __mmask8 14458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14459 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P) 14460 { 14461 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 14462 (__v2df) __Y, __P, 14463 (__mmask8) __M, 14464 _MM_FROUND_CUR_DIRECTION); 14465 } 14466 14467 extern __inline __mmask8 14468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14469 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P) 14470 { 14471 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 14472 (__v4sf) __Y, __P, 14473 (__mmask8) -1, 14474 _MM_FROUND_CUR_DIRECTION); 14475 } 14476 14477 extern __inline __mmask8 14478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14479 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) 14480 { 14481 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 14482 (__v4sf) __Y, __P, 14483 (__mmask8) __M, 14484 _MM_FROUND_CUR_DIRECTION); 14485 } 14486 14487 #else 14488 #define _mm512_cmp_pd_mask(X, Y, P) \ 14489 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 14490 (__v8df)(__m512d)(Y), (int)(P),\ 14491 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 14492 14493 #define _mm512_cmp_ps_mask(X, Y, P) \ 14494 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 14495 (__v16sf)(__m512)(Y), (int)(P),\ 14496 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION)) 14497 14498 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \ 14499 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 14500 (__v8df)(__m512d)(Y), (int)(P),\ 14501 (__mmask8)M, _MM_FROUND_CUR_DIRECTION)) 14502 14503 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \ 14504 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 14505 (__v16sf)(__m512)(Y), (int)(P),\ 14506 (__mmask16)M,_MM_FROUND_CUR_DIRECTION)) 14507 14508 #define _mm_cmp_sd_mask(X, Y, P) \ 14509 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 14510 (__v2df)(__m128d)(Y), (int)(P),\ 14511 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 14512 14513 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \ 14514 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 14515 (__v2df)(__m128d)(Y), (int)(P),\ 14516 M,_MM_FROUND_CUR_DIRECTION)) 14517 14518 #define _mm_cmp_ss_mask(X, Y, P) \ 14519 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 14520 (__v4sf)(__m128)(Y), (int)(P), \ 14521 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 14522 14523 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \ 14524 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 14525 (__v4sf)(__m128)(Y), (int)(P), \ 14526 M,_MM_FROUND_CUR_DIRECTION)) 14527 #endif 14528 14529 extern __inline __mmask16 14530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14531 _mm512_kmov (__mmask16 __A) 14532 { 14533 return __builtin_ia32_kmovw (__A); 14534 } 14535 14536 extern __inline __m512 14537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14538 _mm512_castpd_ps (__m512d __A) 14539 { 14540 return (__m512) (__A); 14541 } 14542 14543 extern __inline __m512i 14544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14545 _mm512_castpd_si512 (__m512d __A) 14546 { 14547 return (__m512i) (__A); 14548 } 14549 14550 extern __inline __m512d 14551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14552 _mm512_castps_pd (__m512 __A) 14553 { 14554 return (__m512d) (__A); 14555 } 14556 14557 extern __inline __m512i 14558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14559 _mm512_castps_si512 (__m512 __A) 14560 { 14561 return (__m512i) (__A); 14562 } 14563 14564 extern __inline __m512 14565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14566 _mm512_castsi512_ps (__m512i __A) 14567 { 14568 return (__m512) (__A); 14569 } 14570 14571 extern __inline __m512d 14572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14573 _mm512_castsi512_pd (__m512i __A) 14574 { 14575 return (__m512d) (__A); 14576 } 14577 14578 extern __inline __m128d 14579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14580 _mm512_castpd512_pd128 (__m512d __A) 14581 { 14582 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0); 14583 } 14584 14585 extern __inline __m128 14586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14587 _mm512_castps512_ps128 (__m512 __A) 14588 { 14589 return _mm512_extractf32x4_ps(__A, 0); 14590 } 14591 14592 extern __inline __m128i 14593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14594 _mm512_castsi512_si128 (__m512i __A) 14595 { 14596 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0); 14597 } 14598 14599 extern __inline __m256d 14600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14601 _mm512_castpd512_pd256 (__m512d __A) 14602 { 14603 return _mm512_extractf64x4_pd(__A, 0); 14604 } 14605 14606 extern __inline __m256 14607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14608 _mm512_castps512_ps256 (__m512 __A) 14609 { 14610 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0); 14611 } 14612 14613 extern __inline __m256i 14614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14615 _mm512_castsi512_si256 (__m512i __A) 14616 { 14617 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0); 14618 } 14619 14620 extern __inline __m512d 14621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14622 _mm512_castpd128_pd512 (__m128d __A) 14623 { 14624 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A); 14625 } 14626 14627 extern __inline __m512 14628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14629 _mm512_castps128_ps512 (__m128 __A) 14630 { 14631 return (__m512) __builtin_ia32_ps512_ps((__m128)__A); 14632 } 14633 14634 extern __inline __m512i 14635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14636 _mm512_castsi128_si512 (__m128i __A) 14637 { 14638 return (__m512i) __builtin_ia32_si512_si((__v4si)__A); 14639 } 14640 14641 extern __inline __m512d 14642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14643 _mm512_castpd256_pd512 (__m256d __A) 14644 { 14645 return __builtin_ia32_pd512_256pd (__A); 14646 } 14647 14648 extern __inline __m512 14649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14650 _mm512_castps256_ps512 (__m256 __A) 14651 { 14652 return __builtin_ia32_ps512_256ps (__A); 14653 } 14654 14655 extern __inline __m512i 14656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14657 _mm512_castsi256_si512 (__m256i __A) 14658 { 14659 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A); 14660 } 14661 14662 extern __inline __mmask16 14663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14664 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B) 14665 { 14666 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 14667 (__v16si) __B, 0, 14668 (__mmask16) -1); 14669 } 14670 14671 extern __inline __mmask16 14672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14673 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B) 14674 { 14675 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 14676 (__v16si) __B, 0, __U); 14677 } 14678 14679 extern __inline __mmask8 14680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14681 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B) 14682 { 14683 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 14684 (__v8di) __B, 0, __U); 14685 } 14686 14687 extern __inline __mmask8 14688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14689 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B) 14690 { 14691 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 14692 (__v8di) __B, 0, 14693 (__mmask8) -1); 14694 } 14695 14696 extern __inline __mmask16 14697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14698 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B) 14699 { 14700 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 14701 (__v16si) __B, 6, 14702 (__mmask16) -1); 14703 } 14704 14705 extern __inline __mmask16 14706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14707 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B) 14708 { 14709 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 14710 (__v16si) __B, 6, __U); 14711 } 14712 14713 extern __inline __mmask8 14714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14715 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B) 14716 { 14717 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 14718 (__v8di) __B, 6, __U); 14719 } 14720 14721 extern __inline __mmask8 14722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14723 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B) 14724 { 14725 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 14726 (__v8di) __B, 6, 14727 (__mmask8) -1); 14728 } 14729 14730 #undef __MM512_REDUCE_OP 14731 #define __MM512_REDUCE_OP(op) \ 14732 __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \ 14733 __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \ 14734 __m256i __T3 = (__m256i) (__T1 op __T2); \ 14735 __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1); \ 14736 __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0); \ 14737 __v4si __T6 = __T4 op __T5; \ 14738 __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \ 14739 __v4si __T8 = __T6 op __T7; \ 14740 return __T8[0] op __T8[1] 14741 14742 extern __inline int 14743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14744 _mm512_reduce_add_epi32 (__m512i __A) 14745 { 14746 __MM512_REDUCE_OP (+); 14747 } 14748 14749 extern __inline int 14750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14751 _mm512_reduce_mul_epi32 (__m512i __A) 14752 { 14753 __MM512_REDUCE_OP (*); 14754 } 14755 14756 extern __inline int 14757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14758 _mm512_reduce_and_epi32 (__m512i __A) 14759 { 14760 __MM512_REDUCE_OP (&); 14761 } 14762 14763 extern __inline int 14764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14765 _mm512_reduce_or_epi32 (__m512i __A) 14766 { 14767 __MM512_REDUCE_OP (|); 14768 } 14769 14770 extern __inline int 14771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14772 _mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A) 14773 { 14774 __A = _mm512_maskz_mov_epi32 (__U, __A); 14775 __MM512_REDUCE_OP (+); 14776 } 14777 14778 extern __inline int 14779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14780 _mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A) 14781 { 14782 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A); 14783 __MM512_REDUCE_OP (*); 14784 } 14785 14786 extern __inline int 14787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14788 _mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A) 14789 { 14790 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A); 14791 __MM512_REDUCE_OP (&); 14792 } 14793 14794 extern __inline int 14795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14796 _mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A) 14797 { 14798 __A = _mm512_maskz_mov_epi32 (__U, __A); 14799 __MM512_REDUCE_OP (|); 14800 } 14801 14802 #undef __MM512_REDUCE_OP 14803 #define __MM512_REDUCE_OP(op) \ 14804 __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \ 14805 __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \ 14806 __m256i __T3 = _mm256_##op (__T1, __T2); \ 14807 __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \ 14808 __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \ 14809 __m128i __T6 = _mm_##op (__T4, __T5); \ 14810 __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \ 14811 (__v4si) { 2, 3, 0, 1 }); \ 14812 __m128i __T8 = _mm_##op (__T6, __T7); \ 14813 __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \ 14814 (__v4si) { 1, 0, 1, 0 }); \ 14815 __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \ 14816 return __T10[0] 14817 14818 extern __inline int 14819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14820 _mm512_reduce_min_epi32 (__m512i __A) 14821 { 14822 __MM512_REDUCE_OP (min_epi32); 14823 } 14824 14825 extern __inline int 14826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14827 _mm512_reduce_max_epi32 (__m512i __A) 14828 { 14829 __MM512_REDUCE_OP (max_epi32); 14830 } 14831 14832 extern __inline unsigned int 14833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14834 _mm512_reduce_min_epu32 (__m512i __A) 14835 { 14836 __MM512_REDUCE_OP (min_epu32); 14837 } 14838 14839 extern __inline unsigned int 14840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14841 _mm512_reduce_max_epu32 (__m512i __A) 14842 { 14843 __MM512_REDUCE_OP (max_epu32); 14844 } 14845 14846 extern __inline int 14847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14848 _mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A) 14849 { 14850 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A); 14851 __MM512_REDUCE_OP (min_epi32); 14852 } 14853 14854 extern __inline int 14855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14856 _mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A) 14857 { 14858 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A); 14859 __MM512_REDUCE_OP (max_epi32); 14860 } 14861 14862 extern __inline unsigned int 14863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14864 _mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A) 14865 { 14866 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A); 14867 __MM512_REDUCE_OP (min_epu32); 14868 } 14869 14870 extern __inline unsigned int 14871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14872 _mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A) 14873 { 14874 __A = _mm512_maskz_mov_epi32 (__U, __A); 14875 __MM512_REDUCE_OP (max_epu32); 14876 } 14877 14878 #undef __MM512_REDUCE_OP 14879 #define __MM512_REDUCE_OP(op) \ 14880 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \ 14881 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \ 14882 __m256 __T3 = __T1 op __T2; \ 14883 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \ 14884 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \ 14885 __m128 __T6 = __T4 op __T5; \ 14886 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \ 14887 __m128 __T8 = __T6 op __T7; \ 14888 return __T8[0] op __T8[1] 14889 14890 extern __inline float 14891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14892 _mm512_reduce_add_ps (__m512 __A) 14893 { 14894 __MM512_REDUCE_OP (+); 14895 } 14896 14897 extern __inline float 14898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14899 _mm512_reduce_mul_ps (__m512 __A) 14900 { 14901 __MM512_REDUCE_OP (*); 14902 } 14903 14904 extern __inline float 14905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14906 _mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A) 14907 { 14908 __A = _mm512_maskz_mov_ps (__U, __A); 14909 __MM512_REDUCE_OP (+); 14910 } 14911 14912 extern __inline float 14913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14914 _mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A) 14915 { 14916 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A); 14917 __MM512_REDUCE_OP (*); 14918 } 14919 14920 #undef __MM512_REDUCE_OP 14921 #define __MM512_REDUCE_OP(op) \ 14922 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \ 14923 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \ 14924 __m256 __T3 = _mm256_##op (__T1, __T2); \ 14925 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \ 14926 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \ 14927 __m128 __T6 = _mm_##op (__T4, __T5); \ 14928 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \ 14929 __m128 __T8 = _mm_##op (__T6, __T7); \ 14930 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \ 14931 __m128 __T10 = _mm_##op (__T8, __T9); \ 14932 return __T10[0] 14933 14934 extern __inline float 14935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14936 _mm512_reduce_min_ps (__m512 __A) 14937 { 14938 __MM512_REDUCE_OP (min_ps); 14939 } 14940 14941 extern __inline float 14942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14943 _mm512_reduce_max_ps (__m512 __A) 14944 { 14945 __MM512_REDUCE_OP (max_ps); 14946 } 14947 14948 extern __inline float 14949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14950 _mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A) 14951 { 14952 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A); 14953 __MM512_REDUCE_OP (min_ps); 14954 } 14955 14956 extern __inline float 14957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14958 _mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A) 14959 { 14960 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A); 14961 __MM512_REDUCE_OP (max_ps); 14962 } 14963 14964 #undef __MM512_REDUCE_OP 14965 #define __MM512_REDUCE_OP(op) \ 14966 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \ 14967 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \ 14968 __m256i __T3 = (__m256i) (__T1 op __T2); \ 14969 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \ 14970 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \ 14971 __v2di __T6 = __T4 op __T5; \ 14972 return __T6[0] op __T6[1] 14973 14974 extern __inline long long 14975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14976 _mm512_reduce_add_epi64 (__m512i __A) 14977 { 14978 __MM512_REDUCE_OP (+); 14979 } 14980 14981 extern __inline long long 14982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14983 _mm512_reduce_mul_epi64 (__m512i __A) 14984 { 14985 __MM512_REDUCE_OP (*); 14986 } 14987 14988 extern __inline long long 14989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14990 _mm512_reduce_and_epi64 (__m512i __A) 14991 { 14992 __MM512_REDUCE_OP (&); 14993 } 14994 14995 extern __inline long long 14996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 14997 _mm512_reduce_or_epi64 (__m512i __A) 14998 { 14999 __MM512_REDUCE_OP (|); 15000 } 15001 15002 extern __inline long long 15003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15004 _mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A) 15005 { 15006 __A = _mm512_maskz_mov_epi64 (__U, __A); 15007 __MM512_REDUCE_OP (+); 15008 } 15009 15010 extern __inline long long 15011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15012 _mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A) 15013 { 15014 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A); 15015 __MM512_REDUCE_OP (*); 15016 } 15017 15018 extern __inline long long 15019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15020 _mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A) 15021 { 15022 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A); 15023 __MM512_REDUCE_OP (&); 15024 } 15025 15026 extern __inline long long 15027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15028 _mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A) 15029 { 15030 __A = _mm512_maskz_mov_epi64 (__U, __A); 15031 __MM512_REDUCE_OP (|); 15032 } 15033 15034 #undef __MM512_REDUCE_OP 15035 #define __MM512_REDUCE_OP(op) \ 15036 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \ 15037 __m512i __T2 = _mm512_##op (__A, __T1); \ 15038 __m512i __T3 \ 15039 = (__m512i) __builtin_shuffle ((__v8di) __T2, \ 15040 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\ 15041 __m512i __T4 = _mm512_##op (__T2, __T3); \ 15042 __m512i __T5 \ 15043 = (__m512i) __builtin_shuffle ((__v8di) __T4, \ 15044 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\ 15045 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \ 15046 return __T6[0] 15047 15048 extern __inline long long 15049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15050 _mm512_reduce_min_epi64 (__m512i __A) 15051 { 15052 __MM512_REDUCE_OP (min_epi64); 15053 } 15054 15055 extern __inline long long 15056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15057 _mm512_reduce_max_epi64 (__m512i __A) 15058 { 15059 __MM512_REDUCE_OP (max_epi64); 15060 } 15061 15062 extern __inline long long 15063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15064 _mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A) 15065 { 15066 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__), 15067 __U, __A); 15068 __MM512_REDUCE_OP (min_epi64); 15069 } 15070 15071 extern __inline long long 15072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15073 _mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A) 15074 { 15075 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1), 15076 __U, __A); 15077 __MM512_REDUCE_OP (max_epi64); 15078 } 15079 15080 extern __inline unsigned long long 15081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15082 _mm512_reduce_min_epu64 (__m512i __A) 15083 { 15084 __MM512_REDUCE_OP (min_epu64); 15085 } 15086 15087 extern __inline unsigned long long 15088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15089 _mm512_reduce_max_epu64 (__m512i __A) 15090 { 15091 __MM512_REDUCE_OP (max_epu64); 15092 } 15093 15094 extern __inline unsigned long long 15095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15096 _mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A) 15097 { 15098 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A); 15099 __MM512_REDUCE_OP (min_epu64); 15100 } 15101 15102 extern __inline unsigned long long 15103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15104 _mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A) 15105 { 15106 __A = _mm512_maskz_mov_epi64 (__U, __A); 15107 __MM512_REDUCE_OP (max_epu64); 15108 } 15109 15110 #undef __MM512_REDUCE_OP 15111 #define __MM512_REDUCE_OP(op) \ 15112 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \ 15113 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \ 15114 __m256d __T3 = __T1 op __T2; \ 15115 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \ 15116 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \ 15117 __m128d __T6 = __T4 op __T5; \ 15118 return __T6[0] op __T6[1] 15119 15120 extern __inline double 15121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15122 _mm512_reduce_add_pd (__m512d __A) 15123 { 15124 __MM512_REDUCE_OP (+); 15125 } 15126 15127 extern __inline double 15128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15129 _mm512_reduce_mul_pd (__m512d __A) 15130 { 15131 __MM512_REDUCE_OP (*); 15132 } 15133 15134 extern __inline double 15135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15136 _mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A) 15137 { 15138 __A = _mm512_maskz_mov_pd (__U, __A); 15139 __MM512_REDUCE_OP (+); 15140 } 15141 15142 extern __inline double 15143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15144 _mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A) 15145 { 15146 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A); 15147 __MM512_REDUCE_OP (*); 15148 } 15149 15150 #undef __MM512_REDUCE_OP 15151 #define __MM512_REDUCE_OP(op) \ 15152 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \ 15153 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \ 15154 __m256d __T3 = _mm256_##op (__T1, __T2); \ 15155 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \ 15156 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \ 15157 __m128d __T6 = _mm_##op (__T4, __T5); \ 15158 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \ 15159 __m128d __T8 = _mm_##op (__T6, __T7); \ 15160 return __T8[0] 15161 15162 extern __inline double 15163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15164 _mm512_reduce_min_pd (__m512d __A) 15165 { 15166 __MM512_REDUCE_OP (min_pd); 15167 } 15168 15169 extern __inline double 15170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15171 _mm512_reduce_max_pd (__m512d __A) 15172 { 15173 __MM512_REDUCE_OP (max_pd); 15174 } 15175 15176 extern __inline double 15177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15178 _mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A) 15179 { 15180 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A); 15181 __MM512_REDUCE_OP (min_pd); 15182 } 15183 15184 extern __inline double 15185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 15186 _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A) 15187 { 15188 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A); 15189 __MM512_REDUCE_OP (max_pd); 15190 } 15191 15192 #undef __MM512_REDUCE_OP 15193 15194 #ifdef __DISABLE_AVX512F__ 15195 #undef __DISABLE_AVX512F__ 15196 #pragma GCC pop_options 15197 #endif /* __DISABLE_AVX512F__ */ 15198 15199 #endif /* _AVX512FINTRIN_H_INCLUDED */ 15200