1 /* Copyright (C) 2014-2018 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _IMMINTRIN_H_INCLUDED 25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." 
26 #endif 27 28 #ifndef _AVX512DQINTRIN_H_INCLUDED 29 #define _AVX512DQINTRIN_H_INCLUDED 30 31 #ifndef __AVX512DQ__ 32 #pragma GCC push_options 33 #pragma GCC target("avx512dq") 34 #define __DISABLE_AVX512DQ__ 35 #endif /* __AVX512DQ__ */ 36 37 extern __inline unsigned char 38 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 39 _ktest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF) 40 { 41 *__CF = (unsigned char) __builtin_ia32_ktestcqi (__A, __B); 42 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B); 43 } 44 45 extern __inline unsigned char 46 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 47 _ktestz_mask8_u8 (__mmask8 __A, __mmask8 __B) 48 { 49 return (unsigned char) __builtin_ia32_ktestzqi (__A, __B); 50 } 51 52 extern __inline unsigned char 53 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 54 _ktestc_mask8_u8 (__mmask8 __A, __mmask8 __B) 55 { 56 return (unsigned char) __builtin_ia32_ktestcqi (__A, __B); 57 } 58 59 extern __inline unsigned char 60 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 61 _ktest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF) 62 { 63 *__CF = (unsigned char) __builtin_ia32_ktestchi (__A, __B); 64 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B); 65 } 66 67 extern __inline unsigned char 68 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 69 _ktestz_mask16_u8 (__mmask16 __A, __mmask16 __B) 70 { 71 return (unsigned char) __builtin_ia32_ktestzhi (__A, __B); 72 } 73 74 extern __inline unsigned char 75 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 76 _ktestc_mask16_u8 (__mmask16 __A, __mmask16 __B) 77 { 78 return (unsigned char) __builtin_ia32_ktestchi (__A, __B); 79 } 80 81 extern __inline unsigned char 82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 83 _kortest_mask8_u8 (__mmask8 __A, __mmask8 __B, unsigned char *__CF) 84 { 85 *__CF = (unsigned char) 
__builtin_ia32_kortestcqi (__A, __B); 86 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B); 87 } 88 89 extern __inline unsigned char 90 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 91 _kortestz_mask8_u8 (__mmask8 __A, __mmask8 __B) 92 { 93 return (unsigned char) __builtin_ia32_kortestzqi (__A, __B); 94 } 95 96 extern __inline unsigned char 97 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 98 _kortestc_mask8_u8 (__mmask8 __A, __mmask8 __B) 99 { 100 return (unsigned char) __builtin_ia32_kortestcqi (__A, __B); 101 } 102 103 extern __inline __mmask8 104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 105 _kadd_mask8 (__mmask8 __A, __mmask8 __B) 106 { 107 return (__mmask8) __builtin_ia32_kaddqi ((__mmask8) __A, (__mmask8) __B); 108 } 109 110 extern __inline __mmask16 111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 112 _kadd_mask16 (__mmask16 __A, __mmask16 __B) 113 { 114 return (__mmask16) __builtin_ia32_kaddhi ((__mmask16) __A, (__mmask16) __B); 115 } 116 117 extern __inline unsigned int 118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 119 _cvtmask8_u32 (__mmask8 __A) 120 { 121 return (unsigned int) __builtin_ia32_kmovb ((__mmask8 ) __A); 122 } 123 124 extern __inline __mmask8 125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 126 _cvtu32_mask8 (unsigned int __A) 127 { 128 return (__mmask8) __builtin_ia32_kmovb ((__mmask8) __A); 129 } 130 131 extern __inline __mmask8 132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 133 _load_mask8 (__mmask8 *__A) 134 { 135 return (__mmask8) __builtin_ia32_kmovb (*(__mmask8 *) __A); 136 } 137 138 extern __inline void 139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 140 _store_mask8 (__mmask8 *__A, __mmask8 __B) 141 { 142 *(__mmask8 *) __A = __builtin_ia32_kmovb (__B); 143 } 144 145 extern __inline __mmask8 146 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 147 _knot_mask8 (__mmask8 __A) 148 { 149 return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A); 150 } 151 152 extern __inline __mmask8 153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 154 _kor_mask8 (__mmask8 __A, __mmask8 __B) 155 { 156 return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B); 157 } 158 159 extern __inline __mmask8 160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 161 _kxnor_mask8 (__mmask8 __A, __mmask8 __B) 162 { 163 return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B); 164 } 165 166 extern __inline __mmask8 167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 168 _kxor_mask8 (__mmask8 __A, __mmask8 __B) 169 { 170 return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B); 171 } 172 173 extern __inline __mmask8 174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 175 _kand_mask8 (__mmask8 __A, __mmask8 __B) 176 { 177 return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B); 178 } 179 180 extern __inline __mmask8 181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 182 _kandn_mask8 (__mmask8 __A, __mmask8 __B) 183 { 184 return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B); 185 } 186 187 extern __inline __m512d 188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 189 _mm512_broadcast_f64x2 (__m128d __A) 190 { 191 return (__m512d) 192 __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A, 193 _mm512_undefined_pd (), 194 (__mmask8) -1); 195 } 196 197 extern __inline __m512d 198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 199 _mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A) 200 { 201 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) 202 __A, 203 (__v8df) 204 __O, __M); 205 } 206 207 extern __inline __m512d 208 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 209 _mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 210 { 211 return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) 212 __A, 213 (__v8df) 214 _mm512_setzero_ps (), 215 __M); 216 } 217 218 extern __inline __m512i 219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 220 _mm512_broadcast_i64x2 (__m128i __A) 221 { 222 return (__m512i) 223 __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A, 224 _mm512_undefined_epi32 (), 225 (__mmask8) -1); 226 } 227 228 extern __inline __m512i 229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 230 _mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A) 231 { 232 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) 233 __A, 234 (__v8di) 235 __O, __M); 236 } 237 238 extern __inline __m512i 239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 240 _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 241 { 242 return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) 243 __A, 244 (__v8di) 245 _mm512_setzero_si512 (), 246 __M); 247 } 248 249 extern __inline __m512 250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 251 _mm512_broadcast_f32x2 (__m128 __A) 252 { 253 return (__m512) 254 __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, 255 (__v16sf)_mm512_undefined_ps (), 256 (__mmask16) -1); 257 } 258 259 extern __inline __m512 260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 261 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) 262 { 263 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, 264 (__v16sf) 265 __O, __M); 266 } 267 268 extern __inline __m512 269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 270 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) 271 { 272 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, 273 (__v16sf) 274 _mm512_setzero_ps (), 275 
__M); 276 } 277 278 extern __inline __m512i 279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 280 _mm512_broadcast_i32x2 (__m128i __A) 281 { 282 return (__m512i) 283 __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, 284 (__v16si) 285 _mm512_undefined_epi32 (), 286 (__mmask16) -1); 287 } 288 289 extern __inline __m512i 290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 291 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) 292 { 293 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) 294 __A, 295 (__v16si) 296 __O, __M); 297 } 298 299 extern __inline __m512i 300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 301 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) 302 { 303 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) 304 __A, 305 (__v16si) 306 _mm512_setzero_si512 (), 307 __M); 308 } 309 310 extern __inline __m512 311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 312 _mm512_broadcast_f32x8 (__m256 __A) 313 { 314 return (__m512) 315 __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, 316 _mm512_undefined_ps (), 317 (__mmask16) -1); 318 } 319 320 extern __inline __m512 321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 322 _mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A) 323 { 324 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, 325 (__v16sf)__O, 326 __M); 327 } 328 329 extern __inline __m512 330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 331 _mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A) 332 { 333 return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A, 334 (__v16sf) 335 _mm512_setzero_ps (), 336 __M); 337 } 338 339 extern __inline __m512i 340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 341 _mm512_broadcast_i32x8 (__m256i __A) 342 { 343 return (__m512i) 344 
__builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A, 345 (__v16si) 346 _mm512_undefined_epi32 (), 347 (__mmask16) -1); 348 } 349 350 extern __inline __m512i 351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 352 _mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A) 353 { 354 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) 355 __A, 356 (__v16si)__O, 357 __M); 358 } 359 360 extern __inline __m512i 361 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 362 _mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A) 363 { 364 return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) 365 __A, 366 (__v16si) 367 _mm512_setzero_si512 (), 368 __M); 369 } 370 371 extern __inline __m512i 372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 373 _mm512_mullo_epi64 (__m512i __A, __m512i __B) 374 { 375 return (__m512i) ((__v8du) __A * (__v8du) __B); 376 } 377 378 extern __inline __m512i 379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 380 _mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 381 __m512i __B) 382 { 383 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, 384 (__v8di) __B, 385 (__v8di) __W, 386 (__mmask8) __U); 387 } 388 389 extern __inline __m512i 390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 391 _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 392 { 393 return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, 394 (__v8di) __B, 395 (__v8di) 396 _mm512_setzero_si512 (), 397 (__mmask8) __U); 398 } 399 400 extern __inline __m512d 401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 402 _mm512_xor_pd (__m512d __A, __m512d __B) 403 { 404 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, 405 (__v8df) __B, 406 (__v8df) 407 _mm512_setzero_pd (), 408 (__mmask8) -1); 409 } 410 411 extern __inline __m512d 412 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 413 _mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, 414 __m512d __B) 415 { 416 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, 417 (__v8df) __B, 418 (__v8df) __W, 419 (__mmask8) __U); 420 } 421 422 extern __inline __m512d 423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 424 _mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) 425 { 426 return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, 427 (__v8df) __B, 428 (__v8df) 429 _mm512_setzero_pd (), 430 (__mmask8) __U); 431 } 432 433 extern __inline __m512 434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 435 _mm512_xor_ps (__m512 __A, __m512 __B) 436 { 437 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, 438 (__v16sf) __B, 439 (__v16sf) 440 _mm512_setzero_ps (), 441 (__mmask16) -1); 442 } 443 444 extern __inline __m512 445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 446 _mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 447 { 448 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, 449 (__v16sf) __B, 450 (__v16sf) __W, 451 (__mmask16) __U); 452 } 453 454 extern __inline __m512 455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 456 _mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) 457 { 458 return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, 459 (__v16sf) __B, 460 (__v16sf) 461 _mm512_setzero_ps (), 462 (__mmask16) __U); 463 } 464 465 extern __inline __m512d 466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 467 _mm512_or_pd (__m512d __A, __m512d __B) 468 { 469 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, 470 (__v8df) __B, 471 (__v8df) 472 _mm512_setzero_pd (), 473 (__mmask8) -1); 474 } 475 476 extern __inline __m512d 477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 478 _mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 479 { 480 return 
(__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, 481 (__v8df) __B, 482 (__v8df) __W, 483 (__mmask8) __U); 484 } 485 486 extern __inline __m512d 487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 488 _mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) 489 { 490 return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, 491 (__v8df) __B, 492 (__v8df) 493 _mm512_setzero_pd (), 494 (__mmask8) __U); 495 } 496 497 extern __inline __m512 498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 499 _mm512_or_ps (__m512 __A, __m512 __B) 500 { 501 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, 502 (__v16sf) __B, 503 (__v16sf) 504 _mm512_setzero_ps (), 505 (__mmask16) -1); 506 } 507 508 extern __inline __m512 509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 510 _mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 511 { 512 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, 513 (__v16sf) __B, 514 (__v16sf) __W, 515 (__mmask16) __U); 516 } 517 518 extern __inline __m512 519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 520 _mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) 521 { 522 return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, 523 (__v16sf) __B, 524 (__v16sf) 525 _mm512_setzero_ps (), 526 (__mmask16) __U); 527 } 528 529 extern __inline __m512d 530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 531 _mm512_and_pd (__m512d __A, __m512d __B) 532 { 533 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, 534 (__v8df) __B, 535 (__v8df) 536 _mm512_setzero_pd (), 537 (__mmask8) -1); 538 } 539 540 extern __inline __m512d 541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 542 _mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, 543 __m512d __B) 544 { 545 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, 546 (__v8df) __B, 547 (__v8df) __W, 548 (__mmask8) __U); 549 } 
550 551 extern __inline __m512d 552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 553 _mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) 554 { 555 return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, 556 (__v8df) __B, 557 (__v8df) 558 _mm512_setzero_pd (), 559 (__mmask8) __U); 560 } 561 562 extern __inline __m512 563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 564 _mm512_and_ps (__m512 __A, __m512 __B) 565 { 566 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, 567 (__v16sf) __B, 568 (__v16sf) 569 _mm512_setzero_ps (), 570 (__mmask16) -1); 571 } 572 573 extern __inline __m512 574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 575 _mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 576 { 577 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, 578 (__v16sf) __B, 579 (__v16sf) __W, 580 (__mmask16) __U); 581 } 582 583 extern __inline __m512 584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 585 _mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) 586 { 587 return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, 588 (__v16sf) __B, 589 (__v16sf) 590 _mm512_setzero_ps (), 591 (__mmask16) __U); 592 } 593 594 extern __inline __m512d 595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 596 _mm512_andnot_pd (__m512d __A, __m512d __B) 597 { 598 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, 599 (__v8df) __B, 600 (__v8df) 601 _mm512_setzero_pd (), 602 (__mmask8) -1); 603 } 604 605 extern __inline __m512d 606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 607 _mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, 608 __m512d __B) 609 { 610 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, 611 (__v8df) __B, 612 (__v8df) __W, 613 (__mmask8) __U); 614 } 615 616 extern __inline __m512d 617 __attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 618 _mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) 619 { 620 return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, 621 (__v8df) __B, 622 (__v8df) 623 _mm512_setzero_pd (), 624 (__mmask8) __U); 625 } 626 627 extern __inline __m512 628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 629 _mm512_andnot_ps (__m512 __A, __m512 __B) 630 { 631 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, 632 (__v16sf) __B, 633 (__v16sf) 634 _mm512_setzero_ps (), 635 (__mmask16) -1); 636 } 637 638 extern __inline __m512 639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 640 _mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, 641 __m512 __B) 642 { 643 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, 644 (__v16sf) __B, 645 (__v16sf) __W, 646 (__mmask16) __U); 647 } 648 649 extern __inline __m512 650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 651 _mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) 652 { 653 return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, 654 (__v16sf) __B, 655 (__v16sf) 656 _mm512_setzero_ps (), 657 (__mmask16) __U); 658 } 659 660 extern __inline __mmask16 661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 662 _mm512_movepi32_mask (__m512i __A) 663 { 664 return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); 665 } 666 667 extern __inline __mmask8 668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 669 _mm512_movepi64_mask (__m512i __A) 670 { 671 return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); 672 } 673 674 extern __inline __m512i 675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 676 _mm512_movm_epi32 (__mmask16 __A) 677 { 678 return (__m512i) __builtin_ia32_cvtmask2d512 (__A); 679 } 680 681 extern __inline __m512i 682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 683 _mm512_movm_epi64 (__mmask8 __A) 
684 { 685 return (__m512i) __builtin_ia32_cvtmask2q512 (__A); 686 } 687 688 extern __inline __m512i 689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 690 _mm512_cvttpd_epi64 (__m512d __A) 691 { 692 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 693 (__v8di) 694 _mm512_setzero_si512 (), 695 (__mmask8) -1, 696 _MM_FROUND_CUR_DIRECTION); 697 } 698 699 extern __inline __m512i 700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 701 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) 702 { 703 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 704 (__v8di) __W, 705 (__mmask8) __U, 706 _MM_FROUND_CUR_DIRECTION); 707 } 708 709 extern __inline __m512i 710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 711 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) 712 { 713 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 714 (__v8di) 715 _mm512_setzero_si512 (), 716 (__mmask8) __U, 717 _MM_FROUND_CUR_DIRECTION); 718 } 719 720 extern __inline __m512i 721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 722 _mm512_cvttpd_epu64 (__m512d __A) 723 { 724 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 725 (__v8di) 726 _mm512_setzero_si512 (), 727 (__mmask8) -1, 728 _MM_FROUND_CUR_DIRECTION); 729 } 730 731 extern __inline __m512i 732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 733 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) 734 { 735 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 736 (__v8di) __W, 737 (__mmask8) __U, 738 _MM_FROUND_CUR_DIRECTION); 739 } 740 741 extern __inline __m512i 742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 743 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) 744 { 745 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 746 (__v8di) 747 _mm512_setzero_si512 (), 748 (__mmask8) __U, 749 
_MM_FROUND_CUR_DIRECTION); 750 } 751 752 extern __inline __m512i 753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 754 _mm512_cvttps_epi64 (__m256 __A) 755 { 756 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 757 (__v8di) 758 _mm512_setzero_si512 (), 759 (__mmask8) -1, 760 _MM_FROUND_CUR_DIRECTION); 761 } 762 763 extern __inline __m512i 764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 765 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) 766 { 767 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 768 (__v8di) __W, 769 (__mmask8) __U, 770 _MM_FROUND_CUR_DIRECTION); 771 } 772 773 extern __inline __m512i 774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 775 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) 776 { 777 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 778 (__v8di) 779 _mm512_setzero_si512 (), 780 (__mmask8) __U, 781 _MM_FROUND_CUR_DIRECTION); 782 } 783 784 extern __inline __m512i 785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 786 _mm512_cvttps_epu64 (__m256 __A) 787 { 788 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 789 (__v8di) 790 _mm512_setzero_si512 (), 791 (__mmask8) -1, 792 _MM_FROUND_CUR_DIRECTION); 793 } 794 795 extern __inline __m512i 796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 797 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) 798 { 799 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 800 (__v8di) __W, 801 (__mmask8) __U, 802 _MM_FROUND_CUR_DIRECTION); 803 } 804 805 extern __inline __m512i 806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 807 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) 808 { 809 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 810 (__v8di) 811 _mm512_setzero_si512 (), 812 (__mmask8) __U, 813 _MM_FROUND_CUR_DIRECTION); 814 } 815 816 extern 
__inline __m512i 817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 818 _mm512_cvtpd_epi64 (__m512d __A) 819 { 820 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 821 (__v8di) 822 _mm512_setzero_si512 (), 823 (__mmask8) -1, 824 _MM_FROUND_CUR_DIRECTION); 825 } 826 827 extern __inline __m512i 828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 829 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) 830 { 831 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 832 (__v8di) __W, 833 (__mmask8) __U, 834 _MM_FROUND_CUR_DIRECTION); 835 } 836 837 extern __inline __m512i 838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 839 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) 840 { 841 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 842 (__v8di) 843 _mm512_setzero_si512 (), 844 (__mmask8) __U, 845 _MM_FROUND_CUR_DIRECTION); 846 } 847 848 extern __inline __m512i 849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 850 _mm512_cvtpd_epu64 (__m512d __A) 851 { 852 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 853 (__v8di) 854 _mm512_setzero_si512 (), 855 (__mmask8) -1, 856 _MM_FROUND_CUR_DIRECTION); 857 } 858 859 extern __inline __m512i 860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 861 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) 862 { 863 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 864 (__v8di) __W, 865 (__mmask8) __U, 866 _MM_FROUND_CUR_DIRECTION); 867 } 868 869 extern __inline __m512i 870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 871 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) 872 { 873 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 874 (__v8di) 875 _mm512_setzero_si512 (), 876 (__mmask8) __U, 877 _MM_FROUND_CUR_DIRECTION); 878 } 879 880 extern __inline __m512i 881 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 882 _mm512_cvtps_epi64 (__m256 __A) 883 { 884 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 885 (__v8di) 886 _mm512_setzero_si512 (), 887 (__mmask8) -1, 888 _MM_FROUND_CUR_DIRECTION); 889 } 890 891 extern __inline __m512i 892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 893 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) 894 { 895 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 896 (__v8di) __W, 897 (__mmask8) __U, 898 _MM_FROUND_CUR_DIRECTION); 899 } 900 901 extern __inline __m512i 902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 903 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) 904 { 905 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 906 (__v8di) 907 _mm512_setzero_si512 (), 908 (__mmask8) __U, 909 _MM_FROUND_CUR_DIRECTION); 910 } 911 912 extern __inline __m512i 913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 914 _mm512_cvtps_epu64 (__m256 __A) 915 { 916 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 917 (__v8di) 918 _mm512_setzero_si512 (), 919 (__mmask8) -1, 920 _MM_FROUND_CUR_DIRECTION); 921 } 922 923 extern __inline __m512i 924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 925 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) 926 { 927 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 928 (__v8di) __W, 929 (__mmask8) __U, 930 _MM_FROUND_CUR_DIRECTION); 931 } 932 933 extern __inline __m512i 934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 935 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) 936 { 937 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 938 (__v8di) 939 _mm512_setzero_si512 (), 940 (__mmask8) __U, 941 _MM_FROUND_CUR_DIRECTION); 942 } 943 944 extern __inline __m256 945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 946 _mm512_cvtepi64_ps 
(__m512i __A) 947 { 948 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 949 (__v8sf) 950 _mm256_setzero_ps (), 951 (__mmask8) -1, 952 _MM_FROUND_CUR_DIRECTION); 953 } 954 955 extern __inline __m256 956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 957 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) 958 { 959 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 960 (__v8sf) __W, 961 (__mmask8) __U, 962 _MM_FROUND_CUR_DIRECTION); 963 } 964 965 extern __inline __m256 966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 967 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) 968 { 969 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 970 (__v8sf) 971 _mm256_setzero_ps (), 972 (__mmask8) __U, 973 _MM_FROUND_CUR_DIRECTION); 974 } 975 976 extern __inline __m256 977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 978 _mm512_cvtepu64_ps (__m512i __A) 979 { 980 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 981 (__v8sf) 982 _mm256_setzero_ps (), 983 (__mmask8) -1, 984 _MM_FROUND_CUR_DIRECTION); 985 } 986 987 extern __inline __m256 988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 989 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) 990 { 991 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 992 (__v8sf) __W, 993 (__mmask8) __U, 994 _MM_FROUND_CUR_DIRECTION); 995 } 996 997 extern __inline __m256 998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 999 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) 1000 { 1001 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 1002 (__v8sf) 1003 _mm256_setzero_ps (), 1004 (__mmask8) __U, 1005 _MM_FROUND_CUR_DIRECTION); 1006 } 1007 1008 extern __inline __m512d 1009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1010 _mm512_cvtepi64_pd (__m512i __A) 1011 { 1012 return (__m512d) 
__builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1013 (__v8df) 1014 _mm512_setzero_pd (), 1015 (__mmask8) -1, 1016 _MM_FROUND_CUR_DIRECTION); 1017 } 1018 1019 extern __inline __m512d 1020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1021 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) 1022 { 1023 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1024 (__v8df) __W, 1025 (__mmask8) __U, 1026 _MM_FROUND_CUR_DIRECTION); 1027 } 1028 1029 extern __inline __m512d 1030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1031 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) 1032 { 1033 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1034 (__v8df) 1035 _mm512_setzero_pd (), 1036 (__mmask8) __U, 1037 _MM_FROUND_CUR_DIRECTION); 1038 } 1039 1040 extern __inline __m512d 1041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1042 _mm512_cvtepu64_pd (__m512i __A) 1043 { 1044 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1045 (__v8df) 1046 _mm512_setzero_pd (), 1047 (__mmask8) -1, 1048 _MM_FROUND_CUR_DIRECTION); 1049 } 1050 1051 extern __inline __m512d 1052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1053 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) 1054 { 1055 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1056 (__v8df) __W, 1057 (__mmask8) __U, 1058 _MM_FROUND_CUR_DIRECTION); 1059 } 1060 1061 extern __inline __m512d 1062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1063 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) 1064 { 1065 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1066 (__v8df) 1067 _mm512_setzero_pd (), 1068 (__mmask8) __U, 1069 _MM_FROUND_CUR_DIRECTION); 1070 } 1071 1072 #ifdef __OPTIMIZE__ 1073 extern __inline __mmask8 1074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1075 _kshiftli_mask8 (__mmask8 __A, unsigned 
int __B) 1076 { 1077 return (__mmask8) __builtin_ia32_kshiftliqi ((__mmask8) __A, (__mmask8) __B); 1078 } 1079 1080 extern __inline __mmask8 1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1082 _kshiftri_mask8 (__mmask8 __A, unsigned int __B) 1083 { 1084 return (__mmask8) __builtin_ia32_kshiftriqi ((__mmask8) __A, (__mmask8) __B); 1085 } 1086 1087 extern __inline __m512d 1088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1089 _mm512_range_pd (__m512d __A, __m512d __B, int __C) 1090 { 1091 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 1092 (__v8df) __B, __C, 1093 (__v8df) 1094 _mm512_setzero_pd (), 1095 (__mmask8) -1, 1096 _MM_FROUND_CUR_DIRECTION); 1097 } 1098 1099 extern __inline __m512d 1100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1101 _mm512_mask_range_pd (__m512d __W, __mmask8 __U, 1102 __m512d __A, __m512d __B, int __C) 1103 { 1104 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 1105 (__v8df) __B, __C, 1106 (__v8df) __W, 1107 (__mmask8) __U, 1108 _MM_FROUND_CUR_DIRECTION); 1109 } 1110 1111 extern __inline __m512d 1112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1113 _mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C) 1114 { 1115 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 1116 (__v8df) __B, __C, 1117 (__v8df) 1118 _mm512_setzero_pd (), 1119 (__mmask8) __U, 1120 _MM_FROUND_CUR_DIRECTION); 1121 } 1122 1123 extern __inline __m512 1124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1125 _mm512_range_ps (__m512 __A, __m512 __B, int __C) 1126 { 1127 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 1128 (__v16sf) __B, __C, 1129 (__v16sf) 1130 _mm512_setzero_ps (), 1131 (__mmask16) -1, 1132 _MM_FROUND_CUR_DIRECTION); 1133 } 1134 1135 extern __inline __m512 1136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1137 _mm512_mask_range_ps (__m512 __W, 
__mmask16 __U, 1138 __m512 __A, __m512 __B, int __C) 1139 { 1140 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 1141 (__v16sf) __B, __C, 1142 (__v16sf) __W, 1143 (__mmask16) __U, 1144 _MM_FROUND_CUR_DIRECTION); 1145 } 1146 1147 extern __inline __m512 1148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1149 _mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C) 1150 { 1151 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 1152 (__v16sf) __B, __C, 1153 (__v16sf) 1154 _mm512_setzero_ps (), 1155 (__mmask16) __U, 1156 _MM_FROUND_CUR_DIRECTION); 1157 } 1158 1159 extern __inline __m128d 1160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1161 _mm_reduce_sd (__m128d __A, __m128d __B, int __C) 1162 { 1163 return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, 1164 (__v2df) __B, __C, 1165 (__v2df) _mm_setzero_pd (), 1166 (__mmask8) -1); 1167 } 1168 1169 extern __inline __m128d 1170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1171 _mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A, 1172 __m128d __B, int __C) 1173 { 1174 return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, 1175 (__v2df) __B, __C, 1176 (__v2df) __W, 1177 (__mmask8) __U); 1178 } 1179 1180 extern __inline __m128d 1181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1182 _mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) 1183 { 1184 return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, 1185 (__v2df) __B, __C, 1186 (__v2df) _mm_setzero_pd (), 1187 (__mmask8) __U); 1188 } 1189 1190 extern __inline __m128 1191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1192 _mm_reduce_ss (__m128 __A, __m128 __B, int __C) 1193 { 1194 return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, 1195 (__v4sf) __B, __C, 1196 (__v4sf) _mm_setzero_ps (), 1197 (__mmask8) -1); 1198 } 1199 1200 1201 extern __inline __m128 1202 __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 1203 _mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A, 1204 __m128 __B, int __C) 1205 { 1206 return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, 1207 (__v4sf) __B, __C, 1208 (__v4sf) __W, 1209 (__mmask8) __U); 1210 } 1211 1212 extern __inline __m128 1213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1214 _mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) 1215 { 1216 return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, 1217 (__v4sf) __B, __C, 1218 (__v4sf) _mm_setzero_ps (), 1219 (__mmask8) __U); 1220 } 1221 1222 extern __inline __m128d 1223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1224 _mm_range_sd (__m128d __A, __m128d __B, int __C) 1225 { 1226 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1227 (__v2df) __B, __C, 1228 (__v2df) 1229 _mm_setzero_pd (), 1230 (__mmask8) -1, 1231 _MM_FROUND_CUR_DIRECTION); 1232 } 1233 1234 extern __inline __m128d 1235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1236 _mm_mask_range_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C) 1237 { 1238 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1239 (__v2df) __B, __C, 1240 (__v2df) __W, 1241 (__mmask8) __U, 1242 _MM_FROUND_CUR_DIRECTION); 1243 } 1244 1245 extern __inline __m128d 1246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1247 _mm_maskz_range_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) 1248 { 1249 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1250 (__v2df) __B, __C, 1251 (__v2df) 1252 _mm_setzero_pd (), 1253 (__mmask8) __U, 1254 _MM_FROUND_CUR_DIRECTION); 1255 } 1256 1257 extern __inline __m128 1258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1259 _mm_range_ss (__m128 __A, __m128 __B, int __C) 1260 { 1261 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1262 (__v4sf) 
__B, __C, 1263 (__v4sf) 1264 _mm_setzero_ps (), 1265 (__mmask8) -1, 1266 _MM_FROUND_CUR_DIRECTION); 1267 } 1268 1269 extern __inline __m128 1270 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1271 _mm_mask_range_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __C) 1272 { 1273 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1274 (__v4sf) __B, __C, 1275 (__v4sf) __W, 1276 (__mmask8) __U, 1277 _MM_FROUND_CUR_DIRECTION); 1278 } 1279 1280 1281 extern __inline __m128 1282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1283 _mm_maskz_range_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) 1284 { 1285 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1286 (__v4sf) __B, __C, 1287 (__v4sf) 1288 _mm_setzero_ps (), 1289 (__mmask8) __U, 1290 _MM_FROUND_CUR_DIRECTION); 1291 } 1292 1293 extern __inline __m128d 1294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1295 _mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R) 1296 { 1297 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1298 (__v2df) __B, __C, 1299 (__v2df) 1300 _mm_setzero_pd (), 1301 (__mmask8) -1, __R); 1302 } 1303 1304 extern __inline __m128d 1305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1306 _mm_mask_range_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, 1307 int __C, const int __R) 1308 { 1309 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1310 (__v2df) __B, __C, 1311 (__v2df) __W, 1312 (__mmask8) __U, __R); 1313 } 1314 1315 extern __inline __m128d 1316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1317 _mm_maskz_range_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C, 1318 const int __R) 1319 { 1320 return (__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df) __A, 1321 (__v2df) __B, __C, 1322 (__v2df) 1323 _mm_setzero_pd (), 1324 (__mmask8) __U, __R); 1325 } 1326 1327 extern 
__inline __m128 1328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1329 _mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R) 1330 { 1331 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1332 (__v4sf) __B, __C, 1333 (__v4sf) 1334 _mm_setzero_ps (), 1335 (__mmask8) -1, __R); 1336 } 1337 1338 extern __inline __m128 1339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1340 _mm_mask_range_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, 1341 int __C, const int __R) 1342 { 1343 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1344 (__v4sf) __B, __C, 1345 (__v4sf) __W, 1346 (__mmask8) __U, __R); 1347 } 1348 1349 extern __inline __m128 1350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1351 _mm_maskz_range_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C, 1352 const int __R) 1353 { 1354 return (__m128) __builtin_ia32_rangess128_mask_round ((__v4sf) __A, 1355 (__v4sf) __B, __C, 1356 (__v4sf) 1357 _mm_setzero_ps (), 1358 (__mmask8) __U, __R); 1359 } 1360 1361 extern __inline __mmask8 1362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1363 _mm_fpclass_ss_mask (__m128 __A, const int __imm) 1364 { 1365 return (__mmask8) __builtin_ia32_fpclassss ((__v4sf) __A, __imm); 1366 } 1367 1368 extern __inline __mmask8 1369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1370 _mm_fpclass_sd_mask (__m128d __A, const int __imm) 1371 { 1372 return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm); 1373 } 1374 1375 extern __inline __m512i 1376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1377 _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R) 1378 { 1379 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 1380 (__v8di) 1381 _mm512_setzero_si512 (), 1382 (__mmask8) -1, 1383 __R); 1384 } 1385 1386 extern __inline __m512i 1387 __attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 1388 _mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, 1389 const int __R) 1390 { 1391 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 1392 (__v8di) __W, 1393 (__mmask8) __U, 1394 __R); 1395 } 1396 1397 extern __inline __m512i 1398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1399 _mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A, 1400 const int __R) 1401 { 1402 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 1403 (__v8di) 1404 _mm512_setzero_si512 (), 1405 (__mmask8) __U, 1406 __R); 1407 } 1408 1409 extern __inline __m512i 1410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1411 _mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R) 1412 { 1413 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 1414 (__v8di) 1415 _mm512_setzero_si512 (), 1416 (__mmask8) -1, 1417 __R); 1418 } 1419 1420 extern __inline __m512i 1421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1422 _mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, 1423 const int __R) 1424 { 1425 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 1426 (__v8di) __W, 1427 (__mmask8) __U, 1428 __R); 1429 } 1430 1431 extern __inline __m512i 1432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1433 _mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A, 1434 const int __R) 1435 { 1436 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 1437 (__v8di) 1438 _mm512_setzero_si512 (), 1439 (__mmask8) __U, 1440 __R); 1441 } 1442 1443 extern __inline __m512i 1444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1445 _mm512_cvtt_roundps_epi64 (__m256 __A, const int __R) 1446 { 1447 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 1448 (__v8di) 1449 _mm512_setzero_si512 (), 1450 (__mmask8) -1, 1451 __R); 1452 } 1453 1454 extern __inline __m512i 
1455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1456 _mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, 1457 const int __R) 1458 { 1459 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 1460 (__v8di) __W, 1461 (__mmask8) __U, 1462 __R); 1463 } 1464 1465 extern __inline __m512i 1466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1467 _mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A, 1468 const int __R) 1469 { 1470 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 1471 (__v8di) 1472 _mm512_setzero_si512 (), 1473 (__mmask8) __U, 1474 __R); 1475 } 1476 1477 extern __inline __m512i 1478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1479 _mm512_cvtt_roundps_epu64 (__m256 __A, const int __R) 1480 { 1481 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 1482 (__v8di) 1483 _mm512_setzero_si512 (), 1484 (__mmask8) -1, 1485 __R); 1486 } 1487 1488 extern __inline __m512i 1489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1490 _mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, 1491 const int __R) 1492 { 1493 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 1494 (__v8di) __W, 1495 (__mmask8) __U, 1496 __R); 1497 } 1498 1499 extern __inline __m512i 1500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1501 _mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A, 1502 const int __R) 1503 { 1504 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 1505 (__v8di) 1506 _mm512_setzero_si512 (), 1507 (__mmask8) __U, 1508 __R); 1509 } 1510 1511 extern __inline __m512i 1512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1513 _mm512_cvt_roundpd_epi64 (__m512d __A, const int __R) 1514 { 1515 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 1516 (__v8di) 1517 _mm512_setzero_si512 (), 1518 (__mmask8) -1, 1519 __R); 1520 } 
1521 1522 extern __inline __m512i 1523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1524 _mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A, 1525 const int __R) 1526 { 1527 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 1528 (__v8di) __W, 1529 (__mmask8) __U, 1530 __R); 1531 } 1532 1533 extern __inline __m512i 1534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1535 _mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A, 1536 const int __R) 1537 { 1538 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 1539 (__v8di) 1540 _mm512_setzero_si512 (), 1541 (__mmask8) __U, 1542 __R); 1543 } 1544 1545 extern __inline __m512i 1546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1547 _mm512_cvt_roundpd_epu64 (__m512d __A, const int __R) 1548 { 1549 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 1550 (__v8di) 1551 _mm512_setzero_si512 (), 1552 (__mmask8) -1, 1553 __R); 1554 } 1555 1556 extern __inline __m512i 1557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1558 _mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A, 1559 const int __R) 1560 { 1561 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 1562 (__v8di) __W, 1563 (__mmask8) __U, 1564 __R); 1565 } 1566 1567 extern __inline __m512i 1568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1569 _mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A, 1570 const int __R) 1571 { 1572 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 1573 (__v8di) 1574 _mm512_setzero_si512 (), 1575 (__mmask8) __U, 1576 __R); 1577 } 1578 1579 extern __inline __m512i 1580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1581 _mm512_cvt_roundps_epi64 (__m256 __A, const int __R) 1582 { 1583 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 1584 (__v8di) 1585 _mm512_setzero_si512 (), 1586 
(__mmask8) -1, 1587 __R); 1588 } 1589 1590 extern __inline __m512i 1591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1592 _mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A, 1593 const int __R) 1594 { 1595 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 1596 (__v8di) __W, 1597 (__mmask8) __U, 1598 __R); 1599 } 1600 1601 extern __inline __m512i 1602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1603 _mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A, 1604 const int __R) 1605 { 1606 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 1607 (__v8di) 1608 _mm512_setzero_si512 (), 1609 (__mmask8) __U, 1610 __R); 1611 } 1612 1613 extern __inline __m512i 1614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1615 _mm512_cvt_roundps_epu64 (__m256 __A, const int __R) 1616 { 1617 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 1618 (__v8di) 1619 _mm512_setzero_si512 (), 1620 (__mmask8) -1, 1621 __R); 1622 } 1623 1624 extern __inline __m512i 1625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1626 _mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A, 1627 const int __R) 1628 { 1629 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 1630 (__v8di) __W, 1631 (__mmask8) __U, 1632 __R); 1633 } 1634 1635 extern __inline __m512i 1636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1637 _mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A, 1638 const int __R) 1639 { 1640 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 1641 (__v8di) 1642 _mm512_setzero_si512 (), 1643 (__mmask8) __U, 1644 __R); 1645 } 1646 1647 extern __inline __m256 1648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1649 _mm512_cvt_roundepi64_ps (__m512i __A, const int __R) 1650 { 1651 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 1652 (__v8sf) 1653 
_mm256_setzero_ps (), 1654 (__mmask8) -1, 1655 __R); 1656 } 1657 1658 extern __inline __m256 1659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1660 _mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A, 1661 const int __R) 1662 { 1663 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 1664 (__v8sf) __W, 1665 (__mmask8) __U, 1666 __R); 1667 } 1668 1669 extern __inline __m256 1670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1671 _mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A, 1672 const int __R) 1673 { 1674 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 1675 (__v8sf) 1676 _mm256_setzero_ps (), 1677 (__mmask8) __U, 1678 __R); 1679 } 1680 1681 extern __inline __m256 1682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1683 _mm512_cvt_roundepu64_ps (__m512i __A, const int __R) 1684 { 1685 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 1686 (__v8sf) 1687 _mm256_setzero_ps (), 1688 (__mmask8) -1, 1689 __R); 1690 } 1691 1692 extern __inline __m256 1693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1694 _mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A, 1695 const int __R) 1696 { 1697 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 1698 (__v8sf) __W, 1699 (__mmask8) __U, 1700 __R); 1701 } 1702 1703 extern __inline __m256 1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1705 _mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A, 1706 const int __R) 1707 { 1708 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 1709 (__v8sf) 1710 _mm256_setzero_ps (), 1711 (__mmask8) __U, 1712 __R); 1713 } 1714 1715 extern __inline __m512d 1716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1717 _mm512_cvt_roundepi64_pd (__m512i __A, const int __R) 1718 { 1719 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1720 (__v8df) 
1721 _mm512_setzero_pd (), 1722 (__mmask8) -1, 1723 __R); 1724 } 1725 1726 extern __inline __m512d 1727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1728 _mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A, 1729 const int __R) 1730 { 1731 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1732 (__v8df) __W, 1733 (__mmask8) __U, 1734 __R); 1735 } 1736 1737 extern __inline __m512d 1738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1739 _mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A, 1740 const int __R) 1741 { 1742 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 1743 (__v8df) 1744 _mm512_setzero_pd (), 1745 (__mmask8) __U, 1746 __R); 1747 } 1748 1749 extern __inline __m512d 1750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1751 _mm512_cvt_roundepu64_pd (__m512i __A, const int __R) 1752 { 1753 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1754 (__v8df) 1755 _mm512_setzero_pd (), 1756 (__mmask8) -1, 1757 __R); 1758 } 1759 1760 extern __inline __m512d 1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1762 _mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A, 1763 const int __R) 1764 { 1765 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1766 (__v8df) __W, 1767 (__mmask8) __U, 1768 __R); 1769 } 1770 1771 extern __inline __m512d 1772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1773 _mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A, 1774 const int __R) 1775 { 1776 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 1777 (__v8df) 1778 _mm512_setzero_pd (), 1779 (__mmask8) __U, 1780 __R); 1781 } 1782 1783 extern __inline __m512d 1784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1785 _mm512_reduce_pd (__m512d __A, int __B) 1786 { 1787 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, 1788 
(__v8df) 1789 _mm512_setzero_pd (), 1790 (__mmask8) -1); 1791 } 1792 1793 extern __inline __m512d 1794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1795 _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B) 1796 { 1797 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, 1798 (__v8df) __W, 1799 (__mmask8) __U); 1800 } 1801 1802 extern __inline __m512d 1803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1804 _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B) 1805 { 1806 return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, 1807 (__v8df) 1808 _mm512_setzero_pd (), 1809 (__mmask8) __U); 1810 } 1811 1812 extern __inline __m512 1813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1814 _mm512_reduce_ps (__m512 __A, int __B) 1815 { 1816 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, 1817 (__v16sf) 1818 _mm512_setzero_ps (), 1819 (__mmask16) -1); 1820 } 1821 1822 extern __inline __m512 1823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1824 _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B) 1825 { 1826 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, 1827 (__v16sf) __W, 1828 (__mmask16) __U); 1829 } 1830 1831 extern __inline __m512 1832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1833 _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B) 1834 { 1835 return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, 1836 (__v16sf) 1837 _mm512_setzero_ps (), 1838 (__mmask16) __U); 1839 } 1840 1841 extern __inline __m256 1842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1843 _mm512_extractf32x8_ps (__m512 __A, const int __imm) 1844 { 1845 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A, 1846 __imm, 1847 (__v8sf) 1848 _mm256_setzero_ps (), 1849 (__mmask8) -1); 1850 } 1851 1852 extern __inline __m256 1853 
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1854 _mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A, 1855 const int __imm) 1856 { 1857 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A, 1858 __imm, 1859 (__v8sf) __W, 1860 (__mmask8) __U); 1861 } 1862 1863 extern __inline __m256 1864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1865 _mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A, 1866 const int __imm) 1867 { 1868 return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A, 1869 __imm, 1870 (__v8sf) 1871 _mm256_setzero_ps (), 1872 (__mmask8) __U); 1873 } 1874 1875 extern __inline __m128d 1876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1877 _mm512_extractf64x2_pd (__m512d __A, const int __imm) 1878 { 1879 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A, 1880 __imm, 1881 (__v2df) 1882 _mm_setzero_pd (), 1883 (__mmask8) -1); 1884 } 1885 1886 extern __inline __m128d 1887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1888 _mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A, 1889 const int __imm) 1890 { 1891 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A, 1892 __imm, 1893 (__v2df) __W, 1894 (__mmask8) 1895 __U); 1896 } 1897 1898 extern __inline __m128d 1899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1900 _mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A, 1901 const int __imm) 1902 { 1903 return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A, 1904 __imm, 1905 (__v2df) 1906 _mm_setzero_pd (), 1907 (__mmask8) 1908 __U); 1909 } 1910 1911 extern __inline __m256i 1912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1913 _mm512_extracti32x8_epi32 (__m512i __A, const int __imm) 1914 { 1915 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A, 1916 __imm, 1917 (__v8si) 1918 _mm256_setzero_si256 (), 1919 (__mmask8) 
-1); 1920 } 1921 1922 extern __inline __m256i 1923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1924 _mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A, 1925 const int __imm) 1926 { 1927 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A, 1928 __imm, 1929 (__v8si) __W, 1930 (__mmask8) __U); 1931 } 1932 1933 extern __inline __m256i 1934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1935 _mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A, 1936 const int __imm) 1937 { 1938 return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A, 1939 __imm, 1940 (__v8si) 1941 _mm256_setzero_si256 (), 1942 (__mmask8) __U); 1943 } 1944 1945 extern __inline __m128i 1946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1947 _mm512_extracti64x2_epi64 (__m512i __A, const int __imm) 1948 { 1949 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A, 1950 __imm, 1951 (__v2di) 1952 _mm_setzero_si128 (), 1953 (__mmask8) -1); 1954 } 1955 1956 extern __inline __m128i 1957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1958 _mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A, 1959 const int __imm) 1960 { 1961 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A, 1962 __imm, 1963 (__v2di) __W, 1964 (__mmask8) 1965 __U); 1966 } 1967 1968 extern __inline __m128i 1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1970 _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A, 1971 const int __imm) 1972 { 1973 return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A, 1974 __imm, 1975 (__v2di) 1976 _mm_setzero_si128 (), 1977 (__mmask8) 1978 __U); 1979 } 1980 1981 extern __inline __m512d 1982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1983 _mm512_range_round_pd (__m512d __A, __m512d __B, int __C, 1984 const int __R) 1985 { 1986 return (__m512d) 
__builtin_ia32_rangepd512_mask ((__v8df) __A, 1987 (__v8df) __B, __C, 1988 (__v8df) 1989 _mm512_setzero_pd (), 1990 (__mmask8) -1, 1991 __R); 1992 } 1993 1994 extern __inline __m512d 1995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1996 _mm512_mask_range_round_pd (__m512d __W, __mmask8 __U, 1997 __m512d __A, __m512d __B, int __C, 1998 const int __R) 1999 { 2000 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 2001 (__v8df) __B, __C, 2002 (__v8df) __W, 2003 (__mmask8) __U, 2004 __R); 2005 } 2006 2007 extern __inline __m512d 2008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2009 _mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2010 int __C, const int __R) 2011 { 2012 return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A, 2013 (__v8df) __B, __C, 2014 (__v8df) 2015 _mm512_setzero_pd (), 2016 (__mmask8) __U, 2017 __R); 2018 } 2019 2020 extern __inline __m512 2021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2022 _mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R) 2023 { 2024 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 2025 (__v16sf) __B, __C, 2026 (__v16sf) 2027 _mm512_setzero_ps (), 2028 (__mmask16) -1, 2029 __R); 2030 } 2031 2032 extern __inline __m512 2033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2034 _mm512_mask_range_round_ps (__m512 __W, __mmask16 __U, 2035 __m512 __A, __m512 __B, int __C, 2036 const int __R) 2037 { 2038 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 2039 (__v16sf) __B, __C, 2040 (__v16sf) __W, 2041 (__mmask16) __U, 2042 __R); 2043 } 2044 2045 extern __inline __m512 2046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2047 _mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 2048 int __C, const int __R) 2049 { 2050 return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A, 2051 (__v16sf) __B, __C, 2052 (__v16sf) 2053 
_mm512_setzero_ps (), 2054 (__mmask16) __U, 2055 __R); 2056 } 2057 2058 extern __inline __m512i 2059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2060 _mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm) 2061 { 2062 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A, 2063 (__v8si) __B, 2064 __imm, 2065 (__v16si) 2066 _mm512_setzero_si512 (), 2067 (__mmask16) -1); 2068 } 2069 2070 extern __inline __m512i 2071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2072 _mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A, 2073 __m256i __B, const int __imm) 2074 { 2075 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A, 2076 (__v8si) __B, 2077 __imm, 2078 (__v16si) __W, 2079 (__mmask16) __U); 2080 } 2081 2082 extern __inline __m512i 2083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2084 _mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B, 2085 const int __imm) 2086 { 2087 return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A, 2088 (__v8si) __B, 2089 __imm, 2090 (__v16si) 2091 _mm512_setzero_si512 (), 2092 (__mmask16) __U); 2093 } 2094 2095 extern __inline __m512 2096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2097 _mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm) 2098 { 2099 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A, 2100 (__v8sf) __B, 2101 __imm, 2102 (__v16sf) 2103 _mm512_setzero_ps (), 2104 (__mmask16) -1); 2105 } 2106 2107 extern __inline __m512 2108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2109 _mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A, 2110 __m256 __B, const int __imm) 2111 { 2112 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A, 2113 (__v8sf) __B, 2114 __imm, 2115 (__v16sf) __W, 2116 (__mmask16) __U); 2117 } 2118 2119 extern __inline __m512 2120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
2121 _mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B, 2122 const int __imm) 2123 { 2124 return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A, 2125 (__v8sf) __B, 2126 __imm, 2127 (__v16sf) 2128 _mm512_setzero_ps (), 2129 (__mmask16) __U); 2130 } 2131 2132 extern __inline __m512i 2133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2134 _mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm) 2135 { 2136 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A, 2137 (__v2di) __B, 2138 __imm, 2139 (__v8di) 2140 _mm512_setzero_si512 (), 2141 (__mmask8) -1); 2142 } 2143 2144 extern __inline __m512i 2145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2146 _mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A, 2147 __m128i __B, const int __imm) 2148 { 2149 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A, 2150 (__v2di) __B, 2151 __imm, 2152 (__v8di) __W, 2153 (__mmask8) 2154 __U); 2155 } 2156 2157 extern __inline __m512i 2158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2159 _mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B, 2160 const int __imm) 2161 { 2162 return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A, 2163 (__v2di) __B, 2164 __imm, 2165 (__v8di) 2166 _mm512_setzero_si512 (), 2167 (__mmask8) 2168 __U); 2169 } 2170 2171 extern __inline __m512d 2172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2173 _mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm) 2174 { 2175 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A, 2176 (__v2df) __B, 2177 __imm, 2178 (__v8df) 2179 _mm512_setzero_pd (), 2180 (__mmask8) -1); 2181 } 2182 2183 extern __inline __m512d 2184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2185 _mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A, 2186 __m128d __B, const int __imm) 2187 { 2188 return (__m512d) 
__builtin_ia32_insertf64x2_512_mask ((__v8df) __A, 2189 (__v2df) __B, 2190 __imm, 2191 (__v8df) __W, 2192 (__mmask8) 2193 __U); 2194 } 2195 2196 extern __inline __m512d 2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2198 _mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B, 2199 const int __imm) 2200 { 2201 return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A, 2202 (__v2df) __B, 2203 __imm, 2204 (__v8df) 2205 _mm512_setzero_pd (), 2206 (__mmask8) 2207 __U); 2208 } 2209 2210 extern __inline __mmask8 2211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2212 _mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A, 2213 const int __imm) 2214 { 2215 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A, 2216 __imm, __U); 2217 } 2218 2219 extern __inline __mmask8 2220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2221 _mm512_fpclass_pd_mask (__m512d __A, const int __imm) 2222 { 2223 return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A, 2224 __imm, 2225 (__mmask8) -1); 2226 } 2227 2228 extern __inline __mmask16 2229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2230 _mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A, 2231 const int __imm) 2232 { 2233 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A, 2234 __imm, __U); 2235 } 2236 2237 extern __inline __mmask16 2238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2239 _mm512_fpclass_ps_mask (__m512 __A, const int __imm) 2240 { 2241 return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A, 2242 __imm, 2243 (__mmask16) -1); 2244 } 2245 2246 #else 2247 #define _kshiftli_mask8(X, Y) \ 2248 ((__mmask8) __builtin_ia32_kshiftliqi ((__mmask8)(X), (__mmask8)(Y))) 2249 2250 #define _kshiftri_mask8(X, Y) \ 2251 ((__mmask8) __builtin_ia32_kshiftriqi ((__mmask8)(X), (__mmask8)(Y))) 2252 2253 #define _mm_range_sd(A, B, C) \ 2254 ((__m128d) 
__builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2255 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2256 (__mmask8) -1, _MM_FROUND_CUR_DIRECTION)) 2257 2258 #define _mm_mask_range_sd(W, U, A, B, C) \ 2259 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2260 (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ 2261 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2262 2263 #define _mm_maskz_range_sd(U, A, B, C) \ 2264 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2265 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2266 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2267 2268 #define _mm_range_ss(A, B, C) \ 2269 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2270 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2271 (__mmask8) -1, _MM_FROUND_CUR_DIRECTION)) 2272 2273 #define _mm_mask_range_ss(W, U, A, B, C) \ 2274 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2275 (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ 2276 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2277 2278 #define _mm_maskz_range_ss(U, A, B, C) \ 2279 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2280 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2281 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 2282 2283 #define _mm_range_round_sd(A, B, C, R) \ 2284 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2285 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2286 (__mmask8) -1, (R))) 2287 2288 #define _mm_mask_range_round_sd(W, U, A, B, C, R) \ 2289 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2290 (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ 2291 (__mmask8)(U), (R))) 2292 2293 #define _mm_maskz_range_round_sd(U, A, B, C, R) \ 2294 ((__m128d) __builtin_ia32_rangesd128_mask_round ((__v2df)(__m128d)(A), \ 2295 (__v2df)(__m128d)(B), (int)(C), (__v2df) 
_mm_setzero_pd (), \ 2296 (__mmask8)(U), (R))) 2297 2298 #define _mm_range_round_ss(A, B, C, R) \ 2299 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2300 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2301 (__mmask8) -1, (R))) 2302 2303 #define _mm_mask_range_round_ss(W, U, A, B, C, R) \ 2304 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2305 (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ 2306 (__mmask8)(U), (R))) 2307 2308 #define _mm_maskz_range_round_ss(U, A, B, C, R) \ 2309 ((__m128) __builtin_ia32_rangess128_mask_round ((__v4sf)(__m128)(A), \ 2310 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2311 (__mmask8)(U), (R))) 2312 2313 #define _mm512_cvtt_roundpd_epi64(A, B) \ 2314 ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di) \ 2315 _mm512_setzero_si512 (), \ 2316 -1, (B))) 2317 2318 #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \ 2319 ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)(W), (U), (B))) 2320 2321 #define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \ 2322 ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2323 2324 #define _mm512_cvtt_roundpd_epu64(A, B) \ 2325 ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2326 2327 #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \ 2328 ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)(W), (U), (B))) 2329 2330 #define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \ 2331 ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2332 2333 #define _mm512_cvtt_roundps_epi64(A, B) \ 2334 ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2335 2336 #define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \ 2337 ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)(W), (U), (B))) 2338 2339 #define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \ 2340 
((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2341 2342 #define _mm512_cvtt_roundps_epu64(A, B) \ 2343 ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2344 2345 #define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \ 2346 ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)(W), (U), (B))) 2347 2348 #define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \ 2349 ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2350 2351 #define _mm512_cvt_roundpd_epi64(A, B) \ 2352 ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2353 2354 #define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \ 2355 ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)(W), (U), (B))) 2356 2357 #define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \ 2358 ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2359 2360 #define _mm512_cvt_roundpd_epu64(A, B) \ 2361 ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2362 2363 #define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \ 2364 ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)(W), (U), (B))) 2365 2366 #define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \ 2367 ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2368 2369 #define _mm512_cvt_roundps_epi64(A, B) \ 2370 ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B))) 2371 2372 #define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \ 2373 ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)(W), (U), (B))) 2374 2375 #define _mm512_maskz_cvt_roundps_epi64(U, A, B) \ 2376 ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2377 2378 #define _mm512_cvt_roundps_epu64(A, B) \ 2379 ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), 
(__v8di)_mm512_setzero_si512 (), -1, (B))) 2380 2381 #define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \ 2382 ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)(W), (U), (B))) 2383 2384 #define _mm512_maskz_cvt_roundps_epu64(U, A, B) \ 2385 ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B))) 2386 2387 #define _mm512_cvt_roundepi64_ps(A, B) \ 2388 ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B))) 2389 2390 #define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \ 2391 ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (W), (U), (B))) 2392 2393 #define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \ 2394 ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B))) 2395 2396 #define _mm512_cvt_roundepu64_ps(A, B) \ 2397 ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B))) 2398 2399 #define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \ 2400 ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (W), (U), (B))) 2401 2402 #define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \ 2403 ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B))) 2404 2405 #define _mm512_cvt_roundepi64_pd(A, B) \ 2406 ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B))) 2407 2408 #define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \ 2409 ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (W), (U), (B))) 2410 2411 #define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \ 2412 ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B))) 2413 2414 #define _mm512_cvt_roundepu64_pd(A, B) \ 2415 ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B))) 2416 2417 #define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \ 2418 ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (W), (U), (B))) 2419 2420 
/* Zero-masking macro form of the unsigned 64-bit integer to double
   conversion with an explicit rounding argument B.  A is cast to __v8di,
   the pass-through is an all-zero vector and U is forwarded as the mask.  */
#define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B)))

/* Macro forms of the 512-bit reduce intrinsics.  A is the source vector and
   B the immediate.  The unmasked form passes an all-zero pass-through with an
   all-ones mask, the mask form passes W and U, and the maskz form passes an
   all-zero pass-through and U.  */
#define _mm512_reduce_pd(A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
    (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1))

#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
    (int)(B), (__v8df)(__m512d)(W), (__mmask8)(U)))

#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
    (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U)))

#define _mm512_reduce_ps(A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
    (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1))

#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
    (int)(B), (__v16sf)(__m512)(W), (__mmask16)(U)))

#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
    (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U)))

/* Macro forms of the extract intrinsics.  X is the 512-bit source and C the
   immediate.  The result has the narrower vector type named by the outer
   cast; W or an all-zero vector of that type is the pass-through, and the
   mask is always an 8-bit __mmask8.  */
#define _mm512_extractf32x8_ps(X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
    (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8)-1))

#define _mm512_mask_extractf32x8_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
    (int) (C), (__v8sf)(__m256) (W), (__mmask8) (U)))

#define _mm512_maskz_extractf32x8_ps(U, X, C) \
  ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
    (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))

#define _mm512_extractf64x2_pd(X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X), \
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8)-1))

#define _mm512_mask_extractf64x2_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X), \
    (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))

#define _mm512_maskz_extractf64x2_pd(U, X, C) \
  ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X), \
    (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U)))

#define _mm512_extracti32x8_epi32(X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
    (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)-1))

#define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
    (int) (C), (__v8si)(__m256i) (W), (__mmask8) (U)))

#define _mm512_maskz_extracti32x8_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
    (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))

#define _mm512_extracti64x2_epi64(X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X), \
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))

#define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X), \
    (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))

#define _mm512_maskz_extracti64x2_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X), \
    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))

/* Macro forms of the 512-bit range intrinsics.  A and B are the two source
   vectors and C the immediate.  These variants always pass
   _MM_FROUND_CUR_DIRECTION as the final builtin argument.  */
#define _mm512_range_pd(A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (int)(C), \
    (__v8df)_mm512_setzero_pd (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (int)(C), \
    (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (int)(C), \
    (__v8df)_mm512_setzero_pd (), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_range_ps(A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (int)(C), \
    (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (int)(C), \
    (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (int)(C), \
    (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

/* Same as the range macros above, but the caller supplies the final builtin
   argument R instead of _MM_FROUND_CUR_DIRECTION.  */
#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (int)(C), \
    (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R)))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (int)(C), \
    (__v8df)(__m512d)(W), (__mmask8)(U), (R)))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
    (__v8df)(__m512d)(B), (int)(C), \
    (__v8df)_mm512_setzero_pd (), (__mmask8)(U), (R)))

#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (int)(C), \
    (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R)))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (int)(C), \
    (__v16sf)(__m512)(W), (__mmask16)(U), (R)))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
    (__v16sf)(__m512)(B), (int)(C), \
    (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R)))

/* Macro forms of the 64x2 insert intrinsics.  X is the 512-bit source, Y the
   128-bit operand and C the immediate.

   The unmasked forms pass an all-zero vector as the pass-through operand,
   matching the inline-function definitions of these intrinsics and the
   _mm512_insertf32x8 macro below.  Reusing X there would expand the macro
   argument twice, so an argument with side effects would be evaluated
   twice.  The mask is all ones, so the pass-through value is not expected
   to reach the result.  */
#define _mm512_insertf64x2(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X), \
    (__v2df)(__m128d) (Y), (int) (C), \
    (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8)-1))

#define _mm512_mask_insertf64x2(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X), \
    (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_insertf64x2(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X), \
    (__v2df)(__m128d) (Y), (int) (C), \
    (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8) (U)))

#define _mm512_inserti64x2(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X), \
    (__v2di)(__m128i) (Y), (int) (C), \
    (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8)-1))

#define _mm512_mask_inserti64x2(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X), \
    (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (W), \
    (__mmask8) (U)))

#define _mm512_maskz_inserti64x2(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X), \
    (__v2di)(__m128i) (Y), (int) (C), \
    (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8) (U)))

/* Unmasked macro form of the 32x8 float insert: Y is a 256-bit operand, the
   pass-through is an all-zero vector and the 16-bit mask is all ones.  */
#define _mm512_insertf32x8(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
    (__v8sf)(__m256) (Y), (int) (C), \
    (__v16sf)(__m512)_mm512_setzero_ps (), \
    (__mmask16)-1))
2590 #define _mm512_mask_insertf32x8(W, U, X, Y, C) \ 2591 ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \ 2592 (__v8sf)(__m256) (Y), (int) (C),\ 2593 (__v16sf)(__m512)(W),\ 2594 (__mmask16)(U))) 2595 2596 #define _mm512_maskz_insertf32x8(U, X, Y, C) \ 2597 ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \ 2598 (__v8sf)(__m256) (Y), (int) (C),\ 2599 (__v16sf)(__m512)_mm512_setzero_ps (),\ 2600 (__mmask16)(U))) 2601 2602 #define _mm512_inserti32x8(X, Y, C) \ 2603 ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \ 2604 (__v8si)(__m256i) (Y), (int) (C),\ 2605 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 2606 (__mmask16)-1)) 2607 2608 #define _mm512_mask_inserti32x8(W, U, X, Y, C) \ 2609 ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \ 2610 (__v8si)(__m256i) (Y), (int) (C),\ 2611 (__v16si)(__m512i)(W),\ 2612 (__mmask16)(U))) 2613 2614 #define _mm512_maskz_inserti32x8(U, X, Y, C) \ 2615 ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \ 2616 (__v8si)(__m256i) (Y), (int) (C),\ 2617 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 2618 (__mmask16)(U))) 2619 2620 #define _mm_fpclass_ss_mask(X, C) \ 2621 ((__mmask8) __builtin_ia32_fpclassss ((__v4sf) (__m128) (X), (int) (C))) \ 2622 2623 #define _mm_fpclass_sd_mask(X, C) \ 2624 ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \ 2625 2626 #define _mm512_mask_fpclass_pd_mask(u, X, C) \ 2627 ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \ 2628 (int) (C), (__mmask8)(u))) 2629 2630 #define _mm512_mask_fpclass_ps_mask(u, x, c) \ 2631 ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\ 2632 (int) (c),(__mmask8)(u))) 2633 2634 #define _mm512_fpclass_pd_mask(X, C) \ 2635 ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \ 2636 (int) (C), (__mmask8)-1)) 2637 2638 #define _mm512_fpclass_ps_mask(x, c) \ 2639 ((__mmask16) __builtin_ia32_fpclassps512_mask 
((__v16sf) (__m512) (x),\ 2640 (int) (c),(__mmask8)-1)) 2641 2642 #define _mm_reduce_sd(A, B, C) \ 2643 ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ 2644 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2645 (__mmask8)-1)) 2646 2647 #define _mm_mask_reduce_sd(W, U, A, B, C) \ 2648 ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ 2649 (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), (__mmask8)(U))) 2650 2651 #define _mm_maskz_reduce_sd(U, A, B, C) \ 2652 ((__m128d) __builtin_ia32_reducesd_mask ((__v2df)(__m128d)(A), \ 2653 (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ 2654 (__mmask8)(U))) 2655 2656 #define _mm_reduce_ss(A, B, C) \ 2657 ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ 2658 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2659 (__mmask8)-1)) 2660 2661 #define _mm_mask_reduce_ss(W, U, A, B, C) \ 2662 ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ 2663 (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), (__mmask8)(U))) 2664 2665 #define _mm_maskz_reduce_ss(U, A, B, C) \ 2666 ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ 2667 (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ 2668 (__mmask8)(U))) 2669 2670 2671 2672 #endif 2673 2674 #ifdef __DISABLE_AVX512DQ__ 2675 #undef __DISABLE_AVX512DQ__ 2676 #pragma GCC pop_options 2677 #endif /* __DISABLE_AVX512DQ__ */ 2678 2679 #endif /* _AVX512DQINTRIN_H_INCLUDED */ 2680