;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Non-volatile unspec codes, grouped by the ISA extension they serve.
(define_c_enum "unspec" [
  ;; SSE
  UNSPEC_MOVNT
  UNSPEC_MOVU

  ;; SSE3
  UNSPEC_LDDQU

  ;; SSSE3
  UNSPEC_PSHUFB
  UNSPEC_PSIGN
  UNSPEC_PALIGNR

  ;; For SSE4A support
  UNSPEC_EXTRQI
  UNSPEC_EXTRQ
  UNSPEC_INSERTQI
  UNSPEC_INSERTQ

  ;; For SSE4.1 support
  UNSPEC_BLENDV
  UNSPEC_INSERTPS
  UNSPEC_DP
  UNSPEC_MOVNTDQA
  UNSPEC_MPSADBW
  UNSPEC_PHMINPOSUW
  UNSPEC_PTEST

  ;; For SSE4.2 support
  UNSPEC_PCMPESTR
  UNSPEC_PCMPISTR

  ;; For FMA4 support
  UNSPEC_FMADDSUB
  UNSPEC_XOP_UNSIGNED_CMP
  UNSPEC_XOP_TRUEFALSE
  UNSPEC_XOP_PERMUTE
  UNSPEC_FRCZ

  ;; For AES support
  UNSPEC_AESENC
  UNSPEC_AESENCLAST
  UNSPEC_AESDEC
  UNSPEC_AESDECLAST
  UNSPEC_AESIMC
  UNSPEC_AESKEYGENASSIST

  ;; For PCLMUL support
  UNSPEC_PCLMUL

  ;; For AVX support
  UNSPEC_PCMP
  UNSPEC_VPERMIL
  UNSPEC_VPERMIL2
  UNSPEC_VPERMIL2F128
  UNSPEC_CAST
  UNSPEC_VTESTP
  UNSPEC_VCVTPH2PS
  UNSPEC_VCVTPS2PH

  ;; For AVX2 support
  UNSPEC_VPERMSI
  UNSPEC_VPERMDF
  UNSPEC_VPERMSF
  UNSPEC_VPERMTI
  UNSPEC_GATHER
  UNSPEC_VSIBADDR
])

;; Volatile unspec codes.
(define_c_enum "unspecv" [
  UNSPECV_LDMXCSR
  UNSPECV_STMXCSR
  UNSPECV_CLFLUSH
  UNSPECV_MONITOR
  UNSPECV_MWAIT
  UNSPECV_VZEROALL
  UNSPECV_VZEROUPPER
])

;; All vector modes including V?TImode, used in move patterns.
(define_mode_iterator V16
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI
   (V2TI "TARGET_AVX") V1TI
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") V2DF])

;; All vector modes
(define_mode_iterator V
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All 128bit vector modes
(define_mode_iterator V_128
  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])

;; All 256bit vector modes
(define_mode_iterator V_256
  [V32QI V16HI V8SI V4DI V8SF V4DF])

;; All vector float modes
(define_mode_iterator VF
  [(V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All SFmode vector float modes
(define_mode_iterator VF1
  [(V8SF "TARGET_AVX") V4SF])

;; All DFmode vector float modes
(define_mode_iterator VF2
  [(V4DF "TARGET_AVX") V2DF])

;; All 128bit vector float modes
(define_mode_iterator VF_128
  [V4SF (V2DF "TARGET_SSE2")])

;; All 256bit vector float modes
(define_mode_iterator VF_256
  [V8SF V4DF])

;; All vector integer modes
(define_mode_iterator VI
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI])

;; Same mode list as VI, but the 256bit modes are conditional on
;; TARGET_AVX2 instead of TARGET_AVX.
(define_mode_iterator VI_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

;; All QImode vector integer modes
(define_mode_iterator VI1
  [(V32QI "TARGET_AVX") V16QI])

;; All DImode vector integer modes
(define_mode_iterator VI8
  [(V4DI "TARGET_AVX") V2DI])

;; Per-element-size integer iterators whose 256bit member is conditional
;; on TARGET_AVX2.  The digits in the name give the element size in bytes.
(define_mode_iterator VI1_AVX2
  [(V32QI "TARGET_AVX2") V16QI])

(define_mode_iterator VI2_AVX2
  [(V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI4_AVX2
  [(V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI8_AVX2
  [(V4DI "TARGET_AVX2") V2DI])

;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
  [(V2TI "TARGET_AVX2") V1TI])

;; ??? This should probably be dropped in favor of VIMAX_AVX2.
(define_mode_iterator SSESCALARMODE
  [(V2TI "TARGET_AVX2") TI])

;; Combinations of element sizes; 256bit members are conditional on
;; TARGET_AVX2.
(define_mode_iterator VI12_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI24_AVX2
  [(V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI124_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI248_AVX2
  [(V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI48_AVX2
  [(V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

;; 4- and 8-byte element modes: the float modes are unconditional here,
;; the integer modes are conditional on TARGET_AVX2.
(define_mode_iterator V48_AVX2
  [V4SF V2DF
   V8SF V4DF
   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])

;; Map a mode to an ISA-name string: the 128bit modes give the SSE-level
;; name, the 256bit modes give "avx2".
(define_mode_attr sse2_avx2
  [(V16QI "sse2") (V32QI "avx2")
   (V8HI "sse2") (V16HI "avx2")
   (V4SI "sse2") (V8SI "avx2")
   (V2DI "sse2") (V4DI "avx2")
   (V1TI "sse2") (V2TI "avx2")])

(define_mode_attr ssse3_avx2
  [(V16QI "ssse3") (V32QI "avx2")
   (V8HI "ssse3") (V16HI "avx2")
   (V4SI "ssse3") (V8SI "avx2")
   (V2DI "ssse3") (V4DI "avx2")
   (TI "ssse3") (V2TI "avx2")])

(define_mode_attr sse4_1_avx2
  [(V16QI "sse4_1") (V32QI "avx2")
   (V8HI "sse4_1") (V16HI "avx2")
   (V4SI "sse4_1") (V8SI "avx2")
   (V2DI "sse4_1") (V4DI "avx2")])

;; "avx" for the float modes, "avx2" for the integer modes.
(define_mode_attr avx_avx2
  [(V4SF "avx") (V2DF "avx")
   (V8SF "avx") (V4DF "avx")
   (V4SI "avx2") (V2DI "avx2")
   (V8SI "avx2") (V4DI "avx2")])

;; "vec" for the 128bit integer modes, "avx2" for the 256bit ones.
(define_mode_attr vec_avx2
  [(V16QI "vec") (V32QI "avx2")
   (V8HI "vec") (V16HI "avx2")
   (V4SI "vec") (V8SI "avx2")
   (V2DI "vec") (V4DI "avx2")])

;; HImode vectors mapped to the SImode vector with the same element count.
(define_mode_attr ssedoublemode
  [(V16HI "V16SI") (V8HI "V8SI")])

;; DImode vectors mapped to the QImode vector of the same total size.
(define_mode_attr ssebytemode
  [(V4DI "V32QI") (V2DI "V16QI")])

;; All 128bit vector integer modes
(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])

;; All 256bit vector integer modes
(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])

;; Random 128bit vector integer mode combinations
(define_mode_iterator VI12_128 [V16QI V8HI])
(define_mode_iterator VI14_128 [V16QI V4SI])
(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
(define_mode_iterator VI128_128 [V16QI V8HI V2DI])
(define_mode_iterator VI24_128 [V8HI V4SI])
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
(define_mode_iterator VI48_128 [V4SI V2DI])

;; Random 256bit vector integer mode combinations
(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
(define_mode_iterator VI48_256 [V8SI V4DI])

;; Int-float size matches
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF])

;; Mapping from float mode to required SSE level
(define_mode_attr sse
  [(SF "sse") (DF "sse2")
   (V4SF "sse") (V2DF "sse2")
   (V8SF "avx") (V4DF "avx")])

(define_mode_attr sse2
  [(V16QI "sse2") (V32QI "avx")
   (V2DI "sse2") (V4DI "avx")])

(define_mode_attr sse3
  [(V16QI "sse3") (V32QI "avx")])

(define_mode_attr sse4_1
  [(V4SF "sse4_1") (V2DF "sse4_1")
   (V8SF "avx") (V4DF "avx")])

;; "256" for the 256bit modes, empty for the 128bit modes.
(define_mode_attr avxsizesuffix
  [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
   (V8SF "256") (V4DF "256")
   (V4SF "") (V2DF "")])

;; SSE instruction mode
(define_mode_attr sseinsnmode
  [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
   (V8SF "V8SF") (V4DF "V4DF")
   (V4SF "V4SF") (V2DF "V2DF")
   (TI "TI")])

;; Mapping of vector modes (float and integer) to the integer vector mode
;; with the same element size and count; integer modes map to themselves.
(define_mode_attr sseintvecmode
  [(V8SF "V8SI") (V4DF "V4DI")
   (V4SF "V4SI") (V2DF "V2DI")
   (V8SI "V8SI") (V4DI "V4DI")
   (V4SI "V4SI") (V2DI "V2DI")
   (V16HI "V16HI") (V8HI "V8HI")
   (V32QI "V32QI") (V16QI "V16QI")])

;; Lower-case spelling of sseintvecmode.
(define_mode_attr sseintvecmodelower
  [(V8SF "v8si") (V4DF "v4di")
   (V4SF "v4si") (V2DF "v2di")
   (V8SI "v8si") (V4DI "v4di")
   (V4SI "v4si") (V2DI "v2di")
   (V16HI "v16hi") (V8HI "v8hi")
   (V32QI "v32qi") (V16QI "v16qi")])

;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublevecmode
  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
   (V8SF "V16SF") (V4DF "V8DF")
   (V4SF "V8SF") (V2DF "V4DF")])

;; Mapping of vector modes to a vector mode of half size
(define_mode_attr ssehalfvecmode
  [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
   (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
   (V8SF "V4SF") (V4DF "V2DF")
   (V4SF "V2SF")])

;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode
  [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
   (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
   (V8SF "SF") (V4DF "DF")
   (V4SF "SF") (V2DF "DF")])

;; Number of scalar elements in each vector type
(define_mode_attr ssescalarnum
  [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
   (V8SF "8") (V4DF "4")
   (V4SF "4") (V2DF "2")])

;; SSE prefix for integer vector modes
(define_mode_attr sseintprefix
  [(V2DI "p") (V2DF "")
   (V4DI "p") (V4DF "")
   (V4SI "p") (V4SF "")
   (V8SI "p") (V8SF "")])

;; SSE scalar suffix for vector modes
(define_mode_attr ssescalarmodesuffix
  [(SF "ss") (DF "sd")
   (V8SF "ss") (V4DF "sd")
   (V4SF "ss") (V2DF "sd")
   (V8SI "ss") (V4DI "sd")
   (V4SI "d")])

;; Pack/unpack vector modes
(define_mode_attr sseunpackmode
  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])

(define_mode_attr ssepackmode
  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])

;; Mapping of the max integer size for xop rotate immediate constraint
(define_mode_attr sserotatemax
  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])

;; Mapping of mode to cast intrinsic name
(define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])

;; Instruction suffix for sign and zero extensions.
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])

;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
(define_mode_attr i128
  [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
   (V8SI "%~128") (V4DI "%~128")])

;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])

;; Mapping of immediate bits for blend instructions
(define_mode_attr blendbits
  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])

;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.

;; Vector move expander; all the work is done by ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:V16 0 "nonimmediate_operand" "")
        (match_operand:V16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})

;; Vector move insn.  Alternative 0 materializes an SSE constant,
;; alternative 1 is a load or register copy and alternative 2 is a store.
;; The mnemonic is picked from the insn's "mode" attribute; with
;; TARGET_AVX the unaligned form is emitted when either operand is
;; misaligned.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      switch (get_attr_mode (insn))
        {
        case MODE_V8SF:
        case MODE_V4SF:
          if (TARGET_AVX
              && (misaligned_operand (operands[0], <MODE>mode)
                  || misaligned_operand (operands[1], <MODE>mode)))
            return "vmovups\t{%1, %0|%0, %1}";
          else
            return "%vmovaps\t{%1, %0|%0, %1}";

        case MODE_V4DF:
        case MODE_V2DF:
          if (TARGET_AVX
              && (misaligned_operand (operands[0], <MODE>mode)
                  || misaligned_operand (operands[1], <MODE>mode)))
            return "vmovupd\t{%1, %0|%0, %1}";
          else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vmovaps\t{%1, %0|%0, %1}";
          else
            return "%vmovapd\t{%1, %0|%0, %1}";

        case MODE_OI:
        case MODE_TI:
          if (TARGET_AVX
              && (misaligned_operand (operands[0], <MODE>mode)
                  || misaligned_operand (operands[1], <MODE>mode)))
            return "vmovdqu\t{%1, %0|%0, %1}";
          else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vmovaps\t{%1, %0|%0, %1}";
          else
            return "%vmovdqa\t{%1, %0|%0, %1}";

        default:
          gcc_unreachable ();
        }
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
        (cond [(match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
               (ior (ior (match_test "optimize_function_for_size_p (cfun)")
                         (not (match_test "TARGET_SSE2")))
                    (and (eq_attr "alternative" "2")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
                 (const_string "V2DF")
              ]
          (const_string "TI")))])

;; Keep element 0 of a V2DI operand and pair it with a zero upper half.
(define_insn "sse2_movq128"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_concat:V2DI
          (vec_select:DI
            (match_operand:V2DI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))
          (const_int 0)))]
  "TARGET_SSE2"
  "%vmovq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register.  To facilitate this happy circumstance, this pattern won't
;; split until after register allocation.  If the 64-bit value didn't
;; come from memory, this is the best we can do.  This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.

(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
         Assemble the 64-bit DImode value in an xmm register.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 0)));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 4)));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
                                             operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
                                   operands[1], const0_rtx));
  else
    gcc_unreachable ();
})

;; After reload, rewrite a V4SF load of a zero-extended scalar as a
;; vec_merge of the duplicated SFmode value with a zero vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})

;; Likewise for V2DF: a vec_concat of the DFmode value with DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})

;; Push of a vector register; handled entirely by ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:V16 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})

;; Misaligned vector move; handled by ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:V16 0 "nonimmediate_operand" "")
        (match_operand:V16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})

;; Unaligned float vector move (UNSPEC_MOVU); memory-to-memory is rejected
;; by the insn condition.
(define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Unaligned QImode-vector move (UNSPEC_MOVU); memory-to-memory is rejected
;; by the insn condition.
(define_insn "<sse2>_movdqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
        (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
                    UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "%vmovdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; lddqu load from memory into a register (UNSPEC_LDDQU).
(define_insn "<sse3>_lddqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "register_operand" "=x")
        (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
                    UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "%vlddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; movnti store of a general register to memory (UNSPEC_MOVNT).
(define_insn "sse2_movnti<mode>"
  [(set (match_operand:SWI48 0 "memory_operand" "=m")
        (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
                      UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "<MODE>")])

;; movnt store of a float vector register to memory (UNSPEC_MOVNT).
(define_insn "<sse>_movnt<mode>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
        (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE"
  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; movntdq store of a DImode-vector register to memory (UNSPEC_MOVNT).
(define_insn "<sse2>_movnt<mode>"
  [(set (match_operand:VI8 0 "memory_operand" "=m")
        (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
                    UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "%vmovntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

; Expand patterns for non-temporal stores.  At the moment, only those
; that directly map to insns are defined; it would be possible to
; define patterns for other modes that would expand to several insns.

;; Modes handled by storent patterns.
(define_mode_iterator STORENT_MODE
  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
   (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

(define_expand "storent<mode>"
  [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
        (unspec:STORENT_MODE
          [(match_operand:STORENT_MODE 1 "register_operand" "")]
          UNSPEC_MOVNT))]
  "TARGET_SSE")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector abs/neg expander; defers to ix86_expand_fp_absneg_operator.
(define_expand "<code><mode>2"
  [(set (match_operand:VF 0 "register_operand" "")
        (absneg:VF
          (match_operand:VF 1 "register_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")

;; Vector abs/neg with the mask supplied as operand 2.  Split after reload
;; into a single logical operation: XOR for NEG, AND otherwise.
(define_insn_and_split "*absneg<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
        (match_operator:VF 3 "absneg_operator"
          [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
   (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  enum rtx_code absneg_op;
  rtx op1, op2;
  rtx t;

  if (TARGET_AVX)
    {
      /* Three-operand form: make sure a memory operand ends up second.  */
      if (MEM_P (operands[1]))
        op1 = operands[2], op2 = operands[1];
      else
        op1 = operands[1], op2 = operands[2];
    }
  else
    {
      /* Two-operand form: the destination is also the first source.  */
      op1 = operands[0];
      if (rtx_equal_p (operands[0], operands[1]))
        op2 = operands[2];
      else
        op2 = operands[1];
    }

  absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
  t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
  t = gen_rtx_SET (VOIDmode, operands[0], t);
  emit_insn (t);
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])

;; Vector float add/subtract.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (plusminus:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (plusminus:VF
          (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Scalar add/subtract on element 0; the other elements come from
;; operand 1.
(define_insn "<sse>_vm<plusminus_insn><mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (plusminus:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; Vector float multiply.
(define_expand "mul<mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (mult:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

(define_insn "*mul<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (mult:VF
          (match_operand:VF 1 "nonimmediate_operand" "%0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   mul<ssemodesuffix>\t{%2, %0|%0, %2}
   vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Scalar multiply on element 0; the other elements come from operand 1.
(define_insn "<sse>_vmmul<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (mult:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; Vector divide, DFmode elements.
(define_expand "div<mode>3"
  [(set (match_operand:VF2 0 "register_operand" "")
        (div:VF2 (match_operand:VF2 1 "register_operand" "")
                 (match_operand:VF2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")

;; Vector divide, SFmode elements.  When the flag combination tested below
;; holds, the division is emitted through ix86_emit_swdivsf instead of the
;; div insn.
(define_expand "div<mode>3"
  [(set (match_operand:VF1 0 "register_operand" "")
        (div:VF1 (match_operand:VF1 1 "register_operand" "")
                 (match_operand:VF1 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);

  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_DIV
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
      DONE;
    }
})

(define_insn "<sse>_div<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (div:VF
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
  "@
   div<ssemodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Scalar divide on element 0; the other elements come from operand 1.
(define_insn "<sse>_vmdiv<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (div:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; Packed single-precision reciprocal (UNSPEC_RCP).
(define_insn "<sse>_rcp<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (unspec:VF1
          [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "%vrcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Scalar reciprocal on element 0; the other elements come from operand 2.
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   rcpss\t{%1, %0|%0, %1}
   vrcpss\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])

;; Vector square root, DFmode elements.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF2 0 "register_operand" "")
        (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2")

;; Vector square root, SFmode elements.  When the flag combination tested
;; below holds, the result is emitted through ix86_emit_swsqrtsf instead of
;; the sqrt insn.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "")
        (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_SQRT
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
      DONE;
    }
})

(define_insn "<sse>_sqrt<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Scalar square root on element 0; the other elements come from operand 2.
(define_insn "<sse>_vmsqrt<mode>2"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (sqrt:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
          (match_operand:VF_128 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; Reciprocal square root expander; always emitted through
;; ix86_emit_swsqrtsf with its last argument true.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "")
        (unspec:VF1
          [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
  "TARGET_SSE_MATH"
{
  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
  DONE;
})

;; Packed single-precision reciprocal square root (UNSPEC_RSQRT).
(define_insn "<sse>_rsqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (unspec:VF1
          [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
  "TARGET_SSE"
  "%vrsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Scalar reciprocal square root on element 0; the other elements come from
;; operand 2.
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   rsqrtss\t{%1, %0|%0, %1}
   vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])

;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
;; Expander for vector float smin/smax.  Unless -ffinite-math-only is in
;; effect, operand 1 is first forced into a register; the shared
;; binary-operand fixup then runs in both cases.
(define_expand "<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (smaxmin:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (<MODE>mode, operands[1]);

  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})

;; Finite-math form: operand 1 carries the "%" (commutative) modifier.
(define_insn "*<code><mode>3_finite"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (smaxmin:VF
          (match_operand:VF 1 "nonimmediate_operand" "%0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Non-finite-math form: operand 1 must be a register and is not
;; marked commutative.
(define_insn "*<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (smaxmin:VF
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && !flag_finite_math_only"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Scalar min/max: element 0 takes the min/max result, every other
;; element is taken from operand 1 (vec_merge with mask 1).
(define_insn "<sse>_vm<code><mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (smaxmin:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.

(define_insn "*ieee_smin<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "@
   min<ssemodesuffix>\t{%2, %0|%0, %2}
   vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

(define_insn "*ieee_smax<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "@
   max<ssemodesuffix>\t{%2, %0|%0, %2}
   vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; ADDSUB family.  Each pattern merges op1 + op2 with op1 - op2; a set
;; bit in the merge constant selects the sum for that element.

;; 256-bit double precision, merge mask 10.
(define_insn "avx_addsubv4df3"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_merge:V4DF
          (plus:V4DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (minus:V4DF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_AVX"
  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; 128-bit double precision, merge mask 2.
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0,x")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
          (minus:V2DF (match_dup 1) (match_dup 2))
          (const_int 2)))]
  "TARGET_SSE3"
  "@
   addsubpd\t{%2, %0|%0, %2}
   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])

;; 256-bit single precision, merge mask 170.
(define_insn "avx_addsubv8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_merge:V8SF
          (plus:V8SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (minus:V8SF (match_dup 1) (match_dup 2))
          (const_int 170)))]
  "TARGET_AVX"
  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; 128-bit single precision, merge mask 10.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "@
   addsubps\t{%2, %0|%0, %2}
   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_rep" "1,*")
   (set_attr "mode" "V4SF")])

;; Horizontal add/subtract family.  Adjacent element pairs of each
;; source are combined with <plusminus> and the results concatenated.

;; 256-bit double precision: pairs (0,1) of op1 and op2 form the low
;; half, pairs (2,3) of op1 and op2 form the high half.
(define_insn "avx_h<plusminus_insn>v4df3"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_concat:V4DF
          (vec_concat:V2DF
            (plusminus:DF
              (vec_select:DF
                (match_operand:V4DF 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
            (plusminus:DF
              (vec_select:DF
                (match_operand:V4DF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
          (vec_concat:V2DF
            (plusminus:DF
              (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
            (plusminus:DF
              (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; 128-bit double precision.
(define_insn "sse3_h<plusminus_insn>v2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_concat:V2DF
          (plusminus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0,x")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (plusminus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "@
   h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
   vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])

;; 256-bit single precision: elements 0-3 of op1 then op2 form the low
;; half, elements 4-7 of op1 then op2 form the high half.
(define_insn "avx_h<plusminus_insn>v8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_concat:V8SF
          (vec_concat:V4SF
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF
                  (match_operand:V8SF 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF
                  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
          (vec_concat:V4SF
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; 128-bit single precision.
(define_insn "sse3_h<plusminus_insn>v4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (plusminus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plusminus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (plusminus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plusminus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "@
   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_rep" "1,*")
   (set_attr "mode" "V4SF")])

;; Sum reduction of a V4DF vector: one horizontal add, a vperm2f128
;; with immediate 1, then a vertical add of the two temporaries.
(define_expand "reduc_splus_v4df"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V4DF 1 "register_operand" "")]
  "TARGET_AVX"
{
  rtx pair_sums = gen_reg_rtx (V4DFmode);
  rtx permuted = gen_reg_rtx (V4DFmode);

  emit_insn (gen_avx_haddv4df3 (pair_sums, operands[1], operands[1]));
  emit_insn (gen_avx_vperm2f128v4df3 (permuted, pair_sums, pair_sums,
                                      GEN_INT (1)));
  emit_insn (gen_addv4df3 (operands[0], pair_sums, permuted));
  DONE;
})

;; Sum reduction of a V2DF vector: a single horizontal add of the
;; input with itself.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
  DONE;
})

;; Sum reduction of a V8SF vector: two horizontal adds, a vperm2f128
;; with immediate 1 (written back into the first temporary), then a
;; vertical add.
(define_expand "reduc_splus_v8sf"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")]
  "TARGET_AVX"
{
  rtx scratch = gen_reg_rtx (V8SFmode);
  rtx quad_sums = gen_reg_rtx (V8SFmode);

  emit_insn (gen_avx_haddv8sf3 (scratch, operands[1], operands[1]));
  emit_insn (gen_avx_haddv8sf3 (quad_sums, scratch, scratch));
  emit_insn (gen_avx_vperm2f128v8sf3 (scratch, quad_sums, quad_sums,
                                      GEN_INT (1)));
  emit_insn (gen_addv8sf3 (operands[0], scratch, quad_sums));
  DONE;
})

;; Sum reduction of a V4SF vector: two horizontal adds when SSE3 is
;; available, otherwise the generic ix86_expand_reduc helper.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (!TARGET_SSE3)
    ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
  else
    {
      rtx pair_sums = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse3_haddv4sf3 (pair_sums, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], pair_sums, pair_sums));
    }
  DONE;
})

;; Modes handled by reduc_sm{in,ax}* patterns.
(define_mode_iterator REDUC_SMINMAX_MODE
  [(V32QI "TARGET_AVX2")
   (V16HI "TARGET_AVX2")
   (V8SI "TARGET_AVX2")
   (V4DI "TARGET_AVX2")
   (V8SF "TARGET_AVX")
   (V4DF "TARGET_AVX")
   (V4SF "TARGET_SSE")])

;; Signed min/max reductions; each mode carries its own enable
;; condition in the iterator above, so the pattern condition is empty.
(define_expand "reduc_<code>_<mode>"
  [(smaxmin:REDUC_SMINMAX_MODE
     (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
  ""
{
  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
  DONE;
})

;; Unsigned min/max reductions over the VI_256 modes.
(define_expand "reduc_<code>_<mode>"
  [(umaxmin:VI_256
     (match_operand:VI_256 0 "register_operand" "")
     (match_operand:VI_256 1 "register_operand" ""))]
  "TARGET_AVX2"
{
  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
  DONE;
})

;; Unsigned min reduction for V8HI.
(define_expand "reduc_umin_v8hi"
  [(umin:V8HI
     (match_operand:V8HI 0 "register_operand" "")
     (match_operand:V8HI 1 "register_operand" ""))]
  "TARGET_SSE4_1"
{
  ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; AVX compare with the predicate supplied as an immediate in the
;; range 0..31 (operand 3).
(define_insn "avx_cmp<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "x")
           (match_operand:VF 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_31_operand" "n")]
          UNSPEC_PCMP))]
  "TARGET_AVX"
  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; Scalar form of the above: element 0 is the comparison result, the
;; remaining elements come from operand 1.
(define_insn "avx_vmcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "x")
             (match_operand:VF_128 2 "nonimmediate_operand" "xm")
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
            UNSPEC_PCMP)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX"
  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<ssescalarmode>")])

;; Mask compare restricted to comparison codes of class
;; RTX_COMM_COMPARE; operand 1 is marked commutative.
(define_insn "*<sse>_maskcmp<mode>3_comm"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (match_operator:VF 3 "sse_comparison_operator"
          [(match_operand:VF 1 "register_operand" "%0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
  "TARGET_SSE
   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; General mask compare for any sse_comparison_operator.
(define_insn "<sse>_maskcmp<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (match_operator:VF 3 "sse_comparison_operator"
          [(match_operand:VF 1 "register_operand" "0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
  "TARGET_SSE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Scalar mask compare: element 0 is the comparison result, the
;; remaining elements come from operand 1.
(define_insn "<sse>_vmmaskcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (match_operator:VF_128 3 "sse_comparison_operator"
            [(match_operand:VF_128 1 "register_operand" "0,x")
             (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; Compare element 0 of both operands and set the flags register in
;; CCFP mode.
(define_insn "<sse>_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:MODEF
            (match_operand:<ssevecmode> 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:MODEF
            (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rep" "0")
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "mode" "DF")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "mode" "<MODE>")])

;; As above, but the flags are set in CCFPU mode.
(define_insn "<sse>_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:MODEF
            (match_operand:<ssevecmode> 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:MODEF
            (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rep" "0")
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "mode" "DF")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "mode" "<MODE>")])

;; Vector conditional move keyed on a 256-bit float comparison.  Only
;; enabled when the data mode and the comparison mode have the same
;; number of elements.
(define_expand "vcond<V_256:mode><VF_256:mode>"
  [(set (match_operand:V_256 0 "register_operand" "")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VF_256 4 "nonimmediate_operand" "")
             (match_operand:VF_256 5 "nonimmediate_operand" "")])
          (match_operand:V_256 1 "general_operand" "")
          (match_operand:V_256 2 "general_operand" "")))]
  "TARGET_AVX
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
{
  /* Keep the call outside gcc_assert so it is always evaluated.  */
  bool expanded = ix86_expand_fp_vcond (operands);
  gcc_assert (expanded);
  DONE;
})

;; 128-bit counterpart of the expander above.
(define_expand "vcond<V_128:mode><VF_128:mode>"
  [(set (match_operand:V_128 0 "register_operand" "")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VF_128 4 "nonimmediate_operand" "")
             (match_operand:VF_128 5 "nonimmediate_operand" "")])
          (match_operand:V_128 1 "general_operand" "")
          (match_operand:V_128 2 "general_operand" "")))]
  "TARGET_SSE
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
{
  /* Keep the call outside gcc_assert so it is always evaluated.  */
  bool expanded = ix86_expand_fp_vcond (operands);
  gcc_assert (expanded);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; (~op1) & op2 on float vectors.  The mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, else the mode's own suffix.
(define_insn "<sse>_andnot<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (and:VF
          (not:VF
            (match_operand:VF 1 "register_operand" "0,x"))
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
{
  static char asm_text[32];
  const char *fmt;

  if (which_alternative == 0)
    fmt = "andn%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    fmt = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_text, sizeof (asm_text), fmt,
            TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
            ? "ps" : "<ssemodesuffix>");
  return asm_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Expander for and/ior/xor on float vectors; only runs the shared
;; binary-operand fixup.
(define_expand "<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (any_logic:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})

;; and/ior/xor on float vectors, with the same suffix selection as
;; the andnot pattern above.
(define_insn "*<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (any_logic:VF
          (match_operand:VF 1 "nonimmediate_operand" "%0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char asm_text[32];
  const char *fmt;

  if (which_alternative == 0)
    fmt = "<logic>%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    fmt = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_text, sizeof (asm_text), fmt,
            TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
            ? "ps" : "<ssemodesuffix>");
  return asm_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; copysign as three logical operations using a sign-bit mask built by
;; ix86_build_signbit_mask (operand 3):
;;   op4 = ~mask & op1;  op5 = mask & op2;  op0 = op4 | op5.
(define_expand "copysign<mode>3"
  [(set (match_dup 4)
        (and:VF
          (not:VF (match_dup 3))
          (match_operand:VF 1 "nonimmediate_operand" "")))
   (set (match_dup 5)
        (and:VF (match_dup 3)
                (match_operand:VF 2 "nonimmediate_operand" "")))
   (set (match_operand:VF 0 "register_operand" "")
        (ior:VF (match_dup 4) (match_dup 5)))]
  "TARGET_SSE"
{
  /* The mask is built before the two temporaries are allocated.  */
  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);

  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = gen_reg_rtx (<MODE>mode);
})

;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.

(define_insn "*andnot<mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (and:MODEF
          (not:MODEF
            (match_operand:MODEF 1 "register_operand" "0,x"))
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  static char asm_text[32];
  const char *fmt;

  if (which_alternative == 0)
    fmt = "andn%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    fmt = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_text, sizeof (asm_text), fmt,
            TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
            ? "ps" : "<ssevecmodesuffix>");
  return asm_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssevecmode>")])

(define_insn "*<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (any_logic:MODEF
          (match_operand:MODEF 1 "register_operand" "%0,x")
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  static char asm_text[32];
  const char *fmt;

  if (which_alternative == 0)
    fmt = "<logic>%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    fmt = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_text, sizeof (asm_text), fmt,
            TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
            ? "ps" : "<ssevecmodesuffix>");
  return asm_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssevecmode>")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA floating point multiply/accumulate instructions.  These include
;; scalar versions of the instructions as well as vector versions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])

;; The standard names for fma are only available with SSE math enabled.
;; fma: op0 = op1 * op2 + op3
(define_expand "fma<mode>4"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand")
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (match_operand:FMAMODE 3 "nonimmediate_operand")))]
  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")

;; fms: op0 = op1 * op2 + (-op3)
(define_expand "fms<mode>4"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand")
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")

;; fnma: op0 = (-op1) * op2 + op3
(define_expand "fnma<mode>4"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand"))
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (match_operand:FMAMODE 3 "nonimmediate_operand")))]
  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")

;; fnms: op0 = (-op1) * op2 + (-op3)
(define_expand "fnms<mode>4"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand"))
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")

;; The builtin for intrinsics is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand")
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (match_operand:FMAMODE 3 "nonimmediate_operand")))]
  "TARGET_FMA || TARGET_FMA4")

;; In each of the four insns below, alternatives 0-2 are the FMA3
;; 132/213/231 forms and alternatives 3-4 are the four-operand FMA4
;; form, as selected by the "isa" attribute.

;; op0 = op1 * op2 + op3
(define_insn "*fma_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
          (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
          (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; op0 = op1 * op2 + (-op3)
(define_insn "*fma_fmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
          (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; op0 = (-op1) * op2 + op3
(define_insn "*fma_fnmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
          (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
          (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; op0 = (-op1) * op2 + (-op3)
(define_insn "*fma_fnmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
          (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA parallel floating point multiply addsub and subadd operations.

;; It would be possible to represent these without the UNSPEC as
;;
;; (vec_merge
;;   (fma op1 op2 op3)
;;   (fma op1 op2 (neg op3))
;;   (merge-const))
;;
;; But this doesn't seem useful in practice.
(define_expand "fmaddsub_<mode>"
  [(set (match_operand:VF 0 "register_operand")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand")
           (match_operand:VF 2 "nonimmediate_operand")
           (match_operand:VF 3 "nonimmediate_operand")]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4")

;; Alternatives 0-2 are the FMA3 132/213/231 forms, alternatives 3-4
;; the four-operand FMA4 form, as selected by the "isa" attribute.
(define_insn "*fma_fmaddsub_<mode>"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
           (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Same unspec with operand 3 negated; emitted as vfmsubadd.
(define_insn "*fma_fmsubadd_<mode>"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
           (neg:VF
             (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA3 floating point scalar intrinsics.
;; These merge the scalar result (element 0) with the high-order
;; elements of operand 1, the first multiplicand.
;;
;; The hardware instructions keep the upper elements of the destination
;; register, so operand 1 must always be the operand tied to the
;; destination.  For that reason:
;;  - the preserved elements are written as (match_dup 1), not
;;    (match_dup 0); in the expander operand 0 is a fresh destination
;;    whose previous contents are undefined;
;;  - operand 1 is not marked commutative, since swapping it with
;;    operand 2 would change which input supplies the upper elements;
;;  - there is no 231 alternative, since that form ties operand 3 to
;;    the destination.

(define_expand "fmai_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand")
            (match_operand:VF_128 2 "nonimmediate_operand")
            (match_operand:VF_128 3 "nonimmediate_operand"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA")

;; Element 0: op1 * op2 + op3
(define_insn "*fmai_fmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
            (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA"
  "@
   vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Element 0: op1 * op2 + (-op3)
(define_insn "*fmai_fmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA"
  "@
   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Element 0: (-op1) * op2 + op3
(define_insn "*fmai_fnmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0"))
            (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
            (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; Element 0: (-op1) * op2 + (-op3)
(define_insn "*fmai_fnmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0"))
            (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA4 floating point scalar intrinsics.  These write the
;; entire destination register, with the high-order elements zeroed.

;; Expander for the FMA4 scalar intrinsic.  Operand 4 is not supplied by
;; the caller; the preparation statement sets it to the all-zero vector,
;; so every element other than element 0 of the result is zero.
(define_expand "fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand")
            (match_operand:VF_128 2 "nonimmediate_operand")
            (match_operand:VF_128 3 "nonimmediate_operand"))
          (match_dup 4)
          (const_int 1)))]
  "TARGET_FMA4"
{
  operands[4] = CONST0_RTX (<MODE>mode);
})

;; FMA4 scalar op1 * op2 + op3, merged with a zero vector (operand 4).
;; Either operand 3 or operand 2 may be a memory operand, not both.
(define_insn "*fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
          (match_operand:VF_128 4 "const0_operand" "")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA4 scalar op1 * op2 - op3 (operand 3 negated).
(define_insn "*fma4i_vmfmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
          (match_operand:VF_128 4 "const0_operand" "")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA4 scalar -(op1) * op2 + op3 (operand 1 negated).
(define_insn "*fma4i_vmfnmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
          (match_operand:VF_128 4 "const0_operand" "")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA4 scalar -(op1) * op2 - op3 (operands 1 and 3 negated).
(define_insn "*fma4i_vmfnmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
          (match_operand:VF_128 4 "const0_operand" "")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Convert the V2SI operand 2 (MMX register or memory) to float and merge
;; it into elements 0-1 of operand 1 (mask 3); operand 1 is tied to the
;; destination.
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of operand 1 to
;; integers; elements 0-1 go to an MMX register.
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; Truncating (fix) counterpart of sse_cvtps2pi.
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "prefix_rep" "0")
   (set_attr "mode" "SF")])

;; Convert the SImode operand 2 to SF and place it in element 0 of the
;; result; the other elements come from operand 1.  Alternatives 1-2 are
;; the two-operand non-AVX form, alternative 3 the three-operand AVX form.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
          (match_operand:V4SF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   cvtsi2ss\t{%2, %0|%0, %2}
   cvtsi2ss\t{%2, %0|%0, %2}
   vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])

;; DImode-source variant of sse_cvtsi2ss; 64-bit targets only.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
          (match_operand:V4SF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "@
   cvtsi2ssq\t{%2, %0|%0, %2}
   cvtsi2ssq\t{%2, %0|%0, %2}
   vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])

;; Non-truncating conversion of element 0 of the V4SF operand 1 to SI.
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; Same instruction as sse_cvtss2si, matched on a plain SFmode operand.
(define_insn "sse_cvtss2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
                   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse_cvtss2si; 64-bit targets only.
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; DImode-result variant of sse_cvtss2si_2; 64-bit targets only.
(define_insn "sse_cvtss2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
                   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; Truncating (fix) conversion of element 0 of the V4SF operand 1 to SI.
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "%vcvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse_cvttss2si; 64-bit targets only.
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvttss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; Whole-vector signed integer -> single-precision float conversion for
;; every mode in VF1.
(define_insn "float<sseintvecmodelower><mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (float:VF1
          (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "%vcvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Unsigned integer -> float conversion; all of the work is delegated to
;; ix86_expand_vector_convert_uns_vsivsf.  Modes other than V4SF
;; additionally require AVX2.
(define_expand "floatuns<sseintvecmodelower><mode>2"
  [(match_operand:VF1 0 "register_operand" "")
   (match_operand:<sseintvecmode> 1 "register_operand" "")]
  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
{
  ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
  DONE;
})

;; 256-bit non-truncating float -> int32 conversion.
(define_insn "avx_cvtps2dq256"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX"
  "vcvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit non-truncating float -> int32 conversion.  prefix_data16 is
;; forced to "1" only when not compiling for AVX.
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; 256-bit truncating float -> int32 conversion.
(define_insn "fix_truncv8sfv8si2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit truncating float -> int32 conversion.  prefix_rep and
;; prefix_data16 are given explicit values only when not compiling for
;; AVX; with AVX they fall back to "*".
;; NOTE(review): prefix_data16 used to be set a second time,
;; unconditionally to "0", right after the conditional definition.  The
;; redundant entry is dropped here -- confirm the attribute generator was
;; not relying on the second definition for the AVX case.
(define_insn "fix_truncv4sfv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "%vcvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Unsigned truncating float -> int conversion built from the signed one:
;; ix86_expand_adjust_ufix_to_sfix_si returns the adjusted input and
;; stores a correction vector in tmp[2], which is XORed into the signed
;; result.
(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
  [(match_operand:<sseintvecmode> 0 "register_operand" "")
   (match_operand:VF1 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp[3];
  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
  tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
  emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Convert a V2SI (MMX register or memory) to V2DF.  The "unit" and
;; "prefix_data16" attributes differ between the register and memory
;; alternatives.
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "prefix_data16" "1,*")
   (set_attr "mode" "V2DF")])

;; Non-truncating V2DF -> V2SI conversion into an MMX register.
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "DI")])

;; Truncating V2DF -> V2SI conversion into an MMX register.
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])

;; Convert the SImode operand 2 to DF and place it in element 0 of the
;; result; element 1 comes from operand 1.  Alternatives 1-2 are the
;; two-operand non-AVX form, alternative 3 the three-operand AVX form.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
          (match_operand:V2DF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsi2sd\t{%2, %0|%0, %2}
   cvtsi2sd\t{%2, %0|%0, %2}
   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])

;; DImode-source variant of sse2_cvtsi2sd; 64-bit targets only.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
          (match_operand:V2DF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "@
   cvtsi2sdq\t{%2, %0|%0, %2}
   cvtsi2sdq\t{%2, %0|%0, %2}
   vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])

;; Non-truncating conversion of element 0 of the V2DF operand 1 to SI.
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; Same instruction as sse2_cvtsd2si, matched on a plain DFmode operand.
(define_insn "sse2_cvtsd2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
                   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse2_cvtsd2si; 64-bit targets only.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; DImode-result variant of sse2_cvtsd2si_2; 64-bit targets only.
(define_insn "sse2_cvtsd2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
                   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; Truncating conversion of element 0 of the V2DF operand 1 to SI.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "%vcvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse2_cvttsd2si; 64-bit targets only.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; V4SI -> V4DF signed conversion.
(define_insn "floatv4siv4df2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Convert elements 0-3 of a V8SI operand to V4DF; the source is printed
;; with the %x operand modifier.
(define_insn "avx_cvtdq2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Convert elements 0-1 of a V4SI operand to V2DF.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtdq2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])

;; Non-truncating V4DF -> V4SI conversion.
(define_insn "avx_cvtpd2dq256"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Expander producing a V8SI whose low half is the non-truncating
;; conversion of operand 1 and whose high half is zero (operand 2 is set
;; to the V4SI zero vector by the preparation statement).
(define_expand "avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (vec_concat:V8SI
          (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
                       UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = CONST0_RTX (V4SImode);")

;; Insn matched by the expander above; the destination is printed with
;; the %x operand modifier.
(define_insn "*avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (match_operand:V4SI 2 "const0_operand" "")))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Expander for non-truncating V2DF -> int32: the two converted values
;; form the low half of the V4SI result, the high half is zero.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
                       UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

;; Insn for the expander above.  The output code picks the VEX mnemonic
;; with an explicit {x} suffix when TARGET_AVX, the legacy one otherwise.
(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
  else
    return "cvtpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")])

;; Truncating V4DF -> V4SI conversion.
(define_insn "fix_truncv4dfv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Truncating counterpart of avx_cvtpd2dq256_2: low half converted,
;; high half zero.
(define_expand "avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (vec_concat:V8SI
          (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = CONST0_RTX (V4SImode);")

;; Insn matched by the expander above.
(define_insn "*avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 2 "const0_operand" "")))]
  "TARGET_AVX"
  "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Truncating counterpart of sse2_cvtpd2dq: low half converted, high
;; half zero.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

;; Insn for the expander above; mnemonic selection mirrors *sse2_cvtpd2dq.
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
  else
    return "cvttpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Scalar double -> single conversion: the truncated operand 2 supplies
;; element 0 of the result (mask 1), the other elements come from
;; operand 1.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
          (match_operand:V4SF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsd2ss\t{%2, %0|%0, %2}
   cvtsd2ss\t{%2, %0|%0, %2}
   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])

;; Scalar single -> double conversion: the extended low part of
;; operand 2 supplies element 0 of the result (mask 1), element 1 comes
;; from operand 1.
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
              (parallel [(const_int 0) (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtss2sd\t{%2, %0|%0, %2}
   cvtss2sd\t{%2, %0|%0, %2}
   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "athlon_decode" "direct,direct,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])

;; V4DF -> V4SF narrowing conversion.
(define_insn "avx_cvtpd2ps256"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

;; Expander for V2DF -> float: the two narrowed values form the low half
;; of the V4SF result, the high half is zero.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SFmode);")

;; Insn for the expander above; uses the VEX mnemonic with an explicit
;; {x} suffix when TARGET_AVX.
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
  else
    return "cvtpd2ps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])

;; V4SF -> V4DF widening conversion.
(define_insn "avx_cvtps2pd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Widen elements 0-3 of a V8SF operand to V4DF; the source is printed
;; with the %x operand modifier.
(define_insn "*avx_cvtps2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtps2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Widen elements 0-1 of a V4SF operand to V2DF.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtps2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "athlon_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])

;; Widen the high two floats of operand 1.  The first set selects
;; elements 6, 7, 2, 3 of (temp, operand 1), which puts elements 2-3 of
;; operand 1 into the low half of the fresh temporary (operand 2); the
;; second set widens that low half.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
   (vec_select:V4SF
     (vec_concat:V8SF
       (match_dup 2)
       (match_operand:V4SF 1 "nonimmediate_operand" ""))
     (parallel [(const_int 6) (const_int 7)
                (const_int 2) (const_int 3)])))
  (set (match_operand:V2DF 0 "register_operand" "")
   (float_extend:V2DF
     (vec_select:V2SF
       (match_dup 2)
       (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SFmode);")

;; Widen the high four floats of a V8SF: extract elements 4-7 into a
;; fresh V4SF temporary (operand 2), then widen it to V4DF.
(define_expand "vec_unpacks_hi_v8sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (match_operand:V8SF 1 "nonimmediate_operand" "")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = gen_reg_rtx (V4SFmode);")

;; Widen the low two floats of a V4SF to V2DF.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")

;; Widen the low four floats of a V8SF to V4DF.
(define_expand "vec_unpacks_lo_v8sf"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")

;; Float vector mode produced when half of an integer vector is unpacked
;; and converted.
(define_mode_attr sseunpackfltmode
  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])

;; Signed unpack of the high half followed by an int -> float conversion:
;; vec_unpacks_hi_<mode> widens into a temporary, which is then wrapped
;; in a FLOAT rtx.
(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

;; Low-half counterpart of vec_unpacks_float_hi_<mode>.
(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

;; Unsigned unpack of the high half (vec_unpacku_hi_<mode>) followed by
;; an int -> float conversion of the widened temporary.
(define_expand "vec_unpacku_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

;; Low-half counterpart of vec_unpacku_float_hi_<mode>.
(define_expand "vec_unpacku_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

;; Convert the high two ints of a V4SI to V2DF: shuffle elements 2, 3
;; into the low half of a fresh temporary (operand 2), then convert that
;; low half.
(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SImode);")

;; Convert the low two ints of a V4SI to V2DF.
(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")

;; Convert the high four ints of a V8SI to V4DF: extract elements 4-7
;; into a fresh V4SI temporary (operand 2), then convert it.
(define_expand "vec_unpacks_float_hi_v8si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V8SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = gen_reg_rtx (V4SImode);")

;; Convert the low four ints of a V8SI to V4DF.
(define_expand "vec_unpacks_float_lo_v8si"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")

;; Unsigned conversion of the high two ints of a V4SI to V2DF.  The
;; elements are first converted as signed values (operand 6); lanes that
;; compare less than zero (mask in operand 7) then get the constant in
;; operand 4 added.  Operand 4 is built from real_ldexp (dconst1, 32),
;; presumably 2**32.
(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 6)
        (float:V2DF
          (vec_select:V2SI
            (match_dup 5)
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 7)
        (lt:V2DF (match_dup 6) (match_dup 3)))
   (set (match_dup 8)
        (and:V2DF (match_dup 7) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 6) (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x;
  int i;

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  /* Operand 3 is the zero vector used by the comparison, operand 4 the
     correction constant in every lane.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, x));

  operands[5] = gen_reg_rtx (V4SImode);

  /* Operands 6-8 are V2DF temporaries.  */
  for (i = 6; i < 9; i++)
    operands[i] = gen_reg_rtx (V2DFmode);
})

;; Low-half counterpart of vec_unpacku_float_hi_v4si; no shuffle is
;; needed, so the temporaries are operands 5-7.
(define_expand "vec_unpacku_float_lo_v4si"
  [(set (match_dup 5)
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 6)
        (lt:V2DF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
        (and:V2DF (match_dup 6) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x;
  int i;

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, x));

  /* Operands 5-7 are V2DF temporaries.  */
  for (i = 5; i < 8; i++)
    operands[i] = gen_reg_rtx (V2DFmode);
})

;; Unsigned conversion of the high four ints of a V8SI to V4DF, emitted
;; entirely from C: extract the high half, convert it as signed, then add
;; the correction constant to the lanes that compare less than zero.
(define_expand "vec_unpacku_float_hi_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "register_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x, tmp[6];
  int i;

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  /* tmp[0] is the zero vector, tmp[1] the correction constant,
     tmp[2..4] V4DF temporaries and tmp[5] the extracted high half.  */
  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
  tmp[5] = gen_reg_rtx (V4SImode);

  for (i = 2; i < 5; i++)
    tmp[i] = gen_reg_rtx (V4DFmode);
  emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
  emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
                          gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
  DONE;
})

;; Low-half counterpart of vec_unpacku_float_hi_v8si; the signed
;; conversion of the low half is done directly by avx_cvtdq2pd256_2.
(define_expand "vec_unpacku_float_lo_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x, tmp[5];
  int i;

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));

  for (i = 2; i < 5; i++)
    tmp[i] = gen_reg_rtx (V4DFmode);
  emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
                          gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
  DONE;
})

;; Narrow two V4DF operands to V4SF each (fresh temporaries in operands
;; 3 and 4) and concatenate them into a V8SF.
(define_expand "vec_pack_trunc_v4df"
  [(set (match_dup 3)
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "")))
   (set (match_dup 4)
        (float_truncate:V4SF
          (match_operand:V4DF 2 "nonimmediate_operand" "")))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_concat:V8SF
          (match_dup 3)
          (match_dup 4)))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4SFmode);
  operands[4] = gen_reg_rtx (V4SFmode);
})

;; Narrow two V2DF operands into one V4SF.  With AVX (unless 128-bit AVX
;; is preferred) the inputs are concatenated into a V4DF and narrowed
;; with a single conversion; otherwise each input is narrowed separately
;; and the two results are combined with movlhps.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
    }
  else
    {
      tmp0 = gen_reg_rtx (V4SFmode);
      tmp1 = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
      emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
      emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
    }
  DONE;
})

;; Truncate two V4DF operands to V4SI each and concatenate the results
;; into a V8SI.
(define_expand "vec_pack_sfix_trunc_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx r1, r2;

  r1 = gen_reg_rtx (V4SImode);
  r2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
  emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
  DONE;
})

(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
3061 (match_operand:V2DF 1 "nonimmediate_operand" "") 3062 (match_operand:V2DF 2 "nonimmediate_operand" "")] 3063 "TARGET_SSE2" 3064{ 3065 rtx tmp0, tmp1; 3066 3067 if (TARGET_AVX && !TARGET_PREFER_AVX128) 3068 { 3069 tmp0 = gen_reg_rtx (V4DFmode); 3070 tmp1 = force_reg (V2DFmode, operands[1]); 3071 3072 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); 3073 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0)); 3074 } 3075 else 3076 { 3077 tmp0 = gen_reg_rtx (V4SImode); 3078 tmp1 = gen_reg_rtx (V4SImode); 3079 3080 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1])); 3081 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2])); 3082 emit_insn 3083 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), 3084 gen_lowpart (V2DImode, tmp0), 3085 gen_lowpart (V2DImode, tmp1))); 3086 } 3087 DONE; 3088}) 3089 3090(define_mode_attr ssepackfltmode 3091 [(V4DF "V8SI") (V2DF "V4SI")]) 3092 3093(define_expand "vec_pack_ufix_trunc_<mode>" 3094 [(match_operand:<ssepackfltmode> 0 "register_operand" "") 3095 (match_operand:VF2 1 "register_operand" "") 3096 (match_operand:VF2 2 "register_operand" "")] 3097 "TARGET_SSE2" 3098{ 3099 rtx tmp[7]; 3100 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); 3101 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]); 3102 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode); 3103 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1])); 3104 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2) 3105 { 3106 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode); 3107 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0); 3108 } 3109 else 3110 { 3111 tmp[5] = gen_reg_rtx (V8SFmode); 3112 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]), 3113 gen_lowpart (V8SFmode, tmp[3]), 0); 3114 tmp[5] = gen_lowpart (V8SImode, tmp[5]); 3115 } 3116 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5], 3117 operands[0], 0, OPTAB_DIRECT); 3118 if (tmp[6] != operands[0]) 3119 
emit_move_insn (operands[0], tmp[6]); 3120 DONE; 3121}) 3122 3123(define_expand "vec_pack_sfix_v4df" 3124 [(match_operand:V8SI 0 "register_operand" "") 3125 (match_operand:V4DF 1 "nonimmediate_operand" "") 3126 (match_operand:V4DF 2 "nonimmediate_operand" "")] 3127 "TARGET_AVX" 3128{ 3129 rtx r1, r2; 3130 3131 r1 = gen_reg_rtx (V4SImode); 3132 r2 = gen_reg_rtx (V4SImode); 3133 3134 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1])); 3135 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2])); 3136 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); 3137 DONE; 3138}) 3139 3140(define_expand "vec_pack_sfix_v2df" 3141 [(match_operand:V4SI 0 "register_operand" "") 3142 (match_operand:V2DF 1 "nonimmediate_operand" "") 3143 (match_operand:V2DF 2 "nonimmediate_operand" "")] 3144 "TARGET_SSE2" 3145{ 3146 rtx tmp0, tmp1; 3147 3148 if (TARGET_AVX && !TARGET_PREFER_AVX128) 3149 { 3150 tmp0 = gen_reg_rtx (V4DFmode); 3151 tmp1 = force_reg (V2DFmode, operands[1]); 3152 3153 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); 3154 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0)); 3155 } 3156 else 3157 { 3158 tmp0 = gen_reg_rtx (V4SImode); 3159 tmp1 = gen_reg_rtx (V4SImode); 3160 3161 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1])); 3162 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2])); 3163 emit_insn 3164 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), 3165 gen_lowpart (V2DImode, tmp0), 3166 gen_lowpart (V2DImode, tmp1))); 3167 } 3168 DONE; 3169}) 3170 3171;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3172;; 3173;; Parallel single-precision floating point element swizzling 3174;; 3175;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3176 3177(define_expand "sse_movhlps_exp" 3178 [(set (match_operand:V4SF 0 "nonimmediate_operand" "") 3179 (vec_select:V4SF 3180 (vec_concat:V8SF 3181 (match_operand:V4SF 1 "nonimmediate_operand" "") 3182 (match_operand:V4SF 2 "nonimmediate_operand" "")) 3183 (parallel 
[(const_int 6) 3184 (const_int 7) 3185 (const_int 2) 3186 (const_int 3)])))] 3187 "TARGET_SSE" 3188{ 3189 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); 3190 3191 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2])); 3192 3193 /* Fix up the destination if needed. */ 3194 if (dst != operands[0]) 3195 emit_move_insn (operands[0], dst); 3196 3197 DONE; 3198}) 3199 3200(define_insn "sse_movhlps" 3201 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") 3202 (vec_select:V4SF 3203 (vec_concat:V8SF 3204 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") 3205 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x")) 3206 (parallel [(const_int 6) 3207 (const_int 7) 3208 (const_int 2) 3209 (const_int 3)])))] 3210 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 3211 "@ 3212 movhlps\t{%2, %0|%0, %2} 3213 vmovhlps\t{%2, %1, %0|%0, %1, %2} 3214 movlps\t{%H2, %0|%0, %H2} 3215 vmovlps\t{%H2, %1, %0|%0, %1, %H2} 3216 %vmovhps\t{%2, %0|%0, %2}" 3217 [(set_attr "isa" "noavx,avx,noavx,avx,*") 3218 (set_attr "type" "ssemov") 3219 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") 3220 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) 3221 3222(define_expand "sse_movlhps_exp" 3223 [(set (match_operand:V4SF 0 "nonimmediate_operand" "") 3224 (vec_select:V4SF 3225 (vec_concat:V8SF 3226 (match_operand:V4SF 1 "nonimmediate_operand" "") 3227 (match_operand:V4SF 2 "nonimmediate_operand" "")) 3228 (parallel [(const_int 0) 3229 (const_int 1) 3230 (const_int 4) 3231 (const_int 5)])))] 3232 "TARGET_SSE" 3233{ 3234 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); 3235 3236 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2])); 3237 3238 /* Fix up the destination if needed. 
*/ 3239 if (dst != operands[0]) 3240 emit_move_insn (operands[0], dst); 3241 3242 DONE; 3243}) 3244 3245(define_insn "sse_movlhps" 3246 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o") 3247 (vec_select:V4SF 3248 (vec_concat:V8SF 3249 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") 3250 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x")) 3251 (parallel [(const_int 0) 3252 (const_int 1) 3253 (const_int 4) 3254 (const_int 5)])))] 3255 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)" 3256 "@ 3257 movlhps\t{%2, %0|%0, %2} 3258 vmovlhps\t{%2, %1, %0|%0, %1, %2} 3259 movhps\t{%2, %0|%0, %2} 3260 vmovhps\t{%2, %1, %0|%0, %1, %2} 3261 %vmovlps\t{%2, %H0|%H0, %2}" 3262 [(set_attr "isa" "noavx,avx,noavx,avx,*") 3263 (set_attr "type" "ssemov") 3264 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") 3265 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) 3266 3267;; Recall that the 256-bit unpck insns only shuffle within their lanes. 3268(define_insn "avx_unpckhps256" 3269 [(set (match_operand:V8SF 0 "register_operand" "=x") 3270 (vec_select:V8SF 3271 (vec_concat:V16SF 3272 (match_operand:V8SF 1 "register_operand" "x") 3273 (match_operand:V8SF 2 "nonimmediate_operand" "xm")) 3274 (parallel [(const_int 2) (const_int 10) 3275 (const_int 3) (const_int 11) 3276 (const_int 6) (const_int 14) 3277 (const_int 7) (const_int 15)])))] 3278 "TARGET_AVX" 3279 "vunpckhps\t{%2, %1, %0|%0, %1, %2}" 3280 [(set_attr "type" "sselog") 3281 (set_attr "prefix" "vex") 3282 (set_attr "mode" "V8SF")]) 3283 3284(define_expand "vec_interleave_highv8sf" 3285 [(set (match_dup 3) 3286 (vec_select:V8SF 3287 (vec_concat:V16SF 3288 (match_operand:V8SF 1 "register_operand" "x") 3289 (match_operand:V8SF 2 "nonimmediate_operand" "xm")) 3290 (parallel [(const_int 0) (const_int 8) 3291 (const_int 1) (const_int 9) 3292 (const_int 4) (const_int 12) 3293 (const_int 5) (const_int 13)]))) 3294 (set (match_dup 4) 3295 (vec_select:V8SF 3296 (vec_concat:V16SF 3297 
(match_dup 1) 3298 (match_dup 2)) 3299 (parallel [(const_int 2) (const_int 10) 3300 (const_int 3) (const_int 11) 3301 (const_int 6) (const_int 14) 3302 (const_int 7) (const_int 15)]))) 3303 (set (match_operand:V8SF 0 "register_operand" "") 3304 (vec_select:V8SF 3305 (vec_concat:V16SF 3306 (match_dup 3) 3307 (match_dup 4)) 3308 (parallel [(const_int 4) (const_int 5) 3309 (const_int 6) (const_int 7) 3310 (const_int 12) (const_int 13) 3311 (const_int 14) (const_int 15)])))] 3312 "TARGET_AVX" 3313{ 3314 operands[3] = gen_reg_rtx (V8SFmode); 3315 operands[4] = gen_reg_rtx (V8SFmode); 3316}) 3317 3318(define_insn "vec_interleave_highv4sf" 3319 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 3320 (vec_select:V4SF 3321 (vec_concat:V8SF 3322 (match_operand:V4SF 1 "register_operand" "0,x") 3323 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) 3324 (parallel [(const_int 2) (const_int 6) 3325 (const_int 3) (const_int 7)])))] 3326 "TARGET_SSE" 3327 "@ 3328 unpckhps\t{%2, %0|%0, %2} 3329 vunpckhps\t{%2, %1, %0|%0, %1, %2}" 3330 [(set_attr "isa" "noavx,avx") 3331 (set_attr "type" "sselog") 3332 (set_attr "prefix" "orig,vex") 3333 (set_attr "mode" "V4SF")]) 3334 3335;; Recall that the 256-bit unpck insns only shuffle within their lanes. 
;; Per 128-bit lane this interleaves elements 0 and 1 of operand 1 with
;; the same elements of operand 2 (concat indices 8-15).
(define_insn "avx_unpcklps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))]
  "TARGET_AVX"
  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; Full-width low interleave of two V8SF vectors, built from three
;; steps: the in-lane low interleave (operand 3), the in-lane high
;; interleave (operand 4), and a final select that takes elements 0-3
;; of operand 3 followed by elements 0-3 of operand 4 (concat indices
;; 8-11).
(define_expand "vec_interleave_lowv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)])))]
 "TARGET_AVX"
{
  /* Temporaries for the two in-lane interleaves.  */
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})

;; Result is { op1[0], op2[0], op1[1], op2[1] }, emitted as unpcklps
;; (destination tied to operand 1) or the three-operand vunpcklps.
(define_insn "vec_interleave_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
;; Duplicate every odd-indexed element of operand 1 into the even slot
;; below it: { 1, 1, 3, 3, 5, 5, 7, 7 }.
(define_insn "avx_movshdup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1) (const_int 1)
                     (const_int 3) (const_int 3)
                     (const_int 5) (const_int 5)
                     (const_int 7) (const_int 7)])))]
  "TARGET_AVX"
  "vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; 128-bit form.  Operand 1 is concatenated with itself, so index 7
;; names element 3 of the second copy; the result is
;; { op1[1], op1[1], op1[3], op1[3] }.
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1)
                     (const_int 1)
                     (const_int 7)
                     (const_int 7)])))]
  "TARGET_SSE3"
  "%vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])

;; Duplicate every even-indexed element of operand 1 into the odd slot
;; above it: { 0, 0, 2, 2, 4, 4, 6, 6 }.
(define_insn "avx_movsldup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 0)
                     (const_int 2) (const_int 2)
                     (const_int 4) (const_int 4)
                     (const_int 6) (const_int 6)])))]
  "TARGET_AVX"
  "vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; 128-bit form.  Index 6 names element 2 of the second copy of
;; operand 1; the result is { op1[0], op1[0], op1[2], op1[2] }.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 0)
                     (const_int 6)
                     (const_int 6)])))]
  "TARGET_SSE3"
  "%vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])

;; Expand the 8-bit shuffle immediate (operand 3) into the eight
;; explicit element selectors that avx_shufps256_1 matches.
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  /* Each 2-bit field of the mask is used twice: once for the low lane
     and once, offset by 4, for the high lane.  Fields at bits 0-1 and
     2-3 index operand 1; fields at bits 4-5 and 6-7 index operand 2,
     whose elements start at concat index 8.  */
  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
                                  GEN_INT ((mask >> 0) & 3),
                                  GEN_INT ((mask >> 2) & 3),
                                  GEN_INT (((mask >> 4) & 3) + 8),
                                  GEN_INT (((mask >> 6) & 3) + 8),
                                  GEN_INT (((mask >> 0) & 3) + 4),
                                  GEN_INT (((mask >> 2) & 3) + 4),
                                  GEN_INT (((mask >> 4) & 3) + 12),
                                  GEN_INT (((mask >> 6) & 3) + 12)));
  DONE;
})

;; One bit in mask selects 2 elements.
;; 256-bit shufps with explicit element selectors.  Operands 3-6 select
;; the low lane (two elements from operand 1, then two from operand 2);
;; operands 7-10 select the high lane.  The insn condition requires each
;; high-lane selector to equal its low-lane counterpart plus 4, because
;; a single 8-bit immediate is emitted for both lanes.
(define_insn "avx_shufps256_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_8_to_11_operand" "")
                     (match_operand 6 "const_8_to_11_operand" "")
                     (match_operand 7 "const_4_to_7_operand" "")
                     (match_operand 8 "const_4_to_7_operand" "")
                     (match_operand 9 "const_12_to_15_operand" "")
                     (match_operand 10 "const_12_to_15_operand" "")])))]
  "TARGET_AVX
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
{
  /* Rebuild the 8-bit immediate from the low-lane selectors, two bits
     per selector, and reuse operand 3 to print it.  */
  int mask;
  mask = INTVAL (operands[3]);
  mask |= INTVAL (operands[4]) << 2;
  mask |= (INTVAL (operands[5]) - 8) << 4;
  mask |= (INTVAL (operands[6]) - 8) << 6;
  operands[3] = GEN_INT (mask);

  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; Expand the 8-bit shuffle immediate (operand 3) into the four explicit
;; element selectors matched by sse_shufps_v4sf.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int mask = INTVAL (operands[3]);
  /* The two low fields index operand 1; the two high fields index
     operand 2, whose elements start at concat index 4.  */
  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
                                  GEN_INT ((mask >> 0) & 3),
                                  GEN_INT ((mask >> 2) & 3),
                                  GEN_INT (((mask >> 4) & 3) + 4),
                                  GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})

;; 128-bit shufps with explicit element selectors, for every mode of the
;; VI4F_128 iterator.  Operands 3 and 4 pick elements of operand 1,
;; operands 5 and 6 pick elements of operand 2.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
        (vec_select:VI4F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI4F_128 1 "register_operand" "0,x")
            (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  /* Rebuild the 8-bit immediate, two bits per selector, and reuse
     operand 3 to print it.  */
  int mask = 0;
  mask |= INTVAL (operands[3]) << 0;
  mask |= INTVAL (operands[4]) << 2;
  mask |= (INTVAL (operands[5]) - 4) << 4;
  mask |= (INTVAL (operands[6]) - 4) << 6;
  operands[3] = GEN_INT (mask);

  switch (which_alternative)
    {
    case 0:
      return "shufps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

;; Extract the high half (elements 2 and 3) of a V4SF as a V2SF.
;; Alternatives: store to memory with movhps; register to register with
;; movhlps; load from the %H1 address of an offsettable memory source
;; with movlps.
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   %vmovhlps\t{%1, %d0|%d0, %1}
   %vmovlps\t{%H1, %d0|%d0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Expander front end for sse_loadhps.  The operands are passed through
;; ix86_fixup_binary_operands first; when that returns a destination
;; other than operand 0, the result is copied to operand 0 afterwards.
(define_expand "sse_loadhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Keep the low half of operand 1 and replace the high half with V2SF
;; operand 2.
;; Alternatives:
;;   0/1  operand 2 in memory: movhps / vmovhps
;;   2/3  operand 2 in a register: movlhps / vmovlhps
;;   4    memory destination tied to operand 1: movlps stores operand 2
;;        at the %H0 address
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])

;; Extract the low half (elements 0 and 1) of a V4SF as a V2SF.
;; Alternatives: store to memory with movlps; register to register with
;; a full movaps; load from memory with movlps.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   %vmovlps\t{%1, %0|%0, %1}
   %vmovaps\t{%1, %0|%0, %1}
   %vmovlps\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Expander front end for sse_loadlps; same operand fix-up and
;; destination copy-back scheme as sse_loadhps_exp.
(define_expand "sse_loadlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Keep the high half of operand 1 and replace the low half with V2SF
;; operand 2.
;; Alternatives:
;;   0/1  both in registers: shufps / vshufps with immediate 0xe4; in
;;        alternative 0 the destination is tied to operand 2
;;   2/3  operand 2 in memory: movlps / vmovlps
;;   4    memory destination tied to operand 1: movlps stores operand 2
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   vmovlps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "length_immediate" "1,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])

;; Merge element 0 of operand 2 into operand 1 (vec_merge mask 1); the
;; remaining elements come from operand 1.  Register operands only.
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "register_operand" " x,x")
          (match_operand:V4SF 1 "register_operand" " 0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])

;; Broadcast element 0 of a V4SF register to every element of a VF1
;; vector with vbroadcastss.  Requires AVX2.
(define_insn "avx2_vec_dup<mode>"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (vec_duplicate:VF1
          (vec_select:SF
            (match_operand:V4SF 1 "register_operand" "x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; Broadcast an SFmode scalar to all four elements of a V4SF.
;; Alternatives: AVX register source via vshufps with immediate 0; AVX
;; memory source via vbroadcastss; non-AVX via shufps on the destination
;; itself, which is tied to operand 1.
(define_insn "vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
  "TARGET_SSE"
  "@
   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "isa" "avx,avx,noavx")
   (set_attr "type" "sselog1,ssemov,sselog1")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "prefix" "vex,vex,orig")
   (set_attr "mode" "V4SF")])

;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
;; Concatenate two SFmode scalars into a V2SF (SSE4.1 variant).
;; Alternatives 0-3 combine two values with unpcklps or insertps;
;; alternative 4 loads operand 1 with movss when operand 2 matches the
;; "C" constraint; alternatives 5 and 6 are the MMX register
;; counterparts (punpckldq, movd).
(define_insn "*vec_concatv2sf_sse4_1"
  [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
          (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
   %vmovss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_data16" "*,*,1,*,*,*,*")
   (set_attr "prefix_extra" "*,*,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,1,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Concatenate two SFmode scalars into a V2SF (plain SSE variant).
;; Alternatives: unpcklps on two SSE registers; movss load of operand 1
;; when operand 2 matches "C"; and the MMX register counterparts
;; punpckldq and movd.
(define_insn "*vec_concatv2sf_sse"
  [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])

;; Concatenate two V2SF halves into a V4SF: movlhps / vmovlhps when
;; operand 2 is a register, movhps / vmovhps when it is in memory.
(define_insn "*vec_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])

;; Vector initialisation for the 128-bit modes; all the work is
;; delegated to ix86_expand_vector_init.
(define_expand "vec_init<mode>"
  [(match_operand:V_128 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a VI4F_128 vector to scalar operand 2 (vec_merge
;; mask 1); the other elements come from operand 1.
;; Alternatives 0-3 have operand 1 matching "C"; 4-7 merge into an
;; existing register vector; 8-10 have a memory destination tied to
;; operand 1 and are emitted as "#", i.e. they must be split.
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
          "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 2 "general_operand"
          " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
          (match_operand:VI4F_128 1 "vector_move_operand"
          " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}
   pinsrd\t{$0, %2, %0|%0, %2, 0}
   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
   #
   #
   #"
  [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,6,7")
              (const_string "sselog")
            (eq_attr "alternative" "9")
              (const_string "imov")
            (eq_attr "alternative" "10")
              (const_string "fmov")
           ]
           (const_string "ssemov")))
   (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
   (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])

;; A subset is vec_setv4sf.
;; Insert SFmode operand 2 into one element of V4SF operand 1.
;; Operand 3 is the vec_merge mask; the insn condition only accepts a
;; mask whose exact_log2 is a valid element index of V4SFmode.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
          (match_operand:V4SF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_int_operand" "")))]
  "TARGET_SSE4_1
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (V4SFmode))"
{
  /* Turn the merge mask into the insertps immediate: the element index
     shifted left by 4.  */
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
  switch (which_alternative)
    {
    case 0:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

;; insertps with the raw 8-bit immediate (operand 3), modelled as an
;; unspec.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
                      (match_operand:V4SF 1 "register_operand" "0,x")
                      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
                     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      /* For a memory source, bits 7:6 of the immediate (count_s) are
         folded into the address: the operand becomes the SFmode word
         at byte offset count_s * 4 and those bits are cleared from the
         immediate.  */
      unsigned count_s = INTVAL (operands[3]) >> 6;
      if (count_s)
        operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
    }
  switch (which_alternative)
    {
    case 0:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

;; After reload, setting element 0 of a vector that lives in memory
;; becomes a plain scalar store to offset 0 of that memory.
(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand" "")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
                  operands[1]);
  DONE;
})

;; Set the element of vector operand 0 whose index is constant
;; operand 2 to scalar operand 1; delegated to ix86_expand_vector_set.
(define_expand "vec_set<mode>"
  [(match_operand:V 0 "register_operand" "")
   (match_operand:<ssescalarmode> 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; Extract element 0 of a V4SF.  Always split after reload into a plain
;; SFmode move from the low part of the source.  The insn condition
;; rejects a memory-to-memory form.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  /* A hard register is re-created directly in SFmode; anything else
     goes through gen_lowpart.  */
  if (REG_P (op1))
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
  else
    op1 = gen_lowpart (SFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; Extract element N (operand 2) of a V4SF register.
;; Alternative 0 emits extractps to a general register or memory.
;; Alternatives 1 and 2 target an SSE register and are split after
;; reload into a shuffle that leaves the wanted element in element 0 of
;; the destination.
(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
        (vec_select:SF
          (match_operand:V4SF 1 "register_operand" "x,0,x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
  switch (INTVAL (operands[2]))
    {
    case 1:
    case 3:
      /* Shuffle operand 1 with itself so that every result element is
         element N.  */
      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
                                      operands[2], operands[2],
                                      GEN_INT (INTVAL (operands[2]) + 4),
                                      GEN_INT (INTVAL (operands[2]) + 4)));
      break;
    case 2:
      /* The high interleave of operand 1 with itself puts element 2 in
         element 0.  */
      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
      break;
    default:
      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
      gcc_unreachable ();
    }
  DONE;
}
  [(set_attr "isa" "*,noavx,avx")
   (set_attr "type" "sselog,*,*")
   (set_attr "prefix_data16" "1,*,*")
   (set_attr "prefix_extra" "1,*,*")
   (set_attr "length_immediate" "1,*,*")
   (set_attr "prefix" "maybe_vex,*,*")
   (set_attr "mode" "V4SF,*,*")])

;; Extract element N of a V4SF that lives in offsettable memory.
;; Split after reload into a scalar SFmode load from byte offset N * 4.
(define_insn_and_split "*vec_extract_v4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
        (vec_select:SF
          (match_operand:V4SF 1 "memory_operand" "o,o,o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i = INTVAL (operands[2]);

  emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
  DONE;
})

;; Extract one half of a 256-bit vector.  Operand 2 chooses the half:
;; 0 dispatches to vec_extract_lo_<mode>, 1 to vec_extract_hi_<mode>.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
   (match_operand:V_256 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  rtx (*insn)(rtx, rtx);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1]));
  DONE;
})

(define_insn_and_split "vec_extract_lo_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
        (vec_select:<ssehalfvecmode>
          (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
  else
    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
4039}) 4040 4041(define_insn "vec_extract_hi_<mode>" 4042 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4043 (vec_select:<ssehalfvecmode> 4044 (match_operand:VI8F_256 1 "register_operand" "x,x") 4045 (parallel [(const_int 2) (const_int 3)])))] 4046 "TARGET_AVX" 4047 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}" 4048 [(set_attr "type" "sselog") 4049 (set_attr "prefix_extra" "1") 4050 (set_attr "length_immediate" "1") 4051 (set_attr "memory" "none,store") 4052 (set_attr "prefix" "vex") 4053 (set_attr "mode" "<sseinsnmode>")]) 4054 4055(define_insn_and_split "vec_extract_lo_<mode>" 4056 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4057 (vec_select:<ssehalfvecmode> 4058 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x") 4059 (parallel [(const_int 0) (const_int 1) 4060 (const_int 2) (const_int 3)])))] 4061 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4062 "#" 4063 "&& reload_completed" 4064 [(const_int 0)] 4065{ 4066 rtx op1 = operands[1]; 4067 if (REG_P (op1)) 4068 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); 4069 else 4070 op1 = gen_lowpart (<ssehalfvecmode>mode, op1); 4071 emit_move_insn (operands[0], op1); 4072 DONE; 4073}) 4074 4075(define_insn "vec_extract_hi_<mode>" 4076 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4077 (vec_select:<ssehalfvecmode> 4078 (match_operand:VI4F_256 1 "register_operand" "x,x") 4079 (parallel [(const_int 4) (const_int 5) 4080 (const_int 6) (const_int 7)])))] 4081 "TARGET_AVX" 4082 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}" 4083 [(set_attr "type" "sselog") 4084 (set_attr "prefix_extra" "1") 4085 (set_attr "length_immediate" "1") 4086 (set_attr "memory" "none,store") 4087 (set_attr "prefix" "vex") 4088 (set_attr "mode" "<sseinsnmode>")]) 4089 4090(define_insn_and_split "vec_extract_lo_v16hi" 4091 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") 4092 (vec_select:V8HI 4093 (match_operand:V16HI 1 "nonimmediate_operand" 
"xm,x") 4094 (parallel [(const_int 0) (const_int 1) 4095 (const_int 2) (const_int 3) 4096 (const_int 4) (const_int 5) 4097 (const_int 6) (const_int 7)])))] 4098 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4099 "#" 4100 "&& reload_completed" 4101 [(const_int 0)] 4102{ 4103 rtx op1 = operands[1]; 4104 if (REG_P (op1)) 4105 op1 = gen_rtx_REG (V8HImode, REGNO (op1)); 4106 else 4107 op1 = gen_lowpart (V8HImode, op1); 4108 emit_move_insn (operands[0], op1); 4109 DONE; 4110}) 4111 4112(define_insn "vec_extract_hi_v16hi" 4113 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") 4114 (vec_select:V8HI 4115 (match_operand:V16HI 1 "register_operand" "x,x") 4116 (parallel [(const_int 8) (const_int 9) 4117 (const_int 10) (const_int 11) 4118 (const_int 12) (const_int 13) 4119 (const_int 14) (const_int 15)])))] 4120 "TARGET_AVX" 4121 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" 4122 [(set_attr "type" "sselog") 4123 (set_attr "prefix_extra" "1") 4124 (set_attr "length_immediate" "1") 4125 (set_attr "memory" "none,store") 4126 (set_attr "prefix" "vex") 4127 (set_attr "mode" "OI")]) 4128 4129(define_insn_and_split "vec_extract_lo_v32qi" 4130 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") 4131 (vec_select:V16QI 4132 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x") 4133 (parallel [(const_int 0) (const_int 1) 4134 (const_int 2) (const_int 3) 4135 (const_int 4) (const_int 5) 4136 (const_int 6) (const_int 7) 4137 (const_int 8) (const_int 9) 4138 (const_int 10) (const_int 11) 4139 (const_int 12) (const_int 13) 4140 (const_int 14) (const_int 15)])))] 4141 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4142 "#" 4143 "&& reload_completed" 4144 [(const_int 0)] 4145{ 4146 rtx op1 = operands[1]; 4147 if (REG_P (op1)) 4148 op1 = gen_rtx_REG (V16QImode, REGNO (op1)); 4149 else 4150 op1 = gen_lowpart (V16QImode, op1); 4151 emit_move_insn (operands[0], op1); 4152 DONE; 4153}) 4154 4155(define_insn "vec_extract_hi_v32qi" 4156 [(set 
(match_operand:V16QI 0 "nonimmediate_operand" "=x,m") 4157 (vec_select:V16QI 4158 (match_operand:V32QI 1 "register_operand" "x,x") 4159 (parallel [(const_int 16) (const_int 17) 4160 (const_int 18) (const_int 19) 4161 (const_int 20) (const_int 21) 4162 (const_int 22) (const_int 23) 4163 (const_int 24) (const_int 25) 4164 (const_int 26) (const_int 27) 4165 (const_int 28) (const_int 29) 4166 (const_int 30) (const_int 31)])))] 4167 "TARGET_AVX" 4168 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" 4169 [(set_attr "type" "sselog") 4170 (set_attr "prefix_extra" "1") 4171 (set_attr "length_immediate" "1") 4172 (set_attr "memory" "none,store") 4173 (set_attr "prefix" "vex") 4174 (set_attr "mode" "OI")]) 4175 4176;; Modes handled by vec_extract patterns. 4177(define_mode_iterator VEC_EXTRACT_MODE 4178 [(V32QI "TARGET_AVX") V16QI 4179 (V16HI "TARGET_AVX") V8HI 4180 (V8SI "TARGET_AVX") V4SI 4181 (V4DI "TARGET_AVX") V2DI 4182 (V8SF "TARGET_AVX") V4SF 4183 (V4DF "TARGET_AVX") V2DF]) 4184 4185(define_expand "vec_extract<mode>" 4186 [(match_operand:<ssescalarmode> 0 "register_operand" "") 4187 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "") 4188 (match_operand 2 "const_int_operand" "")] 4189 "TARGET_SSE" 4190{ 4191 ix86_expand_vector_extract (false, operands[0], operands[1], 4192 INTVAL (operands[2])); 4193 DONE; 4194}) 4195 4196;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4197;; 4198;; Parallel double-precision floating point element swizzling 4199;; 4200;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4201 4202;; Recall that the 256-bit unpck insns only shuffle within their lanes. 
;; 256-bit unpckhpd.  The selector {1, 5, 3, 7} applied to the
;; concatenation of operands 1 and 2 takes element 1 of each operand
;; followed by element 3 of each operand, so the interleave is performed
;; separately on each 128-bit half of the inputs.
(define_insn "avx_unpckhpd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel
            [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))]
  "TARGET_AVX"
  "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Full-width high interleave of two V4DF vectors, built from three
;; selections.  Writing operand 1 as {a0,a1,a2,a3} and operand 2 as
;; {b0,b1,b2,b3}:
;;   operand 3 = {a0,b0,a2,b2}   (selector 0,4,2,6 of operands 1 and 2)
;;   operand 4 = {a1,b1,a3,b3}   (selector 1,5,3,7 of operands 1 and 2)
;;   operand 0 = {a2,b2,a3,b3}   (selector 2,3,6,7 of operands 3 and 4)
;; Operands 3 and 4 are fresh V4DF pseudos created by the preparation
;; statements.
(define_expand "vec_interleave_highv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel
            [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel
            [(const_int 2) (const_int 3) (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
{
  int tmp_opno;

  /* Allocate the two intermediate results, operand 3 first.  */
  for (tmp_opno = 3; tmp_opno <= 4; tmp_opno++)
    operands[tmp_opno] = gen_reg_rtx (V4DFmode);
})


;; V2DF high interleave: selector {1, 3} takes element 1 of operand 1
;; and element 1 of operand 2.  Both inputs are accepted as
;; nonimmediate operands; operand 2 is forced into a register when
;; ix86_vec_interleave_v2df_operator_ok rejects the combination.
(define_expand "vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2"
{
  bool operands_ok = ix86_vec_interleave_v2df_operator_ok (operands, 1);

  if (!operands_ok)
    operands[2] = force_reg (V2DFmode, operands[2]);
})

;; Insn matching the V2DF high interleave.  Alternatives, in order:
;;   0  unpckhpd   two-operand form, operand 1 tied to the destination
;;   1  vunpckhpd  three-operand form
;;   2  movddup    operand 1 in offsettable memory, operand 2 the same
;;                 operand ("1")
;;   3  movlpd     operand 1 in memory, operand 2 tied to the destination
;;   4  vmovlpd    operand 1 in memory, operand 2 in a register
;;   5  movhpd     store of register operand 1 to a memory destination
;; NOTE(review): the %H1 modifier presumably addresses the upper 8 bytes
;; of the memory operand -- confirm against the operand printer in i386.c.
(define_insn "*vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
          (parallel [(const_int 1) (const_int 3)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%H1, %0|%0, %H1}
   movlpd\t{%H1, %0|%0, %H1}
   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhpd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix_data16" "*,*,*,1,*,1")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])

;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; This expander has no preparation statements; it produces the
;; {0, 4, 2, 6} selection with operand 1 supplied as both inputs of the
;; concatenation.
(define_expand "avx_movddup256"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand" "")
            (match_dup 1))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))]
  "TARGET_AVX")

;; Same {0, 4, 2, 6} selection with two distinct inputs; no preparation
;; statements.
(define_expand "avx_unpcklpd256"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "")
            (match_operand:V4DF 2 "nonimmediate_operand" ""))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))]
  "TARGET_AVX")

;; Insn for the {0, 4, 2, 6} selection.  Alternative 0 is the general
;; vunpcklpd form; alternative 1 applies when operand 1 is in memory and
;; operand 2 is that same operand ("1"), and is output as vmovddup.
(define_insn "*avx_unpcklpd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))]
  "TARGET_AVX"
  "@
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
;; Full-width low interleave of two V4DF vectors, built from three
;; selections.  Writing operand 1 as {a0,a1,a2,a3} and operand 2 as
;; {b0,b1,b2,b3}:
;;   operand 3 = {a0,b0,a2,b2}   (selector 0,4,2,6 of operands 1 and 2)
;;   operand 4 = {a1,b1,a3,b3}   (selector 1,5,3,7 of operands 1 and 2)
;;   operand 0 = {a0,b0,a1,b1}   (selector 0,1,4,5 of operands 3 and 4)
;; Operands 3 and 4 are fresh V4DF pseudos created by the preparation
;; statements.
(define_expand "vec_interleave_lowv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel
            [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel
            [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel
            [(const_int 0) (const_int 1) (const_int 4) (const_int 5)])))]
  "TARGET_AVX"
{
  int tmp_opno;

  /* Allocate the two intermediate results, operand 3 first.  */
  for (tmp_opno = 3; tmp_opno <= 4; tmp_opno++)
    operands[tmp_opno] = gen_reg_rtx (V4DFmode);
})

;; V2DF low interleave: selector {0, 2} takes element 0 of operand 1 and
;; element 0 of operand 2.  Operand 1 is forced into a register when
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination.
(define_expand "vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2"
{
  bool operands_ok = ix86_vec_interleave_v2df_operator_ok (operands, 0);

  if (!operands_ok)
    operands[1] = force_reg (V2DFmode, operands[1]);
})

;; Insn matching the V2DF low interleave.  Alternatives, in order:
;;   0  unpcklpd   two-operand form, operand 1 tied to the destination
;;   1  vunpcklpd  three-operand form
;;   2  movddup    operand 1 in memory, operand 2 the same operand ("1")
;;   3  movhpd     operand 2 in memory, operand 1 tied to the destination
;;   4  vmovhpd    operand 2 in memory, operand 1 in a register
;;   5  movlpd     store of register operand 2 into an offsettable
;;                 memory destination
;; NOTE(review): the %H0 modifier presumably addresses the upper 8 bytes
;; of the memory destination -- confirm against the operand printer in
;; i386.c.
(define_insn "*vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %1}
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix_data16" "*,*,*,1,*,1")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])

;; After reload, a memory destination receiving elements {0, 2} of a
;; register concatenated with itself (element 0 of the register, twice)
;; is rewritten as two DFmode stores of that register.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  /* The source register, viewed in DFmode.  */
  rtx lo_df = gen_rtx_REG (DFmode, REGNO (operands[1]));
  int byte_off;

  /* Store the same value at byte offsets 0 and 8 of the destination.  */
  for (byte_off = 0; byte_off <= 8; byte_off += 8)
    emit_move_insn (adjust_address (operands[0], DFmode, byte_off), lo_df);
  DONE;
})

;; Selecting element I from both copies of one memory operand (the
;; condition requires operand 3 == operand 2 + 2) is rewritten as a
;; vec_duplicate of the DFmode value at byte offset I * 8.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "memory_operand" "")
            (match_dup 1))
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
                     (match_operand:SI 3 "const_int_operand" "")])))]
  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
{
  HOST_WIDE_INT byte_off = INTVAL (operands[2]) * 8;

  operands[1] = adjust_address (operands[1], DFmode, byte_off);
})

;; Expander taking the shuffle control as one integer in operand 3 and
;; turning it into the four explicit element selectors that
;; avx_shufpd256_1 takes.
(define_expand "avx_shufpd256"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V4DF 1 "register_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[3]);
  rtx sel[4];

  /* Bit N of the immediate chooses the odd (bit set) or even (bit clear)
     element of the pair feeding result position N.  Positions 1 and 3
     index operand 2, hence the offset of 4 into the concatenation.  */
  sel[0] = GEN_INT (imm & 1);
  sel[1] = GEN_INT ((imm & 2) ? 5 : 4);
  sel[2] = GEN_INT ((imm & 4) ? 3 : 2);
  sel[3] = GEN_INT ((imm & 8) ? 7 : 6);

  emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
                                  sel[0], sel[1], sel[2], sel[3]));
  DONE;
})

;; vshufpd on V4DF with the selection spelled out as four constants,
;; each restricted by its predicate to one pair of elements (0-1, 4-5,
;; 2-3, 6-7).  The output code folds them back into one immediate,
;; which overwrites operands[3] before the template is returned.
(define_insn "avx_shufpd256_1"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_4_to_5_operand" "")
                     (match_operand 5 "const_2_to_3_operand" "")
                     (match_operand 6 "const_6_to_7_operand" "")])))]
  "TARGET_AVX"
{
  /* Each selector contributes its offset from the base of its allowed
     range (0, 4, 2 and 6 respectively) as bits 0 through 3.  */
  int imm = (INTVAL (operands[3])
             | ((INTVAL (operands[4]) - 4) << 1)
             | ((INTVAL (operands[5]) - 2) << 2)
             | ((INTVAL (operands[6]) - 6) << 3));

  operands[3] = GEN_INT (imm);
  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Expander taking the shuffle control as one integer in operand 3 and
;; turning it into the two explicit element selectors passed to
;; gen_sse2_shufpd_v2df.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[3]);

  /* Bit 0 picks element 0 or 1 (from operand 1); bit 1 picks element 2
     or 3 of the concatenation (from operand 2).  */
  rtx sel_lo = GEN_INT (imm & 1);
  rtx sel_hi = GEN_INT ((imm & 2) ? 3 : 2);

  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
                                   sel_lo, sel_hi));
  DONE;
})

;; punpcklqdq and punpckhqdq are shorter than shufpd.
4471(define_insn "avx2_interleave_highv4di" 4472 [(set (match_operand:V4DI 0 "register_operand" "=x") 4473 (vec_select:V4DI 4474 (vec_concat:V8DI 4475 (match_operand:V4DI 1 "register_operand" "x") 4476 (match_operand:V4DI 2 "nonimmediate_operand" "xm")) 4477 (parallel [(const_int 1) 4478 (const_int 5) 4479 (const_int 3) 4480 (const_int 7)])))] 4481 "TARGET_AVX2" 4482 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" 4483 [(set_attr "type" "sselog") 4484 (set_attr "prefix" "vex") 4485 (set_attr "mode" "OI")]) 4486 4487(define_insn "vec_interleave_highv2di" 4488 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 4489 (vec_select:V2DI 4490 (vec_concat:V4DI 4491 (match_operand:V2DI 1 "register_operand" "0,x") 4492 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")) 4493 (parallel [(const_int 1) 4494 (const_int 3)])))] 4495 "TARGET_SSE2" 4496 "@ 4497 punpckhqdq\t{%2, %0|%0, %2} 4498 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" 4499 [(set_attr "isa" "noavx,avx") 4500 (set_attr "type" "sselog") 4501 (set_attr "prefix_data16" "1,*") 4502 (set_attr "prefix" "orig,vex") 4503 (set_attr "mode" "TI")]) 4504 4505(define_insn "avx2_interleave_lowv4di" 4506 [(set (match_operand:V4DI 0 "register_operand" "=x") 4507 (vec_select:V4DI 4508 (vec_concat:V8DI 4509 (match_operand:V4DI 1 "register_operand" "x") 4510 (match_operand:V4DI 2 "nonimmediate_operand" "xm")) 4511 (parallel [(const_int 0) 4512 (const_int 4) 4513 (const_int 2) 4514 (const_int 6)])))] 4515 "TARGET_AVX2" 4516 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" 4517 [(set_attr "type" "sselog") 4518 (set_attr "prefix" "vex") 4519 (set_attr "mode" "OI")]) 4520 4521(define_insn "vec_interleave_lowv2di" 4522 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 4523 (vec_select:V2DI 4524 (vec_concat:V4DI 4525 (match_operand:V2DI 1 "register_operand" "0,x") 4526 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")) 4527 (parallel [(const_int 0) 4528 (const_int 2)])))] 4529 "TARGET_SSE2" 4530 "@ 4531 punpcklqdq\t{%2, %0|%0, %2} 4532 
vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" 4533 [(set_attr "isa" "noavx,avx") 4534 (set_attr "type" "sselog") 4535 (set_attr "prefix_data16" "1,*") 4536 (set_attr "prefix" "orig,vex") 4537 (set_attr "mode" "TI")]) 4538 4539(define_insn "sse2_shufpd_<mode>" 4540 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x") 4541 (vec_select:VI8F_128 4542 (vec_concat:<ssedoublevecmode> 4543 (match_operand:VI8F_128 1 "register_operand" "0,x") 4544 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm")) 4545 (parallel [(match_operand 3 "const_0_to_1_operand" "") 4546 (match_operand 4 "const_2_to_3_operand" "")])))] 4547 "TARGET_SSE2" 4548{ 4549 int mask; 4550 mask = INTVAL (operands[3]); 4551 mask |= (INTVAL (operands[4]) - 2) << 1; 4552 operands[3] = GEN_INT (mask); 4553 4554 switch (which_alternative) 4555 { 4556 case 0: 4557 return "shufpd\t{%3, %2, %0|%0, %2, %3}"; 4558 case 1: 4559 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4560 default: 4561 gcc_unreachable (); 4562 } 4563} 4564 [(set_attr "isa" "noavx,avx") 4565 (set_attr "type" "sselog") 4566 (set_attr "length_immediate" "1") 4567 (set_attr "prefix" "orig,vex") 4568 (set_attr "mode" "V2DF")]) 4569 4570;; Avoid combining registers from different units in a single alternative, 4571;; see comment above inline_secondary_memory_needed function in i386.c 4572(define_insn "sse2_storehpd" 4573 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r") 4574 (vec_select:DF 4575 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o") 4576 (parallel [(const_int 1)])))] 4577 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4578 "@ 4579 %vmovhpd\t{%1, %0|%0, %1} 4580 unpckhpd\t%0, %0 4581 vunpckhpd\t{%d1, %0|%0, %d1} 4582 # 4583 # 4584 #" 4585 [(set_attr "isa" "*,noavx,avx,*,*,*") 4586 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov") 4587 (set (attr "prefix_data16") 4588 (if_then_else 4589 (and (eq_attr "alternative" "0") 4590 (not (match_test "TARGET_AVX"))) 4591 (const_string "1") 
4592 (const_string "*"))) 4593 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*") 4594 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")]) 4595 4596(define_split 4597 [(set (match_operand:DF 0 "register_operand" "") 4598 (vec_select:DF 4599 (match_operand:V2DF 1 "memory_operand" "") 4600 (parallel [(const_int 1)])))] 4601 "TARGET_SSE2 && reload_completed" 4602 [(set (match_dup 0) (match_dup 1))] 4603 "operands[1] = adjust_address (operands[1], DFmode, 8);") 4604 4605(define_insn "*vec_extractv2df_1_sse" 4606 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") 4607 (vec_select:DF 4608 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o") 4609 (parallel [(const_int 1)])))] 4610 "!TARGET_SSE2 && TARGET_SSE 4611 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4612 "@ 4613 movhps\t{%1, %0|%0, %1} 4614 movhlps\t{%1, %0|%0, %1} 4615 movlps\t{%H1, %0|%0, %H1}" 4616 [(set_attr "type" "ssemov") 4617 (set_attr "mode" "V2SF,V4SF,V2SF")]) 4618 4619;; Avoid combining registers from different units in a single alternative, 4620;; see comment above inline_secondary_memory_needed function in i386.c 4621(define_insn "sse2_storelpd" 4622 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r") 4623 (vec_select:DF 4624 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m") 4625 (parallel [(const_int 0)])))] 4626 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4627 "@ 4628 %vmovlpd\t{%1, %0|%0, %1} 4629 # 4630 # 4631 # 4632 #" 4633 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") 4634 (set_attr "prefix_data16" "1,*,*,*,*") 4635 (set_attr "prefix" "maybe_vex") 4636 (set_attr "mode" "V1DF,DF,DF,DF,DF")]) 4637 4638(define_split 4639 [(set (match_operand:DF 0 "register_operand" "") 4640 (vec_select:DF 4641 (match_operand:V2DF 1 "nonimmediate_operand" "") 4642 (parallel [(const_int 0)])))] 4643 "TARGET_SSE2 && reload_completed" 4644 [(const_int 0)] 4645{ 4646 rtx op1 = operands[1]; 4647 if (REG_P (op1)) 4648 op1 = gen_rtx_REG (DFmode, REGNO (op1)); 4649 else 4650 
op1 = gen_lowpart (DFmode, op1); 4651 emit_move_insn (operands[0], op1); 4652 DONE; 4653}) 4654 4655(define_insn "*vec_extractv2df_0_sse" 4656 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") 4657 (vec_select:DF 4658 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") 4659 (parallel [(const_int 0)])))] 4660 "!TARGET_SSE2 && TARGET_SSE 4661 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4662 "@ 4663 movlps\t{%1, %0|%0, %1} 4664 movaps\t{%1, %0|%0, %1} 4665 movlps\t{%1, %0|%0, %1}" 4666 [(set_attr "type" "ssemov") 4667 (set_attr "mode" "V2SF,V4SF,V2SF")]) 4668 4669(define_expand "sse2_loadhpd_exp" 4670 [(set (match_operand:V2DF 0 "nonimmediate_operand" "") 4671 (vec_concat:V2DF 4672 (vec_select:DF 4673 (match_operand:V2DF 1 "nonimmediate_operand" "") 4674 (parallel [(const_int 0)])) 4675 (match_operand:DF 2 "nonimmediate_operand" "")))] 4676 "TARGET_SSE2" 4677{ 4678 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); 4679 4680 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2])); 4681 4682 /* Fix up the destination if needed. 
*/ 4683 if (dst != operands[0]) 4684 emit_move_insn (operands[0], dst); 4685 4686 DONE; 4687}) 4688 4689;; Avoid combining registers from different units in a single alternative, 4690;; see comment above inline_secondary_memory_needed function in i386.c 4691(define_insn "sse2_loadhpd" 4692 [(set (match_operand:V2DF 0 "nonimmediate_operand" 4693 "=x,x,x,x,o,o ,o") 4694 (vec_concat:V2DF 4695 (vec_select:DF 4696 (match_operand:V2DF 1 "nonimmediate_operand" 4697 " 0,x,0,x,0,0 ,0") 4698 (parallel [(const_int 0)])) 4699 (match_operand:DF 2 "nonimmediate_operand" 4700 " m,m,x,x,x,*f,r")))] 4701 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 4702 "@ 4703 movhpd\t{%2, %0|%0, %2} 4704 vmovhpd\t{%2, %1, %0|%0, %1, %2} 4705 unpcklpd\t{%2, %0|%0, %2} 4706 vunpcklpd\t{%2, %1, %0|%0, %1, %2} 4707 # 4708 # 4709 #" 4710 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") 4711 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") 4712 (set_attr "prefix_data16" "1,*,*,*,*,*,*") 4713 (set_attr "prefix" "orig,vex,orig,vex,*,*,*") 4714 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")]) 4715 4716(define_split 4717 [(set (match_operand:V2DF 0 "memory_operand" "") 4718 (vec_concat:V2DF 4719 (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) 4720 (match_operand:DF 1 "register_operand" "")))] 4721 "TARGET_SSE2 && reload_completed" 4722 [(set (match_dup 0) (match_dup 1))] 4723 "operands[0] = adjust_address (operands[0], DFmode, 8);") 4724 4725(define_expand "sse2_loadlpd_exp" 4726 [(set (match_operand:V2DF 0 "nonimmediate_operand" "") 4727 (vec_concat:V2DF 4728 (match_operand:DF 2 "nonimmediate_operand" "") 4729 (vec_select:DF 4730 (match_operand:V2DF 1 "nonimmediate_operand" "") 4731 (parallel [(const_int 1)]))))] 4732 "TARGET_SSE2" 4733{ 4734 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); 4735 4736 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2])); 4737 4738 /* Fix up the destination if needed. 
*/ 4739 if (dst != operands[0]) 4740 emit_move_insn (operands[0], dst); 4741 4742 DONE; 4743}) 4744 4745;; Avoid combining registers from different units in a single alternative, 4746;; see comment above inline_secondary_memory_needed function in i386.c 4747(define_insn "sse2_loadlpd" 4748 [(set (match_operand:V2DF 0 "nonimmediate_operand" 4749 "=x,x,x,x,x,x,x,x,m,m ,m") 4750 (vec_concat:V2DF 4751 (match_operand:DF 2 "nonimmediate_operand" 4752 " m,m,m,x,x,0,0,x,x,*f,r") 4753 (vec_select:DF 4754 (match_operand:V2DF 1 "vector_move_operand" 4755 " C,0,x,0,x,x,o,o,0,0 ,0") 4756 (parallel [(const_int 1)]))))] 4757 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 4758 "@ 4759 %vmovsd\t{%2, %0|%0, %2} 4760 movlpd\t{%2, %0|%0, %2} 4761 vmovlpd\t{%2, %1, %0|%0, %1, %2} 4762 movsd\t{%2, %0|%0, %2} 4763 vmovsd\t{%2, %1, %0|%0, %1, %2} 4764 shufpd\t{$2, %1, %0|%0, %1, 2} 4765 movhpd\t{%H1, %0|%0, %H1} 4766 vmovhpd\t{%H1, %2, %0|%0, %2, %H1} 4767 # 4768 # 4769 #" 4770 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*") 4771 (set (attr "type") 4772 (cond [(eq_attr "alternative" "5") 4773 (const_string "sselog") 4774 (eq_attr "alternative" "9") 4775 (const_string "fmov") 4776 (eq_attr "alternative" "10") 4777 (const_string "imov") 4778 ] 4779 (const_string "ssemov"))) 4780 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*") 4781 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*") 4782 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*") 4783 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")]) 4784 4785(define_split 4786 [(set (match_operand:V2DF 0 "memory_operand" "") 4787 (vec_concat:V2DF 4788 (match_operand:DF 1 "register_operand" "") 4789 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))] 4790 "TARGET_SSE2 && reload_completed" 4791 [(set (match_dup 0) (match_dup 1))] 4792 "operands[0] = adjust_address (operands[0], DFmode, 0);") 4793 4794(define_insn "sse2_movsd" 4795 [(set (match_operand:V2DF 0 
"nonimmediate_operand" "=x,x,x,x,m,x,x,x,o") 4796 (vec_merge:V2DF 4797 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0") 4798 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x") 4799 (const_int 1)))] 4800 "TARGET_SSE2" 4801 "@ 4802 movsd\t{%2, %0|%0, %2} 4803 vmovsd\t{%2, %1, %0|%0, %1, %2} 4804 movlpd\t{%2, %0|%0, %2} 4805 vmovlpd\t{%2, %1, %0|%0, %1, %2} 4806 %vmovlpd\t{%2, %0|%0, %2} 4807 shufpd\t{$2, %1, %0|%0, %1, 2} 4808 movhps\t{%H1, %0|%0, %H1} 4809 vmovhps\t{%H1, %2, %0|%0, %2, %H1} 4810 %vmovhps\t{%1, %H0|%H0, %1}" 4811 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*") 4812 (set (attr "type") 4813 (if_then_else 4814 (eq_attr "alternative" "5") 4815 (const_string "sselog") 4816 (const_string "ssemov"))) 4817 (set (attr "prefix_data16") 4818 (if_then_else 4819 (and (eq_attr "alternative" "2,4") 4820 (not (match_test "TARGET_AVX"))) 4821 (const_string "1") 4822 (const_string "*"))) 4823 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*") 4824 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex") 4825 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")]) 4826 4827(define_insn "vec_dupv2df" 4828 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 4829 (vec_duplicate:V2DF 4830 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))] 4831 "TARGET_SSE2" 4832 "@ 4833 unpcklpd\t%0, %0 4834 %vmovddup\t{%1, %0|%0, %1}" 4835 [(set_attr "isa" "noavx,sse3") 4836 (set_attr "type" "sselog1") 4837 (set_attr "prefix" "orig,maybe_vex") 4838 (set_attr "mode" "V2DF")]) 4839 4840(define_insn "*vec_concatv2df" 4841 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x") 4842 (vec_concat:V2DF 4843 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0") 4844 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))] 4845 "TARGET_SSE" 4846 "@ 4847 unpcklpd\t{%2, %0|%0, %2} 4848 vunpcklpd\t{%2, %1, %0|%0, %1, %2} 4849 %vmovddup\t{%1, %0|%0, %1} 4850 movhpd\t{%2, %0|%0, %2} 4851 
vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "0,1,2")
       (const_string "sselog")
       (const_string "ssemov")))
   (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector integer negation.  It is expressed as (0 - operand 1): the
;; preparation statement creates operand 2 as an all-zero vector forced
;; into a register.
(define_expand "neg<mode>2"
  [(set (match_operand:VI_AVX2 0 "register_operand" "")
        (minus:VI_AVX2
          (match_dup 2)
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")

;; Element-wise vector integer add/subtract (one expander per code of the
;; plusminus iterator).  The operands are handed to
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
          (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Insn matching the expander above.  Alternative 0 is the two-operand
;; legacy SSE form (operand 1 tied to the destination), alternative 1 the
;; three-operand VEX form.  <comm> is prepended to operand 1's constraint;
;; presumably it expands to "%" for the commutative code only -- the
;; attribute is defined outside this section.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
          (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Saturating add/subtract (sat_plusminus iterator) on the VI12_AVX2
;; modes; same structure as the non-saturating expander above.
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
  [(set (match_operand:VI12_AVX2 0 "register_operand" "")
        (sat_plusminus:VI12_AVX2
          (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
          (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Insn for the saturating expander: legacy two-operand form and VEX
;; three-operand form.
;; NOTE(review): "mode" is "TI" for every instantiation, whereas the
;; non-saturating insn above uses <sseinsnmode>.  If VI12_AVX2 contains
;; 256-bit modes (its definition is outside this section), confirm that
;; "TI" is intended for them.
(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
        (sat_plusminus:VI12_AVX2
          (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
          (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Multiply of vectors with QImode elements.  The output template is "#",
;; so the insn is never emitted as such: while pseudos can still be
;; created it is split into interleaves, two multiplies in the wider
;; <sseunpackmode>, and an extraction of the even bytes.
(define_insn_and_split "mul<mode>3"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "")
        (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
                       (match_operand:VI1_AVX2 2 "register_operand" "")))]
  "TARGET_SSE2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx t[6];
  int i;
  enum machine_mode mulmode = <sseunpackmode>mode;

  /* Six scratch registers: t[0..3] hold the interleaved inputs,
     t[4] and t[5] the two word-sized products.  */
  for (i = 0; i < 6; ++i)
    t[i] = gen_reg_rtx (<MODE>mode);

  /* Unpack data such that we've got a source byte in each low byte of
     each word. We don't care what goes into the high byte of each word.
     Rather than trying to get zero in there, most convenient is to let
     it be a copy of the low byte.
*/
  emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
                                                   operands[1]));
  emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
                                                   operands[2]));
  emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
                                                  operands[1]));
  emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
                                                  operands[2]));

  /* Multiply words. The end-of-line annotations here give a picture of what
     the output of that instruction looks like. Dot means don't care; the
     letters are the bytes of the result with A being the most significant. */
  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
                          gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
                                        gen_lowpart (mulmode, t[0]),
                                        gen_lowpart (mulmode, t[1]))));
  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
                          gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
                                        gen_lowpart (mulmode, t[2]),
                                        gen_lowpart (mulmode, t[3]))));

  /* Extract the even bytes and merge them back together. */
  if (<MODE>mode == V16QImode)
    ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
  else
    {
      /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
         this can't be normal even extraction, but one where additionally
         the second and third quarter are swapped. That is even one insn
         shorter than even extraction. */
      rtvec v = rtvec_alloc (32);
      /* Selector element i is normally 2*i.  (i & 24) tells which group
         of eight result elements i is in: for i in 8..15 the index is
         raised by 16 and for i in 16..23 it is lowered by 16, which
         exchanges the sources of those two groups.  */
      for (i = 0; i < 32; ++i)
        RTVEC_ELT (v, i)
          = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ?
-16 : 0));
      /* From here on t[] is reused as the operand array passed to
         ix86_expand_vec_perm_const: destination, the two product
         vectors, and the constant selector built above.  */
      t[0] = operands[0];
      t[1] = t[5];
      t[2] = t[4];
      t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
      ix86_expand_vec_perm_const (t);
    }

  /* Record on the last emitted insn that the sequence computes
     operands[1] * operands[2].  */
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
  DONE;
})

;; Multiply of vectors with HImode elements (VI2_AVX2 modes).  The
;; operands are handed to ix86_fixup_binary_operands_no_copy.
(define_expand "mul<mode>3"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
        (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
                       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

;; pmullw / vpmullw.  Operand 1 carries "%", so operands 1 and 2 may be
;; exchanged to satisfy the constraints.
(define_insn "*mul<mode>3"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
        (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
                       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   pmullw\t{%2, %0|%0, %2}
   vpmullw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; High part of a widening HImode multiply: both inputs are extended
;; (sign or zero, per the any_extend iterator) to <ssedoublemode>,
;; multiplied, shifted right by 16 and truncated back.
(define_expand "<s>mul<mode>3_highpart"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
        (truncate:VI2_AVX2
          (lshiftrt:<ssedoublemode>
            (mult:<ssedoublemode>
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
            (const_int 16))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

;; pmulh<u>w / vpmulh<u>w insn matching the expander above.
(define_insn "*<s>mul<mode>3_highpart"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
        (truncate:VI2_AVX2
          (lshiftrt:<ssedoublemode>
            (mult:<ssedoublemode>
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
(any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   pmulh<u>w\t{%2, %0|%0, %2}
   vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Unsigned widening multiply of the even SImode elements (0, 2, 4, 6) of
;; two V8SI vectors, giving a V4DI result.  The operands are handed to
;; ix86_fixup_binary_operands_no_copy.
(define_expand "avx2_umulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "")
        (mult:V4DI
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")

;; vpmuludq on 256-bit vectors; matches the expander above.  Named with
;; the avx2_ prefix like its expander and the other AVX2-only insns in
;; this section (the pattern requires TARGET_AVX2).  Operand 1 carries
;; "%", so operands 1 and 2 may be exchanged to satisfy the constraints.
(define_insn "*avx2_umulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (mult:V4DI
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%x")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Unsigned widening multiply of SImode elements 0 and 2 of two V4SI
;; vectors, giving a V2DI result.
(define_expand "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))
(zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")

;; pmuludq / vpmuludq on 128-bit vectors: unsigned widening multiply of
;; SImode elements 0 and 2.  Alternative 0 is the legacy two-operand form
;; (operand 1 tied to the destination), alternative 1 the VEX form.
(define_insn "*sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "@
   pmuludq\t{%2, %0|%0, %2}
   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Signed widening multiply of the even SImode elements (0, 2, 4, 6) of
;; two V8SI vectors, giving a V4DI result.
(define_expand "avx2_mulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")

;; vpmuldq on 256-bit vectors; matches the expander above.  The multiply
;; is commutative and the predicate lets operand 1 be a memory reference,
;; so operand 1 carries "%" (as in the unsigned and 128-bit signed
;; variants): this lets operands 1 and 2 be exchanged so that a memory
;; operand 1 can use the "xm" alternative of operand 2.
(define_insn "*avx2_mulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%x")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Signed widening multiply of SImode elements 0 and 2 of two V4SI
;; vectors, giving a V2DI result; requires SSE4.1.
(define_expand "sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")

;; pmuldq / vpmuldq insn matching the expander above: legacy two-operand
;; form and VEX three-operand form.
(define_insn "*sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "@
   pmuldq\t{%2, %0|%0, %2}
   vpmuldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; 256-bit multiply-add of HImode elements: the sign-extended products of
;; the even elements (0, 2, ..., 14) of operands 1 and 2 are added to the
;; sign-extended products of the odd elements (1, 3, ..., 15), giving a
;; V8SI result.
(define_expand "avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (plus:V8SI
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)])))
(sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)]))))
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)]))))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")

;; 128-bit multiply-add of HImode elements: sign-extended products of the
;; even elements (0, 2, 4, 6) plus sign-extended products of the odd
;; elements (1, 3, 5, 7) of operands 1 and 2, giving a V4SI result.
(define_expand "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")

;; vpmaddwd on 256-bit vectors; matches the avx2_pmaddwd expander.
(define_insn "*avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (plus:V8SI
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand" "%x")
(parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)]))))
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)]))))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; pmaddwd / vpmaddwd on 128-bit vectors; matches the sse2_pmaddwd
;; expander.  Alternative 0 is the legacy two-operand form (operand 1
;; tied to the destination), alternative 1 the VEX three-operand form.
(define_insn "*sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2 &&
ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmaddwd\t{%2, %0|%0, %2}
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Multiply of vectors with SImode elements (VI4_AVX2 modes).  Available
;; from SSE2 on, but the operands are only passed through
;; ix86_fixup_binary_operands_no_copy when SSE4.1 or AVX is enabled; the
;; pmulld insn below requires TARGET_SSE4_1, and *sse2_mulv4si3 further
;; down handles V4SI when neither SSE4.1 nor AVX is enabled.
(define_expand "mul<mode>3"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "")
        (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
                       (match_operand:VI4_AVX2 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1 || TARGET_AVX)
    ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})

;; pmulld / vpmulld: direct SImode-element multiply, SSE4.1 and later.
(define_insn "*<sse4_1_avx2>_mul<mode>3"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
        (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
                       (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   pmulld\t{%2, %0|%0, %2}
   vpmulld\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; V4SI multiply for SSE2 without SSE4.1/AVX.  The output template is "#":
;; while pseudos can still be created the insn is split into two unsigned
;; widening multiplies (elements 0/2 and elements 1/3) whose low halves
;; are shuffled and interleaved back together.
(define_insn_and_split "*sse2_mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  /* Six V4SI scratch registers for the intermediate results.  */
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  t4 = gen_reg_rtx (V4SImode);
  t5 = gen_reg_rtx (V4SImode);
  t6 = gen_reg_rtx (V4SImode);
thirtytwo = GEN_INT (32);

  /* The V4SI temporaries are accessed through gen_lowpart as V2DI where
     they receive a widening-multiply result and as V1TI where they
     receive a whole-register shift.  */

  /* Multiply elements 2 and 0. */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
                                     op1, op2));

  /* Shift both input vectors down one element, so that elements 3
     and 1 are now in the slots for elements 2 and 0. For K8, at
     least, this is faster than using a shuffle. */
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
                                 gen_lowpart (V1TImode, op1),
                                 thirtytwo));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
                                 gen_lowpart (V1TImode, op2),
                                 thirtytwo));
  /* Multiply elements 3 and 1. */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
                                     t2, t3));

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3. */
  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));

  /* Merge the parts back together.
*/
  emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));

  /* Record on the last emitted insn that the sequence computes
     operands[1] * operands[2].  */
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_MULT (V4SImode, operands[1], operands[2]));
  DONE;
})

;; Multiply of vectors with DImode elements (VI8_AVX2 modes).  The output
;; template is "#": while pseudos can still be created the insn is split
;; into a sequence of 32-bit multiplies, shifts and adds.  With XOP and
;; V2DImode a shorter sequence using phadddq and pmacsdql is emitted.
(define_insn_and_split "mul<mode>3"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
        (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
                       (match_operand:VI8_AVX2 2 "register_operand" "")))]
  "TARGET_SSE2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  if (TARGET_XOP && <MODE>mode == V2DImode)
    {
      /* The inputs are viewed as four SImode elements each.
         op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
                                    GEN_INT (1),
                                    GEN_INT (0),
                                    GEN_INT (3),
                                    GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
    }
  else
    {
      t1 = gen_reg_rtx (<MODE>mode);
      t2 = gen_reg_rtx (<MODE>mode);
      t3 = gen_reg_rtx (<MODE>mode);
      t4 = gen_reg_rtx (<MODE>mode);
      t5 = gen_reg_rtx (<MODE>mode);
      t6 = gen_reg_rtx (<MODE>mode);
      thirtytwo = GEN_INT (32);

      /* Multiply low parts.
*/
      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
                 (t1, gen_lowpart (<ssepackmode>mode, op1),
                  gen_lowpart (<ssepackmode>mode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts. */
      emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
      emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));

      /* Multiply high parts by low parts.  t4 pairs op1 with the shifted
         op2 (t3); t5 pairs op2 with the shifted op1 (t2). */
      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
                 (t4, gen_lowpart (<ssepackmode>mode, op1),
                  gen_lowpart (<ssepackmode>mode, t3)));
      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
                 (t5, gen_lowpart (<ssepackmode>mode, op2),
                  gen_lowpart (<ssepackmode>mode, t2)));

      /* Shift them back. */
      emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
      emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));

      /* Add the three parts together. */
      emit_insn (gen_add<mode>3 (t6, t1, t4));
      emit_insn (gen_add<mode>3 (op0, t6, t5));
    }

  /* Record on the last emitted insn that the sequence computes
     operands[1] * operands[2].  */
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
  DONE;
})

;; Widening multiply, "hi" variant, for HImode-element vectors.  The low
;; halves of the products (mul<mode>3) and the high halves
;; (<s>mul<mode>3_highpart) are computed separately and combined with
;; vec_interleave_high into the <sseunpackmode> result.
(define_expand "vec_widen_<s>mult_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (any_extend:<sseunpackmode>
     (match_operand:VI2_AVX2 1 "register_operand" ""))
   (match_operand:VI2_AVX2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2, dest;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (<MODE>mode);
  t2 = gen_reg_rtx (<MODE>mode);
  /* View the wide destination in the narrow input mode so that the
     interleave can write it directly.  */
  dest = gen_lowpart (<MODE>mode, operands[0]);

  emit_insn (gen_mul<mode>3 (t1, op1, op2));
  emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
  emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
  DONE;
})

;; Widening multiply, "lo" variant: identical to the "hi" variant above
;; except that the two partial results are combined with
;; vec_interleave_low.
(define_expand "vec_widen_<s>mult_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (any_extend:<sseunpackmode>
     (match_operand:VI2_AVX2 1 "register_operand" ""))
   (match_operand:VI2_AVX2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2, dest;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (<MODE>mode);
  t2 = gen_reg_rtx (<MODE>mode);
  dest = gen_lowpart (<MODE>mode, operands[0]);

  emit_insn (gen_mul<mode>3 (t1, op1, op2));
  emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
  emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
  DONE;
})

;; Widening multiply, "hi" variant, V8SI -> V4DI (AVX2).  Both inputs are
;; permuted as V4DI with selector 0,2,1,3, shuffled with a pshufd selector
;; whose four 2-bit fields are 2,2,3,3, and then multiplied with the
;; signed or unsigned even-element widening multiply.
(define_expand "vec_widen_<s>mult_hi_v8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
   (match_operand:V8SI 2 "nonimmediate_operand" "")]
  "TARGET_AVX2"
{
  rtx t1, t2, t3, t4;

  t1 = gen_reg_rtx (V4DImode);
  t2 = gen_reg_rtx (V4DImode);
  t3 = gen_reg_rtx (V8SImode);
  t4 = gen_reg_rtx (V8SImode);
  emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
                                  const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
                                  const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
                                GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
                                GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
  DONE;
})

;; Widening multiply, "lo" variant, V8SI -> V4DI (AVX2).  Same sequence as
;; the "hi" variant above but with pshufd selector fields 0,0,1,1.
(define_expand "vec_widen_<s>mult_lo_v8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
   (match_operand:V8SI 2 "nonimmediate_operand" "")]
  "TARGET_AVX2"
{
  rtx t1, t2, t3, t4;

  t1 = gen_reg_rtx (V4DImode);
  t2 = gen_reg_rtx (V4DImode);
  t3 = gen_reg_rtx (V8SImode);
  t4 = gen_reg_rtx (V8SImode);
  emit_insn (gen_avx2_permv4di_1 (t1,
gen_lowpart (V4DImode, operands[1]),
                                  const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
                                  const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  /* pshufd selector fields 0,0,1,1.  */
  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
                                GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
                                GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
  DONE;
})

;; Signed widening multiply, "hi" variant, V4SI -> V2DI (SSE4.1).
;; With XOP: both inputs are shuffled with selector 0,2,1,3 and fed to
;; pmacsdqh with a zero accumulator.  Otherwise: each input is
;; interleaved (high) with itself and the results are multiplied with the
;; SSE4.1 signed even-element widening multiply.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  if (TARGET_XOP)
    {
      /* t3 is the accumulator input of the multiply-accumulate; it is
         set to zero so only the products remain.  */
      rtx t3 = gen_reg_rtx (V2DImode);

      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
                                    GEN_INT (1), GEN_INT (3)));
      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
                                    GEN_INT (1), GEN_INT (3)));
      emit_move_insn (t3, CONST0_RTX (V2DImode));

      emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
      DONE;
    }

  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})

;; Signed widening multiply, "lo" variant, V4SI -> V2DI (SSE4.1).  Mirrors
;; the "hi" variant above, using pmacsdql for XOP and interleave-low
;; otherwise.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  if (TARGET_XOP)
    {
      rtx t3 = gen_reg_rtx
(V2DImode);

      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
                                    GEN_INT (1), GEN_INT (3)));
      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
                                    GEN_INT (1), GEN_INT (3)));
      /* Zero accumulator for the multiply-accumulate below.  */
      emit_move_insn (t3, CONST0_RTX (V2DImode));

      emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
      DONE;
    }

  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})

;; Unsigned widening multiply, "hi" variant, V4SI -> V2DI (SSE2): each
;; input is interleaved (high) with itself and the results are multiplied
;; with the unsigned even-element widening multiply.
(define_expand "vec_widen_umult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
  DONE;
})

;; Unsigned widening multiply, "lo" variant, V4SI -> V2DI (SSE2): same as
;; the "hi" variant above but using interleave-low.
(define_expand "vec_widen_umult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
  DONE;
})

;; Signed dot product of HImode-element vectors: pmaddwd of operands 1 and
;; 2 into a temporary, then operand 0 = operand 3 + temporary.
(define_expand "sdot_prod<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")
   (match_operand:VI2_AVX2 2 "register_operand" "")
   (match_operand:<sseunpackmode> 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx t = gen_reg_rtx (<sseunpackmode>mode);
  emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_PLUS (<sseunpackmode>mode,
                                        operands[3], t)));
  DONE;
})

;; Prefix of the 128-bit even-element widening multiply pattern name for
;; each extension code: the unsigned one is named sse2_..., the signed one
;; sse4_1_....
(define_code_attr sse2_sse4_1
  [(zero_extend "sse2") (sign_extend "sse4_1")])

;; Dot product V4SI x V4SI accumulated into V2DI.  The unsigned form needs
;; SSE2, the signed form SSE4.1.  Elements 0/2 are multiplied directly and
;; added to the accumulator (operand 3); both inputs are then shifted
;; right by 32 bits as V1TI so that elements 1/3 can be multiplied the
;; same way, and the two partial sums are added.
(define_expand "<s>dot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
{
  rtx t1, t2, t3, t4;

  t1 = gen_reg_rtx (V2DImode);
  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (t1, t1, operands[3]));

  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
                                 gen_lowpart (V1TImode, operands[1]),
                                 GEN_INT (32)));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
                                 gen_lowpart (V1TImode, operands[2]),
                                 GEN_INT (32)));

  t4 = gen_reg_rtx (V2DImode);
  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));

  emit_insn (gen_addv2di3 (operands[0], t1, t4));
  DONE;
})

;; Dot product V8SI x V8SI accumulated into V4DI (AVX2).  Same scheme as
;; the V4SI version above, using the 256-bit multiply, add and V2TI shift
;; patterns.
(define_expand "<s>dot_prodv8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
   (match_operand:V8SI 2 "register_operand" "")
   (match_operand:V4DI 3 "register_operand" "")]
  "TARGET_AVX2"
{
  rtx t1, t2, t3, t4;

  t1 = gen_reg_rtx (V4DImode);
  emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
  emit_insn (gen_addv4di3 (t1, t1, operands[3]));

  t2 = gen_reg_rtx (V8SImode);
  t3 = gen_reg_rtx (V8SImode);
  emit_insn (gen_avx2_lshrv2ti3
(gen_lowpart (V2TImode, t2),
                                 gen_lowpart (V2TImode, operands[1]),
                                 GEN_INT (32)));
  emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
                                 gen_lowpart (V2TImode, operands[2]),
                                 GEN_INT (32)));

  t4 = gen_reg_rtx (V4DImode);
  emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));

  emit_insn (gen_addv4di3 (operands[0], t1, t4));
  DONE;
})

;; Arithmetic right shift of every element (VI24_AVX2 modes) by the same
;; SImode count, which is either a register or a constant matching "N".
;; length_immediate is 1 when the count is a const_int and 0 otherwise.
(define_insn "ashr<mode>3"
  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
        (ashiftrt:VI24_AVX2
          (match_operand:VI24_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
  "TARGET_SSE2"
  "@
   psra<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Logical shifts (any_lshift iterator) of every element (VI248_AVX2
;; modes) by the same SImode count; same structure as ashr<mode>3 above.
(define_insn "<shift_insn><mode>3"
  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
        (any_lshift:VI248_AVX2
          (match_operand:VI248_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
  "TARGET_SSE2"
  "@
   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set (attr "length_immediate")
     (if_then_else (match_operand 2 "const_int_operand" "")
       (const_string "1")
       (const_string "0")))
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Whole-vector left shift of a 128-bit integer vector.  The count must
;; satisfy const_0_to_255_mul_8_operand.  Operands 0 and 1 are re-viewed
;; as V1TI so that the V1TI shift insn below matches.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (ashift:V1TI
          (match_operand:VI_128 1 "register_operand" "")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
  "TARGET_SSE2"
{
  operands[0] = gen_lowpart (V1TImode, operands[0]);
  operands[1] = gen_lowpart (V1TImode, operands[1]);
})

;; pslldq / vpslldq.  The RTL count (operand 2) is divided by 8 before
;; being printed as the instruction's immediate.
(define_insn "<sse2_avx2>_ashl<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
        (ashift:VIMAX_AVX2
          (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  /* Convert the count to the units the instruction's immediate uses.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  switch (which_alternative)
    {
    case 0:
      return "pslldq\t{%2, %0|%0, %2}";
    case 1:
      return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Whole-vector logical right shift of a 128-bit integer vector; the
;; counterpart of vec_shl_<mode> above.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (lshiftrt:V1TI
          (match_operand:VI_128 1 "register_operand" "")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
  "TARGET_SSE2"
{
  operands[0] = gen_lowpart (V1TImode, operands[0]);
  operands[1] = gen_lowpart (V1TImode, operands[1]);
})

;; psrldq / vpsrldq.  As for the left shift, the RTL count is divided by
;; 8 before being printed as the immediate.
(define_insn "<sse2_avx2>_lshr<mode>3"
  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
        (lshiftrt:VIMAX_AVX2
          (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
          (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
  "TARGET_SSE2"
{
  /* Convert the count to the units the instruction's immediate uses.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);

  switch (which_alternative)
    {
    case 0:
      return "psrldq\t{%2, %0|%0, %2}";
    case 1:
      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
5918 (set_attr "atom_unit" "sishuf") 5919 (set_attr "prefix_data16" "1,*") 5920 (set_attr "prefix" "orig,vex") 5921 (set_attr "mode" "<sseinsnmode>")]) 5922 5923
;; Integer max/min (every code of the maxmin iterator) for the VI124_256
;; modes, AVX2 only.  The expander just canonicalizes the operands with
;; ix86_fixup_binary_operands_no_copy and then emits the pattern, which
;; is matched by *avx2_<code><mode>3 below.
5924(define_expand "<code><mode>3" 5925 [(set (match_operand:VI124_256 0 "register_operand" "") 5926 (maxmin:VI124_256 5927 (match_operand:VI124_256 1 "nonimmediate_operand" "") 5928 (match_operand:VI124_256 2 "nonimmediate_operand" "")))] 5929 "TARGET_AVX2" 5930 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 5931
;; Matching insn for the expander above: a single VEX three-operand
;; vp<maxmin_int><ssemodesuffix>.  Operand 1 is marked commutative ("%").
5932(define_insn "*avx2_<code><mode>3" 5933 [(set (match_operand:VI124_256 0 "register_operand" "=x") 5934 (maxmin:VI124_256 5935 (match_operand:VI124_256 1 "nonimmediate_operand" "%x") 5936 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))] 5937 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 5938 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5939 [(set_attr "type" "sseiadd") 5940 (set_attr "prefix_extra" "1") 5941 (set_attr "prefix" "vex") 5942 (set_attr "mode" "OI")]) 5943
;; Max/min for the VI8_AVX2 modes is not emitted as one instruction; it
;; is expanded as a vector conditional through ix86_expand_int_vcond.
;; xops uses the same layout the vcond expanders pass to that function:
;; 0 = destination, 1/2 = value selected when the comparison is
;; true/false, 3 = comparison rtx, 4/5 = compared operands.
;; The comparison is always "operand 1 > operand 2" (GTU for UMAX/UMIN,
;; GT otherwise); for the min codes the two selected values are swapped.
5944(define_expand "<code><mode>3" 5945 [(set (match_operand:VI8_AVX2 0 "register_operand" "") 5946 (maxmin:VI8_AVX2 5947 (match_operand:VI8_AVX2 1 "register_operand" "") 5948 (match_operand:VI8_AVX2 2 "register_operand" "")))] 5949 "TARGET_SSE4_2" 5950{ 5951 enum rtx_code code; 5952 rtx xops[6]; 5953 bool ok; 5954 5955 xops[0] = operands[0]; 5956 5957 if (<CODE> == SMAX || <CODE> == UMAX) 5958 { 5959 xops[1] = operands[1]; 5960 xops[2] = operands[2]; 5961 } 5962 else 5963 { 5964 xops[1] = operands[2]; 5965 xops[2] = operands[1]; 5966 } 5967 5968 code = (<CODE> == UMAX || <CODE> == UMIN) ? 
GTU : GT; 5969 5970 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); 5971 xops[4] = operands[1]; 5972 xops[5] = operands[2]; 5973 5974 ok = ix86_expand_int_vcond (xops); 5975 gcc_assert (ok); 5976 DONE; 5977}) 5978
;; Signed max/min (smaxmin) for the VI124_128 modes.
;; With SSE4.1, or for V8HImode on any SSE2 target, only the operands are
;; fixed up and the pattern itself is emitted for one of the insns below.
;; Otherwise both inputs are forced into registers and the operation is
;; expanded through ix86_expand_int_vcond with a signed GT comparison,
;; swapping the selected values for SMIN.
5979(define_expand "<code><mode>3" 5980 [(set (match_operand:VI124_128 0 "register_operand" "") 5981 (smaxmin:VI124_128 5982 (match_operand:VI124_128 1 "nonimmediate_operand" "") 5983 (match_operand:VI124_128 2 "nonimmediate_operand" "")))] 5984 "TARGET_SSE2" 5985{ 5986 if (TARGET_SSE4_1 || <MODE>mode == V8HImode) 5987 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); 5988 else 5989 { 5990 rtx xops[6]; 5991 bool ok; 5992 5993 xops[0] = operands[0]; 5994 operands[1] = force_reg (<MODE>mode, operands[1]); 5995 operands[2] = force_reg (<MODE>mode, operands[2]); 5996 5997 if (<CODE> == SMAX) 5998 { 5999 xops[1] = operands[1]; 6000 xops[2] = operands[2]; 6001 } 6002 else 6003 { 6004 xops[1] = operands[2]; 6005 xops[2] = operands[1]; 6006 } 6007 6008 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); 6009 xops[4] = operands[1]; 6010 xops[5] = operands[2]; 6011 6012 ok = ix86_expand_int_vcond (xops); 6013 gcc_assert (ok); 6014 DONE; 6015 } 6016}) 6017
;; SSE4.1 signed max/min insn for the VI14_128 modes (two-operand legacy
;; form and three-operand VEX form).
6018(define_insn "*sse4_1_<code><mode>3" 6019 [(set (match_operand:VI14_128 0 "register_operand" "=x,x") 6020 (smaxmin:VI14_128 6021 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x") 6022 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))] 6023 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6024 "@ 6025 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} 6026 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6027 [(set_attr "isa" "noavx,avx") 6028 (set_attr "type" "sseiadd") 6029 (set_attr "prefix_extra" "1,*") 6030 (set_attr "prefix" "orig,vex") 6031 (set_attr "mode" "TI")]) 6032
;; Signed max/min on V8HI; only requires SSE2, which is why the smaxmin
;; expander above lets V8HImode through without SSE4.1.
6033(define_insn "*<code>v8hi3" 6034 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 6035 (smaxmin:V8HI 6036 (match_operand:V8HI 1 
"nonimmediate_operand" "%0,x") 6037 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))] 6038 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)" 6039 "@ 6040 p<maxmin_int>w\t{%2, %0|%0, %2} 6041 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}" 6042 [(set_attr "isa" "noavx,avx") 6043 (set_attr "type" "sseiadd") 6044 (set_attr "prefix_data16" "1,*") 6045 (set_attr "prefix_extra" "*,1") 6046 (set_attr "prefix" "orig,vex") 6047 (set_attr "mode" "TI")]) 6048
;; Unsigned max/min (umaxmin) for the VI124_128 modes.  Three cases:
;;  - SSE4.1, or V16QImode on any SSE2 target: only fix up the operands
;;    and emit the pattern for one of the insns below.
;;  - UMAX on V8HImode without SSE4.1: emit sse2_ussubv8hi3 (op1, op2)
;;    followed by an add of op2.  NOTE(review): presumably the first insn
;;    is an unsigned saturating subtract, so the sum is max (op1, op2) --
;;    confirm against the sse2_ussubv8hi3 pattern.  A fresh temporary is
;;    used when the destination is rtx_equal_p to operand 2, so operand 2
;;    is still intact for the add.
;;  - Everything else: force both inputs into registers and expand
;;    through ix86_expand_int_vcond with a GTU comparison, swapping the
;;    selected values for UMIN.
6049(define_expand "<code><mode>3" 6050 [(set (match_operand:VI124_128 0 "register_operand" "") 6051 (umaxmin:VI124_128 6052 (match_operand:VI124_128 1 "nonimmediate_operand" "") 6053 (match_operand:VI124_128 2 "nonimmediate_operand" "")))] 6054 "TARGET_SSE2" 6055{ 6056 if (TARGET_SSE4_1 || <MODE>mode == V16QImode) 6057 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); 6058 else if (<CODE> == UMAX && <MODE>mode == V8HImode) 6059 { 6060 rtx op0 = operands[0], op2 = operands[2], op3 = op0; 6061 operands[1] = force_reg (<MODE>mode, operands[1]); 6062 if (rtx_equal_p (op3, op2)) 6063 op3 = gen_reg_rtx (V8HImode); 6064 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2)); 6065 emit_insn (gen_addv8hi3 (op0, op3, op2)); 6066 DONE; 6067 } 6068 else 6069 { 6070 rtx xops[6]; 6071 bool ok; 6072 6073 operands[1] = force_reg (<MODE>mode, operands[1]); 6074 operands[2] = force_reg (<MODE>mode, operands[2]); 6075 6076 xops[0] = operands[0]; 6077 6078 if (<CODE> == UMAX) 6079 { 6080 xops[1] = operands[1]; 6081 xops[2] = operands[2]; 6082 } 6083 else 6084 { 6085 xops[1] = operands[2]; 6086 xops[2] = operands[1]; 6087 } 6088 6089 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); 6090 xops[4] = operands[1]; 6091 xops[5] = operands[2]; 6092 6093 ok = ix86_expand_int_vcond (xops); 6094 gcc_assert (ok); 6095 DONE; 6096 } 6097}) 6098
;; SSE4.1 unsigned max/min insn for the VI24_128 modes.
6099(define_insn "*sse4_1_<code><mode>3" 6100 [(set (match_operand:VI24_128 0 "register_operand" "=x,x") 6101 (umaxmin:VI24_128 6102 (match_operand:VI24_128 1 
"nonimmediate_operand" "%0,x") 6103 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))] 6104 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6105 "@ 6106 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} 6107 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6108 [(set_attr "isa" "noavx,avx") 6109 (set_attr "type" "sseiadd") 6110 (set_attr "prefix_extra" "1,*") 6111 (set_attr "prefix" "orig,vex") 6112 (set_attr "mode" "TI")]) 6113 6114(define_insn "*<code>v16qi3" 6115 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 6116 (umaxmin:V16QI 6117 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x") 6118 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))] 6119 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)" 6120 "@ 6121 p<maxmin_int>b\t{%2, %0|%0, %2} 6122 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}" 6123 [(set_attr "isa" "noavx,avx") 6124 (set_attr "type" "sseiadd") 6125 (set_attr "prefix_data16" "1,*") 6126 (set_attr "prefix_extra" "*,1") 6127 (set_attr "prefix" "orig,vex") 6128 (set_attr "mode" "TI")]) 6129 6130;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6131;; 6132;; Parallel integral comparisons 6133;; 6134;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6135 6136(define_expand "avx2_eq<mode>3" 6137 [(set (match_operand:VI_256 0 "register_operand" "") 6138 (eq:VI_256 6139 (match_operand:VI_256 1 "nonimmediate_operand" "") 6140 (match_operand:VI_256 2 "nonimmediate_operand" "")))] 6141 "TARGET_AVX2" 6142 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") 6143 6144(define_insn "*avx2_eq<mode>3" 6145 [(set (match_operand:VI_256 0 "register_operand" "=x") 6146 (eq:VI_256 6147 (match_operand:VI_256 1 "nonimmediate_operand" "%x") 6148 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] 6149 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 6150 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6151 [(set_attr "type" 
"ssecmp") 6152 (set_attr "prefix_extra" "1") 6153 (set_attr "prefix" "vex") 6154 (set_attr "mode" "OI")]) 6155 6156(define_insn "*sse4_1_eqv2di3" 6157 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 6158 (eq:V2DI 6159 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x") 6160 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] 6161 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)" 6162 "@ 6163 pcmpeqq\t{%2, %0|%0, %2} 6164 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}" 6165 [(set_attr "isa" "noavx,avx") 6166 (set_attr "type" "ssecmp") 6167 (set_attr "prefix_extra" "1") 6168 (set_attr "prefix" "orig,vex") 6169 (set_attr "mode" "TI")]) 6170 6171(define_insn "*sse2_eq<mode>3" 6172 [(set (match_operand:VI124_128 0 "register_operand" "=x,x") 6173 (eq:VI124_128 6174 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x") 6175 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] 6176 "TARGET_SSE2 && !TARGET_XOP 6177 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 6178 "@ 6179 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2} 6180 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6181 [(set_attr "isa" "noavx,avx") 6182 (set_attr "type" "ssecmp") 6183 (set_attr "prefix_data16" "1,*") 6184 (set_attr "prefix" "orig,vex") 6185 (set_attr "mode" "TI")]) 6186 6187(define_expand "sse2_eq<mode>3" 6188 [(set (match_operand:VI124_128 0 "register_operand" "") 6189 (eq:VI124_128 6190 (match_operand:VI124_128 1 "nonimmediate_operand" "") 6191 (match_operand:VI124_128 2 "nonimmediate_operand" "")))] 6192 "TARGET_SSE2 && !TARGET_XOP " 6193 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") 6194 6195(define_expand "sse4_1_eqv2di3" 6196 [(set (match_operand:V2DI 0 "register_operand" "") 6197 (eq:V2DI 6198 (match_operand:V2DI 1 "nonimmediate_operand" "") 6199 (match_operand:V2DI 2 "nonimmediate_operand" "")))] 6200 "TARGET_SSE4_1" 6201 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);") 6202 6203(define_insn "sse4_2_gtv2di3" 6204 [(set 
(match_operand:V2DI 0 "register_operand" "=x,x") 6205 (gt:V2DI 6206 (match_operand:V2DI 1 "register_operand" "0,x") 6207 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] 6208 "TARGET_SSE4_2" 6209 "@ 6210 pcmpgtq\t{%2, %0|%0, %2} 6211 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}" 6212 [(set_attr "isa" "noavx,avx") 6213 (set_attr "type" "ssecmp") 6214 (set_attr "prefix_extra" "1") 6215 (set_attr "prefix" "orig,vex") 6216 (set_attr "mode" "TI")]) 6217
;; Element-wise signed greater-than for the 256-bit integer modes (AVX2).
;; Unlike the eq patterns, operand 1 must be a register and is not marked
;; commutative.
6218(define_insn "avx2_gt<mode>3" 6219 [(set (match_operand:VI_256 0 "register_operand" "=x") 6220 (gt:VI_256 6221 (match_operand:VI_256 1 "register_operand" "x") 6222 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] 6223 "TARGET_AVX2" 6224 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6225 [(set_attr "type" "ssecmp") 6226 (set_attr "prefix_extra" "1") 6227 (set_attr "prefix" "vex") 6228 (set_attr "mode" "OI")]) 6229
;; Element-wise signed greater-than for the VI124_128 modes.  Disabled
;; when TARGET_XOP is set.
6230(define_insn "sse2_gt<mode>3" 6231 [(set (match_operand:VI124_128 0 "register_operand" "=x,x") 6232 (gt:VI124_128 6233 (match_operand:VI124_128 1 "register_operand" "0,x") 6234 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] 6235 "TARGET_SSE2 && !TARGET_XOP" 6236 "@ 6237 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2} 6238 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6239 [(set_attr "isa" "noavx,avx") 6240 (set_attr "type" "ssecmp") 6241 (set_attr "prefix_data16" "1,*") 6242 (set_attr "prefix" "orig,vex") 6243 (set_attr "mode" "TI")]) 6244
;; Vector conditional select, 256-bit:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The data mode (V_256) and the comparison mode (VI_256) are iterated
;; independently; the condition only admits pairs with the same number
;; of elements.  All work is delegated to ix86_expand_int_vcond, which is
;; asserted to succeed.
6245(define_expand "vcond<V_256:mode><VI_256:mode>" 6246 [(set (match_operand:V_256 0 "register_operand" "") 6247 (if_then_else:V_256 6248 (match_operator 3 "" 6249 [(match_operand:VI_256 4 "nonimmediate_operand" "") 6250 (match_operand:VI_256 5 "general_operand" "")]) 6251 (match_operand:V_256 1 "" "") 6252 (match_operand:V_256 2 "" "")))] 6253 "TARGET_AVX2 6254 && (GET_MODE_NUNITS (<V_256:MODE>mode) 6255 == GET_MODE_NUNITS (<VI_256:MODE>mode))" 6256{ 6257 bool ok = ix86_expand_int_vcond (operands); 6258 gcc_assert (ok); 6259 DONE; 6260}) 6261 
;; 128-bit counterpart of the vcond expander above, for comparisons in
;; the VI124_128 modes; the same element-count restriction applies.
6262(define_expand "vcond<V_128:mode><VI124_128:mode>" 6263 [(set (match_operand:V_128 0 "register_operand" "") 6264 (if_then_else:V_128 6265 (match_operator 3 "" 6266 [(match_operand:VI124_128 4 "nonimmediate_operand" "") 6267 (match_operand:VI124_128 5 "general_operand" "")]) 6268 (match_operand:V_128 1 "" "") 6269 (match_operand:V_128 2 "" "")))] 6270 "TARGET_SSE2 6271 && (GET_MODE_NUNITS (<V_128:MODE>mode) 6272 == GET_MODE_NUNITS (<VI124_128:MODE>mode))" 6273{ 6274 bool ok = ix86_expand_int_vcond (operands); 6275 gcc_assert (ok); 6276 DONE; 6277}) 6278
;; Vector conditional select with a V2DI comparison and VI8F_128 data;
;; requires SSE4.2.
6279(define_expand "vcond<VI8F_128:mode>v2di" 6280 [(set (match_operand:VI8F_128 0 "register_operand" "") 6281 (if_then_else:VI8F_128 6282 (match_operator 3 "" 6283 [(match_operand:V2DI 4 "nonimmediate_operand" "") 6284 (match_operand:V2DI 5 "general_operand" "")]) 6285 (match_operand:VI8F_128 1 "" "") 6286 (match_operand:VI8F_128 2 "" "")))] 6287 "TARGET_SSE4_2" 6288{ 6289 bool ok = ix86_expand_int_vcond (operands); 6290 gcc_assert (ok); 6291 DONE; 6292}) 6293
;; vcondu variants.  They share the operand layout and the call to
;; ix86_expand_int_vcond with the vcond expanders above; the visible
;; differences are the predicates: operand 5 is nonimmediate_operand and
;; operands 1 and 2 are general_operand.
6294(define_expand "vcondu<V_256:mode><VI_256:mode>" 6295 [(set (match_operand:V_256 0 "register_operand" "") 6296 (if_then_else:V_256 6297 (match_operator 3 "" 6298 [(match_operand:VI_256 4 "nonimmediate_operand" "") 6299 (match_operand:VI_256 5 "nonimmediate_operand" "")]) 6300 (match_operand:V_256 1 "general_operand" "") 6301 (match_operand:V_256 2 "general_operand" "")))] 6302 "TARGET_AVX2 6303 && (GET_MODE_NUNITS (<V_256:MODE>mode) 6304 == GET_MODE_NUNITS (<VI_256:MODE>mode))" 6305{ 6306 bool ok = ix86_expand_int_vcond (operands); 6307 gcc_assert (ok); 6308 DONE; 6309}) 6310
;; 128-bit vcondu for comparisons in the VI124_128 modes.
6311(define_expand "vcondu<V_128:mode><VI124_128:mode>" 6312 [(set (match_operand:V_128 0 "register_operand" "") 6313 (if_then_else:V_128 6314 (match_operator 3 "" 6315 [(match_operand:VI124_128 4 "nonimmediate_operand" "") 6316 (match_operand:VI124_128 5 "nonimmediate_operand" "")]) 6317 (match_operand:V_128 1 "general_operand" "") 6318 (match_operand:V_128 2 "general_operand" 
"")))] 6319 "TARGET_SSE2 6320 && (GET_MODE_NUNITS (<V_128:MODE>mode) 6321 == GET_MODE_NUNITS (<VI124_128:MODE>mode))" 6322{ 6323 bool ok = ix86_expand_int_vcond (operands); 6324 gcc_assert (ok); 6325 DONE; 6326}) 6327 6328(define_expand "vcondu<VI8F_128:mode>v2di" 6329 [(set (match_operand:VI8F_128 0 "register_operand" "") 6330 (if_then_else:VI8F_128 6331 (match_operator 3 "" 6332 [(match_operand:V2DI 4 "nonimmediate_operand" "") 6333 (match_operand:V2DI 5 "nonimmediate_operand" "")]) 6334 (match_operand:VI8F_128 1 "general_operand" "") 6335 (match_operand:VI8F_128 2 "general_operand" "")))] 6336 "TARGET_SSE4_2" 6337{ 6338 bool ok = ix86_expand_int_vcond (operands); 6339 gcc_assert (ok); 6340 DONE; 6341}) 6342 6343(define_mode_iterator VEC_PERM_AVX2 6344 [V16QI V8HI V4SI V2DI V4SF V2DF 6345 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") 6346 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") 6347 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")]) 6348 6349(define_expand "vec_perm<mode>" 6350 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "") 6351 (match_operand:VEC_PERM_AVX2 1 "register_operand" "") 6352 (match_operand:VEC_PERM_AVX2 2 "register_operand" "") 6353 (match_operand:<sseintvecmode> 3 "register_operand" "")] 6354 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP" 6355{ 6356 ix86_expand_vec_perm (operands); 6357 DONE; 6358}) 6359 6360(define_mode_iterator VEC_PERM_CONST 6361 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE") 6362 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE") 6363 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2") 6364 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") 6365 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") 6366 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")]) 6367 6368(define_expand "vec_perm_const<mode>" 6369 [(match_operand:VEC_PERM_CONST 0 "register_operand" "") 6370 (match_operand:VEC_PERM_CONST 1 "register_operand" "") 6371 (match_operand:VEC_PERM_CONST 2 "register_operand" "") 6372 (match_operand:<sseintvecmode> 3 "" "")] 6373 "" 6374{ 6375 if (ix86_expand_vec_perm_const (operands)) 6376 
DONE; 6377 else 6378 FAIL; 6379}) 6380 6381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6382;; 6383;; Parallel bitwise logical operations 6384;; 6385;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6386
;; Bitwise complement of an integer vector, expressed as XOR with an
;; all-ones constant.  The preparation code builds a CONST_VECTOR of
;; GET_MODE_NUNITS (<MODE>mode) copies of constm1_rtx and forces it into
;; a register, which becomes operand 2 (the match_dup).
6387(define_expand "one_cmpl<mode>2" 6388 [(set (match_operand:VI 0 "register_operand" "") 6389 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "") 6390 (match_dup 2)))] 6391 "TARGET_SSE" 6392{ 6393 int i, n = GET_MODE_NUNITS (<MODE>mode); 6394 rtvec v = rtvec_alloc (n); 6395 6396 for (i = 0; i < n; ++i) 6397 RTVEC_ELT (v, i) = constm1_rtx; 6398 6399 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); 6400}) 6401
;; Named and-not expander, (~operand 1) & operand 2, for the VI_AVX2
;; modes.  It has no preparation code; the pattern is emitted as written.
6402(define_expand "<sse2_avx2>_andnot<mode>3" 6403 [(set (match_operand:VI_AVX2 0 "register_operand" "") 6404 (and:VI_AVX2 6405 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" "")) 6406 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))] 6407 "TARGET_SSE2") 6408
;; And-not insn for all VI modes.  The mnemonic depends on the insn's
;; "mode" attribute: OI and TI use pandn, V8SF and V4SF use andnps.  The
;; case labels fall through on purpose so the wider mode also runs the
;; narrower mode's assertion.  Alternative 1 prepends "v" and uses the
;; three-operand form.  The template is formatted into a static 32-byte
;; buffer.
;; The "mode" attribute is V8SF for vectors wider than 16 bytes when AVX2
;; is unavailable, V4SF when SSE2 is unavailable, and <sseinsnmode>
;; otherwise.
6409(define_insn "*andnot<mode>3" 6410 [(set (match_operand:VI 0 "register_operand" "=x,x") 6411 (and:VI 6412 (not:VI (match_operand:VI 1 "register_operand" "0,x")) 6413 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))] 6414 "TARGET_SSE" 6415{ 6416 static char buf[32]; 6417 const char *ops; 6418 const char *tmp; 6419 6420 switch (get_attr_mode (insn)) 6421 { 6422 case MODE_OI: 6423 gcc_assert (TARGET_AVX2); 6424 case MODE_TI: 6425 gcc_assert (TARGET_SSE2); 6426 6427 tmp = "pandn"; 6428 break; 6429 6430 case MODE_V8SF: 6431 gcc_assert (TARGET_AVX); 6432 case MODE_V4SF: 6433 gcc_assert (TARGET_SSE); 6434 6435 tmp = "andnps"; 6436 break; 6437 6438 default: 6439 gcc_unreachable (); 6440 } 6441 6442 switch (which_alternative) 6443 { 6444 case 0: 6445 ops = "%s\t{%%2, %%0|%%0, %%2}"; 6446 break; 6447 case 1: 6448 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 6449 break; 6450 default: 6451 gcc_unreachable (); 6452 } 6453 6454 snprintf (buf, sizeof (buf), ops, tmp); 6455 return buf; 6456} 6457 [(set_attr "isa" 
"noavx,avx") 6458 (set_attr "type" "sselog") 6459 (set (attr "prefix_data16") 6460 (if_then_else 6461 (and (eq_attr "alternative" "0") 6462 (eq_attr "mode" "TI")) 6463 (const_string "1") 6464 (const_string "*"))) 6465 (set_attr "prefix" "orig,vex") 6466 (set (attr "mode") 6467 (cond [(and (not (match_test "TARGET_AVX2")) 6468 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")) 6469 (const_string "V8SF") 6470 (not (match_test "TARGET_SSE2")) 6471 (const_string "V4SF") 6472 ] 6473 (const_string "<sseinsnmode>")))]) 6474
;; Bitwise logic (every code of the any_logic iterator) on VI modes.  The
;; expander only canonicalizes the operands.
6475(define_expand "<code><mode>3" 6476 [(set (match_operand:VI 0 "register_operand" "") 6477 (any_logic:VI 6478 (match_operand:VI 1 "nonimmediate_operand" "") 6479 (match_operand:VI 2 "nonimmediate_operand" "")))] 6480 "TARGET_SSE" 6481 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 6482
;; Matching insn for the logic expander above.  Mnemonic selection
;; mirrors *andnot<mode>3: OI/TI use p<logic>, V8SF/V4SF use <logic>ps,
;; and alternative 1 adds the "v" prefix and a third operand.
6483(define_insn "*<code><mode>3" 6484 [(set (match_operand:VI 0 "register_operand" "=x,x") 6485 (any_logic:VI 6486 (match_operand:VI 1 "nonimmediate_operand" "%0,x") 6487 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))] 6488 "TARGET_SSE 6489 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6490{ 6491 static char buf[32]; 6492 const char *ops; 6493 const char *tmp; 6494 6495 switch (get_attr_mode (insn)) 6496 { 6497 case MODE_OI: 6498 gcc_assert (TARGET_AVX2); 6499 case MODE_TI: 6500 gcc_assert (TARGET_SSE2); 6501 6502 tmp = "p<logic>"; 6503 break; 6504 6505 case MODE_V8SF: 6506 gcc_assert (TARGET_AVX); 6507 case MODE_V4SF: 6508 gcc_assert (TARGET_SSE); 6509 6510 tmp = "<logic>ps"; 6511 break; 6512 6513 default: 6514 gcc_unreachable (); 6515 } 6516 6517 switch (which_alternative) 6518 { 6519 case 0: 6520 ops = "%s\t{%%2, %%0|%%0, %%2}"; 6521 break; 6522 case 1: 6523 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 6524 break; 6525 default: 6526 gcc_unreachable (); 6527 } 6528 6529 snprintf (buf, sizeof (buf), ops, tmp); 6530 return buf; 6531} 6532 [(set_attr "isa" "noavx,avx") 6533 (set_attr "type" "sselog") 6534 (set (attr 
"prefix_data16") 6535 (if_then_else 6536 (and (eq_attr "alternative" "0") 6537 (eq_attr "mode" "TI")) 6538 (const_string "1") 6539 (const_string "*"))) 6540 (set_attr "prefix" "orig,vex") 6541 (set (attr "mode") 6542 (cond [(and (not (match_test "TARGET_AVX2")) 6543 (match_test "GET_MODE_SIZE (<MODE>mode) > 16")) 6544 (const_string "V8SF") 6545 (not (match_test "TARGET_SSE2")) 6546 (const_string "V4SF") 6547 ] 6548 (const_string "<sseinsnmode>")))]) 6549 6550(define_insn "*andnottf3" 6551 [(set (match_operand:TF 0 "register_operand" "=x,x") 6552 (and:TF 6553 (not:TF (match_operand:TF 1 "register_operand" "0,x")) 6554 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))] 6555 "TARGET_SSE2" 6556 "@ 6557 pandn\t{%2, %0|%0, %2} 6558 vpandn\t{%2, %1, %0|%0, %1, %2}" 6559 [(set_attr "isa" "noavx,avx") 6560 (set_attr "type" "sselog") 6561 (set_attr "prefix_data16" "1,*") 6562 (set_attr "prefix" "orig,vex") 6563 (set_attr "mode" "TI")]) 6564 6565(define_expand "<code>tf3" 6566 [(set (match_operand:TF 0 "register_operand" "") 6567 (any_logic:TF 6568 (match_operand:TF 1 "nonimmediate_operand" "") 6569 (match_operand:TF 2 "nonimmediate_operand" "")))] 6570 "TARGET_SSE2" 6571 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);") 6572 6573(define_insn "*<code>tf3" 6574 [(set (match_operand:TF 0 "register_operand" "=x,x") 6575 (any_logic:TF 6576 (match_operand:TF 1 "nonimmediate_operand" "%0,x") 6577 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))] 6578 "TARGET_SSE2 6579 && ix86_binary_operator_ok (<CODE>, TFmode, operands)" 6580 "@ 6581 p<logic>\t{%2, %0|%0, %2} 6582 vp<logic>\t{%2, %1, %0|%0, %1, %2}" 6583 [(set_attr "isa" "noavx,avx") 6584 (set_attr "type" "sselog") 6585 (set_attr "prefix_data16" "1,*") 6586 (set_attr "prefix" "orig,vex") 6587 (set_attr "mode" "TI")]) 6588 6589;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6590;; 6591;; Parallel integral element swizzling 6592;; 
6593;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6594 6595(define_expand "vec_pack_trunc_<mode>" 6596 [(match_operand:<ssepackmode> 0 "register_operand" "") 6597 (match_operand:VI248_AVX2 1 "register_operand" "") 6598 (match_operand:VI248_AVX2 2 "register_operand" "")] 6599 "TARGET_SSE2" 6600{ 6601 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]); 6602 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]); 6603 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); 6604 DONE; 6605}) 6606 6607(define_insn "<sse2_avx2>_packsswb" 6608 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 6609 (vec_concat:VI1_AVX2 6610 (ss_truncate:<ssehalfvecmode> 6611 (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) 6612 (ss_truncate:<ssehalfvecmode> 6613 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] 6614 "TARGET_SSE2" 6615 "@ 6616 packsswb\t{%2, %0|%0, %2} 6617 vpacksswb\t{%2, %1, %0|%0, %1, %2}" 6618 [(set_attr "isa" "noavx,avx") 6619 (set_attr "type" "sselog") 6620 (set_attr "prefix_data16" "1,*") 6621 (set_attr "prefix" "orig,vex") 6622 (set_attr "mode" "<sseinsnmode>")]) 6623 6624(define_insn "<sse2_avx2>_packssdw" 6625 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") 6626 (vec_concat:VI2_AVX2 6627 (ss_truncate:<ssehalfvecmode> 6628 (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) 6629 (ss_truncate:<ssehalfvecmode> 6630 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] 6631 "TARGET_SSE2" 6632 "@ 6633 packssdw\t{%2, %0|%0, %2} 6634 vpackssdw\t{%2, %1, %0|%0, %1, %2}" 6635 [(set_attr "isa" "noavx,avx") 6636 (set_attr "type" "sselog") 6637 (set_attr "prefix_data16" "1,*") 6638 (set_attr "prefix" "orig,vex") 6639 (set_attr "mode" "<sseinsnmode>")]) 6640 6641(define_insn "<sse2_avx2>_packuswb" 6642 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 6643 (vec_concat:VI1_AVX2 6644 (us_truncate:<ssehalfvecmode> 6645 (match_operand:<sseunpackmode> 1 
"register_operand" "0,x")) 6646 (us_truncate:<ssehalfvecmode> 6647 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] 6648 "TARGET_SSE2" 6649 "@ 6650 packuswb\t{%2, %0|%0, %2} 6651 vpackuswb\t{%2, %1, %0|%0, %1, %2}" 6652 [(set_attr "isa" "noavx,avx") 6653 (set_attr "type" "sselog") 6654 (set_attr "prefix_data16" "1,*") 6655 (set_attr "prefix" "orig,vex") 6656 (set_attr "mode" "<sseinsnmode>")]) 6657 6658(define_insn "avx2_interleave_highv32qi" 6659 [(set (match_operand:V32QI 0 "register_operand" "=x") 6660 (vec_select:V32QI 6661 (vec_concat:V64QI 6662 (match_operand:V32QI 1 "register_operand" "x") 6663 (match_operand:V32QI 2 "nonimmediate_operand" "xm")) 6664 (parallel [(const_int 8) (const_int 40) 6665 (const_int 9) (const_int 41) 6666 (const_int 10) (const_int 42) 6667 (const_int 11) (const_int 43) 6668 (const_int 12) (const_int 44) 6669 (const_int 13) (const_int 45) 6670 (const_int 14) (const_int 46) 6671 (const_int 15) (const_int 47) 6672 (const_int 24) (const_int 56) 6673 (const_int 25) (const_int 57) 6674 (const_int 26) (const_int 58) 6675 (const_int 27) (const_int 59) 6676 (const_int 28) (const_int 60) 6677 (const_int 29) (const_int 61) 6678 (const_int 30) (const_int 62) 6679 (const_int 31) (const_int 63)])))] 6680 "TARGET_AVX2" 6681 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" 6682 [(set_attr "type" "sselog") 6683 (set_attr "prefix" "vex") 6684 (set_attr "mode" "OI")]) 6685 6686(define_insn "vec_interleave_highv16qi" 6687 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 6688 (vec_select:V16QI 6689 (vec_concat:V32QI 6690 (match_operand:V16QI 1 "register_operand" "0,x") 6691 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")) 6692 (parallel [(const_int 8) (const_int 24) 6693 (const_int 9) (const_int 25) 6694 (const_int 10) (const_int 26) 6695 (const_int 11) (const_int 27) 6696 (const_int 12) (const_int 28) 6697 (const_int 13) (const_int 29) 6698 (const_int 14) (const_int 30) 6699 (const_int 15) (const_int 31)])))] 6700 "TARGET_SSE2" 
6701 "@ 6702 punpckhbw\t{%2, %0|%0, %2} 6703 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" 6704 [(set_attr "isa" "noavx,avx") 6705 (set_attr "type" "sselog") 6706 (set_attr "prefix_data16" "1,*") 6707 (set_attr "prefix" "orig,vex") 6708 (set_attr "mode" "TI")]) 6709 6710(define_insn "avx2_interleave_lowv32qi" 6711 [(set (match_operand:V32QI 0 "register_operand" "=x") 6712 (vec_select:V32QI 6713 (vec_concat:V64QI 6714 (match_operand:V32QI 1 "register_operand" "x") 6715 (match_operand:V32QI 2 "nonimmediate_operand" "xm")) 6716 (parallel [(const_int 0) (const_int 32) 6717 (const_int 1) (const_int 33) 6718 (const_int 2) (const_int 34) 6719 (const_int 3) (const_int 35) 6720 (const_int 4) (const_int 36) 6721 (const_int 5) (const_int 37) 6722 (const_int 6) (const_int 38) 6723 (const_int 7) (const_int 39) 6724 (const_int 16) (const_int 48) 6725 (const_int 17) (const_int 49) 6726 (const_int 18) (const_int 50) 6727 (const_int 19) (const_int 51) 6728 (const_int 20) (const_int 52) 6729 (const_int 21) (const_int 53) 6730 (const_int 22) (const_int 54) 6731 (const_int 23) (const_int 55)])))] 6732 "TARGET_AVX2" 6733 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" 6734 [(set_attr "type" "sselog") 6735 (set_attr "prefix" "vex") 6736 (set_attr "mode" "OI")]) 6737 6738(define_insn "vec_interleave_lowv16qi" 6739 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 6740 (vec_select:V16QI 6741 (vec_concat:V32QI 6742 (match_operand:V16QI 1 "register_operand" "0,x") 6743 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")) 6744 (parallel [(const_int 0) (const_int 16) 6745 (const_int 1) (const_int 17) 6746 (const_int 2) (const_int 18) 6747 (const_int 3) (const_int 19) 6748 (const_int 4) (const_int 20) 6749 (const_int 5) (const_int 21) 6750 (const_int 6) (const_int 22) 6751 (const_int 7) (const_int 23)])))] 6752 "TARGET_SSE2" 6753 "@ 6754 punpcklbw\t{%2, %0|%0, %2} 6755 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" 6756 [(set_attr "isa" "noavx,avx") 6757 (set_attr "type" "sselog") 6758 (set_attr "prefix_data16" 
"1,*") 6759 (set_attr "prefix" "orig,vex") 6760 (set_attr "mode" "TI")]) 6761 6762(define_insn "avx2_interleave_highv16hi" 6763 [(set (match_operand:V16HI 0 "register_operand" "=x") 6764 (vec_select:V16HI 6765 (vec_concat:V32HI 6766 (match_operand:V16HI 1 "register_operand" "x") 6767 (match_operand:V16HI 2 "nonimmediate_operand" "xm")) 6768 (parallel [(const_int 4) (const_int 20) 6769 (const_int 5) (const_int 21) 6770 (const_int 6) (const_int 22) 6771 (const_int 7) (const_int 23) 6772 (const_int 12) (const_int 28) 6773 (const_int 13) (const_int 29) 6774 (const_int 14) (const_int 30) 6775 (const_int 15) (const_int 31)])))] 6776 "TARGET_AVX2" 6777 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" 6778 [(set_attr "type" "sselog") 6779 (set_attr "prefix" "vex") 6780 (set_attr "mode" "OI")]) 6781 6782(define_insn "vec_interleave_highv8hi" 6783 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 6784 (vec_select:V8HI 6785 (vec_concat:V16HI 6786 (match_operand:V8HI 1 "register_operand" "0,x") 6787 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")) 6788 (parallel [(const_int 4) (const_int 12) 6789 (const_int 5) (const_int 13) 6790 (const_int 6) (const_int 14) 6791 (const_int 7) (const_int 15)])))] 6792 "TARGET_SSE2" 6793 "@ 6794 punpckhwd\t{%2, %0|%0, %2} 6795 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" 6796 [(set_attr "isa" "noavx,avx") 6797 (set_attr "type" "sselog") 6798 (set_attr "prefix_data16" "1,*") 6799 (set_attr "prefix" "orig,vex") 6800 (set_attr "mode" "TI")]) 6801 6802(define_insn "avx2_interleave_lowv16hi" 6803 [(set (match_operand:V16HI 0 "register_operand" "=x") 6804 (vec_select:V16HI 6805 (vec_concat:V32HI 6806 (match_operand:V16HI 1 "register_operand" "x") 6807 (match_operand:V16HI 2 "nonimmediate_operand" "xm")) 6808 (parallel [(const_int 0) (const_int 16) 6809 (const_int 1) (const_int 17) 6810 (const_int 2) (const_int 18) 6811 (const_int 3) (const_int 19) 6812 (const_int 8) (const_int 24) 6813 (const_int 9) (const_int 25) 6814 (const_int 10) (const_int 26) 6815 
(const_int 11) (const_int 27)])))] 6816 "TARGET_AVX2" 6817 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" 6818 [(set_attr "type" "sselog") 6819 (set_attr "prefix" "vex") 6820 (set_attr "mode" "OI")]) 6821
;; Interleave the low four halfwords of operands 1 and 2.  The parallel
;; indexes the V16HI concatenation, so 0..3 come from operand 1 and 8..11
;; are the same positions of operand 2.
6822(define_insn "vec_interleave_lowv8hi" 6823 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 6824 (vec_select:V8HI 6825 (vec_concat:V16HI 6826 (match_operand:V8HI 1 "register_operand" "0,x") 6827 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")) 6828 (parallel [(const_int 0) (const_int 8) 6829 (const_int 1) (const_int 9) 6830 (const_int 2) (const_int 10) 6831 (const_int 3) (const_int 11)])))] 6832 "TARGET_SSE2" 6833 "@ 6834 punpcklwd\t{%2, %0|%0, %2} 6835 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" 6836 [(set_attr "isa" "noavx,avx") 6837 (set_attr "type" "sselog") 6838 (set_attr "prefix_data16" "1,*") 6839 (set_attr "prefix" "orig,vex") 6840 (set_attr "mode" "TI")]) 6841
;; AVX2 high interleave of doublewords.  The selection takes elements 2,3
;; and 6,7 of operand 1, each paired with the same position of operand 2
;; (index + 8), i.e. the upper two elements of every group of four rather
;; than the upper half of the whole 256-bit vector.
6842(define_insn "avx2_interleave_highv8si" 6843 [(set (match_operand:V8SI 0 "register_operand" "=x") 6844 (vec_select:V8SI 6845 (vec_concat:V16SI 6846 (match_operand:V8SI 1 "register_operand" "x") 6847 (match_operand:V8SI 2 "nonimmediate_operand" "xm")) 6848 (parallel [(const_int 2) (const_int 10) 6849 (const_int 3) (const_int 11) 6850 (const_int 6) (const_int 14) 6851 (const_int 7) (const_int 15)])))] 6852 "TARGET_AVX2" 6853 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" 6854 [(set_attr "type" "sselog") 6855 (set_attr "prefix" "vex") 6856 (set_attr "mode" "OI")]) 6857
;; Interleave the high two doublewords of operands 1 and 2 (elements 2,3
;; of operand 1 with elements 2,3 of operand 2, numbered 6,7 in the
;; concatenation).
6858(define_insn "vec_interleave_highv4si" 6859 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 6860 (vec_select:V4SI 6861 (vec_concat:V8SI 6862 (match_operand:V4SI 1 "register_operand" "0,x") 6863 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) 6864 (parallel [(const_int 2) (const_int 6) 6865 (const_int 3) (const_int 7)])))] 6866 "TARGET_SSE2" 6867 "@ 6868 punpckhdq\t{%2, %0|%0, %2} 6869 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" 6870 [(set_attr "isa" "noavx,avx") 6871 (set_attr "type" "sselog") 6872 (set_attr "prefix_data16" "1,*") 
6873 (set_attr "prefix" "orig,vex") 6874 (set_attr "mode" "TI")]) 6875
;; AVX2 low interleave of doublewords: elements 0,1 and 4,5 of operand 1,
;; each paired with the same position of operand 2 (index + 8), i.e. the
;; lower two elements of every group of four.
6876(define_insn "avx2_interleave_lowv8si" 6877 [(set (match_operand:V8SI 0 "register_operand" "=x") 6878 (vec_select:V8SI 6879 (vec_concat:V16SI 6880 (match_operand:V8SI 1 "register_operand" "x") 6881 (match_operand:V8SI 2 "nonimmediate_operand" "xm")) 6882 (parallel [(const_int 0) (const_int 8) 6883 (const_int 1) (const_int 9) 6884 (const_int 4) (const_int 12) 6885 (const_int 5) (const_int 13)])))] 6886 "TARGET_AVX2" 6887 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}" 6888 [(set_attr "type" "sselog") 6889 (set_attr "prefix" "vex") 6890 (set_attr "mode" "OI")]) 6891
;; Interleave the low two doublewords of operands 1 and 2.
6892(define_insn "vec_interleave_lowv4si" 6893 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 6894 (vec_select:V4SI 6895 (vec_concat:V8SI 6896 (match_operand:V4SI 1 "register_operand" "0,x") 6897 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) 6898 (parallel [(const_int 0) (const_int 4) 6899 (const_int 1) (const_int 5)])))] 6900 "TARGET_SSE2" 6901 "@ 6902 punpckldq\t{%2, %0|%0, %2} 6903 vpunpckldq\t{%2, %1, %0|%0, %1, %2}" 6904 [(set_attr "isa" "noavx,avx") 6905 (set_attr "type" "sselog") 6906 (set_attr "prefix_data16" "1,*") 6907 (set_attr "prefix" "orig,vex") 6908 (set_attr "mode" "TI")]) 6909
;; Generic high interleave for the 256-bit integer modes.  The expander
;; emits both the AVX2 low interleave (t1) and the AVX2 high interleave
;; (t2) of the inputs, then combines V4DImode views of t1 and t2 with
;; avx2_permv2ti using the immediate 1 + (3 << 4).
;; NOTE(review): presumably that immediate selects the upper 128-bit half
;; of t1 followed by the upper 128-bit half of t2 -- confirm against the
;; avx2_permv2ti pattern.
;; NOTE(review): the operands carry constraint strings, unlike the other
;; define_expand patterns in this section.
6910(define_expand "vec_interleave_high<mode>" 6911 [(match_operand:VI_256 0 "register_operand" "=x") 6912 (match_operand:VI_256 1 "register_operand" "x") 6913 (match_operand:VI_256 2 "nonimmediate_operand" "xm")] 6914 "TARGET_AVX2" 6915{ 6916 rtx t1 = gen_reg_rtx (<MODE>mode); 6917 rtx t2 = gen_reg_rtx (<MODE>mode); 6918 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); 6919 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); 6920 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]), 6921 gen_lowpart (V4DImode, t1), 6922 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4)))); 6923 DONE; 6924}) 6925
;; Generic low interleave for the 256-bit integer modes.  Same structure
;; as vec_interleave_high<mode>, with the immediate 0 + (2 << 4) passed
;; to avx2_permv2ti.
6926(define_expand "vec_interleave_low<mode>" 6927 [(match_operand:VI_256 0 
"register_operand" "=x") 6928 (match_operand:VI_256 1 "register_operand" "x") 6929 (match_operand:VI_256 2 "nonimmediate_operand" "xm")] 6930 "TARGET_AVX2" 6931{ 6932 rtx t1 = gen_reg_rtx (<MODE>mode); 6933 rtx t2 = gen_reg_rtx (<MODE>mode); 6934 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); 6935 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); 6936 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]), 6937 gen_lowpart (V4DImode, t1), 6938 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4)))); 6939 DONE; 6940}) 6941 6942;; Modes handled by pinsr patterns. 6943(define_mode_iterator PINSR_MODE 6944 [(V16QI "TARGET_SSE4_1") V8HI 6945 (V4SI "TARGET_SSE4_1") 6946 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) 6947 6948(define_mode_attr sse2p4_1 6949 [(V16QI "sse4_1") (V8HI "sse2") 6950 (V4SI "sse4_1") (V2DI "sse4_1")]) 6951 6952;; sse4_1_pinsrd must come before sse2_loadld since it is preferred. 6953(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>" 6954 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x") 6955 (vec_merge:PINSR_MODE 6956 (vec_duplicate:PINSR_MODE 6957 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m")) 6958 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x") 6959 (match_operand:SI 3 "const_int_operand" "")))] 6960 "TARGET_SSE2 6961 && ((unsigned) exact_log2 (INTVAL (operands[3])) 6962 < GET_MODE_NUNITS (<MODE>mode))" 6963{ 6964 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); 6965 6966 switch (which_alternative) 6967 { 6968 case 0: 6969 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) 6970 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"; 6971 /* FALLTHRU */ 6972 case 1: 6973 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"; 6974 case 2: 6975 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) 6976 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; 6977 /* FALLTHRU */ 6978 case 3: 6979 return 
"vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 6980 default: 6981 gcc_unreachable (); 6982 } 6983} 6984 [(set_attr "isa" "noavx,noavx,avx,avx") 6985 (set_attr "type" "sselog") 6986 (set (attr "prefix_rex") 6987 (if_then_else 6988 (and (not (match_test "TARGET_AVX")) 6989 (eq (const_string "<MODE>mode") (const_string "V2DImode"))) 6990 (const_string "1") 6991 (const_string "*"))) 6992 (set (attr "prefix_data16") 6993 (if_then_else 6994 (and (not (match_test "TARGET_AVX")) 6995 (eq (const_string "<MODE>mode") (const_string "V8HImode"))) 6996 (const_string "1") 6997 (const_string "*"))) 6998 (set (attr "prefix_extra") 6999 (if_then_else 7000 (and (not (match_test "TARGET_AVX")) 7001 (eq (const_string "<MODE>mode") (const_string "V8HImode"))) 7002 (const_string "*") 7003 (const_string "1"))) 7004 (set_attr "length_immediate" "1") 7005 (set_attr "prefix" "orig,orig,vex,vex") 7006 (set_attr "mode" "TI")]) 7007 7008(define_insn "*sse4_1_pextrb_<mode>" 7009 [(set (match_operand:SWI48 0 "register_operand" "=r") 7010 (zero_extend:SWI48 7011 (vec_select:QI 7012 (match_operand:V16QI 1 "register_operand" "x") 7013 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))] 7014 "TARGET_SSE4_1" 7015 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" 7016 [(set_attr "type" "sselog") 7017 (set_attr "prefix_extra" "1") 7018 (set_attr "length_immediate" "1") 7019 (set_attr "prefix" "maybe_vex") 7020 (set_attr "mode" "TI")]) 7021 7022(define_insn "*sse4_1_pextrb_memory" 7023 [(set (match_operand:QI 0 "memory_operand" "=m") 7024 (vec_select:QI 7025 (match_operand:V16QI 1 "register_operand" "x") 7026 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))] 7027 "TARGET_SSE4_1" 7028 "%vpextrb\t{%2, %1, %0|%0, %1, %2}" 7029 [(set_attr "type" "sselog") 7030 (set_attr "prefix_extra" "1") 7031 (set_attr "length_immediate" "1") 7032 (set_attr "prefix" "maybe_vex") 7033 (set_attr "mode" "TI")]) 7034 7035(define_insn "*sse2_pextrw_<mode>" 7036 [(set (match_operand:SWI48 0 
"register_operand" "=r") 7037 (zero_extend:SWI48 7038 (vec_select:HI 7039 (match_operand:V8HI 1 "register_operand" "x") 7040 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] 7041 "TARGET_SSE2" 7042 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}" 7043 [(set_attr "type" "sselog") 7044 (set_attr "prefix_data16" "1") 7045 (set_attr "length_immediate" "1") 7046 (set_attr "prefix" "maybe_vex") 7047 (set_attr "mode" "TI")]) 7048 7049(define_insn "*sse4_1_pextrw_memory" 7050 [(set (match_operand:HI 0 "memory_operand" "=m") 7051 (vec_select:HI 7052 (match_operand:V8HI 1 "register_operand" "x") 7053 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))] 7054 "TARGET_SSE4_1" 7055 "%vpextrw\t{%2, %1, %0|%0, %1, %2}" 7056 [(set_attr "type" "sselog") 7057 (set_attr "prefix_extra" "1") 7058 (set_attr "length_immediate" "1") 7059 (set_attr "prefix" "maybe_vex") 7060 (set_attr "mode" "TI")]) 7061 7062(define_insn "*sse4_1_pextrd" 7063 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") 7064 (vec_select:SI 7065 (match_operand:V4SI 1 "register_operand" "x") 7066 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] 7067 "TARGET_SSE4_1" 7068 "%vpextrd\t{%2, %1, %0|%0, %1, %2}" 7069 [(set_attr "type" "sselog") 7070 (set_attr "prefix_extra" "1") 7071 (set_attr "length_immediate" "1") 7072 (set_attr "prefix" "maybe_vex") 7073 (set_attr "mode" "TI")]) 7074 7075(define_insn "*sse4_1_pextrd_zext" 7076 [(set (match_operand:DI 0 "register_operand" "=r") 7077 (zero_extend:DI 7078 (vec_select:SI 7079 (match_operand:V4SI 1 "register_operand" "x") 7080 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))] 7081 "TARGET_64BIT && TARGET_SSE4_1" 7082 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" 7083 [(set_attr "type" "sselog") 7084 (set_attr "prefix_extra" "1") 7085 (set_attr "length_immediate" "1") 7086 (set_attr "prefix" "maybe_vex") 7087 (set_attr "mode" "TI")]) 7088 7089;; It must come before *vec_extractv2di_1_rex64 since it is preferred. 
;; pextrq: extract qword element operand 2 of operand 1 to a general
;; register or memory (SSE4.1, 64-bit targets only).
(define_insn "*sse4_1_pextrq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
  "TARGET_SSE4_1 && TARGET_64BIT"
  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_rex" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Expander taking the 8-bit shuffle immediate in operand 2.  It splits
;; the immediate into four 2-bit selectors and passes them to
;; avx2_pshufd_1 twice: as-is for elements 0-3 and offset by 4 for
;; elements 4-7.
(define_expand "avx2_pshufdv3"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
                                GEN_INT ((mask >> 0) & 3),
                                GEN_INT ((mask >> 2) & 3),
                                GEN_INT ((mask >> 4) & 3),
                                GEN_INT ((mask >> 6) & 3),
                                GEN_INT (((mask >> 0) & 3) + 4),
                                GEN_INT (((mask >> 2) & 3) + 4),
                                GEN_INT (((mask >> 4) & 3) + 4),
                                GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})

;; vpshufd (256-bit).  Operands 2-5 select elements 0-3 and operands
;; 6-9 select elements 4-7; the insn condition requires each of 6-9 to
;; equal the matching one of 2-5 plus 4.  The output code packs
;; operands 2-5 back into one immediate.
(define_insn "avx2_pshufd_1"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_select:V8SI
          (match_operand:V8SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")
                     (match_operand 7 "const_4_to_7_operand" "")
                     (match_operand 8 "const_4_to_7_operand" "")
                     (match_operand 9 "const_4_to_7_operand" "")])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])

;; Expander taking the shuffle immediate in operand 2; it splits the
;; low 8 bits into four 2-bit selectors for sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                GEN_INT ((mask >> 0) & 3),
                                GEN_INT ((mask >> 2) & 3),
                                GEN_INT ((mask >> 4) & 3),
                                GEN_INT ((mask >> 6) & 3)));
  DONE;
})

;; pshufd / vpshufd (128-bit).  Operands 2-5 are the per-element
;; selectors; the output code packs them into one immediate.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Expander taking the 8-bit shuffle immediate in operand 2.  The four
;; 2-bit selectors are passed to avx2_pshuflw_1 as-is for elements 0-3
;; and offset by 8 for elements 8-11.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT ((mask >> 0) & 3),
                                 GEN_INT ((mask >> 2) & 3),
                                 GEN_INT ((mask >> 4) & 3),
                                 GEN_INT ((mask >> 6) & 3),
                                 GEN_INT (((mask >> 0) & 3) + 8),
                                 GEN_INT (((mask >> 2) & 3) + 8),
                                 GEN_INT (((mask >> 4) & 3) + 8),
                                 GEN_INT (((mask >> 6) & 3) + 8)));
  DONE;
})

;; vpshuflw (256-bit).  Elements 0-3 and 8-11 are shuffled; elements
;; 4-7 and 12-15 are copied through unchanged.  The insn condition
;; requires operands 6-9 to equal operands 2-5 plus 8.
(define_insn "avx2_pshuflw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)
                     (match_operand 6 "const_8_to_11_operand" "")
                     (match_operand 7 "const_8_to_11_operand" "")
                     (match_operand 8 "const_8_to_11_operand" "")
                     (match_operand 9 "const_8_to_11_operand" "")
                     (const_int 12)
                     (const_int 13)
                     (const_int 14)
                     (const_int 15)])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])

;; Expander taking the shuffle immediate in operand 2; it splits the
;; low 8 bits into four 2-bit selectors for sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT ((mask >> 0) & 3),
                                 GEN_INT ((mask >> 2) & 3),
                                 GEN_INT ((mask >> 4) & 3),
                                 GEN_INT ((mask >> 6) & 3)));
  DONE;
})

;; pshuflw / vpshuflw (128-bit).  Elements 0-3 are shuffled according
;; to operands 2-5; elements 4-7 are copied through unchanged.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Expander taking the 8-bit shuffle immediate in operand 2.  The four
;; 2-bit selectors are passed to avx2_pshufhw_1 offset by 4 (elements
;; 4-7) and by 12 (elements 12-15).
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
                                 GEN_INT (((mask >> 0) & 3) + 4),
                                 GEN_INT (((mask >> 2) & 3) + 4),
                                 GEN_INT (((mask >> 4) & 3) + 4),
                                 GEN_INT (((mask >> 6) & 3) + 4),
                                 GEN_INT (((mask >> 0) & 3) + 12),
                                 GEN_INT (((mask >> 2) & 3) + 12),
                                 GEN_INT (((mask >> 4) & 3) + 12),
                                 GEN_INT (((mask >> 6) & 3) + 12)));
  DONE;
})

;; vpshufhw (256-bit).  Elements 4-7 and 12-15 are shuffled; elements
;; 0-3 and 8-11 are copied through unchanged.  The insn condition
;; requires operands 6-9 to equal operands 2-5 plus 8.  The output code
;; subtracts 4 from operands 2-5 when rebuilding the immediate.
(define_insn "avx2_pshufhw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (const_int 8)
                     (const_int 9)
                     (const_int 10)
                     (const_int 11)
                     (match_operand 6 "const_12_to_15_operand" "")
                     (match_operand 7 "const_12_to_15_operand" "")
                     (match_operand 8 "const_12_to_15_operand" "")
                     (match_operand 9 "const_12_to_15_operand" "")])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int mask = 0;
  mask |= (INTVAL (operands[2]) - 4) << 0;
  mask |= (INTVAL (operands[3]) - 4) << 2;
  mask |= (INTVAL (operands[4]) - 4) << 4;
  mask |= (INTVAL (operands[5]) - 4) << 6;
  operands[2] = GEN_INT (mask);

  return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])

;; Expander taking the shuffle immediate in operand 2; it splits the
;; low 8 bits into four 2-bit selectors, each offset by 4, for
;; sse2_pshufhw_1.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 GEN_INT (((mask >> 0) & 3) + 4),
                                 GEN_INT (((mask >> 2) & 3) + 4),
                                 GEN_INT (((mask >> 4) & 3) + 4),
                                 GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})

;; pshufhw / vpshufhw (128-bit).  Elements 4-7 are shuffled according
;; to operands 2-5; elements 0-3 are copied through unchanged.  The
;; output code subtracts 4 from each selector when rebuilding the
;; immediate.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int mask = 0;
  mask |= (INTVAL (operands[2]) - 4) << 0;
  mask |= (INTVAL (operands[3]) - 4) << 2;
  mask |= (INTVAL (operands[4]) - 4) << 4;
  mask |= (INTVAL (operands[5]) - 4) << 6;
  operands[2] = GEN_INT (mask);

  return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Load SImode operand 1 into element 0 of a V4SI whose other elements
;; are zero; operand 2 is set to the V4SI zero constant by the
;; preparation statement.
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 1 "nonimmediate_operand" ""))
          (match_dup 2)
          (const_int 1)))]
  "TARGET_SSE"
  "operands[2] = CONST0_RTX (V4SImode);")

;; Merge SImode operand 2 into element 0 of operand 1, which is either
;; zero (constraint C) or a register.  Emitted as movd for the zero
;; cases and movss/vmovss otherwise; see the "isa" attribute for which
;; alternatives are available.
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,Yi,x,x,x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
          (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   %vmovd\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2,*,noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,V4SF,SF,SF")])

;; Extract element 0 of a V4SI.  Output as "#" and split after reload
;; into a plain SImode move from the same hard register.  The split
;; only applies when TARGET_INTER_UNIT_MOVES holds, the destination is
;; memory, or the destination is not a general register.
(define_insn_and_split "sse2_stored"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x,Yi")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")

;; Extract element operand 2 from a V4SI in offsettable memory.  After
;; reload this is split into an SImode load from byte offset 4 * index.
(define_insn_and_split "*vec_ext_v4si_mem"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (vec_select:SI
          (match_operand:V4SI 1 "memory_operand" "o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  int i = INTVAL (operands[2]);

  emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
  DONE;
})

;; Named expander for extracting element 0 of a V2DI; it emits the
;; pattern as written, with no preparation code.
(define_expand "sse_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE")

;; 64-bit targets: extract element 0 of a V2DI.  The register-source
;; alternatives are output as "#"; the memory-source alternative is a
;; plain mov{q}.
(define_insn "*sse2_storeq_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
          (parallel [(const_int 0)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   #
   #
   mov{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "*,*,imov")
   (set_attr "mode" "*,*,DI")])

;; Extract element 0 of a V2DI register; always output as "#".
(define_insn "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")

;; After reload, rewrite extraction of element 0 of a V2DI register as
;; a DImode move from the same hard register, under the same
;; destination conditions as the sse2_stored split.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE
   && reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
  "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")

;; 64-bit targets: extract element 1 of a V2DI.  Alternatives cover
;; store to memory (movhps), in-register byte shift (psrldq/vpsrldq),
;; and loads of the upper half of an offsettable memory operand (%H1)
;; into an SSE or general register.
(define_insn "*vec_extractv2di_1_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   mov{q}\t{%H1, %0|%0, %H1}"
  [(set_attr "isa" "*,noavx,avx,*,*")
   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
   (set_attr "length_immediate" "*,1,1,*,*")
   (set_attr "memory" "*,none,none,*,*")
   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
   (set_attr "mode" "V2SF,TI,TI,TI,DI")])

;; 32-bit targets: extract element 1 of a V2DI.  Same idea as the
;; 64-bit pattern, but with no general-register alternative and with
;; extra movhlps/movlps alternatives restricted to non-AVX by "isa".
(define_insn "*vec_extractv2di_1"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_64BIT && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
   (set_attr "length_immediate" "*,1,1,*,*,*")
   (set_attr "memory" "*,none,none,*,*,*")
   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])

;; Broadcast an SImode value to all four elements of a V4SI, using
;; pshufd, vbroadcastss (memory source) or shufps depending on the
;; alternative.
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
  "TARGET_SSE"
  "@
   %vpshufd\t{$0, %1, %0|%0, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "isa" "sse2,avx,noavx")
   (set_attr "type" "sselog1,ssemov,sselog1")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "prefix" "maybe_vex,vex,orig")
   (set_attr "mode" "TI,V4SF,V4SF")])

;; Broadcast a DImode value to both elements of a V2DI, using
;; punpcklqdq/vpunpcklqdq, movddup (memory source) or movlhps depending
;; on the alternative.
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   vpunpcklqdq\t{%d1, %0|%0, %d1}
   %vmovddup\t{%1, %0|%0, %1}
   movlhps\t%0, %0"
  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
   (set_attr "prefix" "orig,vex,maybe_vex,orig")
   (set_attr "mode" "TI,TI,DF,V4SF")])

;; Build a V2SI from two SImode values when SSE4.1 is available.
;; Alternatives 0-4 use SSE registers (pinsrd, punpckldq, or movd when
;; operand 2 is zero); the last two use MMX registers.
(define_insn "*vec_concatv2si_sse4_1"
  [(set (match_operand:V2SI 0 "register_operand"     "=x, x,x,x, x, *y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm,  0,rm")
          (match_operand:SI 2 "vector_move_operand"  "rm,rm,x,x, C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   pinsrd\t{$1, %2, %0|%0, %2, 1}
   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
   %vmovd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_extra" "1,1,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,*,*,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode values with SSE2: punpckldq when both
;; halves are in registers, movd when operand 2 is zero.  The last two
;; alternatives use MMX registers.
(define_insn "*vec_concatv2si_sse2"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
          (match_operand:SI 2 "reg_or_0_operand"     " x,C ,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])

;; Build a V2SI from two SImode values with only SSE: unpcklps and
;; movss stand in for the integer instructions in the SSE-register
;; alternatives.  The last two alternatives use MMX registers.
(define_insn "*vec_concatv2si_sse"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
          (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])

;; Concatenate two V2SI values into a V4SI.  Register sources use
;; punpcklqdq/vpunpcklqdq or movlhps; a memory operand 2 uses
;; movhps/vmovhps.
(define_insn "*vec_concatv4si"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,x,x,x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand"     " 0,x,0,0,x")
          (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix" "orig,vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])

;; movd instead of movq is required to handle broken assemblers.
;; 64-bit targets: build a V2DI from two DImode values.  Alternatives
;; 0-1 insert a general register or memory operand with pinsrq/vpinsrq,
;; alternatives 2-4 handle a zero upper half (operand 2 matches C), and
;; alternatives 5-8 combine with punpcklqdq or movhps and their VEX
;; forms.
(define_insn "*vec_concatv2di_rex64"
  [(set (match_operand:V2DI 0 "register_operand"
          "=x,x ,x ,Yi,!x,x,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand"
          " 0,x ,xm,r ,*y,0,x,0,x")
          (match_operand:DI 2 "vector_move_operand"
          "rm,rm,C ,C ,C ,x,x,m,m")))]
  "TARGET_64BIT"
  "@
   pinsrq\t{$1, %2, %0|%0, %2, 1}
   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
   %vmovq\t{%1, %0|%0, %1}
   %vmovd\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "0,1,5,6")
       (const_string "sselog")
       (const_string "ssemov")))
   (set (attr "prefix_rex")
     (if_then_else
       (and (eq_attr "alternative" "0,3")
            (not (match_test "TARGET_AVX")))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
   (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])

;; 32-bit targets: build a V2DI from two DImode values.  Alternatives
;; 0-1 handle a zero upper half; the rest combine two halves with
;; punpcklqdq, movlhps or movhps and their VEX forms.
(define_insn "vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,?x,x,x,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
          (match_operand:DI 2 "vector_move_operand"  " C, C,x,x,x,m,m")))]
  "!TARGET_64BIT && TARGET_SSE"
  "@
   %vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])

;; Unpack expanders.  All four delegate to ix86_expand_sse_unpack; the
;; two boolean arguments are (false, false) for vec_unpacks_lo,
;; (false, true) for vec_unpacks_hi, (true, false) for vec_unpacku_lo
;; and (true, true) for vec_unpacku_hi.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, false, false); DONE;")

(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, false, true); DONE;")

(define_expand "vec_unpacku_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, true, false); DONE;")

(define_expand "vec_unpacku_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI124_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
  "ix86_expand_sse_unpack (operands, true, true); DONE;")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Unsigned byte average, 256-bit.  The RTL describes
;; (zext (a) + zext (b) + 1) >> 1 computed in the wider element mode
;; and truncated back.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands.
(define_expand "avx2_uavgv32qi3"
  [(set (match_operand:V32QI 0 "register_operand" "")
        (truncate:V32QI
          (lshiftrt:V32HI
            (plus:V32HI
              (plus:V32HI
                (zero_extend:V32HI
                  (match_operand:V32QI 1 "nonimmediate_operand" ""))
                (zero_extend:V32HI
                  (match_operand:V32QI 2 "nonimmediate_operand" "")))
              (const_vector:V32QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")

;; Unsigned byte average, 128-bit; same RTL shape as the 256-bit
;; expander.
(define_expand "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" ""))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "")))
              (const_vector:V16QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")

;; vpavgb (256-bit): matching insn for the avx2_uavgv32qi3 RTL.
;; Operands 1 and 2 are commutative ("%").
(define_insn "*avx2_uavgv32qi3"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (truncate:V32QI
          (lshiftrt:V32HI
            (plus:V32HI
              (plus:V32HI
                (zero_extend:V32HI
                  (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
                (zero_extend:V32HI
                  (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V32QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
  "vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; pavgb / vpavgb (128-bit): matching insn for the sse2_uavgv16qi3 RTL.
(define_insn "*sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
              (const_vector:V16QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "@
   pavgb\t{%2, %0|%0, %2}
   vpavgb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Unsigned word average, 256-bit; same RTL shape as the byte
;; expanders, with V16SI as the intermediate mode.
(define_expand "avx2_uavgv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (plus:V16SI
                (zero_extend:V16SI
                  (match_operand:V16HI 1 "nonimmediate_operand" ""))
                (zero_extend:V16SI
                  (match_operand:V16HI 2 "nonimmediate_operand" "")))
              (const_vector:V16HI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")

(define_expand "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
(plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" ""))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")

;; 256-bit rounding average of unsigned HImode elements:
;; truncate ((zext (op1) + zext (op2) + 1) >> 1).
(define_insn "*avx2_uavgv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (plus:V16SI
                (zero_extend:V16SI
                  (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
                (zero_extend:V16SI
                  (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16HI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
  "vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit rounding average of unsigned HImode elements; alternative 0 is
;; the two-operand legacy encoding, alternative 1 the three-operand VEX one.
(define_insn "*sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "@
   pavgw\t{%2, %0|%0, %2}
   vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
(define_insn "<sse2_avx2>_psadbw"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
        (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
                          (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
                         UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "@
   psadbw\t{%2, %0|%0, %2}
   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Move a mask derived from a float vector (any VF mode) into an SImode
;; general register.
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:VF 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; 256-bit byte mask move into an SImode general register.
;; The "mode" attribute is SI, as for sse2_pmovmskb below: the destination
;; is an SImode register, so the previous "DI" matched neither the
;; destination nor the V32QI source.
(define_insn "avx2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_AVX2"
  "vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SI")])

;; 128-bit byte mask move into an SImode general register.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "%vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

(define_expand "sse2_maskmovdqu"
  [(set
(match_operand:V16QI 0 "memory_operand" "") 7985 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "") 7986 (match_operand:V16QI 2 "register_operand" "") 7987 (match_dup 0)] 7988 UNSPEC_MASKMOV))] 7989 "TARGET_SSE2") 7990 7991(define_insn "*sse2_maskmovdqu" 7992 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D")) 7993 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") 7994 (match_operand:V16QI 2 "register_operand" "x") 7995 (mem:V16QI (match_dup 0))] 7996 UNSPEC_MASKMOV))] 7997 "TARGET_SSE2" 7998 "%vmaskmovdqu\t{%2, %1|%1, %2}" 7999 [(set_attr "type" "ssemov") 8000 (set_attr "prefix_data16" "1") 8001 ;; The implicit %rdi operand confuses default length_vex computation. 8002 (set (attr "length_vex") 8003 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) 8004 (set_attr "prefix" "maybe_vex") 8005 (set_attr "mode" "TI")]) 8006 8007(define_insn "sse_ldmxcsr" 8008 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 8009 UNSPECV_LDMXCSR)] 8010 "TARGET_SSE" 8011 "%vldmxcsr\t%0" 8012 [(set_attr "type" "sse") 8013 (set_attr "atom_sse_attr" "mxcsr") 8014 (set_attr "prefix" "maybe_vex") 8015 (set_attr "memory" "load")]) 8016 8017(define_insn "sse_stmxcsr" 8018 [(set (match_operand:SI 0 "memory_operand" "=m") 8019 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] 8020 "TARGET_SSE" 8021 "%vstmxcsr\t%0" 8022 [(set_attr "type" "sse") 8023 (set_attr "atom_sse_attr" "mxcsr") 8024 (set_attr "prefix" "maybe_vex") 8025 (set_attr "memory" "store")]) 8026 8027(define_insn "sse2_clflush" 8028 [(unspec_volatile [(match_operand 0 "address_operand" "p")] 8029 UNSPECV_CLFLUSH)] 8030 "TARGET_SSE2" 8031 "clflush\t%a0" 8032 [(set_attr "type" "sse") 8033 (set_attr "atom_sse_attr" "fence") 8034 (set_attr "memory" "unknown")]) 8035 8036 8037(define_insn "sse3_mwait" 8038 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") 8039 (match_operand:SI 1 "register_operand" "c")] 8040 UNSPECV_MWAIT)] 8041 "TARGET_SSE3" 8042;; 64bit version is 
"mwait %rax,%rcx". But only lower 32bits are used. 8043;; Since 32bit register operands are implicitly zero extended to 64bit, 8044;; we only need to set up 32bit registers. 8045 "mwait" 8046 [(set_attr "length" "3")]) 8047 8048(define_insn "sse3_monitor" 8049 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") 8050 (match_operand:SI 1 "register_operand" "c") 8051 (match_operand:SI 2 "register_operand" "d")] 8052 UNSPECV_MONITOR)] 8053 "TARGET_SSE3 && !TARGET_64BIT" 8054 "monitor\t%0, %1, %2" 8055 [(set_attr "length" "3")]) 8056 8057(define_insn "sse3_monitor64" 8058 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a") 8059 (match_operand:SI 1 "register_operand" "c") 8060 (match_operand:SI 2 "register_operand" "d")] 8061 UNSPECV_MONITOR)] 8062 "TARGET_SSE3 && TARGET_64BIT" 8063;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in 8064;; RCX and RDX are used. Since 32bit register operands are implicitly 8065;; zero extended to 64bit, we only need to set up 32bit registers. 
"monitor"
  [(set_attr "length" "3")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; SSSE3 instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; 256-bit horizontal add of adjacent HImode pairs.
;;
;; The VEX.256 form of VPHADDW works independently on each 128-bit lane:
;; the low lane of the result holds the four pair sums of operand 1's low
;; lane followed by the four pair sums of operand 2's low lane, and the
;; high lane is built the same way from the two high lanes.  The RTL must
;; describe exactly that, otherwise constant folding and other RTL
;; simplifications compute a different value than the hardware.
;;
;; In the V32HI concatenation, elements 0-15 come from operand 1 and
;; elements 16-31 from operand 2.  The first vec_select gathers the even
;; (first-of-pair) elements in result order, the second the odd ones.
(define_insn "avx2_phaddwv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (plus:V16HI
          (vec_select:V16HI
            (vec_concat:V32HI
              (match_operand:V16HI 1 "register_operand" "x")
              (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
            (parallel
              [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
               (const_int 16) (const_int 18) (const_int 20) (const_int 22)
               (const_int 8) (const_int 10) (const_int 12) (const_int 14)
               (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
          (vec_select:V16HI
            (vec_concat:V32HI (match_dup 1) (match_dup 2))
            (parallel
              [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
               (const_int 17) (const_int 19) (const_int 21) (const_int 23)
               (const_int 9) (const_int 11) (const_int 13) (const_int 15)
               (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
  "TARGET_AVX2"
  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int
7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phaddw\t{%2, %0|%0, %2}
   vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; 64-bit (V4HI) horizontal add of adjacent HImode pairs: the two sums of
;; operand 1 form the low half of the result, those of operand 2 the high
;; half.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; 256-bit horizontal add of adjacent SImode pairs.
;;
;; The VEX.256 form of VPHADDD works independently on each 128-bit lane:
;; result elements 0-1 are the pair sums of operand 1's low lane, 2-3 those
;; of operand 2's low lane, 4-5 those of operand 1's high lane and 6-7
;; those of operand 2's high lane.  Describing it as "all sums of operand
;; 1, then all sums of operand 2" makes RTL simplification disagree with
;; the hardware.
;;
;; In the V16SI concatenation, elements 0-7 come from operand 1 and
;; elements 8-15 from operand 2.
(define_insn "avx2_phadddv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (plus:V8SI
          (vec_select:V8SI
            (vec_concat:V16SI
              (match_operand:V8SI 1 "register_operand" "x")
              (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
            (parallel
              [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
               (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
          (vec_select:V8SI
            (vec_concat:V16SI (match_dup 1) (match_dup 2))
            (parallel
              [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
               (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
  "TARGET_AVX2"
  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit horizontal add of adjacent SImode pairs: operand 1 supplies the
;; low half of the result, operand 2 the high half.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   phaddd\t{%2, %0|%0, %2}
   vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; 64-bit (V2SI) horizontal add: element 0 is the pair sum of operand 1,
;; element 1 the pair sum of operand 2.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; 256-bit horizontal signed-saturating add of adjacent HImode pairs.
;;
;; Like the other VEX.256 horizontal operations, VPHADDSW works per
;; 128-bit lane: each result lane holds the four pair sums of operand 1's
;; lane followed by the four pair sums of operand 2's lane.
;;
;; In the V32HI concatenation, elements 0-15 come from operand 1 and
;; elements 16-31 from operand 2.
(define_insn "avx2_phaddswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (ss_plus:V16HI
          (vec_select:V16HI
            (vec_concat:V32HI
              (match_operand:V16HI 1 "register_operand" "x")
              (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
            (parallel
              [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
               (const_int 16) (const_int 18) (const_int 20) (const_int 22)
               (const_int 8) (const_int 10) (const_int 12) (const_int 14)
               (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
          (vec_select:V16HI
            (vec_concat:V32HI (match_dup 1) (match_dup 2))
            (parallel
              [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
               (const_int 17) (const_int 19) (const_int 21) (const_int 23)
               (const_int 9) (const_int 11) (const_int 13) (const_int 15)
               (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
  "TARGET_AVX2"
  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int
4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phaddsw\t{%2, %0|%0, %2}
   vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; 64-bit (V4HI) horizontal signed-saturating add of adjacent HImode
;; pairs: operand 1 supplies the low half of the result, operand 2 the
;; high half.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; 256-bit horizontal subtract of adjacent HImode pairs (first element of
;; each pair minus the second).
;;
;; The VEX.256 form of VPHSUBW works independently on each 128-bit lane:
;; each result lane holds the four pair differences of operand 1's lane
;; followed by the four pair differences of operand 2's lane.  Modelling
;; it as a horizontal operation across the whole 256-bit vector makes RTL
;; simplification disagree with the hardware.
;;
;; In the V32HI concatenation, elements 0-15 come from operand 1 and
;; elements 16-31 from operand 2.  The minuend vector selects the even
;; elements, the subtrahend vector the odd ones.
(define_insn "avx2_phsubwv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (minus:V16HI
          (vec_select:V16HI
            (vec_concat:V32HI
              (match_operand:V16HI 1 "register_operand" "x")
              (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
            (parallel
              [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
               (const_int 16) (const_int 18) (const_int 20) (const_int 22)
               (const_int 8) (const_int 10) (const_int 12) (const_int 14)
               (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
          (vec_select:V16HI
            (vec_concat:V32HI (match_dup 1) (match_dup 2))
            (parallel
              [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
               (const_int 17) (const_int 19) (const_int 21) (const_int 23)
               (const_int 9) (const_int 11) (const_int 13) (const_int 15)
               (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
  "TARGET_AVX2"
  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phsubw\t{%2, %0|%0, %2}
   vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; 64-bit (V4HI) horizontal subtract of adjacent HImode pairs: operand 1
;; supplies the low half of the result, operand 2 the high half.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; 256-bit horizontal subtract of adjacent SImode pairs (first element of
;; each pair minus the second).
;;
;; The VEX.256 form of VPHSUBD works independently on each 128-bit lane:
;; result elements 0-1 are the pair differences of operand 1's low lane,
;; 2-3 those of operand 2's low lane, 4-5 those of operand 1's high lane
;; and 6-7 those of operand 2's high lane.
;;
;; In the V16SI concatenation, elements 0-7 come from operand 1 and
;; elements 8-15 from operand 2.
(define_insn "avx2_phsubdv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (minus:V8SI
          (vec_select:V8SI
            (vec_concat:V16SI
              (match_operand:V8SI 1 "register_operand" "x")
              (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
            (parallel
              [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
               (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
          (vec_select:V8SI
            (vec_concat:V16SI (match_dup 1) (match_dup 2))
            (parallel
              [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
               (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
  "TARGET_AVX2"
  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI
(match_dup 2) (parallel [(const_int 3)]))))))] 8700 "TARGET_SSSE3" 8701 "@ 8702 phsubd\t{%2, %0|%0, %2} 8703 vphsubd\t{%2, %1, %0|%0, %1, %2}" 8704 8705 [(set_attr "isa" "noavx,avx") 8706 (set_attr "type" "sseiadd") 8707 (set_attr "atom_unit" "complex") 8708 (set_attr "prefix_data16" "1,*") 8709 (set_attr "prefix_extra" "1") 8710 (set_attr "prefix" "orig,vex") 8711 (set_attr "mode" "TI")]) 8712 8713(define_insn "ssse3_phsubdv2si3" 8714 [(set (match_operand:V2SI 0 "register_operand" "=y") 8715 (vec_concat:V2SI 8716 (minus:SI 8717 (vec_select:SI 8718 (match_operand:V2SI 1 "register_operand" "0") 8719 (parallel [(const_int 0)])) 8720 (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) 8721 (minus:SI 8722 (vec_select:SI 8723 (match_operand:V2SI 2 "nonimmediate_operand" "ym") 8724 (parallel [(const_int 0)])) 8725 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] 8726 "TARGET_SSSE3" 8727 "phsubd\t{%2, %0|%0, %2}" 8728 [(set_attr "type" "sseiadd") 8729 (set_attr "atom_unit" "complex") 8730 (set_attr "prefix_extra" "1") 8731 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8732 (set_attr "mode" "DI")]) 8733 8734(define_insn "avx2_phsubswv16hi3" 8735 [(set (match_operand:V16HI 0 "register_operand" "=x") 8736 (vec_concat:V16HI 8737 (vec_concat:V8HI 8738 (vec_concat:V4HI 8739 (vec_concat:V2HI 8740 (ss_minus:HI 8741 (vec_select:HI 8742 (match_operand:V16HI 1 "register_operand" "x") 8743 (parallel [(const_int 0)])) 8744 (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) 8745 (ss_minus:HI 8746 (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) 8747 (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) 8748 (vec_concat:V2HI 8749 (ss_minus:HI 8750 (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) 8751 (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) 8752 (ss_minus:HI 8753 (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) 8754 (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) 8755 
(vec_concat:V4HI 8756 (vec_concat:V2HI 8757 (ss_minus:HI 8758 (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) 8759 (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) 8760 (ss_minus:HI 8761 (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) 8762 (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) 8763 (vec_concat:V2HI 8764 (ss_minus:HI 8765 (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) 8766 (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) 8767 (ss_minus:HI 8768 (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) 8769 (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) 8770 (vec_concat:V8HI 8771 (vec_concat:V4HI 8772 (vec_concat:V2HI 8773 (ss_minus:HI 8774 (vec_select:HI 8775 (match_operand:V16HI 2 "nonimmediate_operand" "xm") 8776 (parallel [(const_int 0)])) 8777 (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) 8778 (ss_minus:HI 8779 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 8780 (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) 8781 (vec_concat:V2HI 8782 (ss_minus:HI 8783 (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) 8784 (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) 8785 (ss_minus:HI 8786 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8787 (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) 8788 (vec_concat:V4HI 8789 (vec_concat:V2HI 8790 (ss_minus:HI 8791 (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) 8792 (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) 8793 (ss_minus:HI 8794 (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) 8795 (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) 8796 (vec_concat:V2HI 8797 (ss_minus:HI 8798 (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) 8799 (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) 8800 (ss_minus:HI 8801 (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) 8802 (vec_select:HI (match_dup 2) (parallel [(const_int 
15)]))))))))] 8803 "TARGET_AVX2" 8804 "vphsubsw\t{%2, %1, %0|%0, %1, %2}" 8805 [(set_attr "type" "sseiadd") 8806 (set_attr "prefix_extra" "1") 8807 (set_attr "prefix" "vex") 8808 (set_attr "mode" "OI")]) 8809 8810(define_insn "ssse3_phsubswv8hi3" 8811 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 8812 (vec_concat:V8HI 8813 (vec_concat:V4HI 8814 (vec_concat:V2HI 8815 (ss_minus:HI 8816 (vec_select:HI 8817 (match_operand:V8HI 1 "register_operand" "0,x") 8818 (parallel [(const_int 0)])) 8819 (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) 8820 (ss_minus:HI 8821 (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) 8822 (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) 8823 (vec_concat:V2HI 8824 (ss_minus:HI 8825 (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) 8826 (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) 8827 (ss_minus:HI 8828 (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) 8829 (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) 8830 (vec_concat:V4HI 8831 (vec_concat:V2HI 8832 (ss_minus:HI 8833 (vec_select:HI 8834 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") 8835 (parallel [(const_int 0)])) 8836 (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) 8837 (ss_minus:HI 8838 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 8839 (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) 8840 (vec_concat:V2HI 8841 (ss_minus:HI 8842 (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) 8843 (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) 8844 (ss_minus:HI 8845 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8846 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8847 "TARGET_SSSE3" 8848 "@ 8849 phsubsw\t{%2, %0|%0, %2} 8850 vphsubsw\t{%2, %1, %0|%0, %1, %2}" 8851 [(set_attr "isa" "noavx,avx") 8852 (set_attr "type" "sseiadd") 8853 (set_attr "atom_unit" "complex") 8854 (set_attr "prefix_data16" "1,*") 8855 (set_attr "prefix_extra" "1") 8856 (set_attr 
"prefix" "orig,vex") 8857 (set_attr "mode" "TI")]) 8858 8859(define_insn "ssse3_phsubswv4hi3" 8860 [(set (match_operand:V4HI 0 "register_operand" "=y") 8861 (vec_concat:V4HI 8862 (vec_concat:V2HI 8863 (ss_minus:HI 8864 (vec_select:HI 8865 (match_operand:V4HI 1 "register_operand" "0") 8866 (parallel [(const_int 0)])) 8867 (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) 8868 (ss_minus:HI 8869 (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) 8870 (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) 8871 (vec_concat:V2HI 8872 (ss_minus:HI 8873 (vec_select:HI 8874 (match_operand:V4HI 2 "nonimmediate_operand" "ym") 8875 (parallel [(const_int 0)])) 8876 (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) 8877 (ss_minus:HI 8878 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 8879 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] 8880 "TARGET_SSSE3" 8881 "phsubsw\t{%2, %0|%0, %2}" 8882 [(set_attr "type" "sseiadd") 8883 (set_attr "atom_unit" "complex") 8884 (set_attr "prefix_extra" "1") 8885 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8886 (set_attr "mode" "DI")]) 8887 8888(define_insn "avx2_pmaddubsw256" 8889 [(set (match_operand:V16HI 0 "register_operand" "=x") 8890 (ss_plus:V16HI 8891 (mult:V16HI 8892 (zero_extend:V16HI 8893 (vec_select:V16QI 8894 (match_operand:V32QI 1 "register_operand" "x") 8895 (parallel [(const_int 0) 8896 (const_int 2) 8897 (const_int 4) 8898 (const_int 6) 8899 (const_int 8) 8900 (const_int 10) 8901 (const_int 12) 8902 (const_int 14) 8903 (const_int 16) 8904 (const_int 18) 8905 (const_int 20) 8906 (const_int 22) 8907 (const_int 24) 8908 (const_int 26) 8909 (const_int 28) 8910 (const_int 30)]))) 8911 (sign_extend:V16HI 8912 (vec_select:V16QI 8913 (match_operand:V32QI 2 "nonimmediate_operand" "xm") 8914 (parallel [(const_int 0) 8915 (const_int 2) 8916 (const_int 4) 8917 (const_int 6) 8918 (const_int 8) 8919 (const_int 10) 8920 (const_int 12) 8921 (const_int 14) 8922 
(const_int 16) 8923 (const_int 18) 8924 (const_int 20) 8925 (const_int 22) 8926 (const_int 24) 8927 (const_int 26) 8928 (const_int 28) 8929 (const_int 30)])))) 8930 (mult:V16HI 8931 (zero_extend:V16HI 8932 (vec_select:V16QI (match_dup 1) 8933 (parallel [(const_int 1) 8934 (const_int 3) 8935 (const_int 5) 8936 (const_int 7) 8937 (const_int 9) 8938 (const_int 11) 8939 (const_int 13) 8940 (const_int 15) 8941 (const_int 17) 8942 (const_int 19) 8943 (const_int 21) 8944 (const_int 23) 8945 (const_int 25) 8946 (const_int 27) 8947 (const_int 29) 8948 (const_int 31)]))) 8949 (sign_extend:V16HI 8950 (vec_select:V16QI (match_dup 2) 8951 (parallel [(const_int 1) 8952 (const_int 3) 8953 (const_int 5) 8954 (const_int 7) 8955 (const_int 9) 8956 (const_int 11) 8957 (const_int 13) 8958 (const_int 15) 8959 (const_int 17) 8960 (const_int 19) 8961 (const_int 21) 8962 (const_int 23) 8963 (const_int 25) 8964 (const_int 27) 8965 (const_int 29) 8966 (const_int 31)]))))))] 8967 "TARGET_AVX2" 8968 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" 8969 [(set_attr "type" "sseiadd") 8970 (set_attr "prefix_extra" "1") 8971 (set_attr "prefix" "vex") 8972 (set_attr "mode" "OI")]) 8973 8974(define_insn "ssse3_pmaddubsw128" 8975 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 8976 (ss_plus:V8HI 8977 (mult:V8HI 8978 (zero_extend:V8HI 8979 (vec_select:V8QI 8980 (match_operand:V16QI 1 "register_operand" "0,x") 8981 (parallel [(const_int 0) 8982 (const_int 2) 8983 (const_int 4) 8984 (const_int 6) 8985 (const_int 8) 8986 (const_int 10) 8987 (const_int 12) 8988 (const_int 14)]))) 8989 (sign_extend:V8HI 8990 (vec_select:V8QI 8991 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm") 8992 (parallel [(const_int 0) 8993 (const_int 2) 8994 (const_int 4) 8995 (const_int 6) 8996 (const_int 8) 8997 (const_int 10) 8998 (const_int 12) 8999 (const_int 14)])))) 9000 (mult:V8HI 9001 (zero_extend:V8HI 9002 (vec_select:V8QI (match_dup 1) 9003 (parallel [(const_int 1) 9004 (const_int 3) 9005 (const_int 5) 9006 (const_int 7) 
9007 (const_int 9) 9008 (const_int 11) 9009 (const_int 13) 9010 (const_int 15)]))) 9011 (sign_extend:V8HI 9012 (vec_select:V8QI (match_dup 2) 9013 (parallel [(const_int 1) 9014 (const_int 3) 9015 (const_int 5) 9016 (const_int 7) 9017 (const_int 9) 9018 (const_int 11) 9019 (const_int 13) 9020 (const_int 15)]))))))] 9021 "TARGET_SSSE3" 9022 "@ 9023 pmaddubsw\t{%2, %0|%0, %2} 9024 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" 9025 [(set_attr "isa" "noavx,avx") 9026 (set_attr "type" "sseiadd") 9027 (set_attr "atom_unit" "simul") 9028 (set_attr "prefix_data16" "1,*") 9029 (set_attr "prefix_extra" "1") 9030 (set_attr "prefix" "orig,vex") 9031 (set_attr "mode" "TI")]) 9032 9033(define_insn "ssse3_pmaddubsw" 9034 [(set (match_operand:V4HI 0 "register_operand" "=y") 9035 (ss_plus:V4HI 9036 (mult:V4HI 9037 (zero_extend:V4HI 9038 (vec_select:V4QI 9039 (match_operand:V8QI 1 "register_operand" "0") 9040 (parallel [(const_int 0) 9041 (const_int 2) 9042 (const_int 4) 9043 (const_int 6)]))) 9044 (sign_extend:V4HI 9045 (vec_select:V4QI 9046 (match_operand:V8QI 2 "nonimmediate_operand" "ym") 9047 (parallel [(const_int 0) 9048 (const_int 2) 9049 (const_int 4) 9050 (const_int 6)])))) 9051 (mult:V4HI 9052 (zero_extend:V4HI 9053 (vec_select:V4QI (match_dup 1) 9054 (parallel [(const_int 1) 9055 (const_int 3) 9056 (const_int 5) 9057 (const_int 7)]))) 9058 (sign_extend:V4HI 9059 (vec_select:V4QI (match_dup 2) 9060 (parallel [(const_int 1) 9061 (const_int 3) 9062 (const_int 5) 9063 (const_int 7)]))))))] 9064 "TARGET_SSSE3" 9065 "pmaddubsw\t{%2, %0|%0, %2}" 9066 [(set_attr "type" "sseiadd") 9067 (set_attr "atom_unit" "simul") 9068 (set_attr "prefix_extra" "1") 9069 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 9070 (set_attr "mode" "DI")]) 9071 9072(define_expand "avx2_umulhrswv16hi3" 9073 [(set (match_operand:V16HI 0 "register_operand" "") 9074 (truncate:V16HI 9075 (lshiftrt:V16SI 9076 (plus:V16SI 9077 (lshiftrt:V16SI 9078 (mult:V16SI 9079 (sign_extend:V16SI 9080 
(match_operand:V16HI 1 "nonimmediate_operand" "")) 9081 (sign_extend:V16SI 9082 (match_operand:V16HI 2 "nonimmediate_operand" ""))) 9083 (const_int 14)) 9084 (const_vector:V16HI [(const_int 1) (const_int 1) 9085 (const_int 1) (const_int 1) 9086 (const_int 1) (const_int 1) 9087 (const_int 1) (const_int 1) 9088 (const_int 1) (const_int 1) 9089 (const_int 1) (const_int 1) 9090 (const_int 1) (const_int 1) 9091 (const_int 1) (const_int 1)])) 9092 (const_int 1))))] 9093 "TARGET_AVX2" 9094 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") 9095 9096(define_insn "*avx2_umulhrswv16hi3" 9097 [(set (match_operand:V16HI 0 "register_operand" "=x") 9098 (truncate:V16HI 9099 (lshiftrt:V16SI 9100 (plus:V16SI 9101 (lshiftrt:V16SI 9102 (mult:V16SI 9103 (sign_extend:V16SI 9104 (match_operand:V16HI 1 "nonimmediate_operand" "%x")) 9105 (sign_extend:V16SI 9106 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))) 9107 (const_int 14)) 9108 (const_vector:V16HI [(const_int 1) (const_int 1) 9109 (const_int 1) (const_int 1) 9110 (const_int 1) (const_int 1) 9111 (const_int 1) (const_int 1) 9112 (const_int 1) (const_int 1) 9113 (const_int 1) (const_int 1) 9114 (const_int 1) (const_int 1) 9115 (const_int 1) (const_int 1)])) 9116 (const_int 1))))] 9117 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)" 9118 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" 9119 [(set_attr "type" "sseimul") 9120 (set_attr "prefix_extra" "1") 9121 (set_attr "prefix" "vex") 9122 (set_attr "mode" "OI")]) 9123 9124(define_expand "ssse3_pmulhrswv8hi3" 9125 [(set (match_operand:V8HI 0 "register_operand" "") 9126 (truncate:V8HI 9127 (lshiftrt:V8SI 9128 (plus:V8SI 9129 (lshiftrt:V8SI 9130 (mult:V8SI 9131 (sign_extend:V8SI 9132 (match_operand:V8HI 1 "nonimmediate_operand" "")) 9133 (sign_extend:V8SI 9134 (match_operand:V8HI 2 "nonimmediate_operand" ""))) 9135 (const_int 14)) 9136 (const_vector:V8HI [(const_int 1) (const_int 1) 9137 (const_int 1) (const_int 1) 9138 (const_int 1) (const_int 1) 9139 
(const_int 1) (const_int 1)])) 9140 (const_int 1))))] 9141 "TARGET_SSSE3" 9142 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") 9143 9144(define_insn "*ssse3_pmulhrswv8hi3" 9145 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 9146 (truncate:V8HI 9147 (lshiftrt:V8SI 9148 (plus:V8SI 9149 (lshiftrt:V8SI 9150 (mult:V8SI 9151 (sign_extend:V8SI 9152 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")) 9153 (sign_extend:V8SI 9154 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))) 9155 (const_int 14)) 9156 (const_vector:V8HI [(const_int 1) (const_int 1) 9157 (const_int 1) (const_int 1) 9158 (const_int 1) (const_int 1) 9159 (const_int 1) (const_int 1)])) 9160 (const_int 1))))] 9161 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)" 9162 "@ 9163 pmulhrsw\t{%2, %0|%0, %2} 9164 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" 9165 [(set_attr "isa" "noavx,avx") 9166 (set_attr "type" "sseimul") 9167 (set_attr "prefix_data16" "1,*") 9168 (set_attr "prefix_extra" "1") 9169 (set_attr "prefix" "orig,vex") 9170 (set_attr "mode" "TI")]) 9171 9172(define_expand "ssse3_pmulhrswv4hi3" 9173 [(set (match_operand:V4HI 0 "register_operand" "") 9174 (truncate:V4HI 9175 (lshiftrt:V4SI 9176 (plus:V4SI 9177 (lshiftrt:V4SI 9178 (mult:V4SI 9179 (sign_extend:V4SI 9180 (match_operand:V4HI 1 "nonimmediate_operand" "")) 9181 (sign_extend:V4SI 9182 (match_operand:V4HI 2 "nonimmediate_operand" ""))) 9183 (const_int 14)) 9184 (const_vector:V4HI [(const_int 1) (const_int 1) 9185 (const_int 1) (const_int 1)])) 9186 (const_int 1))))] 9187 "TARGET_SSSE3" 9188 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") 9189 9190(define_insn "*ssse3_pmulhrswv4hi3" 9191 [(set (match_operand:V4HI 0 "register_operand" "=y") 9192 (truncate:V4HI 9193 (lshiftrt:V4SI 9194 (plus:V4SI 9195 (lshiftrt:V4SI 9196 (mult:V4SI 9197 (sign_extend:V4SI 9198 (match_operand:V4HI 1 "nonimmediate_operand" "%0")) 9199 (sign_extend:V4SI 9200 (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) 
9201 (const_int 14)) 9202 (const_vector:V4HI [(const_int 1) (const_int 1) 9203 (const_int 1) (const_int 1)])) 9204 (const_int 1))))] 9205 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" 9206 "pmulhrsw\t{%2, %0|%0, %2}" 9207 [(set_attr "type" "sseimul") 9208 (set_attr "prefix_extra" "1") 9209 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 9210 (set_attr "mode" "DI")]) 9211 9212(define_insn "<ssse3_avx2>_pshufb<mode>3" 9213 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 9214 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x") 9215 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")] 9216 UNSPEC_PSHUFB))] 9217 "TARGET_SSSE3" 9218 "@ 9219 pshufb\t{%2, %0|%0, %2} 9220 vpshufb\t{%2, %1, %0|%0, %1, %2}" 9221 [(set_attr "isa" "noavx,avx") 9222 (set_attr "type" "sselog1") 9223 (set_attr "prefix_data16" "1,*") 9224 (set_attr "prefix_extra" "1") 9225 (set_attr "prefix" "orig,vex") 9226 (set_attr "mode" "<sseinsnmode>")]) 9227 9228(define_insn "ssse3_pshufbv8qi3" 9229 [(set (match_operand:V8QI 0 "register_operand" "=y") 9230 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") 9231 (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 9232 UNSPEC_PSHUFB))] 9233 "TARGET_SSSE3" 9234 "pshufb\t{%2, %0|%0, %2}"; 9235 [(set_attr "type" "sselog1") 9236 (set_attr "prefix_extra" "1") 9237 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 9238 (set_attr "mode" "DI")]) 9239 9240(define_insn "<ssse3_avx2>_psign<mode>3" 9241 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x") 9242 (unspec:VI124_AVX2 9243 [(match_operand:VI124_AVX2 1 "register_operand" "0,x") 9244 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")] 9245 UNSPEC_PSIGN))] 9246 "TARGET_SSSE3" 9247 "@ 9248 psign<ssemodesuffix>\t{%2, %0|%0, %2} 9249 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 9250 [(set_attr "isa" "noavx,avx") 9251 (set_attr "type" "sselog1") 9252 (set_attr "prefix_data16" 
"1,*") 9253 (set_attr "prefix_extra" "1") 9254 (set_attr "prefix" "orig,vex") 9255 (set_attr "mode" "<sseinsnmode>")]) 9256 9257(define_insn "ssse3_psign<mode>3" 9258 [(set (match_operand:MMXMODEI 0 "register_operand" "=y") 9259 (unspec:MMXMODEI 9260 [(match_operand:MMXMODEI 1 "register_operand" "0") 9261 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")] 9262 UNSPEC_PSIGN))] 9263 "TARGET_SSSE3" 9264 "psign<mmxvecsize>\t{%2, %0|%0, %2}"; 9265 [(set_attr "type" "sselog1") 9266 (set_attr "prefix_extra" "1") 9267 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 9268 (set_attr "mode" "DI")]) 9269 9270(define_insn "<ssse3_avx2>_palignr<mode>" 9271 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x") 9272 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x") 9273 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm") 9274 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] 9275 UNSPEC_PALIGNR))] 9276 "TARGET_SSSE3" 9277{ 9278 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); 9279 9280 switch (which_alternative) 9281 { 9282 case 0: 9283 return "palignr\t{%3, %2, %0|%0, %2, %3}"; 9284 case 1: 9285 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 9286 default: 9287 gcc_unreachable (); 9288 } 9289} 9290 [(set_attr "isa" "noavx,avx") 9291 (set_attr "type" "sseishft") 9292 (set_attr "atom_unit" "sishuf") 9293 (set_attr "prefix_data16" "1,*") 9294 (set_attr "prefix_extra" "1") 9295 (set_attr "length_immediate" "1") 9296 (set_attr "prefix" "orig,vex") 9297 (set_attr "mode" "<sseinsnmode>")]) 9298 9299(define_insn "ssse3_palignrdi" 9300 [(set (match_operand:DI 0 "register_operand" "=y") 9301 (unspec:DI [(match_operand:DI 1 "register_operand" "0") 9302 (match_operand:DI 2 "nonimmediate_operand" "ym") 9303 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] 9304 UNSPEC_PALIGNR))] 9305 "TARGET_SSSE3" 9306{ 9307 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); 9308 return "palignr\t{%3, %2, 
%0|%0, %2, %3}"; 9309} 9310 [(set_attr "type" "sseishft") 9311 (set_attr "atom_unit" "sishuf") 9312 (set_attr "prefix_extra" "1") 9313 (set_attr "length_immediate" "1") 9314 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 9315 (set_attr "mode" "DI")]) 9316 9317(define_insn "abs<mode>2" 9318 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x") 9319 (abs:VI124_AVX2 9320 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))] 9321 "TARGET_SSSE3" 9322 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}" 9323 [(set_attr "type" "sselog1") 9324 (set_attr "prefix_data16" "1") 9325 (set_attr "prefix_extra" "1") 9326 (set_attr "prefix" "maybe_vex") 9327 (set_attr "mode" "<sseinsnmode>")]) 9328 9329(define_insn "abs<mode>2" 9330 [(set (match_operand:MMXMODEI 0 "register_operand" "=y") 9331 (abs:MMXMODEI 9332 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] 9333 "TARGET_SSSE3" 9334 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; 9335 [(set_attr "type" "sselog1") 9336 (set_attr "prefix_rep" "0") 9337 (set_attr "prefix_extra" "1") 9338 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 9339 (set_attr "mode" "DI")]) 9340 9341;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9342;; 9343;; AMD SSE4A instructions 9344;; 9345;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9346 9347(define_insn "sse4a_movnt<mode>" 9348 [(set (match_operand:MODEF 0 "memory_operand" "=m") 9349 (unspec:MODEF 9350 [(match_operand:MODEF 1 "register_operand" "x")] 9351 UNSPEC_MOVNT))] 9352 "TARGET_SSE4A" 9353 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}" 9354 [(set_attr "type" "ssemov") 9355 (set_attr "mode" "<MODE>")]) 9356 9357(define_insn "sse4a_vmmovnt<mode>" 9358 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m") 9359 (unspec:<ssescalarmode> 9360 [(vec_select:<ssescalarmode> 9361 (match_operand:VF_128 1 "register_operand" "x") 9362 (parallel [(const_int 0)]))] 9363 UNSPEC_MOVNT))] 9364 "TARGET_SSE4A" 9365 
"movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}" 9366 [(set_attr "type" "ssemov") 9367 (set_attr "mode" "<ssescalarmode>")]) 9368 9369(define_insn "sse4a_extrqi" 9370 [(set (match_operand:V2DI 0 "register_operand" "=x") 9371 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 9372 (match_operand 2 "const_0_to_255_operand" "") 9373 (match_operand 3 "const_0_to_255_operand" "")] 9374 UNSPEC_EXTRQI))] 9375 "TARGET_SSE4A" 9376 "extrq\t{%3, %2, %0|%0, %2, %3}" 9377 [(set_attr "type" "sse") 9378 (set_attr "prefix_data16" "1") 9379 (set_attr "length_immediate" "2") 9380 (set_attr "mode" "TI")]) 9381 9382(define_insn "sse4a_extrq" 9383 [(set (match_operand:V2DI 0 "register_operand" "=x") 9384 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 9385 (match_operand:V16QI 2 "register_operand" "x")] 9386 UNSPEC_EXTRQ))] 9387 "TARGET_SSE4A" 9388 "extrq\t{%2, %0|%0, %2}" 9389 [(set_attr "type" "sse") 9390 (set_attr "prefix_data16" "1") 9391 (set_attr "mode" "TI")]) 9392 9393(define_insn "sse4a_insertqi" 9394 [(set (match_operand:V2DI 0 "register_operand" "=x") 9395 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 9396 (match_operand:V2DI 2 "register_operand" "x") 9397 (match_operand 3 "const_0_to_255_operand" "") 9398 (match_operand 4 "const_0_to_255_operand" "")] 9399 UNSPEC_INSERTQI))] 9400 "TARGET_SSE4A" 9401 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" 9402 [(set_attr "type" "sseins") 9403 (set_attr "prefix_data16" "0") 9404 (set_attr "prefix_rep" "1") 9405 (set_attr "length_immediate" "2") 9406 (set_attr "mode" "TI")]) 9407 9408(define_insn "sse4a_insertq" 9409 [(set (match_operand:V2DI 0 "register_operand" "=x") 9410 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 9411 (match_operand:V2DI 2 "register_operand" "x")] 9412 UNSPEC_INSERTQ))] 9413 "TARGET_SSE4A" 9414 "insertq\t{%2, %0|%0, %2}" 9415 [(set_attr "type" "sseins") 9416 (set_attr "prefix_data16" "0") 9417 (set_attr "prefix_rep" "1") 9418 (set_attr "mode" "TI")]) 9419 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Intel SSE4.1 instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; blendps/blendpd and their VEX forms: a vec_merge of operand 2 and
;; operand 1 under the immediate mask in operand 3.
(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (vec_merge:VF
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
  "TARGET_SSE4_1"
  "@
   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; blendvps/blendvpd and their VEX forms; the selector is a register
;; operand.  In the non-VEX alternative it is constrained to "Yz" and the
;; encoding carries no immediate byte, so only the VEX alternative (which
;; encodes the selector register in a trailing byte) counts one immediate
;; byte -- the same "*,1" split that pblendvb uses.
(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
           (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
           (match_operand:VF 3 "register_operand" "Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "*,1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; dpps/dppd and their VEX forms (unspec) with an 8-bit immediate control.
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "@
   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; movntdqa load from memory into a vector register (unspec).
(define_insn "<sse4_1_avx2>_movntdqa"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
        (unspec:VI8_AVX2
          [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
          UNSPEC_MOVNTDQA))]
  "TARGET_SSE4_1"
  "%vmovntdqa\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; mpsadbw / vmpsadbw (unspec) with an 8-bit immediate control.
(define_insn "<sse4_1_avx2>_mpsadbw"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
           (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_MPSADBW))]
  "TARGET_SSE4_1"
  "@
   mpsadbw\t{%3, %2, %0|%0, %2, %3}
   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; 256-bit vpackusdw: unsigned-saturating truncation of two V8SI operands,
;; concatenated into V16HI.
;; NOTE(review): the hardware instruction packs within each 128-bit lane,
;; whereas this RTL concatenates the two fully truncated operands; confirm
;; whether the element order here should be made lane-wise.
(define_insn "avx2_packusdw"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (us_truncate:V8HI
            (match_operand:V8SI 1 "register_operand" "x"))
          (us_truncate:V8HI
            (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX2"
  "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit packusdw / vpackusdw.
(define_insn "sse4_1_packusdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (us_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0,x"))
          (us_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
  "TARGET_SSE4_1"
  "@
   packusdw\t{%2, %0|%0, %2}
   vpackusdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; pblendvb / vpblendvb; byte blend under a register selector (operand 3).
(define_insn "<sse4_1_avx2>_pblendvb"
  [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
           (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
           (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   pblendvb\t{%3, %2, %0|%0, %2, %3}
   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "*,1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; 128-bit pblendw / vpblendw: vec_merge under an 8-bit immediate mask.
(define_insn "sse4_1_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_merge:V8HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
          (match_operand:V8HI 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
  "TARGET_SSE4_1"
  "@
   pblendw\t{%3, %2, %0|%0, %2, %3}
   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; The builtin uses an 8-bit immediate.  Expand that.
;; Expander for the 256-bit pblendw builtin.  The low 8 bits of operand 3 are
;; copied into bits 8-15 as well, so that the vec_merge selector has one bit
;; for each of the 16 HImode elements.
(define_expand "avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand" "")
        (vec_merge:V16HI
          (match_operand:V16HI 2 "nonimmediate_operand" "")
          (match_operand:V16HI 1 "register_operand" "")
          (match_operand:SI 3 "const_0_to_255_operand" "")))]
  "TARGET_AVX2"
{
  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
  operands[3] = GEN_INT (val << 8 | val);
})

;; Insn behind the expander above.  Only the low 8 bits of the 16-bit
;; selector are printed as the instruction's immediate.
(define_insn "*avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_merge:V16HI
          (match_operand:V16HI 2 "nonimmediate_operand" "xm")
          (match_operand:V16HI 1 "register_operand" "x")
          (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
  "TARGET_AVX2"
{
  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; vpblendd over the VI4_AVX2 modes: vec_merge under an 8-bit immediate.
(define_insn "avx2_pblendd<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
        (vec_merge:VI4_AVX2
          (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
          (match_operand:VI4_AVX2 1 "register_operand" "x")
          (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_AVX2"
  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; phminposuw / vphminposuw, modelled as an unspec of its single input.
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (unspec:V8HI
          [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "%vphminposuw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; The patterns below are the vpmovsx/vpmovzx family: any_extend of either a
;; whole narrower vector or of its leading elements picked by vec_select.

;; All 16 bytes of a V16QI extended to V16HI.
(define_insn "avx2_<code>v16qiv16hi2"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (any_extend:V16HI
          (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Bytes 0-7 of a V16QI extended to V8HI.
(define_insn "sse4_1_<code>v8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (any_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Bytes 0-7 of a V16QI extended to V8SI.
(define_insn "avx2_<code>v8qiv8si2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (any_extend:V8SI
          (vec_select:V8QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX2"
  "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Bytes 0-3 of a V16QI extended to V4SI.
(define_insn "sse4_1_<code>v4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (any_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; All 8 words of a V8HI extended to V8SI.
(define_insn "avx2_<code>v8hiv8si2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (any_extend:V8SI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Words 0-3 of a V8HI extended to V4SI.
(define_insn "sse4_1_<code>v4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (any_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Bytes 0-3 of a V16QI extended to V4DI.
(define_insn "avx2_<code>v4qiv4di2"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (any_extend:V4DI
          (vec_select:V4QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2"
  "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Bytes 0-1 of a V16QI extended to V2DI.
(define_insn "sse4_1_<code>v2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

(define_insn "avx2_<code>v4hiv4di2"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (any_extend:V4DI
          (vec_select:V4HI
            (match_operand:V8HI 1 "nonimmediate_operand"
"xm") 9760 (parallel [(const_int 0) 9761 (const_int 1) 9762 (const_int 2) 9763 (const_int 3)]))))] 9764 "TARGET_AVX2" 9765 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}" 9766 [(set_attr "type" "ssemov") 9767 (set_attr "prefix_extra" "1") 9768 (set_attr "prefix" "vex") 9769 (set_attr "mode" "OI")]) 9770 9771(define_insn "sse4_1_<code>v2hiv2di2" 9772 [(set (match_operand:V2DI 0 "register_operand" "=x") 9773 (any_extend:V2DI 9774 (vec_select:V2HI 9775 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 9776 (parallel [(const_int 0) 9777 (const_int 1)]))))] 9778 "TARGET_SSE4_1" 9779 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}" 9780 [(set_attr "type" "ssemov") 9781 (set_attr "prefix_extra" "1") 9782 (set_attr "prefix" "maybe_vex") 9783 (set_attr "mode" "TI")]) 9784 9785(define_insn "avx2_<code>v4siv4di2" 9786 [(set (match_operand:V4DI 0 "register_operand" "=x") 9787 (any_extend:V4DI 9788 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] 9789 "TARGET_AVX2" 9790 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}" 9791 [(set_attr "type" "ssemov") 9792 (set_attr "prefix_extra" "1") 9793 (set_attr "mode" "OI")]) 9794 9795(define_insn "sse4_1_<code>v2siv2di2" 9796 [(set (match_operand:V2DI 0 "register_operand" "=x") 9797 (any_extend:V2DI 9798 (vec_select:V2SI 9799 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 9800 (parallel [(const_int 0) 9801 (const_int 1)]))))] 9802 "TARGET_SSE4_1" 9803 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}" 9804 [(set_attr "type" "ssemov") 9805 (set_attr "prefix_extra" "1") 9806 (set_attr "prefix" "maybe_vex") 9807 (set_attr "mode" "TI")]) 9808 9809;; ptestps/ptestpd are very similar to comiss and ucomiss when 9810;; setting FLAGS_REG. But it is not a really compare instruction. 
;; Floating-point vector test: sets FLAGS_REG from an UNSPEC_VTESTP of a
;; register operand and a register-or-memory operand.
(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:VF 0 "register_operand" "x")
           (match_operand:VF 1 "nonimmediate_operand" "xm")]
          UNSPEC_VTESTP))]
  "TARGET_AVX"
  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; ptest resembles comiss and ucomiss in the way it sets FLAGS_REG,
;; but it is not really a compare instruction.

;; 256-bit form (V4DI operands), VEX-only.
(define_insn "avx_ptest256"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V4DI 0 "register_operand" "x")
           (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_AVX"
  "vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit form (V2DI operands), with an optional VEX prefix.
(define_insn "sse4_1_ptest"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V2DI 0 "register_operand" "x")
           (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "%vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Packed rounding: operand 2 is a 4-bit immediate passed through to the
;; instruction.  prefix_data16 is "*" when TARGET_AVX holds and "1"
;; otherwise.
(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_ROUND"
  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
        (if_then_else (match_test "TARGET_AVX")
                      (const_string "*")
                      (const_string "1")))
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
9868(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>" 9869 [(match_operand:<sseintvecmode> 0 "register_operand" "") 9870 (match_operand:VF1 1 "nonimmediate_operand" "") 9871 (match_operand:SI 2 "const_0_to_15_operand" "")] 9872 "TARGET_ROUND" 9873{ 9874 rtx tmp = gen_reg_rtx (<MODE>mode); 9875 9876 emit_insn 9877 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1], 9878 operands[2])); 9879 emit_insn 9880 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); 9881 DONE; 9882}) 9883 9884(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>" 9885 [(match_operand:<ssepackfltmode> 0 "register_operand" "") 9886 (match_operand:VF2 1 "nonimmediate_operand" "") 9887 (match_operand:VF2 2 "nonimmediate_operand" "") 9888 (match_operand:SI 3 "const_0_to_15_operand" "")] 9889 "TARGET_ROUND" 9890{ 9891 rtx tmp0, tmp1; 9892 9893 if (<MODE>mode == V2DFmode 9894 && TARGET_AVX && !TARGET_PREFER_AVX128) 9895 { 9896 rtx tmp2 = gen_reg_rtx (V4DFmode); 9897 9898 tmp0 = gen_reg_rtx (V4DFmode); 9899 tmp1 = force_reg (V2DFmode, operands[1]); 9900 9901 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); 9902 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3])); 9903 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2)); 9904 } 9905 else 9906 { 9907 tmp0 = gen_reg_rtx (<MODE>mode); 9908 tmp1 = gen_reg_rtx (<MODE>mode); 9909 9910 emit_insn 9911 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1], 9912 operands[3])); 9913 emit_insn 9914 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2], 9915 operands[3])); 9916 emit_insn 9917 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1)); 9918 } 9919 DONE; 9920}) 9921 9922(define_insn "sse4_1_round<ssescalarmodesuffix>" 9923 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 9924 (vec_merge:VF_128 9925 (unspec:VF_128 9926 [(match_operand:VF_128 2 "register_operand" "x,x") 9927 (match_operand:SI 3 "const_0_to_15_operand" "n,n")] 
9928 UNSPEC_ROUND) 9929 (match_operand:VF_128 1 "register_operand" "0,x") 9930 (const_int 1)))] 9931 "TARGET_ROUND" 9932 "@ 9933 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} 9934 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9935 [(set_attr "isa" "noavx,avx") 9936 (set_attr "type" "ssecvt") 9937 (set_attr "length_immediate" "1") 9938 (set_attr "prefix_data16" "1,*") 9939 (set_attr "prefix_extra" "1") 9940 (set_attr "prefix" "orig,vex") 9941 (set_attr "mode" "<MODE>")]) 9942 9943(define_expand "round<mode>2" 9944 [(set (match_dup 4) 9945 (plus:VF 9946 (match_operand:VF 1 "register_operand" "") 9947 (match_dup 3))) 9948 (set (match_operand:VF 0 "register_operand" "") 9949 (unspec:VF 9950 [(match_dup 4) (match_dup 5)] 9951 UNSPEC_ROUND))] 9952 "TARGET_ROUND && !flag_trapping_math" 9953{ 9954 enum machine_mode scalar_mode; 9955 const struct real_format *fmt; 9956 REAL_VALUE_TYPE pred_half, half_minus_pred_half; 9957 rtx half, vec_half; 9958 9959 scalar_mode = GET_MODE_INNER (<MODE>mode); 9960 9961 /* load nextafter (0.5, 0.0) */ 9962 fmt = REAL_MODE_FORMAT (scalar_mode); 9963 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode); 9964 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); 9965 half = const_double_from_real_value (pred_half, scalar_mode); 9966 9967 vec_half = ix86_build_const_vector (<MODE>mode, true, half); 9968 vec_half = force_reg (<MODE>mode, vec_half); 9969 9970 operands[3] = gen_reg_rtx (<MODE>mode); 9971 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1])); 9972 9973 operands[4] = gen_reg_rtx (<MODE>mode); 9974 operands[5] = GEN_INT (ROUND_TRUNC); 9975}) 9976 9977(define_expand "round<mode>2_sfix" 9978 [(match_operand:<sseintvecmode> 0 "register_operand" "") 9979 (match_operand:VF1 1 "register_operand" "")] 9980 "TARGET_ROUND && !flag_trapping_math" 9981{ 9982 rtx tmp = gen_reg_rtx (<MODE>mode); 9983 9984 emit_insn (gen_round<mode>2 (tmp, operands[1])); 9985 9986 emit_insn 9987 
(gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); 9988 DONE; 9989}) 9990 9991(define_expand "round<mode>2_vec_pack_sfix" 9992 [(match_operand:<ssepackfltmode> 0 "register_operand" "") 9993 (match_operand:VF2 1 "register_operand" "") 9994 (match_operand:VF2 2 "register_operand" "")] 9995 "TARGET_ROUND && !flag_trapping_math" 9996{ 9997 rtx tmp0, tmp1; 9998 9999 if (<MODE>mode == V2DFmode 10000 && TARGET_AVX && !TARGET_PREFER_AVX128) 10001 { 10002 rtx tmp2 = gen_reg_rtx (V4DFmode); 10003 10004 tmp0 = gen_reg_rtx (V4DFmode); 10005 tmp1 = force_reg (V2DFmode, operands[1]); 10006 10007 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); 10008 emit_insn (gen_roundv4df2 (tmp2, tmp0)); 10009 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2)); 10010 } 10011 else 10012 { 10013 tmp0 = gen_reg_rtx (<MODE>mode); 10014 tmp1 = gen_reg_rtx (<MODE>mode); 10015 10016 emit_insn (gen_round<mode>2 (tmp0, operands[1])); 10017 emit_insn (gen_round<mode>2 (tmp1, operands[2])); 10018 10019 emit_insn 10020 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1)); 10021 } 10022 DONE; 10023}) 10024 10025;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 10026;; 10027;; Intel SSE4.2 string/text processing instructions 10028;; 10029;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 10030 10031(define_insn_and_split "sse4_2_pcmpestr" 10032 [(set (match_operand:SI 0 "register_operand" "=c,c") 10033 (unspec:SI 10034 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x") 10035 (match_operand:SI 3 "register_operand" "a,a") 10036 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m") 10037 (match_operand:SI 5 "register_operand" "d,d") 10038 (match_operand:SI 6 "const_0_to_255_operand" "n,n")] 10039 UNSPEC_PCMPESTR)) 10040 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz") 10041 (unspec:V16QI 10042 [(match_dup 2) 10043 (match_dup 3) 10044 (match_dup 4) 10045 (match_dup 5) 10046 (match_dup 6)] 10047 UNSPEC_PCMPESTR)) 10048 (set 
(reg:CC FLAGS_REG) 10049 (unspec:CC 10050 [(match_dup 2) 10051 (match_dup 3) 10052 (match_dup 4) 10053 (match_dup 5) 10054 (match_dup 6)] 10055 UNSPEC_PCMPESTR))] 10056 "TARGET_SSE4_2 10057 && can_create_pseudo_p ()" 10058 "#" 10059 "&& 1" 10060 [(const_int 0)] 10061{ 10062 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); 10063 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); 10064 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); 10065 10066 if (ecx) 10067 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2], 10068 operands[3], operands[4], 10069 operands[5], operands[6])); 10070 if (xmm0) 10071 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2], 10072 operands[3], operands[4], 10073 operands[5], operands[6])); 10074 if (flags && !(ecx || xmm0)) 10075 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL, 10076 operands[2], operands[3], 10077 operands[4], operands[5], 10078 operands[6])); 10079 if (!(flags || ecx || xmm0)) 10080 emit_note (NOTE_INSN_DELETED); 10081 10082 DONE; 10083} 10084 [(set_attr "type" "sselog") 10085 (set_attr "prefix_data16" "1") 10086 (set_attr "prefix_extra" "1") 10087 (set_attr "length_immediate" "1") 10088 (set_attr "memory" "none,load") 10089 (set_attr "mode" "TI")]) 10090 10091(define_insn "sse4_2_pcmpestri" 10092 [(set (match_operand:SI 0 "register_operand" "=c,c") 10093 (unspec:SI 10094 [(match_operand:V16QI 1 "register_operand" "x,x") 10095 (match_operand:SI 2 "register_operand" "a,a") 10096 (match_operand:V16QI 3 "nonimmediate_operand" "x,m") 10097 (match_operand:SI 4 "register_operand" "d,d") 10098 (match_operand:SI 5 "const_0_to_255_operand" "n,n")] 10099 UNSPEC_PCMPESTR)) 10100 (set (reg:CC FLAGS_REG) 10101 (unspec:CC 10102 [(match_dup 1) 10103 (match_dup 2) 10104 (match_dup 3) 10105 (match_dup 4) 10106 (match_dup 5)] 10107 UNSPEC_PCMPESTR))] 10108 "TARGET_SSE4_2" 10109 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}" 10110 [(set_attr "type" "sselog") 10111 (set_attr 
"prefix_data16" "1") 10112 (set_attr "prefix_extra" "1") 10113 (set_attr "prefix" "maybe_vex") 10114 (set_attr "length_immediate" "1") 10115 (set_attr "memory" "none,load") 10116 (set_attr "mode" "TI")]) 10117 10118(define_insn "sse4_2_pcmpestrm" 10119 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") 10120 (unspec:V16QI 10121 [(match_operand:V16QI 1 "register_operand" "x,x") 10122 (match_operand:SI 2 "register_operand" "a,a") 10123 (match_operand:V16QI 3 "nonimmediate_operand" "x,m") 10124 (match_operand:SI 4 "register_operand" "d,d") 10125 (match_operand:SI 5 "const_0_to_255_operand" "n,n")] 10126 UNSPEC_PCMPESTR)) 10127 (set (reg:CC FLAGS_REG) 10128 (unspec:CC 10129 [(match_dup 1) 10130 (match_dup 2) 10131 (match_dup 3) 10132 (match_dup 4) 10133 (match_dup 5)] 10134 UNSPEC_PCMPESTR))] 10135 "TARGET_SSE4_2" 10136 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}" 10137 [(set_attr "type" "sselog") 10138 (set_attr "prefix_data16" "1") 10139 (set_attr "prefix_extra" "1") 10140 (set_attr "length_immediate" "1") 10141 (set_attr "prefix" "maybe_vex") 10142 (set_attr "memory" "none,load") 10143 (set_attr "mode" "TI")]) 10144 10145(define_insn "sse4_2_pcmpestr_cconly" 10146 [(set (reg:CC FLAGS_REG) 10147 (unspec:CC 10148 [(match_operand:V16QI 2 "register_operand" "x,x,x,x") 10149 (match_operand:SI 3 "register_operand" "a,a,a,a") 10150 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m") 10151 (match_operand:SI 5 "register_operand" "d,d,d,d") 10152 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")] 10153 UNSPEC_PCMPESTR)) 10154 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X")) 10155 (clobber (match_scratch:SI 1 "= X, X,c,c"))] 10156 "TARGET_SSE4_2" 10157 "@ 10158 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6} 10159 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6} 10160 %vpcmpestri\t{%6, %4, %2|%2, %4, %6} 10161 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}" 10162 [(set_attr "type" "sselog") 10163 (set_attr "prefix_data16" "1") 10164 (set_attr "prefix_extra" "1") 10165 (set_attr "length_immediate" 
"1") 10166 (set_attr "memory" "none,load,none,load") 10167 (set_attr "prefix" "maybe_vex") 10168 (set_attr "mode" "TI")]) 10169 10170(define_insn_and_split "sse4_2_pcmpistr" 10171 [(set (match_operand:SI 0 "register_operand" "=c,c") 10172 (unspec:SI 10173 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x") 10174 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m") 10175 (match_operand:SI 4 "const_0_to_255_operand" "n,n")] 10176 UNSPEC_PCMPISTR)) 10177 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz") 10178 (unspec:V16QI 10179 [(match_dup 2) 10180 (match_dup 3) 10181 (match_dup 4)] 10182 UNSPEC_PCMPISTR)) 10183 (set (reg:CC FLAGS_REG) 10184 (unspec:CC 10185 [(match_dup 2) 10186 (match_dup 3) 10187 (match_dup 4)] 10188 UNSPEC_PCMPISTR))] 10189 "TARGET_SSE4_2 10190 && can_create_pseudo_p ()" 10191 "#" 10192 "&& 1" 10193 [(const_int 0)] 10194{ 10195 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); 10196 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); 10197 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); 10198 10199 if (ecx) 10200 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2], 10201 operands[3], operands[4])); 10202 if (xmm0) 10203 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2], 10204 operands[3], operands[4])); 10205 if (flags && !(ecx || xmm0)) 10206 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL, 10207 operands[2], operands[3], 10208 operands[4])); 10209 if (!(flags || ecx || xmm0)) 10210 emit_note (NOTE_INSN_DELETED); 10211 10212 DONE; 10213} 10214 [(set_attr "type" "sselog") 10215 (set_attr "prefix_data16" "1") 10216 (set_attr "prefix_extra" "1") 10217 (set_attr "length_immediate" "1") 10218 (set_attr "memory" "none,load") 10219 (set_attr "mode" "TI")]) 10220 10221(define_insn "sse4_2_pcmpistri" 10222 [(set (match_operand:SI 0 "register_operand" "=c,c") 10223 (unspec:SI 10224 [(match_operand:V16QI 1 "register_operand" "x,x") 10225 (match_operand:V16QI 2 
"nonimmediate_operand" "x,m") 10226 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 10227 UNSPEC_PCMPISTR)) 10228 (set (reg:CC FLAGS_REG) 10229 (unspec:CC 10230 [(match_dup 1) 10231 (match_dup 2) 10232 (match_dup 3)] 10233 UNSPEC_PCMPISTR))] 10234 "TARGET_SSE4_2" 10235 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}" 10236 [(set_attr "type" "sselog") 10237 (set_attr "prefix_data16" "1") 10238 (set_attr "prefix_extra" "1") 10239 (set_attr "length_immediate" "1") 10240 (set_attr "prefix" "maybe_vex") 10241 (set_attr "memory" "none,load") 10242 (set_attr "mode" "TI")]) 10243 10244(define_insn "sse4_2_pcmpistrm" 10245 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") 10246 (unspec:V16QI 10247 [(match_operand:V16QI 1 "register_operand" "x,x") 10248 (match_operand:V16QI 2 "nonimmediate_operand" "x,m") 10249 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 10250 UNSPEC_PCMPISTR)) 10251 (set (reg:CC FLAGS_REG) 10252 (unspec:CC 10253 [(match_dup 1) 10254 (match_dup 2) 10255 (match_dup 3)] 10256 UNSPEC_PCMPISTR))] 10257 "TARGET_SSE4_2" 10258 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}" 10259 [(set_attr "type" "sselog") 10260 (set_attr "prefix_data16" "1") 10261 (set_attr "prefix_extra" "1") 10262 (set_attr "length_immediate" "1") 10263 (set_attr "prefix" "maybe_vex") 10264 (set_attr "memory" "none,load") 10265 (set_attr "mode" "TI")]) 10266 10267(define_insn "sse4_2_pcmpistr_cconly" 10268 [(set (reg:CC FLAGS_REG) 10269 (unspec:CC 10270 [(match_operand:V16QI 2 "register_operand" "x,x,x,x") 10271 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m") 10272 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")] 10273 UNSPEC_PCMPISTR)) 10274 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X")) 10275 (clobber (match_scratch:SI 1 "= X, X,c,c"))] 10276 "TARGET_SSE4_2" 10277 "@ 10278 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4} 10279 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4} 10280 %vpcmpistri\t{%4, %3, %2|%2, %3, %4} 10281 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}" 10282 [(set_attr "type" "sselog") 
10283 (set_attr "prefix_data16" "1") 10284 (set_attr "prefix_extra" "1") 10285 (set_attr "length_immediate" "1") 10286 (set_attr "memory" "none,load,none,load") 10287 (set_attr "prefix" "maybe_vex") 10288 (set_attr "mode" "TI")]) 10289 10290;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 10291;; 10292;; XOP instructions 10293;; 10294;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 10295 10296;; XOP parallel integer multiply/add instructions. 10297;; Note the XOP multiply/add instructions 10298;; a[i] = b[i] * c[i] + d[i]; 10299;; do not allow the value being added to be a memory operation. 10300(define_insn "xop_pmacsww" 10301 [(set (match_operand:V8HI 0 "register_operand" "=x") 10302 (plus:V8HI 10303 (mult:V8HI 10304 (match_operand:V8HI 1 "nonimmediate_operand" "%x") 10305 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) 10306 (match_operand:V8HI 3 "nonimmediate_operand" "x")))] 10307 "TARGET_XOP" 10308 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10309 [(set_attr "type" "ssemuladd") 10310 (set_attr "mode" "TI")]) 10311 10312(define_insn "xop_pmacssww" 10313 [(set (match_operand:V8HI 0 "register_operand" "=x") 10314 (ss_plus:V8HI 10315 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x") 10316 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) 10317 (match_operand:V8HI 3 "nonimmediate_operand" "x")))] 10318 "TARGET_XOP" 10319 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10320 [(set_attr "type" "ssemuladd") 10321 (set_attr "mode" "TI")]) 10322 10323(define_insn "xop_pmacsdd" 10324 [(set (match_operand:V4SI 0 "register_operand" "=x") 10325 (plus:V4SI 10326 (mult:V4SI 10327 (match_operand:V4SI 1 "nonimmediate_operand" "%x") 10328 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) 10329 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] 10330 "TARGET_XOP" 10331 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10332 [(set_attr "type" "ssemuladd") 10333 (set_attr "mode" "TI")]) 10334 10335(define_insn "xop_pmacssdd" 
10336 [(set (match_operand:V4SI 0 "register_operand" "=x") 10337 (ss_plus:V4SI 10338 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") 10339 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) 10340 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] 10341 "TARGET_XOP" 10342 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10343 [(set_attr "type" "ssemuladd") 10344 (set_attr "mode" "TI")]) 10345 10346(define_insn "xop_pmacssdql" 10347 [(set (match_operand:V2DI 0 "register_operand" "=x") 10348 (ss_plus:V2DI 10349 (mult:V2DI 10350 (sign_extend:V2DI 10351 (vec_select:V2SI 10352 (match_operand:V4SI 1 "nonimmediate_operand" "%x") 10353 (parallel [(const_int 0) 10354 (const_int 2)]))) 10355 (vec_select:V2SI 10356 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 10357 (parallel [(const_int 0) 10358 (const_int 2)]))) 10359 (match_operand:V2DI 3 "nonimmediate_operand" "x")))] 10360 "TARGET_XOP" 10361 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10362 [(set_attr "type" "ssemuladd") 10363 (set_attr "mode" "TI")]) 10364 10365(define_insn "xop_pmacssdqh" 10366 [(set (match_operand:V2DI 0 "register_operand" "=x") 10367 (ss_plus:V2DI 10368 (mult:V2DI 10369 (sign_extend:V2DI 10370 (vec_select:V2SI 10371 (match_operand:V4SI 1 "nonimmediate_operand" "%x") 10372 (parallel [(const_int 1) 10373 (const_int 3)]))) 10374 (sign_extend:V2DI 10375 (vec_select:V2SI 10376 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 10377 (parallel [(const_int 1) 10378 (const_int 3)])))) 10379 (match_operand:V2DI 3 "nonimmediate_operand" "x")))] 10380 "TARGET_XOP" 10381 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10382 [(set_attr "type" "ssemuladd") 10383 (set_attr "mode" "TI")]) 10384 10385(define_insn "xop_pmacsdql" 10386 [(set (match_operand:V2DI 0 "register_operand" "=x") 10387 (plus:V2DI 10388 (mult:V2DI 10389 (sign_extend:V2DI 10390 (vec_select:V2SI 10391 (match_operand:V4SI 1 "nonimmediate_operand" "%x") 10392 (parallel [(const_int 0) 10393 (const_int 2)]))) 10394 (sign_extend:V2DI 
10395 (vec_select:V2SI 10396 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 10397 (parallel [(const_int 0) 10398 (const_int 2)])))) 10399 (match_operand:V2DI 3 "nonimmediate_operand" "x")))] 10400 "TARGET_XOP" 10401 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10402 [(set_attr "type" "ssemuladd") 10403 (set_attr "mode" "TI")]) 10404 10405(define_insn "xop_pmacsdqh" 10406 [(set (match_operand:V2DI 0 "register_operand" "=x") 10407 (plus:V2DI 10408 (mult:V2DI 10409 (sign_extend:V2DI 10410 (vec_select:V2SI 10411 (match_operand:V4SI 1 "nonimmediate_operand" "%x") 10412 (parallel [(const_int 1) 10413 (const_int 3)]))) 10414 (sign_extend:V2DI 10415 (vec_select:V2SI 10416 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 10417 (parallel [(const_int 1) 10418 (const_int 3)])))) 10419 (match_operand:V2DI 3 "nonimmediate_operand" "x")))] 10420 "TARGET_XOP" 10421 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10422 [(set_attr "type" "ssemuladd") 10423 (set_attr "mode" "TI")]) 10424 10425;; XOP parallel integer multiply/add instructions for the intrinisics 10426(define_insn "xop_pmacsswd" 10427 [(set (match_operand:V4SI 0 "register_operand" "=x") 10428 (ss_plus:V4SI 10429 (mult:V4SI 10430 (sign_extend:V4SI 10431 (vec_select:V4HI 10432 (match_operand:V8HI 1 "nonimmediate_operand" "%x") 10433 (parallel [(const_int 1) 10434 (const_int 3) 10435 (const_int 5) 10436 (const_int 7)]))) 10437 (sign_extend:V4SI 10438 (vec_select:V4HI 10439 (match_operand:V8HI 2 "nonimmediate_operand" "xm") 10440 (parallel [(const_int 1) 10441 (const_int 3) 10442 (const_int 5) 10443 (const_int 7)])))) 10444 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] 10445 "TARGET_XOP" 10446 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10447 [(set_attr "type" "ssemuladd") 10448 (set_attr "mode" "TI")]) 10449 10450(define_insn "xop_pmacswd" 10451 [(set (match_operand:V4SI 0 "register_operand" "=x") 10452 (plus:V4SI 10453 (mult:V4SI 10454 (sign_extend:V4SI 10455 (vec_select:V4HI 10456 (match_operand:V8HI 1 
"nonimmediate_operand" "%x") 10457 (parallel [(const_int 1) 10458 (const_int 3) 10459 (const_int 5) 10460 (const_int 7)]))) 10461 (sign_extend:V4SI 10462 (vec_select:V4HI 10463 (match_operand:V8HI 2 "nonimmediate_operand" "xm") 10464 (parallel [(const_int 1) 10465 (const_int 3) 10466 (const_int 5) 10467 (const_int 7)])))) 10468 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] 10469 "TARGET_XOP" 10470 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10471 [(set_attr "type" "ssemuladd") 10472 (set_attr "mode" "TI")]) 10473 10474(define_insn "xop_pmadcsswd" 10475 [(set (match_operand:V4SI 0 "register_operand" "=x") 10476 (ss_plus:V4SI 10477 (plus:V4SI 10478 (mult:V4SI 10479 (sign_extend:V4SI 10480 (vec_select:V4HI 10481 (match_operand:V8HI 1 "nonimmediate_operand" "%x") 10482 (parallel [(const_int 0) 10483 (const_int 2) 10484 (const_int 4) 10485 (const_int 6)]))) 10486 (sign_extend:V4SI 10487 (vec_select:V4HI 10488 (match_operand:V8HI 2 "nonimmediate_operand" "xm") 10489 (parallel [(const_int 0) 10490 (const_int 2) 10491 (const_int 4) 10492 (const_int 6)])))) 10493 (mult:V4SI 10494 (sign_extend:V4SI 10495 (vec_select:V4HI 10496 (match_dup 1) 10497 (parallel [(const_int 1) 10498 (const_int 3) 10499 (const_int 5) 10500 (const_int 7)]))) 10501 (sign_extend:V4SI 10502 (vec_select:V4HI 10503 (match_dup 2) 10504 (parallel [(const_int 1) 10505 (const_int 3) 10506 (const_int 5) 10507 (const_int 7)]))))) 10508 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] 10509 "TARGET_XOP" 10510 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10511 [(set_attr "type" "ssemuladd") 10512 (set_attr "mode" "TI")]) 10513 10514(define_insn "xop_pmadcswd" 10515 [(set (match_operand:V4SI 0 "register_operand" "=x") 10516 (plus:V4SI 10517 (plus:V4SI 10518 (mult:V4SI 10519 (sign_extend:V4SI 10520 (vec_select:V4HI 10521 (match_operand:V8HI 1 "nonimmediate_operand" "%x") 10522 (parallel [(const_int 0) 10523 (const_int 2) 10524 (const_int 4) 10525 (const_int 6)]))) 10526 (sign_extend:V4SI 10527 
(vec_select:V4HI 10528 (match_operand:V8HI 2 "nonimmediate_operand" "xm") 10529 (parallel [(const_int 0) 10530 (const_int 2) 10531 (const_int 4) 10532 (const_int 6)])))) 10533 (mult:V4SI 10534 (sign_extend:V4SI 10535 (vec_select:V4HI 10536 (match_dup 1) 10537 (parallel [(const_int 1) 10538 (const_int 3) 10539 (const_int 5) 10540 (const_int 7)]))) 10541 (sign_extend:V4SI 10542 (vec_select:V4HI 10543 (match_dup 2) 10544 (parallel [(const_int 1) 10545 (const_int 3) 10546 (const_int 5) 10547 (const_int 7)]))))) 10548 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] 10549 "TARGET_XOP" 10550 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10551 [(set_attr "type" "ssemuladd") 10552 (set_attr "mode" "TI")]) 10553 10554;; XOP parallel XMM conditional moves 10555(define_insn "xop_pcmov_<mode><avxsizesuffix>" 10556 [(set (match_operand:V 0 "register_operand" "=x,x") 10557 (if_then_else:V 10558 (match_operand:V 3 "nonimmediate_operand" "x,m") 10559 (match_operand:V 1 "register_operand" "x,x") 10560 (match_operand:V 2 "nonimmediate_operand" "xm,x")))] 10561 "TARGET_XOP" 10562 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10563 [(set_attr "type" "sse4arg")]) 10564 10565;; XOP horizontal add/subtract instructions 10566(define_insn "xop_phaddbw" 10567 [(set (match_operand:V8HI 0 "register_operand" "=x") 10568 (plus:V8HI 10569 (sign_extend:V8HI 10570 (vec_select:V8QI 10571 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 10572 (parallel [(const_int 0) 10573 (const_int 2) 10574 (const_int 4) 10575 (const_int 6) 10576 (const_int 8) 10577 (const_int 10) 10578 (const_int 12) 10579 (const_int 14)]))) 10580 (sign_extend:V8HI 10581 (vec_select:V8QI 10582 (match_dup 1) 10583 (parallel [(const_int 1) 10584 (const_int 3) 10585 (const_int 5) 10586 (const_int 7) 10587 (const_int 9) 10588 (const_int 11) 10589 (const_int 13) 10590 (const_int 15)])))))] 10591 "TARGET_XOP" 10592 "vphaddbw\t{%1, %0|%0, %1}" 10593 [(set_attr "type" "sseiadd1")]) 10594 10595(define_insn "xop_phaddbd" 10596 [(set 
(match_operand:V4SI 0 "register_operand" "=x") 10597 (plus:V4SI 10598 (plus:V4SI 10599 (sign_extend:V4SI 10600 (vec_select:V4QI 10601 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 10602 (parallel [(const_int 0) 10603 (const_int 4) 10604 (const_int 8) 10605 (const_int 12)]))) 10606 (sign_extend:V4SI 10607 (vec_select:V4QI 10608 (match_dup 1) 10609 (parallel [(const_int 1) 10610 (const_int 5) 10611 (const_int 9) 10612 (const_int 13)])))) 10613 (plus:V4SI 10614 (sign_extend:V4SI 10615 (vec_select:V4QI 10616 (match_dup 1) 10617 (parallel [(const_int 2) 10618 (const_int 6) 10619 (const_int 10) 10620 (const_int 14)]))) 10621 (sign_extend:V4SI 10622 (vec_select:V4QI 10623 (match_dup 1) 10624 (parallel [(const_int 3) 10625 (const_int 7) 10626 (const_int 11) 10627 (const_int 15)]))))))] 10628 "TARGET_XOP" 10629 "vphaddbd\t{%1, %0|%0, %1}" 10630 [(set_attr "type" "sseiadd1")]) 10631 10632(define_insn "xop_phaddbq" 10633 [(set (match_operand:V2DI 0 "register_operand" "=x") 10634 (plus:V2DI 10635 (plus:V2DI 10636 (plus:V2DI 10637 (sign_extend:V2DI 10638 (vec_select:V2QI 10639 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 10640 (parallel [(const_int 0) 10641 (const_int 8)]))) 10642 (sign_extend:V2DI 10643 (vec_select:V2QI 10644 (match_dup 1) 10645 (parallel [(const_int 1) 10646 (const_int 9)])))) 10647 (plus:V2DI 10648 (sign_extend:V2DI 10649 (vec_select:V2QI 10650 (match_dup 1) 10651 (parallel [(const_int 2) 10652 (const_int 10)]))) 10653 (sign_extend:V2DI 10654 (vec_select:V2QI 10655 (match_dup 1) 10656 (parallel [(const_int 3) 10657 (const_int 11)]))))) 10658 (plus:V2DI 10659 (plus:V2DI 10660 (sign_extend:V2DI 10661 (vec_select:V2QI 10662 (match_dup 1) 10663 (parallel [(const_int 4) 10664 (const_int 12)]))) 10665 (sign_extend:V2DI 10666 (vec_select:V2QI 10667 (match_dup 1) 10668 (parallel [(const_int 5) 10669 (const_int 13)])))) 10670 (plus:V2DI 10671 (sign_extend:V2DI 10672 (vec_select:V2QI 10673 (match_dup 1) 10674 (parallel [(const_int 6) 10675 (const_int 
14)])))
            (sign_extend:V2DI
             (vec_select:V2QI
              (match_dup 1)
              (parallel [(const_int 7)
                         (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; vphaddwd: signed horizontal add of adjacent words into doublewords.
;; Result element i is sext (op1[2i]) + sext (op1[2i+1]).
(define_insn "xop_phaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
         (sign_extend:V4SI
          (vec_select:V4HI
           (match_operand:V8HI 1 "nonimmediate_operand" "xm")
           (parallel [(const_int 0)
                      (const_int 2)
                      (const_int 4)
                      (const_int 6)])))
         (sign_extend:V4SI
          (vec_select:V4HI
           (match_dup 1)
           (parallel [(const_int 1)
                      (const_int 3)
                      (const_int 5)
                      (const_int 7)])))))]
  "TARGET_XOP"
  "vphaddwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; vphaddwq: signed horizontal add of words into quadwords.
;; Result element 0 sums words 0-3, element 1 sums words 4-7.
(define_insn "xop_phaddwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
         (plus:V2DI
          (sign_extend:V2DI
           (vec_select:V2HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 4)])))
          (sign_extend:V2DI
           (vec_select:V2HI
            (match_dup 1)
            (parallel [(const_int 1)
                       (const_int 5)]))))
         (plus:V2DI
          (sign_extend:V2DI
           (vec_select:V2HI
            (match_dup 1)
            (parallel [(const_int 2)
                       (const_int 6)])))
          (sign_extend:V2DI
           (vec_select:V2HI
            (match_dup 1)
            (parallel [(const_int 3)
                       (const_int 7)]))))))]
  "TARGET_XOP"
  "vphaddwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; vphadddq: signed horizontal add of adjacent doublewords into quadwords.
(define_insn "xop_phadddq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
         (sign_extend:V2DI
          (vec_select:V2SI
           (match_operand:V4SI 1 "nonimmediate_operand" "xm")
           (parallel [(const_int 0)
                      (const_int 2)])))
         (sign_extend:V2DI
          (vec_select:V2SI
           (match_dup 1)
           (parallel [(const_int 1)
                      (const_int 3)])))))]
  "TARGET_XOP"
  "vphadddq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; vphaddubw: unsigned horizontal add of adjacent bytes into words.
;; Result element i is zext (op1[2i]) + zext (op1[2i+1]).
(define_insn "xop_phaddubw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
         (zero_extend:V8HI
          (vec_select:V8QI
           (match_operand:V16QI 1 "nonimmediate_operand" "xm")
           (parallel [(const_int 0)
                      (const_int 2)
                      (const_int 4)
                      (const_int 6)
                      (const_int 8)
                      (const_int 10)
                      (const_int 12)
                      (const_int 14)])))
         (zero_extend:V8HI
          (vec_select:V8QI
           (match_dup 1)
           (parallel [(const_int 1)
                      (const_int 3)
                      (const_int 5)
                      (const_int 7)
                      (const_int 9)
                      (const_int 11)
                      (const_int 13)
                      (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddubw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; vphaddubd: unsigned horizontal add of bytes into doublewords.
;; Result element i sums the four zero-extended bytes 4i .. 4i+3.
(define_insn "xop_phaddubd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
         (plus:V4SI
          (zero_extend:V4SI
           (vec_select:V4QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 4)
                       (const_int 8)
                       (const_int 12)])))
          (zero_extend:V4SI
           (vec_select:V4QI
            (match_dup 1)
            (parallel [(const_int 1)
                       (const_int 5)
                       (const_int 9)
                       (const_int 13)]))))
         (plus:V4SI
          (zero_extend:V4SI
           (vec_select:V4QI
            (match_dup 1)
            (parallel [(const_int 2)
                       (const_int 6)
                       (const_int 10)
                       (const_int 14)])))
          (zero_extend:V4SI
           (vec_select:V4QI
            (match_dup 1)
            (parallel [(const_int 3)
                       (const_int 7)
                       (const_int 11)
                       (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddubd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; vphaddubq: unsigned horizontal add of bytes into quadwords.
;; Result element 0 sums bytes 0-7, element 1 sums bytes 8-15.
;; Every addend is zero-extended: this is the unsigned counterpart of
;; xop_phaddbq, so mixing in sign_extend would describe a different
;; value than the instruction computes for bytes >= 0x80.
(define_insn "xop_phaddubq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
         (plus:V2DI
          (plus:V2DI
           (zero_extend:V2DI
            (vec_select:V2QI
             (match_operand:V16QI 1 "nonimmediate_operand" "xm")
             (parallel [(const_int 0)
                        (const_int 8)])))
           (zero_extend:V2DI
            (vec_select:V2QI
             (match_dup 1)
             (parallel [(const_int 1)
                        (const_int 9)]))))
          (plus:V2DI
           (zero_extend:V2DI
            (vec_select:V2QI
             (match_dup 1)
             (parallel [(const_int 2)
                        (const_int 10)])))
           (zero_extend:V2DI
            (vec_select:V2QI
             (match_dup 1)
             (parallel [(const_int 3)
                        (const_int 11)])))))
         (plus:V2DI
          (plus:V2DI
           (zero_extend:V2DI
            (vec_select:V2QI
             (match_dup 1)
             (parallel [(const_int 4)
                        (const_int 12)])))
           (zero_extend:V2DI
            (vec_select:V2QI
             (match_dup 1)
             (parallel [(const_int 5)
                        (const_int 13)]))))
          (plus:V2DI
           (zero_extend:V2DI
            (vec_select:V2QI
             (match_dup 1)
             (parallel [(const_int 6)
                        (const_int 14)])))
           (zero_extend:V2DI
            (vec_select:V2QI
             (match_dup 1)
             (parallel [(const_int 7)
                        (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddubq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; vphadduwd: unsigned horizontal add of adjacent words into doublewords.
(define_insn "xop_phadduwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
         (zero_extend:V4SI
          (vec_select:V4HI
           (match_operand:V8HI 1 "nonimmediate_operand" "xm")
           (parallel [(const_int 0)
                      (const_int 2)
                      (const_int 4)
                      (const_int 6)])))
         (zero_extend:V4SI
          (vec_select:V4HI
           (match_dup 1)
           (parallel [(const_int 1)
                      (const_int 3)
                      (const_int 5)
                      (const_int 7)])))))]
  "TARGET_XOP"
  "vphadduwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

(define_insn "xop_phadduwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
         (plus:V2DI
          (zero_extend:V2DI
           (vec_select:V2HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 4)])))
          (zero_extend:V2DI
           (vec_select:V2HI
(match_dup 1) 10904 (parallel [(const_int 1) 10905 (const_int 5)])))) 10906 (plus:V2DI 10907 (zero_extend:V2DI 10908 (vec_select:V2HI 10909 (match_dup 1) 10910 (parallel [(const_int 2) 10911 (const_int 6)]))) 10912 (zero_extend:V2DI 10913 (vec_select:V2HI 10914 (match_dup 1) 10915 (parallel [(const_int 3) 10916 (const_int 7)]))))))] 10917 "TARGET_XOP" 10918 "vphadduwq\t{%1, %0|%0, %1}" 10919 [(set_attr "type" "sseiadd1")]) 10920 10921(define_insn "xop_phaddudq" 10922 [(set (match_operand:V2DI 0 "register_operand" "=x") 10923 (plus:V2DI 10924 (zero_extend:V2DI 10925 (vec_select:V2SI 10926 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 10927 (parallel [(const_int 0) 10928 (const_int 2)]))) 10929 (zero_extend:V2DI 10930 (vec_select:V2SI 10931 (match_dup 1) 10932 (parallel [(const_int 1) 10933 (const_int 3)])))))] 10934 "TARGET_XOP" 10935 "vphaddudq\t{%1, %0|%0, %1}" 10936 [(set_attr "type" "sseiadd1")]) 10937 10938(define_insn "xop_phsubbw" 10939 [(set (match_operand:V8HI 0 "register_operand" "=x") 10940 (minus:V8HI 10941 (sign_extend:V8HI 10942 (vec_select:V8QI 10943 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 10944 (parallel [(const_int 0) 10945 (const_int 2) 10946 (const_int 4) 10947 (const_int 6) 10948 (const_int 8) 10949 (const_int 10) 10950 (const_int 12) 10951 (const_int 14)]))) 10952 (sign_extend:V8HI 10953 (vec_select:V8QI 10954 (match_dup 1) 10955 (parallel [(const_int 1) 10956 (const_int 3) 10957 (const_int 5) 10958 (const_int 7) 10959 (const_int 9) 10960 (const_int 11) 10961 (const_int 13) 10962 (const_int 15)])))))] 10963 "TARGET_XOP" 10964 "vphsubbw\t{%1, %0|%0, %1}" 10965 [(set_attr "type" "sseiadd1")]) 10966 10967(define_insn "xop_phsubwd" 10968 [(set (match_operand:V4SI 0 "register_operand" "=x") 10969 (minus:V4SI 10970 (sign_extend:V4SI 10971 (vec_select:V4HI 10972 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 10973 (parallel [(const_int 0) 10974 (const_int 2) 10975 (const_int 4) 10976 (const_int 6)]))) 10977 (sign_extend:V4SI 10978 
(vec_select:V4HI 10979 (match_dup 1) 10980 (parallel [(const_int 1) 10981 (const_int 3) 10982 (const_int 5) 10983 (const_int 7)])))))] 10984 "TARGET_XOP" 10985 "vphsubwd\t{%1, %0|%0, %1}" 10986 [(set_attr "type" "sseiadd1")]) 10987 10988(define_insn "xop_phsubdq" 10989 [(set (match_operand:V2DI 0 "register_operand" "=x") 10990 (minus:V2DI 10991 (sign_extend:V2DI 10992 (vec_select:V2SI 10993 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 10994 (parallel [(const_int 0) 10995 (const_int 2)]))) 10996 (sign_extend:V2DI 10997 (vec_select:V2SI 10998 (match_dup 1) 10999 (parallel [(const_int 1) 11000 (const_int 3)])))))] 11001 "TARGET_XOP" 11002 "vphsubdq\t{%1, %0|%0, %1}" 11003 [(set_attr "type" "sseiadd1")]) 11004 11005;; XOP permute instructions 11006(define_insn "xop_pperm" 11007 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 11008 (unspec:V16QI 11009 [(match_operand:V16QI 1 "register_operand" "x,x") 11010 (match_operand:V16QI 2 "nonimmediate_operand" "x,m") 11011 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")] 11012 UNSPEC_XOP_PERMUTE))] 11013 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 11014 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11015 [(set_attr "type" "sse4arg") 11016 (set_attr "mode" "TI")]) 11017 11018;; XOP pack instructions that combine two vectors into a smaller vector 11019(define_insn "xop_pperm_pack_v2di_v4si" 11020 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 11021 (vec_concat:V4SI 11022 (truncate:V2SI 11023 (match_operand:V2DI 1 "register_operand" "x,x")) 11024 (truncate:V2SI 11025 (match_operand:V2DI 2 "nonimmediate_operand" "x,m")))) 11026 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] 11027 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 11028 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11029 [(set_attr "type" "sse4arg") 11030 (set_attr "mode" "TI")]) 11031 11032(define_insn "xop_pperm_pack_v4si_v8hi" 11033 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 11034 
(vec_concat:V8HI 11035 (truncate:V4HI 11036 (match_operand:V4SI 1 "register_operand" "x,x")) 11037 (truncate:V4HI 11038 (match_operand:V4SI 2 "nonimmediate_operand" "x,m")))) 11039 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] 11040 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 11041 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11042 [(set_attr "type" "sse4arg") 11043 (set_attr "mode" "TI")]) 11044 11045(define_insn "xop_pperm_pack_v8hi_v16qi" 11046 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 11047 (vec_concat:V16QI 11048 (truncate:V8QI 11049 (match_operand:V8HI 1 "register_operand" "x,x")) 11050 (truncate:V8QI 11051 (match_operand:V8HI 2 "nonimmediate_operand" "x,m")))) 11052 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] 11053 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 11054 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11055 [(set_attr "type" "sse4arg") 11056 (set_attr "mode" "TI")]) 11057 11058;; XOP packed rotate instructions 11059(define_expand "rotl<mode>3" 11060 [(set (match_operand:VI_128 0 "register_operand" "") 11061 (rotate:VI_128 11062 (match_operand:VI_128 1 "nonimmediate_operand" "") 11063 (match_operand:SI 2 "general_operand")))] 11064 "TARGET_XOP" 11065{ 11066 /* If we were given a scalar, convert it to parallel */ 11067 if (! 
const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec vs = rtvec_alloc (<ssescalarnum>);
      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
      rtx reg = gen_reg_rtx (<MODE>mode);
      rtx op2 = operands[2];
      int i;

      if (GET_MODE (op2) != <ssescalarmode>mode)
        {
          op2 = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (op2, operands[2], false);
        }

      for (i = 0; i < <ssescalarnum>; i++)
        RTVEC_ELT (vs, i) = op2;

      emit_insn (gen_vec_init<mode> (reg, par));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
      DONE;
    }
})

;; Rotate right by a scalar count.  A count accepted by the immediate
;; predicate falls through to the xop_rotr<mode>3 insn; any other count is
;; broadcast to a vector, negated, and handed to the variable left rotate,
;; whose pattern rotates right for negative per-element counts.
(define_expand "rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "")
        (rotatert:VI_128
         (match_operand:VI_128 1 "nonimmediate_operand" "")
         (match_operand:SI 2 "general_operand")))]
  "TARGET_XOP"
{
  /* If we were given a scalar, convert it to parallel */
  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
    {
      rtvec vs = rtvec_alloc (<ssescalarnum>);
      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
      rtx neg = gen_reg_rtx (<MODE>mode);
      rtx reg = gen_reg_rtx (<MODE>mode);
      rtx op2 = operands[2];
      int i;

      /* The broadcast needs an element-mode value.  */
      if (GET_MODE (op2) != <ssescalarmode>mode)
        {
          op2 = gen_reg_rtx (<ssescalarmode>mode);
          convert_move (op2, operands[2], false);
        }

      for (i = 0; i < <ssescalarnum>; i++)
        RTVEC_ELT (vs, i) = op2;

      emit_insn (gen_vec_init<mode> (reg, par));
      emit_insn (gen_neg<mode>2 (neg, reg));
      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
      DONE;
    }
})

;; Rotate left by an immediate count.
(define_insn "xop_rotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (rotate:VI_128
         (match_operand:VI_128 1 "nonimmediate_operand" "xm")
         (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Rotate right by an immediate count, emitted as a left rotate by
;; (element width in bits - count).
(define_insn "xop_rotr<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (rotatert:VI_128
         (match_operand:VI_128 1 "nonimmediate_operand" "xm")
         (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
  "TARGET_XOP"
{
  /* The complement must be taken against the width of one element.
     <ssescalarnum> is the number of elements, so <ssescalarnum> * 8 only
     equals the element width for V4SI (4 * 8 == 32); it yields 128 for
     V16QI, 64 for V8HI and 16 for V2DI.  */
  operands[3]
    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
}
  [(set_attr "type" "sseishft")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Variable right rotate: negate the per-element counts and use the
;; variable left rotate.
(define_expand "vrotr<mode>3"
  [(match_operand:VI_128 0 "register_operand" "")
   (match_operand:VI_128 1 "register_operand" "")
   (match_operand:VI_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  rtx reg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_neg<mode>2 (reg, operands[2]));
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
  DONE;
})

;; Variable left rotate: forwards directly to xop_vrotl<mode>3.
(define_expand "vrotl<mode>3"
  [(match_operand:VI_128 0 "register_operand" "")
   (match_operand:VI_128 1 "register_operand" "")
   (match_operand:VI_128 2 "register_operand" "")]
  "TARGET_XOP"
{
  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
  DONE;
})

(define_insn "xop_vrotl<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
        (if_then_else:VI_128
         (ge:VI_128
          (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
          (const_int 0))
         (rotate:VI_128
          (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
          (match_dup 2))
         (rotatert:VI_128
          (match_dup 1)
          (neg:VI_128 (match_dup 2)))))]
  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type"
"sseishft") 11185 (set_attr "prefix_data16" "0") 11186 (set_attr "prefix_extra" "2") 11187 (set_attr "mode" "TI")]) 11188 11189;; XOP packed shift instructions. 11190(define_expand "vlshr<mode>3" 11191 [(set (match_operand:VI12_128 0 "register_operand" "") 11192 (lshiftrt:VI12_128 11193 (match_operand:VI12_128 1 "register_operand" "") 11194 (match_operand:VI12_128 2 "nonimmediate_operand" "")))] 11195 "TARGET_XOP" 11196{ 11197 rtx neg = gen_reg_rtx (<MODE>mode); 11198 emit_insn (gen_neg<mode>2 (neg, operands[2])); 11199 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); 11200 DONE; 11201}) 11202 11203(define_expand "vlshr<mode>3" 11204 [(set (match_operand:VI48_128 0 "register_operand" "") 11205 (lshiftrt:VI48_128 11206 (match_operand:VI48_128 1 "register_operand" "") 11207 (match_operand:VI48_128 2 "nonimmediate_operand" "")))] 11208 "TARGET_AVX2 || TARGET_XOP" 11209{ 11210 if (!TARGET_AVX2) 11211 { 11212 rtx neg = gen_reg_rtx (<MODE>mode); 11213 emit_insn (gen_neg<mode>2 (neg, operands[2])); 11214 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); 11215 DONE; 11216 } 11217}) 11218 11219(define_expand "vlshr<mode>3" 11220 [(set (match_operand:VI48_256 0 "register_operand" "") 11221 (lshiftrt:VI48_256 11222 (match_operand:VI48_256 1 "register_operand" "") 11223 (match_operand:VI48_256 2 "nonimmediate_operand" "")))] 11224 "TARGET_AVX2") 11225 11226(define_expand "vashr<mode>3" 11227 [(set (match_operand:VI128_128 0 "register_operand" "") 11228 (ashiftrt:VI128_128 11229 (match_operand:VI128_128 1 "register_operand" "") 11230 (match_operand:VI128_128 2 "nonimmediate_operand" "")))] 11231 "TARGET_XOP" 11232{ 11233 rtx neg = gen_reg_rtx (<MODE>mode); 11234 emit_insn (gen_neg<mode>2 (neg, operands[2])); 11235 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg)); 11236 DONE; 11237}) 11238 11239(define_expand "vashrv4si3" 11240 [(set (match_operand:V4SI 0 "register_operand" "") 11241 (ashiftrt:V4SI (match_operand:V4SI 1 
"register_operand" "") 11242 (match_operand:V4SI 2 "nonimmediate_operand" "")))] 11243 "TARGET_AVX2 || TARGET_XOP" 11244{ 11245 if (!TARGET_AVX2) 11246 { 11247 rtx neg = gen_reg_rtx (V4SImode); 11248 emit_insn (gen_negv4si2 (neg, operands[2])); 11249 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg)); 11250 DONE; 11251 } 11252}) 11253 11254(define_expand "vashrv8si3" 11255 [(set (match_operand:V8SI 0 "register_operand" "") 11256 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "") 11257 (match_operand:V8SI 2 "nonimmediate_operand" "")))] 11258 "TARGET_AVX2") 11259 11260(define_expand "vashl<mode>3" 11261 [(set (match_operand:VI12_128 0 "register_operand" "") 11262 (ashift:VI12_128 11263 (match_operand:VI12_128 1 "register_operand" "") 11264 (match_operand:VI12_128 2 "nonimmediate_operand" "")))] 11265 "TARGET_XOP" 11266{ 11267 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); 11268 DONE; 11269}) 11270 11271(define_expand "vashl<mode>3" 11272 [(set (match_operand:VI48_128 0 "register_operand" "") 11273 (ashift:VI48_128 11274 (match_operand:VI48_128 1 "register_operand" "") 11275 (match_operand:VI48_128 2 "nonimmediate_operand" "")))] 11276 "TARGET_AVX2 || TARGET_XOP" 11277{ 11278 if (!TARGET_AVX2) 11279 { 11280 operands[2] = force_reg (<MODE>mode, operands[2]); 11281 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); 11282 DONE; 11283 } 11284}) 11285 11286(define_expand "vashl<mode>3" 11287 [(set (match_operand:VI48_256 0 "register_operand" "") 11288 (ashift:VI48_256 11289 (match_operand:VI48_256 1 "register_operand" "") 11290 (match_operand:VI48_256 2 "nonimmediate_operand" "")))] 11291 "TARGET_AVX2") 11292 11293(define_insn "xop_sha<mode>3" 11294 [(set (match_operand:VI_128 0 "register_operand" "=x,x") 11295 (if_then_else:VI_128 11296 (ge:VI_128 11297 (match_operand:VI_128 2 "nonimmediate_operand" "x,m") 11298 (const_int 0)) 11299 (ashift:VI_128 11300 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") 
11301 (match_dup 2)) 11302 (ashiftrt:VI_128 11303 (match_dup 1) 11304 (neg:VI_128 (match_dup 2)))))] 11305 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 11306 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 11307 [(set_attr "type" "sseishft") 11308 (set_attr "prefix_data16" "0") 11309 (set_attr "prefix_extra" "2") 11310 (set_attr "mode" "TI")]) 11311 11312(define_insn "xop_shl<mode>3" 11313 [(set (match_operand:VI_128 0 "register_operand" "=x,x") 11314 (if_then_else:VI_128 11315 (ge:VI_128 11316 (match_operand:VI_128 2 "nonimmediate_operand" "x,m") 11317 (const_int 0)) 11318 (ashift:VI_128 11319 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") 11320 (match_dup 2)) 11321 (lshiftrt:VI_128 11322 (match_dup 1) 11323 (neg:VI_128 (match_dup 2)))))] 11324 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 11325 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 11326 [(set_attr "type" "sseishft") 11327 (set_attr "prefix_data16" "0") 11328 (set_attr "prefix_extra" "2") 11329 (set_attr "mode" "TI")]) 11330 11331;; SSE2 doesn't have some shift variants, so define versions for XOP 11332(define_expand "ashlv16qi3" 11333 [(set (match_operand:V16QI 0 "register_operand" "") 11334 (ashift:V16QI 11335 (match_operand:V16QI 1 "register_operand" "") 11336 (match_operand:SI 2 "nonmemory_operand" "")))] 11337 "TARGET_XOP" 11338{ 11339 rtx reg = gen_reg_rtx (V16QImode); 11340 rtx par; 11341 int i; 11342 11343 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); 11344 for (i = 0; i < 16; i++) 11345 XVECEXP (par, 0, i) = operands[2]; 11346 11347 emit_insn (gen_vec_initv16qi (reg, par)); 11348 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg)); 11349 DONE; 11350}) 11351 11352(define_expand "<shift_insn>v16qi3" 11353 [(set (match_operand:V16QI 0 "register_operand" "") 11354 (any_shiftrt:V16QI 11355 (match_operand:V16QI 1 "register_operand" "") 11356 (match_operand:SI 2 "nonmemory_operand" "")))] 11357 "TARGET_XOP" 11358{ 11359 rtx reg = gen_reg_rtx 
(V16QImode); 11360 rtx par; 11361 bool negate = false; 11362 rtx (*shift_insn)(rtx, rtx, rtx); 11363 int i; 11364 11365 if (CONST_INT_P (operands[2])) 11366 operands[2] = GEN_INT (-INTVAL (operands[2])); 11367 else 11368 negate = true; 11369 11370 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); 11371 for (i = 0; i < 16; i++) 11372 XVECEXP (par, 0, i) = operands[2]; 11373 11374 emit_insn (gen_vec_initv16qi (reg, par)); 11375 11376 if (negate) 11377 emit_insn (gen_negv16qi2 (reg, reg)); 11378 11379 if (<CODE> == LSHIFTRT) 11380 shift_insn = gen_xop_shlv16qi3; 11381 else 11382 shift_insn = gen_xop_shav16qi3; 11383 11384 emit_insn (shift_insn (operands[0], operands[1], reg)); 11385 DONE; 11386}) 11387 11388(define_expand "ashrv2di3" 11389 [(set (match_operand:V2DI 0 "register_operand" "") 11390 (ashiftrt:V2DI 11391 (match_operand:V2DI 1 "register_operand" "") 11392 (match_operand:DI 2 "nonmemory_operand" "")))] 11393 "TARGET_XOP" 11394{ 11395 rtx reg = gen_reg_rtx (V2DImode); 11396 rtx par; 11397 bool negate = false; 11398 int i; 11399 11400 if (CONST_INT_P (operands[2])) 11401 operands[2] = GEN_INT (-INTVAL (operands[2])); 11402 else 11403 negate = true; 11404 11405 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2)); 11406 for (i = 0; i < 2; i++) 11407 XVECEXP (par, 0, i) = operands[2]; 11408 11409 emit_insn (gen_vec_initv2di (reg, par)); 11410 11411 if (negate) 11412 emit_insn (gen_negv2di2 (reg, reg)); 11413 11414 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg)); 11415 DONE; 11416}) 11417 11418;; XOP FRCZ support 11419(define_insn "xop_frcz<mode>2" 11420 [(set (match_operand:FMAMODE 0 "register_operand" "=x") 11421 (unspec:FMAMODE 11422 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")] 11423 UNSPEC_FRCZ))] 11424 "TARGET_XOP" 11425 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}" 11426 [(set_attr "type" "ssecvt1") 11427 (set_attr "mode" "<MODE>")]) 11428 11429;; scalar insns 11430(define_expand "xop_vmfrcz<mode>2" 11431 [(set (match_operand:VF_128 0 
"register_operand") 11432 (vec_merge:VF_128 11433 (unspec:VF_128 11434 [(match_operand:VF_128 1 "nonimmediate_operand")] 11435 UNSPEC_FRCZ) 11436 (match_dup 3) 11437 (const_int 1)))] 11438 "TARGET_XOP" 11439{ 11440 operands[3] = CONST0_RTX (<MODE>mode); 11441}) 11442 11443(define_insn "*xop_vmfrcz_<mode>" 11444 [(set (match_operand:VF_128 0 "register_operand" "=x") 11445 (vec_merge:VF_128 11446 (unspec:VF_128 11447 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")] 11448 UNSPEC_FRCZ) 11449 (match_operand:VF_128 2 "const0_operand") 11450 (const_int 1)))] 11451 "TARGET_XOP" 11452 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}" 11453 [(set_attr "type" "ssecvt1") 11454 (set_attr "mode" "<MODE>")]) 11455 11456(define_insn "xop_maskcmp<mode>3" 11457 [(set (match_operand:VI_128 0 "register_operand" "=x") 11458 (match_operator:VI_128 1 "ix86_comparison_int_operator" 11459 [(match_operand:VI_128 2 "register_operand" "x") 11460 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] 11461 "TARGET_XOP" 11462 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" 11463 [(set_attr "type" "sse4arg") 11464 (set_attr "prefix_data16" "0") 11465 (set_attr "prefix_rep" "0") 11466 (set_attr "prefix_extra" "2") 11467 (set_attr "length_immediate" "1") 11468 (set_attr "mode" "TI")]) 11469 11470(define_insn "xop_maskcmp_uns<mode>3" 11471 [(set (match_operand:VI_128 0 "register_operand" "=x") 11472 (match_operator:VI_128 1 "ix86_comparison_uns_operator" 11473 [(match_operand:VI_128 2 "register_operand" "x") 11474 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] 11475 "TARGET_XOP" 11476 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" 11477 [(set_attr "type" "ssecmp") 11478 (set_attr "prefix_data16" "0") 11479 (set_attr "prefix_rep" "0") 11480 (set_attr "prefix_extra" "2") 11481 (set_attr "length_immediate" "1") 11482 (set_attr "mode" "TI")]) 11483 11484;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* 11485;; and pcomneu* not to be converted to the 
;; signed ones in case somebody needs
;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP, so it is not a
;; bare comparison rtx like the one in xop_maskcmp_uns<mode>3.
(define_insn "xop_maskcmp_uns2<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (unspec:VI_128
         [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
           [(match_operand:VI_128 2 "register_operand" "x")
            (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
         UNSPEC_XOP_UNSIGNED_CMP))]
  "TARGET_XOP"
  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
;; being added here to be complete.
;; Operand 3 selects the mnemonic: nonzero emits vpcomtrue, zero emits
;; vpcomfalse.
(define_insn "xop_pcom_tf<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
        (unspec:VI_128
          [(match_operand:VI_128 1 "register_operand" "x")
           (match_operand:VI_128 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_XOP_TRUEFALSE))]
  "TARGET_XOP"
{
  return ((INTVAL (operands[3]) != 0)
          ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
          : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
}
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; vpermil2ps/vpermil2pd: operands 1 and 2 are float vectors in the
;; pattern's mode, operand 3 is a vector in the matching integer vector
;; mode and operand 4 is an immediate in the range 0..3.
;; Operand 2 must not carry the '%' (commutative) modifier: '%' pairs an
;; operand with the one that follows it, and operand 3 has a different
;; mode and a different role, so the two can never be interchanged.
(define_insn "xop_vpermil2<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "x")
           (match_operand:VF 2 "nonimmediate_operand" "x")
           (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
           (match_operand:SI 4 "const_0_to_3_operand" "n")]
          UNSPEC_VPERMIL2))]
  "TARGET_XOP"
  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
  [(set_attr "type" "sse4arg")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; AES round instruction.  Alternative 0 is the two-operand legacy
;; encoding (operand 1 tied to the destination); alternative 1 is the
;; three-operand VEX encoding.
(define_insn "aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
                      (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
                     UNSPEC_AESENC))]
  "TARGET_AES"
  "@
   aesenc\t{%2, %0|%0, %2}
   vaesenc\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

(define_insn "aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
                      (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
                     UNSPEC_AESENCLAST))]
  "TARGET_AES"
  "@
   aesenclast\t{%2, %0|%0, %2}
   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr
"mode" "TI")]) 11568 11569(define_insn "aesdec" 11570 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 11571 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 11572 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 11573 UNSPEC_AESDEC))] 11574 "TARGET_AES" 11575 "@ 11576 aesdec\t{%2, %0|%0, %2} 11577 vaesdec\t{%2, %1, %0|%0, %1, %2}" 11578 [(set_attr "isa" "noavx,avx") 11579 (set_attr "type" "sselog1") 11580 (set_attr "prefix_extra" "1") 11581 (set_attr "prefix" "orig,vex") 11582 (set_attr "mode" "TI")]) 11583 11584(define_insn "aesdeclast" 11585 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 11586 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 11587 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 11588 UNSPEC_AESDECLAST))] 11589 "TARGET_AES" 11590 "@ 11591 aesdeclast\t{%2, %0|%0, %2} 11592 vaesdeclast\t{%2, %1, %0|%0, %1, %2}" 11593 [(set_attr "isa" "noavx,avx") 11594 (set_attr "type" "sselog1") 11595 (set_attr "prefix_extra" "1") 11596 (set_attr "prefix" "orig,vex") 11597 (set_attr "mode" "TI")]) 11598 11599(define_insn "aesimc" 11600 [(set (match_operand:V2DI 0 "register_operand" "=x") 11601 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")] 11602 UNSPEC_AESIMC))] 11603 "TARGET_AES" 11604 "%vaesimc\t{%1, %0|%0, %1}" 11605 [(set_attr "type" "sselog1") 11606 (set_attr "prefix_extra" "1") 11607 (set_attr "prefix" "maybe_vex") 11608 (set_attr "mode" "TI")]) 11609 11610(define_insn "aeskeygenassist" 11611 [(set (match_operand:V2DI 0 "register_operand" "=x") 11612 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm") 11613 (match_operand:SI 2 "const_0_to_255_operand" "n")] 11614 UNSPEC_AESKEYGENASSIST))] 11615 "TARGET_AES" 11616 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" 11617 [(set_attr "type" "sselog1") 11618 (set_attr "prefix_extra" "1") 11619 (set_attr "length_immediate" "1") 11620 (set_attr "prefix" "maybe_vex") 11621 (set_attr "mode" "TI")]) 11622 11623(define_insn "pclmulqdq" 11624 
[(set (match_operand:V2DI 0 "register_operand" "=x,x") 11625 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 11626 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm") 11627 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 11628 UNSPEC_PCLMUL))] 11629 "TARGET_PCLMUL" 11630 "@ 11631 pclmulqdq\t{%3, %2, %0|%0, %2, %3} 11632 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11633 [(set_attr "isa" "noavx,avx") 11634 (set_attr "type" "sselog1") 11635 (set_attr "prefix_extra" "1") 11636 (set_attr "length_immediate" "1") 11637 (set_attr "prefix" "orig,vex") 11638 (set_attr "mode" "TI")]) 11639 11640(define_expand "avx_vzeroall" 11641 [(match_par_dup 0 [(const_int 0)])] 11642 "TARGET_AVX" 11643{ 11644 int nregs = TARGET_64BIT ? 16 : 8; 11645 int regno; 11646 11647 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1)); 11648 11649 XVECEXP (operands[0], 0, 0) 11650 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx), 11651 UNSPECV_VZEROALL); 11652 11653 for (regno = 0; regno < nregs; regno++) 11654 XVECEXP (operands[0], 0, regno + 1) 11655 = gen_rtx_SET (VOIDmode, 11656 gen_rtx_REG (V8SImode, SSE_REGNO (regno)), 11657 CONST0_RTX (V8SImode)); 11658}) 11659 11660(define_insn "*avx_vzeroall" 11661 [(match_parallel 0 "vzeroall_operation" 11662 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])] 11663 "TARGET_AVX" 11664 "vzeroall" 11665 [(set_attr "type" "sse") 11666 (set_attr "modrm" "0") 11667 (set_attr "memory" "none") 11668 (set_attr "prefix" "vex") 11669 (set_attr "mode" "OI")]) 11670 11671;; Clear the upper 128bits of AVX registers, equivalent to a NOP 11672;; if the upper 128bits are unused. 
(define_insn "avx_vzeroupper"
  [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
                    UNSPECV_VZEROUPPER)]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Maps each integer vector mode to the two-quadword-wide mode with the
;; same element type (the 256-bit modes map to their half-size mode, the
;; 128-bit modes map to themselves).
(define_mode_attr AVXTOSSEMODE
  [(V4DI "V2DI") (V2DI "V2DI")
   (V8SI "V4SI") (V4SI "V4SI")
   (V16HI "V8HI") (V8HI "V8HI")
   (V32QI "V16QI") (V16QI "V16QI")])

;; Broadcast element 0 of operand 1 to every element of the destination.
;; The source is always taken in the AVXTOSSEMODE of the destination mode.
(define_insn "avx2_pbroadcast<mode>"
  [(set (match_operand:VI 0 "register_operand" "=x")
        (vec_duplicate:VI
          (vec_select:<ssescalarmode>
            (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; vpermd.  Operand 1 may be memory, operand 2 must be a register; the
;; template prints operand 2 before operand 1 in Intel syntax.
(define_insn "avx2_permvarv8si"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (unspec:V8SI
          [(match_operand:V8SI 1 "nonimmediate_operand" "xm")
           (match_operand:V8SI 2 "register_operand" "x")]
          UNSPEC_VPERMSI))]
  "TARGET_AVX2"
  "vpermd\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; vpermpd with an 8-bit immediate selector.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; the "xm" constraint; with register_operand the memory alternative
;; could never be matched by the pattern itself.
(define_insn "avx2_permv4df"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (unspec:V4DF
          [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMDF))]
  "TARGET_AVX2"
  "vpermpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

(define_insn "avx2_permvarv8sf"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (unspec:V8SF
          [(match_operand:V8SF
1 "nonimmediate_operand" "xm") 11732 (match_operand:V8SI 2 "register_operand" "x")] 11733 UNSPEC_VPERMSF))] 11734 "TARGET_AVX2" 11735 "vpermps\t{%1, %2, %0|%0, %2, %1}" 11736 [(set_attr "type" "sselog") 11737 (set_attr "prefix" "vex") 11738 (set_attr "mode" "OI")]) 11739 11740(define_expand "avx2_permv4di" 11741 [(match_operand:V4DI 0 "register_operand" "") 11742 (match_operand:V4DI 1 "nonimmediate_operand" "") 11743 (match_operand:SI 2 "const_0_to_255_operand" "")] 11744 "TARGET_AVX2" 11745{ 11746 int mask = INTVAL (operands[2]); 11747 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1], 11748 GEN_INT ((mask >> 0) & 3), 11749 GEN_INT ((mask >> 2) & 3), 11750 GEN_INT ((mask >> 4) & 3), 11751 GEN_INT ((mask >> 6) & 3))); 11752 DONE; 11753}) 11754 11755(define_insn "avx2_permv4di_1" 11756 [(set (match_operand:V4DI 0 "register_operand" "=x") 11757 (vec_select:V4DI 11758 (match_operand:V4DI 1 "nonimmediate_operand" "xm") 11759 (parallel [(match_operand 2 "const_0_to_3_operand" "") 11760 (match_operand 3 "const_0_to_3_operand" "") 11761 (match_operand 4 "const_0_to_3_operand" "") 11762 (match_operand 5 "const_0_to_3_operand" "")])))] 11763 "TARGET_AVX2" 11764{ 11765 int mask = 0; 11766 mask |= INTVAL (operands[2]) << 0; 11767 mask |= INTVAL (operands[3]) << 2; 11768 mask |= INTVAL (operands[4]) << 4; 11769 mask |= INTVAL (operands[5]) << 6; 11770 operands[2] = GEN_INT (mask); 11771 return "vpermq\t{%2, %1, %0|%0, %1, %2}"; 11772} 11773 [(set_attr "type" "sselog") 11774 (set_attr "prefix" "vex") 11775 (set_attr "mode" "OI")]) 11776 11777(define_insn "avx2_permv2ti" 11778 [(set (match_operand:V4DI 0 "register_operand" "=x") 11779 (unspec:V4DI 11780 [(match_operand:V4DI 1 "register_operand" "x") 11781 (match_operand:V4DI 2 "nonimmediate_operand" "xm") 11782 (match_operand:SI 3 "const_0_to_255_operand" "n")] 11783 UNSPEC_VPERMTI))] 11784 "TARGET_AVX2" 11785 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11786 [(set_attr "type" "sselog") 11787 (set_attr "prefix" "vex") 
11788 (set_attr "mode" "OI")]) 11789 11790(define_insn "avx2_vec_dupv4df" 11791 [(set (match_operand:V4DF 0 "register_operand" "=x") 11792 (vec_duplicate:V4DF 11793 (vec_select:DF 11794 (match_operand:V2DF 1 "register_operand" "x") 11795 (parallel [(const_int 0)]))))] 11796 "TARGET_AVX2" 11797 "vbroadcastsd\t{%1, %0|%0, %1}" 11798 [(set_attr "type" "sselog1") 11799 (set_attr "prefix" "vex") 11800 (set_attr "mode" "V4DF")]) 11801 11802;; Modes handled by AVX vec_dup patterns. 11803(define_mode_iterator AVX_VEC_DUP_MODE 11804 [V8SI V8SF V4DI V4DF]) 11805 11806(define_insn "vec_dup<mode>" 11807 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x") 11808 (vec_duplicate:AVX_VEC_DUP_MODE 11809 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))] 11810 "TARGET_AVX" 11811 "@ 11812 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1} 11813 #" 11814 [(set_attr "type" "ssemov") 11815 (set_attr "prefix_extra" "1") 11816 (set_attr "prefix" "vex") 11817 (set_attr "mode" "V8SF")]) 11818 11819(define_insn "avx2_vbroadcasti128_<mode>" 11820 [(set (match_operand:VI_256 0 "register_operand" "=x") 11821 (vec_concat:VI_256 11822 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m") 11823 (match_dup 1)))] 11824 "TARGET_AVX2" 11825 "vbroadcasti128\t{%1, %0|%0, %1}" 11826 [(set_attr "type" "ssemov") 11827 (set_attr "prefix_extra" "1") 11828 (set_attr "prefix" "vex") 11829 (set_attr "mode" "OI")]) 11830 11831(define_split 11832 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "") 11833 (vec_duplicate:AVX_VEC_DUP_MODE 11834 (match_operand:<ssescalarmode> 1 "register_operand" "")))] 11835 "TARGET_AVX && reload_completed" 11836 [(set (match_dup 2) 11837 (vec_duplicate:<ssehalfvecmode> (match_dup 1))) 11838 (set (match_dup 0) 11839 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))] 11840 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));") 11841 11842(define_insn "avx_vbroadcastf128_<mode>" 11843 [(set (match_operand:V_256 0 
"register_operand" "=x,x,x") 11844 (vec_concat:V_256 11845 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x") 11846 (match_dup 1)))] 11847 "TARGET_AVX" 11848 "@ 11849 vbroadcast<i128>\t{%1, %0|%0, %1} 11850 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1} 11851 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}" 11852 [(set_attr "type" "ssemov,sselog1,sselog1") 11853 (set_attr "prefix_extra" "1") 11854 (set_attr "length_immediate" "0,1,1") 11855 (set_attr "prefix" "vex") 11856 (set_attr "mode" "<sseinsnmode>")]) 11857 11858;; Recognize broadcast as a vec_select as produced by builtin_vec_perm. 11859;; If it so happens that the input is in memory, use vbroadcast. 11860;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128). 11861(define_insn "*avx_vperm_broadcast_v4sf" 11862 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") 11863 (vec_select:V4SF 11864 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x") 11865 (match_parallel 2 "avx_vbroadcast_operand" 11866 [(match_operand 3 "const_int_operand" "C,n,n")])))] 11867 "TARGET_AVX" 11868{ 11869 int elt = INTVAL (operands[3]); 11870 switch (which_alternative) 11871 { 11872 case 0: 11873 case 1: 11874 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4); 11875 return "vbroadcastss\t{%1, %0|%0, %1}"; 11876 case 2: 11877 operands[2] = GEN_INT (elt * 0x55); 11878 return "vpermilps\t{%2, %1, %0|%0, %1, %2}"; 11879 default: 11880 gcc_unreachable (); 11881 } 11882} 11883 [(set_attr "type" "ssemov,ssemov,sselog1") 11884 (set_attr "prefix_extra" "1") 11885 (set_attr "length_immediate" "0,0,1") 11886 (set_attr "prefix" "vex") 11887 (set_attr "mode" "SF,SF,V4SF")]) 11888 11889(define_insn_and_split "*avx_vperm_broadcast_<mode>" 11890 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x") 11891 (vec_select:VF_256 11892 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x") 11893 (match_parallel 2 "avx_vbroadcast_operand" 11894 [(match_operand 3 "const_int_operand" "C,n,n")])))] 
11895 "TARGET_AVX" 11896 "#" 11897 "&& reload_completed" 11898 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))] 11899{ 11900 rtx op0 = operands[0], op1 = operands[1]; 11901 int elt = INTVAL (operands[3]); 11902 11903 if (REG_P (op1)) 11904 { 11905 int mask; 11906 11907 /* Shuffle element we care about into all elements of the 128-bit lane. 11908 The other lane gets shuffled too, but we don't care. */ 11909 if (<MODE>mode == V4DFmode) 11910 mask = (elt & 1 ? 15 : 0); 11911 else 11912 mask = (elt & 3) * 0x55; 11913 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask))); 11914 11915 /* Shuffle the lane we care about into both lanes of the dest. */ 11916 mask = (elt / (<ssescalarnum> / 2)) * 0x11; 11917 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask))); 11918 DONE; 11919 } 11920 11921 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode, 11922 elt * GET_MODE_SIZE (<ssescalarmode>mode)); 11923}) 11924 11925(define_expand "avx_vpermil<mode>" 11926 [(set (match_operand:VF2 0 "register_operand" "") 11927 (vec_select:VF2 11928 (match_operand:VF2 1 "nonimmediate_operand" "") 11929 (match_operand:SI 2 "const_0_to_255_operand" "")))] 11930 "TARGET_AVX" 11931{ 11932 int mask = INTVAL (operands[2]); 11933 rtx perm[<ssescalarnum>]; 11934 11935 perm[0] = GEN_INT (mask & 1); 11936 perm[1] = GEN_INT ((mask >> 1) & 1); 11937 if (<MODE>mode == V4DFmode) 11938 { 11939 perm[2] = GEN_INT (((mask >> 2) & 1) + 2); 11940 perm[3] = GEN_INT (((mask >> 3) & 1) + 2); 11941 } 11942 11943 operands[2] 11944 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); 11945}) 11946 11947(define_expand "avx_vpermil<mode>" 11948 [(set (match_operand:VF1 0 "register_operand" "") 11949 (vec_select:VF1 11950 (match_operand:VF1 1 "nonimmediate_operand" "") 11951 (match_operand:SI 2 "const_0_to_255_operand" "")))] 11952 "TARGET_AVX" 11953{ 11954 int mask = INTVAL (operands[2]); 11955 rtx perm[<ssescalarnum>]; 11956 11957 perm[0] = GEN_INT (mask & 3); 11958 
perm[1] = GEN_INT ((mask >> 2) & 3); 11959 perm[2] = GEN_INT ((mask >> 4) & 3); 11960 perm[3] = GEN_INT ((mask >> 6) & 3); 11961 if (<MODE>mode == V8SFmode) 11962 { 11963 perm[4] = GEN_INT ((mask & 3) + 4); 11964 perm[5] = GEN_INT (((mask >> 2) & 3) + 4); 11965 perm[6] = GEN_INT (((mask >> 4) & 3) + 4); 11966 perm[7] = GEN_INT (((mask >> 6) & 3) + 4); 11967 } 11968 11969 operands[2] 11970 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); 11971}) 11972 11973(define_insn "*avx_vpermilp<mode>" 11974 [(set (match_operand:VF 0 "register_operand" "=x") 11975 (vec_select:VF 11976 (match_operand:VF 1 "nonimmediate_operand" "xm") 11977 (match_parallel 2 "" 11978 [(match_operand 3 "const_int_operand" "")])))] 11979 "TARGET_AVX 11980 && avx_vpermilp_parallel (operands[2], <MODE>mode)" 11981{ 11982 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1; 11983 operands[2] = GEN_INT (mask); 11984 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"; 11985} 11986 [(set_attr "type" "sselog") 11987 (set_attr "prefix_extra" "1") 11988 (set_attr "length_immediate" "1") 11989 (set_attr "prefix" "vex") 11990 (set_attr "mode" "<MODE>")]) 11991 11992(define_insn "avx_vpermilvar<mode>3" 11993 [(set (match_operand:VF 0 "register_operand" "=x") 11994 (unspec:VF 11995 [(match_operand:VF 1 "register_operand" "x") 11996 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")] 11997 UNSPEC_VPERMIL))] 11998 "TARGET_AVX" 11999 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 12000 [(set_attr "type" "sselog") 12001 (set_attr "prefix_extra" "1") 12002 (set_attr "prefix" "vex") 12003 (set_attr "mode" "<MODE>")]) 12004 12005(define_expand "avx_vperm2f128<mode>3" 12006 [(set (match_operand:AVX256MODE2P 0 "register_operand" "") 12007 (unspec:AVX256MODE2P 12008 [(match_operand:AVX256MODE2P 1 "register_operand" "") 12009 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "") 12010 (match_operand:SI 3 "const_0_to_255_operand" "")] 12011 UNSPEC_VPERMIL2F128))] 12012 
"TARGET_AVX" 12013{ 12014 int mask = INTVAL (operands[3]); 12015 if ((mask & 0x88) == 0) 12016 { 12017 rtx perm[<ssescalarnum>], t1, t2; 12018 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2; 12019 12020 base = (mask & 3) * nelt2; 12021 for (i = 0; i < nelt2; ++i) 12022 perm[i] = GEN_INT (base + i); 12023 12024 base = ((mask >> 4) & 3) * nelt2; 12025 for (i = 0; i < nelt2; ++i) 12026 perm[i + nelt2] = GEN_INT (base + i); 12027 12028 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode, 12029 operands[1], operands[2]); 12030 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm)); 12031 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1); 12032 t2 = gen_rtx_SET (VOIDmode, operands[0], t2); 12033 emit_insn (t2); 12034 DONE; 12035 } 12036}) 12037 12038;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which 12039;; means that in order to represent this properly in rtl we'd have to 12040;; nest *another* vec_concat with a zero operand and do the select from 12041;; a 4x wide vector. That doesn't seem very nice. 
;; Unspec form of vperm2f128/vperm2i128: the immediate is passed through
;; to the instruction unchanged.
(define_insn "*avx_vperm2f128<mode>_full"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "x")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; vec_select form: selects from the concatenation of operands 1 and 2
;; using a selector parallel accepted by avx_vperm2f128_parallel.  That
;; helper's nonzero result minus one is the immediate.  Immediates 0x12
;; and 0x20 are printed as vinsert of the 128-bit view (%x2) of operand 2.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (vec_select:AVX256MODE2P
          (vec_concat:<ssedoublevecmode>
            (match_operand:AVX256MODE2P 1 "register_operand" "x")
            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
          (match_parallel 3 ""
            [(match_operand 4 "const_int_operand" "")])))]
  "TARGET_AVX
   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
  int imm8 = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;

  switch (imm8)
    {
    case 0x12:
      return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
    case 0x20:
      return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
    default:
      /* General case: print the computed immediate.  */
      operands[3] = GEN_INT (imm8);
      return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    }
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; vinsertf128 expander: operand 3 (0 or 1) chooses between the
;; vec_set_lo_<mode> and vec_set_hi_<mode> patterns.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:V_256 0 "register_operand" "")
   (match_operand:V_256 1 "register_operand" "")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  rtx (*gen_set_half) (rtx, rtx, rtx);
  HOST_WIDE_INT half = INTVAL (operands[3]);

  if (half == 0)
    gen_set_half = gen_vec_set_lo_<mode>;
  else if (half == 1)
    gen_set_half = gen_vec_set_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_set_half (operands[0], operands[1], operands[2]));
  DONE;
})

;; The vec_set_lo_* patterns below build the result from operand 2 as the
;; low half and the upper-half elements of operand 1.  The vec_set_hi_*
;; patterns keep the lower-half elements of operand 1 and take operand 2
;; as the high half.

(define_insn "avx2_vec_set_lo_v4di"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_concat:V4DI
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")
          (vec_select:V2DI
            (match_operand:V4DI 1 "register_operand" "x")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX2"
  "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

(define_insn "avx2_vec_set_hi_v4di"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_concat:V4DI
          (vec_select:V2DI
            (match_operand:V4DI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Four-element 256-bit modes (VI8F_256).
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
        (vec_concat:VI8F_256
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<ssehalfvecmode>
            (match_operand:VI8F_256 1 "register_operand" "x")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
        (vec_concat:VI8F_256
          (vec_select:<ssehalfvecmode>
            (match_operand:VI8F_256 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Eight-element 256-bit modes (VI4F_256).
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
        (vec_concat:VI4F_256
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "x")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
        (vec_concat:VI4F_256
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; V16HI.
(define_insn "vec_set_lo_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

(define_insn "vec_set_hi_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; V32QI.
(define_insn "vec_set_lo_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 16) (const_int 17)
                       (const_int 18) (const_int 19)
                       (const_int 20) (const_int 21)
                       (const_int 22) (const_int 23)
                       (const_int 24) (const_int 25)
                       (const_int 26) (const_int 27)
                       (const_int 28) (const_int 29)
                       (const_int 30) (const_int 31)]))))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

(define_insn "vec_set_hi_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)
                       (const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Masked load: an unspec of an integer-vector register (operand 2) and a
;; memory source (operand 1).
(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 2 "register_operand" "x")
           (match_operand:V48_AVX2 1 "memory_operand" "m")]
          UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Masked store: the memory destination also appears as an input
;; (match_dup 0) of the unspec.
(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 1 "register_operand" "x")
           (match_operand:V48_AVX2 2 "register_operand" "x")
           (match_dup 0)]
          UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
;; Cast from a 128-bit vector to a 256-bit vector (UNSPEC_CAST).  Always
;; split after reload into a plain move: a register destination is
;; re-viewed in the half-width mode, otherwise the source register is
;; re-viewed in the full-width mode.
(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVX256MODE2P
          [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_CAST))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  if (REG_P (op0))
    op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
  else
    op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
  emit_move_insn (op0, op1);
  DONE;
})

;; 256-bit vector initialisation; delegates to ix86_expand_vector_init.
(define_expand "vec_init<mode>"
  [(match_operand:V_256 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_AVX"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; vextracti128 expander: operand 2 (0 or 1) chooses between the
;; vec_extract_lo_v4di and vec_extract_hi_v4di patterns.
(define_expand "avx2_extracti128"
  [(match_operand:V2DI 0 "nonimmediate_operand" "")
   (match_operand:V4DI 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX2"
{
  rtx (*insn)(rtx, rtx);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_v4di;
      break;
    case 1:
      insn = gen_vec_extract_hi_v4di;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1]));
  DONE;
})

;; vinserti128 expander: operand 3 (0 or 1) chooses between the
;; avx2_vec_set_lo_v4di and avx2_vec_set_hi_v4di patterns.
(define_expand "avx2_inserti128"
  [(match_operand:V4DI 0 "register_operand" "")
   (match_operand:V4DI 1 "register_operand" "")
   (match_operand:V2DI 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX2"
{
  rtx (*insn)(rtx, rtx, rtx);

  switch (INTVAL (operands[3]))
    {
    case 0:
      insn = gen_avx2_vec_set_lo_v4di;
      break;
    case 1:
      insn = gen_avx2_vec_set_hi_v4di;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1], operands[2]));
  DONE;
})

;; Arithmetic right shift with a vector of shift counts (vpsravd).
(define_insn "avx2_ashrv<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
        (ashiftrt:VI4_AVX2
          (match_operand:VI4_AVX2 1 "register_operand" "x")
          (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Shifts from the any_lshift code iterator with a vector of shift counts.
(define_insn "avx2_<shift_insn>v<mode>"
  [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
        (any_lshift:VI48_AVX2
          (match_operand:VI48_AVX2 1 "register_operand" "x")
          (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Concatenate two 128-bit halves into a 256-bit vector.  Alternative 0
;; inserts operand 2 as the upper half with vinsert.  In alternative 1
;; operand 2 satisfies constraint "C", and a 128-bit move of operand 1 into
;; the low half of the destination (%x0) is emitted instead.
(define_insn "avx_vec_concat<mode>"
  [(set (match_operand:V_256 0 "register_operand" "=x,x")
        (vec_concat:V_256
          (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
          (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
  "TARGET_AVX"
{
  switch (which_alternative)
    {
    case 0:
      return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
    case 1:
      /* Pick the move mnemonic from the insn's "mode" attribute.  */
      switch (get_attr_mode (insn))
        {
        case MODE_V8SF:
          return "vmovaps\t{%1, %x0|%x0, %1}";
        case MODE_V4DF:
          return "vmovapd\t{%1, %x0|%x0, %1}";
        default:
          return "vmovdqa\t{%1, %x0|%x0, %1}";
        }
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog,ssemov")
   (set_attr "prefix_extra" "1,*")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; F16C conversions.

;; Register form: modelled as elements 0..3 of a V8SF unspec of the V8HI
;; source.
(define_insn "vcvtph2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
                       UNSPEC_VCVTPH2PS)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

;; Memory form: converts a V4HI memory operand.
(define_insn "*vcvtph2ps_load"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
                     UNSPEC_VCVTPH2PS))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

(define_insn "vcvtph2ps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
                     UNSPEC_VCVTPH2PS))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; The V8HI result is the V4HI conversion result concatenated with a zero
;; V4HI vector, which the expander supplies as operand 3.
(define_expand "vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (vec_concat:V8HI
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
                        (match_operand:SI 2 "const_0_to_255_operand" "")]
                       UNSPEC_VCVTPS2PH)
          (match_dup 3)))]
  "TARGET_F16C"
  "operands[3] = CONST0_RTX (V4HImode);")

(define_insn "*vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
                        (match_operand:SI 2 "const_0_to_255_operand" "N")]
                       UNSPEC_VCVTPS2PH)
          (match_operand:V4HI 3 "const0_operand" "")))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

;; Store form: writes only the V4HI conversion result to memory.
(define_insn "*vcvtps2ph_store"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
        (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

(define_insn "vcvtps2ph256"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
        (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; For gather* insn patterns
(define_mode_iterator VEC_GATHER_MODE
  [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector operand in the gathersi patterns.
(define_mode_attr VEC_GATHER_IDXSI
  [(V2DI "V4SI") (V2DF "V4SI")
   (V4DI "V4SI") (V4DF "V4SI")
   (V4SI "V4SI") (V4SF "V4SI")
   (V8SI "V8SI") (V8SF "V8SI")])
;; Mode of the index vector operand in the gatherdi patterns.
(define_mode_attr VEC_GATHER_IDXDI
  [(V2DI "V2DI") (V2DF "V2DI")
   (V4DI "V4DI") (V4DF "V4DI")
   (V4SI "V2DI") (V4SF "V2DI")
   (V8SI "V4DI") (V8SF "V4DI")])
;; Mode of the first and last unspec operands in the gatherdi patterns;
;; it differs from the iterator mode only for V8SI and V8SF.
(define_mode_attr VEC_GATHER_SRCDI
  [(V2DI "V2DI") (V2DF "V2DF")
   (V4DI "V4DI") (V4DF "V4DF")
   (V4SI "V4SI") (V4SF "V4SF")
   (V8SI "V4SI") (V8SF "V4SF")])

;; Gather expander for SImode-element index vectors.  Operand 7 is built
;; here as an UNSPEC_VSIBADDR of the base address (2), the index vector (3)
;; and the scale (5), and becomes the address of the gathered memory.
(define_expand "avx2_gathersi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand" "")
                           (match_operand:<VEC_GATHER_IDXSI>
                              3 "register_operand" "")
                           (match_operand:SI 5 "const1248_operand" "")]))
                      (mem:BLK (scratch))
                      (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})

;; In the gather insns below the first unspec operand, when it is a
;; register, is tied to the destination (constraint "0"), and the last
;; unspec operand is tied to the clobbered scratch, operand 1
;; (constraint "1").

(define_insn "*avx2_gathersi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Variant whose first unspec operand is (pc) rather than a register.
(define_insn "*avx2_gathersi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Gather expander for DImode-element index vectors.  Operands 1 and 4 use
;; <VEC_GATHER_SRCDI>; operand 7 is built as in avx2_gathersi<mode>.
(define_expand "avx2_gatherdi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand" "")
                           (match_operand:<VEC_GATHER_IDXDI>
                              3 "register_operand" "")
                           (match_operand:SI 5 "const1248_operand" "")]))
                      (mem:BLK (scratch))
                      (match_operand:<VEC_GATHER_SRCDI>
                        4 "register_operand" "")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})

;; The template prints operands 2 and 5, which have mode
;; <VEC_GATHER_SRCDI>, instead of the operands 0 and 1 they are tied to.
(define_insn "*avx2_gatherdi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Variant whose first unspec operand is (pc).  When the destination mode
;; differs from <VEC_GATHER_SRCDI>, the destination is printed with the
;; %x modifier.
(define_insn "*avx2_gatherdi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
{
  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; VI4F_256 variants in which only elements 0..3 of the gather result are
;; selected into a <VEC_GATHER_SRCDI> destination.
(define_insn "*avx2_gatherdi<mode>_3"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
             (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 3 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                   (match_operand:SI 6 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
             UNSPEC_GATHER)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "*avx2_gatherdi<mode>_4"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(pc)
             (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 2 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                   (match_operand:SI 5 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
            UNSPEC_GATHER)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])