;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

;; Unspec codes, grouped by the ISA extension they were added for.
(define_c_enum "unspec" [
  ;; SSE
  UNSPEC_MOVNT
  UNSPEC_LOADU
  UNSPEC_STOREU

  ;; SSE3
  UNSPEC_LDDQU

  ;; SSSE3
  UNSPEC_PSHUFB
  UNSPEC_PSIGN
  UNSPEC_PALIGNR

  ;; For SSE4A support
  UNSPEC_EXTRQI
  UNSPEC_EXTRQ
  UNSPEC_INSERTQI
  UNSPEC_INSERTQ

  ;; For SSE4.1 support
  UNSPEC_BLENDV
  UNSPEC_INSERTPS
  UNSPEC_DP
  UNSPEC_MOVNTDQA
  UNSPEC_MPSADBW
  UNSPEC_PHMINPOSUW
  UNSPEC_PTEST

  ;; For SSE4.2 support
  UNSPEC_PCMPESTR
  UNSPEC_PCMPISTR

  ;; For FMA4 and XOP support
  UNSPEC_FMADDSUB
  UNSPEC_XOP_UNSIGNED_CMP
  UNSPEC_XOP_TRUEFALSE
  UNSPEC_XOP_PERMUTE
  UNSPEC_FRCZ

  ;; For AES support
  UNSPEC_AESENC
  UNSPEC_AESENCLAST
  UNSPEC_AESDEC
  UNSPEC_AESDECLAST
  UNSPEC_AESIMC
  UNSPEC_AESKEYGENASSIST

  ;; For PCLMUL support
  UNSPEC_PCLMUL

  ;; For AVX support
  UNSPEC_PCMP
  UNSPEC_VPERMIL
  UNSPEC_VPERMIL2
  UNSPEC_VPERMIL2F128
  UNSPEC_CAST
  UNSPEC_VTESTP
  UNSPEC_VCVTPH2PS
  UNSPEC_VCVTPS2PH

  ;; For AVX2 support
  UNSPEC_VPERMSI
  UNSPEC_VPERMDF
  UNSPEC_VPERMSF
  UNSPEC_VPERMTI
  UNSPEC_GATHER
  UNSPEC_VSIBADDR
])

;; Volatile unspec codes.
(define_c_enum "unspecv" [
  UNSPECV_LDMXCSR
  UNSPECV_STMXCSR
  UNSPECV_CLFLUSH
  UNSPECV_MONITOR
  UNSPECV_MWAIT
  UNSPECV_VZEROALL
  UNSPECV_VZEROUPPER
])

;; All vector modes including V?TImode, used in move patterns.
(define_mode_iterator V16
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI
   (V2TI "TARGET_AVX") V1TI
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") V2DF])

;; All vector modes
(define_mode_iterator V
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All 128bit vector modes
(define_mode_iterator V_128
  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])

;; All 256bit vector modes
(define_mode_iterator V_256
  [V32QI V16HI V8SI V4DI V8SF V4DF])

;; All vector float modes
(define_mode_iterator VF
  [(V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; All SFmode vector float modes
(define_mode_iterator VF1
  [(V8SF "TARGET_AVX") V4SF])

;; All DFmode vector float modes
(define_mode_iterator VF2
  [(V4DF "TARGET_AVX") V2DF])

;; All 128bit vector float modes
(define_mode_iterator VF_128
  [V4SF (V2DF "TARGET_SSE2")])

;; All 256bit vector float modes
(define_mode_iterator VF_256
  [V8SF V4DF])

;; All vector integer modes
(define_mode_iterator VI
  [(V32QI "TARGET_AVX") V16QI
   (V16HI "TARGET_AVX") V8HI
   (V8SI "TARGET_AVX") V4SI
   (V4DI "TARGET_AVX") V2DI])

;; All vector integer modes; the 256bit modes require TARGET_AVX2
;; rather than TARGET_AVX.
(define_mode_iterator VI_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

;; All QImode vector integer modes
(define_mode_iterator VI1
  [(V32QI "TARGET_AVX") V16QI])

;; All DImode vector integer modes
(define_mode_iterator VI8
  [(V4DI "TARGET_AVX") V2DI])

;; Vector integer modes of a single element size (QI, HI, SI, DI);
;; the 256bit mode of each pair requires TARGET_AVX2.
(define_mode_iterator VI1_AVX2
  [(V32QI "TARGET_AVX2") V16QI])

(define_mode_iterator VI2_AVX2
  [(V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI4_AVX2
  [(V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI8_AVX2
  [(V4DI "TARGET_AVX2") V2DI])

;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
  [(V2TI "TARGET_AVX2") V1TI])

;; ??? This should probably be dropped in favor of VIMAX_AVX2.
(define_mode_iterator SSESCALARMODE
  [(V2TI "TARGET_AVX2") TI])

;; Combinations of vector integer element sizes; the 256bit modes
;; require TARGET_AVX2.
(define_mode_iterator VI12_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI])

(define_mode_iterator VI24_AVX2
  [(V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI124_AVX2
  [(V32QI "TARGET_AVX2") V16QI
   (V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI])

(define_mode_iterator VI248_AVX2
  [(V16HI "TARGET_AVX2") V8HI
   (V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

(define_mode_iterator VI48_AVX2
  [(V8SI "TARGET_AVX2") V4SI
   (V4DI "TARGET_AVX2") V2DI])

;; Vector modes with SI/SF or DI/DF elements.  The float modes are
;; unconditional; all of the integer modes require TARGET_AVX2.
(define_mode_iterator V48_AVX2
  [V4SF V2DF
   V8SF V4DF
   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])

;; Mapping from integer vector mode to an ISA name:
;; "sse2" for the 128bit modes, "avx2" for the 256bit modes.
(define_mode_attr sse2_avx2
  [(V16QI "sse2") (V32QI "avx2")
   (V8HI "sse2") (V16HI "avx2")
   (V4SI "sse2") (V8SI "avx2")
   (V2DI "sse2") (V4DI "avx2")
   (V1TI "sse2") (V2TI "avx2")])

;; Likewise, with "ssse3" for the 128bit modes.  Note that the 128bit
;; full-width entry is keyed on TI here, not V1TI.
(define_mode_attr ssse3_avx2
  [(V16QI "ssse3") (V32QI "avx2")
   (V8HI "ssse3") (V16HI "avx2")
   (V4SI "ssse3") (V8SI "avx2")
   (V2DI "ssse3") (V4DI "avx2")
   (TI "ssse3") (V2TI "avx2")])

;; Likewise, with "sse4_1" for the 128bit modes.
(define_mode_attr sse4_1_avx2
  [(V16QI "sse4_1") (V32QI "avx2")
   (V8HI "sse4_1") (V16HI "avx2")
   (V4SI "sse4_1") (V8SI "avx2")
   (V2DI "sse4_1") (V4DI "avx2")])

;; "avx" for the vector float modes, "avx2" for the SI/DI vector
;; integer modes.
(define_mode_attr avx_avx2
  [(V4SF "avx") (V2DF "avx")
   (V8SF "avx") (V4DF "avx")
   (V4SI "avx2") (V2DI "avx2")
   (V8SI "avx2") (V4DI "avx2")])

;; "vec" for the 128bit integer modes, "avx2" for the 256bit ones.
(define_mode_attr vec_avx2
  [(V16QI "vec") (V32QI "avx2")
   (V8HI "vec") (V16HI "avx2")
   (V4SI "vec") (V8SI "avx2")
   (V2DI "vec") (V4DI "avx2")])

;; Mapping of HImode vectors to the SImode vector with the same
;; number of elements.
(define_mode_attr ssedoublemode
  [(V16HI "V16SI") (V8HI "V8SI")])

;; Mapping of DImode vectors to the QImode vector of the same size.
(define_mode_attr ssebytemode
  [(V4DI "V32QI") (V2DI "V16QI")])

;; All 128bit vector integer modes
(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])

;; All 256bit vector integer modes
(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])

;; Random 128bit vector integer mode combinations
(define_mode_iterator VI12_128 [V16QI V8HI])
(define_mode_iterator VI14_128 [V16QI V4SI])
(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
(define_mode_iterator VI128_128 [V16QI V8HI V2DI])
(define_mode_iterator VI24_128 [V8HI V4SI])
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
(define_mode_iterator VI48_128 [V4SI V2DI])

;; Random 256bit vector integer mode combinations
(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
(define_mode_iterator VI48_256 [V8SI V4DI])

;; Int-float size matches
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF])

;; Mapping from float mode to required SSE level
(define_mode_attr sse
  [(SF "sse") (DF "sse2")
   (V4SF "sse") (V2DF "sse2")
   (V8SF "avx") (V4DF "avx")])

;; Likewise for QI/DI vector integer modes: "sse2" for 128bit,
;; "avx" for 256bit.
(define_mode_attr sse2
  [(V16QI "sse2") (V32QI "avx")
   (V2DI "sse2") (V4DI "avx")])

;; Likewise for QImode vectors: "sse3" for 128bit, "avx" for 256bit.
(define_mode_attr sse3
  [(V16QI "sse3") (V32QI "avx")])

;; Likewise for vector float modes: "sse4_1" for 128bit,
;; "avx" for 256bit.
(define_mode_attr sse4_1
  [(V4SF "sse4_1") (V2DF "sse4_1")
   (V8SF "avx") (V4DF "avx")])

;; Size suffix: "256" for the 256bit modes, empty for the 128bit modes.
(define_mode_attr avxsizesuffix
  [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
   (V8SF "256") (V4DF "256")
   (V4SF "") (V2DF "")])

;; SSE instruction mode: OI/TI for the 256bit/128bit integer vector
;; modes, the mode itself for vector float modes.
(define_mode_attr sseinsnmode
  [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
   (V8SF "V8SF") (V4DF "V4DF")
   (V4SF "V4SF") (V2DF "V2DF")
   (TI "TI")])

;; Mapping of vector modes to an integer vector mode of the same size
;; and element count (integer modes map to themselves).
(define_mode_attr sseintvecmode
  [(V8SF "V8SI") (V4DF "V4DI")
   (V4SF "V4SI") (V2DF "V2DI")
   (V8SI "V8SI") (V4DI "V4DI")
   (V4SI "V4SI") (V2DI "V2DI")
   (V16HI "V16HI") (V8HI "V8HI")
   (V32QI "V32QI") (V16QI "V16QI")])

;; Likewise, spelled in lower case.
(define_mode_attr sseintvecmodelower
  [(V8SF "v8si") (V4DF "v4di")
   (V4SF "v4si") (V2DF "v2di")
   (V8SI "v8si") (V4DI "v4di")
   (V4SI "v4si") (V2DI "v2di")
   (V16HI "v16hi") (V8HI "v8hi")
   (V32QI "v32qi") (V16QI "v16qi")])

;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublevecmode
  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
   (V8SF "V16SF") (V4DF "V8DF")
   (V4SF "V8SF") (V2DF "V4DF")])

;; Mapping of vector modes to a vector mode of half size
;; (there are no entries for V2DI and V2DF).
(define_mode_attr ssehalfvecmode
  [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
   (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
   (V8SF "V4SF") (V4DF "V2DF")
   (V4SF "V2SF")])

;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode
  [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
   (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
   (V8SF "SF") (V4DF "DF")
   (V4SF "SF") (V2DF "DF")])

;; Number of scalar elements in each vector type
(define_mode_attr ssescalarnum
  [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
   (V8SF "8") (V4DF "4")
   (V4SF "4") (V2DF "2")])

;; SSE prefix for integer vector modes ("p"; empty for float modes)
(define_mode_attr sseintprefix
  [(V2DI "p") (V2DF "")
   (V4DI "p") (V4DF "")
   (V4SI "p") (V4SF "")
   (V8SI "p") (V8SF "")])

;; SSE scalar suffix for vector modes
(define_mode_attr ssescalarmodesuffix
  [(SF "ss") (DF "sd")
   (V8SF "ss") (V4DF "sd")
   (V4SF "ss") (V2DF "sd")
   (V8SI "ss") (V4DI "sd")
   (V4SI "d")])

;; Pack/unpack vector modes
(define_mode_attr sseunpackmode
  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])

(define_mode_attr ssepackmode
  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])

;; Mapping of the max integer size for xop rotate immediate constraint
(define_mode_attr sserotatemax
  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])

;; Mapping of mode to cast intrinsic name
(define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])

;; Instruction suffix for sign and zero extensions.
(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])

;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
(define_mode_attr i128
  [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
   (V8SI "%~128") (V4DI "%~128")])

;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])

;; Mapping of immediate bits for blend instructions
(define_mode_attr blendbits
  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])

;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.

;; Vector move expander; all the work is done by ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:V16 0 "nonimmediate_operand" "")
        (match_operand:V16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})

;; Vector move insn.  At least one operand must be a register.  The
;; mnemonic is chosen from the insn's "mode" attribute and, for AVX,
;; from whether either operand is misaligned.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
  "TARGET_SSE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      /* Alternative 0 is the constant ("C") source.  */
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      switch (get_attr_mode (insn))
        {
        case MODE_V8SF:
        case MODE_V4SF:
          if (TARGET_AVX
              && (misaligned_operand (operands[0], <MODE>mode)
                  || misaligned_operand (operands[1], <MODE>mode)))
            return "vmovups\t{%1, %0|%0, %1}";
          else
            return "%vmovaps\t{%1, %0|%0, %1}";

        case MODE_V4DF:
        case MODE_V2DF:
          if (TARGET_AVX
              && (misaligned_operand (operands[0], <MODE>mode)
                  || misaligned_operand (operands[1], <MODE>mode)))
            return "vmovupd\t{%1, %0|%0, %1}";
          else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vmovaps\t{%1, %0|%0, %1}";
          else
            return "%vmovapd\t{%1, %0|%0, %1}";

        case MODE_OI:
        case MODE_TI:
          if (TARGET_AVX
              && (misaligned_operand (operands[0], <MODE>mode)
                  || misaligned_operand (operands[1], <MODE>mode)))
            return "vmovdqu\t{%1, %0|%0, %1}";
          else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vmovaps\t{%1, %0|%0, %1}";
          else
            return "%vmovdqa\t{%1, %0|%0, %1}";

        default:
          gcc_unreachable ();
        }
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex")
   ;; With AVX the insn mode follows the vector mode.  Otherwise V4SF is
   ;; used when optimizing for size, without SSE2, or for stores with
   ;; TARGET_SSE_TYPELESS_STORES; failing that, V4SF/V2DF for those
   ;; modes and TI for everything else.
   (set (attr "mode")
        (cond [(match_test "TARGET_AVX")
                 (const_string "<sseinsnmode>")
               (ior (ior (match_test "optimize_function_for_size_p (cfun)")
                         (not (match_test "TARGET_SSE2")))
                    (and (eq_attr "alternative" "2")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
                 (const_string "V4SF")
               (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
                 (const_string "V2DF")
              ]
          (const_string "TI")))])

;; Copy element 0 of a V2DI into element 0 of the result and set the
;; other element to zero.
(define_insn "sse2_movq128"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_concat:V2DI
          (vec_select:DI
            (match_operand:V2DI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))
          (const_int 0)))]
  "TARGET_SSE2"
  "%vmovq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register. To facilitate this happy circumstance, this pattern won't
;; split until after register allocation. If the 64-bit value didn't
;; come from memory, this is the best we can do. This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.

(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
          (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[1], DImode))
    {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
         Assemble the 64-bit DImode value in an xmm register.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 0)));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 4)));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
                                             operands[2]));
    }
  else if (memory_operand (operands[1], DImode))
    /* The memory alternative needs no scratch: concatenate the DImode
       value with zero.  */
    emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
                                   operands[1], const0_rtx));
  else
    gcc_unreachable ();
})

;; After reload, rewrite a V4SF load of a zero-extended scalar as a
;; vec_merge of the duplicated SFmode value with a zero vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})

;; Likewise for V2DF, as a vec_concat of the DFmode value with zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})

;; Vector push expander; all the work is done by ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:V16 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})

;; Misaligned vector move expander; all the work is done by
;; ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:V16 0 "nonimmediate_operand" "")
        (match_operand:V16 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})

;; UNSPEC_LOADU load of a vector float mode from memory.
(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "memory_operand" "m")]
          UNSPEC_LOADU))]
  "TARGET_SSE"
  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; UNSPEC_STOREU store of a vector float mode to memory.
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "x")]
          UNSPEC_STOREU))]
  "TARGET_SSE"
  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; UNSPEC_LOADU load of a QImode vector from memory (movdqu).
(define_insn "<sse2>_loaddqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "register_operand" "=x")
        (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
                    UNSPEC_LOADU))]
  "TARGET_SSE2"
  "%vmovdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; UNSPEC_STOREU store of a QImode vector to memory (movdqu).
(define_insn "<sse2>_storedqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "memory_operand" "=m")
        (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
                    UNSPEC_STOREU))]
  "TARGET_SSE2"
  "%vmovdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; UNSPEC_LDDQU load of a QImode vector from memory (lddqu).
(define_insn "<sse3>_lddqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "register_operand" "=x")
        (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
                    UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "%vlddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; UNSPEC_MOVNT store of a general register to memory (movnti).
(define_insn "sse2_movnti<mode>"
  [(set (match_operand:SWI48 0 "memory_operand" "=m")
        (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
                      UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "<MODE>")])

;; UNSPEC_MOVNT store of a vector float register to memory.
(define_insn "<sse>_movnt<mode>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
        (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE"
  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; UNSPEC_MOVNT store of a DImode vector register to memory (movntdq).
(define_insn "<sse2>_movnt<mode>"
  [(set (match_operand:VI8 0 "memory_operand" "=m")
        (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
                    UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "%vmovntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

; Expand patterns for non-temporal stores. At the moment, only those
; that directly map to insns are defined; it would be possible to
; define patterns for other modes that would expand to several insns.

;; Modes handled by storent patterns.
(define_mode_iterator STORENT_MODE
  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
   (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
   (V8SF "TARGET_AVX") V4SF
   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])

;; The expander has no preparation code: it emits the UNSPEC_MOVNT set
;; as written.
(define_expand "storent<mode>"
  [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
        (unspec:STORENT_MODE
          [(match_operand:STORENT_MODE 1 "register_operand" "")]
          UNSPEC_MOVNT))]
  "TARGET_SSE")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector abs/neg expander; all the work is done by
;; ix86_expand_fp_absneg_operator.
(define_expand "<code><mode>2"
  [(set (match_operand:VF 0 "register_operand" "")
        (absneg:VF
          (match_operand:VF 1 "register_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")

;; Vector abs/neg with an extra (use) operand 2.  After reload this is
;; split into a single logical operation on operand 1 and operand 2:
;; XOR for NEG, AND otherwise.
(define_insn_and_split "*absneg<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
        (match_operator:VF 3 "absneg_operator"
          [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
   (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  enum rtx_code absneg_op;
  rtx op1, op2;
  rtx t;

  if (TARGET_AVX)
    {
      /* Put a memory operand second.  */
      if (MEM_P (operands[1]))
        op1 = operands[2], op2 = operands[1];
      else
        op1 = operands[1], op2 = operands[2];
    }
  else
    {
      /* The first source is the destination; pick whichever input is
         not already the destination as the second source.  */
      op1 = operands[0];
      if (rtx_equal_p (operands[0], operands[1]))
        op2 = operands[2];
      else
        op2 = operands[1];
    }

  absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
  t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
  t = gen_rtx_SET (VOIDmode, operands[0], t);
  emit_insn (t);
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])

;; Vector float add/subtract.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (plusminus:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (plusminus:VF
          (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Add/subtract merged into operand 1 with (const_int 1) as the
;; vec_merge selector.
(define_insn "<sse>_vm<plusminus_insn><mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (plusminus:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; Vector float multiply.
(define_expand "mul<mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (mult:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

(define_insn "*mul<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (mult:VF
          (match_operand:VF 1 "nonimmediate_operand" "%0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   mul<ssemodesuffix>\t{%2, %0|%0, %2}
   vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Multiply merged into operand 1 with (const_int 1) as the vec_merge
;; selector.
(define_insn "<sse>_vmmul<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (mult:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; Vector float divide, DFmode elements.
(define_expand "div<mode>3"
  [(set (match_operand:VF2 0 "register_operand" "")
        (div:VF2 (match_operand:VF2 1 "register_operand" "")
                 (match_operand:VF2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")

;; Vector float divide, SFmode elements.  Under the math flags tested
;; below the division is emitted by ix86_emit_swdivsf instead of the
;; div pattern.
(define_expand "div<mode>3"
  [(set (match_operand:VF1 0 "register_operand" "")
        (div:VF1 (match_operand:VF1 1 "register_operand" "")
                 (match_operand:VF1 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);

  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_DIV
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
      DONE;
    }
})

(define_insn "<sse>_div<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (div:VF
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
  "@
   div<ssemodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Divide merged into operand 1 with (const_int 1) as the vec_merge
;; selector.
(define_insn "<sse>_vmdiv<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (div:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; UNSPEC_RCP on SFmode vectors (rcpps).
(define_insn "<sse>_rcp<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (unspec:VF1
          [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "%vrcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; UNSPEC_RCP of operand 1 merged into operand 2 with (const_int 1) as
;; the vec_merge selector (rcpss).
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   rcpss\t{%1, %0|%0, %1}
   vrcpss\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])

;; Vector square root, DFmode elements.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF2 0 "register_operand" "")
        (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2")

;; Vector square root, SFmode elements.  Under the math flags tested
;; below the result is emitted by ix86_emit_swsqrtsf instead of the
;; sqrt pattern.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "")
        (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (TARGET_SSE_MATH
      && TARGET_RECIP_VEC_SQRT
      && !optimize_insn_for_size_p ()
      && flag_finite_math_only && !flag_trapping_math
      && flag_unsafe_math_optimizations)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
      DONE;
    }
})

(define_insn "<sse>_sqrt<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Square root of operand 1 merged into operand 2 with (const_int 1)
;; as the vec_merge selector.
(define_insn "<sse>_vmsqrt<mode>2"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (sqrt:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
          (match_operand:VF_128 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "atom_sse_attr" "sqrt")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; This expander always goes through ix86_emit_swsqrtsf, called with
;; its last argument true (the sqrt expander above passes false).
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "")
        (unspec:VF1
          [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
  "TARGET_SSE_MATH"
{
  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
  DONE;
})

;; UNSPEC_RSQRT on SFmode vectors (rsqrtps).
(define_insn "<sse>_rsqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (unspec:VF1
          [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
  "TARGET_SSE"
  "%vrsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; UNSPEC_RSQRT of operand 1 merged into operand 2 with (const_int 1)
;; as the vec_merge selector (rsqrtss).
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   rsqrtss\t{%1, %0|%0, %1}
   vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])

;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs. Hopefully the optimizers won't get too smart on us.

(define_expand "<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (smaxmin:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  /* The !flag_finite_math_only insn below requires operand 1 to be a
     register.  */
  if (!flag_finite_math_only)
    operands[1] = force_reg (<MODE>mode, operands[1]);
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})

;; Min/max under flag_finite_math_only; operands 1 and 2 are marked
;; commutative ("%").
(define_insn "*<code><mode>3_finite"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (smaxmin:VF
          (match_operand:VF 1 "nonimmediate_operand" "%0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Min/max without flag_finite_math_only; the operands are not marked
;; commutative and operand 1 must be a register.
(define_insn "*<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (smaxmin:VF
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && !flag_finite_math_only"
  "@
   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Min/max merged into operand 1 with (const_int 1) as the vec_merge
;; selector.
(define_insn "<sse>_vm<code><mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (smaxmin:VF_128
            (match_operand:VF_128 1 "register_operand" "0,x")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
   v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.


;; IEEE-style minimum, kept as an unspec so that the operand order is
;; never swapped.
(define_insn "*ieee_smin<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "@
   min<ssemodesuffix>\t{%2, %0|%0, %2}
   vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; IEEE-style maximum; counterpart of the pattern above.
(define_insn "*ieee_smax<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "@
   max<ssemodesuffix>\t{%2, %0|%0, %2}
   vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Alternating add/subtract.  Each addsub pattern is a vec_merge of
;; (plus op1 op2) and (minus op1 op2); the constant mask picks which
;; lanes come from the sum (10 = 0b1010 here).
(define_insn "avx_addsubv4df3"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_merge:V4DF
          (plus:V4DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (minus:V4DF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_AVX"
  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; 128-bit double-precision addsub (mask 2 = 0b10).
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0,x")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
          (minus:V2DF (match_dup 1) (match_dup 2))
          (const_int 2)))]
  "TARGET_SSE3"
  "@
   addsubpd\t{%2, %0|%0, %2}
   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])

;; 256-bit single-precision addsub (mask 170 = 0b10101010).
(define_insn "avx_addsubv8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_merge:V8SF
          (plus:V8SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (minus:V8SF (match_dup 1) (match_dup 2))
          (const_int 170)))]
  "TARGET_AVX"
  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; 128-bit single-precision addsub (mask 10 = 0b1010).
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "@
   addsubps\t{%2, %0|%0, %2}
   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_rep" "1,*")
   (set_attr "mode" "V4SF")])

;; Horizontal add/subtract, 256-bit double precision.  Result lanes, low
;; to high: op1[0]?op1[1], op2[0]?op2[1], op1[2]?op1[3], op2[2]?op2[3],
;; where ? is the plusminus operation.
(define_insn "avx_h<plusminus_insn>v4df3"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_concat:V4DF
          (vec_concat:V2DF
            (plusminus:DF
              (vec_select:DF
                (match_operand:V4DF 1 "register_operand" "x")
                (parallel [(const_int 0)]))
              (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
            (plusminus:DF
              (vec_select:DF
                (match_operand:V4DF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
          (vec_concat:V2DF
            (plusminus:DF
              (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
            (plusminus:DF
              (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_AVX"
  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Horizontal add/subtract, 128-bit double precision:
;; { op1[0]?op1[1], op2[0]?op2[1] }.
(define_insn "sse3_h<plusminus_insn>v2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_concat:V2DF
          (plusminus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0,x")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (plusminus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "@
   h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
   vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])

;; Horizontal add/subtract, 256-bit single precision.  Each 128-bit half
;; of the result holds two pair-results from operand 1 followed by two
;; pair-results from operand 2, taken from the matching half of the inputs.
(define_insn "avx_h<plusminus_insn>v8sf3"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_concat:V8SF
          (vec_concat:V4SF
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF
                  (match_operand:V8SF 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF
                  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
          (vec_concat:V4SF
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
              (plusminus:SF
                (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
            (vec_concat:V2SF
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
              (plusminus:SF
                (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX"
  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; Horizontal add/subtract, 128-bit single precision:
;; { op1[0]?op1[1], op1[2]?op1[3], op2[0]?op2[1], op2[2]?op2[3] }.
(define_insn "sse3_h<plusminus_insn>v4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (plusminus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plusminus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (plusminus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plusminus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "@
   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix" "orig,vex")
   (set_attr "prefix_rep" "1,*")
   (set_attr "mode" "V4SF")])

;; Sum reduction of a V4DF: horizontal-add the input with itself, permute
;; the partial sums with vperm2f128 (selector 1), then add the two.
(define_expand "reduc_splus_v4df"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V4DF 1 "register_operand" "")]
  "TARGET_AVX"
{
  rtx pair_sums = gen_reg_rtx (V4DFmode);
  rtx permuted = gen_reg_rtx (V4DFmode);

  emit_insn (gen_avx_haddv4df3 (pair_sums, operands[1], operands[1]));
  /* NOTE(review): selector 1 presumably swaps the two 128-bit halves.  */
  emit_insn (gen_avx_vperm2f128v4df3 (permuted, pair_sums, pair_sums,
                                      GEN_INT (1)));
  emit_insn (gen_addv4df3 (operands[0], pair_sums, permuted));
  DONE;
})

;; Sum reduction of a V2DF: a single horizontal add of the input with itself.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
  DONE;
})

;; Sum reduction of a V8SF: two rounds of horizontal add, a vperm2f128
;; (selector 1) of the result, and a final vector add.  The first
;; temporary is deliberately reused as the permute destination.
(define_expand "reduc_splus_v8sf"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")]
  "TARGET_AVX"
{
  rtx scratch = gen_reg_rtx (V8SFmode);
  rtx quad_sums = gen_reg_rtx (V8SFmode);

  emit_insn (gen_avx_haddv8sf3 (scratch, operands[1], operands[1]));
  emit_insn (gen_avx_haddv8sf3 (quad_sums, scratch, scratch));
  emit_insn (gen_avx_vperm2f128v8sf3 (scratch, quad_sums, quad_sums,
                                      GEN_INT (1)));
  emit_insn (gen_addv8sf3 (operands[0], scratch, quad_sums));
  DONE;
})

;; Sum reduction of a V4SF.  With SSE3 two horizontal adds are used;
;; otherwise the generic ix86_expand_reduc helper is driven with addv4sf3.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (!TARGET_SSE3)
    ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
  else
    {
      rtx pair_sums = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse3_haddv4sf3 (pair_sums, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], pair_sums, pair_sums));
    }
  DONE;
})

;; Modes handled by reduc_sm{in,ax}* patterns.
(define_mode_iterator REDUC_SMINMAX_MODE
  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
   (V4SF "TARGET_SSE")])

;; Signed min/max reductions; availability comes from the per-mode
;; conditions of REDUC_SMINMAX_MODE, hence the empty insn condition.
(define_expand "reduc_<code>_<mode>"
  [(smaxmin:REDUC_SMINMAX_MODE
     (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
  ""
{
  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
  DONE;
})

;; Unsigned min/max reductions for the 256-bit integer modes.
(define_expand "reduc_<code>_<mode>"
  [(umaxmin:VI_256
     (match_operand:VI_256 0 "register_operand" "")
     (match_operand:VI_256 1 "register_operand" ""))]
  "TARGET_AVX2"
{
  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
  DONE;
})

;; Unsigned minimum reduction of a V8HI vector.
(define_expand "reduc_umin_v8hi"
  [(umin:V8HI
     (match_operand:V8HI 0 "register_operand" "")
     (match_operand:V8HI 1 "register_operand" ""))]
  "TARGET_SSE4_1"
{
  ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; AVX packed compare with an immediate predicate (operand 3, 0..31),
;; represented as UNSPEC_PCMP.
(define_insn "avx_cmp<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "x")
           (match_operand:VF 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_31_operand" "n")]
          UNSPEC_PCMP))]
  "TARGET_AVX"
  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; Scalar form of the AVX immediate compare: the UNSPEC_PCMP result is
;; vec_merged (mask 1) with operand 1.
(define_insn "avx_vmcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 1 "register_operand" "x")
             (match_operand:VF_128 2 "nonimmediate_operand" "xm")
             (match_operand:SI 3 "const_0_to_31_operand" "n")]
            UNSPEC_PCMP)
          (match_dup 1)
          (const_int 1)))]
  "TARGET_AVX"
  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<ssescalarmode>")])

;; Mask-producing compare for commutative comparison codes (the insn
;; condition requires RTX_COMM_COMPARE), so operands carry "%".
(define_insn "*<sse>_maskcmp<mode>3_comm"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (match_operator:VF 3 "sse_comparison_operator"
          [(match_operand:VF 1 "register_operand" "%0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
  "TARGET_SSE
   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Mask-producing compare for any sse_comparison_operator; the predicate
;; name is printed via %D3.
(define_insn "<sse>_maskcmp<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (match_operator:VF 3 "sse_comparison_operator"
          [(match_operand:VF 1 "register_operand" "0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
  "TARGET_SSE"
  "@
   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Scalar mask compare: comparison result vec_merged (mask 1) with
;; operand 1.
(define_insn "<sse>_vmmaskcmp<mode>3"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (match_operator:VF_128 3 "sse_comparison_operator"
            [(match_operand:VF_128 1 "register_operand" "0,x")
             (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "@
   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecmp")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssescalarmode>")])

;; COMISS/COMISD: compare element 0 of both vector operands and set the
;; flags register in CCFP mode.
(define_insn "<sse>_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:MODEF
            (match_operand:<ssevecmode> 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:MODEF
            (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rep" "0")
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "mode" "DF")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "mode" "<MODE>")])

;; UCOMISS/UCOMISD: same shape as the comi pattern but in CCFPU mode.
(define_insn "<sse>_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:MODEF
            (match_operand:<ssevecmode> 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:MODEF
            (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
  "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix" "maybe_vex")
   (set_attr "prefix_rep" "0")
   (set (attr "prefix_data16")
        (if_then_else (eq_attr "mode" "DF")
                      (const_string "1")
                      (const_string "0")))
   (set_attr "mode" "<MODE>")])

;; Vector conditional move driven by a 256-bit floating-point comparison.
;; Data and comparison modes may differ but must have the same number of
;; elements.  Expansion is done by ix86_expand_fp_vcond, which must succeed.
(define_expand "vcond<V_256:mode><VF_256:mode>"
  [(set (match_operand:V_256 0 "register_operand" "")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VF_256 4 "nonimmediate_operand" "")
             (match_operand:VF_256 5 "nonimmediate_operand" "")])
          (match_operand:V_256 1 "general_operand" "")
          (match_operand:V_256 2 "general_operand" "")))]
  "TARGET_AVX
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
{
  /* Keep the call outside gcc_assert so it is always evaluated.  */
  bool expanded = ix86_expand_fp_vcond (operands);
  gcc_assert (expanded);
  DONE;
})

;; 128-bit counterpart of the vcond expander above.
(define_expand "vcond<V_128:mode><VF_128:mode>"
  [(set (match_operand:V_128 0 "register_operand" "")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VF_128 4 "nonimmediate_operand" "")
             (match_operand:VF_128 5 "nonimmediate_operand" "")])
          (match_operand:V_128 1 "general_operand" "")
          (match_operand:V_128 2 "general_operand" "")))]
  "TARGET_SSE
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
{
  /* Keep the call outside gcc_assert so it is always evaluated.  */
  bool expanded = ix86_expand_fp_vcond (operands);
  gcc_assert (expanded);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector and-not: (~op1) & op2.  The mnemonic suffix is forced to "ps"
;; when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set, so the assembler
;; string is built at output time in a static buffer.
(define_insn "<sse>_andnot<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (and:VF
          (not:VF
            (match_operand:VF 1 "register_operand" "0,x"))
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
{
  static char asm_buf[32];
  const char *sfx
    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
  const char *tmpl;

  /* Alternative 0 is the two-operand SSE form, 1 the VEX form.  */
  if (which_alternative == 0)
    tmpl = "andn%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_buf, sizeof (asm_buf), tmpl, sfx);
  return asm_buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; Expander for the vector float logical operations covered by any_logic;
;; operands are legitimized by ix86_fixup_binary_operands_no_copy.
(define_expand "<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "")
        (any_logic:VF
          (match_operand:VF 1 "nonimmediate_operand" "")
          (match_operand:VF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Vector float logical instruction; same suffix selection and buffer
;; handling as the and-not pattern.
(define_insn "*<code><mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (any_logic:VF
          (match_operand:VF 1 "nonimmediate_operand" "%0,x")
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char asm_buf[32];
  const char *sfx
    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
  const char *tmpl;

  /* Alternative 0 is the two-operand SSE form, 1 the VEX form.  */
  if (which_alternative == 0)
    tmpl = "<logic>%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_buf, sizeof (asm_buf), tmpl, sfx);
  return asm_buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; copysign (op1, op2) in three steps using a sign-bit mask (operand 3):
;;   op4 = ~mask & op1;  op5 = mask & op2;  op0 = op4 | op5.
;; Operands 4 and 5 are fresh pseudo registers.
(define_expand "copysign<mode>3"
  [(set (match_dup 4)
        (and:VF
          (not:VF (match_dup 3))
          (match_operand:VF 1 "nonimmediate_operand" "")))
   (set (match_dup 5)
        (and:VF (match_dup 3)
                (match_operand:VF 2 "nonimmediate_operand" "")))
   (set (match_operand:VF 0 "register_operand" "")
        (ior:VF (match_dup 4) (match_dup 5)))]
  "TARGET_SSE"
{
  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);

  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = gen_reg_rtx (<MODE>mode);
})

;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.

;; Scalar and-not, emitted with the packed instruction of the
;; corresponding vector mode.
(define_insn "*andnot<mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (and:MODEF
          (not:MODEF
            (match_operand:MODEF 1 "register_operand" "0,x"))
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  static char asm_buf[32];
  const char *sfx
    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
  const char *tmpl;

  /* Alternative 0 is the two-operand SSE form, 1 the VEX form.  */
  if (which_alternative == 0)
    tmpl = "andn%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_buf, sizeof (asm_buf), tmpl, sfx);
  return asm_buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssevecmode>")])

;; Scalar logical operations, likewise emitted with the packed
;; instruction of the corresponding vector mode.
(define_insn "*<code><mode>3"
  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (any_logic:MODEF
          (match_operand:MODEF 1 "register_operand" "%0,x")
          (match_operand:MODEF 2 "register_operand" "x,x")))]
  "SSE_FLOAT_MODE_P (<MODE>mode)"
{
  static char asm_buf[32];
  const char *sfx
    = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
  const char *tmpl;

  /* Alternative 0 is the two-operand SSE form, 1 the VEX form.  */
  if (which_alternative == 0)
    tmpl = "<logic>%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    tmpl = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_buf, sizeof (asm_buf), tmpl, sfx);
  return asm_buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<ssevecmode>")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA floating point multiply/accumulate instructions.  These include
;; scalar versions of the instructions as well as vector versions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The standard names for scalar FMA are only available with SSE math enabled.
(define_mode_iterator FMAMODEM
  [(SF "TARGET_SSE_MATH")
   (DF "TARGET_SSE_MATH")
   V4SF V2DF V8SF V4DF])

;; Standard-name expanders for fused multiply-add.  They carry no
;; preparation code; the rtl is emitted as written.
;;   fma:   op1 * op2 + op3
(define_expand "fma<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (match_operand:FMAMODEM 1 "nonimmediate_operand")
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
  "TARGET_FMA || TARGET_FMA4")

;;   fms:   op1 * op2 - op3   (operand 3 negated)
(define_expand "fms<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (match_operand:FMAMODEM 1 "nonimmediate_operand")
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
  "TARGET_FMA || TARGET_FMA4")

;;   fnma:  -op1 * op2 + op3  (operand 1 negated)
(define_expand "fnma<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
  "TARGET_FMA || TARGET_FMA4")

;;   fnms:  -op1 * op2 - op3  (operands 1 and 3 negated)
(define_expand "fnms<mode>4"
  [(set (match_operand:FMAMODEM 0 "register_operand")
        (fma:FMAMODEM
          (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
          (match_operand:FMAMODEM 2 "nonimmediate_operand")
          (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
  "TARGET_FMA || TARGET_FMA4")

;; The builtins for intrinsics are not constrained by SSE math enabled.
(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])

;; Named expander used by the intrinsic builtins: op1 * op2 + op3.
(define_expand "fma4i_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand")
          (match_operand:FMAMODE 2 "nonimmediate_operand")
          (match_operand:FMAMODE 3 "nonimmediate_operand")))]
  "TARGET_FMA || TARGET_FMA4")

;; In the four insns below the first three alternatives are the FMA3
;; 132/213/231 encodings (the destination is tied to operand 1, 1 and 3
;; respectively); the last two are the four-operand FMA4 encoding.  The
;; "isa" attribute enables each alternative for the matching extension.

;; op1 * op2 + op3
(define_insn "*fma_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
          (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
          (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; op1 * op2 - op3
(define_insn "*fma_fmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
        (fma:FMAMODE
          (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")
          (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; -op1 * op2 + op3
(define_insn "*fma_fnmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
          (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
          (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; -op1 * op2 - op3
(define_insn "*fma_fnmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
        (fma:FMAMODE
          (neg:FMAMODE
            (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
          (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
          (neg:FMAMODE
            (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA parallel floating point multiply addsub and subadd operations.

;; It would be possible to represent these without the UNSPEC as
;;
;; (vec_merge
;;   (fma op1 op2 op3)
;;   (fma op1 op2 (neg op3))
;;   (merge-const))
;;
;; But this doesn't seem useful in practice.

;; Named expander for the fused multiply with alternating add/subtract,
;; kept opaque as UNSPEC_FMADDSUB.
(define_expand "fmaddsub_<mode>"
  [(set (match_operand:VF 0 "register_operand")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand")
           (match_operand:VF 2 "nonimmediate_operand")
           (match_operand:VF 3 "nonimmediate_operand")]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4")

;; vfmaddsub: three FMA3 alternatives (132/213/231) followed by two FMA4
;; alternatives, selected through the "isa" attribute.
(define_insn "*fma_fmaddsub_<mode>"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
           (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; vfmsubadd: the same unspec with operand 3 negated.
(define_insn "*fma_fmsubadd_<mode>"
  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m")
           (neg:VF
             (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))]
          UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA3 floating point scalar intrinsics.  These merge result with
;; high-order elements from the destination register.

;; Named expander for the FMA3 scalar multiply-add: the fma result is
;; vec_merged (mask 1) with operand 1.
(define_expand "fmai_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand")
            (match_operand:VF_128 2 "nonimmediate_operand")
            (match_operand:VF_128 3 "nonimmediate_operand"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA")

;; In the four insns below operand 1 is always tied to the destination,
;; so only the 132 and 213 encodings are offered.

;; op1 * op2 + op3
(define_insn "*fmai_fmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
            (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA"
  "@
   vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; op1 * op2 - op3
(define_insn "*fmai_fmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
            (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA"
  "@
   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; -op2 * op1 + op3.  Note that the negation is placed on operand 2 here.
(define_insn "*fmai_fnmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
            (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
            (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; -op2 * op1 - op3.  Note that the negation is placed on operand 2 here.
(define_insn "*fmai_fnmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 2 "nonimmediate_operand" "xm, x"))
            (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_FMA"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA4 floating point scalar intrinsics.  These write the
;; entire destination register, with the high-order elements zeroed.

;; FMA4 scalar fma expander.  Operand 4 is created here as the zero
;; vector of the mode, so the vec_merge (mask 1) takes element 0 from the
;; fma result and zero for every other element.
(define_expand "fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand")
            (match_operand:VF_128 2 "nonimmediate_operand")
            (match_operand:VF_128 3 "nonimmediate_operand"))
          (match_dup 4)
          (const_int 1)))]
  "TARGET_FMA4"
{
  operands[4] = CONST0_RTX (<MODE>mode);
})

;; Four-operand scalar op1 * op2 + op3 merged with a zero vector
;; (operand 4 must satisfy const0_operand).  The two alternatives allow
;; either operand 3 or operand 2 to be in memory.
(define_insn "*fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
          (match_operand:VF_128 4 "const0_operand" "")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; As *fma4i_vmfmadd_<mode>, with the addend (operand 3) negated.
(define_insn "*fma4i_vmfmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
          (match_operand:VF_128 4 "const0_operand" "")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; As *fma4i_vmfmadd_<mode>, with the first multiplicand (operand 1)
;; negated.
(define_insn "*fma4i_vmfnmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
          (match_operand:VF_128 4 "const0_operand" "")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; As *fma4i_vmfmadd_<mode>, with both operand 1 and operand 3 negated.
(define_insn "*fma4i_vmfnmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (fma:VF_128
            (neg:VF_128
              (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
            (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
            (neg:VF_128
              (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
          (match_operand:VF_128 4 "const0_operand" "")
          (const_int 1)))]
  "TARGET_FMA4"
  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Convert the V2SI operand 2 to V2SF and merge it into elements 0-1
;; (vec_merge mask 3) of operand 1, which is tied to the destination.
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; Non-truncating (UNSPEC_FIX_NOTRUNC) conversion of a V4SF operand, of
;; which elements 0-1 are written to a "y"-class V2SI destination.
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; Truncating (fix) counterpart of sse_cvtps2pi.
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "prefix_rep" "0")
   (set_attr "mode" "SF")])

;; Convert the SImode operand 2 to SF and merge it into element 0 of
;; operand 1.  Alternatives 0-1 are the two-operand non-AVX forms
;; (register and memory source, kept apart for the decode attributes);
;; alternative 2 is the three-operand AVX form.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
          (match_operand:V4SF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   cvtsi2ss\t{%2, %0|%0, %2}
   cvtsi2ss\t{%2, %0|%0, %2}
   vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])

;; DImode-source variant of sse_cvtsi2ss; 64-bit targets only.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
          (match_operand:V4SF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "@
   cvtsi2ssq\t{%2, %0|%0, %2}
   cvtsi2ssq\t{%2, %0|%0, %2}
   vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])

;; Non-truncating conversion of element 0 of a V4SF operand to SImode.
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; Same instruction as sse_cvtss2si, matched on a scalar SFmode operand
;; rather than on element 0 of a vector.
(define_insn "sse_cvtss2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
                   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse_cvtss2si; 64-bit targets only.
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; DImode-result variant of sse_cvtss2si_2; 64-bit targets only.
(define_insn "sse_cvtss2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
                   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvtss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; Truncating (fix) conversion of element 0 of a V4SF operand to SImode.
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "%vcvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse_cvttss2si; 64-bit targets only.
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "%vcvttss2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; Signed integer vector to single-precision float vector, for each mode
;; in the VF1 iterator and its matching integer vector mode.
(define_insn "float<sseintvecmodelower><mode>2"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (float:VF1
          (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "%vcvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Unsigned integer vector to float vector.  The expansion is delegated
;; entirely to ix86_expand_vector_convert_uns_vsivsf; modes other than
;; V4SF additionally require AVX2.
(define_expand "floatuns<sseintvecmodelower><mode>2"
  [(match_operand:VF1 0 "register_operand" "")
   (match_operand:<sseintvecmode> 1 "register_operand" "")]
  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
{
  ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
  DONE;
})

;; Non-truncating V8SF -> V8SI conversion (AVX only).
(define_insn "avx_cvtps2dq256"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX"
  "vcvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Non-truncating V4SF -> V4SI conversion.  prefix_data16 is forced to 1
;; only when AVX is not enabled; with AVX it is left at its default.
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Truncating V8SF -> V8SI conversion (AVX only).
(define_insn "fix_truncv8sfv8si2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Truncating V4SF -> V4SI conversion.  When AVX is not enabled the
;; instruction is described with prefix_rep 1 and prefix_data16 0; with
;; AVX both attributes are left at their defaults ("*").
;; Each attribute is set exactly once: an additional unconditional
;; (set_attr "prefix_data16" "0") that used to follow the conditional
;; setting was redundant for the non-AVX case and made the conditional
;; one dead, so it has been dropped.  This matches how sse2_cvtps2dq
;; sets prefix_data16.
(define_insn "fix_truncv4sfv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "%vcvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Unsigned truncating float -> integer vector conversion, built from the
;; signed conversion: ix86_expand_adjust_ufix_to_sfix_si produces the
;; adjusted input (tmp[0]) and a correction value (tmp[2]); the signed
;; fix_trunc result (tmp[1]) is then XORed with that correction.
(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
  [(match_operand:<sseintvecmode> 0 "register_operand" "")
   (match_operand:VF1 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp[3];
  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
  tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
  emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; V2SI -> V2DF conversion.  The source is either a "y"-class register
;; (alternative 0, unit mmx, prefix_data16 1) or memory (alternative 1,
;; both attributes left at their defaults).
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "prefix_data16" "1,*")
   (set_attr "mode" "V2DF")])

;; Non-truncating V2DF -> V2SI conversion into a "y"-class register.
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "DI")])

;; Truncating (fix) counterpart of sse2_cvtpd2pi.
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])

;; Convert the SImode operand 2 to DF and merge it into element 0 of
;; operand 1.  Alternatives 0-1 are the two-operand non-AVX forms
;; (register and memory source); alternative 2 is the three-operand AVX
;; form.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
          (match_operand:V2DF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsi2sd\t{%2, %0|%0, %2}
   cvtsi2sd\t{%2, %0|%0, %2}
   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])

;; DImode-source variant of sse2_cvtsi2sd; 64-bit targets only.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
          (match_operand:V2DF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "@
   cvtsi2sdq\t{%2, %0|%0, %2}
   cvtsi2sdq\t{%2, %0|%0, %2}
   vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])

;; Non-truncating conversion of element 0 of a V2DF operand to SImode.
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; Same instruction as sse2_cvtsd2si, matched on a scalar DFmode operand
;; rather than on element 0 of a vector.
(define_insn "sse2_cvtsd2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
                   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; Non-truncating conversion of element 0 of a V2DF operand to DImode;
;; 64-bit targets only.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; Same instruction as sse2_cvtsd2siq, matched on a scalar DFmode operand.
(define_insn "sse2_cvtsd2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
                   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; Truncating (fix) conversion of element 0 of a V2DF operand to SImode.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "%vcvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse2_cvttsd2si; 64-bit targets only.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])

;; V4SI -> V4DF signed integer to double conversion (AVX only).
(define_insn "floatv4siv4df2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Convert elements 0-3 of a V8SI operand to V4DF.  The source is printed
;; with the %x operand modifier.
(define_insn "avx_cvtdq2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Convert elements 0-1 of a V4SI operand to V2DF.  The Intel-syntax
;; template prints the source with the %q operand modifier.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtdq2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])

;; Non-truncating V4DF -> V4SI conversion (AVX only).
(define_insn "avx_cvtpd2dq256"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Expander for the V8SI-destination form: the V4SI conversion result is
;; concatenated with a zero V4SI vector, which is supplied as operand 2.
(define_expand "avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (vec_concat:V8SI
          (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
                       UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = CONST0_RTX (V4SImode);")

;; Matcher for the expander above; the destination is printed with the
;; %x operand modifier.
(define_insn "*avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (match_operand:V4SI 2 "const0_operand" "")))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Non-truncating V2DF -> V2SI conversion, with the result concatenated
;; with a zero V2SI vector (operand 2) to form the V4SI destination.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
                       UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

;; Matcher for sse2_cvtpd2dq.  The mnemonic is chosen at output time:
;; the AVX spelling carries an explicit {x} suffix.
(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
  else
    return "cvtpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")])

;; Truncating V4DF -> V4SI conversion (AVX only).
(define_insn "fix_truncv4dfv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Truncating counterpart of avx_cvtpd2dq256_2: the V4SI result is
;; concatenated with a zero V4SI vector supplied as operand 2.
(define_expand "avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (vec_concat:V8SI
          (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = CONST0_RTX (V4SImode);")

;; Matcher for the expander above.
(define_insn "*avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 2 "const0_operand" "")))]
  "TARGET_AVX"
  "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Truncating V2DF -> V2SI conversion, with the result concatenated with
;; a zero V2SI vector (operand 2) to form the V4SI destination.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

;; Matcher for sse2_cvttpd2dq.  The mnemonic is chosen at output time:
;; the AVX spelling carries an explicit {x} suffix.
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
  else
    return "cvttpd2dq\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Narrow operand 2 (V2DF) with float_truncate and merge the result into
;; element 0 (vec_merge mask 1) of the V4SF operand 1.  Alternatives 0-1
;; are the two-operand non-AVX forms; alternative 2 is the three-operand
;; AVX form.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
          (match_operand:V4SF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsd2ss\t{%2, %0|%0, %2}
   cvtsd2ss\t{%2, %0|%0, %2}
   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "SF")])

;; Widen elements 0-1 of operand 2 (V4SF) with float_extend and merge
;; element 0 of the result (vec_merge mask 1) into the V2DF operand 1.
;; The alternative layout matches sse2_cvtsd2ss.
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
              (parallel [(const_int 0) (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0,0,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtss2sd\t{%2, %0|%0, %2}
   cvtss2sd\t{%2, %0|%0, %2}
   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "athlon_decode" "direct,direct,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])

;; V4DF -> V4SF narrowing conversion (AVX only).
(define_insn "avx_cvtpd2ps256"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

;; V2DF -> V2SF narrowing conversion, with the result concatenated with a
;; zero V2SF vector (operand 2) to form the V4SF destination.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SFmode);")

;; Matcher for sse2_cvtpd2ps.  The mnemonic is chosen at output time:
;; the AVX spelling carries an explicit {x} suffix.
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_AVX)
    return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
  else
    return "cvtpd2ps\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])

;; V4SF -> V4DF widening conversion (AVX only).
(define_insn "avx_cvtps2pd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vcvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Widen elements 0-3 of a V8SF operand to V4DF.  The source is printed
;; with the %x operand modifier.
(define_insn "*avx_cvtps2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtps2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Widen elements 0-1 of a V4SF operand to V2DF.  The Intel-syntax
;; template prints the source with the %q operand modifier.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "%vcvtps2pd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "athlon_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2DF")])

;; Widen the high half of a V4SF operand to V2DF in two steps.  First,
;; indices 6,7,2,3 of the concatenation (temporary, operand 1) are
;; selected into a fresh V4SF temporary, which places elements 2-3 of
;; operand 1 in positions 0-1.  Second, elements 0-1 of that temporary
;; are float_extended.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
   (vec_select:V4SF
     (vec_concat:V8SF
       (match_dup 2)
       (match_operand:V4SF 1 "nonimmediate_operand" ""))
     (parallel [(const_int 6) (const_int 7)
                (const_int 2) (const_int 3)])))
  (set (match_operand:V2DF 0 "register_operand" "")
   (float_extend:V2DF
     (vec_select:V2SF
       (match_dup 2)
       (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SFmode);")

;; Widen the high half of a V8SF operand to V4DF: elements 4-7 are first
;; extracted into a fresh V4SF temporary, which is then float_extended.
(define_expand "vec_unpacks_hi_v8sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (match_operand:V8SF 1 "nonimmediate_operand" "")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = gen_reg_rtx (V4SFmode);")

;; Widen elements 0-1 of a V4SF operand to V2DF.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")

;; Widen elements 0-3 of a V8SF operand to V4DF.
(define_expand "vec_unpacks_lo_v8sf"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")

;; Maps an integer vector mode to the float vector mode used as the
;; result of the unpack-and-convert expanders below.
(define_mode_attr sseunpackfltmode
  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])

;; Signed unpack of the high half followed by an integer-to-float
;; conversion: the input is widened into a <sseunpackmode> temporary by
;; gen_vec_unpacks_hi_<mode>, and the destination is then set to the
;; FLOAT of that temporary.
(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

;; As vec_unpacks_float_hi_<mode>, using gen_vec_unpacks_lo_<mode>.
(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

;; As vec_unpacks_float_hi_<mode>, using gen_vec_unpacku_hi_<mode> for
;; the widening step.  The conversion of the widened temporary is still
;; a plain FLOAT.
(define_expand "vec_unpacku_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

;; As vec_unpacku_float_hi_<mode>, using gen_vec_unpacku_lo_<mode>.
(define_expand "vec_unpacku_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
  DONE;
})

;; Convert the high half of a V4SI operand to V2DF: elements 2,3,2,3 are
;; first selected into a fresh V4SI temporary, then elements 0-1 of that
;; temporary are converted with float.
(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_dup 2)
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "operands[2] = gen_reg_rtx (V4SImode);")

;; Convert elements 0-1 of a V4SI operand to V2DF.
(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2")

;; Convert the high half of a V8SI operand to V4DF: elements 4-7 are
;; first extracted into a fresh V4SI temporary, which is then converted
;; with float.
(define_expand "vec_unpacks_float_hi_v8si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V8SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
  "operands[2] = gen_reg_rtx (V4SImode);")

;; Convert elements 0..3 of a V8SI vector to V4DF.
(define_expand "vec_unpacks_float_lo_v8si"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX")

;; Unsigned conversion of elements 2..3 of a V4SI vector to V2DF.
;; The elements are converted with the signed FLOAT (operand 6), the
;; result is compared LT against zero (operand 3) giving operand 7, that
;; is ANDed with a vector of 2**32 (operand 4) giving operand 8, and the
;; sum of operands 6 and 8 is the final value.
(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "")
          (parallel [(const_int 2) (const_int 3)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 6)
        (float:V2DF
          (vec_select:V2SI
            (match_dup 5)
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 7)
        (lt:V2DF (match_dup 6) (match_dup 3)))
   (set (match_dup 8)
        (and:V2DF (match_dup 7) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 6) (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x;
  int i;

  /* TWO32r = 1.0 * 2**32, as a DFmode constant.  */
  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, x));

  operands[5] = gen_reg_rtx (V4SImode);

  /* Operands 6..8 are the V2DF intermediates of the template.  */
  for (i = 6; i < 9; i++)
    operands[i] = gen_reg_rtx (V2DFmode);
})

;; Unsigned conversion of elements 0..1 of a V4SI vector to V2DF; same
;; signed-convert / compare-with-zero / add-2**32 sequence as the "hi"
;; expander, without the initial shuffle.
(define_expand "vec_unpacku_float_lo_v4si"
  [(set (match_dup 5)
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))))
   (set (match_dup 6)
        (lt:V2DF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
        (and:V2DF (match_dup 6) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x;
  int i;

  /* TWO32r = 1.0 * 2**32, as a DFmode constant.  */
  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, x));

  /* Operands 5..7 are the V2DF intermediates of the template.  */
  for (i = 5; i < 8; i++)
    operands[i] = gen_reg_rtx (V2DFmode);
})

;; Unsigned conversion of the high half of a V8SI vector to V4DF.
;; tmp[0] is zero, tmp[1] is a vector of 2**32, tmp[5] the extracted
;; V4SI high half, tmp[2] its signed conversion, tmp[3] the LT-zero
;; comparison, tmp[4] the comparison ANDed with tmp[1].
(define_expand "vec_unpacku_float_hi_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "register_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x, tmp[6];
  int i;

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
  tmp[5] = gen_reg_rtx (V4SImode);

  for (i = 2; i < 5; i++)
    tmp[i] = gen_reg_rtx (V4DFmode);
  emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
  emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
                          gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
  DONE;
})

;; Unsigned conversion of the low half of a V8SI vector to V4DF.  The
;; signed conversion is emitted through gen_avx_cvtdq2pd256_2 directly
;; on operand 1; the fix-up sequence matches the "hi" expander.
(define_expand "vec_unpacku_float_lo_v8si"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE TWO32r;
  rtx x, tmp[5];
  int i;

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);

  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));

  for (i = 2; i < 5; i++)
    tmp[i] = gen_reg_rtx (V4DFmode);
  emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
  emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
                          gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
  DONE;
})

;; Narrow two V4DF vectors to one V8SF: each input is float_truncated
;; into its own V4SF temporary (operands 3 and 4) and the two halves are
;; concatenated, operand 1's half first.
(define_expand "vec_pack_trunc_v4df"
  [(set (match_dup 3)
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "")))
   (set (match_dup 4)
        (float_truncate:V4SF
          (match_operand:V4DF 2 "nonimmediate_operand" "")))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_concat:V8SF
          (match_dup 3)
          (match_dup 4)))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4SFmode);
  operands[4] = gen_reg_rtx (V4SFmode);
})

;; Narrow two V2DF vectors to one V4SF.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      /* Concatenate both inputs into one V4DF and convert once.  */
      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
    }
  else
    {
      /* Convert each input separately, then combine the two results
         with movlhps.  */
      tmp0 = gen_reg_rtx (V4SFmode);
      tmp1 = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
      emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
      emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
    }
  DONE;
})

;; Truncating conversion of two V4DF vectors to one V8SI: each input goes
;; through fix_truncv4dfv4si2 and the V4SI results are concatenated.
(define_expand "vec_pack_sfix_trunc_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx r1, r2;

  r1 = gen_reg_rtx (V4SImode);
  r2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
  emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
  DONE;
})

;; Truncating conversion of two V2DF vectors to one V4SI.
(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      /* Concatenate both inputs into one V4DF and convert once.  */
      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
    }
  else
    {
      /* Convert each input with cvttpd2dq, then interleave the low
         V2DI elements of the two results.  */
      tmp0 = gen_reg_rtx (V4SImode);
      tmp1 = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
      emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
                                    gen_lowpart (V2DImode, tmp0),
                                    gen_lowpart (V2DImode, tmp1)));
    }
  DONE;
})

;; Integer result mode of vec_pack_ufix_trunc_<mode>, keyed by the
;; double-precision source mode.
(define_mode_attr ssepackfltmode
  [(V4DF "V8SI") (V2DF "V4SI")])

;; Unsigned truncating pack.  Each input is passed through
;; ix86_expand_adjust_ufix_to_sfix_si, which returns the adjusted value
;; (tmp[0], tmp[1]) and hands a second rtx back through its pointer
;; argument (tmp[2], tmp[3]).  The adjusted values are packed with the
;; signed expander into tmp[4]; tmp[5] is built from tmp[2]/tmp[3] by
;; ix86_expand_vec_extract_even_odd, and the result is tmp[4] XOR tmp[5].
(define_expand "vec_pack_ufix_trunc_<mode>"
  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
   (match_operand:VF2 1 "register_operand" "")
   (match_operand:VF2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp[7];
  tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
  tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
  tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
  emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
  if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
    {
      tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
      ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
    }
  else
    {
      /* V8SI result without AVX2: do the extraction in V8SFmode and
         view the result as V8SI again.  */
      tmp[5] = gen_reg_rtx (V8SFmode);
      ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
                                        gen_lowpart (V8SFmode, tmp[3]), 0);
      tmp[5] = gen_lowpart (V8SImode, tmp[5]);
    }
  tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
                                operands[0], 0, OPTAB_DIRECT);
  /* expand_simple_binop may return a register other than the requested
     target.  */
  if (tmp[6] != operands[0])
    emit_move_insn (operands[0], tmp[6]);
  DONE;
})

;; Non-truncating conversion of two V4DF vectors to one V8SI: each input
;; goes through avx_cvtpd2dq256 and the V4SI results are concatenated.
(define_expand "vec_pack_sfix_v4df"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V4DF 1 "nonimmediate_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")]
  "TARGET_AVX"
{
  rtx r1, r2;

  r1 = gen_reg_rtx (V4SImode);
  r2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
  emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
  DONE;
})

;; Non-truncating conversion of two V2DF vectors to one V4SI; structured
;; like vec_pack_sfix_trunc_v2df but using the cvtpd2dq generators.
(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V2DF 1 "nonimmediate_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")]
  "TARGET_SSE2"
{
  rtx tmp0, tmp1;

  if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      tmp0 = gen_reg_rtx (V4DFmode);
      tmp1 = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
      emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
    }
  else
    {
      tmp0 = gen_reg_rtx (V4SImode);
      tmp1 = gen_reg_rtx (V4SImode);

      emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
      emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
      emit_insn
       (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
                                    gen_lowpart (V2DImode, tmp0),
                                    gen_lowpart (V2DImode, tmp1)));
    }
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Expander front end for sse_movhlps: the operands are first passed
;; through ix86_fixup_binary_operands, the insn is emitted into the
;; destination it returns, and that destination is copied to operand 0
;; when the two differ.
(define_expand "sse_movhlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Result is { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of the
;; concatenation op1:op2).  At most one of the two inputs may be in
;; memory.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   vmovhlps\t{%2, %1, %0|%0, %1, %2}
   movlps\t{%H2, %0|%0, %H2}
   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
   %vmovhps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])

;; Expander front end for sse_movlhps; same operand fix-up scheme as
;; sse_movhlps_exp.
(define_expand "sse_movlhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (match_operand:V4SF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Result is { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of the
;; concatenation op1:op2).
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])

;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpckhps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_AVX"
  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; Interleave of the high halves of two V8SF vectors, built from the two
;; in-lane unpack selections: operand 3 receives the selection used by
;; avx_unpcklps256 (0,8,1,9,4,12,5,13), operand 4 the one used by
;; avx_unpckhps256, and the result takes elements 4..7 of each of them
;; (indices 4-7 and 12-15 of the concatenation op3:op4).
(define_expand "vec_interleave_highv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})

;; Result is { op1[2], op2[2], op1[3], op2[3] }.
(define_insn "vec_interleave_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE"
  "@
   unpckhps\t{%2, %0|%0, %2}
   vunpckhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Selects elements 0,1 and 4,5 of each input, interleaved pairwise
;; (indices 0,8,1,9,4,12,5,13 of the concatenation op1:op2).
(define_insn "avx_unpcklps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))]
  "TARGET_AVX"
  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; Interleave of the low halves of two V8SF vectors.  Operands 3 and 4
;; receive the 0,8,1,9,4,12,5,13 and 2,10,3,11,6,14,7,15 selections of
;; op1:op2; the result takes elements 0..3 of each of them (indices 0-3
;; and 8-11 of the concatenation op3:op4).
(define_expand "vec_interleave_lowv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand" "")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)])))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})

;; Result is { op1[0], op2[0], op1[1], op2[1] }.
(define_insn "vec_interleave_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0,x")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
;;
;; avx_movshdup256: each odd-indexed element of operand 1 is duplicated
;; into the even position below it (selector 1,1,3,3,5,5,7,7).
(define_insn "avx_movshdup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1) (const_int 1)
                     (const_int 3) (const_int 3)
                     (const_int 5) (const_int 5)
                     (const_int 7) (const_int 7)])))]
  "TARGET_AVX"
  "vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; 128-bit form.  Operand 1 is concatenated with itself, so indices 7,7
;; name element 3 of the same vector: the result is
;; { op1[1], op1[1], op1[3], op1[3] }.
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1)
                     (const_int 1)
                     (const_int 7)
                     (const_int 7)])))]
  "TARGET_SSE3"
  "%vmovshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])

;; Each even-indexed element of operand 1 is duplicated into the odd
;; position above it (selector 0,0,2,2,4,4,6,6).
(define_insn "avx_movsldup256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 0)
                     (const_int 2) (const_int 2)
                     (const_int 4) (const_int 4)
                     (const_int 6) (const_int 6)])))]
  "TARGET_AVX"
  "vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; 128-bit form.  Indices 6,6 name element 2 of the self-concatenated
;; operand: the result is { op1[0], op1[0], op1[2], op1[2] }.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 0)
                     (const_int 6)
                     (const_int 6)])))]
  "TARGET_SSE3"
  "%vmovsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])

;; Expand an 8-bit shuffle immediate (operand 3) into the eight explicit
;; element selectors taken by avx_shufps256_1.  Fields 0 and 1 of the
;; mask index operand 1 (offsets 0 and 4), fields 2 and 3 index operand 2
;; (offsets 8 and 12 in the concatenation op1:op2).
(define_expand "avx_shufps256"
  [(match_operand:V8SF 0 "register_operand" "")
   (match_operand:V8SF 1 "register_operand" "")
   (match_operand:V8SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
                                  GEN_INT ((mask >> 0) & 3),
                                  GEN_INT ((mask >> 2) & 3),
                                  GEN_INT (((mask >> 4) & 3) + 8),
                                  GEN_INT (((mask >> 6) & 3) + 8),
                                  GEN_INT (((mask >> 0) & 3) + 4),
                                  GEN_INT (((mask >> 2) & 3) + 4),
                                  GEN_INT (((mask >> 4) & 3) + 12),
                                  GEN_INT (((mask >> 6) & 3) + 12)));
  DONE;
})

;; Each 2-bit field of the mask selects two elements: the same relative
;; index in the low and in the high 128-bit lane.
;; Operands 3-6 select result elements 0..3 and operands 7-10 select
;; result elements 4..7.  The insn only matches when each of operands
;; 7-10 equals the corresponding operand 3-6 plus 4, so that the four
;; 2-bit fields rebuilt below describe the whole shuffle.
(define_insn "avx_shufps256_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "x")
            (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_8_to_11_operand" "")
                     (match_operand 6 "const_8_to_11_operand" "")
                     (match_operand 7 "const_4_to_7_operand" "")
                     (match_operand 8 "const_4_to_7_operand" "")
                     (match_operand 9 "const_12_to_15_operand" "")
                     (match_operand 10 "const_12_to_15_operand" "")])))]
  "TARGET_AVX
   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
{
  /* Rebuild the 8-bit immediate from the four selectors and print it in
     place of operand 3.  */
  int mask;
  mask = INTVAL (operands[3]);
  mask |= INTVAL (operands[4]) << 2;
  mask |= (INTVAL (operands[5]) - 8) << 4;
  mask |= (INTVAL (operands[6]) - 8) << 6;
  operands[3] = GEN_INT (mask);

  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; Expand an 8-bit shuffle immediate (operand 3) into the four explicit
;; element selectors taken by sse_shufps_v4sf: two indices into operand 1
;; (0..3) and two into operand 2 (4..7 of the concatenation).
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
                                  GEN_INT ((mask >> 0) & 3),
                                  GEN_INT ((mask >> 2) & 3),
                                  GEN_INT (((mask >> 4) & 3) + 4),
                                  GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})

;; shufps on any VI4F_128 mode.  The output code folds the four
;; selectors back into the 8-bit immediate before printing.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
        (vec_select:VI4F_128
          (vec_concat:<ssedoublevecmode>
            (match_operand:VI4F_128 1 "register_operand" "0,x")
            (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  int mask = 0;
  mask |= INTVAL (operands[3]) << 0;
  mask |= INTVAL (operands[4]) << 2;
  mask |= (INTVAL (operands[5]) - 4) << 4;
  mask |= (INTVAL (operands[6]) - 4) << 6;
  operands[3] = GEN_INT (mask);

  switch (which_alternative)
    {
    case 0:
      return "shufps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

;; Extract elements 2..3 of a V4SF vector as V2SF, to memory or to a
;; register, from a register or an offsettable memory source.
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   %vmovhlps\t{%1, %d0|%d0, %1}
   %vmovlps\t{%H1, %d0|%d0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Expander front end for sse_loadhps: operands are passed through
;; ix86_fixup_binary_operands and the result is copied to operand 0 if
;; the insn was emitted into a different destination.
(define_expand "sse_loadhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Result is { op1[0], op1[1], op2[0], op2[1] }: the low half of operand
;; 1 is kept and operand 2 becomes the high half.
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])

;; Extract elements 0..1 of a V4SF vector as V2SF.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   %vmovlps\t{%1, %0|%0, %1}
   %vmovaps\t{%1, %0|%0, %1}
   %vmovlps\t{%1, %d0|%d0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Expander front end for sse_loadlps; same operand fix-up scheme as
;; sse_loadhps_exp.
(define_expand "sse_loadlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
{
  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  DONE;
})

;; Result is { op2[0], op2[1], op1[2], op1[3] }: operand 2 becomes the
;; low half and the high half of operand 1 is kept.
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   vmovlps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "length_immediate" "1,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])

;; Merge with mask 1: element 0 comes from operand 2, the remaining
;; elements from operand 1.
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "register_operand" " x,x")
          (match_operand:V4SF 1 "register_operand" " 0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])

;; Broadcast element 0 of a V4SF register to every element of a VF1
;; vector.
(define_insn "avx2_vec_dup<mode>"
  [(set (match_operand:VF1 0 "register_operand" "=x")
        (vec_duplicate:VF1
          (vec_select:SF
            (match_operand:V4SF 1 "register_operand" "x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; Broadcast an SF value to all four elements.  The AVX alternatives
;; take the value from a register (vshufps) or from memory
;; (vbroadcastss); the non-AVX alternative requires the input to already
;; be in the destination register.
(define_insn "vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
  "TARGET_SSE"
  "@
   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "isa" "avx,avx,noavx")
   (set_attr "type" "sselog1,ssemov,sselog1")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "prefix" "vex,vex,orig")
   (set_attr "mode" "V4SF")])

;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1"
  [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
          (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
   %vmovss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_data16" "*,*,1,*,*,*,*")
   (set_attr "prefix_extra" "*,*,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,1,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Plain-SSE concatenation of two SF values into a V2SF register;
;; operand 2 is restricted to a register or zero.
(define_insn "*vec_concatv2sf_sse"
  [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])

;; Concatenate two V2SF values into a V4SF register: operand 1 is the
;; low half, operand 2 (register or memory) the high half.
(define_insn "*vec_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])

;; Vector initialisation for all V_128 modes; the work is delegated to
;; ix86_expand_vector_init.
(define_expand "vec_init<mode>"
  [(match_operand:V_128 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;;
;; Set element 0 of a VI4F_128 vector to scalar operand 2, taking the
;; other elements from operand 1 (zero, the destination itself, or
;; another register).  The last three alternatives have a memory
;; destination and output "#", i.e. they are expected to be split.
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
          "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 2 "general_operand"
          " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
          (match_operand:VI4F_128 1 "vector_move_operand"
          " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}
   pinsrd\t{$0, %2, %0|%0, %2, 0}
   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
   #
   #
   #"
  [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,6,7")
              (const_string "sselog")
            (eq_attr "alternative" "9")
              (const_string "imov")
            (eq_attr "alternative" "10")
              (const_string "fmov")
           ]
           (const_string "ssemov")))
   (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
   (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])

;; A subset is vec_setv4sf.
;;
;; Operand 3 is the vec_merge mask.  The insn only matches when it has a
;; single bit set whose index is below the number of V4SF elements; that
;; index, shifted left by 4, becomes the insertps immediate.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
          (match_operand:V4SF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_int_operand" "")))]
  "TARGET_SSE4_1
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (V4SFmode))"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
  switch (which_alternative)
    {
    case 0:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

;; insertps with an explicit 8-bit immediate (operand 3), modelled as an
;; unspec.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
                      (match_operand:V4SF 1 "register_operand" "0,x")
                      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
                     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      /* For a memory source, fold bits 7:6 of the immediate into the
         address (count_s SFmode elements in) and clear them in the
         immediate that is printed.  */
      unsigned count_s = INTVAL (operands[3]) >> 6;
      if (count_s)
        operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
    }
  switch (which_alternative)
    {
    case 0:
      return "insertps\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])

;; After reload, setting element 0 of a vector that lives in memory
;; (every other element merged from that same memory operand) becomes a
;; scalar move to offset 0 of the vector.
(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand" "")
        (vec_merge:VI4F_128
          (vec_duplicate:VI4F_128
            (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
                  operands[1]);
  DONE;
})

;; Set element number operand 2 of vector operand 0 to scalar operand 1;
;; the work is delegated to ix86_expand_vector_set.
(define_expand "vec_set<mode>"
  [(match_operand:V 0 "register_operand" "")
   (match_operand:<ssescalarmode> 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; Extract element 0 of a V4SF vector.  Always output as "#" and split
;; after reload into an SFmode move from the low part of operand 1.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  /* A register source is re-created as an SFmode REG with the same
     register number; anything else goes through gen_lowpart.  */
  if (REG_P (op1))
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
  else
    op1 = gen_lowpart (SFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
        (vec_select:SF
          (match_operand:V4SF 1 "register_operand" "x,0,x")
          (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
  switch (INTVAL (operands[2]))
    {
    case 1:
    case 3:
      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
                                      operands[2], operands[2],
                                      GEN_INT (INTVAL (operands[2]) + 4),
                                      GEN_INT (INTVAL (operands[2]) + 4)));
      break;
    case 2:
      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
      break;
    default:
      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.
*/ 3999 gcc_unreachable (); 4000 } 4001 DONE; 4002} 4003 [(set_attr "isa" "*,noavx,avx") 4004 (set_attr "type" "sselog,*,*") 4005 (set_attr "prefix_data16" "1,*,*") 4006 (set_attr "prefix_extra" "1,*,*") 4007 (set_attr "length_immediate" "1,*,*") 4008 (set_attr "prefix" "maybe_vex,*,*") 4009 (set_attr "mode" "V4SF,*,*")]) 4010 4011(define_insn_and_split "*vec_extract_v4sf_mem" 4012 [(set (match_operand:SF 0 "register_operand" "=x,*r,f") 4013 (vec_select:SF 4014 (match_operand:V4SF 1 "memory_operand" "o,o,o") 4015 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))] 4016 "TARGET_SSE" 4017 "#" 4018 "&& reload_completed" 4019 [(const_int 0)] 4020{ 4021 int i = INTVAL (operands[2]); 4022 4023 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4)); 4024 DONE; 4025}) 4026 4027(define_expand "avx_vextractf128<mode>" 4028 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "") 4029 (match_operand:V_256 1 "register_operand" "") 4030 (match_operand:SI 2 "const_0_to_1_operand" "")] 4031 "TARGET_AVX" 4032{ 4033 rtx (*insn)(rtx, rtx); 4034 4035 switch (INTVAL (operands[2])) 4036 { 4037 case 0: 4038 insn = gen_vec_extract_lo_<mode>; 4039 break; 4040 case 1: 4041 insn = gen_vec_extract_hi_<mode>; 4042 break; 4043 default: 4044 gcc_unreachable (); 4045 } 4046 4047 emit_insn (insn (operands[0], operands[1])); 4048 DONE; 4049}) 4050 4051(define_insn_and_split "vec_extract_lo_<mode>" 4052 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4053 (vec_select:<ssehalfvecmode> 4054 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x") 4055 (parallel [(const_int 0) (const_int 1)])))] 4056 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4057 "#" 4058 "&& reload_completed" 4059 [(const_int 0)] 4060{ 4061 rtx op1 = operands[1]; 4062 if (REG_P (op1)) 4063 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); 4064 else 4065 op1 = gen_lowpart (<ssehalfvecmode>mode, op1); 4066 emit_move_insn (operands[0], op1); 4067 DONE; 
4068}) 4069 4070(define_insn "vec_extract_hi_<mode>" 4071 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4072 (vec_select:<ssehalfvecmode> 4073 (match_operand:VI8F_256 1 "register_operand" "x,x") 4074 (parallel [(const_int 2) (const_int 3)])))] 4075 "TARGET_AVX" 4076 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}" 4077 [(set_attr "type" "sselog") 4078 (set_attr "prefix_extra" "1") 4079 (set_attr "length_immediate" "1") 4080 (set_attr "memory" "none,store") 4081 (set_attr "prefix" "vex") 4082 (set_attr "mode" "<sseinsnmode>")]) 4083 4084(define_insn_and_split "vec_extract_lo_<mode>" 4085 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4086 (vec_select:<ssehalfvecmode> 4087 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x") 4088 (parallel [(const_int 0) (const_int 1) 4089 (const_int 2) (const_int 3)])))] 4090 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4091 "#" 4092 "&& reload_completed" 4093 [(const_int 0)] 4094{ 4095 rtx op1 = operands[1]; 4096 if (REG_P (op1)) 4097 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); 4098 else 4099 op1 = gen_lowpart (<ssehalfvecmode>mode, op1); 4100 emit_move_insn (operands[0], op1); 4101 DONE; 4102}) 4103 4104(define_insn "vec_extract_hi_<mode>" 4105 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4106 (vec_select:<ssehalfvecmode> 4107 (match_operand:VI4F_256 1 "register_operand" "x,x") 4108 (parallel [(const_int 4) (const_int 5) 4109 (const_int 6) (const_int 7)])))] 4110 "TARGET_AVX" 4111 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}" 4112 [(set_attr "type" "sselog") 4113 (set_attr "prefix_extra" "1") 4114 (set_attr "length_immediate" "1") 4115 (set_attr "memory" "none,store") 4116 (set_attr "prefix" "vex") 4117 (set_attr "mode" "<sseinsnmode>")]) 4118 4119(define_insn_and_split "vec_extract_lo_v16hi" 4120 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") 4121 (vec_select:V8HI 4122 (match_operand:V16HI 1 "nonimmediate_operand" 
"xm,x") 4123 (parallel [(const_int 0) (const_int 1) 4124 (const_int 2) (const_int 3) 4125 (const_int 4) (const_int 5) 4126 (const_int 6) (const_int 7)])))] 4127 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4128 "#" 4129 "&& reload_completed" 4130 [(const_int 0)] 4131{ 4132 rtx op1 = operands[1]; 4133 if (REG_P (op1)) 4134 op1 = gen_rtx_REG (V8HImode, REGNO (op1)); 4135 else 4136 op1 = gen_lowpart (V8HImode, op1); 4137 emit_move_insn (operands[0], op1); 4138 DONE; 4139}) 4140 4141(define_insn "vec_extract_hi_v16hi" 4142 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") 4143 (vec_select:V8HI 4144 (match_operand:V16HI 1 "register_operand" "x,x") 4145 (parallel [(const_int 8) (const_int 9) 4146 (const_int 10) (const_int 11) 4147 (const_int 12) (const_int 13) 4148 (const_int 14) (const_int 15)])))] 4149 "TARGET_AVX" 4150 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" 4151 [(set_attr "type" "sselog") 4152 (set_attr "prefix_extra" "1") 4153 (set_attr "length_immediate" "1") 4154 (set_attr "memory" "none,store") 4155 (set_attr "prefix" "vex") 4156 (set_attr "mode" "OI")]) 4157 4158(define_insn_and_split "vec_extract_lo_v32qi" 4159 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") 4160 (vec_select:V16QI 4161 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x") 4162 (parallel [(const_int 0) (const_int 1) 4163 (const_int 2) (const_int 3) 4164 (const_int 4) (const_int 5) 4165 (const_int 6) (const_int 7) 4166 (const_int 8) (const_int 9) 4167 (const_int 10) (const_int 11) 4168 (const_int 12) (const_int 13) 4169 (const_int 14) (const_int 15)])))] 4170 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4171 "#" 4172 "&& reload_completed" 4173 [(const_int 0)] 4174{ 4175 rtx op1 = operands[1]; 4176 if (REG_P (op1)) 4177 op1 = gen_rtx_REG (V16QImode, REGNO (op1)); 4178 else 4179 op1 = gen_lowpart (V16QImode, op1); 4180 emit_move_insn (operands[0], op1); 4181 DONE; 4182}) 4183 4184(define_insn "vec_extract_hi_v32qi" 4185 [(set 
(match_operand:V16QI 0 "nonimmediate_operand" "=x,m") 4186 (vec_select:V16QI 4187 (match_operand:V32QI 1 "register_operand" "x,x") 4188 (parallel [(const_int 16) (const_int 17) 4189 (const_int 18) (const_int 19) 4190 (const_int 20) (const_int 21) 4191 (const_int 22) (const_int 23) 4192 (const_int 24) (const_int 25) 4193 (const_int 26) (const_int 27) 4194 (const_int 28) (const_int 29) 4195 (const_int 30) (const_int 31)])))] 4196 "TARGET_AVX" 4197 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" 4198 [(set_attr "type" "sselog") 4199 (set_attr "prefix_extra" "1") 4200 (set_attr "length_immediate" "1") 4201 (set_attr "memory" "none,store") 4202 (set_attr "prefix" "vex") 4203 (set_attr "mode" "OI")]) 4204 4205;; Modes handled by vec_extract patterns. 4206(define_mode_iterator VEC_EXTRACT_MODE 4207 [(V32QI "TARGET_AVX") V16QI 4208 (V16HI "TARGET_AVX") V8HI 4209 (V8SI "TARGET_AVX") V4SI 4210 (V4DI "TARGET_AVX") V2DI 4211 (V8SF "TARGET_AVX") V4SF 4212 (V4DF "TARGET_AVX") V2DF]) 4213 4214(define_expand "vec_extract<mode>" 4215 [(match_operand:<ssescalarmode> 0 "register_operand" "") 4216 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "") 4217 (match_operand 2 "const_int_operand" "")] 4218 "TARGET_SSE" 4219{ 4220 ix86_expand_vector_extract (false, operands[0], operands[1], 4221 INTVAL (operands[2])); 4222 DONE; 4223}) 4224 4225;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4226;; 4227;; Parallel double-precision floating point element swizzling 4228;; 4229;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4230 4231;; Recall that the 256-bit unpck insns only shuffle within their lanes. 
;; vunpckhpd on 256-bit vectors: selects elements 1,5,3,7 of the
;; concatenation of operands 1 and 2.
(define_insn "avx_unpckhpd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))]
  "TARGET_AVX"
  "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Interleave-high for V4DF, expanded as three vec_selects.  Temporaries
;; 3 and 4 (fresh pseudos) receive the 0,4,2,6 and 1,5,3,7 selections of
;; operands 1 and 2; the result takes elements 2,3,6,7 of the concatenation
;; of the two temporaries, i.e. the upper two elements of each.
(define_expand "vec_interleave_highv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 2) (const_int 3)
                     (const_int 6) (const_int 7)])))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4DFmode);
  operands[4] = gen_reg_rtx (V4DFmode);
})


;; Interleave-high for V2DF (elements 1 and 3 of the concatenation).  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 2 is forced into a register.
(define_expand "vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2"
{
  if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
    operands[2] = force_reg (V2DFmode, operands[2]);
})

;; Matching insn for the expander above.  Alternatives, in order:
;; (v)unpckhpd on registers; (v)movddup of the high half of a memory
;; operand used for both inputs; (v)movlpd loading the high half of a
;; memory operand 1; and (v)movhpd storing to a memory destination that is
;; tied to operand 2.  %H1 addresses the high half of operand 1.
(define_insn "*vec_interleave_highv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%H1, %0|%0, %H1}
   movlpd\t{%H1, %0|%0, %H1}
   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhpd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix_data16" "*,*,*,1,*,1")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])

;; Named expander for the 256-bit movddup: the same 0,4,2,6 selection as
;; unpcklpd, with operand 1 used for both inputs.  It has no preparation
;; code; the RTL template is emitted as written.
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_expand "avx_movddup256"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand" "")
            (match_dup 1))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX")

;; Named expander for the 256-bit unpcklpd (selection 0,4,2,6); it has no
;; preparation code.
(define_expand "avx_unpcklpd256"
  [(set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "")
            (match_operand:V4DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX")

;; Insn for the 0,4,2,6 selection.  Alternative 0 is vunpcklpd; alternative
;; 1 has operand 1 in memory with operand 2 tied to it (constraint "1"),
;; and is emitted as vmovddup.
(define_insn "*avx_unpcklpd256"
  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))]
  "TARGET_AVX"
  "@
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   vmovddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Interleave-low for V4DF, expanded as three vec_selects.  Temporaries
;; 3 and 4 (fresh pseudos) receive the 0,4,2,6 and 1,5,3,7 selections of
;; operands 1 and 2; the result takes elements 0,1,4,5 of the concatenation
;; of the two temporaries, i.e. the lower two elements of each.
(define_expand "vec_interleave_lowv4df"
  [(set (match_dup 3)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 2) (const_int 6)])))
   (set (match_dup 4)
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 1) (const_int 5)
                     (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand" "")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 4) (const_int 5)])))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V4DFmode);
  operands[4] = gen_reg_rtx (V4DFmode);
})

;; Interleave-low for V2DF (elements 0 and 2 of the concatenation).  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operand combination,
;; operand 1 is forced into a register.
(define_expand "vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "")
            (match_operand:V2DF 2 "nonimmediate_operand" ""))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2"
{
  if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
    operands[1] = force_reg (V2DFmode, operands[1]);
})

;; Matching insn for the expander above.  Alternatives, in order:
;; (v)unpcklpd on registers; (v)movddup of a memory operand used for both
;; inputs; (v)movhpd loading operand 2 from memory; and (v)movlpd storing
;; operand 2 into the high half (%H0) of a memory destination tied to
;; operand 1.
(define_insn "*vec_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %1}
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix_data16" "*,*,*,1,*,1")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])

;; After reload, duplicating the low element of a register into a V2DF
;; memory destination becomes two DFmode stores of that element, at byte
;; offsets 0 and 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
  DONE;
})

;; Selecting the same element N (operand 2, with operand 3 == N + 2) from
;; both copies of a V2DF memory operand is a vec_duplicate of the DFmode
;; value at byte offset 8 * N.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "memory_operand" "")
            (match_dup 1))
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
                     (match_operand:SI 3 "const_int_operand" "")])))]
  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
{
  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
})

;; vshufpd on 256-bit vectors with an immediate mask (operand 3).  Each of
;; the low four mask bits chooses between two element indices of the
;; concatenation: bit 0 -> 0/1, bit 1 -> 4/5, bit 2 -> 2/3, bit 3 -> 6/7.
;; The indices are passed on to avx_shufpd256_1.
(define_expand "avx_shufpd256"
  [(match_operand:V4DF 0 "register_operand" "")
   (match_operand:V4DF 1 "register_operand" "")
   (match_operand:V4DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
                                   GEN_INT (mask & 1),
                                   GEN_INT (mask & 2 ? 5 : 4),
                                   GEN_INT (mask & 4 ? 3 : 2),
                                   GEN_INT (mask & 8 ? 7 : 6)));
  DONE;
})

;; The vec_select form of vshufpd for V4DF.  Operands 3-6 are element
;; indices, each restricted by its predicate to one of two values; the
;; output code packs them back into the 4-bit instruction immediate.
(define_insn "avx_shufpd256_1"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
          (vec_concat:V8DF
            (match_operand:V4DF 1 "register_operand" "x")
            (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_4_to_5_operand" "")
                     (match_operand 5 "const_2_to_3_operand" "")
                     (match_operand 6 "const_6_to_7_operand" "")])))]
  "TARGET_AVX"
{
  int mask;
  /* Each index contributes one bit: its offset from the lower of its
     two allowed values.  */
  mask = INTVAL (operands[3]);
  mask |= (INTVAL (operands[4]) - 4) << 1;
  mask |= (INTVAL (operands[5]) - 2) << 2;
  mask |= (INTVAL (operands[6]) - 6) << 3;
  operands[3] = GEN_INT (mask);

  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; shufpd on 128-bit vectors with an immediate mask (operand 3): bit 0
;; chooses index 0/1 and bit 1 chooses index 2/3, which are passed on to
;; sse2_shufpd_v2df.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
                                    GEN_INT (mask & 1),
                                    GEN_INT (mask & 2 ? 3 : 2)));
  DONE;
})

;; punpcklqdq and punpckhqdq are shorter than shufpd.
4500(define_insn "avx2_interleave_highv4di" 4501 [(set (match_operand:V4DI 0 "register_operand" "=x") 4502 (vec_select:V4DI 4503 (vec_concat:V8DI 4504 (match_operand:V4DI 1 "register_operand" "x") 4505 (match_operand:V4DI 2 "nonimmediate_operand" "xm")) 4506 (parallel [(const_int 1) 4507 (const_int 5) 4508 (const_int 3) 4509 (const_int 7)])))] 4510 "TARGET_AVX2" 4511 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" 4512 [(set_attr "type" "sselog") 4513 (set_attr "prefix" "vex") 4514 (set_attr "mode" "OI")]) 4515 4516(define_insn "vec_interleave_highv2di" 4517 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 4518 (vec_select:V2DI 4519 (vec_concat:V4DI 4520 (match_operand:V2DI 1 "register_operand" "0,x") 4521 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")) 4522 (parallel [(const_int 1) 4523 (const_int 3)])))] 4524 "TARGET_SSE2" 4525 "@ 4526 punpckhqdq\t{%2, %0|%0, %2} 4527 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" 4528 [(set_attr "isa" "noavx,avx") 4529 (set_attr "type" "sselog") 4530 (set_attr "prefix_data16" "1,*") 4531 (set_attr "prefix" "orig,vex") 4532 (set_attr "mode" "TI")]) 4533 4534(define_insn "avx2_interleave_lowv4di" 4535 [(set (match_operand:V4DI 0 "register_operand" "=x") 4536 (vec_select:V4DI 4537 (vec_concat:V8DI 4538 (match_operand:V4DI 1 "register_operand" "x") 4539 (match_operand:V4DI 2 "nonimmediate_operand" "xm")) 4540 (parallel [(const_int 0) 4541 (const_int 4) 4542 (const_int 2) 4543 (const_int 6)])))] 4544 "TARGET_AVX2" 4545 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" 4546 [(set_attr "type" "sselog") 4547 (set_attr "prefix" "vex") 4548 (set_attr "mode" "OI")]) 4549 4550(define_insn "vec_interleave_lowv2di" 4551 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 4552 (vec_select:V2DI 4553 (vec_concat:V4DI 4554 (match_operand:V2DI 1 "register_operand" "0,x") 4555 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")) 4556 (parallel [(const_int 0) 4557 (const_int 2)])))] 4558 "TARGET_SSE2" 4559 "@ 4560 punpcklqdq\t{%2, %0|%0, %2} 4561 
vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" 4562 [(set_attr "isa" "noavx,avx") 4563 (set_attr "type" "sselog") 4564 (set_attr "prefix_data16" "1,*") 4565 (set_attr "prefix" "orig,vex") 4566 (set_attr "mode" "TI")]) 4567 4568(define_insn "sse2_shufpd_<mode>" 4569 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x") 4570 (vec_select:VI8F_128 4571 (vec_concat:<ssedoublevecmode> 4572 (match_operand:VI8F_128 1 "register_operand" "0,x") 4573 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm")) 4574 (parallel [(match_operand 3 "const_0_to_1_operand" "") 4575 (match_operand 4 "const_2_to_3_operand" "")])))] 4576 "TARGET_SSE2" 4577{ 4578 int mask; 4579 mask = INTVAL (operands[3]); 4580 mask |= (INTVAL (operands[4]) - 2) << 1; 4581 operands[3] = GEN_INT (mask); 4582 4583 switch (which_alternative) 4584 { 4585 case 0: 4586 return "shufpd\t{%3, %2, %0|%0, %2, %3}"; 4587 case 1: 4588 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4589 default: 4590 gcc_unreachable (); 4591 } 4592} 4593 [(set_attr "isa" "noavx,avx") 4594 (set_attr "type" "sselog") 4595 (set_attr "length_immediate" "1") 4596 (set_attr "prefix" "orig,vex") 4597 (set_attr "mode" "V2DF")]) 4598 4599;; Avoid combining registers from different units in a single alternative, 4600;; see comment above inline_secondary_memory_needed function in i386.c 4601(define_insn "sse2_storehpd" 4602 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r") 4603 (vec_select:DF 4604 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o") 4605 (parallel [(const_int 1)])))] 4606 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4607 "@ 4608 %vmovhpd\t{%1, %0|%0, %1} 4609 unpckhpd\t%0, %0 4610 vunpckhpd\t{%d1, %0|%0, %d1} 4611 # 4612 # 4613 #" 4614 [(set_attr "isa" "*,noavx,avx,*,*,*") 4615 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov") 4616 (set (attr "prefix_data16") 4617 (if_then_else 4618 (and (eq_attr "alternative" "0") 4619 (not (match_test "TARGET_AVX"))) 4620 (const_string "1") 
4621 (const_string "*"))) 4622 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*") 4623 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")]) 4624 4625(define_split 4626 [(set (match_operand:DF 0 "register_operand" "") 4627 (vec_select:DF 4628 (match_operand:V2DF 1 "memory_operand" "") 4629 (parallel [(const_int 1)])))] 4630 "TARGET_SSE2 && reload_completed" 4631 [(set (match_dup 0) (match_dup 1))] 4632 "operands[1] = adjust_address (operands[1], DFmode, 8);") 4633 4634(define_insn "*vec_extractv2df_1_sse" 4635 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") 4636 (vec_select:DF 4637 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o") 4638 (parallel [(const_int 1)])))] 4639 "!TARGET_SSE2 && TARGET_SSE 4640 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4641 "@ 4642 movhps\t{%1, %0|%0, %1} 4643 movhlps\t{%1, %0|%0, %1} 4644 movlps\t{%H1, %0|%0, %H1}" 4645 [(set_attr "type" "ssemov") 4646 (set_attr "mode" "V2SF,V4SF,V2SF")]) 4647 4648;; Avoid combining registers from different units in a single alternative, 4649;; see comment above inline_secondary_memory_needed function in i386.c 4650(define_insn "sse2_storelpd" 4651 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r") 4652 (vec_select:DF 4653 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m") 4654 (parallel [(const_int 0)])))] 4655 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4656 "@ 4657 %vmovlpd\t{%1, %0|%0, %1} 4658 # 4659 # 4660 # 4661 #" 4662 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") 4663 (set_attr "prefix_data16" "1,*,*,*,*") 4664 (set_attr "prefix" "maybe_vex") 4665 (set_attr "mode" "V1DF,DF,DF,DF,DF")]) 4666 4667(define_split 4668 [(set (match_operand:DF 0 "register_operand" "") 4669 (vec_select:DF 4670 (match_operand:V2DF 1 "nonimmediate_operand" "") 4671 (parallel [(const_int 0)])))] 4672 "TARGET_SSE2 && reload_completed" 4673 [(const_int 0)] 4674{ 4675 rtx op1 = operands[1]; 4676 if (REG_P (op1)) 4677 op1 = gen_rtx_REG (DFmode, REGNO (op1)); 4678 else 4679 
op1 = gen_lowpart (DFmode, op1); 4680 emit_move_insn (operands[0], op1); 4681 DONE; 4682}) 4683 4684(define_insn "*vec_extractv2df_0_sse" 4685 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") 4686 (vec_select:DF 4687 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") 4688 (parallel [(const_int 0)])))] 4689 "!TARGET_SSE2 && TARGET_SSE 4690 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4691 "@ 4692 movlps\t{%1, %0|%0, %1} 4693 movaps\t{%1, %0|%0, %1} 4694 movlps\t{%1, %0|%0, %1}" 4695 [(set_attr "type" "ssemov") 4696 (set_attr "mode" "V2SF,V4SF,V2SF")]) 4697 4698(define_expand "sse2_loadhpd_exp" 4699 [(set (match_operand:V2DF 0 "nonimmediate_operand" "") 4700 (vec_concat:V2DF 4701 (vec_select:DF 4702 (match_operand:V2DF 1 "nonimmediate_operand" "") 4703 (parallel [(const_int 0)])) 4704 (match_operand:DF 2 "nonimmediate_operand" "")))] 4705 "TARGET_SSE2" 4706{ 4707 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); 4708 4709 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2])); 4710 4711 /* Fix up the destination if needed. 
*/ 4712 if (dst != operands[0]) 4713 emit_move_insn (operands[0], dst); 4714 4715 DONE; 4716}) 4717 4718;; Avoid combining registers from different units in a single alternative, 4719;; see comment above inline_secondary_memory_needed function in i386.c 4720(define_insn "sse2_loadhpd" 4721 [(set (match_operand:V2DF 0 "nonimmediate_operand" 4722 "=x,x,x,x,o,o ,o") 4723 (vec_concat:V2DF 4724 (vec_select:DF 4725 (match_operand:V2DF 1 "nonimmediate_operand" 4726 " 0,x,0,x,0,0 ,0") 4727 (parallel [(const_int 0)])) 4728 (match_operand:DF 2 "nonimmediate_operand" 4729 " m,m,x,x,x,*f,r")))] 4730 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 4731 "@ 4732 movhpd\t{%2, %0|%0, %2} 4733 vmovhpd\t{%2, %1, %0|%0, %1, %2} 4734 unpcklpd\t{%2, %0|%0, %2} 4735 vunpcklpd\t{%2, %1, %0|%0, %1, %2} 4736 # 4737 # 4738 #" 4739 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") 4740 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") 4741 (set_attr "prefix_data16" "1,*,*,*,*,*,*") 4742 (set_attr "prefix" "orig,vex,orig,vex,*,*,*") 4743 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")]) 4744 4745(define_split 4746 [(set (match_operand:V2DF 0 "memory_operand" "") 4747 (vec_concat:V2DF 4748 (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) 4749 (match_operand:DF 1 "register_operand" "")))] 4750 "TARGET_SSE2 && reload_completed" 4751 [(set (match_dup 0) (match_dup 1))] 4752 "operands[0] = adjust_address (operands[0], DFmode, 8);") 4753 4754(define_expand "sse2_loadlpd_exp" 4755 [(set (match_operand:V2DF 0 "nonimmediate_operand" "") 4756 (vec_concat:V2DF 4757 (match_operand:DF 2 "nonimmediate_operand" "") 4758 (vec_select:DF 4759 (match_operand:V2DF 1 "nonimmediate_operand" "") 4760 (parallel [(const_int 1)]))))] 4761 "TARGET_SSE2" 4762{ 4763 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); 4764 4765 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2])); 4766 4767 /* Fix up the destination if needed. 
*/ 4768 if (dst != operands[0]) 4769 emit_move_insn (operands[0], dst); 4770 4771 DONE; 4772}) 4773 4774;; Avoid combining registers from different units in a single alternative, 4775;; see comment above inline_secondary_memory_needed function in i386.c 4776(define_insn "sse2_loadlpd" 4777 [(set (match_operand:V2DF 0 "nonimmediate_operand" 4778 "=x,x,x,x,x,x,x,x,m,m ,m") 4779 (vec_concat:V2DF 4780 (match_operand:DF 2 "nonimmediate_operand" 4781 " m,m,m,x,x,0,0,x,x,*f,r") 4782 (vec_select:DF 4783 (match_operand:V2DF 1 "vector_move_operand" 4784 " C,0,x,0,x,x,o,o,0,0 ,0") 4785 (parallel [(const_int 1)]))))] 4786 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 4787 "@ 4788 %vmovsd\t{%2, %0|%0, %2} 4789 movlpd\t{%2, %0|%0, %2} 4790 vmovlpd\t{%2, %1, %0|%0, %1, %2} 4791 movsd\t{%2, %0|%0, %2} 4792 vmovsd\t{%2, %1, %0|%0, %1, %2} 4793 shufpd\t{$2, %1, %0|%0, %1, 2} 4794 movhpd\t{%H1, %0|%0, %H1} 4795 vmovhpd\t{%H1, %2, %0|%0, %2, %H1} 4796 # 4797 # 4798 #" 4799 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*") 4800 (set (attr "type") 4801 (cond [(eq_attr "alternative" "5") 4802 (const_string "sselog") 4803 (eq_attr "alternative" "9") 4804 (const_string "fmov") 4805 (eq_attr "alternative" "10") 4806 (const_string "imov") 4807 ] 4808 (const_string "ssemov"))) 4809 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*") 4810 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*") 4811 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*") 4812 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")]) 4813 4814(define_split 4815 [(set (match_operand:V2DF 0 "memory_operand" "") 4816 (vec_concat:V2DF 4817 (match_operand:DF 1 "register_operand" "") 4818 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))] 4819 "TARGET_SSE2 && reload_completed" 4820 [(set (match_dup 0) (match_dup 1))] 4821 "operands[0] = adjust_address (operands[0], DFmode, 0);") 4822 4823(define_insn "sse2_movsd" 4824 [(set (match_operand:V2DF 0 
"nonimmediate_operand" "=x,x,x,x,m,x,x,x,o") 4825 (vec_merge:V2DF 4826 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0") 4827 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x") 4828 (const_int 1)))] 4829 "TARGET_SSE2" 4830 "@ 4831 movsd\t{%2, %0|%0, %2} 4832 vmovsd\t{%2, %1, %0|%0, %1, %2} 4833 movlpd\t{%2, %0|%0, %2} 4834 vmovlpd\t{%2, %1, %0|%0, %1, %2} 4835 %vmovlpd\t{%2, %0|%0, %2} 4836 shufpd\t{$2, %1, %0|%0, %1, 2} 4837 movhps\t{%H1, %0|%0, %H1} 4838 vmovhps\t{%H1, %2, %0|%0, %2, %H1} 4839 %vmovhps\t{%1, %H0|%H0, %1}" 4840 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*") 4841 (set (attr "type") 4842 (if_then_else 4843 (eq_attr "alternative" "5") 4844 (const_string "sselog") 4845 (const_string "ssemov"))) 4846 (set (attr "prefix_data16") 4847 (if_then_else 4848 (and (eq_attr "alternative" "2,4") 4849 (not (match_test "TARGET_AVX"))) 4850 (const_string "1") 4851 (const_string "*"))) 4852 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*") 4853 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex") 4854 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")]) 4855 4856(define_insn "vec_dupv2df" 4857 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 4858 (vec_duplicate:V2DF 4859 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))] 4860 "TARGET_SSE2" 4861 "@ 4862 unpcklpd\t%0, %0 4863 %vmovddup\t{%1, %0|%0, %1}" 4864 [(set_attr "isa" "noavx,sse3") 4865 (set_attr "type" "sselog1") 4866 (set_attr "prefix" "orig,maybe_vex") 4867 (set_attr "mode" "V2DF,DF")]) 4868 4869(define_insn "*vec_concatv2df" 4870 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x") 4871 (vec_concat:V2DF 4872 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0") 4873 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))] 4874 "TARGET_SSE" 4875 "@ 4876 unpcklpd\t{%2, %0|%0, %2} 4877 vunpcklpd\t{%2, %1, %0|%0, %1, %2} 4878 %vmovddup\t{%1, %0|%0, %1} 4879 movhpd\t{%2, %0|%0, %2} 4880 
vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "0,1,2")
       (const_string "sselog")
       (const_string "ssemov")))
   (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Vector integer negation, expressed as (0 - operand 1); the zero vector
;; is forced into a register and becomes operand 2.
(define_expand "neg<mode>2"
  [(set (match_operand:VI_AVX2 0 "register_operand" "")
        (minus:VI_AVX2
          (match_dup 2)
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")

;; Vector integer add/subtract.  The expander only canonicalizes its
;; operands through ix86_fixup_binary_operands_no_copy.
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
          (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Matching insn: alternative 0 is the two-operand legacy encoding,
;; alternative 1 the three-operand VEX encoding.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
        (plusminus:VI_AVX2
          (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
          (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Saturating add/subtract (sat_plusminus codes) for the VI12_AVX2 modes.
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
  [(set (match_operand:VI12_AVX2 0 "register_operand" "")
        (sat_plusminus:VI12_AVX2
          (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
          (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
        (sat_plusminus:VI12_AVX2
          (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
          (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Byte-element vector multiply.  The insn template is "#" and the split
;; condition is "&& 1", so the pattern is always split: both inputs are
;; interleaved with themselves, multiplied in <sseunpackmode>, and the
;; even bytes of the two products are gathered into operand 0.
(define_insn_and_split "mul<mode>3"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "")
        (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
                       (match_operand:VI1_AVX2 2 "register_operand" "")))]
  "TARGET_SSE2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx t[6];
  int i;
  enum machine_mode mulmode = <sseunpackmode>mode;

  for (i = 0; i < 6; ++i)
    t[i] = gen_reg_rtx (<MODE>mode);

  /* Unpack data such that we've got a source byte in each low byte of
     each word.  We don't care what goes into the high byte of each word.
     Rather than trying to get zero in there, most convenient is to let
     it be a copy of the low byte.  */
  emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
                                                    operands[1]));
  emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
                                                    operands[2]));
  emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
                                                   operands[1]));
  emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
                                                   operands[2]));

  /* Multiply words.  The end-of-line annotations here give a picture of what
     the output of that instruction looks like.  Dot means don't care; the
     letters are the bytes of the result with A being the most significant.  */
  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
                          gen_rtx_MULT (mulmode,      /* .A.B.C.D.E.F.G.H */
                                        gen_lowpart (mulmode, t[0]),
                                        gen_lowpart (mulmode, t[1]))));
  emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
                          gen_rtx_MULT (mulmode,      /* .I.J.K.L.M.N.O.P */
                                        gen_lowpart (mulmode, t[2]),
                                        gen_lowpart (mulmode, t[3]))));

  /* Extract the even bytes and merge them back together.  */
  if (<MODE>mode == V16QImode)
    ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
  else
    {
      /* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
         this can't be normal even extraction, but one where additionally
         the second and third quarter are swapped.  That is even one insn
         shorter than even extraction.  */
      rtvec v = rtvec_alloc (32);
      for (i = 0; i < 32; ++i)
        RTVEC_ELT (v, i)
          = GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
      t[0] = operands[0];
      t[1] = t[5];
      t[2] = t[4];
      t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
      ix86_expand_vec_perm_const (t);
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
  DONE;
})

;; Word-element vector multiply, low half of each product.
(define_expand "mul<mode>3"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
        (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
                       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

(define_insn "*mul<mode>3"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
        (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
                       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   pmullw\t{%2, %0|%0, %2}
   vpmullw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Word-element multiply returning the high half of each product: both
;; inputs are extended to <ssedoublemode>, multiplied, shifted right by
;; 16 and truncated back.
(define_expand "<s>mul<mode>3_highpart"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "")
        (truncate:VI2_AVX2
          (lshiftrt:<ssedoublemode>
            (mult:<ssedoublemode>
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
            (const_int 16))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

(define_insn "*<s>mul<mode>3_highpart"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
        (truncate:VI2_AVX2
          (lshiftrt:<ssedoublemode>
            (mult:<ssedoublemode>
              (any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
(any_extend:<ssedoublemode>
                (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   pmulh<u>w\t{%2, %0|%0, %2}
   vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Unsigned widening multiply of the even SImode elements (0, 2, 4, 6) of
;; two V8SI vectors, giving a V4DI result.
(define_expand "avx2_umulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "")
        (mult:V4DI
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")

;; Insn matched by the expander above.  It requires TARGET_AVX2, so it is
;; named with the avx2_ prefix like the expander and the signed
;; *avx2_mulv4siv4di3 pattern (it was previously spelled "*avx_...").
;; The leading "*" means no gen_ function is created for it.
(define_insn "*avx2_umulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (mult:V4DI
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%x")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (zero_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Unsigned widening multiply of SImode elements 0 and 2 of two V4SI
;; vectors, giving a V2DI result.
(define_expand "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))
(zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")

;; Insn for the unsigned V4SI -> V2DI widening multiply (pmuludq).
(define_insn "*sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "@
   pmuludq\t{%2, %0|%0, %2}
   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Signed widening multiply of the even SImode elements (0, 2, 4, 6) of
;; two V8SI vectors, giving a V4DI result.
(define_expand "avx2_mulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")

;; Insn matched by the expander above.  Operand 1 carries the "%"
;; commutative marker, as in the other multiply insns of this file: the
;; insn condition goes through ix86_binary_operator_ok (MULT, ...), and
;; operand 1 uses the nonimmediate_operand predicate, so the marker lets
;; the two inputs be exchanged to satisfy the "x" / "xm" constraints.
(define_insn "*avx2_mulv4siv4di3"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (mult:V4DI
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 1 "nonimmediate_operand" "%x")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4DI
            (vec_select:V4SI
              (match_operand:V8SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Signed widening multiply of SImode elements 0 and 2 of two V4SI
;; vectors, giving a V2DI result.
(define_expand "sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")

;; Insn for the expander above: pmuldq (legacy) / vpmuldq (VEX).
(define_insn "*sse4_1_mulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
        (mult:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "@
   pmuldq\t{%2, %0|%0, %2}
   vpmuldq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; 256-bit multiply-add of signed words: the products of the even-indexed
;; elements of operands 1 and 2 are added to the products of the
;; odd-indexed elements, giving eight SImode sums.
(define_expand "avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand" "")
        (plus:V8SI
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)])))
(sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)]))))
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)]))))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")

;; 128-bit multiply-add of signed words: even-element products plus
;; odd-element products, giving four SImode sums.
(define_expand "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")

;; Insn for avx2_pmaddwd (vpmaddwd on 256-bit operands).
(define_insn "*avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (plus:V8SI
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 1 "nonimmediate_operand" "%x")
(parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)])))
            (sign_extend:V8SI
              (vec_select:V8HI
                (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)]))))
          (mult:V8SI
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)])))
            (sign_extend:V8SI
              (vec_select:V8HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)]))))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Insn for sse2_pmaddwd: pmaddwd (legacy) / vpmaddwd (VEX) on 128-bit
;; operands.
(define_insn "*sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2 &&
ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmaddwd\t{%2, %0|%0, %2}
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Dword-element vector multiply.  Operands are only canonicalized when
;; SSE4.1 or AVX is enabled; otherwise the RTL is left for the
;; *sse2_mulv4si3 splitter further down.
(define_expand "mul<mode>3"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "")
        (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
                       (match_operand:VI4_AVX2 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  if (TARGET_SSE4_1 || TARGET_AVX)
    ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})

;; Direct dword multiply: pmulld (legacy) / vpmulld (VEX).
(define_insn "*<sse4_1_avx2>_mul<mode>3"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
        (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
                       (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   pmulld\t{%2, %0|%0, %2}
   vpmulld\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; V4SI multiply for SSE2 without SSE4.1/AVX.  Always split ("#" template,
;; split condition "&& 1") into two widening multiplies of the even and
;; odd elements whose low halves are then shuffled back together.
(define_insn_and_split "*sse2_mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  t4 = gen_reg_rtx (V4SImode);
  t5 = gen_reg_rtx (V4SImode);
  t6 = gen_reg_rtx (V4SImode);
  thirtytwo = GEN_INT (32);

  /* Multiply elements 2 and 0.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
                                     op1, op2));

  /* Shift both input vectors down one element, so that elements 3
     and 1 are now in the slots for elements 2 and 0.  For K8, at
     least, this is faster than using a shuffle.  */
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
                                 gen_lowpart (V1TImode, op1),
                                 thirtytwo));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
                                 gen_lowpart (V1TImode, op2),
                                 thirtytwo));
  /* Multiply elements 3 and 1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
                                     t2, t3));

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  */
  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));

  /* Merge the parts back together.
*/
  emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_MULT (V4SImode, operands[1], operands[2]));
  DONE;
})

;; Quadword-element vector multiply.  Always split ("#" template, split
;; condition "&& 1").  Each 64-bit product is assembled modulo 2^64 from
;; 32-bit pieces:
;;   lo1 * lo2  +  ((lo1 * hi2 + hi1 * lo2) << 32)
;; The lo1 * lo2 term must be the full *unsigned* 64-bit product.
(define_insn_and_split "mul<mode>3"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
        (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
                       (match_operand:VI8_AVX2 2 "register_operand" "")))]
  "TARGET_SSE2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  if (TARGET_XOP && <MODE>mode == V2DImode)
    {
      /* op1: A,B,C,D, op2: E,F,G,H (dword elements 0..3, so A, C, E and G
         are the low dwords of the quadwords).  */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
                                    GEN_INT (1),
                                    GEN_INT (0),
                                    GEN_INT (3),
                                    GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* t5: unsigned 64-bit products of the low dwords (elements 0 and 2).
         This must not be done with xop_pmacsdql: that instruction
         multiplies *signed* dwords, which gives a wrong upper half
         whenever a low dword has its top bit set.  */
      t5 = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_umulv2siv2di3 (t5, op1, op2));

      /* op0: low product plus the shifted cross terms.  */
      emit_insn (gen_addv2di3 (op0, t5, t4));
    }
  else
    {
      t1 = gen_reg_rtx (<MODE>mode);
      t2 = gen_reg_rtx (<MODE>mode);
      t3 = gen_reg_rtx (<MODE>mode);
      t4 = gen_reg_rtx (<MODE>mode);
      t5 = gen_reg_rtx (<MODE>mode);
      t6 = gen_reg_rtx (<MODE>mode);
      thirtytwo = GEN_INT (32);

      /* Multiply low parts.
*/
      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
                 (t1, gen_lowpart (<ssepackmode>mode, op1),
                  gen_lowpart (<ssepackmode>mode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts.  */
      emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
      emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));

      /* Multiply high parts by low parts.  */
      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
                 (t4, gen_lowpart (<ssepackmode>mode, op1),
                  gen_lowpart (<ssepackmode>mode, t3)));
      emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
                 (t5, gen_lowpart (<ssepackmode>mode, op2),
                  gen_lowpart (<ssepackmode>mode, t2)));

      /* Shift them back.  */
      emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
      emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));

      /* Add the three parts together.  */
      emit_insn (gen_add<mode>3 (t6, t1, t4));
      emit_insn (gen_add<mode>3 (op0, t6, t5));
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
  DONE;
})

;; Widening word multiply, "hi" variant: the low and high halves of every
;; product are computed separately and combined with
;; vec_interleave_high<mode>.
(define_expand "vec_widen_<s>mult_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (any_extend:<sseunpackmode>
     (match_operand:VI2_AVX2 1 "register_operand" ""))
   (match_operand:VI2_AVX2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (<MODE>mode);
  rtx prod_hi = gen_reg_rtx (<MODE>mode);
  rtx dest = gen_lowpart (<MODE>mode, operands[0]);

  emit_insn (gen_mul<mode>3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_<s>mul<mode>3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_high<mode> (dest, prod_lo, prod_hi));
  DONE;
})

;; Widening word multiply, "lo" variant: same as above but combined with
;; vec_interleave_low<mode>.
(define_expand "vec_widen_<s>mult_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (any_extend:<sseunpackmode>
     (match_operand:VI2_AVX2 1 "register_operand" ""))
   (match_operand:VI2_AVX2 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod_lo = gen_reg_rtx (<MODE>mode);
  rtx prod_hi = gen_reg_rtx (<MODE>mode);
  rtx dest = gen_lowpart (<MODE>mode, operands[0]);

  emit_insn (gen_mul<mode>3 (prod_lo, operands[1], operands[2]));
  emit_insn (gen_<s>mul<mode>3_highpart (prod_hi, operands[1], operands[2]));
  emit_insn (gen_vec_interleave_low<mode> (dest, prod_lo, prod_hi));
  DONE;
})

;; Widening V8SI multiply, "hi" variant.  Both inputs go through a
;; quadword permute (0,2,1,3) and a dword shuffle selecting (2,2,3,3)
;; before the even-element widening multiply.
(define_expand "vec_widen_<s>mult_hi_v8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
   (match_operand:V8SI 2 "nonimmediate_operand" "")]
  "TARGET_AVX2"
{
  rtx t1, t2, t3, t4;

  t1 = gen_reg_rtx (V4DImode);
  t2 = gen_reg_rtx (V4DImode);
  t3 = gen_reg_rtx (V8SImode);
  t4 = gen_reg_rtx (V8SImode);
  emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
                                  const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
                                  const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
                                GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
                                GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
  DONE;
})

;; Widening V8SI multiply, "lo" variant: as above, but the dword shuffle
;; selects (0,0,1,1).
(define_expand "vec_widen_<s>mult_lo_v8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
   (match_operand:V8SI 2 "nonimmediate_operand" "")]
  "TARGET_AVX2"
{
  rtx t1, t2, t3, t4;

  t1 = gen_reg_rtx (V4DImode);
  t2 = gen_reg_rtx (V4DImode);
  t3 = gen_reg_rtx (V8SImode);
  t4 = gen_reg_rtx (V8SImode);
  emit_insn (gen_avx2_permv4di_1 (t1,
gen_lowpart (V4DImode, operands[1]),
                                  const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
                                  const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));
  emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
                                GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
  emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
                                GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
  emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
  DONE;
})

;; Signed widening V4SI multiply, "hi" variant.  With XOP the inputs are
;; shuffled with pshufd (0,2,1,3) and fed to xop_pmacsdqh with a zero
;; accumulator; otherwise each input is interleaved with itself
;; (vec_interleave_highv4si) and multiplied with sse4_1_mulv2siv2di3.
(define_expand "vec_widen_smult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  if (TARGET_XOP)
    {
      rtx t3 = gen_reg_rtx (V2DImode);

      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
                                    GEN_INT (1), GEN_INT (3)));
      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
                                    GEN_INT (1), GEN_INT (3)));
      emit_move_insn (t3, CONST0_RTX (V2DImode));

      emit_insn (gen_xop_pmacsdqh (operands[0], t1, t2, t3));
      DONE;
    }

  emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})

;; Signed widening V4SI multiply, "lo" variant: xop_pmacsdql on the XOP
;; path, vec_interleave_lowv4si otherwise.
(define_expand "vec_widen_smult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE4_1"
{
  rtx op1, op2, t1, t2;

  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);

  if (TARGET_XOP)
    {
      rtx t3 = gen_reg_rtx
(V2DImode);

      emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
                                    GEN_INT (1), GEN_INT (3)));
      emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
                                    GEN_INT (1), GEN_INT (3)));
      emit_move_insn (t3, CONST0_RTX (V2DImode));

      emit_insn (gen_xop_pmacsdql (operands[0], t1, t2, t3));
      DONE;
    }

  emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
  emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
  emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
  DONE;
})

;; Unsigned widening V4SI multiply, "hi" variant: interleave each input
;; with itself (high half) and multiply with sse2_umulv2siv2di3.
(define_expand "vec_widen_umult_hi_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_highv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_highv4si (dup2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})

;; Unsigned widening V4SI multiply, "lo" variant: same with the low-half
;; interleave.
(define_expand "vec_widen_umult_lo_v4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx dup1 = gen_reg_rtx (V4SImode);
  rtx dup2 = gen_reg_rtx (V4SImode);

  emit_insn (gen_vec_interleave_lowv4si (dup1, operands[1], operands[1]));
  emit_insn (gen_vec_interleave_lowv4si (dup2, operands[2], operands[2]));
  emit_insn (gen_sse2_umulv2siv2di3 (operands[0], dup1, dup2));
  DONE;
})

;; Signed word dot product: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).
(define_expand "sdot_prod<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand" "")
   (match_operand:VI2_AVX2 1 "register_operand" "")
   (match_operand:VI2_AVX2 2 "register_operand" "")
   (match_operand:<sseunpackmode> 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx madd = gen_reg_rtx (<sseunpackmode>mode);

  emit_insn (gen_<sse2_avx2>_pmaddwd (madd, operands[1], operands[2]));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_PLUS (<sseunpackmode>mode,
                                        operands[3], madd)));
  DONE;
})

;; Name prefix of the 128-bit widening multiply expander for each
;; extension code.
(define_code_attr sse2_sse4_1
   [(zero_extend "sse2") (sign_extend "sse4_1")])

;; V4SI dot product into V2DI.  The even elements are multiplied and
;; added to operand 3; both inputs are then shifted right by 32 bits as
;; V1TI values so the odd elements can be multiplied the same way, and
;; the two partial sums are added.
(define_expand "<s>dot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
{
  rtx t1, t2, t3, t4;

  t1 = gen_reg_rtx (V2DImode);
  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (t1, t1, operands[3]));

  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
                                 gen_lowpart (V1TImode, operands[1]),
                                 GEN_INT (32)));
  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
                                 gen_lowpart (V1TImode, operands[2]),
                                 GEN_INT (32)));

  t4 = gen_reg_rtx (V2DImode);
  emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));

  emit_insn (gen_addv2di3 (operands[0], t1, t4));
  DONE;
})

;; V8SI dot product into V4DI; same scheme as <s>dot_prodv4si, using the
;; AVX2 widening multiply and a V2TI shift.
(define_expand "<s>dot_prodv8si"
  [(match_operand:V4DI 0 "register_operand" "")
   (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
   (match_operand:V8SI 2 "register_operand" "")
   (match_operand:V4DI 3 "register_operand" "")]
  "TARGET_AVX2"
{
  rtx t1, t2, t3, t4;

  t1 = gen_reg_rtx (V4DImode);
  emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
  emit_insn (gen_addv4di3 (t1, t1, operands[3]));

  t2 = gen_reg_rtx (V8SImode);
  t3 = gen_reg_rtx (V8SImode);
  emit_insn (gen_avx2_lshrv2ti3
(gen_lowpart (V2TImode, t2), 5826 gen_lowpart (V2TImode, operands[1]), 5827 GEN_INT (32))); 5828 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3), 5829 gen_lowpart (V2TImode, operands[2]), 5830 GEN_INT (32))); 5831 5832 t4 = gen_reg_rtx (V4DImode); 5833 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3)); 5834 5835 emit_insn (gen_addv4di3 (operands[0], t1, t4)); 5836 DONE; 5837}) 5838 5839(define_insn "ashr<mode>3" 5840 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") 5841 (ashiftrt:VI24_AVX2 5842 (match_operand:VI24_AVX2 1 "register_operand" "0,x") 5843 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] 5844 "TARGET_SSE2" 5845 "@ 5846 psra<ssemodesuffix>\t{%2, %0|%0, %2} 5847 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5848 [(set_attr "isa" "noavx,avx") 5849 (set_attr "type" "sseishft") 5850 (set (attr "length_immediate") 5851 (if_then_else (match_operand 2 "const_int_operand" "") 5852 (const_string "1") 5853 (const_string "0"))) 5854 (set_attr "prefix_data16" "1,*") 5855 (set_attr "prefix" "orig,vex") 5856 (set_attr "mode" "<sseinsnmode>")]) 5857 5858(define_insn "<shift_insn><mode>3" 5859 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") 5860 (any_lshift:VI248_AVX2 5861 (match_operand:VI248_AVX2 1 "register_operand" "0,x") 5862 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] 5863 "TARGET_SSE2" 5864 "@ 5865 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2} 5866 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5867 [(set_attr "isa" "noavx,avx") 5868 (set_attr "type" "sseishft") 5869 (set (attr "length_immediate") 5870 (if_then_else (match_operand 2 "const_int_operand" "") 5871 (const_string "1") 5872 (const_string "0"))) 5873 (set_attr "prefix_data16" "1,*") 5874 (set_attr "prefix" "orig,vex") 5875 (set_attr "mode" "<sseinsnmode>")]) 5876 5877(define_expand "vec_shl_<mode>" 5878 [(set (match_operand:VI_128 0 "register_operand" "") 5879 (ashift:V1TI 5880 (match_operand:VI_128 1 "register_operand" "") 5881 (match_operand:SI 2 
"const_0_to_255_mul_8_operand" "")))] 5882 "TARGET_SSE2" 5883{ 5884 operands[0] = gen_lowpart (V1TImode, operands[0]); 5885 operands[1] = gen_lowpart (V1TImode, operands[1]); 5886}) 5887 5888(define_insn "<sse2_avx2>_ashl<mode>3" 5889 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x") 5890 (ashift:VIMAX_AVX2 5891 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x") 5892 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] 5893 "TARGET_SSE2" 5894{ 5895 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); 5896 5897 switch (which_alternative) 5898 { 5899 case 0: 5900 return "pslldq\t{%2, %0|%0, %2}"; 5901 case 1: 5902 return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; 5903 default: 5904 gcc_unreachable (); 5905 } 5906} 5907 [(set_attr "isa" "noavx,avx") 5908 (set_attr "type" "sseishft") 5909 (set_attr "length_immediate" "1") 5910 (set_attr "prefix_data16" "1,*") 5911 (set_attr "prefix" "orig,vex") 5912 (set_attr "mode" "<sseinsnmode>")]) 5913 5914(define_expand "vec_shr_<mode>" 5915 [(set (match_operand:VI_128 0 "register_operand" "") 5916 (lshiftrt:V1TI 5917 (match_operand:VI_128 1 "register_operand" "") 5918 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] 5919 "TARGET_SSE2" 5920{ 5921 operands[0] = gen_lowpart (V1TImode, operands[0]); 5922 operands[1] = gen_lowpart (V1TImode, operands[1]); 5923}) 5924 5925(define_insn "<sse2_avx2>_lshr<mode>3" 5926 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x") 5927 (lshiftrt:VIMAX_AVX2 5928 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x") 5929 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] 5930 "TARGET_SSE2" 5931{ 5932 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); 5933 5934 switch (which_alternative) 5935 { 5936 case 0: 5937 return "psrldq\t{%2, %0|%0, %2}"; 5938 case 1: 5939 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; 5940 default: 5941 gcc_unreachable (); 5942 } 5943} 5944 [(set_attr "isa" "noavx,avx") 5945 (set_attr "type" "sseishft") 5946 (set_attr "length_immediate" "1") 
5947 (set_attr "atom_unit" "sishuf") 5948 (set_attr "prefix_data16" "1,*") 5949 (set_attr "prefix" "orig,vex") 5950 (set_attr "mode" "<sseinsnmode>")]) 5951 5952 5953(define_expand "<code><mode>3" 5954 [(set (match_operand:VI124_256 0 "register_operand" "") 5955 (maxmin:VI124_256 5956 (match_operand:VI124_256 1 "nonimmediate_operand" "") 5957 (match_operand:VI124_256 2 "nonimmediate_operand" "")))] 5958 "TARGET_AVX2" 5959 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 5960 5961(define_insn "*avx2_<code><mode>3" 5962 [(set (match_operand:VI124_256 0 "register_operand" "=x") 5963 (maxmin:VI124_256 5964 (match_operand:VI124_256 1 "nonimmediate_operand" "%x") 5965 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))] 5966 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 5967 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5968 [(set_attr "type" "sseiadd") 5969 (set_attr "prefix_extra" "1") 5970 (set_attr "prefix" "vex") 5971 (set_attr "mode" "OI")]) 5972 5973(define_expand "<code><mode>3" 5974 [(set (match_operand:VI8_AVX2 0 "register_operand" "") 5975 (maxmin:VI8_AVX2 5976 (match_operand:VI8_AVX2 1 "register_operand" "") 5977 (match_operand:VI8_AVX2 2 "register_operand" "")))] 5978 "TARGET_SSE4_2" 5979{ 5980 enum rtx_code code; 5981 rtx xops[6]; 5982 bool ok; 5983 5984 xops[0] = operands[0]; 5985 5986 if (<CODE> == SMAX || <CODE> == UMAX) 5987 { 5988 xops[1] = operands[1]; 5989 xops[2] = operands[2]; 5990 } 5991 else 5992 { 5993 xops[1] = operands[2]; 5994 xops[2] = operands[1]; 5995 } 5996 5997 code = (<CODE> == UMAX || <CODE> == UMIN) ? 
GTU : GT; 5998 5999 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); 6000 xops[4] = operands[1]; 6001 xops[5] = operands[2]; 6002 6003 ok = ix86_expand_int_vcond (xops); 6004 gcc_assert (ok); 6005 DONE; 6006}) 6007 6008(define_expand "<code><mode>3" 6009 [(set (match_operand:VI124_128 0 "register_operand" "") 6010 (smaxmin:VI124_128 6011 (match_operand:VI124_128 1 "nonimmediate_operand" "") 6012 (match_operand:VI124_128 2 "nonimmediate_operand" "")))] 6013 "TARGET_SSE2" 6014{ 6015 if (TARGET_SSE4_1 || <MODE>mode == V8HImode) 6016 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); 6017 else 6018 { 6019 rtx xops[6]; 6020 bool ok; 6021 6022 xops[0] = operands[0]; 6023 operands[1] = force_reg (<MODE>mode, operands[1]); 6024 operands[2] = force_reg (<MODE>mode, operands[2]); 6025 6026 if (<CODE> == SMAX) 6027 { 6028 xops[1] = operands[1]; 6029 xops[2] = operands[2]; 6030 } 6031 else 6032 { 6033 xops[1] = operands[2]; 6034 xops[2] = operands[1]; 6035 } 6036 6037 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); 6038 xops[4] = operands[1]; 6039 xops[5] = operands[2]; 6040 6041 ok = ix86_expand_int_vcond (xops); 6042 gcc_assert (ok); 6043 DONE; 6044 } 6045}) 6046 6047(define_insn "*sse4_1_<code><mode>3" 6048 [(set (match_operand:VI14_128 0 "register_operand" "=x,x") 6049 (smaxmin:VI14_128 6050 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x") 6051 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))] 6052 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6053 "@ 6054 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} 6055 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6056 [(set_attr "isa" "noavx,avx") 6057 (set_attr "type" "sseiadd") 6058 (set_attr "prefix_extra" "1,*") 6059 (set_attr "prefix" "orig,vex") 6060 (set_attr "mode" "TI")]) 6061 6062(define_insn "*<code>v8hi3" 6063 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 6064 (smaxmin:V8HI 6065 (match_operand:V8HI 1 
"nonimmediate_operand" "%0,x") 6066 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))] 6067 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)" 6068 "@ 6069 p<maxmin_int>w\t{%2, %0|%0, %2} 6070 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}" 6071 [(set_attr "isa" "noavx,avx") 6072 (set_attr "type" "sseiadd") 6073 (set_attr "prefix_data16" "1,*") 6074 (set_attr "prefix_extra" "*,1") 6075 (set_attr "prefix" "orig,vex") 6076 (set_attr "mode" "TI")]) 6077 6078(define_expand "<code><mode>3" 6079 [(set (match_operand:VI124_128 0 "register_operand" "") 6080 (umaxmin:VI124_128 6081 (match_operand:VI124_128 1 "nonimmediate_operand" "") 6082 (match_operand:VI124_128 2 "nonimmediate_operand" "")))] 6083 "TARGET_SSE2" 6084{ 6085 if (TARGET_SSE4_1 || <MODE>mode == V16QImode) 6086 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); 6087 else if (<CODE> == UMAX && <MODE>mode == V8HImode) 6088 { 6089 rtx op0 = operands[0], op2 = operands[2], op3 = op0; 6090 operands[1] = force_reg (<MODE>mode, operands[1]); 6091 if (rtx_equal_p (op3, op2)) 6092 op3 = gen_reg_rtx (V8HImode); 6093 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2)); 6094 emit_insn (gen_addv8hi3 (op0, op3, op2)); 6095 DONE; 6096 } 6097 else 6098 { 6099 rtx xops[6]; 6100 bool ok; 6101 6102 operands[1] = force_reg (<MODE>mode, operands[1]); 6103 operands[2] = force_reg (<MODE>mode, operands[2]); 6104 6105 xops[0] = operands[0]; 6106 6107 if (<CODE> == UMAX) 6108 { 6109 xops[1] = operands[1]; 6110 xops[2] = operands[2]; 6111 } 6112 else 6113 { 6114 xops[1] = operands[2]; 6115 xops[2] = operands[1]; 6116 } 6117 6118 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); 6119 xops[4] = operands[1]; 6120 xops[5] = operands[2]; 6121 6122 ok = ix86_expand_int_vcond (xops); 6123 gcc_assert (ok); 6124 DONE; 6125 } 6126}) 6127 6128(define_insn "*sse4_1_<code><mode>3" 6129 [(set (match_operand:VI24_128 0 "register_operand" "=x,x") 6130 (umaxmin:VI24_128 6131 (match_operand:VI24_128 1 
"nonimmediate_operand" "%0,x") 6132 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))] 6133 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6134 "@ 6135 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} 6136 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6137 [(set_attr "isa" "noavx,avx") 6138 (set_attr "type" "sseiadd") 6139 (set_attr "prefix_extra" "1,*") 6140 (set_attr "prefix" "orig,vex") 6141 (set_attr "mode" "TI")]) 6142 6143(define_insn "*<code>v16qi3" 6144 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 6145 (umaxmin:V16QI 6146 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x") 6147 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))] 6148 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)" 6149 "@ 6150 p<maxmin_int>b\t{%2, %0|%0, %2} 6151 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}" 6152 [(set_attr "isa" "noavx,avx") 6153 (set_attr "type" "sseiadd") 6154 (set_attr "prefix_data16" "1,*") 6155 (set_attr "prefix_extra" "*,1") 6156 (set_attr "prefix" "orig,vex") 6157 (set_attr "mode" "TI")]) 6158 6159;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6160;; 6161;; Parallel integral comparisons 6162;; 6163;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6164 6165(define_expand "avx2_eq<mode>3" 6166 [(set (match_operand:VI_256 0 "register_operand" "") 6167 (eq:VI_256 6168 (match_operand:VI_256 1 "nonimmediate_operand" "") 6169 (match_operand:VI_256 2 "nonimmediate_operand" "")))] 6170 "TARGET_AVX2" 6171 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") 6172 6173(define_insn "*avx2_eq<mode>3" 6174 [(set (match_operand:VI_256 0 "register_operand" "=x") 6175 (eq:VI_256 6176 (match_operand:VI_256 1 "nonimmediate_operand" "%x") 6177 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] 6178 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 6179 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6180 [(set_attr "type" 
"ssecmp") 6181 (set_attr "prefix_extra" "1") 6182 (set_attr "prefix" "vex") 6183 (set_attr "mode" "OI")]) 6184 6185(define_insn "*sse4_1_eqv2di3" 6186 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 6187 (eq:V2DI 6188 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x") 6189 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] 6190 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)" 6191 "@ 6192 pcmpeqq\t{%2, %0|%0, %2} 6193 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}" 6194 [(set_attr "isa" "noavx,avx") 6195 (set_attr "type" "ssecmp") 6196 (set_attr "prefix_extra" "1") 6197 (set_attr "prefix" "orig,vex") 6198 (set_attr "mode" "TI")]) 6199 6200(define_insn "*sse2_eq<mode>3" 6201 [(set (match_operand:VI124_128 0 "register_operand" "=x,x") 6202 (eq:VI124_128 6203 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x") 6204 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] 6205 "TARGET_SSE2 && !TARGET_XOP 6206 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 6207 "@ 6208 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2} 6209 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6210 [(set_attr "isa" "noavx,avx") 6211 (set_attr "type" "ssecmp") 6212 (set_attr "prefix_data16" "1,*") 6213 (set_attr "prefix" "orig,vex") 6214 (set_attr "mode" "TI")]) 6215 6216(define_expand "sse2_eq<mode>3" 6217 [(set (match_operand:VI124_128 0 "register_operand" "") 6218 (eq:VI124_128 6219 (match_operand:VI124_128 1 "nonimmediate_operand" "") 6220 (match_operand:VI124_128 2 "nonimmediate_operand" "")))] 6221 "TARGET_SSE2 && !TARGET_XOP " 6222 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") 6223 6224(define_expand "sse4_1_eqv2di3" 6225 [(set (match_operand:V2DI 0 "register_operand" "") 6226 (eq:V2DI 6227 (match_operand:V2DI 1 "nonimmediate_operand" "") 6228 (match_operand:V2DI 2 "nonimmediate_operand" "")))] 6229 "TARGET_SSE4_1" 6230 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);") 6231 6232(define_insn "sse4_2_gtv2di3" 6233 [(set 
(match_operand:V2DI 0 "register_operand" "=x,x") 6234 (gt:V2DI 6235 (match_operand:V2DI 1 "register_operand" "0,x") 6236 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] 6237 "TARGET_SSE4_2" 6238 "@ 6239 pcmpgtq\t{%2, %0|%0, %2} 6240 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}" 6241 [(set_attr "isa" "noavx,avx") 6242 (set_attr "type" "ssecmp") 6243 (set_attr "prefix_extra" "1") 6244 (set_attr "prefix" "orig,vex") 6245 (set_attr "mode" "TI")]) 6246 6247(define_insn "avx2_gt<mode>3" 6248 [(set (match_operand:VI_256 0 "register_operand" "=x") 6249 (gt:VI_256 6250 (match_operand:VI_256 1 "register_operand" "x") 6251 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] 6252 "TARGET_AVX2" 6253 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6254 [(set_attr "type" "ssecmp") 6255 (set_attr "prefix_extra" "1") 6256 (set_attr "prefix" "vex") 6257 (set_attr "mode" "OI")]) 6258 6259(define_insn "sse2_gt<mode>3" 6260 [(set (match_operand:VI124_128 0 "register_operand" "=x,x") 6261 (gt:VI124_128 6262 (match_operand:VI124_128 1 "register_operand" "0,x") 6263 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] 6264 "TARGET_SSE2 && !TARGET_XOP" 6265 "@ 6266 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2} 6267 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6268 [(set_attr "isa" "noavx,avx") 6269 (set_attr "type" "ssecmp") 6270 (set_attr "prefix_data16" "1,*") 6271 (set_attr "prefix" "orig,vex") 6272 (set_attr "mode" "TI")]) 6273 6274(define_expand "vcond<V_256:mode><VI_256:mode>" 6275 [(set (match_operand:V_256 0 "register_operand" "") 6276 (if_then_else:V_256 6277 (match_operator 3 "" 6278 [(match_operand:VI_256 4 "nonimmediate_operand" "") 6279 (match_operand:VI_256 5 "general_operand" "")]) 6280 (match_operand:V_256 1 "" "") 6281 (match_operand:V_256 2 "" "")))] 6282 "TARGET_AVX2 6283 && (GET_MODE_NUNITS (<V_256:MODE>mode) 6284 == GET_MODE_NUNITS (<VI_256:MODE>mode))" 6285{ 6286 bool ok = ix86_expand_int_vcond (operands); 6287 gcc_assert (ok); 6288 DONE; 6289}) 6290 
;; Integer vector conditional select for the V_128 / VI124_128 mode pairs.
;; Only enabled when the data mode and the comparison mode have the same
;; number of elements.  The whole expansion is delegated to
;; ix86_expand_int_vcond, which is required to succeed.
(define_expand "vcond<V_128:mode><VI124_128:mode>"
  [(set (match_operand:V_128 0 "register_operand" "")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VI124_128 4 "nonimmediate_operand" "")
             (match_operand:VI124_128 5 "general_operand" "")])
          (match_operand:V_128 1 "" "")
          (match_operand:V_128 2 "" "")))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  /* Keep the call outside gcc_assert so it is always evaluated.  */
  bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})

;; Same as above with a V2DI comparison; gated on TARGET_SSE4_2.
(define_expand "vcond<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand" "")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "nonimmediate_operand" "")
             (match_operand:V2DI 5 "general_operand" "")])
          (match_operand:VI8F_128 1 "" "")
          (match_operand:VI8F_128 2 "" "")))]
  "TARGET_SSE4_2"
{
  /* Keep the call outside gcc_assert so it is always evaluated.  */
  bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})

;; vcondu expanders (the unsigned-comparison standard pattern name).  They
;; funnel into the same ix86_expand_int_vcond helper as the vcond
;; expanders; the comparison code carried by operand 3 is what the helper
;; receives.  This one covers the V_256 / VI_256 mode pairs under AVX2.
(define_expand "vcondu<V_256:mode><VI_256:mode>"
  [(set (match_operand:V_256 0 "register_operand" "")
        (if_then_else:V_256
          (match_operator 3 ""
            [(match_operand:VI_256 4 "nonimmediate_operand" "")
             (match_operand:VI_256 5 "nonimmediate_operand" "")])
          (match_operand:V_256 1 "general_operand" "")
          (match_operand:V_256 2 "general_operand" "")))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  /* Keep the call outside gcc_assert so it is always evaluated.  */
  bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})

;; vcondu for the V_128 / VI124_128 mode pairs under SSE2; element counts
;; of the two modes must agree.
(define_expand "vcondu<V_128:mode><VI124_128:mode>"
  [(set (match_operand:V_128 0 "register_operand" "")
        (if_then_else:V_128
          (match_operator 3 ""
            [(match_operand:VI124_128 4 "nonimmediate_operand" "")
             (match_operand:VI124_128 5 "nonimmediate_operand" "")])
          (match_operand:V_128 1 "general_operand" "")
          (match_operand:V_128 2 "general_operand" "")))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  /* Keep the call outside gcc_assert so it is always evaluated.  */
  bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})

;; vcondu with a V2DI comparison; gated on TARGET_SSE4_2.
(define_expand "vcondu<VI8F_128:mode>v2di"
  [(set (match_operand:VI8F_128 0 "register_operand" "")
        (if_then_else:VI8F_128
          (match_operator 3 ""
            [(match_operand:V2DI 4 "nonimmediate_operand" "")
             (match_operand:V2DI 5 "nonimmediate_operand" "")])
          (match_operand:VI8F_128 1 "general_operand" "")
          (match_operand:VI8F_128 2 "general_operand" "")))]
  "TARGET_SSE4_2"
{
  /* Keep the call outside gcc_assert so it is always evaluated.  */
  bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})

;; Modes accepted by the variable-selector vec_perm expander below.  The
;; six 16-byte modes are unconditional here; the 32-byte modes are only
;; enabled with AVX2.
(define_mode_iterator VEC_PERM_AVX2
  [V16QI V8HI V4SI V2DI V4SF V2DF
   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
   (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])

;; Permute operands 1 and 2 into operand 0 under control of the register
;; selector in operand 3.  Everything is done by ix86_expand_vec_perm.
(define_expand "vec_perm<mode>"
  [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
   (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
   (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
   (match_operand:<sseintvecmode> 3 "register_operand" "")]
  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
{
  ix86_expand_vec_perm (operands);
  DONE;
})

;; Modes accepted by the constant-selector vec_perm_const expander below,
;; each with the ISA flag that enables it.
(define_mode_iterator VEC_PERM_CONST
  [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
   (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
   (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])

;; Permutation with the selector given in operand 3 (no predicate on it).
;; The pattern condition is empty, so availability comes solely from the
;; per-mode conditions of VEC_PERM_CONST.  The expander FAILs whenever
;; ix86_expand_vec_perm_const reports that it could not handle the request.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
   (match_operand:VEC_PERM_CONST 1 "register_operand" "")
   (match_operand:VEC_PERM_CONST 2 "register_operand" "")
   (match_operand:<sseintvecmode> 3 "" "")]
  ""
{
  if (!ix86_expand_vec_perm_const (operands))
    FAIL;
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel bitwise logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Bitwise complement, expressed as an XOR of operand 1 with a vector
;; whose elements are all constm1_rtx.  The constant is built here,
;; forced into a register and installed as operand 2 for the match_dup.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:VI 0 "register_operand" "")
        (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
                (match_dup 2)))]
  "TARGET_SSE"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec elts = rtvec_alloc (nunits);
  rtx all_ones;
  int idx;

  /* Every element of the constant is -1.  */
  for (idx = 0; idx < nunits; idx++)
    RTVEC_ELT (elts, idx) = constm1_rtx;

  all_ones = gen_rtx_CONST_VECTOR (<MODE>mode, elts);
  operands[2] = force_reg (<MODE>mode, all_ones);
})

;; Named entry point for (and (not op1) op2); it has no preparation
;; statements, so the RTL template is emitted as written.
(define_expand "<sse2_avx2>_andnot<mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "")
        (and:VI_AVX2
          (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
          (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2")

;; Insn for (and (not op1) op2) on integer vectors.  The mnemonic is
;; chosen from the insn "mode" attribute (pandn for OI/TI, andnps for
;; V8SF/V4SF) and the operand template from the matched alternative
;; (two-operand legacy form or three-operand v-prefixed form).
(define_insn "*andnot<mode>3"
  [(set (match_operand:VI 0 "register_operand" "=x,x")
        (and:VI
          (not:VI (match_operand:VI 1 "register_operand" "0,x"))
          (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE"
{
  /* The longest string built here is the v-prefixed andnps form:
     31 characters plus the terminator, which exactly fills the buffer.  */
  static char asm_buf[32];
  const char *templ;
  const char *mnemonic;

  switch (get_attr_mode (insn))
    {
    case MODE_OI:
      gcc_assert (TARGET_AVX2);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2);

      mnemonic = "pandn";
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);

      mnemonic = "andnps";
      break;

    default:
      gcc_unreachable ();
    }

  if (which_alternative == 0)
    templ = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    templ = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_buf, sizeof (asm_buf), templ, mnemonic);
  return asm_buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
     (cond [(and (not (match_test "TARGET_AVX2"))
                 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
              (const_string "V8SF")
            (not (match_test "TARGET_SSE2"))
              (const_string "V4SF")
           ]
           (const_string "<sseinsnmode>")))])

;; Expander for the any_logic codes on integer vectors; the operands are
;; passed through ix86_fixup_binary_operands_no_copy before the template
;; is emitted.
(define_expand "<code><mode>3"
  [(set (match_operand:VI 0 "register_operand" "")
        (any_logic:VI
          (match_operand:VI 1 "nonimmediate_operand" "")
          (match_operand:VI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Insn for the any_logic codes on integer vectors.  As with the andnot
;; insn above, the "mode" attribute picks between the integer mnemonic
;; and the ps mnemonic, and the alternative picks the operand template.
(define_insn "*<code><mode>3"
  [(set (match_operand:VI 0 "register_operand" "=x,x")
        (any_logic:VI
          (match_operand:VI 1 "nonimmediate_operand" "%0,x")
          (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char asm_buf[32];
  const char *templ;
  const char *mnemonic;

  switch (get_attr_mode (insn))
    {
    case MODE_OI:
      gcc_assert (TARGET_AVX2);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2);

      mnemonic = "p<logic>";
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);

      mnemonic = "<logic>ps";
      break;

    default:
      gcc_unreachable ();
    }

  if (which_alternative == 0)
    templ = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    templ = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (asm_buf, sizeof (asm_buf), templ, mnemonic);
  return asm_buf;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
            (eq_attr "mode" "TI"))
       (const_string "1")
       (const_string "*")))
   (set_attr "prefix" "orig,vex")
   (set (attr "mode")
     (cond [(and (not (match_test "TARGET_AVX2"))
                 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
              (const_string "V8SF")
            (not (match_test "TARGET_SSE2"))
              (const_string "V4SF")
           ]
           (const_string "<sseinsnmode>")))])

;; (and (not op1) op2) on TFmode values held in SSE registers, emitted
;; as pandn / vpandn.
(define_insn "*andnottf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x")
        (and:TF
          (not:TF (match_operand:TF 1 "register_operand" "0,x"))
          (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2"
  "@
   pandn\t{%2, %0|%0, %2}
   vpandn\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Expander for the any_logic codes on TFmode; operands are passed
;; through ix86_fixup_binary_operands_no_copy first.
(define_expand "<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "")
          (match_operand:TF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")

;; Insn for the any_logic codes on TFmode, always using the integer
;; (p-prefixed) mnemonic.
(define_insn "*<code>tf3"
  [(set (match_operand:TF 0 "register_operand" "=x,x")
        (any_logic:TF
          (match_operand:TF 1 "nonimmediate_operand" "%0,x")
          (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2
   && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
  "@
   p<logic>\t{%2, %0|%0, %2}
   vp<logic>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral element swizzling
;;
6622;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6623 6624(define_expand "vec_pack_trunc_<mode>" 6625 [(match_operand:<ssepackmode> 0 "register_operand" "") 6626 (match_operand:VI248_AVX2 1 "register_operand" "") 6627 (match_operand:VI248_AVX2 2 "register_operand" "")] 6628 "TARGET_SSE2" 6629{ 6630 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]); 6631 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]); 6632 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); 6633 DONE; 6634}) 6635 6636(define_insn "<sse2_avx2>_packsswb" 6637 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 6638 (vec_concat:VI1_AVX2 6639 (ss_truncate:<ssehalfvecmode> 6640 (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) 6641 (ss_truncate:<ssehalfvecmode> 6642 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] 6643 "TARGET_SSE2" 6644 "@ 6645 packsswb\t{%2, %0|%0, %2} 6646 vpacksswb\t{%2, %1, %0|%0, %1, %2}" 6647 [(set_attr "isa" "noavx,avx") 6648 (set_attr "type" "sselog") 6649 (set_attr "prefix_data16" "1,*") 6650 (set_attr "prefix" "orig,vex") 6651 (set_attr "mode" "<sseinsnmode>")]) 6652 6653(define_insn "<sse2_avx2>_packssdw" 6654 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") 6655 (vec_concat:VI2_AVX2 6656 (ss_truncate:<ssehalfvecmode> 6657 (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) 6658 (ss_truncate:<ssehalfvecmode> 6659 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] 6660 "TARGET_SSE2" 6661 "@ 6662 packssdw\t{%2, %0|%0, %2} 6663 vpackssdw\t{%2, %1, %0|%0, %1, %2}" 6664 [(set_attr "isa" "noavx,avx") 6665 (set_attr "type" "sselog") 6666 (set_attr "prefix_data16" "1,*") 6667 (set_attr "prefix" "orig,vex") 6668 (set_attr "mode" "<sseinsnmode>")]) 6669 6670(define_insn "<sse2_avx2>_packuswb" 6671 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 6672 (vec_concat:VI1_AVX2 6673 (us_truncate:<ssehalfvecmode> 6674 (match_operand:<sseunpackmode> 1 
"register_operand" "0,x")) 6675 (us_truncate:<ssehalfvecmode> 6676 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] 6677 "TARGET_SSE2" 6678 "@ 6679 packuswb\t{%2, %0|%0, %2} 6680 vpackuswb\t{%2, %1, %0|%0, %1, %2}" 6681 [(set_attr "isa" "noavx,avx") 6682 (set_attr "type" "sselog") 6683 (set_attr "prefix_data16" "1,*") 6684 (set_attr "prefix" "orig,vex") 6685 (set_attr "mode" "<sseinsnmode>")]) 6686 6687(define_insn "avx2_interleave_highv32qi" 6688 [(set (match_operand:V32QI 0 "register_operand" "=x") 6689 (vec_select:V32QI 6690 (vec_concat:V64QI 6691 (match_operand:V32QI 1 "register_operand" "x") 6692 (match_operand:V32QI 2 "nonimmediate_operand" "xm")) 6693 (parallel [(const_int 8) (const_int 40) 6694 (const_int 9) (const_int 41) 6695 (const_int 10) (const_int 42) 6696 (const_int 11) (const_int 43) 6697 (const_int 12) (const_int 44) 6698 (const_int 13) (const_int 45) 6699 (const_int 14) (const_int 46) 6700 (const_int 15) (const_int 47) 6701 (const_int 24) (const_int 56) 6702 (const_int 25) (const_int 57) 6703 (const_int 26) (const_int 58) 6704 (const_int 27) (const_int 59) 6705 (const_int 28) (const_int 60) 6706 (const_int 29) (const_int 61) 6707 (const_int 30) (const_int 62) 6708 (const_int 31) (const_int 63)])))] 6709 "TARGET_AVX2" 6710 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" 6711 [(set_attr "type" "sselog") 6712 (set_attr "prefix" "vex") 6713 (set_attr "mode" "OI")]) 6714 6715(define_insn "vec_interleave_highv16qi" 6716 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 6717 (vec_select:V16QI 6718 (vec_concat:V32QI 6719 (match_operand:V16QI 1 "register_operand" "0,x") 6720 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")) 6721 (parallel [(const_int 8) (const_int 24) 6722 (const_int 9) (const_int 25) 6723 (const_int 10) (const_int 26) 6724 (const_int 11) (const_int 27) 6725 (const_int 12) (const_int 28) 6726 (const_int 13) (const_int 29) 6727 (const_int 14) (const_int 30) 6728 (const_int 15) (const_int 31)])))] 6729 "TARGET_SSE2" 
6730 "@ 6731 punpckhbw\t{%2, %0|%0, %2} 6732 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" 6733 [(set_attr "isa" "noavx,avx") 6734 (set_attr "type" "sselog") 6735 (set_attr "prefix_data16" "1,*") 6736 (set_attr "prefix" "orig,vex") 6737 (set_attr "mode" "TI")]) 6738 6739(define_insn "avx2_interleave_lowv32qi" 6740 [(set (match_operand:V32QI 0 "register_operand" "=x") 6741 (vec_select:V32QI 6742 (vec_concat:V64QI 6743 (match_operand:V32QI 1 "register_operand" "x") 6744 (match_operand:V32QI 2 "nonimmediate_operand" "xm")) 6745 (parallel [(const_int 0) (const_int 32) 6746 (const_int 1) (const_int 33) 6747 (const_int 2) (const_int 34) 6748 (const_int 3) (const_int 35) 6749 (const_int 4) (const_int 36) 6750 (const_int 5) (const_int 37) 6751 (const_int 6) (const_int 38) 6752 (const_int 7) (const_int 39) 6753 (const_int 16) (const_int 48) 6754 (const_int 17) (const_int 49) 6755 (const_int 18) (const_int 50) 6756 (const_int 19) (const_int 51) 6757 (const_int 20) (const_int 52) 6758 (const_int 21) (const_int 53) 6759 (const_int 22) (const_int 54) 6760 (const_int 23) (const_int 55)])))] 6761 "TARGET_AVX2" 6762 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" 6763 [(set_attr "type" "sselog") 6764 (set_attr "prefix" "vex") 6765 (set_attr "mode" "OI")]) 6766 6767(define_insn "vec_interleave_lowv16qi" 6768 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 6769 (vec_select:V16QI 6770 (vec_concat:V32QI 6771 (match_operand:V16QI 1 "register_operand" "0,x") 6772 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")) 6773 (parallel [(const_int 0) (const_int 16) 6774 (const_int 1) (const_int 17) 6775 (const_int 2) (const_int 18) 6776 (const_int 3) (const_int 19) 6777 (const_int 4) (const_int 20) 6778 (const_int 5) (const_int 21) 6779 (const_int 6) (const_int 22) 6780 (const_int 7) (const_int 23)])))] 6781 "TARGET_SSE2" 6782 "@ 6783 punpcklbw\t{%2, %0|%0, %2} 6784 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" 6785 [(set_attr "isa" "noavx,avx") 6786 (set_attr "type" "sselog") 6787 (set_attr "prefix_data16" 
"1,*") 6788 (set_attr "prefix" "orig,vex") 6789 (set_attr "mode" "TI")]) 6790 6791(define_insn "avx2_interleave_highv16hi" 6792 [(set (match_operand:V16HI 0 "register_operand" "=x") 6793 (vec_select:V16HI 6794 (vec_concat:V32HI 6795 (match_operand:V16HI 1 "register_operand" "x") 6796 (match_operand:V16HI 2 "nonimmediate_operand" "xm")) 6797 (parallel [(const_int 4) (const_int 20) 6798 (const_int 5) (const_int 21) 6799 (const_int 6) (const_int 22) 6800 (const_int 7) (const_int 23) 6801 (const_int 12) (const_int 28) 6802 (const_int 13) (const_int 29) 6803 (const_int 14) (const_int 30) 6804 (const_int 15) (const_int 31)])))] 6805 "TARGET_AVX2" 6806 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" 6807 [(set_attr "type" "sselog") 6808 (set_attr "prefix" "vex") 6809 (set_attr "mode" "OI")]) 6810 6811(define_insn "vec_interleave_highv8hi" 6812 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 6813 (vec_select:V8HI 6814 (vec_concat:V16HI 6815 (match_operand:V8HI 1 "register_operand" "0,x") 6816 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")) 6817 (parallel [(const_int 4) (const_int 12) 6818 (const_int 5) (const_int 13) 6819 (const_int 6) (const_int 14) 6820 (const_int 7) (const_int 15)])))] 6821 "TARGET_SSE2" 6822 "@ 6823 punpckhwd\t{%2, %0|%0, %2} 6824 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" 6825 [(set_attr "isa" "noavx,avx") 6826 (set_attr "type" "sselog") 6827 (set_attr "prefix_data16" "1,*") 6828 (set_attr "prefix" "orig,vex") 6829 (set_attr "mode" "TI")]) 6830 6831(define_insn "avx2_interleave_lowv16hi" 6832 [(set (match_operand:V16HI 0 "register_operand" "=x") 6833 (vec_select:V16HI 6834 (vec_concat:V32HI 6835 (match_operand:V16HI 1 "register_operand" "x") 6836 (match_operand:V16HI 2 "nonimmediate_operand" "xm")) 6837 (parallel [(const_int 0) (const_int 16) 6838 (const_int 1) (const_int 17) 6839 (const_int 2) (const_int 18) 6840 (const_int 3) (const_int 19) 6841 (const_int 8) (const_int 24) 6842 (const_int 9) (const_int 25) 6843 (const_int 10) (const_int 26) 6844 
(const_int 11) (const_int 27)])))] 6845 "TARGET_AVX2" 6846 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" 6847 [(set_attr "type" "sselog") 6848 (set_attr "prefix" "vex") 6849 (set_attr "mode" "OI")]) 6850 6851(define_insn "vec_interleave_lowv8hi" 6852 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 6853 (vec_select:V8HI 6854 (vec_concat:V16HI 6855 (match_operand:V8HI 1 "register_operand" "0,x") 6856 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")) 6857 (parallel [(const_int 0) (const_int 8) 6858 (const_int 1) (const_int 9) 6859 (const_int 2) (const_int 10) 6860 (const_int 3) (const_int 11)])))] 6861 "TARGET_SSE2" 6862 "@ 6863 punpcklwd\t{%2, %0|%0, %2} 6864 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" 6865 [(set_attr "isa" "noavx,avx") 6866 (set_attr "type" "sselog") 6867 (set_attr "prefix_data16" "1,*") 6868 (set_attr "prefix" "orig,vex") 6869 (set_attr "mode" "TI")]) 6870 6871(define_insn "avx2_interleave_highv8si" 6872 [(set (match_operand:V8SI 0 "register_operand" "=x") 6873 (vec_select:V8SI 6874 (vec_concat:V16SI 6875 (match_operand:V8SI 1 "register_operand" "x") 6876 (match_operand:V8SI 2 "nonimmediate_operand" "xm")) 6877 (parallel [(const_int 2) (const_int 10) 6878 (const_int 3) (const_int 11) 6879 (const_int 6) (const_int 14) 6880 (const_int 7) (const_int 15)])))] 6881 "TARGET_AVX2" 6882 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" 6883 [(set_attr "type" "sselog") 6884 (set_attr "prefix" "vex") 6885 (set_attr "mode" "OI")]) 6886 6887(define_insn "vec_interleave_highv4si" 6888 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 6889 (vec_select:V4SI 6890 (vec_concat:V8SI 6891 (match_operand:V4SI 1 "register_operand" "0,x") 6892 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) 6893 (parallel [(const_int 2) (const_int 6) 6894 (const_int 3) (const_int 7)])))] 6895 "TARGET_SSE2" 6896 "@ 6897 punpckhdq\t{%2, %0|%0, %2} 6898 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" 6899 [(set_attr "isa" "noavx,avx") 6900 (set_attr "type" "sselog") 6901 (set_attr "prefix_data16" "1,*") 
6902 (set_attr "prefix" "orig,vex") 6903 (set_attr "mode" "TI")]) 6904 6905(define_insn "avx2_interleave_lowv8si" 6906 [(set (match_operand:V8SI 0 "register_operand" "=x") 6907 (vec_select:V8SI 6908 (vec_concat:V16SI 6909 (match_operand:V8SI 1 "register_operand" "x") 6910 (match_operand:V8SI 2 "nonimmediate_operand" "xm")) 6911 (parallel [(const_int 0) (const_int 8) 6912 (const_int 1) (const_int 9) 6913 (const_int 4) (const_int 12) 6914 (const_int 5) (const_int 13)])))] 6915 "TARGET_AVX2" 6916 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}" 6917 [(set_attr "type" "sselog") 6918 (set_attr "prefix" "vex") 6919 (set_attr "mode" "OI")]) 6920 6921(define_insn "vec_interleave_lowv4si" 6922 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 6923 (vec_select:V4SI 6924 (vec_concat:V8SI 6925 (match_operand:V4SI 1 "register_operand" "0,x") 6926 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) 6927 (parallel [(const_int 0) (const_int 4) 6928 (const_int 1) (const_int 5)])))] 6929 "TARGET_SSE2" 6930 "@ 6931 punpckldq\t{%2, %0|%0, %2} 6932 vpunpckldq\t{%2, %1, %0|%0, %1, %2}" 6933 [(set_attr "isa" "noavx,avx") 6934 (set_attr "type" "sselog") 6935 (set_attr "prefix_data16" "1,*") 6936 (set_attr "prefix" "orig,vex") 6937 (set_attr "mode" "TI")]) 6938 6939(define_expand "vec_interleave_high<mode>" 6940 [(match_operand:VI_256 0 "register_operand" "=x") 6941 (match_operand:VI_256 1 "register_operand" "x") 6942 (match_operand:VI_256 2 "nonimmediate_operand" "xm")] 6943 "TARGET_AVX2" 6944{ 6945 rtx t1 = gen_reg_rtx (<MODE>mode); 6946 rtx t2 = gen_reg_rtx (<MODE>mode); 6947 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); 6948 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); 6949 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]), 6950 gen_lowpart (V4DImode, t1), 6951 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4)))); 6952 DONE; 6953}) 6954 6955(define_expand "vec_interleave_low<mode>" 6956 [(match_operand:VI_256 0 
"register_operand" "=x") 6957 (match_operand:VI_256 1 "register_operand" "x") 6958 (match_operand:VI_256 2 "nonimmediate_operand" "xm")] 6959 "TARGET_AVX2" 6960{ 6961 rtx t1 = gen_reg_rtx (<MODE>mode); 6962 rtx t2 = gen_reg_rtx (<MODE>mode); 6963 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); 6964 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); 6965 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]), 6966 gen_lowpart (V4DImode, t1), 6967 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4)))); 6968 DONE; 6969}) 6970 6971;; Modes handled by pinsr patterns. 6972(define_mode_iterator PINSR_MODE 6973 [(V16QI "TARGET_SSE4_1") V8HI 6974 (V4SI "TARGET_SSE4_1") 6975 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) 6976 6977(define_mode_attr sse2p4_1 6978 [(V16QI "sse4_1") (V8HI "sse2") 6979 (V4SI "sse4_1") (V2DI "sse4_1")]) 6980 6981;; sse4_1_pinsrd must come before sse2_loadld since it is preferred. 6982(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>" 6983 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x") 6984 (vec_merge:PINSR_MODE 6985 (vec_duplicate:PINSR_MODE 6986 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m")) 6987 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x") 6988 (match_operand:SI 3 "const_int_operand" "")))] 6989 "TARGET_SSE2 6990 && ((unsigned) exact_log2 (INTVAL (operands[3])) 6991 < GET_MODE_NUNITS (<MODE>mode))" 6992{ 6993 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); 6994 6995 switch (which_alternative) 6996 { 6997 case 0: 6998 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) 6999 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"; 7000 /* FALLTHRU */ 7001 case 1: 7002 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"; 7003 case 2: 7004 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) 7005 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; 7006 /* FALLTHRU */ 7007 case 3: 7008 return 
"vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 7009 default: 7010 gcc_unreachable (); 7011 } 7012} 7013 [(set_attr "isa" "noavx,noavx,avx,avx") 7014 (set_attr "type" "sselog") 7015 (set (attr "prefix_rex") 7016 (if_then_else 7017 (and (not (match_test "TARGET_AVX")) 7018 (eq (const_string "<MODE>mode") (const_string "V2DImode"))) 7019 (const_string "1") 7020 (const_string "*"))) 7021 (set (attr "prefix_data16") 7022 (if_then_else 7023 (and (not (match_test "TARGET_AVX")) 7024 (eq (const_string "<MODE>mode") (const_string "V8HImode"))) 7025 (const_string "1") 7026 (const_string "*"))) 7027 (set (attr "prefix_extra") 7028 (if_then_else 7029 (and (not (match_test "TARGET_AVX")) 7030 (eq (const_string "<MODE>mode") (const_string "V8HImode"))) 7031 (const_string "*") 7032 (const_string "1"))) 7033 (set_attr "length_immediate" "1") 7034 (set_attr "prefix" "orig,orig,vex,vex") 7035 (set_attr "mode" "TI")]) 7036 7037(define_insn "*sse4_1_pextrb_<mode>" 7038 [(set (match_operand:SWI48 0 "register_operand" "=r") 7039 (zero_extend:SWI48 7040 (vec_select:QI 7041 (match_operand:V16QI 1 "register_operand" "x") 7042 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))] 7043 "TARGET_SSE4_1" 7044 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" 7045 [(set_attr "type" "sselog") 7046 (set_attr "prefix_extra" "1") 7047 (set_attr "length_immediate" "1") 7048 (set_attr "prefix" "maybe_vex") 7049 (set_attr "mode" "TI")]) 7050 7051(define_insn "*sse4_1_pextrb_memory" 7052 [(set (match_operand:QI 0 "memory_operand" "=m") 7053 (vec_select:QI 7054 (match_operand:V16QI 1 "register_operand" "x") 7055 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))] 7056 "TARGET_SSE4_1" 7057 "%vpextrb\t{%2, %1, %0|%0, %1, %2}" 7058 [(set_attr "type" "sselog") 7059 (set_attr "prefix_extra" "1") 7060 (set_attr "length_immediate" "1") 7061 (set_attr "prefix" "maybe_vex") 7062 (set_attr "mode" "TI")]) 7063 7064(define_insn "*sse2_pextrw_<mode>" 7065 [(set (match_operand:SWI48 0 
"register_operand" "=r") 7066 (zero_extend:SWI48 7067 (vec_select:HI 7068 (match_operand:V8HI 1 "register_operand" "x") 7069 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] 7070 "TARGET_SSE2" 7071 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}" 7072 [(set_attr "type" "sselog") 7073 (set_attr "prefix_data16" "1") 7074 (set_attr "length_immediate" "1") 7075 (set_attr "prefix" "maybe_vex") 7076 (set_attr "mode" "TI")]) 7077 7078(define_insn "*sse4_1_pextrw_memory" 7079 [(set (match_operand:HI 0 "memory_operand" "=m") 7080 (vec_select:HI 7081 (match_operand:V8HI 1 "register_operand" "x") 7082 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))] 7083 "TARGET_SSE4_1" 7084 "%vpextrw\t{%2, %1, %0|%0, %1, %2}" 7085 [(set_attr "type" "sselog") 7086 (set_attr "prefix_extra" "1") 7087 (set_attr "length_immediate" "1") 7088 (set_attr "prefix" "maybe_vex") 7089 (set_attr "mode" "TI")]) 7090 7091(define_insn "*sse4_1_pextrd" 7092 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") 7093 (vec_select:SI 7094 (match_operand:V4SI 1 "register_operand" "x") 7095 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] 7096 "TARGET_SSE4_1" 7097 "%vpextrd\t{%2, %1, %0|%0, %1, %2}" 7098 [(set_attr "type" "sselog") 7099 (set_attr "prefix_extra" "1") 7100 (set_attr "length_immediate" "1") 7101 (set_attr "prefix" "maybe_vex") 7102 (set_attr "mode" "TI")]) 7103 7104(define_insn "*sse4_1_pextrd_zext" 7105 [(set (match_operand:DI 0 "register_operand" "=r") 7106 (zero_extend:DI 7107 (vec_select:SI 7108 (match_operand:V4SI 1 "register_operand" "x") 7109 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))] 7110 "TARGET_64BIT && TARGET_SSE4_1" 7111 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" 7112 [(set_attr "type" "sselog") 7113 (set_attr "prefix_extra" "1") 7114 (set_attr "length_immediate" "1") 7115 (set_attr "prefix" "maybe_vex") 7116 (set_attr "mode" "TI")]) 7117 7118;; It must come before *vec_extractv2di_1_rex64 since it is preferred. 
;; Extract the DImode element selected by operand 2 (0 or 1) from a V2DI
;; register into a general register or memory.  Only enabled when both
;; TARGET_SSE4_1 and TARGET_64BIT hold.
(define_insn "*sse4_1_pextrq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
  "TARGET_SSE4_1 && TARGET_64BIT"
  "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_rex" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Expand an 8-bit shuffle immediate (operand 2) into the eight explicit
;; element selectors taken by avx2_pshufd_1: the four 2-bit fields of the
;; immediate, followed by the same four values plus 4.
(define_expand "avx2_pshufdv3"
  [(match_operand:V8SI 0 "register_operand" "")
   (match_operand:V8SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  /* Field I of the immediate occupies bits 2*I and 2*I+1.  */
  for (i = 0; i < 4; i++)
    {
      int sel = (imm >> (2 * i)) & 3;
      lo[i] = GEN_INT (sel);
      hi[i] = GEN_INT (sel + 4);
    }

  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
                                lo[0], lo[1], lo[2], lo[3],
                                hi[0], hi[1], hi[2], hi[3]));
  DONE;
})

;; V8SI element shuffle with one selector operand per result element.
;; Operands 2-5 must be in 0..3 and operands 6-9 in 4..7; the insn
;; condition additionally requires each of operands 6-9 to equal the
;; corresponding one of operands 2-5 plus 4.  The output code packs
;; operands 2-5, two bits each, into the immediate printed as %2.
(define_insn "avx2_pshufd_1"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_select:V8SI
          (match_operand:V8SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")
                     (match_operand 7 "const_4_to_7_operand" "")
                     (match_operand 8 "const_4_to_7_operand" "")
                     (match_operand 9 "const_4_to_7_operand" "")])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
{
  int imm = (INTVAL (operands[2])
             | (INTVAL (operands[3]) << 2)
             | (INTVAL (operands[4]) << 4)
             | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm);

  return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])

;; Expand a shuffle immediate (operand 2) into the four 2-bit element
;; selectors taken by sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel[0], sel[1], sel[2], sel[3]));
  DONE;
})

;; V4SI element shuffle; operands 2-5 (each 0..3) select the source
;; element for result elements 0-3.  The output code packs them, two
;; bits each, into the immediate printed as %2.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = (INTVAL (operands[2])
             | (INTVAL (operands[3]) << 2)
             | (INTVAL (operands[4]) << 4)
             | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm);

  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Expand an 8-bit shuffle immediate (operand 2) into the eight selectors
;; taken by avx2_pshuflw_1: the four 2-bit fields of the immediate,
;; followed by the same four values plus 8.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  for (i = 0; i < 4; i++)
    {
      int sel = (imm >> (2 * i)) & 3;
      lo[i] = GEN_INT (sel);
      hi[i] = GEN_INT (sel + 8);
    }

  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
                                 lo[0], lo[1], lo[2], lo[3],
                                 hi[0], hi[1], hi[2], hi[3]));
  DONE;
})

;; V16HI shuffle in which result elements 0-3 and 8-11 are chosen by
;; selector operands while elements 4-7 and 12-15 are copied through
;; unchanged.  Operands 2-5 are in 0..3 and operands 6-9 in 8..11; the
;; insn condition requires each of operands 6-9 to equal the
;; corresponding one of operands 2-5 plus 8.  The output code packs
;; operands 2-5 into the immediate printed as %2.
(define_insn "avx2_pshuflw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)
                     (match_operand 6 "const_8_to_11_operand" "")
                     (match_operand 7 "const_8_to_11_operand" "")
                     (match_operand 8 "const_8_to_11_operand" "")
                     (match_operand 9 "const_8_to_11_operand" "")
                     (const_int 12)
                     (const_int 13)
                     (const_int 14)
                     (const_int 15)])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int imm = (INTVAL (operands[2])
             | (INTVAL (operands[3]) << 2)
             | (INTVAL (operands[4]) << 4)
             | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm);

  return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])

;; Expand a shuffle immediate (operand 2) into the four 2-bit element
;; selectors taken by sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})

;; V8HI shuffle in which result elements 0-3 are chosen by operands 2-5
;; (each 0..3) and elements 4-7 are copied through unchanged.  The
;; output code packs operands 2-5 into the immediate printed as %2.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  int imm = (INTVAL (operands[2])
             | (INTVAL (operands[3]) << 2)
             | (INTVAL (operands[4]) << 4)
             | (INTVAL (operands[5]) << 6));
  operands[2] = GEN_INT (imm);

  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Expand an 8-bit shuffle immediate (operand 2) into the eight selectors
;; taken by avx2_pshufhw_1: the four 2-bit fields of the immediate plus 4,
;; followed by the same four fields plus 12.
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand" "")
   (match_operand:V16HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  for (i = 0; i < 4; i++)
    {
      int sel = (imm >> (2 * i)) & 3;
      lo[i] = GEN_INT (sel + 4);
      hi[i] = GEN_INT (sel + 12);
    }

  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
                                 lo[0], lo[1], lo[2], lo[3],
                                 hi[0], hi[1], hi[2], hi[3]));
  DONE;
})

;; V16HI shuffle in which result elements 4-7 and 12-15 are chosen by
;; selector operands while elements 0-3 and 8-11 are copied through
;; unchanged.  Operands 2-5 are in 4..7 and operands 6-9 in 12..15; the
;; insn condition requires each of operands 6-9 to equal the
;; corresponding one of operands 2-5 plus 8.  The output code rebases
;; operands 2-5 to 0..3 and packs them into the immediate printed as %2.
(define_insn "avx2_pshufhw_1"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_select:V16HI
          (match_operand:V16HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (const_int 8)
                     (const_int 9)
                     (const_int 10)
                     (const_int 11)
                     (match_operand 6 "const_12_to_15_operand" "")
                     (match_operand 7 "const_12_to_15_operand" "")
                     (match_operand 8 "const_12_to_15_operand" "")
                     (match_operand 9 "const_12_to_15_operand" "")])))]
  "TARGET_AVX2
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int imm = ((INTVAL (operands[2]) - 4)
             | ((INTVAL (operands[3]) - 4) << 2)
             | ((INTVAL (operands[4]) - 4) << 4)
             | ((INTVAL (operands[5]) - 4) << 6));
  operands[2] = GEN_INT (imm);

  return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])

;; Expand a shuffle immediate (operand 2) into the four element selectors
;; taken by sse2_pshufhw_1: each 2-bit field of the immediate plus 4.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT (((imm >> (2 * i)) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})

;; V8HI shuffle in which result elements 0-3 are copied through unchanged
;; and elements 4-7 are chosen by operands 2-5 (each 4..7).  The output
;; code rebases operands 2-5 to 0..3 and packs them into the immediate
;; printed as %2.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = ((INTVAL (operands[2]) - 4)
             | ((INTVAL (operands[3]) - 4) << 2)
             | ((INTVAL (operands[4]) - 4) << 4)
             | ((INTVAL (operands[5]) - 4) << 6));
  operands[2] = GEN_INT (imm);

  return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

;; Build a V4SI whose element 0 is the SImode operand 1 and whose other
;; elements come from operand 2, which the preparation code sets to the
;; V4SI zero constant.
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 1 "nonimmediate_operand" ""))
          (match_dup 2)
          (const_int 1)))]
  "TARGET_SSE"
{
  operands[2] = CONST0_RTX (V4SImode);
})

;; Merge the SImode operand 2 into element 0 of operand 1 (a register or
;; the zero vector).  The five alternatives emit, in order: movd from
;; memory, movd from a general register, movss from memory, two-operand
;; movss, and three-operand vmovss.
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,Yi,x,x,x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
          (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,x")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   %vmovd\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2,*,noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,V4SF,SF,SF")])

;; Store element 0 of a V4SI register as SImode.  Emitted as "#" and
;; split after reload into a plain SImode move from the same hard
;; register, provided inter-unit moves are enabled, the destination is
;; memory, or the destination is not a general register.
(define_insn_and_split "sse2_stored"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x,Yi")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
})

;; Extract element N (operand 2, 0..3) of a V4SI memory operand into a
;; general register.  Split after reload into an SImode load from the
;; memory operand adjusted by N * 4 bytes.
(define_insn_and_split "*vec_ext_v4si_mem"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (vec_select:SI
          (match_operand:V4SI 1 "memory_operand" "o")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
{
  int byte_offset = INTVAL (operands[2]) * 4;
  rtx elt_mem = adjust_address (operands[1], SImode, byte_offset);

  emit_move_insn (operands[0], elt_mem);
  DONE;
})

;; Store element 0 of a V2DI register as DImode.
(define_expand "sse_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE")

;; 64-bit form of the element-0 DImode store.  The first two alternatives
;; are emitted as "#"; the third loads directly from an offsettable
;; memory source with mov{q}.
(define_insn "*sse2_storeq_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=xm,*r,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
          (parallel [(const_int 0)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   #
   #
   mov{q}\t{%1, %0|%0, %1}"
  [(set_attr "type" "*,*,imov")
   (set_attr "mode" "*,*,DI")])

;; Element-0 DImode store from a V2DI register; always emitted as "#".
(define_insn "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")

;; After reload, rewrite an element-0 DImode extraction from a V2DI
;; register as a plain DImode move from the same hard register, under the
;; same destination restrictions as the sse2_stored split.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE
   && reload_completed
   && (TARGET_INTER_UNIT_MOVES
       || MEM_P (operands [0])
       || !GENERAL_REGNO_P (true_regnum (operands [0])))"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
})

;; Extract element 1 of a V2DI operand as DImode on 64-bit targets.
;; Source and destination may not both be memory.  Alternatives, in
;; order: movhps store to memory, in-place psrldq by 8, three-operand
;; vpsrldq by 8, movq load of the high half of an offsettable memory
;; source (%H1), and mov{q} of that high half into a general register.
(define_insn "*vec_extractv2di_1_rex64"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,r")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
          (parallel [(const_int 1)])))]
  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   mov{q}\t{%H1, %0|%0, %H1}"
  [(set_attr "isa" "*,noavx,avx,*,*")
   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
   (set_attr "length_immediate" "*,1,1,*,*")
   (set_attr "memory" "*,none,none,*,*")
   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
   (set_attr "mode" "V2SF,TI,TI,TI,DI")])

;; 32-bit counterpart of the pattern above: there is no general-register
;; destination, and two extra alternatives use movhlps (register source)
;; and movlps (high half of a memory source).
(define_insn "*vec_extractv2di_1"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=m,x,x,x,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_64BIT && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   %vmovq\t{%H1, %0|%0, %H1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
   (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
   (set_attr "length_immediate" "*,1,1,*,*,*")
   (set_attr "memory" "*,none,none,*,*,*")
   (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])

;; Broadcast an SImode value to all four elements of a V4SI register.
;; Alternatives, in order: pshufd with immediate 0 from a register,
;; vbroadcastss from memory, and in-place shufps with immediate 0.
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
  "TARGET_SSE"
  "@
   %vpshufd\t{$0, %1, %0|%0, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "isa" "sse2,avx,noavx")
   (set_attr "type" "sselog1,ssemov,sselog1")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "prefix" "maybe_vex,vex,orig")
   (set_attr "mode" "TI,V4SF,V4SF")])

;; Broadcast a DImode value to both elements of a V2DI register.
;; Alternatives, in order: in-place punpcklqdq, vpunpcklqdq, movddup
;; from memory, and in-place movlhps.
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   vpunpcklqdq\t{%d1, %0|%0, %d1}
   %vmovddup\t{%1, %0|%0, %1}
   movlhps\t%0, %0"
  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
   (set_attr "prefix" "orig,vex,maybe_vex,orig")
   (set_attr "mode" "TI,TI,DF,V4SF")])

;; Concatenate two SImode values into a V2SI when SSE4.1 is available.
;; The first five alternatives target SSE registers (pinsrd, vpinsrd,
;; punpckldq, vpunpckldq, and movd when operand 2 is zero); the last two
;; use the "*y" register class (punpckldq, and movd when operand 2 is
;; zero).
(define_insn "*vec_concatv2si_sse4_1"
  [(set (match_operand:V2SI 0 "register_operand"     "=x, x,x,x, x, *y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
          (match_operand:SI 2 "vector_move_operand"  "rm,rm,x,x, C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   pinsrd\t{$1, %2, %0|%0, %2, 1}
   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
   %vmovd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_extra" "1,1,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,*,*,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; SSE2 form of the V2SI concatenation.  Operand 2 is restricted to a
;; register or zero.  Alternatives, in order: punpckldq and movd on SSE
;; registers, then punpckldq and movd on the "*y" register class.
(define_insn "*vec_concatv2si_sse2"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
          (match_operand:SI 2 "reg_or_0_operand"     " x,C ,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])

;; SSE-only form of the V2SI concatenation: the SSE register
;; alternatives use unpcklps and movss (memory source only) instead of
;; punpckldq and movd.
(define_insn "*vec_concatv2si_sse"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
          (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])

;; Concatenate two V2SI values into a V4SI.  Alternatives, in order:
;; punpcklqdq, vpunpcklqdq, movlhps, movhps from memory, and vmovhps
;; from memory.
(define_insn "*vec_concatv4si"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,x,x,x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand"     " 0,x,0,0,x")
          (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix" "orig,vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])

;; movd instead of movq is required to handle broken assemblers.
7673(define_insn "*vec_concatv2di_rex64" 7674 [(set (match_operand:V2DI 0 "register_operand" 7675 "=x,x ,x ,Yi,!x,x,x,x,x") 7676 (vec_concat:V2DI 7677 (match_operand:DI 1 "nonimmediate_operand" 7678 " 0,x ,xm,r ,*y,0,x,0,x") 7679 (match_operand:DI 2 "vector_move_operand" 7680 "rm,rm,C ,C ,C ,x,x,m,m")))] 7681 "TARGET_64BIT" 7682 "@ 7683 pinsrq\t{$1, %2, %0|%0, %2, 1} 7684 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1} 7685 %vmovq\t{%1, %0|%0, %1} 7686 %vmovd\t{%1, %0|%0, %1} 7687 movq2dq\t{%1, %0|%0, %1} 7688 punpcklqdq\t{%2, %0|%0, %2} 7689 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} 7690 movhps\t{%2, %0|%0, %2} 7691 vmovhps\t{%2, %1, %0|%0, %1, %2}" 7692 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx") 7693 (set (attr "type") 7694 (if_then_else 7695 (eq_attr "alternative" "0,1,5,6") 7696 (const_string "sselog") 7697 (const_string "ssemov"))) 7698 (set (attr "prefix_rex") 7699 (if_then_else 7700 (and (eq_attr "alternative" "0,3") 7701 (not (match_test "TARGET_AVX"))) 7702 (const_string "1") 7703 (const_string "*"))) 7704 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*") 7705 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*") 7706 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex") 7707 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")]) 7708 7709(define_insn "vec_concatv2di" 7710 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x") 7711 (vec_concat:V2DI 7712 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x") 7713 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))] 7714 "!TARGET_64BIT && TARGET_SSE" 7715 "@ 7716 %vmovq\t{%1, %0|%0, %1} 7717 movq2dq\t{%1, %0|%0, %1} 7718 punpcklqdq\t{%2, %0|%0, %2} 7719 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} 7720 movlhps\t{%2, %0|%0, %2} 7721 movhps\t{%2, %0|%0, %2} 7722 vmovhps\t{%2, %1, %0|%0, %1, %2}" 7723 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx") 7724 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov") 7725 (set_attr "prefix" 
"maybe_vex,orig,orig,vex,orig,orig,vex") 7726 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")]) 7727 7728(define_expand "vec_unpacks_lo_<mode>" 7729 [(match_operand:<sseunpackmode> 0 "register_operand" "") 7730 (match_operand:VI124_AVX2 1 "register_operand" "")] 7731 "TARGET_SSE2" 7732 "ix86_expand_sse_unpack (operands, false, false); DONE;") 7733 7734(define_expand "vec_unpacks_hi_<mode>" 7735 [(match_operand:<sseunpackmode> 0 "register_operand" "") 7736 (match_operand:VI124_AVX2 1 "register_operand" "")] 7737 "TARGET_SSE2" 7738 "ix86_expand_sse_unpack (operands, false, true); DONE;") 7739 7740(define_expand "vec_unpacku_lo_<mode>" 7741 [(match_operand:<sseunpackmode> 0 "register_operand" "") 7742 (match_operand:VI124_AVX2 1 "register_operand" "")] 7743 "TARGET_SSE2" 7744 "ix86_expand_sse_unpack (operands, true, false); DONE;") 7745 7746(define_expand "vec_unpacku_hi_<mode>" 7747 [(match_operand:<sseunpackmode> 0 "register_operand" "") 7748 (match_operand:VI124_AVX2 1 "register_operand" "")] 7749 "TARGET_SSE2" 7750 "ix86_expand_sse_unpack (operands, true, true); DONE;") 7751 7752;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 7753;; 7754;; Miscellaneous 7755;; 7756;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 7757 7758(define_expand "avx2_uavgv32qi3" 7759 [(set (match_operand:V32QI 0 "register_operand" "") 7760 (truncate:V32QI 7761 (lshiftrt:V32HI 7762 (plus:V32HI 7763 (plus:V32HI 7764 (zero_extend:V32HI 7765 (match_operand:V32QI 1 "nonimmediate_operand" "")) 7766 (zero_extend:V32HI 7767 (match_operand:V32QI 2 "nonimmediate_operand" ""))) 7768 (const_vector:V32QI [(const_int 1) (const_int 1) 7769 (const_int 1) (const_int 1) 7770 (const_int 1) (const_int 1) 7771 (const_int 1) (const_int 1) 7772 (const_int 1) (const_int 1) 7773 (const_int 1) (const_int 1) 7774 (const_int 1) (const_int 1) 7775 (const_int 1) (const_int 1) 7776 (const_int 1) (const_int 1) 7777 (const_int 1) (const_int 1) 7778 (const_int 1) (const_int 1) 7779 
(const_int 1) (const_int 1) 7780 (const_int 1) (const_int 1) 7781 (const_int 1) (const_int 1) 7782 (const_int 1) (const_int 1) 7783 (const_int 1) (const_int 1)])) 7784 (const_int 1))))] 7785 "TARGET_AVX2" 7786 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);") 7787 7788(define_expand "sse2_uavgv16qi3" 7789 [(set (match_operand:V16QI 0 "register_operand" "") 7790 (truncate:V16QI 7791 (lshiftrt:V16HI 7792 (plus:V16HI 7793 (plus:V16HI 7794 (zero_extend:V16HI 7795 (match_operand:V16QI 1 "nonimmediate_operand" "")) 7796 (zero_extend:V16HI 7797 (match_operand:V16QI 2 "nonimmediate_operand" ""))) 7798 (const_vector:V16QI [(const_int 1) (const_int 1) 7799 (const_int 1) (const_int 1) 7800 (const_int 1) (const_int 1) 7801 (const_int 1) (const_int 1) 7802 (const_int 1) (const_int 1) 7803 (const_int 1) (const_int 1) 7804 (const_int 1) (const_int 1) 7805 (const_int 1) (const_int 1)])) 7806 (const_int 1))))] 7807 "TARGET_SSE2" 7808 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);") 7809 7810(define_insn "*avx2_uavgv32qi3" 7811 [(set (match_operand:V32QI 0 "register_operand" "=x") 7812 (truncate:V32QI 7813 (lshiftrt:V32HI 7814 (plus:V32HI 7815 (plus:V32HI 7816 (zero_extend:V32HI 7817 (match_operand:V32QI 1 "nonimmediate_operand" "%x")) 7818 (zero_extend:V32HI 7819 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))) 7820 (const_vector:V32QI [(const_int 1) (const_int 1) 7821 (const_int 1) (const_int 1) 7822 (const_int 1) (const_int 1) 7823 (const_int 1) (const_int 1) 7824 (const_int 1) (const_int 1) 7825 (const_int 1) (const_int 1) 7826 (const_int 1) (const_int 1) 7827 (const_int 1) (const_int 1) 7828 (const_int 1) (const_int 1) 7829 (const_int 1) (const_int 1) 7830 (const_int 1) (const_int 1) 7831 (const_int 1) (const_int 1) 7832 (const_int 1) (const_int 1) 7833 (const_int 1) (const_int 1) 7834 (const_int 1) (const_int 1) 7835 (const_int 1) (const_int 1)])) 7836 (const_int 1))))] 7837 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, 
operands)" 7838 "vpavgb\t{%2, %1, %0|%0, %1, %2}" 7839 [(set_attr "type" "sseiadd") 7840 (set_attr "prefix" "vex") 7841 (set_attr "mode" "OI")]) 7842 7843(define_insn "*sse2_uavgv16qi3" 7844 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 7845 (truncate:V16QI 7846 (lshiftrt:V16HI 7847 (plus:V16HI 7848 (plus:V16HI 7849 (zero_extend:V16HI 7850 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")) 7851 (zero_extend:V16HI 7852 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))) 7853 (const_vector:V16QI [(const_int 1) (const_int 1) 7854 (const_int 1) (const_int 1) 7855 (const_int 1) (const_int 1) 7856 (const_int 1) (const_int 1) 7857 (const_int 1) (const_int 1) 7858 (const_int 1) (const_int 1) 7859 (const_int 1) (const_int 1) 7860 (const_int 1) (const_int 1)])) 7861 (const_int 1))))] 7862 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)" 7863 "@ 7864 pavgb\t{%2, %0|%0, %2} 7865 vpavgb\t{%2, %1, %0|%0, %1, %2}" 7866 [(set_attr "isa" "noavx,avx") 7867 (set_attr "type" "sseiadd") 7868 (set_attr "prefix_data16" "1,*") 7869 (set_attr "prefix" "orig,vex") 7870 (set_attr "mode" "TI")]) 7871 7872(define_expand "avx2_uavgv16hi3" 7873 [(set (match_operand:V16HI 0 "register_operand" "") 7874 (truncate:V16HI 7875 (lshiftrt:V16SI 7876 (plus:V16SI 7877 (plus:V16SI 7878 (zero_extend:V16SI 7879 (match_operand:V16HI 1 "nonimmediate_operand" "")) 7880 (zero_extend:V16SI 7881 (match_operand:V16HI 2 "nonimmediate_operand" ""))) 7882 (const_vector:V16HI [(const_int 1) (const_int 1) 7883 (const_int 1) (const_int 1) 7884 (const_int 1) (const_int 1) 7885 (const_int 1) (const_int 1) 7886 (const_int 1) (const_int 1) 7887 (const_int 1) (const_int 1) 7888 (const_int 1) (const_int 1) 7889 (const_int 1) (const_int 1)])) 7890 (const_int 1))))] 7891 "TARGET_AVX2" 7892 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);") 7893 7894(define_expand "sse2_uavgv8hi3" 7895 [(set (match_operand:V8HI 0 "register_operand" "") 7896 (truncate:V8HI 7897 (lshiftrt:V8SI 
(plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" ""))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")

;; 256-bit unsigned word average (vpavgw).  Each result element is
;; (op1 + op2 + 1) >> 1, computed on the zero-extended elements and
;; truncated back to HImode.  Operands 1 and 2 are commutative ("%").
(define_insn "*avx2_uavgv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (plus:V16SI
                (zero_extend:V16SI
                  (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
                (zero_extend:V16SI
                  (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16HI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
  "vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit unsigned word average.  Alternative 0 is the two-operand
;; legacy form (destination tied to operand 1), alternative 1 the
;; three-operand VEX form.
(define_insn "*sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "@
   pavgw\t{%2, %0|%0, %2}
   vpavgw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
(define_insn "<sse2_avx2>_psadbw"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
        (unspec:VI8_AVX2
          [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
           (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "@
   psadbw\t{%2, %0|%0, %2}
   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Move the mask of a floating-point vector (modes in VF) into a
;; general SImode register.
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
          [(match_operand:VF 1 "register_operand" "x")]
          UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Byte mask of a 256-bit vector into a general register.  The
;; destination is an SImode register, so the insn mode is SI, matching
;; sse2_pmovmskb below (it was previously given as DI).
(define_insn "avx2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_AVX2"
  "vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SI")])

;; Byte mask of a 128-bit vector into a general SImode register.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "%vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])

(define_expand "sse2_maskmovdqu"
  [(set
(match_operand:V16QI 0 "memory_operand" "") 8014 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "") 8015 (match_operand:V16QI 2 "register_operand" "") 8016 (match_dup 0)] 8017 UNSPEC_MASKMOV))] 8018 "TARGET_SSE2") 8019 8020(define_insn "*sse2_maskmovdqu" 8021 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D")) 8022 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") 8023 (match_operand:V16QI 2 "register_operand" "x") 8024 (mem:V16QI (match_dup 0))] 8025 UNSPEC_MASKMOV))] 8026 "TARGET_SSE2" 8027 "%vmaskmovdqu\t{%2, %1|%1, %2}" 8028 [(set_attr "type" "ssemov") 8029 (set_attr "prefix_data16" "1") 8030 ;; The implicit %rdi operand confuses default length_vex computation. 8031 (set (attr "length_vex") 8032 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) 8033 (set_attr "prefix" "maybe_vex") 8034 (set_attr "mode" "TI")]) 8035 8036(define_insn "sse_ldmxcsr" 8037 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 8038 UNSPECV_LDMXCSR)] 8039 "TARGET_SSE" 8040 "%vldmxcsr\t%0" 8041 [(set_attr "type" "sse") 8042 (set_attr "atom_sse_attr" "mxcsr") 8043 (set_attr "prefix" "maybe_vex") 8044 (set_attr "memory" "load")]) 8045 8046(define_insn "sse_stmxcsr" 8047 [(set (match_operand:SI 0 "memory_operand" "=m") 8048 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] 8049 "TARGET_SSE" 8050 "%vstmxcsr\t%0" 8051 [(set_attr "type" "sse") 8052 (set_attr "atom_sse_attr" "mxcsr") 8053 (set_attr "prefix" "maybe_vex") 8054 (set_attr "memory" "store")]) 8055 8056(define_insn "sse2_clflush" 8057 [(unspec_volatile [(match_operand 0 "address_operand" "p")] 8058 UNSPECV_CLFLUSH)] 8059 "TARGET_SSE2" 8060 "clflush\t%a0" 8061 [(set_attr "type" "sse") 8062 (set_attr "atom_sse_attr" "fence") 8063 (set_attr "memory" "unknown")]) 8064 8065 8066(define_insn "sse3_mwait" 8067 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") 8068 (match_operand:SI 1 "register_operand" "c")] 8069 UNSPECV_MWAIT)] 8070 "TARGET_SSE3" 8071;; 64bit version is 
"mwait %rax,%rcx". But only lower 32bits are used. 8072;; Since 32bit register operands are implicitly zero extended to 64bit, 8073;; we only need to set up 32bit registers. 8074 "mwait" 8075 [(set_attr "length" "3")]) 8076 8077(define_insn "sse3_monitor" 8078 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") 8079 (match_operand:SI 1 "register_operand" "c") 8080 (match_operand:SI 2 "register_operand" "d")] 8081 UNSPECV_MONITOR)] 8082 "TARGET_SSE3 && !TARGET_64BIT" 8083 "monitor\t%0, %1, %2" 8084 [(set_attr "length" "3")]) 8085 8086(define_insn "sse3_monitor64" 8087 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a") 8088 (match_operand:SI 1 "register_operand" "c") 8089 (match_operand:SI 2 "register_operand" "d")] 8090 UNSPECV_MONITOR)] 8091 "TARGET_SSE3 && TARGET_64BIT" 8092;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in 8093;; RCX and RDX are used. Since 32bit register operands are implicitly 8094;; zero extended to 64bit, we only need to set up 32bit registers. 
"monitor"
  [(set_attr "length" "3")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; SSSE3 instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; 256-bit horizontal word add (vphaddw).  The instruction works
;; independently on each 128-bit lane, so the result is laid out as
;;   elements  0-3 : pairwise sums of operand 1 elements 0-7
;;   elements  4-7 : pairwise sums of operand 2 elements 0-7
;;   elements  8-11: pairwise sums of operand 1 elements 8-15
;;   elements 12-15: pairwise sums of operand 2 elements 8-15
;; The RTL below mirrors that order; describing it as "all of operand 1
;; followed by all of operand 2" would let RTL simplification compute
;; results that differ from the hardware.
(define_insn "avx2_phaddwv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          ;; Low 128-bit lane.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit horizontal word add: pairwise sums of operand 1 followed by
;; pairwise sums of operand 2.
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int
7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phaddw\t{%2, %0|%0, %2}
   vphaddw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; 64-bit (MMX register, constraint "y") horizontal word add: pairwise
;; sums of operand 1 followed by pairwise sums of operand 2.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; 256-bit horizontal dword add (vphaddd).  The instruction works
;; independently on each 128-bit lane, so the result is laid out as
;;   elements 0-1: pairwise sums of operand 1 elements 0-3
;;   elements 2-3: pairwise sums of operand 2 elements 0-3
;;   elements 4-5: pairwise sums of operand 1 elements 4-7
;;   elements 6-7: pairwise sums of operand 2 elements 4-7
;; The RTL below mirrors that order rather than concatenating all of
;; operand 1's sums with all of operand 2's.
(define_insn "avx2_phadddv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          ;; Low 128-bit lane.
          (vec_concat:V4SI
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
          ;; High 128-bit lane.
          (vec_concat:V4SI
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (plus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
            (vec_concat:V2SI
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (plus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vphaddd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit horizontal dword add: pairwise sums of operand 1 followed by
;; pairwise sums of operand 2.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand"
"0,x") 8309 (parallel [(const_int 0)])) 8310 (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) 8311 (plus:SI 8312 (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) 8313 (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) 8314 (vec_concat:V2SI 8315 (plus:SI 8316 (vec_select:SI 8317 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") 8318 (parallel [(const_int 0)])) 8319 (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) 8320 (plus:SI 8321 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) 8322 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] 8323 "TARGET_SSSE3" 8324 "@ 8325 phaddd\t{%2, %0|%0, %2} 8326 vphaddd\t{%2, %1, %0|%0, %1, %2}" 8327 [(set_attr "isa" "noavx,avx") 8328 (set_attr "type" "sseiadd") 8329 (set_attr "atom_unit" "complex") 8330 (set_attr "prefix_data16" "1,*") 8331 (set_attr "prefix_extra" "1") 8332 (set_attr "prefix" "orig,vex") 8333 (set_attr "mode" "TI")]) 8334 8335(define_insn "ssse3_phadddv2si3" 8336 [(set (match_operand:V2SI 0 "register_operand" "=y") 8337 (vec_concat:V2SI 8338 (plus:SI 8339 (vec_select:SI 8340 (match_operand:V2SI 1 "register_operand" "0") 8341 (parallel [(const_int 0)])) 8342 (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) 8343 (plus:SI 8344 (vec_select:SI 8345 (match_operand:V2SI 2 "nonimmediate_operand" "ym") 8346 (parallel [(const_int 0)])) 8347 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] 8348 "TARGET_SSSE3" 8349 "phaddd\t{%2, %0|%0, %2}" 8350 [(set_attr "type" "sseiadd") 8351 (set_attr "atom_unit" "complex") 8352 (set_attr "prefix_extra" "1") 8353 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8354 (set_attr "mode" "DI")]) 8355 8356(define_insn "avx2_phaddswv16hi3" 8357 [(set (match_operand:V16HI 0 "register_operand" "=x") 8358 (vec_concat:V16HI 8359 (vec_concat:V8HI 8360 (vec_concat:V4HI 8361 (vec_concat:V2HI 8362 (ss_plus:HI 8363 (vec_select:HI 8364 (match_operand:V16HI 1 "register_operand" "x") 8365 (parallel 
[(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; vphaddsw works independently on each 128-bit lane: the
            ;; upper half of the low lane holds the saturating pairwise
            ;; sums of operand 2 elements 0-7, not operand 1 elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane: operand 1 elements 8-15, then operand 2
          ;; elements 8-15.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ss_plus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit horizontal word add with signed saturation: pairwise sums of
;; operand 1 followed by pairwise sums of operand 2.
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int
4)])) 8465 (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) 8466 (ss_plus:HI 8467 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8468 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8469 "TARGET_SSSE3" 8470 "@ 8471 phaddsw\t{%2, %0|%0, %2} 8472 vphaddsw\t{%2, %1, %0|%0, %1, %2}" 8473 [(set_attr "isa" "noavx,avx") 8474 (set_attr "type" "sseiadd") 8475 (set_attr "atom_unit" "complex") 8476 (set_attr "prefix_data16" "1,*") 8477 (set_attr "prefix_extra" "1") 8478 (set_attr "prefix" "orig,vex") 8479 (set_attr "mode" "TI")]) 8480 8481(define_insn "ssse3_phaddswv4hi3" 8482 [(set (match_operand:V4HI 0 "register_operand" "=y") 8483 (vec_concat:V4HI 8484 (vec_concat:V2HI 8485 (ss_plus:HI 8486 (vec_select:HI 8487 (match_operand:V4HI 1 "register_operand" "0") 8488 (parallel [(const_int 0)])) 8489 (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) 8490 (ss_plus:HI 8491 (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) 8492 (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) 8493 (vec_concat:V2HI 8494 (ss_plus:HI 8495 (vec_select:HI 8496 (match_operand:V4HI 2 "nonimmediate_operand" "ym") 8497 (parallel [(const_int 0)])) 8498 (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) 8499 (ss_plus:HI 8500 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 8501 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] 8502 "TARGET_SSSE3" 8503 "phaddsw\t{%2, %0|%0, %2}" 8504 [(set_attr "type" "sseiadd") 8505 (set_attr "atom_unit" "complex") 8506 (set_attr "prefix_extra" "1") 8507 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8508 (set_attr "mode" "DI")]) 8509 8510(define_insn "avx2_phsubwv16hi3" 8511 [(set (match_operand:V16HI 0 "register_operand" "=x") 8512 (vec_concat:V16HI 8513 (vec_concat:V8HI 8514 (vec_concat:V4HI 8515 (vec_concat:V2HI 8516 (minus:HI 8517 (vec_select:HI 8518 (match_operand:V16HI 1 "register_operand" "x") 8519 (parallel [(const_int 0)])) 8520 (vec_select:HI (match_dup 
1) (parallel [(const_int 1)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            ;; vphsubw works independently on each 128-bit lane: the
            ;; upper half of the low lane holds the pairwise differences
            ;; of operand 2 elements 0-7, not operand 1 elements 8-15.
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
          ;; High 128-bit lane: operand 1 elements 8-15, then operand 2
          ;; elements 8-15.
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit horizontal word subtract: pairwise differences (even element
;; minus the following odd element) of operand 1, then of operand 2.
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phsubw\t{%2, %0|%0, %2}
   vphsubw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; phsubw on V4HI ("y" constraint registers): every result element is
;; element 2k minus element 2k+1 of one source; the two differences from
;; operand 1 are concatenated first, then the two from operand 2.
;; Operand 1 is tied to the destination.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; vphsubd on V8SI: pairwise differences (element 2k minus element 2k+1)
;; of all eight elements of operand 1, followed by the same for operand 2.
;; Three-operand VEX form only.
(define_insn "avx2_phsubdv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (vec_concat:V8SI
          (vec_concat:V4SI
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI
                  (match_operand:V8SI 1 "register_operand" "x")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
              (minus:SI
                (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4SI
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI
                  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2SI
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
              (minus:SI
                (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; phsubd/vphsubd on V4SI.  Alternative 0 ties operand 1 to the
;; destination (isa "noavx"); alternative 1 is the three-operand form
;; (isa "avx").
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
        (vec_concat:V4SI
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0,x")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   phsubd\t{%2, %0|%0, %2}
   vphsubd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; phsubd on V2SI ("y" constraint registers): result is
;; { op1[0] - op1[1], op2[0] - op2[1] }.
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; vphsubsw on V16HI: same pairwise shape as the phsubw patterns, but each
;; difference is a signed-saturating subtraction (ss_minus).  All sixteen
;; elements of operand 1 are consumed first, then those of operand 2.
(define_insn "avx2_phsubswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 1 "register_operand" "x")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
          (vec_concat:V8HI
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
                    (parallel [(const_int 0)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
            (vec_concat:V4HI
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
              (vec_concat:V2HI
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
                (ss_minus:HI
                  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
                  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; phsubsw/vphsubsw on V8HI: signed-saturating pairwise differences.
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand form.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0,x")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   phsubsw\t{%2, %0|%0, %2}
   vphsubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; phsubsw on V4HI ("y" constraint registers): signed-saturating pairwise
;; differences, operand 1's pairs first, then operand 2's.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; vpmaddubsw on V32QI inputs, V16HI result.  The even-indexed bytes of
;; operand 1 (zero-extended) are multiplied by the even-indexed bytes of
;; operand 2 (sign-extended); likewise for the odd-indexed bytes; the two
;; product vectors are then added with signed saturation (ss_plus).
(define_insn "avx2_pmaddubsw256"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (ss_plus:V16HI
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 1 "register_operand" "x")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)
                           (const_int 16)
                           (const_int 18)
                           (const_int 20)
                           (const_int 22)
                           (const_int 24)
                           (const_int 26)
                           (const_int 28)
                           (const_int 30)])))
            (sign_extend:V16HI
              (vec_select:V16QI
                (match_operand:V32QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)
                           (const_int 16)
                           (const_int 18)
                           (const_int 20)
                           (const_int 22)
                           (const_int 24)
                           (const_int 26)
                           (const_int 28)
                           (const_int 30)]))))
          (mult:V16HI
            (zero_extend:V16HI
              (vec_select:V16QI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)
                           (const_int 17)
                           (const_int 19)
                           (const_int 21)
                           (const_int 23)
                           (const_int 25)
                           (const_int 27)
                           (const_int 29)
                           (const_int 31)])))
            (sign_extend:V16HI
              (vec_select:V16QI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)
                           (const_int 17)
                           (const_int 19)
                           (const_int 21)
                           (const_int 23)
                           (const_int 25)
                           (const_int 27)
                           (const_int 29)
                           (const_int 31)]))))))]
  "TARGET_AVX2"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; pmaddubsw/vpmaddubsw on V16QI inputs, V8HI result: same even/odd
;; multiply-and-saturating-add shape as the 256-bit pattern.  Operand 1
;; is the zero-extended source, operand 2 the sign-extended one.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (ss_plus:V8HI
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0,x")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)]))))
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)]))))))]
  "TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; pmaddubsw on V8QI inputs, V4HI result ("y" constraint registers).
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; Expander for the V16HI rounding high multiply.  The RTL computes
;; truncate ((((sext (op1) * sext (op2)) >> 14) + 1) >> 1) per element.
;; The preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the insn below is matched.
(define_expand "avx2_umulhrswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (lshiftrt:V16SI
                (mult:V16SI
                  (sign_extend:V16SI
                    (match_operand:V16HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V16SI
                    (match_operand:V16HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V16HI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")

;; Matching insn for the expander above; emits vpmulhrsw.  Operand 1 is
;; marked commutative ("%") and the condition additionally requires
;; ix86_binary_operator_ok.
(define_insn "*avx2_umulhrswv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (truncate:V16HI
          (lshiftrt:V16SI
            (plus:V16SI
              (lshiftrt:V16SI
                (mult:V16SI
                  (sign_extend:V16SI
                    (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
                  (sign_extend:V16SI
                    (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              (const_vector:V16HI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
  "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Expander for the V8HI rounding high multiply; same arithmetic shape as
;; the V16HI expander, on eight elements.
(define_expand "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")

;; Matching insn for the V8HI expander; pmulhrsw (operand 1 tied to the
;; destination) or vpmulhrsw (three-operand form).
(define_insn "*ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; Expander for the V4HI rounding high multiply.
(define_expand "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" ""))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "")))
                (const_int 14))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3"
  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")

;; Matching insn for the V4HI expander ("y" constraint registers).
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; pshufb/vpshufb over the VI1_AVX2 modes, modelled as an opaque
;; UNSPEC_PSHUFB of the two sources.
(define_insn "<ssse3_avx2>_pshufb<mode>3"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
           (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "@
   pshufb\t{%2, %0|%0, %2}
   vpshufb\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; pshufb on V8QI ("y" constraint registers).
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; psign{b,w,d}/vpsign{b,w,d} over the VI124_AVX2 modes, modelled as
;; UNSPEC_PSIGN; the mnemonic suffix comes from <ssemodesuffix>.
(define_insn "<ssse3_avx2>_psign<mode>3"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
        (unspec:VI124_AVX2
          [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
           (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "@
   psign<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; psign over the MMXMODEI modes ("y" constraint registers); the mnemonic
;; suffix comes from <mmxvecsize>.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; palignr/vpalignr over the SSESCALARMODE modes.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing (presumably converting a bit count into the byte count the
;; instruction takes -- confirm against the predicate).
(define_insn "<ssse3_avx2>_palignr<mode>"
  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
        (unspec:SSESCALARMODE
          [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
           (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);

  switch (which_alternative)
    {
    case 0:
      return "palignr\t{%3, %2, %0|%0, %2, %3}";
    case 1:
      return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; palignr on DI ("y" constraint registers); the immediate is divided by
;; 8 before printing, as in the pattern above.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;; Element-wise absolute value over the VI124_AVX2 modes, emitted as
;; pabs{b,w,d} with the "%v" template prefix (prefix "maybe_vex").
(define_insn "abs<mode>2"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
        (abs:VI124_AVX2
          (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Element-wise absolute value over the MMXMODEI modes ("y" constraint
;; registers).  Shares the "abs<mode>2" name template with the pattern
;; above but iterates over a different mode iterator.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "1")
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; AMD SSE4A instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; movnts{s,d}: store of a scalar MODEF register to memory, modelled as
;; UNSPEC_MOVNT.
(define_insn "sse4a_movnt<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "=m")
        (unspec:MODEF
          [(match_operand:MODEF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<MODE>")])

;; Same store, but the source is element 0 of a VF_128 vector register.
(define_insn "sse4a_vmmovnt<mode>"
  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
        (unspec:<ssescalarmode>
          [(vec_select:<ssescalarmode>
             (match_operand:VF_128 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<ssescalarmode>")])

;; extrq with two immediates (operands 2 and 3, each matched by
;; const_0_to_255_operand); operand 1 is tied to the destination.
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand 2 "const_0_to_255_operand" "")
           (match_operand 3 "const_0_to_255_operand" "")]
          UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])

;; extrq with a V16QI register as the second source instead of immediates.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V16QI 2 "register_operand" "x")]
          UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])

;; insertq with a register source (operand 2) and two immediates
;; (operands 3 and 4).
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")
           (match_operand 3 "const_0_to_255_operand" "")
           (match_operand 4 "const_0_to_255_operand" "")]
          UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])

;; insertq, register-only form.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")]
          UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Intel SSE4.1 instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; blendp{s,d}/vblendp{s,d} over the VF modes, expressed as a vec_merge
;; of operand 2 and operand 1 under the immediate mask in operand 3.
(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (vec_merge:VF
          (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
          (match_operand:VF 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
  "TARGET_SSE4_1"
  "@
   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; blendvp{s,d}/vblendvp{s,d}: variable blend controlled by a register
;; (operand 3), modelled as UNSPEC_BLENDV.  In alternative 0 the selector
;; uses the "Yz" constraint; in alternative 1 any "x" register is allowed.
(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
           (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
           (match_operand:VF 3 "register_operand" "Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; dpp{s,d}/vdpp{s,d} with an 8-bit immediate (operand 3), modelled as
;; UNSPEC_DP.  Operand 1 is marked commutative ("%").
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x,x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
           (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_DP))]
  "TARGET_SSE4_1"
  "@
   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; movntdqa: load from a memory operand into a vector register, modelled
;; as UNSPEC_MOVNTDQA over the VI8_AVX2 modes.
(define_insn "<sse4_1_avx2>_movntdqa"
  [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
        (unspec:VI8_AVX2
          [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
          UNSPEC_MOVNTDQA))]
  "TARGET_SSE4_1"
  "%vmovntdqa\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

;; mpsadbw/vmpsadbw with an 8-bit immediate (operand 3), modelled as
;; UNSPEC_MPSADBW over the VI1_AVX2 modes.
(define_insn "<sse4_1_avx2>_mpsadbw"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
           (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_MPSADBW))]
  "TARGET_SSE4_1"
  "@
   mpsadbw\t{%3, %2, %0|%0, %2, %3}
   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; vpackusdw on V8SI inputs: each source is narrowed to V8HI with
;; unsigned saturation (us_truncate) and the two halves are concatenated,
;; operand 1 first.
(define_insn "avx2_packusdw"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (us_truncate:V8HI
            (match_operand:V8SI 1 "register_operand" "x"))
          (us_truncate:V8HI
            (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_AVX2"
  "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; packusdw/vpackusdw on V4SI inputs, V8HI result; same shape as the
;; 256-bit pattern above.
(define_insn "sse4_1_packusdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_concat:V8HI
          (us_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0,x"))
          (us_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
  "TARGET_SSE4_1"
  "@
   packusdw\t{%2, %0|%0, %2}
   vpackusdw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; pblendvb/vpblendvb: byte-wise variable blend controlled by a register
;; (operand 3), sharing UNSPEC_BLENDV with the floating-point blendv
;; pattern.  Only the VEX alternative has a length_immediate of 1.
(define_insn "<sse4_1_avx2>_pblendvb"
  [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
        (unspec:VI1_AVX2
          [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
           (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
           (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
          UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   pblendvb\t{%3, %2, %0|%0, %2, %3}
   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "*,1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])

;; pblendw/vpblendw on V8HI: vec_merge of operand 2 and operand 1 under
;; the 8-bit immediate mask in operand 3.
(define_insn "sse4_1_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
        (vec_merge:V8HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
          (match_operand:V8HI 1 "register_operand" "0,x")
          (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
  "TARGET_SSE4_1"
  "@
   pblendw\t{%3, %2, %0|%0, %2, %3}
   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])

;; The builtin uses an 8-bit immediate.  Expand that.
;; Expander behind the 256-bit vpblendw builtin.  Operand 3 arrives as an
;; 8-bit immediate; its low byte is duplicated into bits 8-15 so that the
;; vec_merge selector carries one bit for each of the sixteen elements.
(define_expand "avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand" "")
        (vec_merge:V16HI
          (match_operand:V16HI 2 "nonimmediate_operand" "")
          (match_operand:V16HI 1 "register_operand" "")
          (match_operand:SI 3 "const_0_to_255_operand" "")))]
  "TARGET_AVX2"
{
  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
  operands[3] = GEN_INT (val << 8 | val);
})

;; Matching insn for the expander above.  The selector is narrowed back to
;; its low 8 bits just before the assembler template is returned, because
;; the instruction itself only takes an 8-bit immediate.
(define_insn "*avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_merge:V16HI
          (match_operand:V16HI 2 "nonimmediate_operand" "xm")
          (match_operand:V16HI 1 "register_operand" "x")
          (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
  "TARGET_AVX2"
{
  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; vpblendd: immediate-controlled merge of operand 2 into operand 1 for the
;; VI4_AVX2 modes.
(define_insn "avx2_pblendd<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
        (vec_merge:VI4_AVX2
          (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
          (match_operand:VI4_AVX2 1 "register_operand" "x")
          (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_AVX2"
  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; phminposuw is kept opaque to the RTL optimizers as an unspec.
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
                     UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "%vphminposuw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Packed integer extensions (pmovsx*/pmovzx* families).  <code> and
;; <extsuffix> are substituted through the any_extend iterator, which is
;; defined outside this part of the file.  The 128-bit forms extend only
;; the low elements of operand 1, hence the vec_select; on the Intel-syntax
;; side they print operand 1 with a size modifier (%q, %k or %w).
;; NOTE(review): the modifier presumably matches the number of bytes
;; actually read from a memory operand -- confirm against print_operand.

;; 16 x QI -> 16 x HI (256-bit).
(define_insn "avx2_<code>v16qiv16hi2"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (any_extend:V16HI
          (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Low 8 x QI -> 8 x HI (128-bit).
(define_insn "sse4_1_<code>v8qiv8hi2"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (any_extend:V8HI
          (vec_select:V8QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Low 8 x QI -> 8 x SI (256-bit result).
(define_insn "avx2_<code>v8qiv8si2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (any_extend:V8SI
          (vec_select:V8QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX2"
  "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Low 4 x QI -> 4 x SI (128-bit).
(define_insn "sse4_1_<code>v4qiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (any_extend:V4SI
          (vec_select:V4QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; 8 x HI -> 8 x SI (256-bit).
(define_insn "avx2_<code>v8hiv8si2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (any_extend:V8SI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Low 4 x HI -> 4 x SI (128-bit).
(define_insn "sse4_1_<code>v4hiv4si2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (any_extend:V4SI
          (vec_select:V4HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Low 4 x QI -> 4 x DI (256-bit result).
(define_insn "avx2_<code>v4qiv4di2"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (any_extend:V4DI
          (vec_select:V4QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2"
  "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Low 2 x QI -> 2 x DI (128-bit).
(define_insn "sse4_1_<code>v2qiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2QI
            (match_operand:V16QI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Low 4 x HI -> 4 x DI (256-bit result).
(define_insn "avx2_<code>v4hiv4di2"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (any_extend:V4DI
          (vec_select:V4HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2"
  "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Low 2 x HI -> 2 x DI (128-bit).
(define_insn "sse4_1_<code>v2hiv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2HI
            (match_operand:V8HI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; 4 x SI -> 4 x DI (256-bit).  The "prefix" attribute is spelled out here
;; like in every other avx2_* extension pattern instead of being left to
;; the attribute's default value.
(define_insn "avx2_<code>v4siv4di2"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (any_extend:V4DI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Low 2 x SI -> 2 x DI (128-bit).
(define_insn "sse4_1_<code>v2siv2di2"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (any_extend:V2DI
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1"
  "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; vtestps/vtestpd are very similar to comiss and ucomiss when
;; setting FLAGS_REG.  But they are not really compare instructions.
(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:VF 0 "register_operand" "x")
           (match_operand:VF 1 "nonimmediate_operand" "xm")]
          UNSPEC_VTESTP))]
  "TARGET_AVX"
  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; Like comiss and ucomiss, ptest only produces a FLAGS_REG result, but it
;; is not really a compare instruction, so it is modelled as an unspec.
;; 256-bit form; only the VEX encoding is emitted.
(define_insn "avx_ptest256"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V4DI 0 "register_operand" "x")
           (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_AVX"
  "vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; 128-bit ptest; the %v in the template and the maybe_vex prefix attribute
;; let the same pattern serve both encodings.
(define_insn "sse4_1_ptest"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V2DI 0 "register_operand" "x")
           (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
          UNSPEC_PTEST))]
  "TARGET_SSE4_1"
  "%vptest\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Packed roundps/roundpd.  Operand 2 is the rounding-control immediate
;; (0..15).  prefix_data16 is "1" only when TARGET_AVX is not in effect.
(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_15_operand" "n")]
          UNSPEC_ROUND))]
  "TARGET_ROUND"
  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
       (const_string "*")
       (const_string "1")))
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; Round operand 1 with the rounding mode in operand 2, then convert the
;; rounded value to the matching integer vector with fix_trunc.
(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
  [(match_operand:<sseintvecmode> 0 "register_operand" "")
   (match_operand:VF1 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_15_operand" "")]
  "TARGET_ROUND"
{
  rtx rounded = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix>
             (rounded, operands[1], operands[2]));
  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2
             (operands[0], rounded));
  DONE;
})

;; Round two VF2 vectors with the mode in operand 3 and pack the truncated
;; integer results into operand 0.  For V2DF with AVX available (and 128-bit
;; AVX not preferred) both inputs are concatenated and handled by a single
;; 256-bit round + convert; otherwise each input is rounded on its own.
(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
   (match_operand:VF2 1 "nonimmediate_operand" "")
   (match_operand:VF2 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_15_operand" "")]
  "TARGET_ROUND"
{
  rtx first, second;

  if (<MODE>mode == V2DFmode
      && TARGET_AVX
      && !TARGET_PREFER_AVX128)
    {
      rtx wide_rounded = gen_reg_rtx (V4DFmode);

      first = gen_reg_rtx (V4DFmode);
      second = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (first, second, operands[2]));
      emit_insn (gen_avx_roundpd256 (wide_rounded, first, operands[3]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], wide_rounded));
    }
  else
    {
      first = gen_reg_rtx (<MODE>mode);
      second = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix>
                 (first, operands[1], operands[3]));
      emit_insn (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix>
                 (second, operands[2], operands[3]));
      emit_insn (gen_vec_pack_sfix_trunc_<mode>
                 (operands[0], first, second));
    }
  DONE;
})

;; Scalar roundss/roundsd: element 0 of the result is the rounded operand 2,
;; the remaining elements are taken from operand 1 (vec_merge mask 1).
;; Alternative 0 is the two-operand SSE form, alternative 1 the VEX form.
(define_insn "sse4_1_round<ssescalarmodesuffix>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
        (vec_merge:VF_128
          (unspec:VF_128
            [(match_operand:VF_128 2 "register_operand" "x,x")
             (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
            UNSPEC_ROUND)
          (match_operand:VF_128 1 "register_operand" "0,x")
          (const_int 1)))]
  "TARGET_ROUND"
  "@
   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<MODE>")])

;; round<mode>2: add copysign (nextafter (0.5, 0.0), x) to x and truncate
;; the sum with ROUND_TRUNC.  Only enabled when trapping math is off.
(define_expand "round<mode>2"
  [(set (match_dup 4)
        (plus:VF
          (match_operand:VF 1 "register_operand" "")
          (match_dup 3)))
   (set (match_operand:VF 0 "register_operand" "")
        (unspec:VF
          [(match_dup 4) (match_dup 5)]
          UNSPEC_ROUND))]
  "TARGET_ROUND && !flag_trapping_math"
{
  enum machine_mode inner_mode = GET_MODE_INNER (<MODE>mode);
  const struct real_format *format = REAL_MODE_FORMAT (inner_mode);
  REAL_VALUE_TYPE below_half, delta;
  rtx half_scalar, half_vector;

  /* Load nextafter (0.5, 0.0): subtract the real_2expN value built from
     exponent -(p) - 1 from dconsthalf.  */
  real_2expN (&delta, -(format->p) - 1, inner_mode);
  REAL_ARITHMETIC (below_half, MINUS_EXPR, dconsthalf, delta);
  half_scalar = const_double_from_real_value (below_half, inner_mode);

  /* Broadcast the constant and keep it in a register.  */
  half_vector = ix86_build_const_vector (<MODE>mode, true, half_scalar);
  half_vector = force_reg (<MODE>mode, half_vector);

  /* operands[3]: the constant carrying the sign of the input.  */
  operands[3] = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_copysign<mode>3 (operands[3], half_vector, operands[1]));

  /* operands[4]: the biased sum; operands[5]: the rounding immediate.  */
  operands[4] = gen_reg_rtx (<MODE>mode);
  operands[5] = GEN_INT (ROUND_TRUNC);
})

;; round<mode>2 followed by a fix_trunc conversion to the integer vector.
(define_expand "round<mode>2_sfix"
  [(match_operand:<sseintvecmode> 0 "register_operand" "")
   (match_operand:VF1 1 "register_operand" "")]
  "TARGET_ROUND && !flag_trapping_math"
{
  rtx rounded = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_round<mode>2 (rounded, operands[1]));
  emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2
             (operands[0], rounded));
  DONE;
})

;; round<mode>2 on two VF2 inputs, packing the truncated integer results.
;; Uses the same V2DF/AVX 256-bit shortcut as the explicit-mode variant.
(define_expand "round<mode>2_vec_pack_sfix"
  [(match_operand:<ssepackfltmode> 0 "register_operand" "")
   (match_operand:VF2 1 "register_operand" "")
   (match_operand:VF2 2 "register_operand" "")]
  "TARGET_ROUND && !flag_trapping_math"
{
  rtx first, second;

  if (<MODE>mode == V2DFmode
      && TARGET_AVX
      && !TARGET_PREFER_AVX128)
    {
      rtx wide_rounded = gen_reg_rtx (V4DFmode);

      first = gen_reg_rtx (V4DFmode);
      second = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (first, second, operands[2]));
      emit_insn (gen_roundv4df2 (wide_rounded, first));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], wide_rounded));
    }
  else
    {
      first = gen_reg_rtx (<MODE>mode);
      second = gen_reg_rtx (<MODE>mode);

      emit_insn (gen_round<mode>2 (first, operands[1]));
      emit_insn (gen_round<mode>2 (second, operands[2]));

      emit_insn (gen_vec_pack_sfix_trunc_<mode>
                 (operands[0], first, second));
    }
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Intel SSE4.2 string/text processing instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Combined pcmpestr pattern with three results: an SI value constrained to
;; "c", a V16QI value constrained to "Yz", and FLAGS_REG.  It is always
;; split ("#"); the splitter emits pcmpestri and/or pcmpestrm for the
;; results that do not carry a REG_UNUSED note, the flags-only form when
;; only FLAGS_REG is live, and just a deleted-insn note when nothing is.
(define_insn_and_split "sse4_2_pcmpestr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
           (match_operand:SI 3 "register_operand" "a,a")
           (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
           (match_operand:SI 5 "register_operand" "d,d")
           (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_dup 2) (match_dup 3) (match_dup 4)
           (match_dup 5) (match_dup 6)]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2) (match_dup 3) (match_dup 4)
           (match_dup 5) (match_dup 6)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is wanted unless the insn has a REG_UNUSED note for it.  */
  int want_index
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int want_mask
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int want_flags
    = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (want_index)
    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
                                     operands[3], operands[4],
                                     operands[5], operands[6]));
  if (want_mask)
    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
                                     operands[3], operands[4],
                                     operands[5], operands[6]));
  if (want_flags && !want_index && !want_mask)
    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
                                           operands[2], operands[3],
                                           operands[4], operands[5],
                                           operands[6]));
  if (!want_flags && !want_index && !want_mask)
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])

;; pcmpestri: SI result in the "c" register class plus FLAGS_REG.
(define_insn "sse4_2_pcmpestri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)
           (match_dup 4) (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])

;; pcmpestrm: V16QI result in the "Yz" register class plus FLAGS_REG.
(define_insn "sse4_2_pcmpestrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:SI 2 "register_operand" "a,a")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
           (match_operand:SI 4 "register_operand" "d,d")
           (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPESTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)
           (match_dup 4) (match_dup 5)]
          UNSPEC_PCMPESTR))]
  "TARGET_SSE4_2"
  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])

;; Flags-only pcmpestr.  Alternatives 0-1 emit pcmpestrm and clobber the
;; "Yz" scratch; alternatives 2-3 emit pcmpestri and clobber the "c"
;; scratch.  The unused scratch of each alternative is "X".
(define_insn "sse4_2_pcmpestr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
           (match_operand:SI 3 "register_operand" "a,a,a,a")
           (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 5 "register_operand" "d,d,d,d")
           (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPESTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;; Combined pcmpistr pattern; same scheme as sse4_2_pcmpestr but without
;; the two SI length operands.
(define_insn_and_split "sse4_2_pcmpistr"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
           (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 2) (match_dup 3) (match_dup 4)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* A result is wanted unless the insn has a REG_UNUSED note for it.  */
  int want_index
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
  int want_mask
    = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
  int want_flags
    = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);

  if (want_index)
    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
                                     operands[3], operands[4]));
  if (want_mask)
    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
                                     operands[3], operands[4]));
  if (want_flags && !want_index && !want_mask)
    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
                                           operands[2], operands[3],
                                           operands[4]));
  if (!want_flags && !want_index && !want_mask)
    emit_note (NOTE_INSN_DELETED);

  DONE;
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])

;; pcmpistri: SI result in the "c" register class plus FLAGS_REG.
(define_insn "sse4_2_pcmpistri"
  [(set (match_operand:SI 0 "register_operand" "=c,c")
        (unspec:SI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])

;; pcmpistrm: V16QI result in the "Yz" register class plus FLAGS_REG.
(define_insn "sse4_2_pcmpistrm"
  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "x,x")
           (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
           (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
          UNSPEC_PCMPISTR))
   (set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_dup 1) (match_dup 2) (match_dup 3)]
          UNSPEC_PCMPISTR))]
  "TARGET_SSE4_2"
  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "TI")])

;; Flags-only pcmpistr; alternatives are laid out exactly like those of
;; sse4_2_pcmpestr_cconly (0-1 pcmpistrm, 2-3 pcmpistri).
(define_insn "sse4_2_pcmpistr_cconly"
  [(set (reg:CC FLAGS_REG)
        (unspec:CC
          [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
           (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
           (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
          UNSPEC_PCMPISTR))
   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
   (clobber (match_scratch:SI 1 "= X, X,c,c"))]
  "TARGET_SSE4_2"
  "@
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,load,none,load")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; XOP instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; XOP parallel integer multiply/add instructions.
;; Note that the XOP multiply/add instructions
;;    a[i] = b[i] * c[i] + d[i];
;; do not allow the value being added to be a memory operand, which is why
;; operand 3 only has the "x" constraint.

;; V8HI multiply, plain add of operand 3.
(define_insn "xop_pmacsww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (mult:V8HI
            (match_operand:V8HI 1 "nonimmediate_operand" "%x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; V8HI multiply, signed-saturating add of operand 3.
(define_insn "xop_pmacssww"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          (mult:V8HI
            (match_operand:V8HI 1 "nonimmediate_operand" "%x")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; V4SI multiply, plain add of operand 3.
(define_insn "xop_pmacsdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (match_operand:V4SI 1 "nonimmediate_operand" "%x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; V4SI multiply, signed-saturating add of operand 3.
(define_insn "xop_pmacssdd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (match_operand:V4SI 1 "nonimmediate_operand" "%x")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; Elements 0 and 2 of both V4SI sources are sign-extended to DI and
;; multiplied; operand 3 is added with signed saturation.  Both
;; multiplicands must be wrapped in sign_extend:V2DI so that the operands
;; of the V2DI mult have matching modes, exactly as in the three sibling
;; dql/dqh patterns below.
(define_insn "xop_pmacssdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; As xop_pmacssdql, but on elements 1 and 3.
(define_insn "xop_pmacssdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (ss_plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; Non-saturating variant on elements 0 and 2.
(define_insn "xop_pmacsdql"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 0) (const_int 2)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 2)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; Non-saturating variant on elements 1 and 3.
(define_insn "xop_pmacsdqh"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (mult:V2DI
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)])))
            (sign_extend:V2DI
              (vec_select:V2SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)]))))
          (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; XOP parallel integer multiply/add instructions for the intrinsics

;; Odd HI elements of both sources, sign-extended and multiplied;
;; signed-saturating add of operand 3.
(define_insn "xop_pmacsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; As xop_pmacsswd, with a plain (non-saturating) add.
(define_insn "xop_pmacswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 1) (const_int 3)
                           (const_int 5) (const_int 7)]))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; Sum of the even-element products and the odd-element products, then a
;; signed-saturating add of operand 3.
(define_insn "xop_pmadcsswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (ss_plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; As xop_pmadcsswd, with a plain (non-saturating) final add.
(define_insn "xop_pmadcswd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%x")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 2)
                             (const_int 4) (const_int 6)]))))
            (mult:V4SI
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))
              (sign_extend:V4SI
                (vec_select:V4HI
                  (match_dup 2)
                  (parallel [(const_int 1) (const_int 3)
                             (const_int 5) (const_int 7)])))))
          (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
  "TARGET_XOP"
  "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "TI")])

;; XOP parallel XMM conditional moves
;; Operand 3 is the selector; the two alternatives allow a memory operand
;; either for the selector or for operand 2.
(define_insn "xop_pcmov_<mode><avxsizesuffix>"
  [(set (match_operand:V 0 "register_operand" "=x,x")
        (if_then_else:V
          (match_operand:V 3 "nonimmediate_operand" "x,m")
          (match_operand:V 1 "register_operand" "x,x")
          (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
  "TARGET_XOP"
  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sse4arg")])

;; XOP horizontal add/subtract instructions
;; Each pattern below extends groups of adjacent elements of operand 1 to
;; the wider element type and sums every group into one result element.

;; Signed bytes, pairs -> 8 x HI.
(define_insn "xop_phaddbw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddbw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; Signed bytes, groups of four -> 4 x SI.
(define_insn "xop_phaddbd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (sign_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddbd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; Signed bytes, groups of eight -> 2 x DI.
(define_insn "xop_phaddbq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0) (const_int 8)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 1) (const_int 9)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 2) (const_int 10)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 3) (const_int 11)])))))
          (plus:V2DI
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 4) (const_int 12)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 5) (const_int 13)]))))
            (plus:V2DI
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 6) (const_int 14)])))
              (sign_extend:V2DI
                (vec_select:V2QI
                  (match_dup 1)
                  (parallel [(const_int 7) (const_int 15)])))))))]
  "TARGET_XOP"
  "vphaddbq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; Signed words, pairs -> 4 x SI.
(define_insn "xop_phaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_operand:V8HI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)])))
          (sign_extend:V4SI
            (vec_select:V4HI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)])))))]
  "TARGET_XOP"
  "vphaddwd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; Signed words, groups of four -> 2 x DI.
(define_insn "xop_phaddwq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_operand:V8HI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)]))))
          (plus:V2DI
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)])))
            (sign_extend:V2DI
              (vec_select:V2HI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)]))))))]
  "TARGET_XOP"
  "vphaddwq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; Signed doublewords, pairs -> 2 x DI.
(define_insn "xop_phadddq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (plus:V2DI
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))
          (sign_extend:V2DI
            (vec_select:V2SI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)])))))]
  "TARGET_XOP"
  "vphadddq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; Unsigned bytes, pairs -> 8 x HI.
(define_insn "xop_phaddubw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (plus:V8HI
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_operand:V16QI 1 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8HI
            (vec_select:V8QI
              (match_dup 1)
              (parallel [(const_int 1) (const_int 3)
                         (const_int 5) (const_int 7)
                         (const_int 9) (const_int 11)
                         (const_int 13) (const_int 15)])))))]
  "TARGET_XOP"
  "vphaddubw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

;; Unsigned bytes, groups of four -> 4 x SI.
(define_insn "xop_phaddubd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_operand:V16QI 1 "nonimmediate_operand" "xm")
                (parallel [(const_int 0) (const_int 4)
                           (const_int 8) (const_int 12)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 1) (const_int 5)
                           (const_int 9) (const_int 13)]))))
          (plus:V4SI
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 2) (const_int 6)
                           (const_int 10) (const_int 14)])))
            (zero_extend:V4SI
              (vec_select:V4QI
                (match_dup 1)
                (parallel [(const_int 3) (const_int 7)
                           (const_int 11) (const_int 15)]))))))]
  "TARGET_XOP"
  "vphaddubd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseiadd1")])

10847(define_insn "xop_phaddubq" 10848 [(set (match_operand:V2DI 0 "register_operand" "=x") 10849 (plus:V2DI 10850 (plus:V2DI 10851 (plus:V2DI 10852 (zero_extend:V2DI 10853 (vec_select:V2QI 10854 (match_operand:V16QI 1 
"nonimmediate_operand" "xm") 10855 (parallel [(const_int 0) 10856 (const_int 8)]))) 10857 (sign_extend:V2DI 10858 (vec_select:V2QI 10859 (match_dup 1) 10860 (parallel [(const_int 1) 10861 (const_int 9)])))) 10862 (plus:V2DI 10863 (zero_extend:V2DI 10864 (vec_select:V2QI 10865 (match_dup 1) 10866 (parallel [(const_int 2) 10867 (const_int 10)]))) 10868 (zero_extend:V2DI 10869 (vec_select:V2QI 10870 (match_dup 1) 10871 (parallel [(const_int 3) 10872 (const_int 11)]))))) 10873 (plus:V2DI 10874 (plus:V2DI 10875 (zero_extend:V2DI 10876 (vec_select:V2QI 10877 (match_dup 1) 10878 (parallel [(const_int 4) 10879 (const_int 12)]))) 10880 (sign_extend:V2DI 10881 (vec_select:V2QI 10882 (match_dup 1) 10883 (parallel [(const_int 5) 10884 (const_int 13)])))) 10885 (plus:V2DI 10886 (zero_extend:V2DI 10887 (vec_select:V2QI 10888 (match_dup 1) 10889 (parallel [(const_int 6) 10890 (const_int 14)]))) 10891 (zero_extend:V2DI 10892 (vec_select:V2QI 10893 (match_dup 1) 10894 (parallel [(const_int 7) 10895 (const_int 15)])))))))] 10896 "TARGET_XOP" 10897 "vphaddubq\t{%1, %0|%0, %1}" 10898 [(set_attr "type" "sseiadd1")]) 10899 10900(define_insn "xop_phadduwd" 10901 [(set (match_operand:V4SI 0 "register_operand" "=x") 10902 (plus:V4SI 10903 (zero_extend:V4SI 10904 (vec_select:V4HI 10905 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 10906 (parallel [(const_int 0) 10907 (const_int 2) 10908 (const_int 4) 10909 (const_int 6)]))) 10910 (zero_extend:V4SI 10911 (vec_select:V4HI 10912 (match_dup 1) 10913 (parallel [(const_int 1) 10914 (const_int 3) 10915 (const_int 5) 10916 (const_int 7)])))))] 10917 "TARGET_XOP" 10918 "vphadduwd\t{%1, %0|%0, %1}" 10919 [(set_attr "type" "sseiadd1")]) 10920 10921(define_insn "xop_phadduwq" 10922 [(set (match_operand:V2DI 0 "register_operand" "=x") 10923 (plus:V2DI 10924 (plus:V2DI 10925 (zero_extend:V2DI 10926 (vec_select:V2HI 10927 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 10928 (parallel [(const_int 0) 10929 (const_int 4)]))) 10930 (zero_extend:V2DI 
10931 (vec_select:V2HI 10932 (match_dup 1) 10933 (parallel [(const_int 1) 10934 (const_int 5)])))) 10935 (plus:V2DI 10936 (zero_extend:V2DI 10937 (vec_select:V2HI 10938 (match_dup 1) 10939 (parallel [(const_int 2) 10940 (const_int 6)]))) 10941 (zero_extend:V2DI 10942 (vec_select:V2HI 10943 (match_dup 1) 10944 (parallel [(const_int 3) 10945 (const_int 7)]))))))] 10946 "TARGET_XOP" 10947 "vphadduwq\t{%1, %0|%0, %1}" 10948 [(set_attr "type" "sseiadd1")]) 10949 10950(define_insn "xop_phaddudq" 10951 [(set (match_operand:V2DI 0 "register_operand" "=x") 10952 (plus:V2DI 10953 (zero_extend:V2DI 10954 (vec_select:V2SI 10955 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 10956 (parallel [(const_int 0) 10957 (const_int 2)]))) 10958 (zero_extend:V2DI 10959 (vec_select:V2SI 10960 (match_dup 1) 10961 (parallel [(const_int 1) 10962 (const_int 3)])))))] 10963 "TARGET_XOP" 10964 "vphaddudq\t{%1, %0|%0, %1}" 10965 [(set_attr "type" "sseiadd1")]) 10966 10967(define_insn "xop_phsubbw" 10968 [(set (match_operand:V8HI 0 "register_operand" "=x") 10969 (minus:V8HI 10970 (sign_extend:V8HI 10971 (vec_select:V8QI 10972 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 10973 (parallel [(const_int 0) 10974 (const_int 2) 10975 (const_int 4) 10976 (const_int 6) 10977 (const_int 8) 10978 (const_int 10) 10979 (const_int 12) 10980 (const_int 14)]))) 10981 (sign_extend:V8HI 10982 (vec_select:V8QI 10983 (match_dup 1) 10984 (parallel [(const_int 1) 10985 (const_int 3) 10986 (const_int 5) 10987 (const_int 7) 10988 (const_int 9) 10989 (const_int 11) 10990 (const_int 13) 10991 (const_int 15)])))))] 10992 "TARGET_XOP" 10993 "vphsubbw\t{%1, %0|%0, %1}" 10994 [(set_attr "type" "sseiadd1")]) 10995 10996(define_insn "xop_phsubwd" 10997 [(set (match_operand:V4SI 0 "register_operand" "=x") 10998 (minus:V4SI 10999 (sign_extend:V4SI 11000 (vec_select:V4HI 11001 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 11002 (parallel [(const_int 0) 11003 (const_int 2) 11004 (const_int 4) 11005 (const_int 6)]))) 
11006 (sign_extend:V4SI 11007 (vec_select:V4HI 11008 (match_dup 1) 11009 (parallel [(const_int 1) 11010 (const_int 3) 11011 (const_int 5) 11012 (const_int 7)])))))] 11013 "TARGET_XOP" 11014 "vphsubwd\t{%1, %0|%0, %1}" 11015 [(set_attr "type" "sseiadd1")]) 11016 11017(define_insn "xop_phsubdq" 11018 [(set (match_operand:V2DI 0 "register_operand" "=x") 11019 (minus:V2DI 11020 (sign_extend:V2DI 11021 (vec_select:V2SI 11022 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 11023 (parallel [(const_int 0) 11024 (const_int 2)]))) 11025 (sign_extend:V2DI 11026 (vec_select:V2SI 11027 (match_dup 1) 11028 (parallel [(const_int 1) 11029 (const_int 3)])))))] 11030 "TARGET_XOP" 11031 "vphsubdq\t{%1, %0|%0, %1}" 11032 [(set_attr "type" "sseiadd1")]) 11033 11034;; XOP permute instructions 11035(define_insn "xop_pperm" 11036 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 11037 (unspec:V16QI 11038 [(match_operand:V16QI 1 "register_operand" "x,x") 11039 (match_operand:V16QI 2 "nonimmediate_operand" "x,m") 11040 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")] 11041 UNSPEC_XOP_PERMUTE))] 11042 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 11043 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11044 [(set_attr "type" "sse4arg") 11045 (set_attr "mode" "TI")]) 11046 11047;; XOP pack instructions that combine two vectors into a smaller vector 11048(define_insn "xop_pperm_pack_v2di_v4si" 11049 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 11050 (vec_concat:V4SI 11051 (truncate:V2SI 11052 (match_operand:V2DI 1 "register_operand" "x,x")) 11053 (truncate:V2SI 11054 (match_operand:V2DI 2 "nonimmediate_operand" "x,m")))) 11055 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] 11056 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 11057 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11058 [(set_attr "type" "sse4arg") 11059 (set_attr "mode" "TI")]) 11060 11061(define_insn "xop_pperm_pack_v4si_v8hi" 11062 [(set (match_operand:V8HI 0 
"register_operand" "=x,x") 11063 (vec_concat:V8HI 11064 (truncate:V4HI 11065 (match_operand:V4SI 1 "register_operand" "x,x")) 11066 (truncate:V4HI 11067 (match_operand:V4SI 2 "nonimmediate_operand" "x,m")))) 11068 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] 11069 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 11070 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11071 [(set_attr "type" "sse4arg") 11072 (set_attr "mode" "TI")]) 11073 11074(define_insn "xop_pperm_pack_v8hi_v16qi" 11075 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 11076 (vec_concat:V16QI 11077 (truncate:V8QI 11078 (match_operand:V8HI 1 "register_operand" "x,x")) 11079 (truncate:V8QI 11080 (match_operand:V8HI 2 "nonimmediate_operand" "x,m")))) 11081 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] 11082 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 11083 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11084 [(set_attr "type" "sse4arg") 11085 (set_attr "mode" "TI")]) 11086 11087;; XOP packed rotate instructions 11088(define_expand "rotl<mode>3" 11089 [(set (match_operand:VI_128 0 "register_operand" "") 11090 (rotate:VI_128 11091 (match_operand:VI_128 1 "nonimmediate_operand" "") 11092 (match_operand:SI 2 "general_operand")))] 11093 "TARGET_XOP" 11094{ 11095 /* If we were given a scalar, convert it to parallel */ 11096 if (! 
const_0_to_<sserotatemax>_operand (operands[2], SImode)) 11097 { 11098 rtvec vs = rtvec_alloc (<ssescalarnum>); 11099 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); 11100 rtx reg = gen_reg_rtx (<MODE>mode); 11101 rtx op2 = operands[2]; 11102 int i; 11103 11104 if (GET_MODE (op2) != <ssescalarmode>mode) 11105 { 11106 op2 = gen_reg_rtx (<ssescalarmode>mode); 11107 convert_move (op2, operands[2], false); 11108 } 11109 11110 for (i = 0; i < <ssescalarnum>; i++) 11111 RTVEC_ELT (vs, i) = op2; 11112 11113 emit_insn (gen_vec_init<mode> (reg, par)); 11114 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); 11115 DONE; 11116 } 11117}) 11118 11119(define_expand "rotr<mode>3" 11120 [(set (match_operand:VI_128 0 "register_operand" "") 11121 (rotatert:VI_128 11122 (match_operand:VI_128 1 "nonimmediate_operand" "") 11123 (match_operand:SI 2 "general_operand")))] 11124 "TARGET_XOP" 11125{ 11126 /* If we were given a scalar, convert it to parallel */ 11127 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) 11128 { 11129 rtvec vs = rtvec_alloc (<ssescalarnum>); 11130 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); 11131 rtx neg = gen_reg_rtx (<MODE>mode); 11132 rtx reg = gen_reg_rtx (<MODE>mode); 11133 rtx op2 = operands[2]; 11134 int i; 11135 11136 if (GET_MODE (op2) != <ssescalarmode>mode) 11137 { 11138 op2 = gen_reg_rtx (<ssescalarmode>mode); 11139 convert_move (op2, operands[2], false); 11140 } 11141 11142 for (i = 0; i < <ssescalarnum>; i++) 11143 RTVEC_ELT (vs, i) = op2; 11144 11145 emit_insn (gen_vec_init<mode> (reg, par)); 11146 emit_insn (gen_neg<mode>2 (neg, reg)); 11147 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg)); 11148 DONE; 11149 } 11150}) 11151 11152(define_insn "xop_rotl<mode>3" 11153 [(set (match_operand:VI_128 0 "register_operand" "=x") 11154 (rotate:VI_128 11155 (match_operand:VI_128 1 "nonimmediate_operand" "xm") 11156 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] 11157 "TARGET_XOP" 11158 
"vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 11159 [(set_attr "type" "sseishft") 11160 (set_attr "length_immediate" "1") 11161 (set_attr "mode" "TI")]) 11162 11163(define_insn "xop_rotr<mode>3" 11164 [(set (match_operand:VI_128 0 "register_operand" "=x") 11165 (rotatert:VI_128 11166 (match_operand:VI_128 1 "nonimmediate_operand" "xm") 11167 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] 11168 "TARGET_XOP" 11169{ 11170 operands[3] 11171 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2])); 11172 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\"; 11173} 11174 [(set_attr "type" "sseishft") 11175 (set_attr "length_immediate" "1") 11176 (set_attr "mode" "TI")]) 11177 11178(define_expand "vrotr<mode>3" 11179 [(match_operand:VI_128 0 "register_operand" "") 11180 (match_operand:VI_128 1 "register_operand" "") 11181 (match_operand:VI_128 2 "register_operand" "")] 11182 "TARGET_XOP" 11183{ 11184 rtx reg = gen_reg_rtx (<MODE>mode); 11185 emit_insn (gen_neg<mode>2 (reg, operands[2])); 11186 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); 11187 DONE; 11188}) 11189 11190(define_expand "vrotl<mode>3" 11191 [(match_operand:VI_128 0 "register_operand" "") 11192 (match_operand:VI_128 1 "register_operand" "") 11193 (match_operand:VI_128 2 "register_operand" "")] 11194 "TARGET_XOP" 11195{ 11196 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2])); 11197 DONE; 11198}) 11199 11200(define_insn "xop_vrotl<mode>3" 11201 [(set (match_operand:VI_128 0 "register_operand" "=x,x") 11202 (if_then_else:VI_128 11203 (ge:VI_128 11204 (match_operand:VI_128 2 "nonimmediate_operand" "x,m") 11205 (const_int 0)) 11206 (rotate:VI_128 11207 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") 11208 (match_dup 2)) 11209 (rotatert:VI_128 11210 (match_dup 1) 11211 (neg:VI_128 (match_dup 2)))))] 11212 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 11213 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 11214 
[(set_attr "type" "sseishft") 11215 (set_attr "prefix_data16" "0") 11216 (set_attr "prefix_extra" "2") 11217 (set_attr "mode" "TI")]) 11218 11219;; XOP packed shift instructions. 11220(define_expand "vlshr<mode>3" 11221 [(set (match_operand:VI12_128 0 "register_operand" "") 11222 (lshiftrt:VI12_128 11223 (match_operand:VI12_128 1 "register_operand" "") 11224 (match_operand:VI12_128 2 "nonimmediate_operand" "")))] 11225 "TARGET_XOP" 11226{ 11227 rtx neg = gen_reg_rtx (<MODE>mode); 11228 emit_insn (gen_neg<mode>2 (neg, operands[2])); 11229 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); 11230 DONE; 11231}) 11232 11233(define_expand "vlshr<mode>3" 11234 [(set (match_operand:VI48_128 0 "register_operand" "") 11235 (lshiftrt:VI48_128 11236 (match_operand:VI48_128 1 "register_operand" "") 11237 (match_operand:VI48_128 2 "nonimmediate_operand" "")))] 11238 "TARGET_AVX2 || TARGET_XOP" 11239{ 11240 if (!TARGET_AVX2) 11241 { 11242 rtx neg = gen_reg_rtx (<MODE>mode); 11243 emit_insn (gen_neg<mode>2 (neg, operands[2])); 11244 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); 11245 DONE; 11246 } 11247}) 11248 11249(define_expand "vlshr<mode>3" 11250 [(set (match_operand:VI48_256 0 "register_operand" "") 11251 (lshiftrt:VI48_256 11252 (match_operand:VI48_256 1 "register_operand" "") 11253 (match_operand:VI48_256 2 "nonimmediate_operand" "")))] 11254 "TARGET_AVX2") 11255 11256(define_expand "vashr<mode>3" 11257 [(set (match_operand:VI128_128 0 "register_operand" "") 11258 (ashiftrt:VI128_128 11259 (match_operand:VI128_128 1 "register_operand" "") 11260 (match_operand:VI128_128 2 "nonimmediate_operand" "")))] 11261 "TARGET_XOP" 11262{ 11263 rtx neg = gen_reg_rtx (<MODE>mode); 11264 emit_insn (gen_neg<mode>2 (neg, operands[2])); 11265 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg)); 11266 DONE; 11267}) 11268 11269(define_expand "vashrv4si3" 11270 [(set (match_operand:V4SI 0 "register_operand" "") 11271 (ashiftrt:V4SI (match_operand:V4SI 
1 "register_operand" "") 11272 (match_operand:V4SI 2 "nonimmediate_operand" "")))] 11273 "TARGET_AVX2 || TARGET_XOP" 11274{ 11275 if (!TARGET_AVX2) 11276 { 11277 rtx neg = gen_reg_rtx (V4SImode); 11278 emit_insn (gen_negv4si2 (neg, operands[2])); 11279 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg)); 11280 DONE; 11281 } 11282}) 11283 11284(define_expand "vashrv8si3" 11285 [(set (match_operand:V8SI 0 "register_operand" "") 11286 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "") 11287 (match_operand:V8SI 2 "nonimmediate_operand" "")))] 11288 "TARGET_AVX2") 11289 11290(define_expand "vashl<mode>3" 11291 [(set (match_operand:VI12_128 0 "register_operand" "") 11292 (ashift:VI12_128 11293 (match_operand:VI12_128 1 "register_operand" "") 11294 (match_operand:VI12_128 2 "nonimmediate_operand" "")))] 11295 "TARGET_XOP" 11296{ 11297 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); 11298 DONE; 11299}) 11300 11301(define_expand "vashl<mode>3" 11302 [(set (match_operand:VI48_128 0 "register_operand" "") 11303 (ashift:VI48_128 11304 (match_operand:VI48_128 1 "register_operand" "") 11305 (match_operand:VI48_128 2 "nonimmediate_operand" "")))] 11306 "TARGET_AVX2 || TARGET_XOP" 11307{ 11308 if (!TARGET_AVX2) 11309 { 11310 operands[2] = force_reg (<MODE>mode, operands[2]); 11311 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); 11312 DONE; 11313 } 11314}) 11315 11316(define_expand "vashl<mode>3" 11317 [(set (match_operand:VI48_256 0 "register_operand" "") 11318 (ashift:VI48_256 11319 (match_operand:VI48_256 1 "register_operand" "") 11320 (match_operand:VI48_256 2 "nonimmediate_operand" "")))] 11321 "TARGET_AVX2") 11322 11323(define_insn "xop_sha<mode>3" 11324 [(set (match_operand:VI_128 0 "register_operand" "=x,x") 11325 (if_then_else:VI_128 11326 (ge:VI_128 11327 (match_operand:VI_128 2 "nonimmediate_operand" "x,m") 11328 (const_int 0)) 11329 (ashift:VI_128 11330 (match_operand:VI_128 1 "nonimmediate_operand" 
"xm,x") 11331 (match_dup 2)) 11332 (ashiftrt:VI_128 11333 (match_dup 1) 11334 (neg:VI_128 (match_dup 2)))))] 11335 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 11336 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 11337 [(set_attr "type" "sseishft") 11338 (set_attr "prefix_data16" "0") 11339 (set_attr "prefix_extra" "2") 11340 (set_attr "mode" "TI")]) 11341 11342(define_insn "xop_shl<mode>3" 11343 [(set (match_operand:VI_128 0 "register_operand" "=x,x") 11344 (if_then_else:VI_128 11345 (ge:VI_128 11346 (match_operand:VI_128 2 "nonimmediate_operand" "x,m") 11347 (const_int 0)) 11348 (ashift:VI_128 11349 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") 11350 (match_dup 2)) 11351 (lshiftrt:VI_128 11352 (match_dup 1) 11353 (neg:VI_128 (match_dup 2)))))] 11354 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 11355 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 11356 [(set_attr "type" "sseishft") 11357 (set_attr "prefix_data16" "0") 11358 (set_attr "prefix_extra" "2") 11359 (set_attr "mode" "TI")]) 11360 11361;; SSE2 doesn't have some shift variants, so define versions for XOP 11362(define_expand "ashlv16qi3" 11363 [(set (match_operand:V16QI 0 "register_operand" "") 11364 (ashift:V16QI 11365 (match_operand:V16QI 1 "register_operand" "") 11366 (match_operand:SI 2 "nonmemory_operand" "")))] 11367 "TARGET_XOP" 11368{ 11369 rtx reg = gen_reg_rtx (V16QImode); 11370 rtx par; 11371 int i; 11372 11373 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); 11374 for (i = 0; i < 16; i++) 11375 XVECEXP (par, 0, i) = operands[2]; 11376 11377 emit_insn (gen_vec_initv16qi (reg, par)); 11378 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg)); 11379 DONE; 11380}) 11381 11382(define_expand "<shift_insn>v16qi3" 11383 [(set (match_operand:V16QI 0 "register_operand" "") 11384 (any_shiftrt:V16QI 11385 (match_operand:V16QI 1 "register_operand" "") 11386 (match_operand:SI 2 "nonmemory_operand" "")))] 11387 "TARGET_XOP" 11388{ 11389 rtx reg = 
gen_reg_rtx (V16QImode); 11390 rtx par; 11391 bool negate = false; 11392 rtx (*shift_insn)(rtx, rtx, rtx); 11393 int i; 11394 11395 if (CONST_INT_P (operands[2])) 11396 operands[2] = GEN_INT (-INTVAL (operands[2])); 11397 else 11398 negate = true; 11399 11400 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); 11401 for (i = 0; i < 16; i++) 11402 XVECEXP (par, 0, i) = operands[2]; 11403 11404 emit_insn (gen_vec_initv16qi (reg, par)); 11405 11406 if (negate) 11407 emit_insn (gen_negv16qi2 (reg, reg)); 11408 11409 if (<CODE> == LSHIFTRT) 11410 shift_insn = gen_xop_shlv16qi3; 11411 else 11412 shift_insn = gen_xop_shav16qi3; 11413 11414 emit_insn (shift_insn (operands[0], operands[1], reg)); 11415 DONE; 11416}) 11417 11418(define_expand "ashrv2di3" 11419 [(set (match_operand:V2DI 0 "register_operand" "") 11420 (ashiftrt:V2DI 11421 (match_operand:V2DI 1 "register_operand" "") 11422 (match_operand:DI 2 "nonmemory_operand" "")))] 11423 "TARGET_XOP" 11424{ 11425 rtx reg = gen_reg_rtx (V2DImode); 11426 rtx par; 11427 bool negate = false; 11428 int i; 11429 11430 if (CONST_INT_P (operands[2])) 11431 operands[2] = GEN_INT (-INTVAL (operands[2])); 11432 else 11433 negate = true; 11434 11435 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2)); 11436 for (i = 0; i < 2; i++) 11437 XVECEXP (par, 0, i) = operands[2]; 11438 11439 emit_insn (gen_vec_initv2di (reg, par)); 11440 11441 if (negate) 11442 emit_insn (gen_negv2di2 (reg, reg)); 11443 11444 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg)); 11445 DONE; 11446}) 11447 11448;; XOP FRCZ support 11449(define_insn "xop_frcz<mode>2" 11450 [(set (match_operand:FMAMODE 0 "register_operand" "=x") 11451 (unspec:FMAMODE 11452 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")] 11453 UNSPEC_FRCZ))] 11454 "TARGET_XOP" 11455 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}" 11456 [(set_attr "type" "ssecvt1") 11457 (set_attr "mode" "<MODE>")]) 11458 11459(define_expand "xop_vmfrcz<mode>2" 11460 [(set (match_operand:VF_128 0 
"register_operand") 11461 (vec_merge:VF_128 11462 (unspec:VF_128 11463 [(match_operand:VF_128 1 "nonimmediate_operand")] 11464 UNSPEC_FRCZ) 11465 (match_dup 2) 11466 (const_int 1)))] 11467 "TARGET_XOP" 11468 "operands[2] = CONST0_RTX (<MODE>mode);") 11469 11470(define_insn "*xop_vmfrcz<mode>2" 11471 [(set (match_operand:VF_128 0 "register_operand" "=x") 11472 (vec_merge:VF_128 11473 (unspec:VF_128 11474 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")] 11475 UNSPEC_FRCZ) 11476 (match_operand:VF_128 2 "const0_operand") 11477 (const_int 1)))] 11478 "TARGET_XOP" 11479 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}" 11480 [(set_attr "type" "ssecvt1") 11481 (set_attr "mode" "<MODE>")]) 11482 11483(define_insn "xop_maskcmp<mode>3" 11484 [(set (match_operand:VI_128 0 "register_operand" "=x") 11485 (match_operator:VI_128 1 "ix86_comparison_int_operator" 11486 [(match_operand:VI_128 2 "register_operand" "x") 11487 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] 11488 "TARGET_XOP" 11489 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" 11490 [(set_attr "type" "sse4arg") 11491 (set_attr "prefix_data16" "0") 11492 (set_attr "prefix_rep" "0") 11493 (set_attr "prefix_extra" "2") 11494 (set_attr "length_immediate" "1") 11495 (set_attr "mode" "TI")]) 11496 11497(define_insn "xop_maskcmp_uns<mode>3" 11498 [(set (match_operand:VI_128 0 "register_operand" "=x") 11499 (match_operator:VI_128 1 "ix86_comparison_uns_operator" 11500 [(match_operand:VI_128 2 "register_operand" "x") 11501 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] 11502 "TARGET_XOP" 11503 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" 11504 [(set_attr "type" "ssecmp") 11505 (set_attr "prefix_data16" "0") 11506 (set_attr "prefix_rep" "0") 11507 (set_attr "prefix_extra" "2") 11508 (set_attr "length_immediate" "1") 11509 (set_attr "mode" "TI")]) 11510 11511;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* 11512;; and pcomneu* not to be converted to the signed ones in 
case somebody needs 11513;; the exact instruction generated for the intrinsic. 11514(define_insn "xop_maskcmp_uns2<mode>3" 11515 [(set (match_operand:VI_128 0 "register_operand" "=x") 11516 (unspec:VI_128 11517 [(match_operator:VI_128 1 "ix86_comparison_uns_operator" 11518 [(match_operand:VI_128 2 "register_operand" "x") 11519 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])] 11520 UNSPEC_XOP_UNSIGNED_CMP))] 11521 "TARGET_XOP" 11522 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" 11523 [(set_attr "type" "ssecmp") 11524 (set_attr "prefix_data16" "0") 11525 (set_attr "prefix_extra" "2") 11526 (set_attr "length_immediate" "1") 11527 (set_attr "mode" "TI")]) 11528 11529;; Pcomtrue and pcomfalse support. These are useless instructions, but are 11530;; being added here to be complete. 11531(define_insn "xop_pcom_tf<mode>3" 11532 [(set (match_operand:VI_128 0 "register_operand" "=x") 11533 (unspec:VI_128 11534 [(match_operand:VI_128 1 "register_operand" "x") 11535 (match_operand:VI_128 2 "nonimmediate_operand" "xm") 11536 (match_operand:SI 3 "const_int_operand" "n")] 11537 UNSPEC_XOP_TRUEFALSE))] 11538 "TARGET_XOP" 11539{ 11540 return ((INTVAL (operands[3]) != 0) 11541 ? 
"vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 11542 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"); 11543} 11544 [(set_attr "type" "ssecmp") 11545 (set_attr "prefix_data16" "0") 11546 (set_attr "prefix_extra" "2") 11547 (set_attr "length_immediate" "1") 11548 (set_attr "mode" "TI")]) 11549 11550(define_insn "xop_vpermil2<mode>3" 11551 [(set (match_operand:VF 0 "register_operand" "=x") 11552 (unspec:VF 11553 [(match_operand:VF 1 "register_operand" "x") 11554 (match_operand:VF 2 "nonimmediate_operand" "%x") 11555 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm") 11556 (match_operand:SI 4 "const_0_to_3_operand" "n")] 11557 UNSPEC_VPERMIL2))] 11558 "TARGET_XOP" 11559 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}" 11560 [(set_attr "type" "sse4arg") 11561 (set_attr "length_immediate" "1") 11562 (set_attr "mode" "<MODE>")]) 11563 11564;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 11565 11566(define_insn "aesenc" 11567 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 11568 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 11569 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 11570 UNSPEC_AESENC))] 11571 "TARGET_AES" 11572 "@ 11573 aesenc\t{%2, %0|%0, %2} 11574 vaesenc\t{%2, %1, %0|%0, %1, %2}" 11575 [(set_attr "isa" "noavx,avx") 11576 (set_attr "type" "sselog1") 11577 (set_attr "prefix_extra" "1") 11578 (set_attr "prefix" "orig,vex") 11579 (set_attr "mode" "TI")]) 11580 11581(define_insn "aesenclast" 11582 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 11583 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 11584 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 11585 UNSPEC_AESENCLAST))] 11586 "TARGET_AES" 11587 "@ 11588 aesenclast\t{%2, %0|%0, %2} 11589 vaesenclast\t{%2, %1, %0|%0, %1, %2}" 11590 [(set_attr "isa" "noavx,avx") 11591 (set_attr "type" "sselog1") 11592 (set_attr "prefix_extra" "1") 11593 (set_attr "prefix" "orig,vex") 11594 (set_attr 
"mode" "TI")]) 11595 11596(define_insn "aesdec" 11597 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 11598 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 11599 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 11600 UNSPEC_AESDEC))] 11601 "TARGET_AES" 11602 "@ 11603 aesdec\t{%2, %0|%0, %2} 11604 vaesdec\t{%2, %1, %0|%0, %1, %2}" 11605 [(set_attr "isa" "noavx,avx") 11606 (set_attr "type" "sselog1") 11607 (set_attr "prefix_extra" "1") 11608 (set_attr "prefix" "orig,vex") 11609 (set_attr "mode" "TI")]) 11610 11611(define_insn "aesdeclast" 11612 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 11613 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 11614 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 11615 UNSPEC_AESDECLAST))] 11616 "TARGET_AES" 11617 "@ 11618 aesdeclast\t{%2, %0|%0, %2} 11619 vaesdeclast\t{%2, %1, %0|%0, %1, %2}" 11620 [(set_attr "isa" "noavx,avx") 11621 (set_attr "type" "sselog1") 11622 (set_attr "prefix_extra" "1") 11623 (set_attr "prefix" "orig,vex") 11624 (set_attr "mode" "TI")]) 11625 11626(define_insn "aesimc" 11627 [(set (match_operand:V2DI 0 "register_operand" "=x") 11628 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")] 11629 UNSPEC_AESIMC))] 11630 "TARGET_AES" 11631 "%vaesimc\t{%1, %0|%0, %1}" 11632 [(set_attr "type" "sselog1") 11633 (set_attr "prefix_extra" "1") 11634 (set_attr "prefix" "maybe_vex") 11635 (set_attr "mode" "TI")]) 11636 11637(define_insn "aeskeygenassist" 11638 [(set (match_operand:V2DI 0 "register_operand" "=x") 11639 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm") 11640 (match_operand:SI 2 "const_0_to_255_operand" "n")] 11641 UNSPEC_AESKEYGENASSIST))] 11642 "TARGET_AES" 11643 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" 11644 [(set_attr "type" "sselog1") 11645 (set_attr "prefix_extra" "1") 11646 (set_attr "length_immediate" "1") 11647 (set_attr "prefix" "maybe_vex") 11648 (set_attr "mode" "TI")]) 11649 11650(define_insn "pclmulqdq" 11651 
[(set (match_operand:V2DI 0 "register_operand" "=x,x") 11652 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 11653 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm") 11654 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 11655 UNSPEC_PCLMUL))] 11656 "TARGET_PCLMUL" 11657 "@ 11658 pclmulqdq\t{%3, %2, %0|%0, %2, %3} 11659 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11660 [(set_attr "isa" "noavx,avx") 11661 (set_attr "type" "sselog1") 11662 (set_attr "prefix_extra" "1") 11663 (set_attr "length_immediate" "1") 11664 (set_attr "prefix" "orig,vex") 11665 (set_attr "mode" "TI")]) 11666 11667(define_expand "avx_vzeroall" 11668 [(match_par_dup 0 [(const_int 0)])] 11669 "TARGET_AVX" 11670{ 11671 int nregs = TARGET_64BIT ? 16 : 8; 11672 int regno; 11673 11674 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1)); 11675 11676 XVECEXP (operands[0], 0, 0) 11677 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx), 11678 UNSPECV_VZEROALL); 11679 11680 for (regno = 0; regno < nregs; regno++) 11681 XVECEXP (operands[0], 0, regno + 1) 11682 = gen_rtx_SET (VOIDmode, 11683 gen_rtx_REG (V8SImode, SSE_REGNO (regno)), 11684 CONST0_RTX (V8SImode)); 11685}) 11686 11687(define_insn "*avx_vzeroall" 11688 [(match_parallel 0 "vzeroall_operation" 11689 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])] 11690 "TARGET_AVX" 11691 "vzeroall" 11692 [(set_attr "type" "sse") 11693 (set_attr "modrm" "0") 11694 (set_attr "memory" "none") 11695 (set_attr "prefix" "vex") 11696 (set_attr "mode" "OI")]) 11697 11698;; Clear the upper 128bits of AVX registers, equivalent to a NOP 11699;; if the upper 128bits are unused. 
;; Operand 0 is a const_int that the output template does not reference.
(define_insn "avx_vzeroupper"
  [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
                    UNSPECV_VZEROUPPER)]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Map each integer vector mode to the 128-bit vector mode with the same
;; element type; 128-bit modes map to themselves.
(define_mode_attr AVXTOSSEMODE
  [(V4DI "V2DI") (V2DI "V2DI")
   (V8SI "V4SI") (V4SI "V4SI")
   (V16HI "V8HI") (V8HI "V8HI")
   (V32QI "V16QI") (V16QI "V16QI")])

;; Duplicate element 0 of a 128-bit source (register or memory) into
;; every element of the destination.
(define_insn "avx2_pbroadcast<mode>"
  [(set (match_operand:VI 0 "register_operand" "=x")
        (vec_duplicate:VI
          (vec_select:<ssescalarmode>
            (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; vpermd.  Note the operand order in the template: operand 2 (register
;; only) is printed as the first source and operand 1 (register or
;; memory) as the second.
(define_insn "avx2_permvarv8si"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
        (unspec:V8SI
          [(match_operand:V8SI 1 "nonimmediate_operand" "xm")
           (match_operand:V8SI 2 "register_operand" "x")]
          UNSPEC_VPERMSI))]
  "TARGET_AVX2"
  "vpermd\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; vpermpd with an immediate in operand 2.  Operand 1 uses
;; nonimmediate_operand so that the predicate agrees with its "xm"
;; constraint; with register_operand the memory alternative could never
;; be matched and a memory source always needed a separate load.
(define_insn "avx2_permv4df"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (unspec:V4DF
          [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
           (match_operand:SI 2 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMDF))]
  "TARGET_AVX2"
  "vpermpd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; vpermps.  As with avx2_permvarv8si, the template prints operand 2
;; before operand 1.
(define_insn "avx2_permvarv8sf"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (unspec:V8SF
          [(match_operand:V8SF 1 "nonimmediate_operand" "xm")
           (match_operand:V8SI 2 "register_operand" "x")]
          UNSPEC_VPERMSF))]
  "TARGET_AVX2"
  "vpermps\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Split the 8-bit immediate into four 2-bit element selectors and emit
;; avx2_permv4di_1, which expresses the permutation as a vec_select.
(define_expand "avx2_permv4di"
  [(match_operand:V4DI 0 "register_operand" "")
   (match_operand:V4DI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_0_to_255_operand" "")]
  "TARGET_AVX2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
                                  GEN_INT ((mask >> 0) & 3),
                                  GEN_INT ((mask >> 2) & 3),
                                  GEN_INT ((mask >> 4) & 3),
                                  GEN_INT ((mask >> 6) & 3)));
  DONE;
})

;; vpermq as a vec_select with four constant selectors.  The output code
;; repacks the selectors (two bits each, operand 2 lowest) into the
;; immediate and overwrites operands[2] with it for printing.
(define_insn "avx2_permv4di_1"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_select:V4DI
          (match_operand:V4DI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_AVX2"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);
  return "vpermq\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; vperm2i128, represented as an unspec with the immediate in operand 3.
(define_insn "avx2_permv2ti"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (unspec:V4DI
          [(match_operand:V4DI 1 "register_operand" "x")
           (match_operand:V4DI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMTI))]
  "TARGET_AVX2"
  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Duplicate element 0 of a V2DF register into all four elements of a
;; V4DF destination.
(define_insn "avx2_vec_dupv4df"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_duplicate:V4DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "x")
            (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])

;; Modes handled by AVX vec_dup patterns.
(define_mode_iterator AVX_VEC_DUP_MODE
  [V8SI V8SF V4DI V4DF])

;; Scalar broadcast.  A memory source is emitted as vbroadcast directly;
;; a register source outputs "#" and is handled by the define_split for
;; AVX_VEC_DUP_MODE further down.
(define_insn "vec_dup<mode>"
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
        (vec_duplicate:AVX_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
  "TARGET_AVX"
  "@
   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; Duplicate a half-width memory operand into both halves of a 256-bit
;; integer vector.
(define_insn "avx2_vbroadcasti128_<mode>"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
        (vec_concat:VI_256
          (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
          (match_dup 1)))]
  "TARGET_AVX2"
  "vbroadcasti128\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; After reload, split a register-source vec_dup in two steps: a
;; half-width vec_duplicate into operand 2, then a vec_concat of operand 2
;; with itself.  Operand 2 is the destination's hard register viewed in
;; the half-width mode.
(define_split
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
        (vec_duplicate:AVX_VEC_DUP_MODE
          (match_operand:<ssescalarmode> 1 "register_operand" "")))]
  "TARGET_AVX && reload_completed"
  [(set (match_dup 2)
        (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
   (set (match_dup 0)
        (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
  "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")

;; Duplicate a half-width operand into both halves of the destination.
;; Alternative 0 (memory) uses vbroadcast; alternative 1 (source tied to
;; the destination) uses vinsert with immediate 1; alternative 2 (any
;; other register) uses vperm2 with immediate 0.
(define_insn "avx_vbroadcastf128_<mode>"
  [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
        (vec_concat:V_256
          (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
          (match_dup 1)))]
  "TARGET_AVX"
  "@
   vbroadcast<i128>\t{%1, %0|%0, %1}
   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
  [(set_attr "type" "ssemov,sselog1,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,1,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
;; If it so happens that the input is in memory, use vbroadcast.
;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
        (vec_select:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  int elt = INTVAL (operands[3]);
  switch (which_alternative)
    {
    case 0:
    case 1:
      /* Memory source: address the selected SFmode element directly.  */
      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
      return "vbroadcastss\t{%1, %0|%0, %1}";
    case 2:
      /* Register source: 0x55 repeats the 2-bit index in all four
         selector fields of the immediate.  */
      operands[2] = GEN_INT (elt * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF,SF,V4SF")])

;; 256-bit variant of the broadcast-as-vec_select pattern.  Always split
;; after reload: a register source becomes vpermil + vperm2f128, a memory
;; source becomes a vec_duplicate of the addressed scalar element.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
        (vec_select:VF_256
          (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
          (match_parallel 2 "avx_vbroadcast_operand"
            [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
{
  rtx op0 = operands[0], op1 = operands[1];
  int elt = INTVAL (operands[3]);

  if (REG_P (op1))
    {
      int mask;

      /* Shuffle element we care about into all elements of the 128-bit lane.
         The other lane gets shuffled too, but we don't care.  */
      if (<MODE>mode == V4DFmode)
        mask = (elt & 1 ? 15 : 0);
      else
        mask = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));

      /* Shuffle the lane we care about into both lanes of the dest.  */
      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
      DONE;
    }

  /* Memory source: narrow the reference to the selected scalar element
     and let the vec_duplicate replacement pattern above be emitted.  */
  operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
                                   elt * GET_MODE_SIZE (<ssescalarmode>mode));
})

;; Convert the vpermilpd immediate into an explicit PARALLEL selector.
;; One immediate bit per element; for V4DF, bits 2 and 3 select within
;; elements 2..3, hence the "+ 2".
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:VF2 0 "register_operand" "")
        (vec_select:VF2
          (match_operand:VF2 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[2]);
  rtx perm[<ssescalarnum>];

  perm[0] = GEN_INT (mask & 1);
  perm[1] = GEN_INT ((mask >> 1) & 1);
  if (<MODE>mode == V4DFmode)
    {
      perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
      perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
    }

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})

;; Convert the vpermilps immediate into an explicit PARALLEL selector.
;; Two immediate bits per element; for V8SF the same four selectors are
;; reused for elements 4..7, offset by 4.
(define_expand "avx_vpermil<mode>"
  [(set (match_operand:VF1 0 "register_operand" "")
        (vec_select:VF1
          (match_operand:VF1 1 "nonimmediate_operand" "")
          (match_operand:SI 2 "const_0_to_255_operand" "")))]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[2]);
  rtx perm[<ssescalarnum>];

  perm[0] = GEN_INT (mask & 3);
  perm[1] = GEN_INT ((mask >> 2) & 3);
  perm[2] = GEN_INT ((mask >> 4) & 3);
  perm[3] = GEN_INT ((mask >> 6) & 3);
  if (<MODE>mode == V8SFmode)
    {
      perm[4] = GEN_INT ((mask & 3) + 4);
      perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
      perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
      perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
    }

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})

;; Immediate vpermilp as a vec_select.  The result of
;; avx_vpermilp_parallel is used both as the match test in the insn
;; condition and, minus one, as the immediate to print.
(define_insn "*avx_vpermilp<mode>"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (vec_select:VF
          (match_operand:VF 1 "nonimmediate_operand" "xm")
          (match_parallel 2 ""
            [(match_operand 3 "const_int_operand" "")])))]
  "TARGET_AVX
   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
{
  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
  operands[2] = GEN_INT (mask);
  return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; Variable-control vpermilp: the selector is the integer vector in
;; operand 2 (register or memory).
(define_insn "avx_vpermilvar<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=x")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "x")
           (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
          UNSPEC_VPERMIL))]
  "TARGET_AVX"
  "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<MODE>")])

;; vperm2f128 expander.  When neither bit of 0x88 is set in the
;; immediate, emit a vec_select from the vec_concat of both inputs;
;; otherwise fall through and emit the unspec form given in the pattern.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
           (match_operand:SI 3 "const_0_to_255_operand" "")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  int mask = INTVAL (operands[3]);
  if ((mask & 0x88) == 0)
    {
      rtx perm[<ssescalarnum>], t1, t2;
      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;

      /* Low half of the result: immediate bits 0-1 pick one of the four
         half-width lanes of the concatenated inputs.  */
      base = (mask & 3) * nelt2;
      for (i = 0; i < nelt2; ++i)
        perm[i] = GEN_INT (base + i);

      /* High half of the result: immediate bits 4-5.  */
      base = ((mask >> 4) & 3) * nelt2;
      for (i = 0; i < nelt2; ++i)
        perm[i + nelt2] = GEN_INT (base + i);

      t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
                               operands[1], operands[2]);
      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
      t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
      emit_insn (t2);
      DONE;
    }
})

;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
;; means that in order to represent this properly in rtl we'd have to
;; nest *another* vec_concat with a zero operand and do the select from
;; a 4x wide vector.  That doesn't seem very nice.
;; Unspec form of vperm2f128/vperm2i128, matching any 8-bit immediate.
(define_insn "*avx_vperm2f128<mode>_full"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (unspec:AVX256MODE2P
          [(match_operand:AVX256MODE2P 1 "register_operand" "x")
           (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_operand" "n")]
          UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; vec_select form of vperm2f128.  The result of avx_vperm2f128_parallel
;; is used both as the match test in the insn condition and, minus one,
;; as the immediate.  Masks 0x12 and 0x20 are emitted as vinsert with
;; immediate 0 and 1 respectively; every other mask is emitted as vperm2.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
        (vec_select:AVX256MODE2P
          (vec_concat:<ssedoublevecmode>
            (match_operand:AVX256MODE2P 1 "register_operand" "x")
            (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
          (match_parallel 3 ""
            [(match_operand 4 "const_int_operand" "")])))]
  "TARGET_AVX
   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
  if (mask == 0x12)
    return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
  if (mask == 0x20)
    return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
  operands[3] = GEN_INT (mask);
  return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Insert half-width operand 2 into operand 1.  Operand 3 (0 or 1)
;; chooses between the vec_set_lo_<mode> and vec_set_hi_<mode> patterns.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:V_256 0 "register_operand" "")
   (match_operand:V_256 1 "register_operand" "")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX"
{
  rtx (*insn)(rtx, rtx, rtx);

  switch (INTVAL (operands[3]))
    {
    case 0:
      insn = gen_vec_set_lo_<mode>;
      break;
    case 1:
      insn = gen_vec_set_hi_<mode>;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1], operands[2]));
  DONE;
})

;; Result = operand 2 in the low half, elements 2..3 of operand 1 in the
;; high half (vinserti128 with immediate 0).
(define_insn "avx2_vec_set_lo_v4di"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_concat:V4DI
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")
          (vec_select:V2DI
            (match_operand:V4DI 1 "register_operand" "x")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX2"
  "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Result = elements 0..1 of operand 1 in the low half, operand 2 in the
;; high half (vinserti128 with immediate 1).
(define_insn "avx2_vec_set_hi_v4di"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
        (vec_concat:V4DI
          (vec_select:V2DI
            (match_operand:V4DI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; The vec_set_lo_* patterns below replace the low half of operand 1 with
;; operand 2; the vec_set_hi_* patterns replace the high half.  They
;; differ only in element count, i.e. in the indices that select the
;; half of operand 1 that is kept.

;; Four-element modes: keep elements 2..3 of operand 1.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
        (vec_concat:VI8F_256
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<ssehalfvecmode>
            (match_operand:VI8F_256 1 "register_operand" "x")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Four-element modes: keep elements 0..1 of operand 1.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=x")
        (vec_concat:VI8F_256
          (vec_select:<ssehalfvecmode>
            (match_operand:VI8F_256 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Eight-element modes: keep elements 4..7 of operand 1.
(define_insn "vec_set_lo_<mode>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
        (vec_concat:VI4F_256
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "x")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Eight-element modes: keep elements 0..3 of operand 1.
(define_insn "vec_set_hi_<mode>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=x")
        (vec_concat:VI4F_256
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; V16HI: keep elements 8..15 of operand 1.
(define_insn "vec_set_lo_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; V16HI: keep elements 0..7 of operand 1.
(define_insn "vec_set_hi_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
        (vec_concat:V16HI
          (vec_select:V8HI
            (match_operand:V16HI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; V32QI: keep elements 16..31 of operand 1.
(define_insn "vec_set_lo_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 16) (const_int 17)
                       (const_int 18) (const_int 19)
                       (const_int 20) (const_int 21)
                       (const_int 22) (const_int 23)
                       (const_int 24) (const_int 25)
                       (const_int 26) (const_int 27)
                       (const_int 28) (const_int 29)
                       (const_int 30) (const_int 31)]))))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; V32QI: keep elements 0..15 of operand 1.
(define_insn "vec_set_hi_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
        (vec_concat:V32QI
          (vec_select:V16QI
            (match_operand:V32QI 1 "register_operand" "x")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)
                       (const_int 8) (const_int 9)
                       (const_int 10) (const_int 11)
                       (const_int 12) (const_int 13)
                       (const_int 14) (const_int 15)]))
          (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])

;; Masked load.  Operand 1 is the memory source and operand 2 the mask
;; register; note that the unspec lists the mask first.
(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 2 "register_operand" "x")
           (match_operand:V48_AVX2 1 "memory_operand" "m")]
          UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Masked store.  Operand 1 is the mask, operand 2 the data.  The
;; destination memory is an in/out operand ("+m") and also appears as an
;; input of the unspec through match_dup 0.
(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
        (unspec:V48_AVX2
          [(match_operand:<sseintvecmode> 1 "register_operand" "x")
           (match_operand:V48_AVX2 2 "register_operand" "x")
           (match_dup 0)]
          UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Cast a half-width vector to the full-width mode (UNSPEC_CAST).  After
;; reload this becomes a plain move: with a register destination, the
;; destination is viewed in the half-width mode; otherwise (memory
;; destination) the source register is viewed in the full-width mode.
(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
        (unspec:AVX256MODE2P
          [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
          UNSPEC_CAST))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  if (REG_P (op0))
    op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
  else
    op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
  emit_move_insn (op0, op1);
  DONE;
})

;; Vector initialization for 256-bit modes; all work is done by
;; ix86_expand_vector_init.
(define_expand "vec_init<mode>"
  [(match_operand:V_256 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_AVX"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; Extract one half of a V4DI.  Operand 2 (0 or 1) chooses between the
;; vec_extract_lo_v4di and vec_extract_hi_v4di patterns.
(define_expand "avx2_extracti128"
  [(match_operand:V2DI 0 "nonimmediate_operand" "")
   (match_operand:V4DI 1 "register_operand" "")
   (match_operand:SI 2 "const_0_to_1_operand" "")]
  "TARGET_AVX2"
{
  rtx (*insn)(rtx, rtx);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_v4di;
      break;
    case 1:
      insn = gen_vec_extract_hi_v4di;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1]));
  DONE;
})

;; Insert V2DI operand 2 into one half of V4DI operand 1.  Operand 3
;; (0 or 1) chooses between avx2_vec_set_lo_v4di and avx2_vec_set_hi_v4di.
(define_expand "avx2_inserti128"
  [(match_operand:V4DI 0 "register_operand" "")
   (match_operand:V4DI 1 "register_operand" "")
   (match_operand:V2DI 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_0_to_1_operand" "")]
  "TARGET_AVX2"
{
  rtx (*insn)(rtx, rtx, rtx);

  switch (INTVAL (operands[3]))
    {
    case 0:
      insn = gen_avx2_vec_set_lo_v4di;
      break;
    case 1:
      insn = gen_avx2_vec_set_hi_v4di;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1], operands[2]));
  DONE;
})

;; Arithmetic right shift with a per-element count vector in operand 2.
(define_insn "avx2_ashrv<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
        (ashiftrt:VI4_AVX2
          (match_operand:VI4_AVX2 1 "register_operand" "x")
          (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Shifts covered by the any_lshift code iterator, with a per-element
;; count vector in operand 2.
(define_insn "avx2_<shift_insn>v<mode>"
  [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
        (any_lshift:VI48_AVX2
          (match_operand:VI48_AVX2 1 "register_operand" "x")
          (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX2"
  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Concatenate two half-width vectors.  Alternative 0 inserts operand 2
;; at position 1 above operand 1.  Alternative 1 (operand 2 matching
;; constraint "C") is emitted as a move of operand 1 into the %x form of
;; the destination, with the move mnemonic chosen from the insn's mode
;; attribute.
(define_insn "avx_vec_concat<mode>"
  [(set (match_operand:V_256 0 "register_operand" "=x,x")
        (vec_concat:V_256
          (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
          (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
  "TARGET_AVX"
{
  switch (which_alternative)
    {
    case 0:
      return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_V8SF:
          return "vmovaps\t{%1, %x0|%x0, %1}";
        case MODE_V4DF:
          return "vmovapd\t{%1, %x0|%x0, %1}";
        default:
          return "vmovdqa\t{%1, %x0|%x0, %1}";
        }
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog,ssemov")
   (set_attr "prefix_extra" "1,*")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; vcvtph2ps from a V8HI register to V4SF, modelled as elements 0..3 of
;; a V8SF unspec.
(define_insn "vcvtph2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
                       UNSPEC_VCVTPH2PS)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

;; vcvtph2ps directly from a V4HI memory operand.
;; NOTE(review): the "mode" attribute is V8SF although the result is
;; V4SF -- confirm whether that is intended.
(define_insn "*vcvtph2ps_load"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
                     UNSPEC_VCVTPH2PS))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; 256-bit vcvtph2ps: V8HI (register or memory) to V8SF.
(define_insn "vcvtph2ps256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
        (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
                     UNSPEC_VCVTPH2PS))]
  "TARGET_F16C"
  "vcvtph2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; vcvtps2ph to a register: the V4HI conversion result is concatenated
;; with a zero V4HI (operand 3, created here) to form the V8HI result.
(define_expand "vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (vec_concat:V8HI
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
                        (match_operand:SI 2 "const_0_to_255_operand" "")]
                       UNSPEC_VCVTPS2PH)
          (match_dup 3)))]
  "TARGET_F16C"
  "operands[3] = CONST0_RTX (V4HImode);")

;; Insn matching the RTL produced by the vcvtps2ph expander.
(define_insn "*vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
                        (match_operand:SI 2 "const_0_to_255_operand" "N")]
                       UNSPEC_VCVTPS2PH)
          (match_operand:V4HI 3 "const0_operand" "")))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

;; vcvtps2ph storing the V4HI result directly to memory.
(define_insn "*vcvtps2ph_store"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
        (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])

;; 256-bit vcvtps2ph: V8SF to V8HI, register or memory destination.
(define_insn "vcvtps2ph256"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
        (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
                      (match_operand:SI 2 "const_0_to_255_operand" "N")]
                     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C"
  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])

;; For gather* insn patterns
(define_mode_iterator VEC_GATHER_MODE
  [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index vector mode used by the avx2_gathersi patterns.
(define_mode_attr VEC_GATHER_IDXSI
  [(V2DI "V4SI") (V2DF "V4SI")
   (V4DI "V4SI") (V4DF "V4SI")
   (V4SI "V4SI") (V4SF "V4SI")
   (V8SI "V8SI") (V8SF "V8SI")])
;; Index vector mode used by the avx2_gatherdi patterns.
(define_mode_attr VEC_GATHER_IDXDI
  [(V2DI "V2DI") (V2DF "V2DI")
   (V4DI "V4DI") (V4DF "V4DI")
   (V4SI "V2DI") (V4SF "V2DI")
   (V8SI "V4DI") (V8SF "V4DI")])
;; Mode of the source and mask operands in the avx2_gatherdi patterns.
;; It equals the data mode except for V8SI/V8SF, which map to V4SI/V4SF.
(define_mode_attr VEC_GATHER_SRCDI
  [(V2DI "V2DI") (V2DF "V2DF")
   (V4DI "V4DI") (V4DF "V4DF")
   (V4SI "V4SI") (V4SF "V4SF")
   (V8SI "V4SI") (V8SF "V4SF")])

;; Gather using the VEC_GATHER_IDXSI index mode.  Operand 1 is the
;; source, 2 the base address, 3 the index vector, 4 the mask and 5 the
;; scale.  Operand 7 is built here as an UNSPEC_VSIBADDR wrapping base,
;; index and scale.
(define_expand "avx2_gathersi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand" "")
                           (match_operand:<VEC_GATHER_IDXSI>
                              3 "register_operand" "")
                           (match_operand:SI 5 "const1248_operand" "")]))
                      (mem:BLK (scratch))
                      (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})

;; Gather insn for the SI-index form.  The source (operand 2) is tied to
;; the destination and the mask (operand 5) is tied to the clobbered
;; scratch in operand 1; both outputs are earlyclobber.
(define_insn "*avx2_gathersi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Variant of *avx2_gathersi<mode> in which the source operand is (pc)
;; instead of a register tied to the destination.
(define_insn "*avx2_gathersi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Gather using the VEC_GATHER_IDXDI index mode.  Same operand layout as
;; avx2_gathersi<mode>, except that the source (operand 1) and mask
;; (operand 4) use <VEC_GATHER_SRCDI>.
(define_expand "avx2_gatherdi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
                   (unspec:VEC_GATHER_MODE
                     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
                      (mem:<ssescalarmode>
                        (match_par_dup 7
                          [(match_operand 2 "vsib_address_operand" "")
                           (match_operand:<VEC_GATHER_IDXDI>
                              3 "register_operand" "")
                           (match_operand:SI 5 "const1248_operand" "")]))
                      (mem:BLK (scratch))
                      (match_operand:<VEC_GATHER_SRCDI>
                        4 "register_operand" "")]
                     UNSPEC_GATHER))
              (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
  "TARGET_AVX2"
{
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
                                        operands[5]), UNSPEC_VSIBADDR);
})

;; Gather insn for the DI-index form.  The template prints the
;; destination through operand 2, which is tied to operand 0 but has mode
;; <VEC_GATHER_SRCDI>, and the mask through operand 5, which is tied to
;; the scratch.
(define_insn "*avx2_gatherdi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
           (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 3 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                 (match_operand:SI 6 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Variant with (pc) as the source operand.  When the data mode differs
;; from <VEC_GATHER_SRCDI>, the destination is printed with %x0.
(define_insn "*avx2_gatherdi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
        (unspec:VEC_GATHER_MODE
          [(pc)
           (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
             [(unspec:P
                [(match_operand:P 2 "vsib_address_operand" "p")
                 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                 (match_operand:SI 5 "const1248_operand" "n")]
                UNSPEC_VSIBADDR)])
           (mem:BLK (scratch))
           (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
          UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
{
  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; DI-index gather for the VI4F_256 modes whose result is consumed only
;; through a vec_select of elements 0..3; the destination therefore has
;; mode <VEC_GATHER_SRCDI>.
(define_insn "*avx2_gatherdi<mode>_3"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
             (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 3 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
                   (match_operand:SI 6 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
             UNSPEC_GATHER)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "*avx2_gatherdi<mode>_4"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
        (vec_select:<VEC_GATHER_SRCDI>
          (unspec:VI4F_256
            [(pc)
             (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
               [(unspec:P
                  [(match_operand:P 2 "vsib_address_operand" "p")
                   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
                   (match_operand:SI 5 "const1248_operand" "n")]
                  UNSPEC_VSIBADDR)])
             (mem:BLK (scratch))
             (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
             UNSPEC_GATHER)
          (parallel [(const_int 0)
(const_int 1) 12726 (const_int 2) (const_int 3)]))) 12727 (clobber (match_scratch:VI4F_256 1 "=&x"))] 12728 "TARGET_AVX2" 12729 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}" 12730 [(set_attr "type" "ssemov") 12731 (set_attr "prefix" "vex") 12732 (set_attr "mode" "<sseinsnmode>")]) 12733