;; VSX patterns.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR [(V8HI "h")
                            (V4SI "w")
                            (V4SF "w")
                            (V2DF "d")
                            (V2DI "d")
                            (V1TI "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm  [(V16QI "vw4")
                        (V8HI  "vw4")
                        (V4SI  "vw4")
                        (V4SF  "vw4")
                        (V2DF  "vd2")
                        (V2DI  "vd2")
                        (DF    "d")
                        (TF    "vd2")
                        (KF    "vd2")
                        (V1TI  "vd2")
                        (TI    "vd2")])

;; Map the register class used
(define_mode_attr VSr  [(V16QI "v")
                        (V8HI  "v")
                        (V4SI  "v")
                        (V4SF  "wa")
                        (V2DI  "wa")
                        (V2DF  "wa")
                        (DI    "wa")
                        (DF    "wa")
                        (SF    "wa")
                        (TF    "wa")
                        (KF    "wa")
                        (V1TI  "v")
                        (TI    "wa")])

;; What value we need in the "isa" field, to make the IEEE QP float work.
(define_mode_attr VSisa [(V16QI "*")
                         (V8HI  "*")
                         (V4SI  "*")
                         (V4SF  "*")
                         (V2DI  "*")
                         (V2DF  "*")
                         (DI    "*")
                         (DF    "*")
                         (SF    "*")
                         (V1TI  "*")
                         (TI    "*")
                         (TF    "p9tf")
                         (KF    "p9kf")])
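;; Illustrative sketch (not part of the original file): the hypothetical
;; pattern below shows how these mode attributes expand when a pattern is
;; written with a mode iterator.  For each mode in VSX_L the generator emits
;; one variant, and "<VSisa>" in the isa attribute becomes "p9kf" for KF and
;; "p9tf" for TF, so only the IEEE 128-bit variants are gated on ISA 3.0
;; support:
;;
;;   (define_insn "*example_copy_<mode>"
;;     [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
;;           (match_operand:VSX_L 1 "vsx_register_operand" "wa"))]
;;     "VECTOR_MEM_VSX_P (<MODE>mode)"
;;     "xxlor %x0,%x1,%x1"
;;     [(set_attr "isa" "<VSisa>")])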
;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r  [(V16QI "??r")
                        (V8HI  "??r")
                        (V4SI  "??r")
                        (V4SF  "??r")
                        (V2DI  "??r")
                        (V2DF  "??r")
                        (V1TI  "??r")
                        (KF    "??r")
                        (TF    "??r")
                        (TI    "r")])

;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW  [(V16QI "W")
                       (V8HI  "W")
                       (V4SI  "W")
                       (V4SF  "W")
                       (V2DI  "W")
                       (V2DF  "W")
                       (V1TI  "W")
                       (KF    "W")
                       (TF    "W")
                       (TI    "n")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")
                       (V2DF "v2di")
                       (DF   "di")])

(define_mode_attr VSI [(V4SF "V4SI")
                       (V2DF "V2DI")
                       (DF   "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv  [(V16QI "v")
                        (V8HI  "v")
                        (V4SI  "v")
                        (V4SF  "v")
                        (V2DI  "v")
                        (V2DF  "v")
                        (V1TI  "v")
                        (DF    "s")
                        (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")
                              (V4SF "vecfloat")
                              (DF   "dmul")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")
                              (V4SF "vecfdiv")
                              (DF   "ddiv")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI  "TI")
                             (V2DF  "DF")
                             (V2DI  "DI")
                             (V4SF  "SF")
                             (V4SI  "SI")
                             (V8HI  "HI")
                             (V16QI "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")
                             (V4SF "V8SF")
                             (V2DI "V4DI")
                             (V2DF "V4DF")
                             (V1TI "V2TI")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI "h")
                                     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI "const_0_to_7_operand")
                                         (V4SI "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])

;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SPN
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_XVCVSPHP
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_SCMPEXPQP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
  ])

(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
                               UNSPEC_VSX_XVCVBF16SPN])

(define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
                           (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
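;; As an illustrative sketch (assumed example, not from the original file):
;; on little endian Power8, lxvd2x loads the two doublewords of a V2DF in
;; big-endian element order, so a vector stored in memory as {x, y} arrives
;; in the register as {y, x}.  The first vec_select in each split below
;; models that implicit swap, and the second is emitted as a real permute
;; that restores the expected order:
;;
;;   lxvd2x   vs0,0,r3          # register now holds {y, x}
;;   xxpermdi vs0,vs0,vs0,2     # swap doublewords back to {x, y}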
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
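;; Illustrative expansion (assumed example, not from the original file): for
;; a V2DF store after reload no scratch register is available, so the split
;; below swaps the source in place, lets the element-reversing store undo
;; that swap, and then swaps the source back for any later uses:
;;
;;   xxpermdi vs0,vs0,vs0,2    # swap the doublewords of vs0 in place
;;   stxvd2x  vs0,0,r3         # element-reversing store undoes the swap
;;   xxpermdi vs0,vs0,vs0,2    # restore vs0 in case it is still live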
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])
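;; Illustrative note (assumed example, not from the original file): rotating
;; a 128-bit value by 64 bits is exactly a doubleword swap, which is why the
;; permute above is modeled as (rotate ... (const_int 64)).  In a VSX
;; register it is a single permute; in a GPR pair it is two register moves
;; with the halves exchanged:
;;
;;   xxpermdi vs0,vs1,vs1,2    # VSX: swap the two doublewords
;;   mr r10,r9 ; mr r11,r8     # GPR pair r8:r9 -> r10:r11, halves swapped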
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])
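;; Illustrative sketch (assumed example, not from the original file): two
;; back-to-back rotate-by-64 copies are the identity, so when the
;; intermediate register is dead the peepholes above collapse
;;
;;   (set (reg:TI 3) (rotate:TI (mem:TI ...) (const_int 64)))
;;   (set (reg:TI 5) (rotate:TI (reg:TI 3) (const_int 64)))
;;
;; into the single move (set (reg:TI 5) (mem:TI ...)).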
;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])


;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.
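;; Illustrative example (assumed, not from the original file): the
;; *xxspltib_<mode>_split pattern above materializes a V4SI constant such as
;; { 5, 5, 5, 5 } as a byte splat plus a sign extension, and the all-1's
;; preference looks like:
;;
;;   xxspltib 34,5             # splat byte 5 into each byte of vs34
;;   vextsb2w 2,2              # sign-extend bytes to words: v2 = {5,5,5,5}
;;
;;   xxspltib 35,255           # vs35 = all 1's, no input dependency
;;   xxlorc   35,35,35         # also all 1's, but must wait on prior vs35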
;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,       r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wa,        v,
                ?wa,       v,         <??r>,    wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,       we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      W,         <nW>,     v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,     *,        vecsimple, vecsimple,
                vecsimple, *,         *,         vecstore, vecload")
   (set_attr "num_insns"
               "*,         *,         *,         2,        *,         2,
                2,         2,         2,         2,        *,         *,
                *,         5,         2,         *,        *")
   (set_attr "max_prefixed_insns"
               "*,         *,         *,         *,        *,         2,
                2,         2,         2,         2,        *,         *,
                *,         *,         *,         *,        *")
   (set_attr "length"
               "*,         *,         *,         8,        *,         8,
                8,         8,         8,         8,        *,         *,
                *,         20,        8,         *,        *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,         *,
                *,         *,         *,         *,        p9v,       *,
                <VSisa>,   *,         *,         *,        *")])

;;              VSX store  VSX load   VSX move   GPR load  GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,        ??r,      ??Y,       <??r>,
                wa,        v,         ?wa,       v,        <??r>,
                wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,        Y,        r,         r,
                wE,        jwM,       ?jwM,      W,        <nW>,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,     store,     *,
                vecsimple, vecsimple, vecsimple, *,        *,
                vecstore,  vecload")
   (set_attr "length"
               "*,         *,         *,         16,       16,        16,
                *,         *,         *,         20,       16,
                *,         *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,        *,         *,
                p9v,       *,         <VSisa>,   *,        *,
                *,         *")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode))
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode))
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
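;; Illustrative sketch (assumed example, not from the original file): for a
;; V4SI value stored in memory as {0, 1, 2, 3}, the element-reversing loads
;; below produce {3, 2, 1, 0} in the register, which is what builtins in the
;; vec_xl_be family want on little endian:
;;
;;   lxvw4x vs0,0,r3           # vs0 = {3, 2, 1, 0} on little endian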
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])
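;; Illustrative note (assumed, not from the original file): without ISA
;; 3.0's lxvh8x/lxvb16x, the V8HI expander above and the V16QI expander
;; below emulate an element-reversing load with a word-reversing lxvw4x
;; followed by a vperm that finishes the job inside each word.  E.g. for
;; V8HI in memory as {0,1,2,3,4,5,6,7}, lxvw4x yields the halfword order
;; {6,7,4,5,2,3,0,1}, and swapping adjacent halfwords reaches the fully
;; reversed {7,6,5,4,3,2,1,0}.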
"memory_operand" "Z") 1336 (parallel [(const_int 15) (const_int 14) 1337 (const_int 13) (const_int 12) 1338 (const_int 11) (const_int 10) 1339 (const_int 9) (const_int 8) 1340 (const_int 7) (const_int 6) 1341 (const_int 5) (const_int 4) 1342 (const_int 3) (const_int 2) 1343 (const_int 1) (const_int 0)])))] 1344 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" 1345{ 1346 if (!TARGET_P9_VECTOR) 1347 { 1348 rtx tmp = gen_reg_rtx (V4SImode); 1349 rtx subreg, subreg2, perm[16], pcv; 1350 /* 3 is leftmost element in register */ 1351 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; 1352 int i; 1353 1354 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0); 1355 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); 1356 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0); 1357 1358 for (i = 0; i < 16; ++i) 1359 perm[i] = GEN_INT (reorder[i]); 1360 1361 pcv = force_reg (V16QImode, 1362 gen_rtx_CONST_VECTOR (V16QImode, 1363 gen_rtvec_v (16, perm))); 1364 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2, 1365 subreg2, pcv)); 1366 DONE; 1367 } 1368}) 1369 1370(define_insn "vsx_ld_elemrev_v16qi_internal" 1371 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 1372 (vec_select:V16QI 1373 (match_operand:V16QI 1 "memory_operand" "Z") 1374 (parallel [(const_int 15) (const_int 14) 1375 (const_int 13) (const_int 12) 1376 (const_int 11) (const_int 10) 1377 (const_int 9) (const_int 8) 1378 (const_int 7) (const_int 6) 1379 (const_int 5) (const_int 4) 1380 (const_int 3) (const_int 2) 1381 (const_int 1) (const_int 0)])))] 1382 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" 1383 "lxvb16x %x0,%y1" 1384 [(set_attr "type" "vecload")]) 1385 1386(define_insn "vsx_st_elemrev_v1ti" 1387 [(set (match_operand:V1TI 0 "memory_operand" "=Z") 1388 (vec_select:V1TI 1389 (match_operand:V1TI 1 "vsx_register_operand" "+wa") 1390 (parallel [(const_int 0)]))) 1391 (clobber (match_dup 1))] 1392 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" 1393{ 1394 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0"; 1395} 1396 [(set_attr "type" "vecstore")]) 1397 1398(define_insn "vsx_st_elemrev_v2df" 1399 [(set (match_operand:V2DF 0 "memory_operand" "=Z") 1400 (vec_select:V2DF 1401 (match_operand:V2DF 1 "vsx_register_operand" "wa") 1402 (parallel [(const_int 1) (const_int 0)])))] 1403 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" 1404 "stxvd2x %x1,%y0" 1405 [(set_attr "type" "vecstore")]) 1406 1407(define_insn "vsx_st_elemrev_v2di" 1408 [(set (match_operand:V2DI 0 "memory_operand" "=Z") 1409 (vec_select:V2DI 1410 (match_operand:V2DI 1 "vsx_register_operand" "wa") 1411 (parallel [(const_int 1) (const_int 0)])))] 1412 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" 1413 "stxvd2x %x1,%y0" 1414 [(set_attr "type" "vecstore")]) 1415 1416(define_insn "vsx_st_elemrev_v4sf" 1417 [(set (match_operand:V4SF 0 "memory_operand" "=Z") 1418 (vec_select:V4SF 1419 (match_operand:V4SF 1 "vsx_register_operand" "wa") 1420 (parallel [(const_int 3) (const_int 2) 1421 (const_int 1) (const_int 0)])))] 1422 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" 1423 "stxvw4x %x1,%y0" 1424 [(set_attr "type" "vecstore")]) 1425 1426(define_insn "vsx_st_elemrev_v4si" 1427 [(set (match_operand:V4SI 0 "memory_operand" "=Z") 1428 (vec_select:V4SI 1429 (match_operand:V4SI 1 "vsx_register_operand" "wa") 1430 (parallel [(const_int 3) (const_int 2) 1431 (const_int 1) (const_int 0)])))] 1432 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN" 1433 "stxvw4x %x1,%y0" 1434 [(set_attr "type" 
"vecstore")]) 1435 1436(define_expand "vsx_st_elemrev_v8hi" 1437 [(set (match_operand:V8HI 0 "memory_operand" "=Z") 1438 (vec_select:V8HI 1439 (match_operand:V8HI 1 "vsx_register_operand" "wa") 1440 (parallel [(const_int 7) (const_int 6) 1441 (const_int 5) (const_int 4) 1442 (const_int 3) (const_int 2) 1443 (const_int 1) (const_int 0)])))] 1444 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" 1445{ 1446 if (!TARGET_P9_VECTOR) 1447 { 1448 rtx mem_subreg, subreg, perm[16], pcv; 1449 rtx tmp = gen_reg_rtx (V8HImode); 1450 /* 2 is leftmost element in register */ 1451 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; 1452 int i; 1453 1454 for (i = 0; i < 16; ++i) 1455 perm[i] = GEN_INT (reorder[i]); 1456 1457 pcv = force_reg (V16QImode, 1458 gen_rtx_CONST_VECTOR (V16QImode, 1459 gen_rtvec_v (16, perm))); 1460 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1], 1461 operands[1], pcv)); 1462 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0); 1463 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0); 1464 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); 1465 DONE; 1466 } 1467}) 1468 1469(define_insn "*vsx_st_elemrev_v2di_internal" 1470 [(set (match_operand:V2DI 0 "memory_operand" "=Z") 1471 (vec_select:V2DI 1472 (match_operand:V2DI 1 "vsx_register_operand" "wa") 1473 (parallel [(const_int 1) (const_int 0)])))] 1474 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" 1475 "stxvd2x %x1,%y0" 1476 [(set_attr "type" "vecstore")]) 1477 1478(define_insn "*vsx_st_elemrev_v8hi_internal" 1479 [(set (match_operand:V8HI 0 "memory_operand" "=Z") 1480 (vec_select:V8HI 1481 (match_operand:V8HI 1 "vsx_register_operand" "wa") 1482 (parallel [(const_int 7) (const_int 6) 1483 (const_int 5) (const_int 4) 1484 (const_int 3) (const_int 2) 1485 (const_int 1) (const_int 0)])))] 1486 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" 1487 "stxvh8x %x1,%y0" 1488 [(set_attr "type" "vecstore")]) 1489 1490(define_expand "vsx_st_elemrev_v16qi" 1491 [(set (match_operand:V16QI 0 "memory_operand" "=Z") 1492 (vec_select:V16QI 1493 (match_operand:V16QI 1 "vsx_register_operand" "wa") 1494 (parallel [(const_int 15) (const_int 14) 1495 (const_int 13) (const_int 12) 1496 (const_int 11) (const_int 10) 1497 (const_int 9) (const_int 8) 1498 (const_int 7) (const_int 6) 1499 (const_int 5) (const_int 4) 1500 (const_int 3) (const_int 2) 1501 (const_int 1) (const_int 0)])))] 1502 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" 1503{ 1504 if (!TARGET_P9_VECTOR) 1505 { 1506 rtx mem_subreg, subreg, perm[16], pcv; 1507 rtx tmp = gen_reg_rtx (V16QImode); 1508 /* 3 is leftmost element in register */ 1509 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; 1510 int i; 1511 1512 for (i = 0; i < 16; ++i) 1513 perm[i] = GEN_INT (reorder[i]); 1514 1515 pcv = force_reg (V16QImode, 1516 gen_rtx_CONST_VECTOR (V16QImode, 1517 gen_rtvec_v (16, perm))); 1518 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1], 1519 operands[1], pcv)); 1520 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0); 1521 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0); 1522 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); 1523 DONE; 1524 } 1525}) 1526 1527(define_insn "*vsx_st_elemrev_v16qi_internal" 1528 [(set (match_operand:V16QI 0 "memory_operand" "=Z") 1529 (vec_select:V16QI 1530 (match_operand:V16QI 1 "vsx_register_operand" "wa") 1531 (parallel [(const_int 15) (const_int 14) 1532 (const_int 13) (const_int 12) 1533 
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])


;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                     (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "mul")])
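;; Illustrative user-level example (assumed, not from the original file):
;; with the GCC vector extension,
;;
;;   typedef long long v2di __attribute__ ((vector_size (16)));
;;   v2di mul2 (v2di a, v2di b) { return a * b; }
;;
;; there is no V2DI multiply instruction available to this splitter, so it
;; extracts both doublewords, multiplies them as scalars (mulld on 64-bit
;; targets, a library multiply otherwise), and reassembles the result with
;; vsx_concat_v2di.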
"vsx_register_operand" "wa")] 1624 UNSPEC_VSX_DIVSD))] 1625 "VECTOR_MEM_VSX_P (V2DImode)" 1626 "#" 1627 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" 1628 [(const_int 0)] 1629{ 1630 rtx op0 = operands[0]; 1631 rtx op1 = operands[1]; 1632 rtx op2 = operands[2]; 1633 rtx op3 = gen_reg_rtx (DImode); 1634 rtx op4 = gen_reg_rtx (DImode); 1635 rtx op5 = gen_reg_rtx (DImode); 1636 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); 1637 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); 1638 if (TARGET_POWERPC64) 1639 emit_insn (gen_divdi3 (op5, op3, op4)); 1640 else 1641 { 1642 rtx libfunc = optab_libfunc (sdiv_optab, DImode); 1643 rtx target = emit_library_call_value (libfunc, 1644 op5, LCT_NORMAL, DImode, 1645 op3, DImode, 1646 op4, DImode); 1647 emit_move_insn (op5, target); 1648 } 1649 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); 1650 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); 1651 if (TARGET_POWERPC64) 1652 emit_insn (gen_divdi3 (op3, op3, op4)); 1653 else 1654 { 1655 rtx libfunc = optab_libfunc (sdiv_optab, DImode); 1656 rtx target = emit_library_call_value (libfunc, 1657 op3, LCT_NORMAL, DImode, 1658 op3, DImode, 1659 op4, DImode); 1660 emit_move_insn (op3, target); 1661 } 1662 emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); 1663 DONE; 1664} 1665 [(set_attr "type" "div")]) 1666 1667(define_insn_and_split "vsx_udiv_v2di" 1668 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") 1669 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") 1670 (match_operand:V2DI 2 "vsx_register_operand" "wa")] 1671 UNSPEC_VSX_DIVUD))] 1672 "VECTOR_MEM_VSX_P (V2DImode)" 1673 "#" 1674 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" 1675 [(const_int 0)] 1676{ 1677 rtx op0 = operands[0]; 1678 rtx op1 = operands[1]; 1679 rtx op2 = operands[2]; 1680 rtx op3 = gen_reg_rtx (DImode); 1681 rtx op4 = gen_reg_rtx (DImode); 1682 rtx op5 = gen_reg_rtx (DImode); 1683 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); 1684 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); 1685 if (TARGET_POWERPC64) 1686 emit_insn (gen_udivdi3 (op5, op3, op4)); 1687 else 1688 { 1689 rtx libfunc = optab_libfunc (udiv_optab, DImode); 1690 rtx target = emit_library_call_value (libfunc, 1691 op5, LCT_NORMAL, DImode, 1692 op3, DImode, 1693 op4, DImode); 1694 emit_move_insn (op5, target); 1695 } 1696 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); 1697 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); 1698 if (TARGET_POWERPC64) 1699 emit_insn (gen_udivdi3 (op3, op3, op4)); 1700 else 1701 { 1702 rtx libfunc = optab_libfunc (udiv_optab, DImode); 1703 rtx target = emit_library_call_value (libfunc, 1704 op3, LCT_NORMAL, DImode, 1705 op3, DImode, 1706 op4, DImode); 1707 emit_move_insn (op3, target); 1708 } 1709 emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); 1710 DONE; 1711} 1712 [(set_attr "type" "div")]) 1713 1714;; *tdiv* instruction returning the FG flag 1715(define_expand "vsx_tdiv<mode>3_fg" 1716 [(set (match_dup 3) 1717 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand") 1718 (match_operand:VSX_B 2 "vsx_register_operand")] 1719 UNSPEC_VSX_TDIV)) 1720 (set (match_operand:SI 0 "gpc_reg_operand") 1721 (gt:SI (match_dup 3) 1722 (const_int 0)))] 1723 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1724{ 1725 operands[3] = gen_reg_rtx (CCFPmode); 1726}) 1727 1728;; *tdiv* instruction returning the FE flag 1729(define_expand "vsx_tdiv<mode>3_fe" 1730 [(set (match_dup 3) 1731 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand") 1732 (match_operand:VSX_B 
2 "vsx_register_operand")] 1733 UNSPEC_VSX_TDIV)) 1734 (set (match_operand:SI 0 "gpc_reg_operand") 1735 (eq:SI (match_dup 3) 1736 (const_int 0)))] 1737 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1738{ 1739 operands[3] = gen_reg_rtx (CCFPmode); 1740}) 1741 1742(define_insn "*vsx_tdiv<mode>3_internal" 1743 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x") 1744 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa") 1745 (match_operand:VSX_B 2 "vsx_register_operand" "wa")] 1746 UNSPEC_VSX_TDIV))] 1747 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1748 "x<VSv>tdiv<sd>p %0,%x1,%x2" 1749 [(set_attr "type" "<VStype_simple>")]) 1750 1751(define_insn "vsx_fre<mode>2" 1752 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1753 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] 1754 UNSPEC_FRES))] 1755 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1756 "xvre<sd>p %x0,%x1" 1757 [(set_attr "type" "<VStype_simple>")]) 1758 1759(define_insn "*vsx_neg<mode>2" 1760 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1761 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] 1762 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1763 "xvneg<sd>p %x0,%x1" 1764 [(set_attr "type" "<VStype_simple>")]) 1765 1766(define_insn "*vsx_abs<mode>2" 1767 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1768 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] 1769 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1770 "xvabs<sd>p %x0,%x1" 1771 [(set_attr "type" "<VStype_simple>")]) 1772 1773(define_insn "vsx_nabs<mode>2" 1774 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1775 (neg:VSX_F 1776 (abs:VSX_F 1777 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))] 1778 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1779 "xvnabs<sd>p %x0,%x1" 1780 [(set_attr "type" "<VStype_simple>")]) 1781 1782(define_insn "vsx_smax<mode>3" 1783 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1784 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1785 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1786 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1787 "xvmax<sd>p %x0,%x1,%x2" 1788 [(set_attr "type" "<VStype_simple>")]) 1789 1790(define_insn "*vsx_smin<mode>3" 1791 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1792 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1793 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1794 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1795 "xvmin<sd>p %x0,%x1,%x2" 1796 [(set_attr "type" "<VStype_simple>")]) 1797 1798(define_insn "*vsx_sqrt<mode>2" 1799 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1800 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] 1801 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1802 "xvsqrt<sd>p %x0,%x1" 1803 [(set_attr "type" "<sd>sqrt")]) 1804 1805(define_insn "*vsx_rsqrte<mode>2" 1806 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1807 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] 1808 UNSPEC_RSQRT))] 1809 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1810 "xvrsqrte<sd>p %x0,%x1" 1811 [(set_attr "type" "<VStype_simple>")]) 1812 1813;; *tsqrt* returning the fg flag 1814(define_expand "vsx_tsqrt<mode>2_fg" 1815 [(set (match_dup 2) 1816 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")] 1817 UNSPEC_VSX_TSQRT)) 1818 (set (match_operand:SI 0 "gpc_reg_operand") 1819 (gt:SI (match_dup 2) 1820 (const_int 0)))] 1821 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1822{ 1823 operands[2] = gen_reg_rtx (CCFPmode); 1824}) 1825 1826;; *tsqrt* returning the fe flag 1827(define_expand "vsx_tsqrt<mode>2_fe" 1828 
;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
  [(set (match_dup 2)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 2)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 2)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
                     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (eq:SI (match_dup 2)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<sd>p %0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allow the target to be a separate register from
;; the 3 inputs.  Under VSX, the target must be either the addend or the
;; first multiplicand.
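
;; To illustrate the constraint alternatives below: the "a" forms
;; (e.g. xvmaddadp vT,vA,vB, computing vT = vA*vB + vT) require the target
;; to overlap the addend, so operand 3 ties to "0", while the "m" forms
;; (e.g. xvmaddmdp vT,vA,vB, computing vT = vA*vT + vB) require the target
;; to overlap a multiplicand, so operand 2 ties to "0".  The Altivec
;; vmaddfp alternative has no such restriction.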
"0,wa")))))] 1926 "VECTOR_UNIT_VSX_P (V2DFmode)" 1927 "@ 1928 xvnmsubadp %x0,%x1,%x2 1929 xvnmsubmdp %x0,%x1,%x3" 1930 [(set_attr "type" "vecdouble")]) 1931 1932;; Vector conditional expressions (no scalar version for these instructions) 1933(define_insn "vsx_eq<mode>" 1934 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1935 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1936 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1937 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1938 "xvcmpeq<sd>p %x0,%x1,%x2" 1939 [(set_attr "type" "<VStype_simple>")]) 1940 1941(define_insn "vsx_gt<mode>" 1942 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1943 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1944 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1945 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1946 "xvcmpgt<sd>p %x0,%x1,%x2" 1947 [(set_attr "type" "<VStype_simple>")]) 1948 1949(define_insn "*vsx_ge<mode>" 1950 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1951 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1952 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1953 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1954 "xvcmpge<sd>p %x0,%x1,%x2" 1955 [(set_attr "type" "<VStype_simple>")]) 1956 1957;; Compare vectors producing a vector result and a predicate, setting CR6 to 1958;; indicate a combined status 1959(define_insn "*vsx_eq_<mode>_p" 1960 [(set (reg:CC CR6_REGNO) 1961 (unspec:CC 1962 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1963 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] 1964 UNSPEC_PREDICATE)) 1965 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1966 (eq:VSX_F (match_dup 1) 1967 (match_dup 2)))] 1968 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1969 "xvcmpeq<sd>p. %x0,%x1,%x2" 1970 [(set_attr "type" "<VStype_simple>")]) 1971 1972(define_insn "*vsx_gt_<mode>_p" 1973 [(set (reg:CC CR6_REGNO) 1974 (unspec:CC 1975 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1976 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] 1977 UNSPEC_PREDICATE)) 1978 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1979 (gt:VSX_F (match_dup 1) 1980 (match_dup 2)))] 1981 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1982 "xvcmpgt<sd>p. %x0,%x1,%x2" 1983 [(set_attr "type" "<VStype_simple>")]) 1984 1985(define_insn "*vsx_ge_<mode>_p" 1986 [(set (reg:CC CR6_REGNO) 1987 (unspec:CC 1988 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1989 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] 1990 UNSPEC_PREDICATE)) 1991 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1992 (ge:VSX_F (match_dup 1) 1993 (match_dup 2)))] 1994 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1995 "xvcmpge<sd>p. 
;; Vector select
(define_insn "*vsx_xxsel<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
        (if_then_else:VSX_L
         (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
                (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")
   (set_attr "isa" "<VSisa>")])

(define_insn "*vsx_xxsel<mode>_uns"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
        (if_then_else:VSX_L
         (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
                   (match_operand:VSX_L 4 "zero_constant" ""))
         (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
         (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsel %x0,%x3,%x2,%x1"
  [(set_attr "type" "vecmove")
   (set_attr "isa" "<VSisa>")])

;; Copy sign
(define_insn "vsx_copysign<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F
         [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
          (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
         UNSPEC_COPYSIGN))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcpsgn<sd>p %x0,%x2,%x1"
  [(set_attr "type" "<VStype_simple>")])
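
;; Note the operand order in the template above: the RTL operands follow
;; C copysign (magnitude, sign), while xvcpsgn<sd>p takes the sign from its
;; first input, hence "%x0,%x2,%x1" rather than "%x0,%x1,%x2".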
;; For the conversions, limit the register class for the integer value to be
;; the fprs because we don't want to add the altivec registers to movdi/movsi.
;; For the unsigned tests, there isn't a generic double -> unsigned conversion
;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md
;; in allowing virtual registers.
(define_insn "vsx_float<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
        (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvsx<VSc><sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floatuns<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
        (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvux<VSc><sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fix_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
        (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fixuns_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
        (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; Math rounding functions
(define_insn "vsx_x<VSv>r<sd>pi"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
                      UNSPEC_VSX_ROUND_I))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<sd>pi %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_x<VSv>r<sd>pic"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
                      UNSPEC_VSX_ROUND_IC))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<sd>pic %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_btrunc<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<sd>piz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_b2trunc<mode>2"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
        (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
                      UNSPEC_FRIZ))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<sd>piz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floor<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
                      UNSPEC_FRIM))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<sd>pim %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_ceil<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
                      UNSPEC_FRIP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<sd>pip %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
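
;; For illustration (an assumption about the builtin mapping, not encoded
;; here): vec_trunc, vec_floor and vec_ceil correspond to the xvr<sd>piz,
;; xvr<sd>pim and xvr<sd>pip patterns above, rounding toward zero, toward
;; -infinity and toward +infinity respectively.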

;; VSX convert to/from double vector

;; Convert between single and double precision
;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
;; scalar single precision instructions internally use the double format.
;; Prefer the altivec registers, since we likely will need to do a vperm
(define_insn "vsx_xscvdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
        (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (DFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xvcvspdp_be"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
        (float_extend:V2DF
         (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
                          (parallel [(const_int 0) (const_int 2)]))))]
  "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
  "xvcvspdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspdp_le"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
        (float_extend:V2DF
         (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
                          (parallel [(const_int 1) (const_int 3)]))))]
  "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "xvcvspdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvspdp"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V4SF 1 "vsx_register_operand")]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
  else
    emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
  DONE;
})

(define_insn "vsx_xvcvdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
        (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; xscvspdp, represent the scalar SF type as V4SF
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Generate xvcvhpsp instruction
(define_insn "vsx_xvcvhpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVHPSP))]
  "TARGET_P9_VECTOR"
  "xvcvhpsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; Generate xvcvsphp
(define_insn "vsx_xvcvsphp"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
        (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_XVCVSPHP))]
  "TARGET_P9_VECTOR"
  "xvcvsphp %x0,%x1"
  [(set_attr "type" "vecfloat")])
;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])
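
;; A sketch of that direct-move path (an assumption about the surrounding
;; splitters in rs6000.md): the 32-bit SP image is first moved from the GPR
;; into a vector register, and xscvspdpn then widens it, because scalar
;; SFmode values are kept in DF format within VSX registers.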
;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)

(define_insn "vsx_xvcv<su>xwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcv<su>xwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcv<su>xddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcv<su>xddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsp<su>xws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvsp<su>xws %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvdp<su>xds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdp<su>xds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})

;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integer
(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})
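
;; Worked example for the scaling expanders above (illustrative): for
;; vec_ctf (v, 5) on V2DI, vsx_xvcvsxddp_scale emits xvcvsxddp and then
;; multiplies by 2**-5 via rs6000_scale_v2df (op0, op0, -5).  Conversely,
;; vsx_xvcvdpsxds_scale multiplies the V2DF input by 2**5 before
;; converting, matching the vec_cts semantics.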
;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.

(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
                     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
                     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert vector of 32-bit signed/unsigned integers to vector of
;; 64-bit floating point numbers.
(define_insn "vsx_xvcv<su>xwdp_be"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (any_float:V2DF
         (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
                          (parallel [(const_int 0) (const_int 2)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
  "xvcv<su>xwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcv<su>xwdp_le"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (any_float:V2DF
         (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
                          (parallel [(const_int 1) (const_int 3)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "xvcv<su>xwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcv<su>xwdp"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V4SI 1 "vsx_register_operand")
   (any_float (pc))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
  else
    emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
  DONE;
})

(define_insn "vsx_xvcvsxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVSXWDP))]
  "TARGET_VSX"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
        (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
                   UNSPEC_VSX_CVUXWDP))]
  "TARGET_VSX"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])
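
;; In the widening patterns above, the vec_select parallels ([0 2] for big
;; endian, [1 3] for little endian) encode which 32-bit words xvcv<su>xwdp
;; actually converts: the even words in big endian element order, which are
;; the odd words when numbered little endian.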
;; Convert vector of 32-bit floating point numbers to vector of
;; 64-bit signed/unsigned integers.
(define_insn "vsx_xvcvsp<su>xds_be"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (any_fix:V2DI
         (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
                          (parallel [(const_int 0) (const_int 2)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
  "xvcvsp<su>xds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsp<su>xds_le"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (any_fix:V2DI
         (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
                          (parallel [(const_int 1) (const_int 3)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "xvcvsp<su>xds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvsp<su>xds"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V4SF 1 "vsx_register_operand")
   (any_fix (pc))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
  else
    emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
  DONE;
})

;; Generate float2 double
;; convert two vectors of double to a vector of float
(define_expand "float2_v2df"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DF 1 "register_operand" "wa"))
   (use (match_operand:V2DF 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate float2
;; convert two vectors of long long signed ints to a vector of float
(define_expand "float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate uns_float2
;; convert two vectors of long long unsigned ints to a vector of float
(define_expand "uns_float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
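
;; In effect (illustrative): each float2 expander takes two two-element
;; vectors and produces one V4SF holding all four converted values, with
;; rs6000_generate_float2_code and rs6000_generate_float2_double_code
;; handling the endian-correct element placement.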
;; Generate floate
;; convert double or long long signed to float
;; (Only even words are valid, BE numbering)
(define_expand "floate<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));

  DONE;
})

;; Generate uns_floate
;; convert long long unsigned to float
;; (Only even words are valid, BE numbering)
(define_expand "unsfloatev2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));

  DONE;
})

;; Generate floato
;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
(define_expand "floato<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate uns_floato
;; convert long long unsigned to float
;; (Only odd words are valid, BE numbering)
(define_expand "unsfloatov2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct
         location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
                                          rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate vsigned2
;; convert two double float vectors to a vector of single precision ints
(define_expand "vsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=wa")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
                 (match_operand:V2DF 2 "register_operand" "wa")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
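
;; Likewise for vsigned2/vunsigned2 (illustrative): two V2DF inputs yield a
;; single V4SI holding the four converted integers, and the first argument
;; of rs6000_generate_vsigned2_code selects the signed or unsigned
;; conversion instruction.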
;; Generate vsignedo_v2df
;; signed double float to int convert odd word
(define_expand "vsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
        (match_operand:V2DF 1 "register_operand" "wa"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word
         0 1 2 3 0 1 2 3 => 1 2 3 0
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  DONE;
})

;; Generate vsignede_v2df
;; signed double float to int convert even word
(define_expand "vsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words
         0 1 2 3 0 1 2 3 => 3 0 1 2
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
(define_expand "vunsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=v")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
                 (match_operand:V2DF 2 "register_operand" "v")]
                UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
;; Generate vunsignedo_v2df
;; unsigned double float to int convert odd word
(define_expand "vunsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
         Take (operand[1] operand[1]) and shift left one word
         0 1 2 3 0 1 2 3 => 1 2 3 0
         Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  DONE;
})

;; Generate vunsignede_v2df
;; unsigned double float to int convert even word
(define_expand "vunsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
         Take (operand[1] operand[1]) and shift left three words
         0 1 2 3 0 1 2 3 => 3 0 1 2
         Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
                                          rtx_tmp, rtx_val));
    }
  DONE;
})

;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
        (float:V2DF
         (fix:V2DI
          (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
  "TARGET_HARD_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")])


;; Permute operations

;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
        (vec_concat:VSX_D
         (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
         (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (which_alternative == 0)
    return (BYTES_BIG_ENDIAN
            ? "xxpermdi %x0,%x1,%x2,0"
            : "xxpermdi %x0,%x2,%x1,0");

  else if (which_alternative == 1)
    return (BYTES_BIG_ENDIAN
            ? "mtvsrdd %x0,%1,%2"
            : "mtvsrdd %x0,%2,%1");

  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])

;; Combiner patterns to allow creating XXPERMDI's to access either double
;; word element in a vector register.
(define_insn "*vsx_concat_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[2]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (2 * dword);
      return "xxpermdi %x0,%x1,%x3,%4";
    }
  else
    {
      operands[4] = GEN_INT (!dword);
      return "xxpermdi %x0,%x3,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_2"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[3]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (dword);
      return "xxpermdi %x0,%x1,%x2,%4";
    }
  else
    {
      operands[4] = GEN_INT (2 * !dword);
      return "xxpermdi %x0,%x2,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_3"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (vec_concat:VSX_D
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
         (vec_select:<VS_scalar>
          (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
          (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[5] = GEN_INT ((2 * dword1) + dword2);
      return "xxpermdi %x0,%x1,%x3,%5";
    }
  else
    {
      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
      return "xxpermdi %x0,%x3,%x1,%5";
    }
}
  [(set_attr "type" "vecperm")])
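
;; Worked example for the mask computation above (big endian): with
;; dword1 == 1 and dword2 == 0, operands[5] becomes 2*1 + 0 == 2, giving
;; "xxpermdi %x0,%x1,%x3,2", which selects doubleword 1 of operand 1 and
;; doubleword 0 of operand 3.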
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (unspec:V2DF
         [(match_operand:SF 1 "vsx_register_operand" "wa")
          (match_operand:SF 2 "vsx_register_operand" "wa")]
         UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])

;; Concatenate 4 SImode elements into a V4SImode reg.
(define_expand "vsx_init_v4si"
  [(use (match_operand:V4SI 0 "gpc_reg_operand"))
   (use (match_operand:SI 1 "gpc_reg_operand"))
   (use (match_operand:SI 2 "gpc_reg_operand"))
   (use (match_operand:SI 3 "gpc_reg_operand"))
   (use (match_operand:SI 4 "gpc_reg_operand"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
{
  rtx a = gen_reg_rtx (DImode);
  rtx b = gen_reg_rtx (DImode);
  rtx c = gen_reg_rtx (DImode);
  rtx d = gen_reg_rtx (DImode);
  emit_insn (gen_zero_extendsidi2 (a, operands[1]));
  emit_insn (gen_zero_extendsidi2 (b, operands[2]));
  emit_insn (gen_zero_extendsidi2 (c, operands[3]));
  emit_insn (gen_zero_extendsidi2 (d, operands[4]));
  if (!BYTES_BIG_ENDIAN)
    {
      std::swap (a, b);
      std::swap (c, d);
    }

  rtx aa = gen_reg_rtx (DImode);
  rtx ab = gen_reg_rtx (DImode);
  rtx cc = gen_reg_rtx (DImode);
  rtx cd = gen_reg_rtx (DImode);
  emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
  emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
  emit_insn (gen_iordi3 (ab, aa, b));
  emit_insn (gen_iordi3 (cd, cc, d));

  rtx abcd = gen_reg_rtx (V2DImode);
  emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
  emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
  DONE;
})
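
;; For example (big endian, illustrative): with inputs {1, 2, 3, 4} the
;; shift/or sequence above forms the doublewords 0x0000000100000002 and
;; 0x0000000300000004, and vsx_concat_v2di glues them into the V4SI vector
;; {1, 2, 3, 4}.  The std::swap calls make the same layout come out right
;; on little endian.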
3012 "xxpermdi %x0,%x1,%x1,2" 3013 [(set_attr "type" "vecperm")]) 3014 3015(define_insn "xxswapd_<mode>" 3016 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 3017 (vec_select:VSX_D 3018 (match_operand:VSX_D 1 "vsx_register_operand" "wa") 3019 (parallel [(const_int 1) (const_int 0)])))] 3020 "TARGET_VSX" 3021;; AIX does not support extended mnemonic xxswapd. Use the basic 3022;; mnemonic xxpermdi instead. 3023 "xxpermdi %x0,%x1,%x1,2" 3024 [(set_attr "type" "vecperm")]) 3025 3026;; lxvd2x for little endian loads. We need several of 3027;; these since the form of the PARALLEL differs by mode. 3028(define_insn "*vsx_lxvd2x2_le_<mode>" 3029 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 3030 (vec_select:VSX_D 3031 (match_operand:VSX_D 1 "memory_operand" "Z") 3032 (parallel [(const_int 1) (const_int 0)])))] 3033 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" 3034 "lxvd2x %x0,%y1" 3035 [(set_attr "type" "vecload")]) 3036 3037(define_insn "*vsx_lxvd2x4_le_<mode>" 3038 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") 3039 (vec_select:VSX_W 3040 (match_operand:VSX_W 1 "memory_operand" "Z") 3041 (parallel [(const_int 2) (const_int 3) 3042 (const_int 0) (const_int 1)])))] 3043 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" 3044 "lxvd2x %x0,%y1" 3045 [(set_attr "type" "vecload")]) 3046 3047(define_insn "*vsx_lxvd2x8_le_V8HI" 3048 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") 3049 (vec_select:V8HI 3050 (match_operand:V8HI 1 "memory_operand" "Z") 3051 (parallel [(const_int 4) (const_int 5) 3052 (const_int 6) (const_int 7) 3053 (const_int 0) (const_int 1) 3054 (const_int 2) (const_int 3)])))] 3055 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" 3056 "lxvd2x %x0,%y1" 3057 [(set_attr "type" "vecload")]) 3058 3059(define_insn "*vsx_lxvd2x16_le_V16QI" 3060 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 3061 (vec_select:V16QI 3062 (match_operand:V16QI 1 "memory_operand" "Z") 3063 (parallel [(const_int 8) (const_int 9) 3064 (const_int 10) (const_int 11) 3065 (const_int 12) (const_int 13) 3066 (const_int 14) (const_int 15) 3067 (const_int 0) (const_int 1) 3068 (const_int 2) (const_int 3) 3069 (const_int 4) (const_int 5) 3070 (const_int 6) (const_int 7)])))] 3071 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" 3072 "lxvd2x %x0,%y1" 3073 [(set_attr "type" "vecload")]) 3074 3075;; stxvd2x for little endian stores. We need several of 3076;; these since the form of the PARALLEL differs by mode. 
;; stxvd2x for little endian stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_stxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
        (vec_select:VSX_D
         (match_operand:VSX_D 1 "vsx_register_operand" "wa")
         (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
        (vec_select:VSX_W
         (match_operand:VSX_W 1 "vsx_register_operand" "wa")
         (parallel [(const_int 2) (const_int 3)
                    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
        (vec_select:V8HI
         (match_operand:V8HI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
        (vec_select:V16QI
         (match_operand:V16QI 1 "vsx_register_operand" "wa")
         (parallel [(const_int 8) (const_int 9)
                    (const_int 10) (const_int 11)
                    (const_int 12) (const_int 13)
                    (const_int 14) (const_int 15)
                    (const_int 0) (const_int 1)
                    (const_int 2) (const_int 3)
                    (const_int 4) (const_int 5)
                    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Convert a TImode value into V1TImode
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand")
   (match_operand:V1TI 1 "nonimmediate_operand")
   (match_operand:TI 2 "input_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
  DONE;
})

;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
(define_expand "vsx_set_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
   (use (match_operand:QI 3 "const_0_to_1_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx dest = operands[0];
  rtx vec_reg = operands[1];
  rtx value = operands[2];
  rtx ele = operands[3];
  rtx tmp = gen_reg_rtx (<VS_scalar>mode);

  if (ele == const0_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
      DONE;
    }
  else if (ele == const1_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
      DONE;
    }
  else
    gcc_unreachable ();
})
;; Extract a DF/DI element from V2DF/V2DI.
;; Optimize cases where we can do a simple or direct move, or see if we can
;; avoid doing the move at all.

;; There are some unresolved problems with reload that show up if an Altivec
;; register was picked.  Limit the scalar value to FPRs for now.

(define_insn "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
         (parallel
          [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int element = INTVAL (operands[2]);
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);
  int fldDM;

  gcc_assert (IN_RANGE (element, 0, 1));
  gcc_assert (VSX_REGNO_P (op1_regno));

  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      if (op0_regno == op1_regno)
        return ASM_COMMENT_START " vec_extract to same register";

      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
               && TARGET_POWERPC64)
        return "mfvsrd %0,%x1";

      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
        return "fmr %0,%1";

      else if (VSX_REGNO_P (op0_regno))
        return "xxlor %x0,%x1,%x1";

      else
        gcc_unreachable ();
    }

  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
           && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
    return "mfvsrld %0,%x1";

  else if (VSX_REGNO_P (op0_regno))
    {
      fldDM = element << 1;
      if (!BYTES_BIG_ENDIAN)
        fldDM = 3 - fldDM;
      operands[3] = GEN_INT (fldDM);
      return "xxpermdi %x0,%x1,%x1,%3";
    }

  else
    gcc_unreachable ();
}
  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")
   (set_attr "isa" "*,*,p8v,p9v")])
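
;; Worked example for the xxpermdi case above (illustrative): extracting
;; element 0 on little endian with a VSX destination gives
;; fldDM = 3 - (0 << 1) = 3, i.e. "xxpermdi %x0,%x1,%x1,3", which moves the
;; element from doubleword 1 (big endian numbering) into doubleword 0,
;; where scalar floating point values live.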
;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
        (vec_select:<VSX_D:VS_scalar>
         (match_operand:VSX_D 1 "memory_operand" "m,m")
         (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
                                           operands[2], operands[3],
                                           <VSX_D:VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing a single scalar element that is the right location to
;; memory
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
        (vec_select:<VS_scalar>
         (match_operand:VSX_D 1 "register_operand" "d,v,v")
         (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsdx %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")
   (set_attr "isa" "*,p7v,p9v")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
        (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
                             (match_operand:V2DI 2 "gpc_reg_operand" "v")]
                            UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])

;; Variable V2DI/V2DF extract from a register
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
        (unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
                             (match_operand:DI 2 "gpc_reg_operand" "r")]
                            UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:V2DI 4 "=&v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
                                operands[3], operands[4]);
  DONE;
})

;; Variable V2DI/V2DF extract from memory
(define_insn_and_split "*vsx_extract_<mode>_var_load"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
        (unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
                             (match_operand:DI 2 "gpc_reg_operand" "r,r")]
                            UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
                                           operands[2], operands[3],
                                           <VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")])

;; Extract a SF element from V4SF
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
        (vec_select:SF
         (match_operand:V4SF 1 "vsx_register_operand" "wa")
         (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
   (clobber (match_scratch:V4SF 3 "=0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
        op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "fp")])
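
;; Worked example (illustrative): extracting element 2 on big endian gives
;; ele == 2, so xxsldwi rotates the vector left by two words, leaving the
;; requested value in word 0, and xscvspdp then widens it to the internal
;; DF format.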
	(unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q")
		    (match_operand:DI 2 "gpc_reg_operand" "r,r")]
		   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
					   operands[2], operands[3],
					   SFmode);
}
  [(set_attr "type" "fpload,load")])

;; Expand the builtin form of xxpermdi to canonical rtl.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }
  emit_insn (gen (target, op0, op1, perm0, perm1));
  DONE;
})

;; Special version of xxpermdi that retains big-endian semantics.
(define_expand "vsx_xxpermdi_<mode>_be"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }
  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})
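;; To make the mask decoding above concrete (a worked example, not from
;; the original comments): for vec_xxpermdi (a, b, 1), mask = 1 gives
;; perm0 = 0 and perm1 = 3, i.e. the vec_select picks element 0 of the
;; concatenation (doubleword 0 of A) and element 3 (doubleword 1 of B),
;; using big-endian doubleword numbering.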
(define_insn "vsx_xxpermdi2_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_select:VSX_D
	 (vec_concat:<VS_double>
	  (match_operand:VSX_D 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_D 2 "vsx_register_operand" "wa"))
	 (parallel [(match_operand 3 "const_0_to_1_operand" "")
		    (match_operand 4 "const_2_to_3_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int op3, op4, mask;

  /* For little endian, swap operands and invert/swap selectors
     to get the correct xxpermdi.  The operand swap sets up the
     inputs as a little endian array.  The selectors are swapped
     because they are defined to use big endian ordering.  The
     selectors are inverted to get the correct doublewords for
     little endian ordering.  */
  if (BYTES_BIG_ENDIAN)
    {
      op3 = INTVAL (operands[3]);
      op4 = INTVAL (operands[4]);
    }
  else
    {
      op3 = 3 - INTVAL (operands[4]);
      op4 = 3 - INTVAL (operands[3]);
    }

  mask = (op3 << 1) | (op4 - 2);
  operands[3] = GEN_INT (mask);

  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,%3";
  else
    return "xxpermdi %x0,%x2,%x1,%3";
}
  [(set_attr "type" "vecperm")])

;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
;; none of the small types were allowed in a vector register, so we had to
;; extract to a DImode and either do a direct move or store.
(define_expand "vsx_extract_<mode>"
  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
		   (vec_select:<VS_scalar>
		    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
		    (parallel [(match_operand:QI 2 "const_int_operand")])))
	      (clobber (match_scratch:VSX_EXTRACT_I 3))])]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
  /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
  if (TARGET_P9_VECTOR)
    {
      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
					    operands[2]));
      DONE;
    }
})

(define_insn "vsx_extract_<mode>_p9"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
{
  if (which_alternative == 0)
    return "#";

  else
    {
      HOST_WIDE_INT elt = INTVAL (operands[2]);
      HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN
			       ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
			       : elt);

      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
      HOST_WIDE_INT offset = unit_size * elt_adj;

      operands[2] = GEN_INT (offset);
      if (unit_size == 4)
	return "xxextractuw %x0,%x1,%2";
      else
	return "vextractu<wd> %0,%1,%2";
    }
}
  [(set_attr "type" "vecsimple")
   (set_attr "isa" "p9v,*")])

(define_split
  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
	 (parallel [(match_operand:QI 2 "const_int_operand")])))
   (clobber (match_operand:SI 3 "int_reg_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
  [(const_int 0)]
{
  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);

  emit_move_insn (op3, GEN_INT (offset));
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
  else
    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
  DONE;
})
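;; A worked example of the byte offset computation above (illustrative,
;; not from the original comments): extracting element 5 of a V8HI on a
;; little endian system gives elt_adj = 8 - 1 - 5 = 2 and offset
;; = 2 * 2 = 4, so the insn emitted is vextractuh %0,%1,4, with the
;; offset counted in bytes from the big-endian end of the register.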
;; Optimize zero extracts to eliminate the AND after the extract.
(define_insn_and_split "*vsx_extract_<mode>_di_p9"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(zero_extend:DI
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4)
		   (vec_select:<VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (match_dup 3))])]
{
  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
}
  [(set_attr "isa" "p9v,*")])

;; Optimize stores to use the ISA 3.0 scalar store instructions
(define_insn_and_split "*vsx_extract_<mode>_store_p9"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
	 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
   (clobber (match_scratch:SI 4 "=X,&r"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (match_dup 4))])
   (set (match_dup 0)
	(match_dup 3))])

(define_insn_and_split "*vsx_extract_si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z")
	(vec_select:SI
	 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v")
	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
   (clobber (match_scratch:V4SI 3 "=v,v,v"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT
   && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 1)
    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
  else
    vec_tmp = src;

  if (MEM_P (operands[0]))
    {
      if (can_create_pseudo_p ())
	dest = rs6000_force_indexed_or_indirect_mem (dest);

      if (TARGET_P8_VECTOR)
	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
      else
	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
    }

  else if (TARGET_P8_VECTOR)
    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
  else
    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		    gen_rtx_REG (DImode, REGNO (vec_tmp)));

  DONE;
}
  [(set_attr "type" "mftgpr,vecperm,fpstore")
   (set_attr "length" "8")
   (set_attr "isa" "*,p8v,*")])

(define_insn_and_split "*vsx_extract_<mode>_p8"
  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (<MODE>mode == V16QImode)
    {
      if (value != 7)
	emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else if (<MODE>mode == V8HImode)
    {
      if (value != 3)
	emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else
    gcc_unreachable ();

  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		  gen_rtx_REG (DImode, REGNO (vec_tmp)));
  DONE;
}
  [(set_attr "type" "mftgpr")])
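;; A note on the "correct position" checks above (illustrative, not from
;; the original comments): after the little endian renumbering, comparing
;; against 1 (words), 3 (halfwords) or 7 (bytes) asks whether the element
;; already sits at the right end of doubleword 0, which the direct move
;; transfers to the GPR.  For example, extracting element 2 of a V4SI on
;; little endian renumbers it to 4 - 1 - 2 = 1, so no VSPLTW is needed.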
;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:DI 3 "=&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
					   operands[2], operands[3],
					   <VS_scalar>mode);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])

;; Variable V16QI/V8HI/V4SI extract from a register
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r")
	(unspec:<VS_scalar>
	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v")
	  (match_operand:DI 2 "gpc_reg_operand" "r,r")]
	 UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,r"))
   (clobber (match_scratch:V2DI 4 "=X,&v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
}
  [(set_attr "isa" "p9v,*")])

;; Variable V16QI/V8HI/V4SI extract from memory
(define_insn_and_split "*vsx_extract_<mode>_var_load"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r")
	(unspec:<VS_scalar>
	 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q")
	  (match_operand:DI 2 "gpc_reg_operand" "r")]
	 UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
					   operands[2], operands[3],
					   <VS_scalar>mode);
}
  [(set_attr "type" "load")])

;; VSX_EXTRACT optimizations
;; Optimize double d = (double) vec_extract (vi, <n>)
;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
(define_insn_and_split "*vsx_extract_si_<uns>float_df"
  [(set (match_operand:DF 0 "gpc_reg_operand" "=wa")
	(any_float:DF
	 (vec_select:SI
	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
	v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
  DONE;
})

;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type that is supported by the hardware
;; and is not double.  First convert the value to double, and then to the
;; desired type.
(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa")
	(any_float:VSX_EXTRACT_FL
	 (vec_select:SI
	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))
   (clobber (match_scratch:DF 4 "=wa"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  rtx df_tmp = operands[4];
  int value;

  if (!BYTES_BIG_ENDIAN)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
	v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  if (GET_CODE (df_tmp) == SCRATCH)
    df_tmp = gen_reg_rtx (DFmode);

  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));

  if (<MODE>mode == SFmode)
    emit_insn (gen_truncdfsf2 (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
	   && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
    emit_insn (gen_extenddfif2 (dest, df_tmp));
  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
  else
    gcc_unreachable ();

  DONE;
})
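;; A concrete illustration of the two patterns above (not from the
;; original comments): for code along the lines of
;;   float f (vector int vi) { return (float) vec_extract (vi, 2); }
;; the element is splatted into word 0, converted in place with
;; XVCVSXWDP, and only then rounded from double to the narrower type.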
;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
;; vector short or vector unsigned short.
(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
	(float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 4)
	(sign_extend:DI (match_dup 3)))
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
}
  [(set_attr "isa" "<FL_CONV:VSisa>")])

(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa")
	(unsigned_float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
}
  [(set_attr "isa" "<FL_CONV:VSisa>")])

;; V4SI/V8HI/V16QI set operation on ISA 3.0
(define_insn "vsx_set_<mode>_p9"
  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
	(unspec:VSX_EXTRACT_I
	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
{
  int ele = INTVAL (operands[3]);
  int nunits = GET_MODE_NUNITS (<MODE>mode);

  if (!BYTES_BIG_ENDIAN)
    ele = nunits - 1 - ele;

  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
  if (<MODE>mode == V4SImode)
    return "xxinsertw %x0,%x2,%3";
  else
    return "vinsert<wd> %0,%2,%3";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "vsx_set_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "gpc_reg_operand" "wa")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wa"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 5)
	(unspec:V4SF [(match_dup 2)]
		     UNSPEC_VSX_CVDPSPN))
   (parallel [(set (match_dup 4)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 7)])))
	      (clobber (scratch:SI))])
   (set (match_dup 8)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  unsigned int tmp_regno = reg_or_subregno (operands[4]);

  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
  operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3);
  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "12")
   (set_attr "isa" "p9v")])

;; Special case setting 0.0f to a V4SF element
(define_insn_and_split "*vsx_set_v4sf_p9_zero"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "zero_fp_constant" "j")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wa"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 4)
	(const_int 0))
   (set (match_dup 5)
	(unspec:V4SI [(match_dup 5)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")
   (set_attr "isa" "p9v")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  We just need to do an xxinsertw since the element is in the
;; correct location.

(define_insn "*vsx_insert_extract_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))"
{
  int ele = INTVAL (operands[4]);

  if (!BYTES_BIG_ENDIAN)
    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;

  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
  return "xxinsertw %x0,%x2,%4";
}
  [(set_attr "type" "vecperm")])
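;; A note on the "default scalar position" (an illustrative reading of
;; the ISA, not from the original comments): XXINSERTW takes the word to
;; insert from bits 32:63 of its source, i.e. word 1 in big-endian word
;; numbering, and element 1 (BE) or 2 (LE) of a V4SF names exactly that
;; word, so for, e.g.,
;;   x = vec_insert (vec_extract (v2, 1), v1, m)   /* big endian */
;; the extracted element is already where XXINSERTW expects it.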
1 : 2))" 4059 "#" 4060 "&& 1" 4061 [(parallel [(set (match_dup 5) 4062 (vec_select:SI (match_dup 6) 4063 (parallel [(match_dup 3)]))) 4064 (clobber (scratch:SI))]) 4065 (set (match_dup 7) 4066 (unspec:V4SI [(match_dup 8) 4067 (match_dup 5) 4068 (match_dup 4)] 4069 UNSPEC_VSX_SET))] 4070{ 4071 if (GET_CODE (operands[5]) == SCRATCH) 4072 operands[5] = gen_reg_rtx (SImode); 4073 4074 operands[6] = gen_lowpart (V4SImode, operands[2]); 4075 operands[7] = gen_lowpart (V4SImode, operands[0]); 4076 operands[8] = gen_lowpart (V4SImode, operands[1]); 4077} 4078 [(set_attr "type" "vecperm") 4079 (set_attr "isa" "p9v")]) 4080 4081;; Expanders for builtins 4082(define_expand "vsx_mergel_<mode>" 4083 [(use (match_operand:VSX_D 0 "vsx_register_operand")) 4084 (use (match_operand:VSX_D 1 "vsx_register_operand")) 4085 (use (match_operand:VSX_D 2 "vsx_register_operand"))] 4086 "VECTOR_MEM_VSX_P (<MODE>mode)" 4087{ 4088 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); 4089 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); 4090 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); 4091 emit_insn (gen_rtx_SET (operands[0], x)); 4092 DONE; 4093}) 4094 4095(define_expand "vsx_mergeh_<mode>" 4096 [(use (match_operand:VSX_D 0 "vsx_register_operand")) 4097 (use (match_operand:VSX_D 1 "vsx_register_operand")) 4098 (use (match_operand:VSX_D 2 "vsx_register_operand"))] 4099 "VECTOR_MEM_VSX_P (<MODE>mode)" 4100{ 4101 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); 4102 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); 4103 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); 4104 emit_insn (gen_rtx_SET (operands[0], x)); 4105 DONE; 4106}) 4107 4108;; V2DF/V2DI splat 4109;; We separate the register splat insn from the memory splat insn to force the 4110;; register allocator to generate the indexed form of the SPLAT when it is 4111;; given an offsettable memory reference. Otherwise, if the register and 4112;; memory insns were combined into a single insn, the register allocator will 4113;; load the value into a register, and then do a double word permute. 
;; V2DF/V2DI splat
;; We separate the register splat insn from the memory splat insn to force the
;; register allocator to generate the indexed form of the SPLAT when it is
;; given an offsettable memory reference.  Otherwise, if the register and
;; memory insns were combined into a single insn, the register allocator will
;; load the value into a register, and then do a double word permute.
(define_expand "vsx_splat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "input_operand")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx op1 = operands[1];
  if (MEM_P (op1))
    operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
  else if (!REG_P (op1))
    operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1);
})

(define_insn "vsx_splat_<mode>_reg"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   xxpermdi %x0,%x1,%x1,0
   mtvsrdd %x0,%1,%1"
  [(set_attr "type" "vecperm")])

(define_insn "vsx_splat_<mode>_mem"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_duplicate:VSX_D
	 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "lxvdsx %x0,%y1"
  [(set_attr "type" "vecload")])

;; V4SI splat support
(define_insn "vsx_splat_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
	(vec_duplicate:V4SI
	 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
  "TARGET_P9_VECTOR"
  "@
   mtvsrws %x0,%1
   lxvwsx %x0,%y1"
  [(set_attr "type" "vecperm,vecload")])

;; SImode is not currently allowed in vector registers.  This pattern
;; allows us to use direct move to get the value in a vector register
;; so that we can use XXSPLTW
(define_insn "vsx_splat_v4si_di"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
	(vec_duplicate:V4SI
	 (truncate:SI
	  (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "@
   xxspltw %x0,%x1,1
   mtvsrws %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "isa" "p8v,*")])

;; V4SF splat (ISA 3.0)
(define_insn_and_split "vsx_splat_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
	(vec_duplicate:V4SF
	 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))]
  "TARGET_P9_VECTOR"
  "@
   lxvwsx %x0,%y1
   #
   mtvsrws %x0,%1"
  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
  [(set (match_dup 0)
	(unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
   (set (match_dup 0)
	(unspec:V4SF [(match_dup 0)
		      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
  ""
  [(set_attr "type" "vecload,vecperm,mftgpr")
   (set_attr "length" "*,8,*")
   (set_attr "isa" "*,p8v,*")])

;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
	(vec_duplicate:VSX_W
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
	  (parallel
	   [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (3 - INTVAL (operands[2]));

  return "xxspltw %x0,%x1,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxspltw_<mode>_direct"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
	(unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTW))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxspltw %x0,%x1,%2"
  [(set_attr "type" "vecperm")])
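;; A usage sketch for the splat-from-element pattern above (illustrative,
;; not from the original comments):
;;   vector float v2 = vec_splat (v1, 2);
;; becomes xxspltw with the immediate rewritten to 3 - 2 = 1 on little
;; endian, since XXSPLTW numbers words from the big-endian end of the
;; register.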
;; V16QI/V8HI splat support on ISA 2.07
(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
	(vec_duplicate:VSX_SPLAT_I
	 (truncate:<VS_scalar>
	  (match_operand:DI 1 "altivec_register_operand" "v"))))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
  [(set_attr "type" "vecperm")])

;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTD))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
    return "xxpermdi %x0,%x1,%x1,0";
  else
    return "xxpermdi %x0,%x1,%x1,3";
}
  [(set_attr "type" "vecperm")])

;; V4SF/V4SI interleave
(define_insn "vsx_xxmrghw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
	(vec_select:VSX_W
	 (vec_concat:<VS_double>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
	 (parallel [(const_int 0) (const_int 4)
		    (const_int 1) (const_int 5)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrghw %x0,%x1,%x2";
  else
    return "xxmrglw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxmrglw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
	(vec_select:VSX_W
	 (vec_concat:<VS_double>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_W 2 "vsx_register_operand" "wa"))
	 (parallel [(const_int 2) (const_int 6)
		    (const_int 3) (const_int 7)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrglw %x0,%x1,%x2";
  else
    return "xxmrghw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

;; Shift left double by word immediate
(define_insn "vsx_xxsldwi_<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa")
	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa")
		       (match_operand:VSX_L 2 "vsx_register_operand" "wa")
		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_SLDWI))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsldwi %x0,%x1,%x2,%3"
  [(set_attr "type" "vecperm")
   (set_attr "isa" "<VSisa>")])


;; Vector reduction insns and splitters

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa")
	(VEC_reduc:V2DF
	 (vec_concat:V2DF
	  (vec_select:DF
	   (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
	   (parallel [(const_int 1)]))
	  (vec_select:DF
	   (match_dup 1)
	   (parallel [(const_int 0)])))
	 (match_dup 1)))
   (clobber (match_scratch:V2DF 2 "=0,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
	     ? gen_reg_rtx (V2DFmode)
	     : operands[2];
  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
  [(set (match_operand:V4SF 0 "vfloat_operand" "=wa")
	(VEC_reduc:V4SF
	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	 (match_operand:V4SF 1 "vfloat_operand" "wa")))
   (clobber (match_scratch:V4SF 2 "=&wa"))
   (clobber (match_scratch:V4SF 3 "=&wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
  DONE;
}
  [(set_attr "length" "16")
   (set_attr "type" "veccomplex")])
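;; To make the V4SF reduction sequence above concrete (a worked sketch,
;; not from the original comments), for a vector {a, b, c, d} being summed:
;;   xxsldwi tmp2,v,v,2        ; tmp2 = {c, d, a, b}
;;   xvaddsp tmp3,tmp2,v       ; tmp3 = {a+c, b+d, a+c, b+d}
;;   xxsldwi tmp4,tmp3,tmp3,3  ; tmp4 = {b+d, a+c, b+d, a+c}
;;   xvaddsp res,tmp4,tmp3     ; every element now holds a+b+c+d
;; with xvaddsp standing in for whichever VEC_reduc operation is used.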
;; Combiner patterns with the vector reduction patterns that know we can get
;; to the top element of the V2DF array without doing an extract.

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
  [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa")
	(vec_select:DF
	 (VEC_reduc:V2DF
	  (vec_concat:V2DF
	   (vec_select:DF
	    (match_operand:V2DF 1 "vfloat_operand" "wa,wa")
	    (parallel [(const_int 1)]))
	   (vec_select:DF
	    (match_dup 1)
	    (parallel [(const_int 0)])))
	  (match_dup 1))
	 (parallel [(const_int 1)])))
   (clobber (match_scratch:DF 2 "=0,&wa"))]
  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx hi = gen_highpart (DFmode, operands[1]);
  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
	    ? gen_reg_rtx (DFmode)
	    : operands[2];

  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
  [(set (match_operand:SF 0 "vfloat_operand" "=f")
	(vec_select:SF
	 (VEC_reduc:V4SF
	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	  (match_operand:V4SF 1 "vfloat_operand" "wa"))
	 (parallel [(const_int 3)])))
   (clobber (match_scratch:V4SF 2 "=&wa"))
   (clobber (match_scratch:V4SF 3 "=&wa"))
   (clobber (match_scratch:V4SF 4 "=0"))]
  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4, tmp5;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
      tmp5 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
      tmp5 = operands[4];
    }

  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
  DONE;
}
  [(set_attr "length" "20")
   (set_attr "type" "veccomplex")])


;; Power8 Vector fusion.  The fused ops must be physically adjacent.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_dup 0)
			   (match_operand:P 3 "int_reg_operand"))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
			   (match_dup 0))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])


;; ISA 3.0 vector extend sign support

(define_insn "vsx_sign_extend_qi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
	(unspec:VSINT_84
	 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
	 UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsb2<wd> %0,%1"
  [(set_attr "type" "vecexts")])

(define_insn "vsx_sign_extend_hi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
	(unspec:VSINT_84
	 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
	 UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsh2<wd> %0,%1"
  [(set_attr "type" "vecexts")])

(define_insn "*vsx_sign_extend_si_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
	(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
		     UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsw2d %0,%1"
  [(set_attr "type" "vecexts")])


;; ISA 3.0 Binary Floating-Point Support

;; VSX Scalar Extract Exponent Quad-Precision
(define_insn "xsxexpqp_<mode>"
  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
		   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR"
  "xsxexpqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Exponent Double-Precision
(define_insn "xsxexpdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxexpdp %0,%x1"
  [(set_attr "type" "integer")])

;; VSX Scalar Extract Significand Quad-Precision
(define_insn "xsxsigqp_<mode>"
  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
	(unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
		   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR"
  "xsxsigqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Significand Double-Precision
(define_insn "xsxsigdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxsigdp %0,%x1"
  [(set_attr "type" "integer")])

;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
(define_insn "xsiexpqpf_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
	(unspec:IEEE128
	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	  (match_operand:DI 2 "altivec_register_operand" "v")]
	 UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Quad-Precision
(define_insn "xsiexpqp_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
	(unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
			 (match_operand:DI 2 "altivec_register_operand" "v")]
			UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Double-Precision
(define_insn "xsiexpdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DI 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
		   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])

;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
(define_insn "xsiexpdpf"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DF 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
		   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])
;; VSX Scalar Compare Exponents Double-Precision
(define_expand "xscmpexpdp_<code>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:DF
	  [(match_operand:DF 1 "vsx_register_operand" "wa")
	   (match_operand:DF 2 "vsx_register_operand" "wa")]
	  UNSPEC_VSX_SCMPEXPDP)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(CMP_TEST:SI (match_dup 3)
		     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  if (<CODE> == UNORDERED && !HONOR_NANS (DFmode))
    {
      emit_move_insn (operands[0], const0_rtx);
      DONE;
    }

  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpdp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
		     (match_operand:DF 2 "vsx_register_operand" "wa")]
		    UNSPEC_VSX_SCMPEXPDP)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpdp %0,%x1,%x2"
  [(set_attr "type" "fpcompare")])

;; VSX Scalar Compare Exponents Quad-Precision
(define_expand "xscmpexpqp_<code>_<mode>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "vsx_register_operand" "v")
	   (match_operand:IEEE128 2 "vsx_register_operand" "v")]
	  UNSPEC_VSX_SCMPEXPQP)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(CMP_TEST:SI (match_dup 3)
		     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode))
    {
      emit_move_insn (operands[0], const0_rtx);
      DONE;
    }

  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpqp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
			  (match_operand:IEEE128 2 "altivec_register_operand" "v")]
			 UNSPEC_VSX_SCMPEXPQP)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])

;; VSX Scalar Test Data Class Quad-Precision
;; (Expansion for scalar_test_data_class (__ieee128, int))
;; (Has side effect of setting the lt bit if operand 1 is negative,
;; setting the eq bit if any of the conditions tested by operand 2
;; are satisfied, and clearing the gt and unordered bits to zero.)
(define_expand "xststdcqp_<mode>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; VSX Scalar Test Data Class Double- and Single-Precision
;; (The lt bit is set if operand 1 is negative.  The eq bit is set
;; if any of the conditions tested by operand 2 are satisfied.
;; The gt and unordered bits are cleared to zero.)
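;; A usage sketch for these data-class tests (illustrative, not from the
;; original comments; the mask bits follow the ISA 3.0 DCMX encoding,
;; where, e.g., 0x40 tests for NaN and 0x30 for +/- infinity):
;;   if (scalar_test_data_class (d, 0x40))   /* true if d is a NaN */
;;     ...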
(define_expand "xststdc<sd>p"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 4)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
  operands[4] = CONST0_RTX (SImode);
})

;; The VSX Scalar Test Negative Quad-Precision
(define_expand "xststdcnegqp_<mode>"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; The VSX Scalar Test Negative Double- and Single-Precision
(define_expand "xststdcneg<sd>p"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 3)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
  operands[3] = CONST0_RTX (SImode);
})

(define_insn "*xststdcqp_<mode>"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))]
  "TARGET_P9_VECTOR"
  "xststdcqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])

(define_insn "*xststdc<sd>p"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
		      UNSPEC_VSX_STSTDC)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xststdc<sd>p %0,%x1,%2"
  [(set_attr "type" "fpcompare")])

;; VSX Vector Extract Exponent Double and Single Precision
(define_insn "xvxexp<sd>p"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXEXP))]
  "TARGET_P9_VECTOR"
  "xvxexp<sd>p %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Extract Significand Double and Single Precision
(define_insn "xvxsig<sd>p"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXSIG))]
  "TARGET_P9_VECTOR"
  "xvxsig<sd>p %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Insert Exponent Double and Single Precision
(define_insn "xviexp<sd>p"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VIEXP))]
  "TARGET_P9_VECTOR"
  "xviexp<sd>p %x0,%x1,%x2"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Test Data Class Double and Single Precision
;; The corresponding elements of the result vector are all ones
;; if any of the conditions tested by operand 2 are satisfied.
(define_insn "xvtstdc<sd>p"
  [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa")
	(unspec:<VSI>
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:SI 2 "u7bit_cint_operand" "n")]
	 UNSPEC_VSX_VTSTDC))]
  "TARGET_P9_VECTOR"
  "xvtstdc<sd>p %x0,%x1,%2"
  [(set_attr "type" "vecsimple")])

;; ISA 3.0 String Operations Support

;; Compare vectors producing a vector result and a predicate, setting CR6
;; to indicate a combined status.  This pattern matches v16qi, v8hi, and
;; v4si modes; it does not match v2df, v4sf, or v2di modes because those
;; are expanded to use Power8 instructions.
(define_insn "*vsx_ne_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
		 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v")
	(ne:VSX_EXTRACT_I (match_dup 1)
			  (match_dup 2)))]
  "TARGET_P9_VECTOR"
  "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])

(define_insn "*vector_nez_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC [(unspec:VI
		     [(match_operand:VI 1 "gpc_reg_operand" "v")
		      (match_operand:VI 2 "gpc_reg_operand" "v")]
		     UNSPEC_NEZ_P)]
		   UNSPEC_PREDICATE))
   (set (match_operand:VI 0 "gpc_reg_operand" "=v")
	(unspec:VI [(match_dup 1)
		    (match_dup 2)]
		   UNSPEC_NEZ_P))]
  "TARGET_P9_VECTOR"
  "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Return first position of match between vectors using natural order
;; for both LE and BE execution modes.
(define_expand "first_match_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;

  rtx cmp_result = gen_reg_rtx (<MODE>mode);
  rtx not_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));

  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})
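;; How the index is recovered above (a worked note, not from the original
;; comments): vclzlsbb/vctzlsbb count, from the appropriate end, the bytes
;; whose least-significant bit is zero, yielding the byte offset of the
;; first matching element.  For V8HI (element size 2) the shift count is
;; 2 / 2 = 1 and for V4SI (element size 4) it is 4 / 2 = 2, which in both
;; cases equals log2 of the element size, converting the byte offset into
;; an element index.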
;; Return first position of match between vectors or end of string (EOS) using
;; natural element order for both LE and BE execution modes.
(define_expand "first_match_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that do not match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors using natural
;; element order for both LE and BE execution modes.
(define_expand "first_mismatch_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmp_result = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					    operands[2]));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors or end of string (EOS)
;; using natural element order for both LE and BE execution modes.
(define_expand "first_mismatch_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements did not match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Load VSX Vector with Length
(define_expand "lxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand" "b")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
		      UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])
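;; A usage sketch for the length-controlled loads above (illustrative,
;; not from the original comments):
;;   vector unsigned char v = vec_xl_len (ptr, n);
;; loads only N bytes.  The expander shifts N left by 56 because LXVL
;; takes the length from the most-significant byte of the length
;; register.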

;; Load VSX Vector with Length
(define_expand "lxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand" "b")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
		      UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])

;; Expand for builtin xl_len_r
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
					  shift_mask));
  DONE;
})

(define_insn "stxvll"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
		      UNSPEC_STXVLL))]
  "TARGET_P9_VECTOR"
  "stxvll %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Store VSX Vector with Length
(define_expand "stxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Expand for builtin xst_len_r
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
					  shift_mask));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})
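
;; Usage sketch for the length-controlled loads and stores above: lxvl and
;; stxvl back the ISA 3.0 built-ins vec_xl_len and vec_xst_len (64-bit
;; only), which transfer just the first N bytes.  Assuming -mcpu=power9:
;;
;;	#include <altivec.h>
;;	#include <stddef.h>
;;
;;	/* Copy n bytes, 0 <= n <= 16, without touching storage past n.  */
;;	void
;;	copy_upto_16 (unsigned char *dst, unsigned char *src, size_t n)
;;	{
;;	  vector unsigned char v = vec_xl_len (src, n);
;;	  vec_xst_len (v, dst, n);
;;	}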
%0,%1,%2" 5143 [(set_attr "type" "vecsimple")]) 5144 5145;; Vector Compare Not Equal Half Word (specified/not+eq:) 5146(define_insn "vcmpneh" 5147 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") 5148 (not:V8HI 5149 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v") 5150 (match_operand:V8HI 2 "altivec_register_operand" "v"))))] 5151 "TARGET_P9_VECTOR" 5152 "vcmpneh %0,%1,%2" 5153 [(set_attr "type" "vecsimple")]) 5154 5155;; Vector Compare Not Equal or Zero Half Word 5156(define_insn "vcmpnezh" 5157 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") 5158 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v") 5159 (match_operand:V8HI 2 "altivec_register_operand" "v")] 5160 UNSPEC_VCMPNEZH))] 5161 "TARGET_P9_VECTOR" 5162 "vcmpnezh %0,%1,%2" 5163 [(set_attr "type" "vecsimple")]) 5164 5165;; Vector Compare Not Equal Word (specified/not+eq:) 5166(define_insn "vcmpnew" 5167 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") 5168 (not:V4SI 5169 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v") 5170 (match_operand:V4SI 2 "altivec_register_operand" "v"))))] 5171 "TARGET_P9_VECTOR" 5172 "vcmpnew %0,%1,%2" 5173 [(set_attr "type" "vecsimple")]) 5174 5175;; Vector Compare Not Equal or Zero Word 5176(define_insn "vcmpnezw" 5177 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") 5178 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v") 5179 (match_operand:V4SI 2 "altivec_register_operand" "v")] 5180 UNSPEC_VCMPNEZW))] 5181 "TARGET_P9_VECTOR" 5182 "vcmpnezw %0,%1,%2" 5183 [(set_attr "type" "vecsimple")]) 5184 5185;; Vector Count Leading Zero Least-Significant Bits Byte 5186(define_insn "vclzlsbb_<mode>" 5187 [(set (match_operand:SI 0 "register_operand" "=r") 5188 (unspec:SI 5189 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")] 5190 UNSPEC_VCLZLSBB))] 5191 "TARGET_P9_VECTOR" 5192 "vclzlsbb %0,%1" 5193 [(set_attr "type" "vecsimple")]) 5194 5195;; Vector Count Trailing Zero Least-Significant Bits Byte 5196(define_insn "vctzlsbb_<mode>" 5197 [(set (match_operand:SI 0 "register_operand" "=r") 5198 (unspec:SI 5199 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")] 5200 UNSPEC_VCTZLSBB))] 5201 "TARGET_P9_VECTOR" 5202 "vctzlsbb %0,%1" 5203 [(set_attr "type" "vecsimple")]) 5204 5205;; Vector Extract Unsigned Byte Left-Indexed 5206(define_insn "vextublx" 5207 [(set (match_operand:SI 0 "register_operand" "=r") 5208 (unspec:SI 5209 [(match_operand:SI 1 "register_operand" "r") 5210 (match_operand:V16QI 2 "altivec_register_operand" "v")] 5211 UNSPEC_VEXTUBLX))] 5212 "TARGET_P9_VECTOR" 5213 "vextublx %0,%1,%2" 5214 [(set_attr "type" "vecsimple")]) 5215 5216;; Vector Extract Unsigned Byte Right-Indexed 5217(define_insn "vextubrx" 5218 [(set (match_operand:SI 0 "register_operand" "=r") 5219 (unspec:SI 5220 [(match_operand:SI 1 "register_operand" "r") 5221 (match_operand:V16QI 2 "altivec_register_operand" "v")] 5222 UNSPEC_VEXTUBRX))] 5223 "TARGET_P9_VECTOR" 5224 "vextubrx %0,%1,%2" 5225 [(set_attr "type" "vecsimple")]) 5226 5227;; Vector Extract Unsigned Half Word Left-Indexed 5228(define_insn "vextuhlx" 5229 [(set (match_operand:SI 0 "register_operand" "=r") 5230 (unspec:SI 5231 [(match_operand:SI 1 "register_operand" "r") 5232 (match_operand:V8HI 2 "altivec_register_operand" "v")] 5233 UNSPEC_VEXTUHLX))] 5234 "TARGET_P9_VECTOR" 5235 "vextuhlx %0,%1,%2" 5236 [(set_attr "type" "vecsimple")]) 5237 5238;; Vector Extract Unsigned Half Word Right-Indexed 5239(define_insn "vextuhrx" 5240 [(set 

;; Vector Count Leading Zero Least-Significant Bits Byte
(define_insn "vclzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCLZLSBB))]
  "TARGET_P9_VECTOR"
  "vclzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Count Trailing Zero Least-Significant Bits Byte
(define_insn "vctzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCTZLSBB))]
  "TARGET_P9_VECTOR"
  "vctzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])
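
;; Usage sketch: for V16QI these two patterns back the ISA 3.0 built-ins
;; vec_cntlz_lsbb and vec_cnttz_lsbb, which count the leading (trailing)
;; byte elements whose least-significant bit is zero.  Assuming
;; -mcpu=power9:
;;
;;	#include <altivec.h>
;;
;;	/* Number of leading byte elements of MASK with a clear low bit.  */
;;	int
;;	leading_clear_lsbs (vector unsigned char mask)
;;	{
;;	  return vec_cntlz_lsbb (mask);
;;	}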

;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector insert/extract word at arbitrary byte values.  Note that the
;; little-endian version needs to adjust the byte number, and insert4b must
;; also adjust the V4SI source element.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
		      (match_operand:QI 2 "const_0_to_12_operand" "n")]
		     UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
		       (match_operand:V16QI 2 "vsx_register_operand")
		       (match_operand:QI 3 "const_0_to_12_operand")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    {
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
		       (match_operand:V16QI 2 "vsx_register_operand" "0")
		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])
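
;; Usage sketch: extract4b and insert4b underlie the ISA 3.0 built-ins
;; vec_extract4b and vec_insert4b; the byte offset must be a constant in
;; the range 0..12.  Assuming -mcpu=power9:
;;
;;	#include <altivec.h>
;;
;;	/* Fetch the 4-byte word starting at byte offset 4 of V.  */
;;	vector unsigned long long
;;	word_at_offset_4 (vector unsigned char v)
;;	{
;;	  return vec_extract4b (v, 4);
;;	}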

;; Extract four 32-bit float values from the left four elements of an
;; eight-element vector of 16-bit floats.
(define_expand "vextract_fp_from_shorth"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use a vperm to move
     src half words 0,1,2,3 (LE) or src half words 4,5,6,7 (BE) into
     position for the conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Extract four 32-bit float values from the right four elements of an
;; eight-element vector of 16-bit floats.
(define_expand "vextract_fp_from_shortl"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
  "TARGET_P9_VECTOR"
{
  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};

  int i;
  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use a vperm to move
     src half words 4,5,6,7 (LE) or src half words 0,1,2,3 (BE) into
     position for the conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})
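
;; Usage sketch: the two expanders above back the ISA 3.0 built-ins
;; vec_extract_fp32_from_shorth and vec_extract_fp32_from_shortl, which
;; widen four IEEE 16-bit floats to 32-bit floats.  Assuming -mcpu=power9:
;;
;;	#include <altivec.h>
;;
;;	/* Widen the left four F16 elements of V to a vector of F32.  */
;;	vector float
;;	f16_left_to_f32 (vector unsigned short v)
;;	{
;;	  return vec_extract_fp32_from_shorth (v);
;;	}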

;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of a vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* We want the elements in reverse order relative to the endian mode
	 in use, i.e. in LE mode, put the elements in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
					   operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])
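
;; Usage sketch: revb_<mode> backs the vec_revb built-in; on ISA 3.0 it
;; becomes a single xxbr[hwdq] instruction, and otherwise a vperm with the
;; byte-swap selector, as the expander above shows.  For example, with
;; -mcpu=power9:
;;
;;	#include <altivec.h>
;;
;;	/* Byte-reverse each 32-bit element (endianness conversion).  */
;;	vector unsigned int
;;	bswap_each_word (vector unsigned int v)
;;	{
;;	  return vec_revb (v);
;;	}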

;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR	 0)		;; GPR temporary
   (SFBOOL_TMP_VSX	 1)		;; vector temporary
   (SFBOOL_MFVSR_D	 2)		;; move to GPR dest
   (SFBOOL_MFVSR_A	 3)		;; move to GPR src
   (SFBOOL_BOOL_D	 4)		;; and/ior/xor dest
   (SFBOOL_BOOL_A1	 5)		;; and/ior/xor arg1
   (SFBOOL_BOOL_A2	 6)		;; and/ior/xor arg2
   (SFBOOL_SHL_D	 7)		;; shift left dest
   (SFBOOL_SHL_A	 8)		;; shift left arg
   (SFBOOL_MTVSR_D	 9)		;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF	10)		;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI	11)		;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI	12)		;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF	13)])		;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations that use logical
;; operations to pick apart SFmode values.  For example, there is code from
;; e_powf.c after macro expansion that looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after
;; a shift and do the VSX logical operation there.

;; The insns for dealing with SFmode in GPR registers look like:
;;	(set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;;	(set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;;	(set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;;	(set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;;	(set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;;	(set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))

(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
	(zero_extend:DI
	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
		   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers even when the modes are different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
	   && REGNO (operands[SFBOOL_MFVSR_D])
		== REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"

  [(set (match_dup SFBOOL_TMP_GPR)
	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
		   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
	(match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
			  (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
			  ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})
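
;; As a rough illustration of the peephole (the exact code depends on
;; register allocation and the mask value), a masking sequence along the
;; lines of
;;	mfvsrwz rA,vB		; direct move VSX -> GPR
;;	rlwinm  rC,rA,0,0,19	; AND with the 0xfffff000 mask
;;	sldi    rC,rC,32
;;	mtvsrd  vD,rC		; direct move GPR -> VSX
;; becomes one that never moves the SFmode data out of the vector unit:
;;	<load mask into rT>
;;	sldi    rT,rT,32
;;	mtvsrd  vT,rT
;;	xxland  vD,vB,vT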

;; Support signed/unsigned long long to float conversion vectorization
;; (a usage sketch appears at the end of this section).
;; Note that any_float (pc) here is just for code attribute <su>.
(define_expand "vec_pack<su>_float_v2di"
  [(match_operand:V4SF 0 "vfloat_operand")
   (match_operand:V2DI 1 "vint_operand")
   (match_operand:V2DI 2 "vint_operand")
   (any_float (pc))]
  "TARGET_VSX"
{
  rtx r1 = gen_reg_rtx (V4SFmode);
  rtx r2 = gen_reg_rtx (V4SFmode);
  emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
  emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
  rs6000_expand_extract_even (operands[0], r1, r2);
  DONE;
})

;; Support float to signed/unsigned long long conversion vectorization.
;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

;; ISA 3.1 (Power10) conversions between bfloat16 and single precision
(define_insn "vsx_<xvcvbf16>"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
		      XVCVBF16))]
  "TARGET_POWER10"
  "<xvcvbf16> %x0,%x1"
  [(set_attr "type" "vecfloat")])
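
;; Usage sketch for the conversion expanders above: with -O3 and VSX
;; enabled, the vectorizer can use vec_pack<su>_float_v2di (and the
;; vec_unpack_<su>fix_trunc_{hi,lo}_v4sf expanders for the opposite
;; direction) on conversion loops such as:
;;
;;	/* Two V2DI inputs are converted and packed into one V4SF.  */
;;	void
;;	ll_to_flt (float *restrict out, const long long *restrict in, int n)
;;	{
;;	  int i;
;;	  for (i = 0; i < n; i++)
;;	    out[i] = (float) in[i];
;;	}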