;; VSX patterns.
;; Copyright (C) 2009-2018 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

;; Element-size letter per mode (h/w/d/q) — presumably the suffix for the
;; xxbr<x> byte-reverse insns used by later patterns; TODO confirm at the
;; use sites (outside this chunk).
(define_mode_attr VSX_XXBR [(V8HI "h")
                            (V4SI "w")
                            (V4SF "w")
                            (V2DF "d")
                            (V2DI "d")
                            (V1TI "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
                       (V8HI "vw4")
                       (V4SI "vw4")
                       (V4SF "vw4")
                       (V2DF "vd2")
                       (V2DI "vd2")
                       (DF "d")
                       (TF "vd2")
                       (KF "vd2")
                       (V1TI "vd2")
                       (TI "vd2")])

;; Map into the appropriate suffix based on the type
(define_mode_attr VSs [(V16QI "sp")
                       (V8HI "sp")
                       (V4SI "sp")
                       (V4SF "sp")
                       (V2DF "dp")
                       (V2DI "dp")
                       (DF "dp")
                       (SF "sp")
                       (TF "dp")
                       (KF "dp")
                       (V1TI "dp")
                       (TI "dp")])

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")
                       (V8HI "v")
                       (V4SI "v")
                       (V4SF "wf")
                       (V2DI "wd")
                       (V2DF "wd")
                       (DI "wi")
                       (DF "ws")
                       (SF "ww")
                       (TF "wp")
                       (KF "wq")
                       (V1TI "v")
                       (TI "wt")])

;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
;; hold the data
(define_mode_attr VSr2 [(V2DF "wd")
                        (V4SF "wf")
                        (DF "ws")
                        (SF "ww")
                        (DI "wi")
                        (KF "wq")
                        (TF "wp")])

(define_mode_attr VSr3 [(V2DF "wa")
                        (V4SF "wa")
                        (DF "ws")
                        (SF "ww")
                        (DI "wi")
                        (KF "wq")
                        (TF "wp")])

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4 [(SF "ws")
                        (DF "f")
                        (V2DF "wd")
                        (V4SF "v")])

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5 [(SF "ws")
                        (DF "f")
                        (V2DF "v")
                        (V4SF "wd")])

;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
;; allocated first).
(define_mode_attr VSa [(V16QI "wa")
                       (V8HI "wa")
                       (V4SI "wa")
                       (V4SF "wa")
                       (V2DI "wa")
                       (V2DF "wa")
                       (DI "wi")
                       (DF "ws")
                       (SF "ww")
                       (V1TI "wa")
                       (TI "wt")
                       (TF "wp")
                       (KF "wq")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")
                       (V8HI "??r")
                       (V4SI "??r")
                       (V4SF "??r")
                       (V2DI "??r")
                       (V2DF "??r")
                       (V1TI "??r")
                       (KF "??r")
                       (TF "??r")
                       (TI "r")])

;; Same size integer type for floating point data (lower-case form)
(define_mode_attr VSi [(V4SF "v4si")
                       (V2DF "v2di")
                       (DF "di")])

;; Same size integer type for floating point data (upper-case mode name)
(define_mode_attr VSI [(V4SF "V4SI")
                       (V2DF "V2DI")
                       (DF "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv [(V16QI "v")
                       (V8HI "v")
                       (V4SI "v")
                       (V4SF "v")
                       (V2DI "v")
                       (V2DF "v")
                       (V1TI "v")
                       (DF "s")
                       (KF "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF "fp")])

(define_mode_attr VSfptype_simple [(V2DF "fp_addsub_d")
                                   (V4SF "fp_addsub_s")
                                   (DF "fp_addsub_d")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")
                              (V4SF "vecfloat")
                              (DF "dmul")])

(define_mode_attr VSfptype_mul [(V2DF "fp_mul_d")
                                (V4SF "fp_mul_s")
                                (DF "fp_mul_d")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")
                              (V4SF "vecfdiv")
                              (DF "ddiv")])

(define_mode_attr VSfptype_div [(V2DF "fp_div_d")
                                (V4SF "fp_div_s")
                                (DF "fp_div_d")])

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
;; the scalar sqrt
(define_mode_attr VStype_sqrt [(V2DF "dsqrt")
                               (V4SF "ssqrt")
                               (DF "dsqrt")])

(define_mode_attr VSfptype_sqrt [(V2DF "fp_sqrt_d")
                                 (V4SF "fp_sqrt_s")
                                 (DF "fp_sqrt_d")])

;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this rather than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])

;; Result mode of an sp<->dp conversion for each source mode
(define_mode_attr VS_spdp_res [(DF "V4SF")
                               (V4SF "V2DF")
                               (V2DF "V4SF")])

;; Mnemonic used for the sp<->dp conversion of each source mode
(define_mode_attr VS_spdp_insn [(DF "xscvdpsp")
                                (V4SF "xvcvspdp")
                                (V2DF "xvcvdpsp")])

;; Scheduling type of the sp<->dp conversion of each source mode
(define_mode_attr VS_spdp_type [(DF "fp")
                                (V4SF "vecdouble")
                                (V2DF "vecdouble")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI "TI")
                             (V2DF "DF")
                             (V2DI "DI")
                             (V4SF "SF")
                             (V4SI "SI")
                             (V8HI "HI")
                             (V16QI "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")
                             (V4SF "V8SF")
                             (V2DI "V4DI")
                             (V2DF "V4DF")
                             (V1TI "V2TI")])

;; Map register class for 64-bit element in 128-bit vector for direct moves
;; to/from gprs
(define_mode_attr VS_64dm [(V2DF "wk")
                           (V2DI "wj")])

;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves
(define_mode_attr VS_64reg [(V2DF "ws")
                            (V2DI "wi")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84 [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])

;; Element-width letter for the extract/insert mnemonics
(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI "h")
                                     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI "const_0_to_7_operand")
                                         (V4SI "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI "v")
                          (V4SI "wa")])

;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for
;; the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_CVSPSXDS
   UNSPEC_VSX_CVSPUXDS
   UNSPEC_VSX_CVSXWSP
   UNSPEC_VSX_CVUXWSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_XVCVSXDDP
   UNSPEC_VSX_XVCVUXDDP
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCDPSP
   UNSPEC_VSX_XVCVDPUXDS
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVSPSXWS
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VEC_INIT
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
  ])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  /* Replace the source memory address with masked address.  */
	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
	  emit_insn (lvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches lvx
	     instruction, so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
					 : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  Use reg_or_subregno rather than REGNO: before reload
     operands[0] may be a SUBREG of a pseudo, on which REGNO is not valid.
     This matches the VSX_D pattern above.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  /* Replace the source memory address with masked address.  */
	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
	  emit_insn (lvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches lvx
	     instruction, so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
					 : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  Use reg_or_subregno rather than REGNO: before reload
     operands[0] may be a SUBREG of a pseudo, on which REGNO is not valid.
     This matches the VSX_D pattern above.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  /* Replace the source memory address with masked address.  */
	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
	  emit_insn (lvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches lvx
	     instruction, so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
					 : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  Use reg_or_subregno rather than REGNO: before reload
     operands[0] may be a SUBREG of a pseudo, on which REGNO is not valid.
     This matches the VSX_D pattern above.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[0]) >= FIRST_PSEUDO_REGISTER)
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  /* Replace the source memory address with masked address.  */
	  rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
	  emit_insn (lvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches lvx
	     instruction, so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
					 : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
	  emit_insn (stvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches stvx instruction,
	     so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
					 : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
	  emit_insn (stvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches stvx instruction,
	     so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
					 : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
	  emit_insn (stvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches stvx instruction,
	     so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
					 : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
;; Post-reload V8HI LE store split: swap the (hard) source register in
;; place, store the swapped value, then swap it back in case it is still
;; live — no scratch register is available after reload.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

;; Placeholder insn for an LE-permuted V16QI store; always split.
(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

;; Pre-reload V16QI LE store split: permute into a scratch, then store the
;; permuted value; or, when the access is 128-bit aligned and the source is
;; a pseudo / altivec register, emit an stvx-form store instead.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && ((reg_or_subregno (operands[1]) >= FIRST_PSEUDO_REGISTER)
	  || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
	{
	  rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
	  emit_insn (stvx_set_expr);
	  DONE;
	}
      else if (rs6000_quadword_masked_address_p (mem_address))
	{
	  /* This rtl is already in the form that matches stvx instruction,
	     so leave it alone.  */
	  DONE;
	}
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
					 : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
;; Post-reload V16QI LE store split: swap the (hard) source register in
;; place, store the swapped value, then swap it back in case it is still
;; live — no scratch register is available after reload.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or the
;; special V1TI container class, for which it is not appropriate to use
;; vec_select.
;; Swap the two 64-bit halves of a 128-bit value, modeled as a rotate by 64.
;; Handles register-register (xxpermdi), swapped load/store (lxvd2x/stxvd2x),
;; and GPR-pair variants.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
        (rotate:VSX_TI
          (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
          (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "4,4,4,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

;; Two back-to-back 64-bit rotates cancel out: either delete the insn
;; entirely (same register) or turn it into a plain copy (xxlor).
(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
        (rotate:VSX_TI
          (rotate:VSX_TI
            (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
            (const_int 64))
          (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      /* Nothing to do; emit a note so the stream is not left empty.  */
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

;; LE load of a 128-bit scalar/V1TI: load with the halves swapped, then
;; swap them back via two permutes (through a scratch when possible).
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
	     ? gen_reg_rtx_and_attrs (operands[0])
	     : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")])

;; Placeholder insn for an LE 128-bit scalar/V1TI store; split below.
(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")])

;; Pre-reload split of the LE 128-bit store: permute into a scratch, then
;; emit the swapped store.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
	     ? gen_reg_rtx_and_attrs (operands[0])
	     : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
;; Swapping load followed by a GPR-to-GPR swap cancels out; use a direct load
;; when the intermediate register is dead (or is itself the destination).
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
	(rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
		       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
	(rotate:VSX_TI (match_dup 0)
		       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; Likewise for the store direction: GPR swap followed by a swapping store
;; becomes a plain store when the temporary dies.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
	(rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
		       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
	(rotate:VSX_TI (match_dup 0)
		       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
	(rotate:TI (match_operand:TI 1 "vsx_register_operand")
		   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
	(rotate:TI (match_dup 0)
		   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
;; Post-reload form of the LE permuting store: no scratch is available, so
;; swap the source register in place, store it, and swap it back.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
	(match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  /* Mask to 8 bits so the immediate is always printable.  */
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

;; Splat constants in wider element modes that XXSPLTIB can materialize
;; directly (no follow-up sign extension needed).
(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
	(match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  /* The predicate guarantees a single-insn constant; verify.  */
  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

;; Constants that need XXSPLTIB followed by a sign-extension of the byte
;; splat to the wider element mode (2 insns).
(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
	(match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
	     ? gen_reg_rtx (V16QImode)
	     : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])


;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.
;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
;; 128-bit move for 64-bit targets.  The table above names the 18 constraint
;; alternatives in order; the assembly is chosen by rs6000_output_move_128bit.
(define_insn "*vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,    r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,      <??r>,     wo,        v,
                ?<VSa>,    *r,        v,         ??r,       wZ,        v")

	(match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,    we,        r,         r,
                wQ,        Y,         r,        r,         wE,        jwM,
                ?jwM,      jwM,       W,         W,         v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,    *,         vecsimple, vecsimple,
                vecsimple, *,         *,         *,         vecstore,  vecload")

   (set_attr "length"
               "4,         4,         4,        8,         4,         8,
                8,         8,         8,        8,         4,         4,
                4,         8,         20,        20,        4,         4")])

;;              VSX store  VSX load   VSX move  GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1  GPR 0/-1   VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
;; 128-bit move for 32-bit targets (no 128-bit GPR-pair forms; GPR moves take
;; four 32-bit registers, hence the longer lengths).
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,    ??r,       ??Y,       <??r>,
                wo,        v,         ?<VSa>,   *r,        v,         ??r,
                wZ,        v")

	(match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,    Y,         r,         r,
                wE,        jwM,       ?jwM,     jwM,       W,         W,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,     store,     *,
                vecsimple, vecsimple, vecsimple, *,        *,         *,
                vecstore,  vecload")

   (set_attr "length"
               "4,         4,         4,        16,        16,        16,
                4,         4,         4,        16,        20,        32,
                4,         4")])
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
	(match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

;; Store counterpart of vsx_load_<mode>; identical LE pre-P9 handling.
(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
	(match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
;; Element-reversing V2DI load: lxvd2x swaps the two doublewords on LE.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(vec_select:V2DI
	  (match_operand:V2DI 1 "memory_operand" "Z")
	  (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

;; V1TI has one element, but the bytes within it still need the doubleword
;; swap after lxvd2x; hence the extra xxpermdi.
(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(vec_select:V1TI
	  (match_operand:V1TI 1 "memory_operand" "Z")
	  (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(vec_select:V2DF
	  (match_operand:V2DF 1 "memory_operand" "Z")
	  (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(vec_select:V4SI
	  (match_operand:V4SI 1 "memory_operand" "Z")
	  (parallel [(const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(vec_select:V4SF
	  (match_operand:V4SF 1 "memory_operand" "Z")
	  (parallel [(const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

;; Without ISA 3.0's lxvh8x, synthesize the V8HI element reversal from an
;; element-reversed V4SI load plus a vperm that swaps halfwords within words.
(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "memory_operand" "Z")
	  (parallel [(const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
	perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
		       gen_rtx_CONST_VECTOR (V16QImode,
					     gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
						subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "memory_operand" "Z")
	  (parallel [(const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])

;; Same scheme for V16QI: element-reversed word load plus a byte permute.
(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(vec_select:V16QI
	  (match_operand:V16QI 1 "memory_operand" "Z")
	  (parallel [(const_int 15) (const_int 14)
		     (const_int 13) (const_int 12)
		     (const_int 11) (const_int 10)
		     (const_int 9) (const_int 8)
		     (const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
	perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
		       gen_rtx_CONST_VECTOR (V16QImode,
					     gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2,
						 subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(vec_select:V16QI
	  (match_operand:V16QI 1 "memory_operand" "Z")
	  (parallel [(const_int 15) (const_int 14)
		     (const_int 13) (const_int 12)
		     (const_int 11) (const_int 10)
		     (const_int 9) (const_int 8)
		     (const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvb16x %x0,%y1"
  [(set_attr "type" "vecload")])

;; V1TI element-reversing store: swap the source in place (hence the clobber)
;; and store with stxvd2x.
;; NOTE(review): the condition tests VECTOR_MEM_VSX_P (V2DImode) while the
;; ld counterpart tests V1TImode — presumably intentional since the insn is
;; implemented with V2DI-mode stxvd2x, but confirm against the maintained
;; rs6000 sources before changing.
(define_insn "vsx_st_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "memory_operand" "=Z")
	(vec_select:V1TI
	  (match_operand:V1TI 1 "vsx_register_operand" "+wa")
	  (parallel [(const_int 0)])))
   (clobber (match_dup 1))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
{
  return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0";
}
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=Z")
	(vec_select:V2DF
	  (match_operand:V2DF 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
	(vec_select:V2DI
	  (match_operand:V2DI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=Z")
	(vec_select:V4SF
	  (match_operand:V4SF 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "vsx_st_elemrev_v4si"
  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
	(vec_select:V4SI
	  (match_operand:V4SI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "stxvw4x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Without stxvh8x, permute the halfwords into word-reversed order first and
;; then use the element-reversing V4SI store.
(define_expand "vsx_st_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V8HImode);
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      for (i = 0; i < 16; ++i)
	perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
		       gen_rtx_CONST_VECTOR (V16QImode,
					     gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1],
						operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

;; NOTE(review): this pattern matches the same RTL as vsx_st_elemrev_v2di
;; above (which has no !TARGET_P9_VECTOR guard) and so appears to be
;; shadowed; verify whether it is still reachable before removing.
(define_insn "*vsx_st_elemrev_v2di_internal"
  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
	(vec_select:V2DI
	  (match_operand:V2DI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_st_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvh8x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Same scheme for V16QI stores pre-P9: byte permute, then reversed word store.
(define_expand "vsx_st_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
	(vec_select:V16QI
	  (match_operand:V16QI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 15) (const_int 14)
		     (const_int 13) (const_int 12)
		     (const_int 11) (const_int 10)
		     (const_int 9) (const_int 8)
		     (const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx mem_subreg, subreg, perm[16], pcv;
      rtx tmp = gen_reg_rtx (V16QImode);
      /* 3 is leftmost element in register */
      unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
      int i;

      for (i = 0; i < 16; ++i)
	perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
		       gen_rtx_CONST_VECTOR (V16QImode,
					     gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1],
						 operands[1], pcv));
      subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0);
      mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0);
      emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg));
      DONE;
    }
})

(define_insn "*vsx_st_elemrev_v16qi_internal"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
	(vec_select:V16QI
	  (match_operand:V16QI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 15) (const_int 14)
		     (const_int 13) (const_int 12)
		     (const_int 11) (const_int 10)
		     (const_int 9) (const_int 8)
		     (const_int 7) (const_int 6)
		     (const_int 5) (const_int 4)
		     (const_int 3) (const_int 2)
		     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])


;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
;; Vector FP add; first alternative is the mode's preferred class, the
;; second (discouraged with ?) any VSX register.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_mul>")])

; Emulate vector with scalar for vec_mul in V2DImode: extract both elements,
; multiply them as DImode scalars (libcall path on 32-bit), and re-concatenate.
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
		      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "mul")])

(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")
   (set_attr "fp_type" "<VSfptype_div>")])

; Emulate vector with scalar for vec_div in V2DImode (signed); 32-bit targets
; go through the DImode sdiv libcall.
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
		      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
					    op5, LCT_NORMAL, DImode,
					    op3, DImode,
					    op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
					    op3, LCT_NORMAL, DImode,
					    op3, DImode,
					    op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

; Unsigned V2DI division, same scalar-emulation scheme as vsx_div_v2di.
(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
		      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
					    op5, LCT_NORMAL, DImode,
					    op3, DImode,
					    op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
					    op3, LCT_NORMAL, DImode,
					    op3, DImode,
					    op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
		      (match_operand:VSX_B 2 "vsx_register_operand")]
		     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(gt:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
		      (match_operand:VSX_B 2 "vsx_register_operand")]
		     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
		      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
		     UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<VSs> %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Reciprocal estimate.
(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Negated absolute value.
(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(neg:VSX_F
	 (abs:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_sqrt>")
   (set_attr "fp_type" "<VSfptype_sqrt>")])

;; Reciprocal square-root estimate.
(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
  [(set (match_dup 2)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
		     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(gt:SI (match_dup 2)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 2)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
		     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(eq:SI (match_dup 2)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
		     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<VSs> %0,%x1"
  [(set_attr "type" "<VStype_simple>")
   (set_attr "fp_type" "<VSfptype_simple>")])

;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allows the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiply.
1927 1928(define_insn "*vsx_fmav4sf4" 1929 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v") 1930 (fma:V4SF 1931 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v") 1932 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v") 1933 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))] 1934 "VECTOR_UNIT_VSX_P (V4SFmode)" 1935 "@ 1936 xvmaddasp %x0,%x1,%x2 1937 xvmaddmsp %x0,%x1,%x3 1938 xvmaddasp %x0,%x1,%x2 1939 xvmaddmsp %x0,%x1,%x3 1940 vmaddfp %0,%1,%2,%3" 1941 [(set_attr "type" "vecfloat")]) 1942 1943(define_insn "*vsx_fmav2df4" 1944 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa") 1945 (fma:V2DF 1946 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa") 1947 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0") 1948 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))] 1949 "VECTOR_UNIT_VSX_P (V2DFmode)" 1950 "@ 1951 xvmaddadp %x0,%x1,%x2 1952 xvmaddmdp %x0,%x1,%x3 1953 xvmaddadp %x0,%x1,%x2 1954 xvmaddmdp %x0,%x1,%x3" 1955 [(set_attr "type" "vecdouble")]) 1956 1957(define_insn "*vsx_fms<mode>4" 1958 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>") 1959 (fma:VSX_F 1960 (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>") 1961 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0") 1962 (neg:VSX_F 1963 (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))] 1964 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1965 "@ 1966 xvmsuba<VSs> %x0,%x1,%x2 1967 xvmsubm<VSs> %x0,%x1,%x3 1968 xvmsuba<VSs> %x0,%x1,%x2 1969 xvmsubm<VSs> %x0,%x1,%x3" 1970 [(set_attr "type" "<VStype_mul>")]) 1971 1972(define_insn "*vsx_nfma<mode>4" 1973 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>") 1974 (neg:VSX_F 1975 (fma:VSX_F 1976 (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>") 1977 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0") 1978 (match_operand:VSX_F 3 
"vsx_register_operand" "0,<VSr>,0,<VSa>"))))] 1979 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1980 "@ 1981 xvnmadda<VSs> %x0,%x1,%x2 1982 xvnmaddm<VSs> %x0,%x1,%x3 1983 xvnmadda<VSs> %x0,%x1,%x2 1984 xvnmaddm<VSs> %x0,%x1,%x3" 1985 [(set_attr "type" "<VStype_mul>") 1986 (set_attr "fp_type" "<VSfptype_mul>")]) 1987 1988(define_insn "*vsx_nfmsv4sf4" 1989 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v") 1990 (neg:V4SF 1991 (fma:V4SF 1992 (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v") 1993 (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v") 1994 (neg:V4SF 1995 (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))] 1996 "VECTOR_UNIT_VSX_P (V4SFmode)" 1997 "@ 1998 xvnmsubasp %x0,%x1,%x2 1999 xvnmsubmsp %x0,%x1,%x3 2000 xvnmsubasp %x0,%x1,%x2 2001 xvnmsubmsp %x0,%x1,%x3 2002 vnmsubfp %0,%1,%2,%3" 2003 [(set_attr "type" "vecfloat")]) 2004 2005(define_insn "*vsx_nfmsv2df4" 2006 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa") 2007 (neg:V2DF 2008 (fma:V2DF 2009 (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa") 2010 (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0") 2011 (neg:V2DF 2012 (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))] 2013 "VECTOR_UNIT_VSX_P (V2DFmode)" 2014 "@ 2015 xvnmsubadp %x0,%x1,%x2 2016 xvnmsubmdp %x0,%x1,%x3 2017 xvnmsubadp %x0,%x1,%x2 2018 xvnmsubmdp %x0,%x1,%x3" 2019 [(set_attr "type" "vecdouble")]) 2020 2021;; Vector conditional expressions (no scalar version for these instructions) 2022(define_insn "vsx_eq<mode>" 2023 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2024 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") 2025 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] 2026 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2027 "xvcmpeq<VSs> %x0,%x1,%x2" 2028 [(set_attr "type" "<VStype_simple>") 2029 (set_attr "fp_type" "<VSfptype_simple>")]) 2030 2031(define_insn "vsx_gt<mode>" 2032 [(set (match_operand:VSX_F 
0 "vsx_register_operand" "=<VSr>,?<VSa>") 2033 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") 2034 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] 2035 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2036 "xvcmpgt<VSs> %x0,%x1,%x2" 2037 [(set_attr "type" "<VStype_simple>") 2038 (set_attr "fp_type" "<VSfptype_simple>")]) 2039 2040(define_insn "*vsx_ge<mode>" 2041 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2042 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") 2043 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))] 2044 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2045 "xvcmpge<VSs> %x0,%x1,%x2" 2046 [(set_attr "type" "<VStype_simple>") 2047 (set_attr "fp_type" "<VSfptype_simple>")]) 2048 2049;; Compare vectors producing a vector result and a predicate, setting CR6 to 2050;; indicate a combined status 2051(define_insn "*vsx_eq_<mode>_p" 2052 [(set (reg:CC CR6_REGNO) 2053 (unspec:CC 2054 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>") 2055 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))] 2056 UNSPEC_PREDICATE)) 2057 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2058 (eq:VSX_F (match_dup 1) 2059 (match_dup 2)))] 2060 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2061 "xvcmpeq<VSs>. %x0,%x1,%x2" 2062 [(set_attr "type" "<VStype_simple>")]) 2063 2064(define_insn "*vsx_gt_<mode>_p" 2065 [(set (reg:CC CR6_REGNO) 2066 (unspec:CC 2067 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>") 2068 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))] 2069 UNSPEC_PREDICATE)) 2070 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2071 (gt:VSX_F (match_dup 1) 2072 (match_dup 2)))] 2073 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2074 "xvcmpgt<VSs>. 
%x0,%x1,%x2" 2075 [(set_attr "type" "<VStype_simple>")]) 2076 2077(define_insn "*vsx_ge_<mode>_p" 2078 [(set (reg:CC CR6_REGNO) 2079 (unspec:CC 2080 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>") 2081 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))] 2082 UNSPEC_PREDICATE)) 2083 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2084 (ge:VSX_F (match_dup 1) 2085 (match_dup 2)))] 2086 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2087 "xvcmpge<VSs>. %x0,%x1,%x2" 2088 [(set_attr "type" "<VStype_simple>")]) 2089 2090;; Vector select 2091(define_insn "*vsx_xxsel<mode>" 2092 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2093 (if_then_else:VSX_L 2094 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>") 2095 (match_operand:VSX_L 4 "zero_constant" "")) 2096 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>") 2097 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))] 2098 "VECTOR_MEM_VSX_P (<MODE>mode)" 2099 "xxsel %x0,%x3,%x2,%x1" 2100 [(set_attr "type" "vecmove")]) 2101 2102(define_insn "*vsx_xxsel<mode>_uns" 2103 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2104 (if_then_else:VSX_L 2105 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>") 2106 (match_operand:VSX_L 4 "zero_constant" "")) 2107 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>") 2108 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))] 2109 "VECTOR_MEM_VSX_P (<MODE>mode)" 2110 "xxsel %x0,%x3,%x2,%x1" 2111 [(set_attr "type" "vecmove")]) 2112 2113;; Copy sign 2114(define_insn "vsx_copysign<mode>3" 2115 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2116 (unspec:VSX_F 2117 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") 2118 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")] 2119 UNSPEC_COPYSIGN))] 2120 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2121 "xvcpsgn<VSs> %x0,%x2,%x1" 2122 [(set_attr "type" 
"<VStype_simple>") 2123 (set_attr "fp_type" "<VSfptype_simple>")]) 2124 2125;; For the conversions, limit the register class for the integer value to be 2126;; the fprs because we don't want to add the altivec registers to movdi/movsi. 2127;; For the unsigned tests, there isn't a generic double -> unsigned conversion 2128;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX. 2129;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md 2130;; in allowing virtual registers. 2131(define_insn "vsx_float<VSi><mode>2" 2132 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>") 2133 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] 2134 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2135 "xvcvsx<VSc><VSs> %x0,%x1" 2136 [(set_attr "type" "<VStype_simple>") 2137 (set_attr "fp_type" "<VSfptype_simple>")]) 2138 2139(define_insn "vsx_floatuns<VSi><mode>2" 2140 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>") 2141 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] 2142 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2143 "xvcvux<VSc><VSs> %x0,%x1" 2144 [(set_attr "type" "<VStype_simple>") 2145 (set_attr "fp_type" "<VSfptype_simple>")]) 2146 2147(define_insn "vsx_fix_trunc<mode><VSi>2" 2148 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") 2149 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))] 2150 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2151 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1" 2152 [(set_attr "type" "<VStype_simple>") 2153 (set_attr "fp_type" "<VSfptype_simple>")]) 2154 2155(define_insn "vsx_fixuns_trunc<mode><VSi>2" 2156 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") 2157 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))] 2158 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2159 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1" 2160 [(set_attr "type" "<VStype_simple>") 2161 (set_attr "fp_type" "<VSfptype_simple>")]) 2162 2163;; Math rounding 
functions 2164(define_insn "vsx_x<VSv>r<VSs>i" 2165 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2166 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] 2167 UNSPEC_VSX_ROUND_I))] 2168 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2169 "x<VSv>r<VSs>i %x0,%x1" 2170 [(set_attr "type" "<VStype_simple>") 2171 (set_attr "fp_type" "<VSfptype_simple>")]) 2172 2173(define_insn "vsx_x<VSv>r<VSs>ic" 2174 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2175 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] 2176 UNSPEC_VSX_ROUND_IC))] 2177 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2178 "x<VSv>r<VSs>ic %x0,%x1" 2179 [(set_attr "type" "<VStype_simple>") 2180 (set_attr "fp_type" "<VSfptype_simple>")]) 2181 2182(define_insn "vsx_btrunc<mode>2" 2183 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2184 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))] 2185 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2186 "xvr<VSs>iz %x0,%x1" 2187 [(set_attr "type" "<VStype_simple>") 2188 (set_attr "fp_type" "<VSfptype_simple>")]) 2189 2190(define_insn "*vsx_b2trunc<mode>2" 2191 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2192 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] 2193 UNSPEC_FRIZ))] 2194 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2195 "x<VSv>r<VSs>iz %x0,%x1" 2196 [(set_attr "type" "<VStype_simple>") 2197 (set_attr "fp_type" "<VSfptype_simple>")]) 2198 2199(define_insn "vsx_floor<mode>2" 2200 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2201 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")] 2202 UNSPEC_FRIM))] 2203 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2204 "xvr<VSs>im %x0,%x1" 2205 [(set_attr "type" "<VStype_simple>") 2206 (set_attr "fp_type" "<VSfptype_simple>")]) 2207 2208(define_insn "vsx_ceil<mode>2" 2209 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2210 
(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")] 2211 UNSPEC_FRIP))] 2212 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2213 "xvr<VSs>ip %x0,%x1" 2214 [(set_attr "type" "<VStype_simple>") 2215 (set_attr "fp_type" "<VSfptype_simple>")]) 2216 2217 2218;; VSX convert to/from double vector 2219 2220;; Convert between single and double precision 2221;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal 2222;; scalar single precision instructions internally use the double format. 2223;; Prefer the altivec registers, since we likely will need to do a vperm 2224(define_insn "vsx_<VS_spdp_insn>" 2225 [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>") 2226 (unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")] 2227 UNSPEC_VSX_CVSPDP))] 2228 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2229 "<VS_spdp_insn> %x0,%x1" 2230 [(set_attr "type" "<VS_spdp_type>")]) 2231 2232;; xscvspdp, represent the scalar SF type as V4SF 2233(define_insn "vsx_xscvspdp" 2234 [(set (match_operand:DF 0 "vsx_register_operand" "=ws") 2235 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] 2236 UNSPEC_VSX_CVSPDP))] 2237 "VECTOR_UNIT_VSX_P (V4SFmode)" 2238 "xscvspdp %x0,%x1" 2239 [(set_attr "type" "fp")]) 2240 2241;; Same as vsx_xscvspdp, but use SF as the type 2242(define_insn "vsx_xscvspdp_scalar2" 2243 [(set (match_operand:SF 0 "vsx_register_operand" "=ww") 2244 (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] 2245 UNSPEC_VSX_CVSPDP))] 2246 "VECTOR_UNIT_VSX_P (V4SFmode)" 2247 "xscvspdp %x0,%x1" 2248 [(set_attr "type" "fp")]) 2249 2250;; Generate xvcvhpsp instruction 2251(define_insn "vsx_xvcvhpsp" 2252 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") 2253 (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")] 2254 UNSPEC_VSX_CVHPSP))] 2255 "TARGET_P9_VECTOR" 2256 "xvcvhpsp %x0,%x1" 2257 [(set_attr "type" "vecfloat")]) 2258 2259;; xscvdpsp used for splat'ing a scalar to 
V4SF, knowing that the internal SF 2260;; format of scalars is actually DF. 2261(define_insn "vsx_xscvdpsp_scalar" 2262 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") 2263 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")] 2264 UNSPEC_VSX_CVSPDP))] 2265 "VECTOR_UNIT_VSX_P (V4SFmode)" 2266 "xscvdpsp %x0,%x1" 2267 [(set_attr "type" "fp")]) 2268 2269;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs 2270(define_insn "vsx_xscvdpspn" 2271 [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww") 2272 (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")] 2273 UNSPEC_VSX_CVDPSPN))] 2274 "TARGET_XSCVDPSPN" 2275 "xscvdpspn %x0,%x1" 2276 [(set_attr "type" "fp")]) 2277 2278(define_insn "vsx_xscvspdpn" 2279 [(set (match_operand:DF 0 "vsx_register_operand" "=ws") 2280 (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] 2281 UNSPEC_VSX_CVSPDPN))] 2282 "TARGET_XSCVSPDPN" 2283 "xscvspdpn %x0,%x1" 2284 [(set_attr "type" "fp")]) 2285 2286(define_insn "vsx_xscvdpspn_scalar" 2287 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") 2288 (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")] 2289 UNSPEC_VSX_CVDPSPN))] 2290 "TARGET_XSCVDPSPN" 2291 "xscvdpspn %x0,%x1" 2292 [(set_attr "type" "fp")]) 2293 2294;; Used by direct move to move a SFmode value from GPR to VSX register 2295(define_insn "vsx_xscvspdpn_directmove" 2296 [(set (match_operand:SF 0 "vsx_register_operand" "=wa") 2297 (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")] 2298 UNSPEC_VSX_CVSPDPN))] 2299 "TARGET_XSCVSPDPN" 2300 "xscvspdpn %x0,%x1" 2301 [(set_attr "type" "fp")]) 2302 2303;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long) 2304 2305(define_expand "vsx_xvcvsxddp_scale" 2306 [(match_operand:V2DF 0 "vsx_register_operand") 2307 (match_operand:V2DI 1 "vsx_register_operand") 2308 (match_operand:QI 2 "immediate_operand")] 2309 "VECTOR_UNIT_VSX_P (V2DFmode)" 2310{ 2311 rtx op0 = operands[0]; 
2312 rtx op1 = operands[1]; 2313 int scale = INTVAL(operands[2]); 2314 emit_insn (gen_vsx_xvcvsxddp (op0, op1)); 2315 if (scale != 0) 2316 rs6000_scale_v2df (op0, op0, -scale); 2317 DONE; 2318}) 2319 2320(define_insn "vsx_xvcvsxddp" 2321 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") 2322 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] 2323 UNSPEC_VSX_XVCVSXDDP))] 2324 "VECTOR_UNIT_VSX_P (V2DFmode)" 2325 "xvcvsxddp %x0,%x1" 2326 [(set_attr "type" "vecdouble")]) 2327 2328(define_expand "vsx_xvcvuxddp_scale" 2329 [(match_operand:V2DF 0 "vsx_register_operand") 2330 (match_operand:V2DI 1 "vsx_register_operand") 2331 (match_operand:QI 2 "immediate_operand")] 2332 "VECTOR_UNIT_VSX_P (V2DFmode)" 2333{ 2334 rtx op0 = operands[0]; 2335 rtx op1 = operands[1]; 2336 int scale = INTVAL(operands[2]); 2337 emit_insn (gen_vsx_xvcvuxddp (op0, op1)); 2338 if (scale != 0) 2339 rs6000_scale_v2df (op0, op0, -scale); 2340 DONE; 2341}) 2342 2343(define_insn "vsx_xvcvuxddp" 2344 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") 2345 (unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] 2346 UNSPEC_VSX_XVCVUXDDP))] 2347 "VECTOR_UNIT_VSX_P (V2DFmode)" 2348 "xvcvuxddp %x0,%x1" 2349 [(set_attr "type" "vecdouble")]) 2350 2351(define_expand "vsx_xvcvdpsxds_scale" 2352 [(match_operand:V2DI 0 "vsx_register_operand") 2353 (match_operand:V2DF 1 "vsx_register_operand") 2354 (match_operand:QI 2 "immediate_operand")] 2355 "VECTOR_UNIT_VSX_P (V2DFmode)" 2356{ 2357 rtx op0 = operands[0]; 2358 rtx op1 = operands[1]; 2359 rtx tmp; 2360 int scale = INTVAL (operands[2]); 2361 if (scale == 0) 2362 tmp = op1; 2363 else 2364 { 2365 tmp = gen_reg_rtx (V2DFmode); 2366 rs6000_scale_v2df (tmp, op1, scale); 2367 } 2368 emit_insn (gen_vsx_xvcvdpsxds (op0, tmp)); 2369 DONE; 2370}) 2371 2372;; convert vector of 64-bit floating point numbers to vector of 2373;; 64-bit signed integer 2374(define_insn "vsx_xvcvdpsxds" 2375 [(set (match_operand:V2DI 0 "vsx_register_operand" 
"=wa") 2376 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")] 2377 UNSPEC_VSX_XVCVDPSXDS))] 2378 "VECTOR_UNIT_VSX_P (V2DFmode)" 2379 "xvcvdpsxds %x0,%x1" 2380 [(set_attr "type" "vecdouble")]) 2381 2382;; convert vector of 32-bit floating point numbers to vector of 2383;; 32-bit signed integer 2384(define_insn "vsx_xvcvspsxws" 2385 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa") 2386 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")] 2387 UNSPEC_VSX_XVCVSPSXWS))] 2388 "VECTOR_UNIT_VSX_P (V4SFmode)" 2389 "xvcvspsxws %x0,%x1" 2390 [(set_attr "type" "vecfloat")]) 2391 2392;; convert vector of 64-bit floating point numbers to vector of 2393;; 64-bit unsigned integer 2394(define_expand "vsx_xvcvdpuxds_scale" 2395 [(match_operand:V2DI 0 "vsx_register_operand") 2396 (match_operand:V2DF 1 "vsx_register_operand") 2397 (match_operand:QI 2 "immediate_operand")] 2398 "VECTOR_UNIT_VSX_P (V2DFmode)" 2399{ 2400 rtx op0 = operands[0]; 2401 rtx op1 = operands[1]; 2402 rtx tmp; 2403 int scale = INTVAL (operands[2]); 2404 if (scale == 0) 2405 tmp = op1; 2406 else 2407 { 2408 tmp = gen_reg_rtx (V2DFmode); 2409 rs6000_scale_v2df (tmp, op1, scale); 2410 } 2411 emit_insn (gen_vsx_xvcvdpuxds (op0, tmp)); 2412 DONE; 2413}) 2414 2415;; convert vector of 32-bit floating point numbers to vector of 2416;; 32-bit unsigned integer 2417(define_insn "vsx_xvcvspuxws" 2418 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa") 2419 (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")] 2420 UNSPEC_VSX_XVCVSPSXWS))] 2421 "VECTOR_UNIT_VSX_P (V4SFmode)" 2422 "xvcvspuxws %x0,%x1" 2423 [(set_attr "type" "vecfloat")]) 2424 2425(define_insn "vsx_xvcvdpuxds" 2426 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") 2427 (unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")] 2428 UNSPEC_VSX_XVCVDPUXDS))] 2429 "VECTOR_UNIT_VSX_P (V2DFmode)" 2430 "xvcvdpuxds %x0,%x1" 2431 [(set_attr "type" "vecdouble")]) 2432 2433;; Convert from 64-bit to 
32-bit types 2434;; Note, favor the Altivec registers since the usual use of these instructions 2435;; is in vector converts and we need to use the Altivec vperm instruction. 2436 2437(define_insn "vsx_xvcvdpsxws" 2438 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") 2439 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")] 2440 UNSPEC_VSX_CVDPSXWS))] 2441 "VECTOR_UNIT_VSX_P (V2DFmode)" 2442 "xvcvdpsxws %x0,%x1" 2443 [(set_attr "type" "vecdouble")]) 2444 2445(define_insn "vsx_xvcvdpuxws" 2446 [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") 2447 (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")] 2448 UNSPEC_VSX_CVDPUXWS))] 2449 "VECTOR_UNIT_VSX_P (V2DFmode)" 2450 "xvcvdpuxws %x0,%x1" 2451 [(set_attr "type" "vecdouble")]) 2452 2453(define_insn "vsx_xvcvsxdsp" 2454 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa") 2455 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")] 2456 UNSPEC_VSX_CVSXDSP))] 2457 "VECTOR_UNIT_VSX_P (V2DFmode)" 2458 "xvcvsxdsp %x0,%x1" 2459 [(set_attr "type" "vecfloat")]) 2460 2461(define_insn "vsx_xvcvuxdsp" 2462 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa") 2463 (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")] 2464 UNSPEC_VSX_CVUXDSP))] 2465 "VECTOR_UNIT_VSX_P (V2DFmode)" 2466 "xvcvuxdsp %x0,%x1" 2467 [(set_attr "type" "vecdouble")]) 2468 2469(define_insn "vsx_xvcdpsp" 2470 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa") 2471 (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")] 2472 UNSPEC_VSX_XVCDPSP))] 2473 "VECTOR_UNIT_VSX_P (V2DFmode)" 2474 "xvcvdpsp %x0,%x1" 2475 [(set_attr "type" "vecdouble")]) 2476 2477;; Convert from 32-bit to 64-bit types 2478;; Provide both vector and scalar targets 2479(define_insn "vsx_xvcvsxwdp" 2480 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") 2481 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] 2482 UNSPEC_VSX_CVSXWDP))] 2483 
"VECTOR_UNIT_VSX_P (V2DFmode)" 2484 "xvcvsxwdp %x0,%x1" 2485 [(set_attr "type" "vecdouble")]) 2486 2487(define_insn "vsx_xvcvsxwdp_df" 2488 [(set (match_operand:DF 0 "vsx_register_operand" "=ws") 2489 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] 2490 UNSPEC_VSX_CVSXWDP))] 2491 "TARGET_VSX" 2492 "xvcvsxwdp %x0,%x1" 2493 [(set_attr "type" "vecdouble")]) 2494 2495(define_insn "vsx_xvcvuxwdp" 2496 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa") 2497 (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")] 2498 UNSPEC_VSX_CVUXWDP))] 2499 "VECTOR_UNIT_VSX_P (V2DFmode)" 2500 "xvcvuxwdp %x0,%x1" 2501 [(set_attr "type" "vecdouble")]) 2502 2503(define_insn "vsx_xvcvuxwdp_df" 2504 [(set (match_operand:DF 0 "vsx_register_operand" "=ws") 2505 (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] 2506 UNSPEC_VSX_CVUXWDP))] 2507 "TARGET_VSX" 2508 "xvcvuxwdp %x0,%x1" 2509 [(set_attr "type" "vecdouble")]) 2510 2511(define_insn "vsx_xvcvspsxds" 2512 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") 2513 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] 2514 UNSPEC_VSX_CVSPSXDS))] 2515 "VECTOR_UNIT_VSX_P (V2DFmode)" 2516 "xvcvspsxds %x0,%x1" 2517 [(set_attr "type" "vecdouble")]) 2518 2519(define_insn "vsx_xvcvspuxds" 2520 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") 2521 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] 2522 UNSPEC_VSX_CVSPUXDS))] 2523 "VECTOR_UNIT_VSX_P (V2DFmode)" 2524 "xvcvspuxds %x0,%x1" 2525 [(set_attr "type" "vecdouble")]) 2526 2527(define_insn "vsx_xvcvsxwsp" 2528 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") 2529 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] 2530 UNSPEC_VSX_CVSXWSP))] 2531 "VECTOR_UNIT_VSX_P (V4SFmode)" 2532 "xvcvsxwsp %x0,%x1" 2533 [(set_attr "type" "vecfloat")]) 2534 2535(define_insn "vsx_xvcvuxwsp" 2536 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") 2537 
(unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
	     UNSPEC_VSX_CVUXWSP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvuxwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; Generate float2 double
;; convert two double to float
(define_expand "float2_v2df"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DF 1 "register_operand" "wa"))
   (use (match_operand:V2DF 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate float2
;; convert two long long signed ints to float
(define_expand "float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  /* true = signed conversion, following the signed_convert convention
     used by rs6000_generate_vsigned2_code in vsigned2_v2df below.  */
  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate uns_float2
;; convert two long long unsigned ints to float
(define_expand "uns_float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  /* Fix: this is the *unsigned* variant, so request the unsigned
     conversion.  Previously this passed true, which made the expander
     byte-identical to float2_v2di and presumably emitted the signed
     convert for unsigned inputs.  false mirrors signed_convert=false
     in vunsigned2_v2df below -- confirm against the definition of
     rs6000_generate_float2_code in rs6000.c.  */
  rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate floate
;; convert double or long long signed to float
;; (Only even words are valid, BE numbering)
(define_expand
"floate<mode>" 2601 [(use (match_operand:V4SF 0 "register_operand" "=wa")) 2602 (use (match_operand:VSX_D 1 "register_operand" "wa"))] 2603 "VECTOR_UNIT_VSX_P (V4SFmode)" 2604{ 2605 if (VECTOR_ELT_ORDER_BIG) 2606 { 2607 /* Shift left one word to put even word correct location */ 2608 rtx rtx_tmp; 2609 rtx rtx_val = GEN_INT (4); 2610 2611 rtx_tmp = gen_reg_rtx (V4SFmode); 2612 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1])); 2613 emit_insn (gen_altivec_vsldoi_v4sf (operands[0], 2614 rtx_tmp, rtx_tmp, rtx_val)); 2615 } 2616 else 2617 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1])); 2618 2619 DONE; 2620}) 2621 2622;; Generate uns_floate 2623;; convert long long unsigned to float 2624;; (Only even words are valid, BE numbering) 2625(define_expand "unsfloatev2di" 2626 [(use (match_operand:V4SF 0 "register_operand" "=wa")) 2627 (use (match_operand:V2DI 1 "register_operand" "wa"))] 2628 "VECTOR_UNIT_VSX_P (V4SFmode)" 2629{ 2630 if (VECTOR_ELT_ORDER_BIG) 2631 { 2632 /* Shift left one word to put even word correct location */ 2633 rtx rtx_tmp; 2634 rtx rtx_val = GEN_INT (4); 2635 2636 rtx_tmp = gen_reg_rtx (V4SFmode); 2637 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1])); 2638 emit_insn (gen_altivec_vsldoi_v4sf (operands[0], 2639 rtx_tmp, rtx_tmp, rtx_val)); 2640 } 2641 else 2642 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1])); 2643 2644 DONE; 2645}) 2646 2647;; Generate floato 2648;; convert double or long long signed to float 2649;; Only odd words are valid, BE numbering) 2650(define_expand "floato<mode>" 2651 [(use (match_operand:V4SF 0 "register_operand" "=wa")) 2652 (use (match_operand:VSX_D 1 "register_operand" "wa"))] 2653 "VECTOR_UNIT_VSX_P (V4SFmode)" 2654{ 2655 if (VECTOR_ELT_ORDER_BIG) 2656 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1])); 2657 else 2658 { 2659 /* Shift left one word to put odd word correct location */ 2660 rtx rtx_tmp; 2661 rtx rtx_val = GEN_INT (4); 2662 2663 rtx_tmp = gen_reg_rtx 
(V4SFmode); 2664 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1])); 2665 emit_insn (gen_altivec_vsldoi_v4sf (operands[0], 2666 rtx_tmp, rtx_tmp, rtx_val)); 2667 } 2668 DONE; 2669}) 2670 2671;; Generate uns_floato 2672;; convert long long unsigned to float 2673;; (Only odd words are valid, BE numbering) 2674(define_expand "unsfloatov2di" 2675 [(use (match_operand:V4SF 0 "register_operand" "=wa")) 2676 (use (match_operand:V2DI 1 "register_operand" "wa"))] 2677 "VECTOR_UNIT_VSX_P (V4SFmode)" 2678{ 2679 if (VECTOR_ELT_ORDER_BIG) 2680 emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1])); 2681 else 2682 { 2683 /* Shift left one word to put odd word correct location */ 2684 rtx rtx_tmp; 2685 rtx rtx_val = GEN_INT (4); 2686 2687 rtx_tmp = gen_reg_rtx (V4SFmode); 2688 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1])); 2689 emit_insn (gen_altivec_vsldoi_v4sf (operands[0], 2690 rtx_tmp, rtx_tmp, rtx_val)); 2691 } 2692 DONE; 2693}) 2694 2695;; Generate vsigned2 2696;; convert two double float vectors to a vector of single precision ints 2697(define_expand "vsigned2_v2df" 2698 [(match_operand:V4SI 0 "register_operand" "=wa") 2699 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa") 2700 (match_operand:V2DF 2 "register_operand" "wa")] 2701 UNSPEC_VSX_VSIGNED2)] 2702 "TARGET_VSX" 2703{ 2704 rtx rtx_src1, rtx_src2, rtx_dst; 2705 bool signed_convert=true; 2706 2707 rtx_dst = operands[0]; 2708 rtx_src1 = operands[1]; 2709 rtx_src2 = operands[2]; 2710 2711 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2); 2712 DONE; 2713}) 2714 2715;; Generate vsignedo_v2df 2716;; signed double float to int convert odd word 2717(define_expand "vsignedo_v2df" 2718 [(set (match_operand:V4SI 0 "register_operand" "=wa") 2719 (match_operand:V2DF 1 "register_operand" "wa"))] 2720 "TARGET_VSX" 2721{ 2722 if (VECTOR_ELT_ORDER_BIG) 2723 { 2724 rtx rtx_tmp; 2725 rtx rtx_val = GEN_INT (12); 2726 rtx_tmp = gen_reg_rtx (V4SImode); 2727 2728 emit_insn 
(gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
	 take (operand[1] operand[1]) and shift left one word
	 0 1 2 3  0 1 2 3  =>  1 2 3 0
	 Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
					  rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  DONE;
}
 [(set_attr "type" "veccomplex")])

;; Generate vsignede_v2df
;; signed double float to int even word
;; Convert both doublewords of a V2DF to signed ints so that they land
;; in the even words (BE numbering) of the V4SI result.
(define_expand "vsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (VECTOR_ELT_ORDER_BIG)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are already where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
	 take (operand[1] operand[1]) and shift left three words
	 0 1 2 3  0 1 2 3  =>  3 0 1 2
	 Words 0 and 2 are now where they need to be for the result.
*/ 2771 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, 2772 rtx_tmp, rtx_val)); 2773 } 2774 DONE; 2775} 2776 [(set_attr "type" "veccomplex")]) 2777 2778;; Generate unsigned2 2779;; convert two double float vectors to a vector of single precision 2780;; unsigned ints 2781(define_expand "vunsigned2_v2df" 2782[(match_operand:V4SI 0 "register_operand" "=v") 2783 (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v") 2784 (match_operand:V2DF 2 "register_operand" "v")] 2785 UNSPEC_VSX_VSIGNED2)] 2786 "TARGET_VSX" 2787{ 2788 rtx rtx_src1, rtx_src2, rtx_dst; 2789 bool signed_convert=false; 2790 2791 rtx_dst = operands[0]; 2792 rtx_src1 = operands[1]; 2793 rtx_src2 = operands[2]; 2794 2795 rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2); 2796 DONE; 2797}) 2798 2799;; Generate vunsignedo_v2df 2800;; unsigned double float to int convert odd word 2801(define_expand "vunsignedo_v2df" 2802 [(set (match_operand:V4SI 0 "register_operand" "=v") 2803 (match_operand:V2DF 1 "register_operand" "v"))] 2804 "TARGET_VSX" 2805{ 2806 if (VECTOR_ELT_ORDER_BIG) 2807 { 2808 rtx rtx_tmp; 2809 rtx rtx_val = GEN_INT (12); 2810 rtx_tmp = gen_reg_rtx (V4SImode); 2811 2812 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1])); 2813 2814 /* Big endian word numbering for words in operand is 0 1 2 3. 2815 take (operand[1] operand[1]) and shift left one word 2816 0 1 2 3 0 1 2 3 => 1 2 3 0 2817 Words 1 and 3 are now are now where they need to be for result. */ 2818 2819 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, 2820 rtx_tmp, rtx_val)); 2821 } 2822 else 2823 /* Little endian word numbering for operand is 3 2 1 0. 2824 Result words 3 and 1 are where they need to be. 
*/ 2825 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1])); 2826 2827 DONE; 2828} 2829 [(set_attr "type" "veccomplex")]) 2830 2831;; Generate vunsignede_v2df 2832;; unsigned double float to int even word 2833(define_expand "vunsignede_v2df" 2834 [(set (match_operand:V4SI 0 "register_operand" "=v") 2835 (match_operand:V2DF 1 "register_operand" "v"))] 2836 "TARGET_VSX" 2837{ 2838 if (VECTOR_ELT_ORDER_BIG) 2839 /* Big endian word numbering for words in operand is 0 1 2840 Result words 0 is where they need to be. */ 2841 emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1])); 2842 2843 else 2844 { 2845 rtx rtx_tmp; 2846 rtx rtx_val = GEN_INT (12); 2847 rtx_tmp = gen_reg_rtx (V4SImode); 2848 2849 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1])); 2850 2851 /* Little endian word numbering for operand is 3 2 1 0. 2852 take (operand[1] operand[1]) and shift left three words 2853 0 1 2 3 0 1 2 3 => 3 0 1 2 2854 Words 0 and 2 are now where they need to be for the result. */ 2855 emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, 2856 rtx_tmp, rtx_val)); 2857 } 2858 DONE; 2859} 2860 [(set_attr "type" "veccomplex")]) 2861 2862;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since 2863;; since the xvrdpiz instruction does not truncate the value if the floating 2864;; point value is < LONG_MIN or > LONG_MAX. 
;; Combined float (fix x) as a single round-toward-zero; only valid under
;; -ffast-math (see comment above) because xvrdpiz does not truncate values
;; outside the representable long range.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(float:V2DF
	 (fix:V2DI
	  (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")
   (set_attr "fp_type" "fp_addsub_d")])


;; Permute operations

;; Build a V2DF/V2DI vector from two scalars.  Alternative 0 uses xxpermdi
;; on two VSX registers; alternative 1 uses mtvsrdd from two GPRs.  For
;; little endian the two inputs are swapped so element 0 of the vector
;; still corresponds to operand 1.
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
	(vec_concat:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (which_alternative == 0)
    return (BYTES_BIG_ENDIAN
	    ? "xxpermdi %x0,%x1,%x2,0"
	    : "xxpermdi %x0,%x2,%x1,0");

  else if (which_alternative == 1)
    return (BYTES_BIG_ENDIAN
	    ? "mtvsrdd %x0,%1,%2"
	    : "mtvsrdd %x0,%2,%1");

  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])

;; Combiner patterns to allow creating XXPERMDI's to access either double
;; word element in a vector register.
;; Concat of a selected doubleword of operand 1 with scalar operand 3.
;; The xxpermdi immediate (operands[4]) is computed in the output template;
;; for little endian the operands and the doubleword index are mirrored.
(define_insn "*vsx_concat_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_concat:VSX_D
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
	 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[2]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (2*dword);
      return "xxpermdi %x0,%x1,%x3,%4";
    }
  else
    {
      operands[4] = GEN_INT (!dword);
      return "xxpermdi %x0,%x3,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

;; Concat of scalar operand 1 with a selected doubleword of operand 2.
(define_insn "*vsx_concat_<mode>_2"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_concat:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[3]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (dword);
      return "xxpermdi %x0,%x1,%x2,%4";
    }
  else
    {
      operands[4] = GEN_INT (2 * !dword);
      return "xxpermdi %x0,%x2,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

;; Concat of selected doublewords from two different vectors; the xxpermdi
;; immediate encodes both doubleword selectors (DM field = 2*dword1 + dword2
;; in big endian numbering).
(define_insn "*vsx_concat_<mode>_3"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_concat:VSX_D
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[5] = GEN_INT ((2 * dword1) + dword2);
      return "xxpermdi %x0,%x1,%x3,%5";
    }
  else
    {
      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
      return "xxpermdi %x0,%x3,%x1,%5";
    }
}
  [(set_attr "type" "vecperm")])

;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(unspec:V2DF
	 [(match_operand:SF 1 "vsx_register_operand" "ww")
	  (match_operand:SF 2 "vsx_register_operand" "ww")]
	 UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])

;; V4SImode initialization splitter.  Deferred to after reload and then
;; expanded by rs6000_split_v4si_init using the two DI scratch registers.
(define_insn_and_split "vsx_init_v4si"
  [(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
	(unspec:V4SI
	 [(match_operand:SI 1 "reg_or_cint_operand" "rn")
	  (match_operand:SI 2 "reg_or_cint_operand" "rn")
	  (match_operand:SI 3 "reg_or_cint_operand" "rn")
	  (match_operand:SI 4 "reg_or_cint_operand" "rn")]
	 UNSPEC_VSX_VEC_INIT))
   (clobber (match_scratch:DI 5 "=&r"))
   (clobber (match_scratch:DI 6 "=&r"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_v4si_init (operands);
  DONE;
})

;; xxpermdi for little endian loads and stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
;; Doubleword swap of a 64-bit-element vector (elements 1 0).
(define_insn "*vsx_xxpermdi2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
	(vec_select:VSX_D
	 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
	 (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

;; Same doubleword swap expressed on 32-bit elements (2 3 0 1).
(define_insn "*vsx_xxpermdi4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(vec_select:VSX_W
	 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
	 (parallel [(const_int 2) (const_int 3)
		    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

;; Same doubleword swap expressed on 16-bit elements (4..7 0..3).
(define_insn "*vsx_xxpermdi8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(vec_select:V8HI
	 (match_operand:V8HI 1 "vsx_register_operand" "wa")
	 (parallel [(const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

;; Same doubleword swap expressed on 8-bit elements (8..15 0..7).
(define_insn "*vsx_xxpermdi16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(vec_select:V16QI
	 (match_operand:V16QI 1 "vsx_register_operand" "wa")
	 (parallel [(const_int 8) (const_int 9)
		    (const_int 10) (const_int 11)
		    (const_int 12) (const_int 13)
		    (const_int 14) (const_int 15)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)
		    (const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

;; lxvd2x for little endian loads.  We need several of
;; these since the form of the PARALLEL differs by mode.
;; These patterns combine the doubleword-swapping semantics of lxvd2x with
;; a vec_select that undoes the swap; disabled on P9 where lxv is available.
(define_insn "*vsx_lxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
	(vec_select:VSX_D
	 (match_operand:VSX_D 1 "memory_operand" "Z")
	 (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(vec_select:VSX_W
	 (match_operand:VSX_W 1 "memory_operand" "Z")
	 (parallel [(const_int 2) (const_int 3)
		    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(vec_select:V8HI
	 (match_operand:V8HI 1 "memory_operand" "Z")
	 (parallel [(const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "*vsx_lxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(vec_select:V16QI
	 (match_operand:V16QI 1 "memory_operand" "Z")
	 (parallel [(const_int 8) (const_int 9)
		    (const_int 10) (const_int 11)
		    (const_int 12) (const_int 13)
		    (const_int 14) (const_int 15)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)
		    (const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

;; stxvd2x for little endian stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
;; Store-side counterparts of the lxvd2x patterns above: the vec_select
;; pre-swaps the doublewords that stxvd2x will swap again on store.
(define_insn "*vsx_stxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
	(vec_select:VSX_D
	 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>")
	 (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
	(vec_select:VSX_W
	 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
	 (parallel [(const_int 2) (const_int 3)
		    (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
	(vec_select:V8HI
	 (match_operand:V8HI 1 "vsx_register_operand" "wa")
	 (parallel [(const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
	(vec_select:V16QI
	 (match_operand:V16QI 1 "vsx_register_operand" "wa")
	 (parallel [(const_int 8) (const_int 9)
		    (const_int 10) (const_int 11)
		    (const_int 12) (const_int 13)
		    (const_int 14) (const_int 15)
		    (const_int 0) (const_int 1)
		    (const_int 2) (const_int 3)
		    (const_int 4) (const_int 5)
		    (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Convert a TImode value into V1TImode.  Only element 0 is valid
;; (a V1TI vector has a single element), hence the gcc_unreachable.
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand")
   (match_operand:V1TI 1 "nonimmediate_operand")
   (match_operand:TI 2 "input_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
  DONE;
})

;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT: extract the element that
;; is kept, then concat it with the new value in the right order.
(define_expand "vsx_set_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
   (use (match_operand:QI 3 "const_0_to_1_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx dest = operands[0];
  rtx vec_reg = operands[1];
  rtx value = operands[2];
  rtx ele = operands[3];
  rtx tmp = gen_reg_rtx (<VS_scalar>mode);

  if (ele == const0_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
      DONE;
    }
  else if (ele == const1_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
      DONE;
    }
  else
    gcc_unreachable ();
})

;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases were we can do a simple or direct move.
;; Or see if we can avoid doing the move at all

;; There are some unresolved problems with reload that show up if an Altivec
;; register was picked.  Limit the scalar value to FPRs for now.
;; Extract a 64-bit element from V2DF/V2DI.  The output template picks the
;; cheapest instruction for the destination register class: nothing at all
;; (same register), mfvsrd/mfvsrld (to a GPR), fmr (FPR to FPR), xxlor
;; (VSX copy), or a generic xxpermdi to move the other doubleword down.
(define_insn "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "gpc_reg_operand" "<VSa>, <VSa>, wm, wo")
	 (parallel
	  [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int element = INTVAL (operands[2]);
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);
  int fldDM;

  gcc_assert (IN_RANGE (element, 0, 1));
  gcc_assert (VSX_REGNO_P (op1_regno));

  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      /* The scalar already lives in the scalar position of the vector
	 register; at most a register-to-register copy is needed.  */
      if (op0_regno == op1_regno)
	return ASM_COMMENT_START " vec_extract to same register";

      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
	       && TARGET_POWERPC64)
	return "mfvsrd %0,%x1";

      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
	return "fmr %0,%1";

      else if (VSX_REGNO_P (op0_regno))
	return "xxlor %x0,%x1,%x1";

      else
	gcc_unreachable ();
    }

  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
	   && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
    return "mfvsrld %0,%x1";

  else if (VSX_REGNO_P (op0_regno))
    {
      /* DM field of xxpermdi; mirrored for little endian numbering.  */
      fldDM = element << 1;
      if (!BYTES_BIG_ENDIAN)
	fldDM = 3 - fldDM;
      operands[3] = GEN_INT (fldDM);
      return "xxpermdi %x0,%x1,%x1,%3";
    }

  else
    gcc_unreachable ();
}
  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])

;; Optimize extracting a single scalar element from memory.
;; Load a single 64-bit element directly from memory: rewrite the vector
;; memory reference into a scalar one via rs6000_adjust_vec_address, using
;; the base-register scratch for any needed address arithmetic.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
	(vec_select:<VSX_D:VS_scalar>
	 (match_operand:VSX_D 1 "memory_operand" "m,m")
	 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VSX_D:VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing a single scalar element that is the right location to
;; memory
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
	 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsd%U0x %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")
   (set_attr "length" "4")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
			    UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])

;; Variable V2DI/V2DF extract: element number is in a GPR, so the split is
;; delegated to rs6000_split_vec_extract_var after reload.
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
			     (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
			    UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=&v,X,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; Extract a SF element from V4SF: rotate the wanted word into position 0
;; with xxsldwi (unless it is already there), then convert it to scalar
;; double format with xscvspdp.
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
	(vec_select:SF
	 (match_operand:V4SF 1 "vsx_register_operand" "wa")
	 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
   (clobber (match_scratch:V4SF 3 "=0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
	op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "fp")])

;; Extract a single SF element directly from memory (cf. the VSX_D load
;; variant above).
(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
  [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r")
	(vec_select:SF
	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], SFmode);
}
  [(set_attr "type" "fpload,fpload,fpload,load")
   (set_attr "length" "8")])

;; Variable V4SF extract
;; Variable V4SF extract: element number is in a GPR; split after reload
;; via rs6000_split_vec_extract_var.
(define_insn_and_split "vsx_extract_v4sf_var"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r")
	(unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m")
		    (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
		   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,&b,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; Expand the builtin form of xxpermdi to canonical rtl.  The 2-bit mask
;; is decomposed into the two vec_select indexes expected by
;; vsx_xxpermdi2_<mode>_1; non-V2DI/V2DF modes are funneled through V2DI
;; subregs.
(define_expand "vsx_xxpermdi_<mode>"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }
  emit_insn (gen (target, op0, op1, perm0, perm1));
  DONE;
})

;; Special version of xxpermdi that retains big-endian semantics.
;; Same as vsx_xxpermdi_<mode> but with big-endian element semantics
;; regardless of target endianness (see the comment in the body).
(define_expand "vsx_xxpermdi_<mode>_be"
  [(match_operand:VSX_L 0 "vsx_register_operand")
   (match_operand:VSX_L 1 "vsx_register_operand")
   (match_operand:VSX_L 2 "vsx_register_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  int mask = INTVAL (operands[3]);
  rtx perm0 = GEN_INT ((mask >> 1) & 1);
  rtx perm1 = GEN_INT ((mask & 1) + 2);
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx);

  if (<MODE>mode == V2DFmode)
    gen = gen_vsx_xxpermdi2_v2df_1;
  else
    {
      gen = gen_vsx_xxpermdi2_v2di_1;
      if (<MODE>mode != V2DImode)
	{
	  target = gen_lowpart (V2DImode, target);
	  op0 = gen_lowpart (V2DImode, op0);
	  op1 = gen_lowpart (V2DImode, op1);
	}
    }
  /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
     transformation we don't want; it is necessary for
     rs6000_expand_vec_perm_const_1 but not for this use.  So we
     prepare for that by reversing the transformation here.  */
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen (target, op0, op1, perm0, perm1));
  else
    {
      rtx p0 = GEN_INT (3 - INTVAL (perm1));
      rtx p1 = GEN_INT (3 - INTVAL (perm0));
      emit_insn (gen (target, op1, op0, p0, p1));
    }
  DONE;
})

;; Canonical vec_select/vec_concat form of xxpermdi on 64-bit elements.
(define_insn "vsx_xxpermdi2_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd")
	(vec_select:VSX_D
	 (vec_concat:<VS_double>
	  (match_operand:VSX_D 1 "vsx_register_operand" "wd")
	  (match_operand:VSX_D 2 "vsx_register_operand" "wd"))
	 (parallel [(match_operand 3 "const_0_to_1_operand" "")
		    (match_operand 4 "const_2_to_3_operand" "")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int op3, op4, mask;

  /* For little endian, swap operands and invert/swap selectors
     to get the correct xxpermdi.  The operand swap sets up the
     inputs as a little endian array.  The selectors are swapped
     because they are defined to use big endian ordering.  The
     selectors are inverted to get the correct doublewords for
     little endian ordering.  */
  if (BYTES_BIG_ENDIAN)
    {
      op3 = INTVAL (operands[3]);
      op4 = INTVAL (operands[4]);
    }
  else
    {
      op3 = 3 - INTVAL (operands[4]);
      op4 = 3 - INTVAL (operands[3]);
    }

  mask = (op3 << 1) | (op4 - 2);
  operands[3] = GEN_INT (mask);

  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,%3";
  else
    return "xxpermdi %x0,%x2,%x1,%3";
}
  [(set_attr "type" "vecperm")])

;; Extraction of a single element in a small integer vector.  Until ISA 3.0,
;; none of the small types were allowed in a vector register, so we had to
;; extract to a DImode and either do a direct move or store.
(define_expand  "vsx_extract_<mode>"
  [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand")
		   (vec_select:<VS_scalar>
		    (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
		    (parallel [(match_operand:QI 2 "const_int_operand")])))
	      (clobber (match_scratch:VSX_EXTRACT_I 3))])]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
{
  /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
  if (TARGET_P9_VECTOR)
    {
      emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
					    operands[2]));
      DONE;
    }
})

;; ISA 3.0 small-integer extract.  Alternative 0 (to a GPR) is split later
;; (see the define_split below); alternative 1 emits xxextractuw or
;; vextractub/vextractuh with a byte offset computed from the element
;; number, adjusted for endian element ordering.
(define_insn "vsx_extract_<mode>_p9"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
{
  if (which_alternative == 0)
    return "#";

  else
    {
      HOST_WIDE_INT elt = INTVAL (operands[2]);
      HOST_WIDE_INT elt_adj = (!VECTOR_ELT_ORDER_BIG
			       ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt
			       : elt);

      HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
      HOST_WIDE_INT offset = unit_size * elt_adj;

      operands[2] = GEN_INT (offset);
      if (unit_size == 4)
	return "xxextractuw %x0,%x1,%2";
      else
	return "vextractu<wd> %0,%1,%2";
    }
}
  [(set_attr "type" "vecsimple")])

;; Split the GPR alternative of vsx_extract_<mode>_p9: load the byte
;; offset into the SI scratch and use vextublx/vextubrx (or the h/w
;; variants) depending on element ordering.
(define_split
  [(set (match_operand:<VS_scalar> 0 "int_reg_operand")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
	 (parallel [(match_operand:QI 2 "const_int_operand")])))
   (clobber (match_operand:SI 3 "int_reg_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
  [(const_int 0)]
{
  rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode);

  emit_move_insn (op3, GEN_INT (offset));
  if (VECTOR_ELT_ORDER_BIG)
    emit_insn (gen_vextu<wd>lx (op0_si, op3, op1));
  else
    emit_insn (gen_vextu<wd>rx (op0_si, op3, op1));
  DONE;
})

;; Optimize zero extracts to eliminate the AND after the extract.
(define_insn_and_split "*vsx_extract_<mode>_di_p9"
  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>")
	(zero_extend:DI
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
   (clobber (match_scratch:SI 3 "=r,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 4)
		   (vec_select:<VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (match_dup 3))])]
{
  operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0]));
})

;; Optimize stores to use the ISA 3.0 scalar store instructions
(define_insn_and_split "*vsx_extract_<mode>_store_p9"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v")
	 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
   (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
   (clobber (match_scratch:SI 4 "=X,&r"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (match_dup 4))])
   (set (match_dup 0)
	(match_dup 3))])

;; Pre-ISA-3.0 V4SI element extract: splat the wanted word into the scalar
;; position (word 1 in BE numbering) unless it is already there, then move
;; or store it.
(define_insn_and_split  "*vsx_extract_si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
	(vec_select:SI
	 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 1)
    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
  else
    vec_tmp = src;

  if (MEM_P (operands[0]))
    {
      if (can_create_pseudo_p ())
	dest = rs6000_address_for_fpconvert (dest);

      if (TARGET_P8_VECTOR)
	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
      else
	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
    }

  else if (TARGET_P8_VECTOR)
    emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
  else
    emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		    gen_rtx_REG (DImode, REGNO (vec_tmp)));

  DONE;
}
  [(set_attr "type" "mftgpr,vecperm,fpstore")
   (set_attr "length" "8")])

;; Pre-ISA-3.0 V16QI/V8HI element extract: splat the wanted element into
;; the scalar position, then direct-move the doubleword to a GPR.
(define_insn_and_split  "*vsx_extract_<mode>_p8"
  [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && !TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx vec_tmp = operands[3];
  int value;

  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (<MODE>mode == V16QImode)
    {
      if (value != 7)
	emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else if (<MODE>mode == V8HImode)
    {
      if (value != 3)
	emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
      else
	vec_tmp = src;
    }
  else
    gcc_unreachable ();

  emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
		  gen_rtx_REG (DImode, REGNO (vec_tmp)));
  DONE;
}
  [(set_attr "type" "mftgpr")])

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=r")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m")
	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
   (clobber (match_scratch:DI 3 "=&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2],
					   operands[3], <VS_scalar>mode);
}
  [(set_attr "type" "load")
   (set_attr "length" "8")])

;; Variable V16QI/V8HI/V4SI extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
	(unspec:<VS_scalar>
	 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
	  (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
	 UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; Variable extract whose result is zero-extended into a wider GPR;
;; delegates the actual splitting to rs6000_split_vec_extract_var with the
;; destination viewed in the element's natural (narrow) mode.
(define_insn_and_split "*vsx_extract_<mode>_<VS_scalar>mode_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r")
	(zero_extend:<VS_scalar>
	 (unspec:<VSX_EXTRACT_I:VS_scalar>
	  [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m")
	   (match_operand:DI 2 "gpc_reg_operand" "r,r,r")]
	  UNSPEC_VSX_EXTRACT)))
   (clobber (match_scratch:DI 3 "=r,r,&b"))
   (clobber (match_scratch:V2DI 4 "=X,&v,X"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  machine_mode smode = <VS_scalar>mode;
  rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])),
				operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; VSX_EXTRACT optimizations
;; Optimize double d = (double) vec_extract (vi, <n>)
;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
(define_insn_and_split "*vsx_extract_si_<uns>float_df"
  [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
	(any_float:DF
	 (vec_select:SI
	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  int value;

  /* Adjust the element number for little endian element order.  */
  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      /* Splitting before reload: materialize a pseudo if the scratch was
	 never allocated.  */
      if (GET_CODE (v4si_tmp) == SCRATCH)
	v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
  DONE;
})

;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
;; where <type> is a floating point type that supported by the hardware that is
;; not double.  First convert the value to double, and then to the desired
;; type.
(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
	(any_float:VSX_EXTRACT_FL
	 (vec_select:SI
	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
   (clobber (match_scratch:V4SI 3 "=v"))
   (clobber (match_scratch:DF 4 "=ws"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];
  rtx element = operands[2];
  rtx v4si_tmp = operands[3];
  rtx df_tmp = operands[4];
  int value;

  /* Adjust the element number for little endian element order.  */
  if (!VECTOR_ELT_ORDER_BIG)
    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));

  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
     instruction.  */
  value = INTVAL (element);
  if (value != 0)
    {
      if (GET_CODE (v4si_tmp) == SCRATCH)
	v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
    }
  else
    v4si_tmp = src;

  if (GET_CODE (df_tmp) == SCRATCH)
    df_tmp = gen_reg_rtx (DFmode);

  /* Convert to DFmode first, then narrow/widen to the requested type.  */
  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));

  if (<MODE>mode == SFmode)
    emit_insn (gen_truncdfsf2 (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
	   && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
    emit_insn (gen_extenddfif2 (dest, df_tmp));
  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
  else
    gcc_unreachable ();

  DONE;
})

;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>)
;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE
;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char,
;; vector short or vector unsigned short.
(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
	(float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  ;; Split into: extract the element, sign extend it to DImode in the same
  ;; vector register, then do the float conversion from DImode.
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 4)
	(sign_extend:DI (match_dup 3)))
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
})

;; Unsigned variant.  No explicit extend step and a signed FLOAT conversion
;; are used; presumably the ISA 3.0 extract leaves the element zero-extended
;; in the doubleword so the signed convert yields the unsigned value --
;; NOTE(review): confirm against the signed twin above and the ISA document.
(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>"
  [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>")
	(unsigned_float:FL_CONV
	 (vec_select:<VSX_EXTRACT_I:VS_scalar>
	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v")
	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
   (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
  "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
   && TARGET_P9_VECTOR"
  "#"
  "&& reload_completed"
  [(parallel [(set (match_dup 3)
		   (vec_select:<VSX_EXTRACT_I:VS_scalar>
		    (match_dup 1)
		    (parallel [(match_dup 2)])))
	      (clobber (scratch:SI))])
   (set (match_dup 0)
	(float:<FL_CONV:MODE> (match_dup 4)))]
{
  operands[4] = gen_rtx_REG (DImode, REGNO (operands[3]));
})

;; V4SI/V8HI/V16QI set operation on ISA 3.0
(define_insn "vsx_set_<mode>_p9"
  [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>")
	(unspec:VSX_EXTRACT_I
	 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0")
	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
{
  int ele = INTVAL (operands[3]);
  int nunits = GET_MODE_NUNITS (<MODE>mode);

  /* Adjust the element number for little endian element order.  */
  if (!VECTOR_ELT_ORDER_BIG)
    ele = nunits - 1 - ele;

  /* XXINSERTW/VINSERT* take a byte offset, not an element number.  */
  operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele);
  if (<MODE>mode == V4SImode)
    return "xxinsertw %x0,%x2,%3";
  else
    return "vinsert<wd> %0,%2,%3";
}
  [(set_attr "type" "vecperm")])

;; Set a V4SF element on ISA 3.0: convert the SF value to SP format with
;; XSCVDPSPN, extract the 32-bit word it lands in, then XXINSERTW it into
;; the destination.
(define_insn_and_split "vsx_set_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "gpc_reg_operand" "ww")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  [(set (match_dup 5)
	(unspec:V4SF [(match_dup 2)]
		     UNSPEC_VSX_CVDPSPN))
   (parallel [(set (match_dup 4)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 7)])))
	      (clobber (scratch:SI))])
   (set (match_dup 8)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  unsigned int tmp_regno = reg_or_subregno (operands[4]);

  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
  /* Converted scalar lands in word 1 (BE order) / word 2 (LE order).  */
  operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "12")])

;; Special case setting 0.0f to a V4SF element
(define_insn_and_split "*vsx_set_v4sf_p9_zero"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (match_operand:SF 2 "zero_fp_constant" "j")
	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 4 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
  "#"
  "&& reload_completed"
  ;; No conversion needed: just insert a zero word.
  [(set (match_dup 4)
	(const_int 0))
   (set (match_dup 5)
	(unspec:V4SI [(match_dup 5)
		      (match_dup 4)
		      (match_dup 3)]
		     UNSPEC_VSX_SET))]
{
  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  We just need to do an xxinsertw since the element is in the
;; correct location.

(define_insn "*vsx_insert_extract_v4sf_p9"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
{
  int ele = INTVAL (operands[4]);

  /* Adjust the element number for little endian element order.  */
  if (!VECTOR_ELT_ORDER_BIG)
    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;

  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
  return "xxinsertw %x0,%x2,%4";
}
  [(set_attr "type" "vecperm")])

;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
;; that is in the default scalar position (1 for big endian, 2 for little
;; endian).  Convert the insert/extract to int and avoid doing the conversion.

(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
	(unspec:V4SF
	 [(match_operand:V4SF 1 "gpc_reg_operand" "0")
	  (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
			 (parallel
			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
	 UNSPEC_VSX_SET))
   (clobber (match_scratch:SI 5 "=&wJwK"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
   && TARGET_P9_VECTOR && TARGET_POWERPC64
   && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
  "#"
  "&& 1"
  ;; Do the extract/insert as 32-bit integer words, so no SF<->SP format
  ;; conversion is needed.
  [(parallel [(set (match_dup 5)
		   (vec_select:SI (match_dup 6)
				  (parallel [(match_dup 3)])))
	      (clobber (scratch:SI))])
   (set (match_dup 7)
	(unspec:V4SI [(match_dup 8)
		      (match_dup 5)
		      (match_dup 4)]
		     UNSPEC_VSX_SET))]
{
  if (GET_CODE (operands[5]) == SCRATCH)
    operands[5] = gen_reg_rtx (SImode);

  operands[6] = gen_lowpart (V4SImode, operands[2]);
  operands[7] = gen_lowpart (V4SImode, operands[0]);
  operands[8] = gen_lowpart (V4SImode, operands[1]);
}
  [(set_attr "type" "vecperm")])

;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v;
  rtx x;

  /* Special handling for LE with -maltivec=be.  */
  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
    {
      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
    }
  else
    {
      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
    }

  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})

(define_expand "vsx_mergeh_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:VSX_D 2 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtvec v;
  rtx x;

  /* Special handling for LE with -maltivec=be.  */
  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
    {
      v = gen_rtvec (2, GEN_INT (1), GEN_INT (3));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[2], operands[1]);
    }
  else
    {
      v = gen_rtvec (2, GEN_INT (0), GEN_INT (2));
      x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]);
    }

  x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v));
  emit_insn (gen_rtx_SET (operands[0], x));
  DONE;
})

;; V2DF/V2DI splat
;; We separate the register splat insn from the memory splat insn to force the
;; register allocator to generate the indexed form of the SPLAT when it is
;; given an offsettable memory reference.  Otherwise, if the register and
;; memory insns were combined into a single insn, the register allocator will
;; load the value into a register, and then do a double word permute.
(define_expand "vsx_splat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "input_operand")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx op1 = operands[1];
  if (MEM_P (op1))
    operands[1] = rs6000_address_for_fpconvert (op1);
  else if (!REG_P (op1))
    op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
})

(define_insn "vsx_splat_<mode>_reg"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
	(vec_duplicate:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   xxpermdi %x0,%x1,%x1,0
   mtvsrdd %x0,%1,%1"
  [(set_attr "type" "vecperm")])

(define_insn "vsx_splat_<VSX_D:mode>_mem"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>")
	(vec_duplicate:VSX_D
	 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "lxvdsx %x0,%y1"
  [(set_attr "type" "vecload")])

;; V4SI splat support
(define_insn "vsx_splat_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")
	(vec_duplicate:V4SI
	 (match_operand:SI 1 "splat_input_operand" "r,Z")))]
  "TARGET_P9_VECTOR"
  "@
   mtvsrws %x0,%1
   lxvwsx %x0,%y1"
  [(set_attr "type" "vecperm,vecload")])

;; SImode is not currently allowed in vector registers.  This pattern
;; allows us to use direct move to get the value in a vector register
;; so that we can use XXSPLTW
(define_insn "vsx_splat_v4si_di"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we")
	(vec_duplicate:V4SI
	 (truncate:SI
	  (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
  "@
   xxspltw %x0,%x1,1
   mtvsrws %x0,%1"
  [(set_attr "type" "vecperm")])

;; V4SF splat (ISA 3.0)
(define_insn_and_split "vsx_splat_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
	(vec_duplicate:V4SF
	 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))]
  "TARGET_P9_VECTOR"
  "@
   lxvwsx %x0,%y1
   #
   mtvsrws %x0,%1"
  ;; The VSX-register alternative splits into convert-to-SP + splat word 0.
  "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
  [(set (match_dup 0)
	(unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
   (set (match_dup 0)
	(unspec:V4SF [(match_dup 0)
		      (const_int 0)] UNSPEC_VSX_XXSPLTW))]
  ""
  [(set_attr "type" "vecload,vecperm,mftgpr")
   (set_attr "length" "4,8,4")])

;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(vec_duplicate:VSX_W
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
	  (parallel
	   [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (3 - INTVAL (operands[2]));

  return "xxspltw %x0,%x1,%2";
}
  [(set_attr "type" "vecperm")])

;; Direct form: the element number is used as-is, with no endian adjustment.
(define_insn "vsx_xxspltw_<mode>_direct"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
	(unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTW))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxspltw %x0,%x1,%2"
  [(set_attr "type" "vecperm")])

;; V16QI/V8HI splat support on ISA 2.07
(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
	(vec_duplicate:VSX_SPLAT_I
	 (truncate:<VS_scalar>
	  (match_operand:DI 1 "altivec_register_operand" "v"))))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
  [(set_attr "type" "vecperm")])

;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
		       (match_operand:QI 2 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_XXSPLTD))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if ((VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 0)
      || (!VECTOR_ELT_ORDER_BIG && INTVAL (operands[2]) == 1))
    return "xxpermdi %x0,%x1,%x1,0";
  else
    return "xxpermdi %x0,%x1,%x1,3";
}
  [(set_attr "type" "vecperm")])

;; V4SF/V4SI interleave
(define_insn "vsx_xxmrghw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
	(vec_select:VSX_W
	 (vec_concat:<VS_double>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
	  (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>"))
	 (parallel [(const_int 0) (const_int 4)
		    (const_int 1) (const_int 5)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* On little endian, merge-high of the RTL view is merge-low of the
     hardware view with operands swapped.  */
  if (BYTES_BIG_ENDIAN)
    return "xxmrghw %x0,%x1,%x2";
  else
    return "xxmrglw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

(define_insn "vsx_xxmrglw_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
	(vec_select:VSX_W
	 (vec_concat:<VS_double>
	  (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>")
	  (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>"))
	 (parallel [(const_int 2) (const_int 6)
		    (const_int 3) (const_int 7)])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxmrglw %x0,%x1,%x2";
  else
    return "xxmrghw %x0,%x2,%x1";
}
  [(set_attr "type" "vecperm")])

;; Shift left double by word immediate
(define_insn "vsx_xxsldwi_<mode>"
  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>")
	(unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>")
		       (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>")
		       (match_operand:QI 3 "u5bit_cint_operand" "i")]
		      UNSPEC_VSX_SLDWI))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxsldwi %x0,%x1,%x2,%3"
  [(set_attr "type" "vecperm")])


;; Vector reduction insns and splitters

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df"
  [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
	(VEC_reduc:V2DF
	 (vec_concat:V2DF
	  (vec_select:DF
	   (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
	   (parallel [(const_int 1)]))
	  (vec_select:DF
	   (match_dup 1)
	   (parallel [(const_int 0)])))
	 (match_dup 1)))
   (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
	     ? gen_reg_rtx (V2DFmode)
	     : operands[2];
  /* Swap the two doublewords, then combine with the original.  */
  emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf"
  [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
	(VEC_reduc:V4SF
	 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
    }

  /* log2(4) = 2 shift/combine rounds reduce all four words.  */
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
  DONE;
}
  [(set_attr "length" "16")
   (set_attr "type" "veccomplex")])

;; Combiner patterns with the vector reduction patterns that knows we can get
;; to the top element of the V2DF array without doing an extract.
(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
  [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws")
	(vec_select:DF
	 (VEC_reduc:V2DF
	  (vec_concat:V2DF
	   (vec_select:DF
	    (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
	    (parallel [(const_int 1)]))
	   (vec_select:DF
	    (match_dup 1)
	    (parallel [(const_int 0)])))
	  (match_dup 1))
	 (parallel [(const_int 1)])))
   (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx hi = gen_highpart (DFmode, operands[1]);
  rtx lo = (GET_CODE (operands[2]) == SCRATCH)
	    ? gen_reg_rtx (DFmode)
	    : operands[2];

  /* The high doubleword is directly addressable as a DF register; only the
     low element needs an extract.  */
  emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
  emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "veccomplex")])

(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
  [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
	(vec_select:SF
	 (VEC_reduc:V4SF
	  (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
	  (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
	 (parallel [(const_int 3)])))
   (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
   (clobber (match_scratch:V4SF 4 "=0,0"))]
  "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  ""
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp2, tmp3, tmp4, tmp5;

  if (can_create_pseudo_p ())
    {
      tmp2 = gen_reg_rtx (V4SFmode);
      tmp3 = gen_reg_rtx (V4SFmode);
      tmp4 = gen_reg_rtx (V4SFmode);
      tmp5 = gen_reg_rtx (V4SFmode);
    }
  else
    {
      tmp2 = operands[2];
      tmp3 = operands[3];
      tmp4 = tmp2;
      tmp5 = operands[4];
    }

  /* Reduce as in vsx_reduc_*_v4sf, then convert the reduced SP value in
     the vector register to a scalar SF.  */
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
  emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
  emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
  DONE;
}
  [(set_attr "length" "20")
   (set_attr "type" "veccomplex")])


;; Power8 Vector fusion.  The fused ops must be physically adjacent.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_dup 0)
			   (match_operand:P 3 "int_reg_operand"))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])

;; Same, with the addend and base register commuted.
(define_peephole
  [(set (match_operand:P 0 "base_reg_operand")
	(match_operand:P 1 "short_cint_operand"))
   (set (match_operand:VSX_M 2 "vsx_register_operand")
	(mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand")
			   (match_dup 0))))]
  "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
  "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion"
  [(set_attr "length" "8")
   (set_attr "type" "vecload")])


;; ISA 3.0 vector extend sign support

(define_insn "vsx_sign_extend_qi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
	(unspec:VSINT_84
	 [(match_operand:V16QI 1 "vsx_register_operand" "v")]
	 UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsb2<wd> %0,%1"
  [(set_attr "type" "vecexts")])

(define_insn "vsx_sign_extend_hi_<mode>"
  [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
	(unspec:VSINT_84
	 [(match_operand:V8HI 1 "vsx_register_operand" "v")]
	 UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsh2<wd> %0,%1"
  [(set_attr "type" "vecexts")])
(define_insn "*vsx_sign_extend_si_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
	(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
		     UNSPEC_VSX_SIGN_EXTEND))]
  "TARGET_P9_VECTOR"
  "vextsw2d %0,%1"
  [(set_attr "type" "vecexts")])


;; ISA 3.0 Binary Floating-Point Support

;; VSX Scalar Extract Exponent Quad-Precision
(define_insn "xsxexpqp_<mode>"
  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
		   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR"
  "xsxexpqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Exponent Double-Precision
(define_insn "xsxexpdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxexpdp %0,%x1"
  [(set_attr "type" "integer")])

;; VSX Scalar Extract Significand Quad-Precision
(define_insn "xsxsigqp_<mode>"
  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
	(unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
		   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR"
  "xsxsigqp %0,%1"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Extract Significand Double-Precision
(define_insn "xsxsigdp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_SXSIG))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsxsigdp %0,%x1"
  [(set_attr "type" "integer")])

;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument
(define_insn "xsiexpqpf_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
	(unspec:IEEE128
	 [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	  (match_operand:DI 2 "altivec_register_operand" "v")]
	 UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Quad-Precision
(define_insn "xsiexpqp_<mode>"
  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
	(unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v")
			 (match_operand:DI 2 "altivec_register_operand" "v")]
			UNSPEC_VSX_SIEXPQP))]
  "TARGET_P9_VECTOR"
  "xsiexpqp %0,%1,%2"
  [(set_attr "type" "vecmove")])

;; VSX Scalar Insert Exponent Double-Precision
(define_insn "xsiexpdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DI 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
		   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])

;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument
(define_insn "xsiexpdpf"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:DF 1 "register_operand" "r")
		    (match_operand:DI 2 "register_operand" "r")]
		   UNSPEC_VSX_SIEXPDP))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "xsiexpdp %x0,%1,%2"
  [(set_attr "type" "fpsimple")])

;; VSX Scalar Compare Exponents Double-Precision
(define_expand "xscmpexpdp_<code>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:DF
	  [(match_operand:DF 1 "vsx_register_operand" "wa")
	   (match_operand:DF 2 "vsx_register_operand" "wa")]
	  UNSPEC_VSX_SCMPEXPDP)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(CMP_TEST:SI (match_dup 3)
		     (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*xscmpexpdp"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
	(compare:CCFP
	 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa")
		     (match_operand:DF 2 "vsx_register_operand" "wa")]
		    UNSPEC_VSX_SCMPEXPDP)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xscmpexpdp %0,%x1,%x2"
  [(set_attr "type" "fpcompare")])

;; VSX Scalar Test Data Class Quad-Precision
;;  (Expansion for scalar_test_data_class (__ieee128, int))
;;   (Has side effect of setting the lt bit if operand 1 is negative,
;;    setting the eq bit if any of the conditions tested by operand 2
;;    are satisfied, and clearing the gt and undordered bits to zero.)
(define_expand "xststdcqp_<mode>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; VSX Scalar Test Data Class Double- and Single-Precision
;;  (The lt bit is set if operand 1 is negative.  The eq bit is set
;;   if any of the conditions tested by operand 2 are satisfied.
;;   The gt and unordered bits are cleared to zero.)
(define_expand "xststdc<Fvsx>"
  [(set (match_dup 3)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 4)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[3] = gen_reg_rtx (CCFPmode);
  operands[4] = CONST0_RTX (SImode);
})

;; The VSX Scalar Test Negative Quad-Precision
;; (uses XSTSTDCQP with a zero condition mask and tests the lt bit)
(define_expand "xststdcnegqp_<mode>"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; The VSX Scalar Test Negative Double- and Single-Precision
(define_expand "xststdcneg<Fvsx>"
  [(set (match_dup 2)
	(compare:CCFP
	 (unspec:SFDF
	  [(match_operand:SFDF 1 "vsx_register_operand" "wa")
	   (const_int 0)]
	  UNSPEC_VSX_STSTDC)
	 (match_dup 3)))
   (set (match_operand:SI 0 "register_operand" "=r")
	(lt:SI (match_dup 2)
	       (const_int 0)))]
  "TARGET_P9_VECTOR"
{
  operands[2] = gen_reg_rtx (CCFPmode);
  operands[3] = CONST0_RTX (SImode);
})

;; Insn matched by the xststdcqp expanders above.
(define_insn "*xststdcqp_<mode>"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:IEEE128
	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")
	   (match_operand:SI 2 "u7bit_cint_operand" "n")]
	  UNSPEC_VSX_STSTDC)
	 (const_int 0)))]
  "TARGET_P9_VECTOR"
  "xststdcqp %0,%1,%2"
  [(set_attr "type" "fpcompare")])

;; Insn matched by the xststdc<Fvsx> expanders above.
(define_insn "*xststdc<Fvsx>"
  [(set (match_operand:CCFP 0 "" "=y")
	(compare:CCFP
	 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa")
		       (match_operand:SI 2 "u7bit_cint_operand" "n")]
		      UNSPEC_VSX_STSTDC)
	 (match_operand:SI 3 "zero_constant" "j")))]
  "TARGET_P9_VECTOR"
  "xststdc<Fvsx> %0,%x1,%2"
  [(set_attr "type" "fpcompare")])

;; VSX Vector Extract Exponent Double and Single Precision
(define_insn "xvxexp<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXEXP))]
  "TARGET_P9_VECTOR"
  "xvxexp<VSs> %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Extract Significand Double and Single Precision
(define_insn "xvxsig<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VXSIG))]
  "TARGET_P9_VECTOR"
  "xvxsig<VSs> %x0,%x1"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Insert Exponent Double and Single Precision
(define_insn "xviexp<VSs>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F
	 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")
	  (match_operand:VSX_F 2 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_VIEXP))]
  "TARGET_P9_VECTOR"
  "xviexp<VSs> %x0,%x1,%x2"
  [(set_attr "type" "vecsimple")])

;; VSX Vector Test Data Class Double and Single Precision
;; The corresponding elements of the result vector are all ones
;; if any of the conditions tested by operand 3 are satisfied.
4753(define_insn "xvtstdc<VSs>" 4754 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa") 4755 (unspec:<VSI> 4756 [(match_operand:VSX_F 1 "vsx_register_operand" "wa") 4757 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4758 UNSPEC_VSX_VTSTDC))] 4759 "TARGET_P9_VECTOR" 4760 "xvtstdc<VSs> %x0,%x1,%2" 4761 [(set_attr "type" "vecsimple")]) 4762 4763;; ISA 3.0 String Operations Support 4764 4765;; Compare vectors producing a vector result and a predicate, setting CR6 4766;; to indicate a combined status. This pattern matches v16qi, v8hi, and 4767;; v4si modes. It does not match v2df, v4sf, or v2di modes. There's no 4768;; need to match v4sf, v2df, or v2di modes because those are expanded 4769;; to use Power8 instructions. 4770(define_insn "*vsx_ne_<mode>_p" 4771 [(set (reg:CC CR6_REGNO) 4772 (unspec:CC 4773 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") 4774 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))] 4775 UNSPEC_PREDICATE)) 4776 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v") 4777 (ne:VSX_EXTRACT_I (match_dup 1) 4778 (match_dup 2)))] 4779 "TARGET_P9_VECTOR" 4780 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2" 4781 [(set_attr "type" "vecsimple")]) 4782 4783(define_insn "*vector_nez_<mode>_p" 4784 [(set (reg:CC CR6_REGNO) 4785 (unspec:CC [(unspec:VI 4786 [(match_operand:VI 1 "gpc_reg_operand" "v") 4787 (match_operand:VI 2 "gpc_reg_operand" "v")] 4788 UNSPEC_NEZ_P)] 4789 UNSPEC_PREDICATE)) 4790 (set (match_operand:VI 0 "gpc_reg_operand" "=v") 4791 (unspec:VI [(match_dup 1) 4792 (match_dup 2)] 4793 UNSPEC_NEZ_P))] 4794 "TARGET_P9_VECTOR" 4795 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2" 4796 [(set_attr "type" "vecsimple")]) 4797 4798;; Return first position of match between vectors using natural order 4799;; for both LE and BE execution modes. 
(define_expand "first_match_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;

  rtx cmp_result = gen_reg_rtx (<MODE>mode);
  rtx not_result = gen_reg_rtx (<MODE>mode);

  /* cmp_result has ones where elements differ (or are zero); invert it so
     ones mark matching elements, then count from the appropriate end.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result));

  /* vclzlsbb/vctzlsbb return a byte count; shift right by sh to convert
     it to an element index.  NOTE(review): inner-size/2 equals
     log2 (inner size) only for the 2- and 4-byte elements used here.  */
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      /* Byte elements: the byte count is already the element index.  */
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, not_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, not_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of match between vectors or end of string (EOS) using
;; natural element order for both LE and BE execution modes.
(define_expand "first_match_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that do not match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements that match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result));
  /* Byte count -> element index shift, as in first_match_index above.  */
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors using natural
;; element order for both LE and BE execution modes.
(define_expand "first_mismatch_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmp_result = gen_reg_rtx (<MODE>mode);

  /* Ones in elements that differ (or are zero in either input).  */
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1],
					    operands[2]));
  /* Byte count -> element index shift, as in first_match_index.  */
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Return first position of mismatch between vectors or end of string (EOS)
;; using natural element order for both LE and BE execution modes.
(define_expand "first_mismatch_or_eos_index_<mode>"
  [(match_operand:SI 0 "register_operand")
   (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand")
	       (match_operand:VSX_EXTRACT_I 2 "register_operand")]
	      UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)]
  "TARGET_P9_VECTOR"
{
  int sh;
  rtx cmpz1_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz2_result = gen_reg_rtx (<MODE>mode);
  rtx cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx not_cmpz_result = gen_reg_rtx (<MODE>mode);
  rtx and_result = gen_reg_rtx (<MODE>mode);
  rtx result = gen_reg_rtx (<MODE>mode);
  rtx vzero = gen_reg_rtx (<MODE>mode);

  /* Vector with zeros in elements that correspond to zeros in operands.  */
  emit_move_insn (vzero, CONST0_RTX (<MODE>mode));

  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero));
  emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero));
  emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result));

  /* Vector with ones in elements that match.  */
  emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1],
					     operands[2]));
  emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result));

  /* Create vector with ones in elements where there was a zero in one of
     the source elements or the elements did not match.  */
  emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result));
  /* Byte count -> element index shift, as in first_match_index.  */
  sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2;

  if (<MODE>mode == V16QImode)
    {
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (operands[0], result));
      else
	emit_insn (gen_vclzlsbb_<mode> (operands[0], result));
    }
  else
    {
      rtx tmp = gen_reg_rtx (SImode);
      if (!BYTES_BIG_ENDIAN)
	emit_insn (gen_vctzlsbb_<mode> (tmp, result));
      else
	emit_insn (gen_vclzlsbb_<mode> (tmp, result));
      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh)));
    }
  DONE;
})

;; Load VSX Vector with Length.  The length lives in bits 0:7 of the
;; register operand, hence the shift left by 56 in the expander.
(define_expand "lxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand" "b")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])

;; Expand for builtin xl_len_r: left-justified load of operands[2] bytes
;; from operands[1], realigned with a vperm using an lvsl-generated mask.
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
					  shift_mask));
  DONE;
})

(define_insn "stxvll"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVLL))]
  "TARGET_P9_VECTOR"
  "stxvll %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Store VSX Vector with Length.  As with lxvl, the byte count must be
;; placed in bits 0:7, hence the shift left by 56.
(define_expand "stxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Expand for builtin xst_len_r.  The condition used to be the literal
;; string "UNSPEC_XST_LEN_R", i.e. an unspec enumerator used as a C
;; condition; use "" to match the sibling xl_len_r expander.
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
   (match_operand:DI 1 "register_operand" "b")
   (match_operand:DI 2 "register_operand" "r")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
					  shift_mask));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})

;; Vector Compare Not Equal Byte (specified/not+eq:)
(define_insn "vcmpneb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(not:V16QI
	 (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v")
		   (match_operand:V16QI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Byte
(define_insn "vcmpnezb"
  [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
	(unspec:V16QI
	 [(match_operand:V16QI 1 "altivec_register_operand" "v")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZB))]
  "TARGET_P9_VECTOR"
  "vcmpnezb %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Half Word (specified/not+eq:)
(define_insn "vcmpneh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(not:V8HI
	 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v")
		  (match_operand:V8HI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpneh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Half Word
(define_insn "vcmpnezh"
  [(set (match_operand:V8HI 0 "altivec_register_operand" "=v")
	(unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v")
		      (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZH))]
  "TARGET_P9_VECTOR"
  "vcmpnezh %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal Word (specified/not+eq:)
(define_insn "vcmpnew"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(not:V4SI
	 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v")
		  (match_operand:V4SI 2 "altivec_register_operand" "v"))))]
  "TARGET_P9_VECTOR"
  "vcmpnew %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Compare Not Equal or Zero Word
(define_insn "vcmpnezw"
  [(set (match_operand:V4SI 0 "altivec_register_operand" "=v")
	(unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v")
		      (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VCMPNEZW))]
  "TARGET_P9_VECTOR"
  "vcmpnezw %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Count Leading Zero Least-Significant Bits Byte
(define_insn "vclzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCLZLSBB))]
  "TARGET_P9_VECTOR"
  "vclzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Count Trailing Zero Least-Significant Bits Byte
(define_insn "vctzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
	 UNSPEC_VCTZLSBB))]
  "TARGET_P9_VECTOR"
  "vctzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector insert/extract word at arbitrary byte values.  Note, the little
;; endian version needs to adjust the byte number, and the V4SI element in
;; vinsert4b.
;; The output operand was missing its "=wa" constraint; add it to match
;; the constrained inputs.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
		      (match_operand:QI 2 "const_0_to_12_operand" "n")]
	 UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!VECTOR_ELT_ORDER_BIG)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
		       (match_operand:V16QI 2 "vsx_register_operand")
		       (match_operand:QI 3 "const_0_to_12_operand")]
	 UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!VECTOR_ELT_ORDER_BIG)
    {
      /* Swap the two V2DI halves of the source and mirror the byte
	 offset, so the same insn works in little endian mode.  */
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
		       (match_operand:V16QI 2 "vsx_register_operand" "0")
		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
	 UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])


;; Generate vector extract four float 32 values from left four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shorth"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
	 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Generate vector extract four float 32 values from right four elements
;; of eight element vector of float 16 values.
(define_expand "vextract_fp_from_shortl"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
	 UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
  "TARGET_P9_VECTOR"
{
  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};

  int i;
  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  /* Pick the endian-appropriate permute control vector.  */
  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the
     conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes with in a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

;; V16QI variant: implemented by viewing the value as V1TI, doing the
;; 128-bit byte reverse, and viewing the result back as V16QI.
(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

;; V2DF variant via a V2DI view of the same bits.
(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

;; V4SF variant via a V4SI view of the same bits.
(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of vector.  Uses the ISA 3.0 xxbr*
;; insns when available, and falls back to an AltiVec vperm otherwise.
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* Want to have the elements in reverse order relative
	 to the endian mode in use, i.e. in LE mode, put elements
	 in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
					   operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])


;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR		 0)		;; GPR temporary
   (SFBOOL_TMP_VSX		 1)		;; vector temporary
   (SFBOOL_MFVSR_D		 2)		;; move to gpr dest
   (SFBOOL_MFVSR_A		 3)		;; move to gpr src
   (SFBOOL_BOOL_D		 4)		;; and/ior/xor dest
   (SFBOOL_BOOL_A1		 5)		;; and/ior/xor arg1
   (SFBOOL_BOOL_A2		 6)		;; and/ior/xor arg2
   (SFBOOL_SHL_D		 7)		;; shift left dest
   (SFBOOL_SHL_A		 8)		;; shift left arg
   (SFBOOL_MTVSR_D		 9)		;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF		10)		;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI		11)		;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI		12)		;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF		13)])		;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations using logical operations to
;; pick apart SFmode operations.  For example, there is code from e_powf.c
;; after macro expansion that looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after a
;; shift and do the VSX logical operation.

;; The insns for dealing with SFmode in GPR registers looks like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))

(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
	(zero_extend:DI
	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
		   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
      to compare registers, when the mode is different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
	   && REGNO (operands[SFBOOL_MFVSR_D])
		== REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
		   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
	(match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
			  (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      /* Constant mask: materialize it in the GPR scratch first.  */
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      /* Register mask: use whichever boolean input is NOT the value that
	 came from the vector register.  */
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
			  ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  /* Re-type the remaining operands for the vector-side replacement.  */
  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})